Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from math import sqrt
- import warnings
- import pandas as pd
- import random
- from collections import Counter
def k_nearest_neighbors(data, predict, k=3):
    """Classify ``predict`` by majority vote among its ``k`` nearest points.

    Args:
        data: Mapping of group label -> iterable of feature vectors
            belonging to that group.
        predict: Feature vector to classify; it is subtracted element-wise
            from every training vector, so the lengths must match.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The group label that occurs most often among the ``k`` training
        points with the smallest Euclidean distance to ``predict``.

    Warns:
        UserWarning: if the number of groups in ``data`` is greater than or
            equal to ``k``.
    """
    if len(data) >= k:
        warnings.warn('K is set to a value less than total voting groups')

    # Convert the query point once instead of once per training sample.
    target = np.asarray(predict)

    # One [distance, label] pair per training sample, across all groups.
    distances = [
        [np.linalg.norm(np.asarray(features) - target), group]
        for group in data
        for features in data[group]
    ]

    # Sorting the pairs orders by distance first (label breaks exact ties);
    # the first k entries are the voters.
    votes = [group for _, group in sorted(distances)[:k]]

    # most_common(1) -> [(label, count)]; [0][0] extracts the winning label.
    return Counter(votes).most_common(1)[0][0]
# Load the data set and prepare it for the classifier.
df = pd.read_csv("breast-cancer-wisconsin.data.txt")
# '?' entries (presumably missing values -- confirm against the data file)
# are replaced by a large negative sentinel so every cell converts to float.
df.replace('?', -99999, inplace=True)
# The 'id' column is dropped before the rows are used as feature vectors.
# `axis` must be passed by keyword: pandas 2.x rejects the positional form.
df.drop(['id'], axis=1, inplace=True)
full_data = df.astype(float).values.tolist()
random.shuffle(full_data)

# Hold out the last `test_size` fraction of the shuffled rows for testing.
test_size = 0.2
# An explicit split index avoids the `[:-0]` pitfall: when the test count
# rounds down to 0, a negative-zero slice would leave the training set empty
# and put every row in the test set.
split_at = len(full_data) - int(test_size * len(full_data))
train_data = full_data[:split_at]
test_data = full_data[split_at:]

# Group the feature vectors by class label (the last column of each row).
train_set = {2: [], 4: []}  # dictionary of classes
test_set = {2: [], 4: []}
for row in train_data:
    train_set[row[-1]].append(row[:-1])
for row in test_data:
    # Held-out rows go into test_set; appending them to train_set left the
    # test set empty and caused the division by zero below.
    test_set[row[-1]].append(row[:-1])

# Classify every held-out sample and count how many votes match its label.
correct = 0
total = 0
for group in test_set:
    for datax in test_set[group]:
        vote = k_nearest_neighbors(train_set, datax, k=5)
        if group == vote:
            correct += 1
        total += 1

# Guard against an empty test set (e.g. a very small data file).
if total:
    print('accuracy:', correct / total)
else:
    print('accuracy: n/a (no test samples)')
- Traceback (most recent call last):
- File "my_k_near_neigh.py", line 67, in <module>
- print(c/t)
- ZeroDivisionError: division by zero
Add Comment
Please, Sign In to add comment