Advertisement
saputra_adi

knn-main(python)

Dec 14th, 2016
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.60 KB | None | 0 0
  1. import os
  2. cwd = os.getcwd()
  3. os.chdir('/home/student/Documents/KNN')
  4.  
  5. import csv
  6. with open('iris.csv') as csvfile:
  7.     lines = csv.reader(csvfile)
  8.     for row in lines:
  9.         print(','.join(row))
  10.  
  11. import csv
  12. import random
  13. def loadDataset(filename, split_ratio, trainingSet=[] , testSet=[]):
  14.         with open(filename) as csvfile:
  15.             lines = csv.reader(csvfile)
  16.             dataset = list(lines)
  17.             for x in range(len(dataset)-1):
  18.                 for y in range(4):
  19.                     dataset[x][y] = float(dataset[x][y])
  20.                 if random.random() < split_ratio:
  21.                     trainingSet.append(dataset[x])
  22.                 else:
  23.                     testSet.append(dataset[x])
  24.  
  25.  
  26. import math
  27. def euclideanDistance(instance1, instance2, length):
  28.         distance = 0
  29.         for x in range(length):
  30.             distance += pow((instance1[x]-instance2[x]), 2)
  31.         return math.sqrt(distance)
  32.  
  33.  
  34.  
  35. import operator
  36. def getNeighbors(trainingSet, testInstance, k):
  37.         distances = []
  38.         length = len(testInstance)-1
  39.         for x in range(len(trainingSet)):
  40.             dist = euclideanDistance(testInstance,trainingSet[x], length)
  41.             distances.append((trainingSet[x], dist))
  42.         distances.sort(key=operator.itemgetter(1))
  43.         neighbors = []
  44.         for x in range(k):
  45.             neighbors.append(distances[x][0])
  46.         return neighbors
  47.  
  48. # accuracy
  49. def getAccuracy(testSet, predictions):
  50.     correct = 0
  51.     for x in range(len(testSet)):
  52.         if testSet[x][-1] == predictions[x]:
  53.             correct += 1
  54.         return (correct/float(len(testSet))) * 100.0
  55.  
  56.  
  57. #Response
  58. import operator
  59. def getResponse(neighbors):
  60.     classVotes = {}
  61.     for x in range(len(neighbors)):
  62.         response = neighbors[x][-1]
  63.         if response in classVotes:
  64.             classVotes[response] += 1
  65.         else:
  66.             classVotes[response] = 1
  67.         sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
  68.         return sortedVotes[0][0]
  69.  
  70. # prepare data
  71. trainingSet=[]
  72. testSet=[]
  73. split = 0.67
  74. loadDataset('iris.csv', split, trainingSet, testSet)
  75. print('Train set: ' + repr(len(trainingSet)))
  76. print ('Test set: ' + repr(len(testSet)))
  77. # generate predictions
  78. predictions=[]
  79. k = 3
  80. for x in range(len(testSet)):
  81.     neighbors = getNeighbors(trainingSet, testSet[x], k)
  82.     result = getResponse(neighbors)
  83.     predictions.append(result)
  84.     print('> predicted=' + repr(result) + ', actual=' + repr(testSet[x][-1]))
  85. accuracy = getAccuracy(testSet, predictions)
  86. print('Accuracy: ' + repr(accuracy) + '%')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement