Advertisement
Guest User

kNN

a guest
Jun 20th, 2013
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.18 KB | None | 0 0
  1. def myClassify0(inX, dataSet, labels, k) :
  2.     distances = calculateEuclideanDistance(inX, dataSet)
  3.    
  4.     sortedDistIndices = distances.argsort()
  5.     return findKClosestDataPoint(sortedDistIndices, labels, k)
  6.  
  7.  
  8. def findKClosestDataPoint(sortedDistIndices, labels, k) :
  9.     classCount = {}
  10.     for i in range(k) :
  11.         voteIlabel = labels[sortedDistIndices[i]]
  12.         classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
  13.    
  14.     sortedClassCount = sorted(classCount.iteritems(), key = operator.itemgetter(1), reverse = True)
  15.  
  16.     return sortedClassCount[0][0]    
  17.  
  18. def calculateEuclideanDistance(inX, dataSet) :
  19.     diffMatrix = getDifferences(inX, dataSet)
  20.     squareDiffMatrix = getSquareOfDifferences(diffMatrix)
  21.     sqDistance = sumSqDifferences(squareDiffMatrix)
  22.     return getSquareRootOfSum(sqDistance)
  23.  
  24. def getDifferences(inX, dataSet) :
  25.     dataSetSize = dataSet.shape[0]
  26.     diffMat = tile(inX, (dataSetSize, 1)) - dataSet
  27.    
  28.     return diffMat
  29.  
  30. def getSquareOfDifferences(sqDiffMat) :
  31.     return sqDiffMat**2
  32.  
  33. def sumSqDifferences(sqDiffMat) :
  34.     return sqDiffMat.sum(axis=1)
  35.  
  36. def getSquareRootOfSum(sqDistance) :
  37.     return sqDistance ** 0.5
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement