Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def myClassify0(inX, dataSet, labels, k):
    """Classify ``inX`` by a vote among its ``k`` closest rows of ``dataSet``.

    The distances from ``inX`` to the rows of ``dataSet`` come from
    ``calculateEuclideanDistance``; their ``argsort()`` gives the row
    indices ordered from smallest to largest distance, and
    ``findKClosestDataPoint`` picks the winning label from the first ``k``.

    Args:
        inX: The point to classify.
        dataSet: The reference points, one per row.
        labels: Class labels, indexable by row index of ``dataSet``.
        k: How many of the closest rows take part in the vote.

    Returns:
        Whatever ``findKClosestDataPoint`` returns for the ordered indices.
    """
    nearestFirst = calculateEuclideanDistance(inX, dataSet).argsort()
    return findKClosestDataPoint(nearestFirst, labels, k)
def findKClosestDataPoint(sortedDistIndices, labels, k):
    """Return the most frequent label among the first ``k`` sorted indices.

    Args:
        sortedDistIndices: Indices into ``labels``; only the first ``k``
            entries are read (``myClassify0`` passes the argsort of the
            distances, i.e. closest first).
        labels: Sequence of hashable class labels.
        k: Number of leading indices that each cast one vote.

    Returns:
        The label with the highest vote count. On Python 3.7+ a tie goes to
        the tied label that received its first vote earliest, because dicts
        keep insertion order and ``sorted`` is stable.

    Raises:
        IndexError: If ``k`` is less than 1 (no votes are cast) or larger
            than the number of available indices.
    """
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndices[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # dict.items() works on both Python 2 and 3; the previous iteritems()
    # call exists only on Python 2 and raises AttributeError on Python 3.
    sortedClassCount = sorted(
        classCount.items(),
        key=lambda labelAndCount: labelAndCount[1],
        reverse=True,
    )
    return sortedClassCount[0][0]
def calculateEuclideanDistance(inX, dataSet):
    """Compute the distance from ``inX`` to every row of ``dataSet``.

    Chains the four helper steps in order: differences, squares of the
    differences, per-row sum of the squares, and the square root of each sum.

    Args:
        inX: The query point.
        dataSet: The reference points, one per row.

    Returns:
        The result of ``getSquareRootOfSum`` for the per-row sums.
    """
    return getSquareRootOfSum(
        sumSqDifferences(
            getSquareOfDifferences(
                getDifferences(inX, dataSet)
            )
        )
    )
def getDifferences(inX, dataSet):
    """Subtract ``dataSet`` from ``inX`` repeated once per ``dataSet`` row.

    Args:
        inX: The query point.
        dataSet: Object exposing ``shape``; ``shape[0]`` is used as the
            repeat count.

    Returns:
        ``tile(inX, (rows, 1)) - dataSet``.
    """
    # NOTE(review): ``tile`` is presumably numpy.tile brought in by a
    # file-level import that is not visible here -- confirm.
    rowCount = dataSet.shape[0]
    repeatedQuery = tile(inX, (rowCount, 1))
    return repeatedQuery - dataSet
def getSquareOfDifferences(sqDiffMat):
    """Raise ``sqDiffMat`` to the power of two.

    Despite the parameter name, the argument is the value that gets squared
    here; ``calculateEuclideanDistance`` passes the raw differences.

    Args:
        sqDiffMat: Value supporting the ``**`` operator.

    Returns:
        ``sqDiffMat ** 2``.
    """
    exponent = 2
    return sqDiffMat ** exponent
def sumSqDifferences(sqDiffMat):
    """Sum ``sqDiffMat`` along axis 1.

    Args:
        sqDiffMat: Object exposing ``sum(axis=...)``.

    Returns:
        The result of ``sqDiffMat.sum(axis=1)``.
    """
    rowTotals = sqDiffMat.sum(axis=1)
    return rowTotals
def getSquareRootOfSum(sqDistance):
    """Take the square root of ``sqDistance`` via the power operator.

    Args:
        sqDistance: Value supporting the ``**`` operator.

    Returns:
        ``sqDistance ** 0.5``.
    """
    oneHalf = 0.5
    return sqDistance ** oneHalf
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement