Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
require 'date'
require_relative 'feature_loader.rb'

# k-nearest-neighbours classifier over the records supplied by a FeatureLoader.
#
# Every record (training or test) is accessed positionally: element 0 is the
# class label and element 1 is the numeric feature vector.
class KNN
  attr_accessor :feature_loader

  # @param path [String] handed unchanged to FeatureLoader.new
  def initialize(path)
    self.feature_loader = FeatureLoader.new(path)
  end

  # Classifies every record in the loader's test data using +k+ neighbours,
  # prints a summary to stdout and returns the number of correct predictions.
  #
  # @param k [Integer] number of neighbours that vote (must be positive)
  # @return [Integer] count of test records whose label was predicted correctly
  # @raise [ArgumentError] if +k+ is not a positive Integer
  def tests(k)
    ensure_valid_k(k)
    @k = k # remembered so classify can be called later without an explicit k

    test_data = feature_loader.test_data
    correct = test_data.count { |test| test[0] == classify(test, k) }
    incorrect = test_data.length - correct

    puts "Correct: #{correct}"
    puts "Incorrect: #{incorrect}"
    # NOTE: despite the label this is a fraction in 0..1, not a value out of
    # 100; it is NaN when there is no test data (0.0 / 0).
    puts "Percentage: #{correct.to_f / test_data.length}"
    correct
  end

  # Predicts the class label of a single record by majority vote among its
  # +k+ nearest training records (Euclidean distance).
  #
  # @param review [Array] record whose element 1 is the feature vector
  # @param k [Integer] number of neighbours; defaults to the k last given to #tests
  # @return [Object, nil] the winning label, or nil when there is no training data
  # @raise [ArgumentError] if +k+ is not a positive Integer
  def classify(review, k = @k)
    ensure_valid_k(k)
    test_vector = review[1]

    labelled_distances = feature_loader.train_data.map do |data|
      [data[0], euclid_distance(test_vector, data[1])]
    end

    # first(k) keeps exactly k neighbours; the previous inclusive slice
    # [0..k] let k + 1 neighbours vote.
    nearest_labels = labelled_distances.sort_by { |pair| pair[1] }.first(k).map { |pair| pair[0] }
    mode_class(nearest_labels)
  end

  # Returns the most frequent element of +distances+.
  #
  # Despite the parameter name, #classify passes the labels of the nearest
  # neighbours here, ordered nearest-first. Ties go to the label that appears
  # earliest in the list.
  #
  # @param distances [Array] labels to vote over
  # @return [Object, nil] most common label, or nil for an empty list
  def mode_class(distances)
    frequency = distances.each_with_object(Hash.new(0)) { |label, counts| counts[label] += 1 }
    distances.max_by { |label| frequency[label] }
  end

  # Euclidean distance between two equally long numeric vectors.
  #
  # @param v1 [Array<Numeric>]
  # @param v2 [Array<Numeric>]
  # @return [Float]
  # @raise [RuntimeError] if the vectors differ in length
  def euclid_distance(v1, v2)
    raise 'Vectors different length' if v1.length != v2.length

    # Plain left-to-right accumulation, so results match the earlier
    # map + inject implementation bit for bit.
    squared_sum = v1.zip(v2).inject(0) { |sum, (a, b)| sum + (a - b)**2 }
    Math.sqrt(squared_sum)
  end

  private

  # Guards against a missing or nonsensical neighbour count.
  def ensure_valid_k(k)
    return if k.is_a?(Integer) && k.positive?

    raise ArgumentError, "k must be a positive integer, got #{k.inspect}"
  end
end
# Script entry point: evaluate the classifier on the bundled data set with a
# single neighbour count and report how many test records were classified
# correctly.
k = 9
knn = KNN.new('CA2data.txt')
result = knn.tests(k)
puts format('k = %s - correct = %s', k, result)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement