Advertisement
Guest User

knn.rb

a guest
May 27th, 2017
265
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.50 KB | None | 0 0
  require 'date'
  require_relative 'feature_loader.rb'

  # k-nearest-neighbours classifier driven by a FeatureLoader.
  #
  # The loader is expected to expose +train_data+ and +test_data+, each a
  # collection of records where index 0 holds the class label and index 1
  # holds the numeric feature vector (that is how every record is read below).
  class KNN
    attr_accessor :feature_loader

    # @param path [String] passed straight through to FeatureLoader.new
    def initialize(path)
      self.feature_loader = FeatureLoader.new(path)
    end

    # Classifies every test record using +k+ neighbours, prints a summary and
    # returns the number of correct predictions.
    #
    # @param k [Integer] number of nearest neighbours that vote on each record
    # @return [Integer] count of test records whose label matched the prediction
    # @raise [ArgumentError] if +k+ is not a number >= 1
    def tests(k)
      raise ArgumentError, "k must be at least 1 (got #{k.inspect})" unless k.is_a?(Numeric) && k >= 1

      @k = k
      test_data = feature_loader.test_data

      correct = 0
      test_data.each do |test|
        correct += 1 if test[0] == classify(test)
      end
      incorrect = test_data.length - correct

      puts "Correct: #{correct}"
      puts "Incorrect: #{incorrect}"
      # NOTE: despite the label, this is a fraction in 0..1 (NaN when there is
      # no test data), not a value scaled to 100.
      puts "Percentage: #{correct.to_f / test_data.length}"
      correct
    end

    # Predicts the class of one record by majority vote among the k training
    # records closest to it (Euclidean distance between feature vectors).
    #
    # @param review [Array] record whose index 1 is the feature vector
    # @return [Object, nil] the winning class label; nil if there is no training data
    # @raise [RuntimeError] if k has not been set via #tests, or if a training
    #   vector's length differs from the record's
    def classify(review)
      raise 'k has not been set; call tests(k) first' unless @k

      test_vector = review[1]
      labelled_distances = feature_loader.train_data.map do |data|
        [data[0], euclid_distance(test_vector, data[1])]
      end

      # first(@k) keeps exactly k neighbours; the previous inclusive slice
      # [0..@k] let k + 1 neighbours vote.
      nearest_labels = labelled_distances.sort_by { |pair| pair[1] }.first(@k).map { |pair| pair[0] }

      mode_class(nearest_labels)
    end

    # Returns the most frequent element of +distances+.
    #
    # Despite the parameter name, #classify passes in the class labels of the
    # nearest neighbours, ordered nearest first. On a tie the label that
    # appears earliest in the list wins.
    #
    # @param distances [Array] labels to vote over
    # @return [Object, nil] the most common label, or nil for an empty list
    def mode_class(distances)
      frequency = distances.each_with_object(Hash.new(0)) { |label, counts| counts[label] += 1 }
      distances.max_by { |label| frequency[label] }
    end

    # Euclidean distance between two equal-length numeric vectors.
    #
    # @param v1 [Array<Numeric>]
    # @param v2 [Array<Numeric>]
    # @return [Float]
    # @raise [RuntimeError] if the vectors differ in length
    def euclid_distance(v1, v2)
      raise 'Vectors different length' if v1.length != v2.length

      Math.sqrt(v1.zip(v2).inject(0) { |sum, (a, b)| sum + (a - b)**2 })
    end
  end
  57.  
  # Evaluate the classifier on the CA2 data set with a single, named value of k
  # and report how many test records were labelled correctly.
  neighbours = 9
  classifier = KNN.new('CA2data.txt')
  correct_count = classifier.tests(neighbours)
  puts "k = #{neighbours} - correct = #{correct_count}"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement