require 'date'
require_relative 'feature_loader.rb'

# k-nearest-neighbour classifier over the train/test sets exposed by a
# FeatureLoader.
#
# Every row read from the loader is indexed as row[0] (the class label) and
# row[1] (the numeric feature vector); that is the only shape this class
# relies on.
class KNN
  attr_accessor :feature_loader

  # @param path [String] handed straight to FeatureLoader.new
  def initialize(path)
    self.feature_loader = FeatureLoader.new(path)
  end

  # Classifies every row of the loader's test data using k neighbours and
  # prints a summary to stdout.
  #
  # @param k number of nearest neighbours that vote (stored in @k)
  # @return [Integer] how many test rows were classified correctly
  def tests(k)
    @k = k
    test_data = feature_loader.test_data

    # A row counts as correct when its own label (row[0]) equals the prediction.
    # For debugging, print the mismatches:
    #   puts "Expected Class #{test[0]} - Predicted Class #{predicted_class}"
    correct = test_data.count { |test| test[0] == classify(test) }
    incorrect = test_data.length - correct

    puts "Correct: #{correct}"
    puts "Incorrect: #{incorrect}"
    # NOTE: despite the label this prints a fraction (correct / total), not a
    # value scaled to 0..100.
    puts "Percentage: #{correct.to_f / test_data.length}"
    correct
  end

  # Predicts the class of one row by majority vote among its k nearest
  # training rows, using Euclidean distance.
  #
  # @param review a row whose element [1] is the feature vector
  # @param k number of neighbours to consult; defaults to the value stored by
  #   the most recent call to #tests
  # @return the winning class label
  def classify(review, k = @k)
    test_vector = review[1]

    labelled_distances = feature_loader.train_data.map do |data|
      [data[0], euclid_distance(test_vector, data[1])]
    end

    # first(k) keeps exactly k neighbours. The previous inclusive slice
    # [0..@k] kept k + 1 of them, so one extra row took part in the vote.
    nearest_labels = labelled_distances.sort_by { |_label, distance| distance }
                                       .first(k)
                                       .map { |label, _distance| label }
    mode_class(nearest_labels)
  end

  # Picks the most frequent label in a list.
  #
  # @param distances [Array] class labels (the caller passes them ordered
  #   nearest-first)
  # @return the label with the highest count, or nil for an empty list; when
  #   several labels share the top count, whichever of them max_by selects
  def mode_class(distances)
    frequency = distances.each_with_object(Hash.new(0)) { |label, counts| counts[label] += 1 }
    distances.max_by { |label| frequency[label] }
  end

  # Euclidean (L2) distance between two vectors of equal length.
  #
  # @param v1 [Array<Numeric>]
  # @param v2 [Array<Numeric>]
  # @return [Float]
  # @raise [RuntimeError] if the vectors differ in length
  def euclid_distance(v1, v2)
    raise 'Vectors different length' if v1.length != v2.length

    squared_sum = v1.zip(v2).inject(0) { |sum, (a, b)| sum + (a - b)**2 }
    Math.sqrt(squared_sum)
  end
end

# Driver: evaluate the classifier on the bundled data set with k = 9.
k = 9
knn = KNN.new('CA2data.txt')
result = knn.tests(k)
puts "k = #{k} - correct = #{result}"