lalkaed

MovieRate

Sep 3rd, 2018
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.37 KB | None | 0 0
  1. from movies import training_set, training_labels, validation_set, validation_labels
  2.  
  3. def distance(movie1, movie2):
  4.   squared_difference = 0
  5.   for i in range(len(movie1)):
  6.     squared_difference += (movie1[i] - movie2[i]) ** 2
  7.   final_distance = squared_difference ** 0.5
  8.   return final_distance
  9.  
  10. def classify(unknown, dataset, labels, k):
  11.   distances = []
  12.   #Looping through all points in the dataset
  13.   for title in dataset:
  14.     movie = dataset[title]
  15.     distance_to_point = distance(movie, unknown)
  16.     #Adding the distance and point associated with that distance
  17.     distances.append([distance_to_point, title])
  18.   distances.sort()
  19.   #Taking only the k closest points
  20.   neighbors = distances[0:k]
  21.   num_good = 0
  22.   num_bad = 0
  23.   for neighbor in neighbors:
  24.     title = neighbor[1]
  25.     if labels[title] == 0:
  26.       num_bad += 1
  27.     elif labels[title] == 1:
  28.       num_good += 1
  29.   if num_good > num_bad:
  30.     return 1
  31.   else:
  32.     return 0
  33.  
  34.  
  35. def find_validation_accuracy(training_set, training_labels, validation_set, validation_labels, k):
  36.   num_correct = .0
  37.   for mov in validation_set:
  38.     guess = classify(validation_set[mov],training_set, training_labels, k)
  39.     if guess == validation_labels[mov]:
  40.       num_correct += 1
  41.   return num_correct/len(validation_set)
  42. print(find_validation_accuracy(training_set, training_labels, validation_set, validation_labels,3))
Add Comment
Please, Sign In to add comment