Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from movies import training_set, training_labels, validation_set, validation_labels
def distance(movie1, movie2):
    """Return the Euclidean distance between two feature vectors.

    Args:
        movie1: Sequence of numeric feature values.
        movie2: Sequence of numeric feature values with the same length
            as ``movie1``.

    Returns:
        The square root of the sum of squared per-feature differences.
        Two empty vectors are at distance ``0.0``.

    Raises:
        ValueError: If the two vectors do not have the same length.
    """
    if len(movie1) != len(movie2):
        # zip() below would otherwise drop the unmatched trailing features
        # and return a distance computed over only part of the vectors.
        raise ValueError(
            "feature vectors must have the same length: "
            "%d != %d" % (len(movie1), len(movie2))
        )
    squared_difference = sum((a - b) ** 2 for a, b in zip(movie1, movie2))
    return squared_difference ** 0.5
def classify(unknown, dataset, labels, k):
    """Classify ``unknown`` by a vote among its ``k`` nearest neighbors.

    Args:
        unknown: Feature vector of the point to classify.
        dataset: Mapping of title -> feature vector for the known points.
        labels: Mapping of title -> label. Neighbors labelled ``1`` are
            counted as good and neighbors labelled ``0`` as bad; any other
            label value is ignored by the vote.
        k: Number of nearest neighbors that take part in the vote. Must be
            at least 1. If it exceeds the dataset size, every point votes.

    Returns:
        ``1`` if strictly more of the nearest neighbors are labelled ``1``
        than ``0``; otherwise ``0`` (this includes ties and an empty
        dataset).

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # A negative k would make the slice below drop the *farthest* points
        # instead of keeping the nearest ones, giving a meaningless vote.
        raise ValueError("k must be at least 1, got %r" % (k,))

    # (distance, title) pairs sort by distance first; equal distances fall
    # back to comparing the titles.
    distances = sorted(
        (distance(movie, unknown), title) for title, movie in dataset.items()
    )

    # Only the k closest points get a vote.
    neighbor_labels = [labels[title] for _, title in distances[:k]]
    num_good = neighbor_labels.count(1)
    num_bad = neighbor_labels.count(0)
    return 1 if num_good > num_bad else 0
def find_validation_accuracy(training_set, training_labels, validation_set, validation_labels, k):
    """Return the fraction of validation points that ``classify`` gets right.

    Each point in ``validation_set`` is classified against the training data
    and the guess is compared with its entry in ``validation_labels``.

    Args:
        training_set: Mapping of title -> feature vector used as neighbors.
        training_labels: Mapping of title -> label for ``training_set``.
        validation_set: Mapping of title -> feature vector to classify.
        validation_labels: Mapping of title -> expected label for each title
            in ``validation_set``.
        k: Number of nearest neighbors passed through to ``classify``.

    Returns:
        The number of correct guesses divided by the size of
        ``validation_set``, as a float. An empty ``validation_set`` yields
        ``0.0`` rather than dividing by zero.
    """
    if not validation_set:
        return 0.0

    num_correct = sum(
        1
        for title, movie in validation_set.items()
        if classify(movie, training_set, training_labels, k) == validation_labels[title]
    )
    # float() keeps this a true division regardless of interpreter version.
    return float(num_correct) / len(validation_set)
# Report the classifier's accuracy on the validation data using the
# 3 nearest neighbors.
print(
    find_validation_accuracy(
        training_set, training_labels, validation_set, validation_labels, 3
    )
)
Add Comment
Please, Sign In to add comment