Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from math import sqrt
- from collections import Counter
def forSearch(words=None, search=None):
    """Rank files by cosine similarity between a query and per-file word counts.

    Args:
        words: mapping ``word -> {file_id: count}`` (an inverted index).
            When omitted, a small built-in sample index is used.
        search: mapping ``word -> weight`` describing the query.
            When omitted, a small built-in sample query is used.

    Returns:
        A list of every file id found in ``words``, ordered from the most
        to the least relevant file for ``search``.

    A query word that is absent from ``words`` (or absent for a given file)
    counts as 0 instead of raising ``KeyError``.  A file whose vector is all
    zeros, or an empty/all-zero query, gets relevancy 0.0 instead of raising
    ``ZeroDivisionError``.
    """
    if words is None:
        words = {
            'bit': {1: 3, 2: 4, 3: 19, 4: 0},
            'shoe': {1: 0, 2: 0, 3: 0, 4: 0},
            'dog': {1: 3, 2: 0, 3: 4, 4: 5},
            'red': {1: 0, 2: 0, 3: 15, 4: 0},
        }
    if search is None:
        search = {'bit': 1, 'dog': 3, 'shoe': 5}

    # Derive the files from the index itself rather than hard-coding a count.
    file_ids = sorted(
        {file_id for counts in words.values() for file_id in counts}
    )

    # The query norm does not depend on the file, so compute it once.
    query_norm = sqrt(sum(weight ** 2 for weight in search.values()))

    file_relevancy = Counter()
    for file_id in file_ids:
        # Only the query's words contribute to the file vector; unknown
        # words or files default to a count of 0.
        counts = [words.get(word, {}).get(file_id, 0) for word in search]
        dot = sum(
            weight * count for weight, count in zip(search.values(), counts)
        )
        file_norm = sqrt(sum(count ** 2 for count in counts))
        denominator = file_norm * query_norm
        # Cosine similarity is undefined for a zero vector; treat the file
        # as not relevant at all.
        file_relevancy[file_id] = dot / denominator if denominator else 0.0

    return [file_id for file_id, _ in file_relevancy.most_common()]
# Show the ranked file ids; the parenthesised form runs on Python 2 and 3.
print(forSearch())
- for i in range(1, num_files+1):
- if the corresponding key cannot be found in words,
- insert it with a value of 0
- words_ith_val = [words[x][i] for x in search.keys() ]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement