Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import math
import sys
def tf(T, di):
    """Return the raw term frequency of term T in document di.

    The frequency is the number of non-overlapping occurrences of T in di,
    as reported by ``di.count``. For string documents this is a substring
    match, so ``tf("a", "banana")`` is 3.

    Args:
        T: The search term.
        di: The document to search; any object with a ``count`` method,
            in this script a line of text.

    Returns:
        The occurrence count as an int; 0 when T is the empty string.
    """
    if T == "":
        # str.count("") reports len(di) + 1 (one match between every pair of
        # characters), which is meaningless as a term frequency.
        return 0
    return di.count(T)
def idf(T, D):
    """Return the inverse document frequency ``log10(T / D)``.

    Args:
        T: Total number of documents in the collection.
        D: Number of documents that contain the search term.

    Returns:
        The base-10 logarithm of T / D as a float, or the integer 0 when
        D is 0 (the term appears nowhere, so the ratio is undefined).
    """
    if D == 0:
        return 0
    # float() forces true division even when both counts are Python 2 ints.
    return math.log10(float(T) / D)
def _read_line():
    """Return the next line of standard input without its line terminator."""
    return sys.stdin.readline().rstrip('\r\n')


def main():
    """Read documents and a search term from stdin and print TF-IDF scores.

    Expected input: a first line holding the number of documents N, then N
    lines (one document each), then one line holding the search term.

    Output: one line per document, in input order, made of the 1-based
    document number, a tab, and that document's tf * idf value.

    Raises:
        ValueError: If the first line is not an integer.
    """
    # Parse the count with int(); Python 2's input() would evaluate the
    # typed text as an arbitrary expression.
    num_lines = int(_read_line())

    # Keep documents in a list: it preserves input order and keeps duplicate
    # lines as separate documents (a dict keyed by text would merge them).
    documents = [_read_line() for _ in range(num_lines)]
    search_term = _read_line()

    # Term frequency per document, plus the number of documents in which
    # the search term appears at all.
    tf_values = [tf(search_term, document) for document in documents]
    appearance = sum(1 for document in documents if search_term in document)

    # Calculate once; the value is the same for every document.
    idf_val = idf(num_lines, appearance)

    for document_num, tf_val in enumerate(tf_values, 1):
        sys.stdout.write(
            str(document_num) + '\t' + str(tf_val * idf_val) + '\n'
        )


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement