Advertisement
Guest User

Untitled

a guest
Oct 1st, 2016
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.80 KB | None | 0 0
import math
import sys
  2.  
  3. def tf(T, di):
  4. return di.count(T)
  5.  
  6. def idf(T, D):
  7. return math.log10(float(T)/D) if D != 0 else 0
  8.  
  9. if __name__ == '__main__':
  10. appearance = 0
  11. strings = {}
  12.  
  13. #read the input
  14. num_lines = input()
  15. for i in range(num_lines):
  16. temp = raw_input()
  17. strings[temp] = {}
  18. strings[temp]['documentnum'] = i + 1;
  19. search_term = raw_input()
  20.  
  21. #calculate tf for each document and find number of documents in which the search term appears
  22. for sentence in strings:
  23. strings[sentence]['tf_val'] = tf(search_term, sentence)
  24. if search_term in sentence:
  25. appearance = appearance + 1
  26.  
  27. #calculate once, value is constant
  28. idf_val = idf(num_lines, appearance)
  29. for sentence in strings:
  30. print str(strings[sentence]['documentnum']) + '\t' + str((strings[sentence]['tf_val'] * idf_val))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement