Advertisement
Guest User

Untitled

a guest
Sep 5th, 2017
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.16 KB | None | 0 0
  1. import math
  2. import sys
  3.  
  4. if len(sys.argv) != 3:
  5. print "Please specify two terms as argument!";
  6. sys.exit()
  7.  
  8. def cosine_similarity(v1,v2):
  9. sumxx, sumxy, sumyy = 0, 0, 0
  10. for i in range(len(v1)):
  11. x = v1[i]; y = v2[i]
  12. sumxx += x*x
  13. sumyy += y*y
  14. sumxy += x*y
  15. return sumxy/math.sqrt(sumxx*sumyy)
  16.  
  17. # Create a vocabulary using all the terms
  18. vocabulary = {}
  19. with open("types.txt") as fp:
  20. for i, line in enumerate(fp):
  21. vocabulary[line.rstrip()] = i
  22.  
  23. term1 = sys.argv[1]
  24. term2 = sys.argv[2]
  25.  
  26. if term1 not in vocabulary or term2 not in vocabulary:
  27. print 0
  28. sys.exit()
  29.  
  30. term1_index = vocabulary[term1]
  31. term2_index = vocabulary[term2]
  32.  
  33. highest_index = term1_index
  34. if term2_index > term1_index:
  35. highest_index = term2_index
  36.  
  37. v1 = []
  38. v2 = []
  39.  
  40. with open("vectors.txt") as fp:
  41. for i, line in enumerate(fp):
  42. if i == term1_index:
  43. for word in line.rstrip().split():
  44. v1.append(float(word))
  45. elif i == term2_index:
  46. for word in line.rstrip().split():
  47. v2.append(float(word))
  48. elif i > highest_index:
  49. break
  50.  
  51. print cosine_similarity(v1,v2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement