Advertisement
Guest User

Untitled

a guest
Mar 28th, 2017
46
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.48 KB | None | 0 0
  1. from sklearn.feature_extraction.text import TfidfVectorizer
  2. from scipy.spatial.distance import cosine
  3. f = open('input', 'r', -1, 'utf-8')
  4. data = f.read()
  5. vect = TfidfVectorizer(data)
  6. hg=data.split('\n')
  7. b=hg[0]
  8. a=hg[1:]
  9.  
  10. vect=vect.fit(raw_documents=data.split('\n'))
  11. b=vect.transform([b]).toarray()
  12. a=vect.transform(a).toarray()
  13. smallest=1.01
  14.  
  15. ind = 0
  16. for i in range(len(a)):
  17. if cosine(b[0],a[i])<smallest:
  18. ind=i
  19. smallest=cosine(b[0],a[i])
  20. print(ind+1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement