Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Print the 1-based position of the line most similar to the first line.

Reads the UTF-8 text file ``input``, treats its first line as the query and
every following line as a candidate, vectorizes all lines with TF-IDF and
prints the position (counting candidates from 1) of the candidate with the
smallest cosine distance to the query.  Ties keep the earliest candidate.
"""
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.spatial.distance import cosine

# The context manager closes the file even if reading fails.
with open('input', 'r', -1, 'utf-8') as f:
    data = f.read()

# Split once and reuse: the first line is the query, the rest are candidates.
lines = data.split('\n')
query, candidates = lines[0], lines[1:]

# The constructor takes configuration only; the corpus is passed to fit().
# (Passing the text positionally would be read as the ``input`` option.)
vect = TfidfVectorizer().fit(lines)
query_vec = vect.transform([query]).toarray()[0]
candidate_vecs = vect.transform(candidates).toarray()

# TF-IDF weights are non-negative, so distances should lie in [0, 1];
# starting just above 1 lets any real candidate replace the initial value.
smallest = 1.01
ind = 0
for i, candidate_vec in enumerate(candidate_vecs):
    # A line with no known terms (e.g. the empty string after a trailing
    # newline) is an all-zero vector with no defined cosine distance.
    if not candidate_vec.any():
        continue
    dist = cosine(query_vec, candidate_vec)  # computed once per candidate
    if dist < smallest:
        ind = i
        smallest = dist

print(ind + 1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement