Advertisement
Artashes

Untitled

Apr 6th, 2019
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.06 KB | None | 0 0
  # Train a Word2Vec model on a few lines of ``text.txt``, save it to
  # ``model.bin``, reload it, and print the 10 words most similar to a
  # query word.
  #
  # Word2Vec expects the corpus as a list of sentences, where each sentence
  # is itself a list of string tokens, e.g.
  #     [['this', 'is', 'the', 'first', 'sentence'], ['yet', 'another', 'sentence']]
  import json

  from gensim.models import Word2Vec

  # Zero-based indices of the lines of text.txt that are used as training
  # sentences. The file must have at least six lines, otherwise indexing
  # below raises IndexError.
  SENTENCE_LINE_INDICES = (0, 1, 3, 5)

  # Use a context manager so the file handle is always closed.
  # NOTE(review): the query word below is Cyrillic, so the corpus is
  # presumably non-ASCII; this assumes text.txt is UTF-8 encoded -- confirm.
  with open('text.txt', 'r', encoding='utf-8') as f:
      text = f.readlines()

  # split() without an argument splits on any whitespace run, so the
  # trailing newline does not stick to the last token and repeated spaces
  # do not produce empty-string tokens.
  sentences2 = [text[i].split() for i in SENTENCE_LINE_INDICES]

  # min_count=1 keeps every word, even those that occur only once.
  model = Word2Vec(sentences2, min_count=1)
  model.save('model.bin')

  # Reload from disk to check that the saved model is usable.
  new_model = Word2Vec.load('model.bin')

  # Similarity queries live on the KeyedVectors object (``.wv``); calling
  # most_similar directly on the model is deprecated and was removed in
  # gensim 4. Raises KeyError if the query word is not in the vocabulary.
  print(new_model.wv.most_similar(positive=['будущее'], topn=10))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement