Advertisement
Guest User

Untitled

a guest
Aug 17th, 2019
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.12 KB | None | 0 0
  # Train a small Doc2Vec model on a toy corpus and save it to "d2v.model".
import nltk
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

# nltk.word_tokenize needs the NLTK "punkt" tokenizer data. If it is not
# installed yet, run the following once:
# nltk.download('punkt')

# Toy corpus: one document per string.
data = [
    "I love machine learning. Its awesome.",
    "I love coding in python",
    "I love building chatbots",
    "they chat amagingly well",
]

# Each document is lower-cased, tokenized, and tagged with its index in
# `data` (as a string) so its vector can be looked up by that tag later.
tagged_data = [
    TaggedDocument(words=nltk.word_tokenize(text.lower()), tags=[str(i)])
    for i, text in enumerate(data)
]

max_epochs = 100  # Number of training passes (epochs) over the corpus.
vec_size = 20  # Dimensionality of the feature vectors.
alpha = 0.025  # The initial learning rate.

# `epochs` is set on the model so that a single train() call performs all
# passes and gensim decays the learning rate linearly from `alpha` down to
# `min_alpha` across the whole run.
model = Doc2Vec(
    vector_size=vec_size,
    alpha=alpha,
    min_alpha=0.00025,
    min_count=1,
    dm=1,
    epochs=max_epochs,
)
model.build_vocab(tagged_data)

# Train exactly once. Calling train() inside a manual loop while adjusting
# model.alpha / model.min_alpha by hand would run max_epochs * model.epochs
# passes, replace the smooth decay with a coarse step schedule, and push the
# learning rate below zero for larger max_epochs values.
model.train(
    tagged_data,
    total_examples=model.corpus_count,
    epochs=model.epochs,
)

model.save("d2v.model")
print("Model Saved")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement