# Import all the dependencies
import nltk
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

# nltk.download('punkt')  # uncomment on the first run to fetch the tokenizer data

data = ["I love machine learning. It's awesome.",
        "I love coding in python",
        "I love building chatbots",
        "they chat amazingly well"]

# Tokenize each document and tag it with a unique string id
tagged_data = [TaggedDocument(words=nltk.word_tokenize(element.lower()), tags=[str(i)])
               for i, element in enumerate(data)]

max_epochs = 100  # number of passes (epochs) over the corpus
vec_size = 20     # dimensionality of the feature vectors
alpha = 0.025     # the initial learning rate

model = Doc2Vec(vector_size=vec_size,
                alpha=alpha,
                min_alpha=0.00025,
                min_count=1,  # keep even words that appear only once
                dm=1)         # dm=1 selects the PV-DM training algorithm

model.build_vocab(tagged_data)
for epoch in range(max_epochs):
    print('iteration {0}'.format(epoch))
    # one pass per loop iteration, so the manual learning-rate
    # decay below actually applies between passes
    model.train(tagged_data,
                total_examples=model.corpus_count,
                epochs=1)
    # decrease the learning rate
    model.alpha -= 0.0002
    # fix the learning rate, no decay
    model.min_alpha = model.alpha

model.save("d2v.model")
print("Model Saved")