# Untitled

# Import all the dependencies
import nltk
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

# One-time setup: uncomment to download the 'punkt' tokenizer data used by nltk.word_tokenize.
# nltk.download('punkt')

# Toy corpus: one short sentence per document.
data = [
    "I love machine learning. Its awesome.",
    "I love coding in python",
    "I love building chatbots",
    "they chat amagingly well",
]

# Lower-case and tokenize every sentence, then wrap it in a TaggedDocument
# whose single tag is the sentence's index in `data`, rendered as a string.
tagged_data = [
    TaggedDocument(
        tags=[str(index)],
        words=nltk.word_tokenize(sentence.lower()),
    )
    for index, sentence in enumerate(data)
]

# Hyperparameters for the Doc2Vec model.
max_epochs = 100  # Intended number of training epochs over the corpus.
vec_size = 20     # Length of each learned feature vector.
alpha = 0.025     # Learning rate at the start of training.

# dm=1 selects the distributed-memory (PV-DM) training algorithm, and
# min_count=1 keeps every token, however rare. min_alpha is the floor the
# learning rate decays towards during training.
model = Doc2Vec(
    dm=1,
    min_count=1,
    vector_size=vec_size,
    alpha=alpha,
    min_alpha=0.00025,
)
# Scan the tagged corpus once to build the vocabulary; this also populates
# model.corpus_count, which train() is given as total_examples below.
model.build_vocab(tagged_data)

# Train with ONE call and let gensim manage the learning rate: it decays
# linearly from model.alpha down to model.min_alpha across `epochs` passes.
#
# The previous version called train() inside a `for epoch in range(max_epochs)`
# loop with epochs=model.epochs while hand-editing model.alpha/model.min_alpha.
# That (a) ran max_epochs * model.epochs passes instead of max_epochs,
# (b) produced a jagged learning-rate schedule (a full decay on the first
# call, then constant hand-set rates), and (c) left the saved model with
# alpha == min_alpha, altering the defaults later used for inference.
#
# Per-epoch progress is no longer printed from here; enable gensim's INFO
# logging if progress output is wanted.
model.train(tagged_data,
            total_examples=model.corpus_count,
            epochs=max_epochs)

# Persist the trained model to the current working directory.
model.save("d2v.model")
print("Model Saved")