Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
def text2vec(doc_tok, model, dim=300):
    """Return the mean embedding of the tokens in *doc_tok* known to *model*.

    Args:
        doc_tok: Iterable of tokens making up one document.
        model: Embedding lookup that supports ``token in model`` and
            ``model.query(token)``.
            NOTE(review): this looks like a pymagnitude-style API - confirm.
        dim: Length of the embedding vectors; must match what
            ``model.query`` returns.

    Returns:
        A NumPy array of length *dim*: the element-wise average of the
        vectors of all tokens found in *model*, or an all-zero vector when
        no token is found (including when *doc_tok* is empty).
    """
    total = np.zeros(dim)
    hits = 0
    for token in doc_tok:
        # Out-of-vocabulary tokens contribute nothing to the average.
        if token not in model:
            continue
        hits += 1
        total += model.query(token)

    # Avoid dividing by zero: with no known tokens, return the zero vector.
    if hits == 0:
        return total
    return total / hits
def get_docs_embedding(docs_tok, model, dim=300):
    """Embed every tokenized document and stack the results into a matrix.

    Args:
        docs_tok: Iterable of tokenized documents; each item is passed to
            ``text2vec`` as its ``doc_tok`` argument.
        model: Embedding lookup forwarded unchanged to ``text2vec``.
        dim: Embedding length forwarded to ``text2vec``.

    Returns:
        A NumPy array with one row per document, in input order. When
        *docs_tok* is empty the result has shape ``(0, dim)``, so the
        output is always two-dimensional.
    """
    vectors = [text2vec(doc, model, dim) for doc in docs_tok]
    if not vectors:
        # np.array([]) would have shape (0,), silently losing the
        # embedding axis; keep the result 2-D for downstream consumers.
        return np.zeros((0, dim))
    return np.array(vectors)
# Build the embedding matrix for the headlines column, one row per entry.
# `dim` is left at its default of 300.
# NOTE(review): `data` and `em_model` are defined elsewhere; presumably
# `data['processed_headlines']` holds tokenized headlines - confirm.
headlines_embedding = get_docs_embedding(
    docs_tok=data['processed_headlines'],
    model=em_model,
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement