Advertisement
Guest User

Untitled

a guest
Oct 18th, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.68 KB | None | 0 0
import numpy as np
  2.  
  3. def text2vec(doc_tok, model, dim=300):
  4. doc_embedding = np.zeros(dim)
  5. valid_words = 0
  6. for word in doc_tok:
  7. if word in model:
  8. valid_words += 1
  9. doc_embedding += model.query(word)
  10. else:
  11. continue
  12. if valid_words > 0:
  13. return doc_embedding / valid_words
  14. else:
  15. return doc_embedding
  16.  
  17. def get_docs_embedding(docs_tok, model, dim=300):
  18. all_docs_embedding = []
  19. for doc in docs_tok:
  20. all_docs_embedding.append(text2vec(doc, model, dim))
  21. return np.array(all_docs_embedding)
  22.  
  23. headlines_embedding = get_docs_embedding(docs_tok=data['processed_headlines'], model=em_model)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement