Advertisement
Guest User

Untitled

a guest
Mar 25th, 2017
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.68 KB | None | 0 0
  1. from gensim.models import Phrases
  2. documents = ["the mayor of new york was there", "machine learning can be useful sometimes","new york mayor was present"]
  3.  
  4. sentence_stream = [doc.split(" ") for doc in documents]
  5. bigram = Phrases(sentence_stream, min_count=1, threshold=2)
  6. sent = [u'the', u'mayor', u'of', u'new', u'york', u'was', u'there']
  7. print(bigram[sent])
  8.  
  9. [u'the', u'mayor', u'of', u'new_york', u'was', u'there']
  10.  
  11. # read txt documents
  12. os.chdir('text_data')
  13. documents = []
  14. for file in glob.glob("*.txt"): # read all txt files in working directory
  15. file_content = open(file, "r")
  16. lines = file_content.read().splitlines()
  17. for line in lines:
  18. documents.append(line)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement