Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import logging, gensim, bz2
- logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)  # emit INFO and above as "time : level : message"
- # load the id->word mapping (the dictionary) from its text file; it is one of the results of step 2 (an earlier step, not shown in this snippet)
- id2word = gensim.corpora.Dictionary.load_from_text('wiki_en_wordids.txt')
- # load the corpus iterator over the TF-IDF file (presumably Matrix Market format, given MmCorpus and the .mm extension -- confirm in the gensim docs)
- mm = gensim.corpora.MmCorpus('wiki_en_tfidf.mm')
- # mm = gensim.corpora.MmCorpus(bz2.BZ2File('wiki_en_tfidf.mm.bz2'))
- # ^ use the commented-out line above instead if you compressed the TFIDF output (recommended)
- print(mm)
- 45549 aa 18622
- 76459 aaa 9951
- 90499 aaaa 953
- 90492 aaas 901
- 76461 aab 1101
- 76460 aac 1817
- [...]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement