Advertisement
Guest User

Untitled

a guest
Nov 26th, 2014
163
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.69 KB | None | 0 0
  1. from gensim import corpora, models
  2. import numpy.random
  3. numpy.random.seed(10)
  4.  
  5. doc0 = [(0, 1), (1, 1)]
  6. doc1 = [(0,1)]
  7. doc2 = [(0, 1), (1, 1)]
  8. doc3 = [(0, 3), (1, 1)]
  9.  
  10. corpus = [doc0,doc1,doc2,doc3]
  11. dictionary = corpora.Dictionary(corpus)
  12.  
  13. tfidf = models.TfidfModel(corpus)
  14. corpus_tfidf = tfidf[corpus]
  15. corpus_tfidf.save('x.corpus_tfidf')
  16.  
  17. corpus_tfidf = corpora.MmCorpus.load('x.corpus_tfidf')
  18.  
  19. lda = models.ldamodel.LdaModel(corpus_tfidf, id2word=dictionary, num_topics=2)
  20.  
  21. #which one i should use from this
  22. **corpus_lda = lda[corpus]** #this one
  23. **corpus_LDA = lda[corpus_tfidf ]** #or this one?
  24.  
  25.  
  26. corpus_lda.save('x.corpus_lda')
  27.  
  28. for i,j in enumerate(corpus_lda):
  29. print j, corpus[i]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement