Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Minimal gensim pipeline: bag-of-words -> TF-IDF -> LDA topic distributions.
from gensim import corpora, models
import numpy.random

# Seed NumPy's global RNG (presumably so LDA training is repeatable
# between runs -- confirm against the installed gensim version).
numpy.random.seed(10)

# Each document is already a bag-of-words vector: a list of
# (token_id, count) pairs.
doc0 = [(0, 1), (1, 1)]
doc1 = [(0, 1)]
doc2 = [(0, 1), (1, 1)]
doc3 = [(0, 3), (1, 1)]
corpus = [doc0, doc1, doc2, doc3]

# The corpus holds token ids, not token strings, so the id -> word mapping
# has to be derived from the ids themselves.  Dictionary(corpus) would
# instead treat every (id, count) tuple as a "token".
dictionary = corpora.Dictionary.from_corpus(corpus)

# Re-weight the raw counts with TF-IDF and persist the weighted vectors in
# Matrix Market format, then stream them back from disk.
tfidf = models.TfidfModel(corpus)
corpora.MmCorpus.serialize('x.corpus_tfidf', tfidf[corpus])
corpus_tfidf = corpora.MmCorpus('x.corpus_tfidf')

# NOTE(review): LDA is usually described in terms of raw term counts;
# training on TF-IDF weights is kept here because the original script did
# so -- confirm this is intended.
lda = models.ldamodel.LdaModel(corpus_tfidf, id2word=dictionary, num_topics=2)

# The model was trained on TF-IDF vectors, so documents must be presented
# in that same vector space at inference time: use lda[corpus_tfidf],
# not lda[corpus].
corpus_lda = lda[corpus_tfidf]
corpora.MmCorpus.serialize('x.corpus_lda', corpus_lda)

# Show each document's topic distribution next to its original
# bag-of-words vector.  The %-formatting prints the same text on
# Python 2 and Python 3.
for topics, bow in zip(corpus_lda, corpus):
    print("%s %s" % (topics, bow))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement