Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import gensim
- import sqlite3bowcorpus
class LsiWrapper(object):
    """Pair a saved gensim LSI model with the corpus used to vectorise text.

    The corpus object supplies ``text2bow``; the LSI model is read back from
    disk and used to project the resulting bag of words.
    """

    def __init__(self, corpus, lsi_model_file, num_topics):
        """Load the LSI model stored in ``lsi_model_file``.

        Args:
            corpus: object providing ``text2bow(text)``.
            lsi_model_file: path handed to ``LsiModel.load``.
            num_topics: accepted for backward compatibility only; it is not
                used, because the model is restored from the file as saved.
        """
        self.corpus = corpus
        self.lsi_model_file = lsi_model_file
        # ``load`` is a classmethod that *returns* the restored model.  Calling
        # it on a freshly built instance and dropping the result would leave
        # an empty, untrained model in place, so keep the returned object.
        self.lsimodel = gensim.models.lsimodel.LsiModel.load(lsi_model_file)

    def topic_space(self, text):
        """Project ``text`` into the LSI model and return the first entry.

        ``text`` is converted with ``self.corpus.text2bow``, wrapped in a
        one-document corpus and passed through the model.  The return value
        is ``list(result[0][0])``: the first element of the first transformed
        document, copied into a list.
        NOTE(review): that element is presumably a single (topic, weight)
        pair rather than the full topic vector -- confirm this is intended.
        """
        bow = self.corpus.text2bow(text)
        transformed = self.lsimodel[[bow]]
        first_document = transformed[0]
        return list(first_document[0])
def compute_lsi(dbname, corpus_model_file, num_topics, savefile):
    """Train an LSI model on a Sqlite3BowCorpus and save it to ``savefile``.

    Args:
        dbname: first argument given to ``Sqlite3BowCorpus``.
        corpus_model_file: second argument given to ``Sqlite3BowCorpus``.
        num_topics: number of topics passed to ``LsiModel``.
        savefile: path passed to the model's ``save`` method.
    """
    bow_corpus = sqlite3bowcorpus.Sqlite3BowCorpus(dbname, corpus_model_file)
    # Must run before training; presumably it pre-loads the documents --
    # see sqlite3bowcorpus for the details.
    bow_corpus.request_documents_cached()
    term_lookup = bow_corpus.tid_to_term
    trained = gensim.models.lsimodel.LsiModel(bow_corpus, num_topics, term_lookup)
    trained.save(savefile)
def compute_lsi_main():
    """Command-line entry point: train and save an LSI model.

    Expected arguments, in order:
        dbname corpus_model_file num_topics savefile

    Raises:
        IndexError: if fewer than four arguments are given.
        ValueError: if ``num_topics`` is not an integer.
    """
    # Imported here so the function also works when this module is imported;
    # otherwise ``sys`` only exists when the file is run as a script.
    import sys

    dbname, corpus_model_file = sys.argv[1], sys.argv[2]
    num_topics = int(sys.argv[3])
    savefile = sys.argv[4]
    compute_lsi(dbname, corpus_model_file, num_topics, savefile)
def print_lsi_main():
    """Command-line entry point: print the LSI projection of a text.

    Expected arguments, in order:
        dbname corpus_model_file num_topics lsi_model_file text

    Raises:
        IndexError: if fewer than five arguments are given.
        ValueError: if ``num_topics`` is not an integer.
    """
    # Imported here so the function also works when this module is imported;
    # otherwise ``sys`` only exists when the file is run as a script.
    import sys

    dbname, corpus_model_file = sys.argv[1], sys.argv[2]
    num_topics = int(sys.argv[3])
    lsi_model_file = sys.argv[4]
    text = sys.argv[5]

    corpus = sqlite3bowcorpus.Sqlite3BowCorpus(dbname, corpus_model_file)
    lsi_model = LsiWrapper(corpus, lsi_model_file, num_topics)
    # The result used to be computed and thrown away; emit it, as the
    # function name promises.  Single-argument print() works on Python 2 and 3.
    print(lsi_model.topic_space(text))
# Script entry point: running this file directly prints the LSI projection
# of the text given on the command line.
if __name__ == "__main__":
    # The *_main functions read sys.argv through this module-level name.
    import sys

    print_lsi_main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement