Advertisement
Guest User

Untitled

a guest
Jun 18th, 2011
405
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.53 KB | None | 0 0
  1. import gensim
  2. import sqlite3bowcorpus
  3.  
  4. class LsiWrapper(object):
  5.  
  6.     def __init__(self, corpus, lsi_model_file, num_topics):
  7.         self.corpus = corpus
  8.         self.lsi_model_file = lsi_model_file
  9.         self.lsimodel = gensim.models.lsimodel.LsiModel(id2word = self.corpus.tid_to_term, numTopics = num_topics)
  10.         self.lsimodel.load(lsi_model_file)
  11.  
  12.     def topic_space(self, text):
  13.         '''Compute the topic space representation for bag of words representation,
  14.        bow.
  15.        '''
  16.         bow = self.corpus.text2bow(text)
  17.         tranformation = self.lsimodel[[bow]]
  18.         return list(tranformation[0][0])
  19.  
  20. def compute_lsi(dbname, corpus_model_file, num_topics, savefile):
  21.     corpus = sqlite3bowcorpus.Sqlite3BowCorpus(dbname, corpus_model_file)
  22.     corpus.request_documents_cached()
  23.     lsimodel = gensim.models.lsimodel.LsiModel(corpus, num_topics, corpus.tid_to_term)
  24.     lsimodel.save(savefile)
  25.  
  26. def compute_lsi_main():
  27.     dbname = sys.argv[1]
  28.     corpus_model_file = sys.argv[2]
  29.     num_topics = int(sys.argv[3])
  30.     savefile = sys.argv[4]
  31.     compute_lsi(dbname, corpus_model_file, num_topics, savefile)
  32.  
  33. def print_lsi_main():
  34.     dbname = sys.argv[1]
  35.     corpus_model_file = sys.argv[2]
  36.     num_topics = int(sys.argv[3])
  37.     lsi_model_file = sys.argv[4]
  38.     corpus = sqlite3bowcorpus.Sqlite3BowCorpus(dbname, corpus_model_file)
  39.     lsi_model = LsiWrapper(corpus, lsi_model_file, num_topics)
  40.     lsi_model.topic_space(sys.argv[5])
  41.  
  42. if __name__ == '__main__':
  43.     import sys
  44.     print_lsi_main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement