Advertisement
Guest User

sumyapi.py

a guest
Jun 16th, 2015
26
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.86 KB | None | 0 0
  1. from __future__ import absolute_import
  2. from __future__ import division, print_function, unicode_literals
  3.  
  4. from sumy.parsers.html import HtmlParser
  5. from sumy.parsers.plaintext import PlaintextParser
  6. from sumy.nlp.tokenizers import Tokenizer
  7. from sumy.summarizers.lex_rank import LexRankSummarizer
  8. from sumy.summarizers.lsa import LsaSummarizer
  9. from sumy.summarizers.luhn import LuhnSummarizer
  10. from sumy.summarizers.random import RandomSummarizer
  11. from sumy.summarizers.text_rank import TextRankSummarizer
  12.  
  13. from sumy.nlp.stemmers import Stemmer
  14. from sumy.utils import get_stop_words
  15. import sys
  16. import locale
  17.  
  18.  
  19. # arguments of sumyapi.py call
  20. # 1: chosen summarizer [lex_rank|lsa|luhn|random|text_rank], default lsa
  21. # 2: language, default [german]
  22. # 3: target summary length in sentences, default [3]
  23. # 4: text to summarize
  24.  
  25. if __name__ == "__main__":
  26.  
  27.     LANGUAGE = str(sys.argv[2])
  28.     decode = sys.argv[4].decode(locale.getpreferredencoding())# decode whatever java probably(!) encoded as
  29.     encode = decode.encode("utf-8", "ignore")# make python compliant
  30.  
  31.     parser = PlaintextParser.from_string(encode, Tokenizer(LANGUAGE))
  32.     stemmer = Stemmer(LANGUAGE)
  33.  
  34.     if str(sys.argv[1]) == 'lex_rank':
  35.         summarizer = LexRankSummarizer(stemmer)
  36.     elif str(sys.argv[1]) == 'lsa':
  37.         summarizer = LsaSummarizer(stemmer)
  38.     elif str(sys.argv[1]) == 'luhn':
  39.         summarizer = LuhnSummarizer(stemmer)
  40.     elif str(sys.argv[1]) == 'random':
  41.         summarizer = RandomSummarizer(stemmer)
  42.     elif str(sys.argv[1]) == 'text_rank':
  43.         summarizer = TextRankSummarizer(stemmer)
  44.     else:
  45.         summarizer = LsaSummarizer(stemmer)
  46.  
  47.     summarizer.stop_words = get_stop_words(LANGUAGE)
  48.  
  49.     for sentence in summarizer(parser.document, int(sys.argv[3])):
  50.         print(sentence)# note python prints in utf - 8, make sure java reads stream as such
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement