Want more features on Pastebin? Sign Up, it's FREE!
Guest

Untitled

By: a guest on Oct 9th, 2012  |  syntax: None  |  size: 9.85 KB  |  views: 251  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. # 6.00 Problem Set 5
  2. # RSS Feed Filter
  3.  
  4. import feedparser
  5. import string
  6. import time
  7. from project_util import translate_html
  8. from news_gui import Popup
  9.  
  10. #-----------------------------------------------------------------------
  11. #
  12. # Problem Set 5
  13.  
  14. #======================
  15. # Code for retrieving and parsing
  16. # Google and Yahoo News feeds
  17. # Do not change this code
  18. #======================
  19.  
  20. def process(url):
  21.     """
  22.     Fetches news items from the rss url and parses them.
  23.     Returns a list of NewsStory-s.
  24.     """
  25.     feed = feedparser.parse(url)
  26.     entries = feed.entries
  27.     ret = []
  28.     for entry in entries:
  29.         guid = entry.guid
  30.         title = translate_html(entry.title)
  31.         link = entry.link
  32.         summary = translate_html(entry.summary)
  33.         try:
  34.             subject = translate_html(entry.tags[0]['term'])
  35.         except AttributeError:
  36.             subject = ""
  37.         newsStory = NewsStory(guid, title, subject, summary, link)
  38.         ret.append(newsStory)
  39.     return ret
  40.  
  41. #======================
  42. # Part 1
  43. # Data structure design
  44. #======================
  45.  
  46. # Problem 1
  47.  
  48. # TODO: NewsStory
  49.  
  50. class NewsStory(object):
  51.     '''
  52.     Get to store guid, title, subject, summary, link.
  53.     '''
  54.     #initialize the object with the imformation that parsed
  55.  
  56.     def __init__(self, guid, title, subject, summary, link):
  57.         self.guid = guid
  58.         self.title = title
  59.         self.subject = subject
  60.         self.summary = summary
  61.         self.link = link
  62.     #provide functions to access the information we stored
  63.     def get_guid(self):
  64.         return self.guid
  65.    
  66.     def get_title(self):
  67.         return self.title
  68.  
  69.     def get_subject(self):
  70.         return self.subject
  71.  
  72.     def get_summary(self):
  73.         return self.summary
  74.  
  75.     def get_link(self):
  76.         return self.link
  77.  
  78. #======================
  79. # Part 2
  80. # Triggers
  81. #======================
  82.  
  83. class Trigger(object):
  84.     def evaluate(self, story):
  85.         """
  86.         Returns True if an alert should be generated
  87.         for the given news item, or False otherwise.
  88.         """
  89.         raise NotImplementedError
  90.  
  91. # Whole Word Triggers
  92. # Problems 2-5
  93.  
  94. # TODO: WordTrigger
  95. import string
  96. class WordTrigger(Trigger):
  97.     #store the word that wants to be triggered
  98.     def __init__(self,word):
  99.         self.word = word
  100.  
  101.     #test if the word in text  
  102.     def is_word_in(self,text):
  103.  
  104.         textlist = text.split(' ')
  105.         #check all punctuation in the textlist
  106.         #textlist should be the inner loop, because textlist.remove will allert each time inner loop runs
  107.         for punc in string.punctuation:
  108.             for i in textlist:
  109.                 if punc in i:
  110.                     textlist.extend(i.split(punc))
  111.                     textlist.remove(i)
  112.         return self.word in textlist
  113.  
  114. class TitleTrigger(WordTrigger):
  115.     def __init__(self,word):
  116.         WordTrigger.__init__(self,word.lower())
  117.     def evaluate (self,story):
  118.         title = story.title.lower()
  119.         return WordTrigger.is_word_in(self,title)
  120.    
  121.  
  122. # TODO: SubjectTrigger
  123. class SubjectTrigger(WordTrigger):
  124.     def __init__(self, word):
  125.         WordTrigger.__init__(self,word.lower())
  126.  
  127.     def evaluate (self, story):
  128.         subject = story.subject.lower()
  129.         return WordTrigger.is_word_in(self,subject)
  130.        
  131.  
  132.    
  133. # TODO: SummaryTrigger
  134. class SummaryTrigger(WordTrigger):
  135.     def __init__(self,word):
  136.         WordTrigger.__init__(self,word.lower())
  137.     def evaluate (self,story):
  138.         summary = story.summary.lower()
  139. ##        return WordTrigger.is_word_in(self,summary)
  140.         return self.is_word_in(summary)
  141.  
  142. # Composite Triggers
  143. # Problems 6-8
  144.  
  145. # TODO: NotTrigger
  146. class NotTrigger(WordTrigger):
  147.     def __init__(self,trigger):
  148.         self.trigger = trigger
  149.        
  150.     def evaluate (self, story):
  151.  
  152.         return not self.trigger.evaluate(story)
  153.  
  154.  
  155.  
  156.    
  157. # TODO: AndTrigger
  158. class AndTrigger(WordTrigger):
  159.     #take in two triggers
  160.     def __init__(self,trigger1,trigger2):
  161.         self.trigger1 = trigger1
  162.         self.trigger2 = trigger2
  163.     #return True only if both triggers individually return True
  164.     def evaluate (self, story):
  165.         return self.trigger1.evaluate(story) and self.trigger2.evaluate(story)
  166.    
  167.  
  168. # TODO: OrTrigger
  169. class OrTrigger(WordTrigger):
  170.     #take in two triggers
  171.     def __init__(self, trigger1, trigger2):
  172.         self.trigger1 = trigger1
  173.         self.trigger2 = trigger2
  174.     #return True if either trigger returns True
  175.     def evaluate(self,story):
  176.         return self.trigger1.evaluate(story) or self.trigger2.evaluate(story)
  177.  
  178.  
  179. # Phrase Trigger
  180. # Question 9
  181.  
  182. # TODO: PhraseTrigger
  183. class PhraseTrigger(WordTrigger):
  184.     def __init__(self, phrase):
  185.         self.word = phrase
  186.  
  187.     def is_phrase_in(self,text):
  188.         if self.word in text:
  189.             return True
  190.         else:
  191.             return False
  192.     def evaluate (self, story):
  193.         return self.is_phrase_in(story.subject) or self.is_phrase_in(story.title) or self.is_phrase_in(story.summary)
  194.  
  195. #======================
  196. # Part 3
  197. # Filtering
  198. #======================
  199.  
  200. def filter_stories(stories, triggerlist):
  201.     """
  202.     Takes in a list of NewsStory-s.
  203.     Returns only those stories for whom
  204.     a trigger in triggerlist fires.
  205.     """
  206.     # TODO: Problem 10
  207.     # This is a placeholder (we're just returning all the stories, with no filtering)
  208.     # Feel free to change this line!
  209.  
  210.     #stories wanted stored here
  211.     stories_wanted = []
  212.     for story in stories:
  213.         for trigger in triggerlist:
  214.             if trigger.evaluate(story):
  215.                 stories_wanted.append(story)
  216.                 break
  217.     return stories_wanted
  218.  
  219. #======================
  220. # Part 4
  221. # User-Specified Triggers
  222. #======================
  223. ## Problems not yet fixed:
  224. ### 1. Loop bound: len(line) in my code, but len(line)-1 in the examples.
  225. ### 2. An unhandled problem in main_thread.
  226.  
  227. def readTriggerConfig(filename):
  228.     """
  229.     Returns a list of trigger objects
  230.     that correspond to the rules set
  231.     in the file filename
  232.     """
  233.     # Here's some code that we give you
  234.     # to read in the file and eliminate
  235.     # blank lines and comments
  236.     triggerfile = open(filename, "r")
  237.     all = [ line.rstrip() for line in triggerfile.readlines() ]
  238.     lines = []
  239.     for line in all:
  240.         if len(line) == 0 or line[0] == '#':
  241.             continue
  242.         lines.append(line)
  243.  
  244.     # TODO: Problem 11
  245.     # 'lines' has a list of lines you need to parse
  246.     # Build a set of triggers from it and
  247.     # return the appropriate ones
  248.  
  249.     #return a triggerlist when finished
  250.     triggerlist = []
  251.     triggerdic ={}
  252.     #parse each line in a list of lists
  253.     for line in lines:
  254.         #line is a string
  255.         line = line.split()
  256.         #line converted into a list of string
  257.         #lines become a list of lists
  258.  
  259.     #find ADD
  260.     for line in lines:
  261.         if 'ADD' in line:
  262.             #parse to find triggers
  263.             for i in range (1,len(line)-1):
  264.                 triggerlist.append(triggerdic[line[i]])
  265.         #title, subject, summary triggers built with one argument
  266.         elif 'SUBJECT' in line:
  267.             #SubjectTrigger object definition
  268.             subject_trigger = SubjectTrigger(line[2])
  269.             #added into the dict{trigger's name:trigger}
  270.             triggerdic[line[0]] = subject_trigger
  271.         elif 'TITLE' in line:
  272.             title_trigger = TitleTrigger(line[2])
  273.             triggerdic[line[0]] = title_trigger
  274.         elif 'SUMMARY' in line:
  275.             summary_trigger = SummaryTrigger(line[2])
  276.             triggerdic[line[0]] = summary_trigger
  277.         elif 'PHRASE' in line:
  278.             #add triggerwords together
  279.             word = ''
  280.             for string in range (2,len(line)-1):
  281.                 word = word +' '+ line[string]
  282.             phrase_trigger = PhraseTrigger(word)
  283.             triggerdic[line[0]] = phrase_trigger
  284.         #not, and or triggers take triggers as arguments
  285.         elif 'AND' in line:
  286.             andtrigger = AndTrigger(triggerdic[line[2]],triggerdic[line[3]])
  287.             triggerdic[line[0]] = andtrigger
  288.         elif 'OR' in line:
  289.             ortrigger = OrTrigger(triggerdic[line[2]],triggerdic[line[3]])
  290.             triggerdic[line[0]] = ortrigger
  291.         elif ' NOT' in line:
  292.             not_trigger = NotTrigger(triggerdic[line[2]])
  293.             triggerdic[line[0]] = not_trigger
  294.  
  295.     return triggerlist
  296.        
  297.    
  298. import thread
  299.  
  300. def main_thread(p):
  301.     # A sample trigger list - you'll replace
  302.     # this with something more configurable in Problem 11
  303. ##    t1 = SubjectTrigger("Obama")
  304. ##    t2 = SummaryTrigger("MIT")
  305. ##    t3 = PhraseTrigger("Supreme Court")
  306. ##    t4 = OrTrigger(t2, t3)
  307. ##    triggerlist = [t1, t4]
  308.    
  309.     # TODO: Problem 11
  310.     # After implementing readTriggerConfig, uncomment this line
  311.     triggerlist = readTriggerConfig("triggers.txt")
  312.     print triggerlist
  313.     guidShown = []
  314.    
  315.     while True:
  316.         print "Polling..."
  317.  
  318.         # Get stories from Google's Top Stories RSS news feed
  319.         stories = process("http://news.google.com/?output=rss")
  320.         # Get stories from Yahoo's Top Stories RSS news feed
  321.         stories.extend(process("http://rss.news.yahoo.com/rss/topstories"))
  322.  
  323.         # Only select stories we're interested in
  324.         stories = filter_stories(stories, triggerlist)
  325.    
  326.         # Don't print a story if we have already printed it before
  327.         newstories = []
  328.         for story in stories:
  329.             if story.get_guid() not in guidShown:
  330.                 newstories.append(story)
  331.        
  332.         for story in newstories:
  333.             guidShown.append(story.get_guid())
  334.             p.newWindow(story)
  335.  
  336.         print "Sleeping..."
  337.         time.sleep(SLEEPTIME)
  338.  
  339. SLEEPTIME = 60 #seconds -- how often we poll
  340. if __name__ == '__main__':
  341.     p = Popup()
  342.     thread.start_new_thread(main_thread, (p,))
  343.     p.start()
clone this paste RAW Paste Data