# 6.00 Problem Set 5
# RSS Feed Filter

import string
import time

try:
    import thread  # Python 2 module name
except ImportError:
    import _thread as thread  # the same module under its Python 3 name

import feedparser

from project_util import translate_html
from news_gui import Popup

SLEEPTIME = 60  # seconds -- how often we poll

#-----------------------------------------------------------------------
#
# Problem Set 5

#======================
# Code for retrieving and parsing
# Google and Yahoo News feeds
# Do not change this code
#======================

def process(url):
    """
    Fetches news items from the rss url and parses them.
    Returns a list of NewsStory-s.
    """
    feed = feedparser.parse(url)
    entries = feed.entries
    ret = []
    for entry in entries:
        guid = entry.guid
        title = translate_html(entry.title)
        link = entry.link
        summary = translate_html(entry.summary)
        try:
            subject = translate_html(entry.tags[0]['term'])
        except AttributeError:
            # The entry has no tags attribute: fall back to an empty subject.
            subject = ""
        newsStory = NewsStory(guid, title, subject, summary, link)
        ret.append(newsStory)
    return ret

#======================
# Part 1
# Data structure design
#======================

# Problem 1

class NewsStory(object):
    """
    Stores the guid, title, subject, summary and link of one news item.
    """

    def __init__(self, guid, title, subject, summary, link):
        """Initialize the story with the information that was parsed."""
        self.guid = guid
        self.title = title
        self.subject = subject
        self.summary = summary
        self.link = link

    # Accessors for the stored information.

    def get_guid(self):
        """Return the story's guid."""
        return self.guid

    def get_title(self):
        """Return the story's title."""
        return self.title

    def get_subject(self):
        """Return the story's subject."""
        return self.subject

    def get_summary(self):
        """Return the story's summary."""
        return self.summary

    def get_link(self):
        """Return the story's link."""
        return self.link

#======================
# Part 2
# Triggers
#======================

class Trigger(object):
    def evaluate(self, story):
        """
        Returns True if an alert should be generated
        for the given news item, or False otherwise.
        """
        raise NotImplementedError

# Whole Word Triggers
# Problems 2-5

class WordTrigger(Trigger):
    """Base class for triggers that look for one whole word in a text."""

    def __init__(self, word):
        """Store the word that should fire the trigger."""
        self.word = word

    def is_word_in(self, text):
        """
        Returns True if self.word occurs in text as a whole word.

        Words are delimited by whitespace and by every character in
        string.punctuation. The comparison is case-sensitive; subclasses
        lower-case both the word and the text before calling this.
        """
        # Turn every punctuation character into a space and split once.
        # The text is never mutated while it is being iterated, so no
        # token can be skipped.
        for punc in string.punctuation:
            text = text.replace(punc, ' ')
        return self.word in text.split()


class TitleTrigger(WordTrigger):
    """Fires when the word appears in the story's title (case-insensitive)."""

    def __init__(self, word):
        WordTrigger.__init__(self, word.lower())

    def evaluate(self, story):
        """Returns True if the word is a whole word of the story's title."""
        return self.is_word_in(story.title.lower())


class SubjectTrigger(WordTrigger):
    """Fires when the word appears in the story's subject (case-insensitive)."""

    def __init__(self, word):
        WordTrigger.__init__(self, word.lower())

    def evaluate(self, story):
        """Returns True if the word is a whole word of the story's subject."""
        return self.is_word_in(story.subject.lower())


class SummaryTrigger(WordTrigger):
    """Fires when the word appears in the story's summary (case-insensitive)."""

    def __init__(self, word):
        WordTrigger.__init__(self, word.lower())

    def evaluate(self, story):
        """Returns True if the word is a whole word of the story's summary."""
        return self.is_word_in(story.summary.lower())

# Composite Triggers
# Problems 6-8
#
# These wrap other triggers rather than a word, so they derive from
# Trigger directly instead of WordTrigger.

class NotTrigger(Trigger):
    """Inverts the result of another trigger."""

    def __init__(self, trigger):
        self.trigger = trigger

    def evaluate(self, story):
        """Returns True exactly when the wrapped trigger returns a false value."""
        return not self.trigger.evaluate(story)


class AndTrigger(Trigger):
    """Fires only if both wrapped triggers fire."""

    def __init__(self, trigger1, trigger2):
        self.trigger1 = trigger1
        self.trigger2 = trigger2

    def evaluate(self, story):
        """Returns a true value only if both triggers fire for the story."""
        return self.trigger1.evaluate(story) and self.trigger2.evaluate(story)


class OrTrigger(Trigger):
    """Fires if either wrapped trigger fires."""

    def __init__(self, trigger1, trigger2):
        self.trigger1 = trigger1
        self.trigger2 = trigger2

    def evaluate(self, story):
        """Returns a true value if either trigger fires for the story."""
        return self.trigger1.evaluate(story) or self.trigger2.evaluate(story)

# Phrase Trigger
# Question 9

class PhraseTrigger(WordTrigger):
    """
    Fires when the exact phrase occurs in the subject, title or summary.
    The match is a case-sensitive substring test.
    """

    def __init__(self, phrase):
        # The phrase is kept in self.word, the attribute WordTrigger defines.
        WordTrigger.__init__(self, phrase)

    def is_phrase_in(self, text):
        """Returns True if the phrase is a substring of text."""
        return self.word in text

    def evaluate(self, story):
        """Returns True if the phrase is in the subject, title or summary."""
        return (self.is_phrase_in(story.subject)
                or self.is_phrase_in(story.title)
                or self.is_phrase_in(story.summary))

#======================
# Part 3
# Filtering
#======================

def filter_stories(stories, triggerlist):
    """
    Takes in a list of NewsStory-s.
    Returns only those stories for whom
    a trigger in triggerlist fires.
    """
    # Problem 10
    # any() stops at the first trigger that fires, so each story is
    # returned at most once and the input order is preserved.
    return [story for story in stories
            if any(trigger.evaluate(story) for trigger in triggerlist)]

#======================
# Part 4
# User-Specified Triggers
#======================

# Trigger types that are built from a single word argument.
_WORD_TRIGGER_TYPES = {
    'SUBJECT': SubjectTrigger,
    'TITLE': TitleTrigger,
    'SUMMARY': SummaryTrigger,
}


def _require_args(line, args, count):
    """Raise ValueError if a trigger definition has fewer than count arguments."""
    if len(args) < count:
        raise ValueError("malformed trigger definition: %r" % line)


def _build_trigger(kind, args, triggerdic, line):
    """
    Build the trigger described by one definition line.

    kind is the trigger type keyword, args the tokens that follow it,
    triggerdic maps already-defined trigger names to trigger objects and
    line is the raw text (used only in error messages).

    Returns the new trigger, or None if kind is not a known keyword.
    Raises ValueError if arguments are missing and KeyError if a composite
    trigger refers to a name that has not been defined yet.
    """
    if kind in _WORD_TRIGGER_TYPES:
        _require_args(line, args, 1)
        return _WORD_TRIGGER_TYPES[kind](args[0])
    if kind == 'PHRASE':
        _require_args(line, args, 1)
        # Every remaining token belongs to the phrase, including the last.
        return PhraseTrigger(' '.join(args))
    if kind == 'NOT':
        _require_args(line, args, 1)
        return NotTrigger(triggerdic[args[0]])
    if kind == 'AND':
        _require_args(line, args, 2)
        return AndTrigger(triggerdic[args[0]], triggerdic[args[1]])
    if kind == 'OR':
        _require_args(line, args, 2)
        return OrTrigger(triggerdic[args[0]], triggerdic[args[1]])
    return None


def readTriggerConfig(filename):
    """
    Returns a list of trigger objects
    that correspond to the rules set
    in the file filename

    Blank lines and lines starting with '#' are ignored. Every other line
    is split on whitespace and is either

        <name> <TYPE> <argument>...   defines a trigger called <name>
        ADD <name>...                 adds defined triggers to the result

    where <TYPE> is SUBJECT, TITLE, SUMMARY, PHRASE, NOT, AND or OR.
    Lines with any other keyword are skipped. Names must be defined on an
    earlier line than the one that uses them, otherwise KeyError is raised.
    A definition with too few arguments raises ValueError.
    """
    # Read the file and eliminate blank lines and comments; the file is
    # closed as soon as it has been read.
    with open(filename, "r") as triggerfile:
        stripped = [line.rstrip() for line in triggerfile]
    lines = [line for line in stripped if line and not line.startswith('#')]

    # Problem 11
    triggerlist = []
    triggerdic = {}  # trigger name -> trigger object
    for line in lines:
        tokens = line.split()
        if tokens[0] == 'ADD':
            # Every name after ADD is added, including the last one.
            for name in tokens[1:]:
                triggerlist.append(triggerdic[name])
            continue
        if len(tokens) < 2:
            continue  # no keyword on this line
        # The keyword is read from its fixed position so that a phrase
        # containing e.g. the word AND is not mistaken for an AND trigger.
        trigger = _build_trigger(tokens[1], tokens[2:], triggerdic, line)
        if trigger is not None:
            triggerdic[tokens[0]] = trigger
    return triggerlist


def main_thread(p):
    """
    Poll the news feeds forever and show every new matching story.

    p is the object whose newWindow(story) method is called once for each
    story that fires a trigger from "triggers.txt" and whose guid has not
    been shown before.
    """
    # Problem 11
    triggerlist = readTriggerConfig("triggers.txt")
    print(triggerlist)
    guidShown = set()  # guids of the stories already displayed

    while True:
        print("Polling...")

        # Get stories from Google's Top Stories RSS news feed
        stories = process("http://news.google.com/?output=rss")
        # Get stories from Yahoo's Top Stories RSS news feed
        stories.extend(process("http://rss.news.yahoo.com/rss/topstories"))

        # Only select stories we're interested in
        stories = filter_stories(stories, triggerlist)

        # Don't show a story if we have already shown it before. The guid
        # is recorded before the window opens, so a story that occurs in
        # both feeds is shown once.
        for story in stories:
            guid = story.get_guid()
            if guid in guidShown:
                continue
            guidShown.add(guid)
            p.newWindow(story)

        print("Sleeping...")
        time.sleep(SLEEPTIME)


if __name__ == '__main__':
    p = Popup()
    thread.start_new_thread(main_thread, (p,))
    p.start()