Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # 6.00 Problem Set 5
- # RSS Feed Filter
- import feedparser
- import string
- import time
- from project_util import translate_html
- from news_gui import Popup
- #-----------------------------------------------------------------------
- #
- # Problem Set 5
- #======================
- # Code for retrieving and parsing
- # Google and Yahoo News feeds
- # Do not change this code
- #======================
def process(url):
    """
    Download the RSS feed at `url` and wrap every entry in a NewsStory.

    The title, the summary and the term of the first tag of each entry
    are passed through translate_html. If reading the tag raises
    AttributeError, the story gets an empty subject instead.

    Returns a list of NewsStory objects in feed order.
    """
    stories = []
    for item in feedparser.parse(url).entries:
        guid = item.guid
        title = translate_html(item.title)
        link = item.link
        summary = translate_html(item.summary)
        try:
            subject = translate_html(item.tags[0]['term'])
        except AttributeError:
            # No usable tag on this entry: fall back to an empty subject.
            subject = ""
        stories.append(NewsStory(guid, title, subject, summary, link))
    return stories
- #======================
- # Part 1
- # Data structure design
- #======================
- # Problem 1
- # TODO: NewsStory
class NewsStory(object):
    """
    Plain record describing one parsed news item.

    Holds the guid, title, subject, summary and link handed to the
    constructor and exposes each of them through a get_* accessor.
    """

    def __init__(self, guid, title, subject, summary, link):
        # Keep every parsed field on the instance under its own name.
        (self.guid, self.title, self.subject,
         self.summary, self.link) = (guid, title, subject, summary, link)

    # Read accessors for the stored fields.
    def get_guid(self):
        """Return the guid given at construction."""
        return self.guid

    def get_title(self):
        """Return the title given at construction."""
        return self.title

    def get_subject(self):
        """Return the subject given at construction."""
        return self.subject

    def get_summary(self):
        """Return the summary given at construction."""
        return self.summary

    def get_link(self):
        """Return the link given at construction."""
        return self.link
- #======================
- # Part 2
- # Triggers
- #======================
class Trigger(object):
    """Common base for all triggers; subclasses override evaluate."""

    def evaluate(self, story):
        """
        Decide whether `story` should generate an alert.

        Subclasses return True to alert and False otherwise. This base
        implementation always raises NotImplementedError.
        """
        raise NotImplementedError()
- # Whole Word Triggers
- # Problems 2-5
- # TODO: WordTrigger
- import string
class WordTrigger(Trigger):
    """
    Base class for triggers that look for one whole word in a text.

    The word is stored exactly as given. Matching is case-sensitive, so
    callers wanting case-insensitive behaviour must lower-case both the
    word and the text themselves.
    """

    def __init__(self, word):
        # The word this trigger searches for.
        self.word = word

    def is_word_in(self, text):
        """
        Return True if self.word occurs in `text` as a whole word.

        Every character of string.punctuation and every run of
        whitespace acts as a word separator, so "koala's" yields the
        words "koala" and "s" while "microsoft" never matches "soft".
        """
        # Substitute on the string instead of editing a token list while
        # looping over it: removing items during iteration skips the
        # element that follows each removed one, which left some tokens
        # with their punctuation still attached.
        for punc in string.punctuation:
            text = text.replace(punc, ' ')
        # split() without arguments also treats tabs/newlines as
        # separators and never produces empty tokens.
        return self.word in text.split()
class TitleTrigger(WordTrigger):
    """
    Word trigger applied to a story's title.

    Both the word and the title are lower-cased before matching.
    """

    def __init__(self, word):
        lowered = word.lower()
        WordTrigger.__init__(self, lowered)

    def evaluate(self, story):
        """Return the result of WordTrigger.is_word_in on the lower-cased title."""
        return WordTrigger.is_word_in(self, story.title.lower())
- # TODO: SubjectTrigger
class SubjectTrigger(WordTrigger):
    """
    Word trigger applied to a story's subject.

    Both the word and the subject are lower-cased before matching.
    """

    def __init__(self, word):
        lowered = word.lower()
        WordTrigger.__init__(self, lowered)

    def evaluate(self, story):
        """Return the result of WordTrigger.is_word_in on the lower-cased subject."""
        return WordTrigger.is_word_in(self, story.subject.lower())
- # TODO: SummaryTrigger
class SummaryTrigger(WordTrigger):
    """
    Word trigger applied to a story's summary.

    Both the word and the summary are lower-cased before matching.
    """

    def __init__(self, word):
        lowered = word.lower()
        WordTrigger.__init__(self, lowered)

    def evaluate(self, story):
        """Return the result of self.is_word_in on the lower-cased summary."""
        lowered_summary = story.summary.lower()
        return self.is_word_in(lowered_summary)
- # Composite Triggers
- # Problems 6-8
- # TODO: NotTrigger
class NotTrigger(Trigger):
    """
    Composite trigger that inverts the verdict of another trigger.

    Derives from Trigger, not WordTrigger: this class never sets
    self.word, so an inherited is_word_in could only fail with
    AttributeError.
    """

    def __init__(self, trigger):
        # The wrapped trigger whose result is negated.
        self.trigger = trigger

    def evaluate(self, story):
        """Return True exactly when the wrapped trigger does not fire on `story`."""
        return not self.trigger.evaluate(story)
- # TODO: AndTrigger
class AndTrigger(Trigger):
    """
    Composite trigger that fires only when both wrapped triggers fire.

    Derives from Trigger, not WordTrigger: this class never sets
    self.word, so an inherited is_word_in could only fail with
    AttributeError.
    """

    def __init__(self, trigger1, trigger2):
        # The two triggers that must both fire.
        self.trigger1 = trigger1
        self.trigger2 = trigger2

    def evaluate(self, story):
        """
        Return the conjunction of both triggers' verdicts for `story`.

        trigger2 is only evaluated when trigger1 fires.
        """
        return self.trigger1.evaluate(story) and self.trigger2.evaluate(story)
- # TODO: OrTrigger
class OrTrigger(Trigger):
    """
    Composite trigger that fires when either wrapped trigger fires.

    Derives from Trigger, not WordTrigger: this class never sets
    self.word, so an inherited is_word_in could only fail with
    AttributeError.
    """

    def __init__(self, trigger1, trigger2):
        # The two triggers of which at least one must fire.
        self.trigger1 = trigger1
        self.trigger2 = trigger2

    def evaluate(self, story):
        """
        Return the disjunction of both triggers' verdicts for `story`.

        trigger2 is only evaluated when trigger1 does not fire.
        """
        return self.trigger1.evaluate(story) or self.trigger2.evaluate(story)
- # Phrase Trigger
- # Question 9
- # TODO: PhraseTrigger
class PhraseTrigger(WordTrigger):
    """
    Fires when the phrase occurs in a story's subject, title or summary.

    The check is a case-sensitive substring test on the raw field text.
    """

    def __init__(self, phrase):
        # Stored under the attribute name used by the WordTrigger base.
        self.word = phrase

    def is_phrase_in(self, text):
        """Return True if the stored phrase is a substring of `text`."""
        return self.word in text

    def evaluate(self, story):
        """Return True if any of subject, title or summary contains the phrase."""
        # Fields are read lazily in this order; the search stops at the
        # first field that contains the phrase.
        return any(self.is_phrase_in(getattr(story, field))
                   for field in ('subject', 'title', 'summary'))
- #======================
- # Part 3
- # Filtering
- #======================
def filter_stories(stories, triggerlist):
    """
    Select the stories for which at least one trigger fires.

    stories: iterable of story objects.
    triggerlist: list of objects providing evaluate(story).

    Returns a new list holding each matching story once, in its
    original order. Triggers are tried in list order and evaluation
    for a story stops at the first trigger that fires.
    """
    return [
        candidate
        for candidate in stories
        if any(rule.evaluate(candidate) for rule in triggerlist)
    ]
- #======================
- # Part 4
- # User-Specified Triggers
- #======================
# Open issue noted by the original author: unhandled problem in main_thread.
def readTriggerConfig(filename):
    """
    Build the list of active triggers described by a configuration file.

    Blank lines and lines whose first character is '#' are ignored.
    Every other line is split on whitespace and is one of:

        <name> TITLE <word>
        <name> SUBJECT <word>
        <name> SUMMARY <word>
        <name> PHRASE <word> [<word> ...]
        <name> NOT <trigger name>
        <name> AND <trigger name> <trigger name>
        <name> OR <trigger name> <trigger name>
        ADD <trigger name> [<trigger name> ...]

    A definition stores the new trigger under <name>; ADD appends the
    named triggers to the result. Lines with an unrecognised keyword in
    second position are skipped.

    Returns the list of triggers named on ADD lines, in file order.

    Raises:
        IOError/OSError: if the file cannot be opened.
        KeyError: if a line refers to a trigger name not defined earlier.
        ValueError: if a definition line has too few fields.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, "r") as triggerfile:
        stripped = [raw.rstrip() for raw in triggerfile]
    lines = [line for line in stripped if line and line[0] != '#']

    # Keyword -> class, for triggers built from a single word ...
    word_triggers = {
        'TITLE': TitleTrigger,
        'SUBJECT': SubjectTrigger,
        'SUMMARY': SummaryTrigger,
    }
    # ... and for triggers built from two previously defined triggers.
    pair_triggers = {
        'AND': AndTrigger,
        'OR': OrTrigger,
    }

    triggerlist = []
    triggerdic = {}  # trigger name -> trigger object
    for line in lines:
        tokens = line.split()

        if tokens[0] == 'ADD':
            # Every name after ADD is activated, including the last one.
            for name in tokens[1:]:
                triggerlist.append(triggerdic[name])
            continue

        if len(tokens) < 3:
            raise ValueError("malformed trigger definition: %r" % line)
        name, kind, args = tokens[0], tokens[1], tokens[2:]

        # Dispatch on the keyword position only, so that a phrase or a
        # trigger name containing e.g. "AND" cannot select a wrong branch.
        if kind in word_triggers:
            triggerdic[name] = word_triggers[kind](args[0])
        elif kind == 'PHRASE':
            # Re-join all remaining words with single spaces.
            triggerdic[name] = PhraseTrigger(' '.join(args))
        elif kind == 'NOT':
            triggerdic[name] = NotTrigger(triggerdic[args[0]])
        elif kind in pair_triggers:
            if len(args) < 2:
                raise ValueError("malformed trigger definition: %r" % line)
            triggerdic[name] = pair_triggers[kind](triggerdic[args[0]],
                                                   triggerdic[args[1]])
    return triggerlist
- import thread
def main_thread(p):
    """
    Poll the news feeds forever and pop up every new matching story.

    p: object providing newWindow(story); called once per story shown.

    The trigger list is read once from "triggers.txt". Each cycle
    fetches the Google and Yahoo top-story feeds, filters them through
    the triggers, shows every story whose guid has not been shown yet
    and then sleeps for SLEEPTIME seconds. The function never returns.
    """
    triggerlist = readTriggerConfig("triggers.txt")
    print(triggerlist)
    # Guids already shown. A set gives constant-time membership tests;
    # this assumes guids are hashable (presumably strings -- confirm).
    guidShown = set()
    while True:
        print("Polling...")
        # Get stories from Google's Top Stories RSS news feed
        stories = process("http://news.google.com/?output=rss")
        # Get stories from Yahoo's Top Stories RSS news feed
        stories.extend(process("http://rss.news.yahoo.com/rss/topstories"))
        # Only select stories we're interested in
        stories = filter_stories(stories, triggerlist)
        for story in stories:
            guid = story.get_guid()
            if guid in guidShown:
                continue
            # Record the guid before showing the story so that a
            # duplicate inside the same poll is not shown a second time.
            guidShown.add(guid)
            p.newWindow(story)
        print("Sleeping...")
        time.sleep(SLEEPTIME)
# Polling interval in seconds.
SLEEPTIME = 60

if __name__ == "__main__":
    # Run the polling loop on a background thread, then start the popup.
    p = Popup()
    thread.start_new_thread(main_thread, (p,))
    p.start()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement