Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # 6.00 Problem Set 5
- # RSS Feed Filter
- import feedparser
- import string
- import time
- from project_util import translate_html
- from news_gui import Popup
- #-----------------------------------------------------------------------
- #
- # Problem Set 5
- #======================
- # Code for retrieving and parsing
- # Google and Yahoo News feeds
- # Do not change this code
- #======================
def process(url):
    """
    Fetches news items from the rss url and parses them.

    url: location of the RSS feed, handed straight to feedparser.parse.

    Returns a list of NewsStory-s, one per feed entry, in feed order.
    The subject of a story is taken from the 'term' of the entry's first
    tag; when that cannot be read the subject is the empty string.
    """
    feed = feedparser.parse(url)
    ret = []
    for entry in feed.entries:
        guid = entry.guid
        title = translate_html(entry.title)
        link = entry.link
        summary = translate_html(entry.summary)
        try:
            subject = translate_html(entry.tags[0]['term'])
        except (AttributeError, IndexError, KeyError):
            # No usable first tag: the attribute is absent, the tag list
            # is empty, or the tag carries no 'term'. One untagged entry
            # must not abort parsing of the whole feed.
            subject = ""
        ret.append(NewsStory(guid, title, subject, summary, link))
    return ret
- #======================
- # Part 1
- # Data structure design
- #======================
- # Problem 1
- # TODO: NewsStory
class NewsStory(object):
    """Plain record for one news item, with a getter per field."""

    def __init__(self, guid, title, subject, summary, link):
        # Each argument is stored unchanged under the attribute of the
        # same name.
        (self.guid, self.title, self.subject,
         self.summary, self.link) = (guid, title, subject, summary, link)

    def get_guid(self):
        """Return the guid passed to the constructor."""
        return self.guid

    def get_title(self):
        """Return the title passed to the constructor."""
        return self.title

    def get_subject(self):
        """Return the subject passed to the constructor."""
        return self.subject

    def get_summary(self):
        """Return the summary passed to the constructor."""
        return self.summary

    def get_link(self):
        """Return the link passed to the constructor."""
        return self.link
- #======================
- # Part 2
- # Triggers
- #======================
class Trigger(object):
    """Common base for all triggers; subclasses provide evaluate()."""

    def evaluate(self, story):
        """
        Decide whether the given news item should generate an alert.

        Subclasses are expected to return True when it should and False
        otherwise. This base version has no rule of its own and always
        raises NotImplementedError.
        """
        raise NotImplementedError()
- # Whole Word Triggers
- # Problems 2-5
- # TODO: WordTrigger
class WordTrigger(Trigger):
    """
    Base for triggers that look for one whole word in a piece of text.

    The word is lower-cased once at construction; subclasses choose which
    text of a story is handed to is_word_in().
    """

    def __init__(self, word):
        self.word = word.lower()

    def is_word_in(self, text):
        """
        Return True if self.word occurs in text as a whole word.

        text is lower-cased, every character in string.punctuation is
        replaced by a space, and the result is split on whitespace; the
        trigger word must equal one of the resulting pieces.
        """
        # Work on a local copy: the check must not leave scratch data on
        # the trigger instance.
        lowered = text.lower()
        for c in string.punctuation:
            lowered = lowered.replace(c, ' ')
        return self.word in lowered.split()
- # TODO: TitleTrigger
- # TODO: SubjectTrigger
- # TODO: SummaryTrigger
class TitleTrigger(WordTrigger):
    """Word trigger applied to a story's title."""

    def evaluate(self, story):
        """Return the result of is_word_in() on story.get_title()."""
        title = story.get_title()
        return self.is_word_in(title)
class SubjectTrigger(WordTrigger):
    """Word trigger applied to a story's subject."""

    def evaluate(self, story):
        """Return the result of is_word_in() on story.get_subject()."""
        subject = story.get_subject()
        return self.is_word_in(subject)
class SummaryTrigger(WordTrigger):
    """Word trigger applied to a story's summary."""

    def evaluate(self, story):
        """Return the result of is_word_in() on story.get_summary()."""
        summary = story.get_summary()
        return self.is_word_in(summary)
- # Composite Triggers
- # Problems 6-8
- # TODO: NotTrigger
- # TODO: AndTrigger
- # TODO: OrTrigger
class NotTrigger(Trigger):
    """Composite trigger that inverts the result of another trigger."""

    def __init__(self, other):
        self.other = other

    def evaluate(self, story):
        """Return True exactly when the wrapped trigger's result is falsy."""
        return not self.other.evaluate(story)
class AndTrigger(Trigger):
    """Composite trigger that fires only when both of its triggers fire."""

    def __init__(self, other1, other2):
        self.other1 = other1
        self.other2 = other2

    def evaluate(self, story):
        """Return True if both wrapped triggers give a truthy result."""
        # The second trigger is only consulted when the first one fired.
        if not self.other1.evaluate(story):
            return False
        return bool(self.other2.evaluate(story))
class OrTrigger(Trigger):
    """Composite trigger that fires when at least one of its triggers fires."""

    def __init__(self, other1, other2):
        self.other1 = other1
        self.other2 = other2

    def evaluate(self, story):
        """Return True if either wrapped trigger gives a truthy result."""
        # The second trigger is only consulted when the first did not fire.
        if self.other1.evaluate(story):
            return True
        return bool(self.other2.evaluate(story))
- # Phrase Trigger
- # Question 9
- # TODO: PhraseTrigger
class PhraseTrigger(Trigger):
    """Trigger that looks for an exact phrase anywhere in a story's text."""

    def __init__(self, phrase):
        self.phrase = phrase

    def evaluate(self, story):
        """
        Return True if the phrase is contained in the story's title,
        subject or summary, checked in that order.

        The phrase is used exactly as given to the constructor (no case
        folding or punctuation handling is applied here).
        """
        # Guard clauses keep the original short-circuit order: a later
        # field is only fetched when the earlier ones did not match.
        if self.phrase in story.get_title():
            return True
        if self.phrase in story.get_subject():
            return True
        return self.phrase in story.get_summary()
- #======================
- # Part 3
- # Filtering
- #======================
def filter_stories(stories, triggerlist):
    """
    Takes in a list of NewsStory-s.
    Returns only those stories for whom
    a trigger in triggerlist fires.

    stories: iterable of objects offering get_title().
    triggerlist: iterable of objects offering evaluate(story).

    The result is a new list that keeps the input order; each story
    appears at most once per occurrence in stories, however many triggers
    fire for it. The title of every selected story is also printed.
    """
    filtered_stories = []
    for story in stories:
        # any() stops at the first trigger that fires, and accepts any
        # truthy result rather than only the True singleton.
        if any(trigger.evaluate(story) for trigger in triggerlist):
            print(story.get_title())
            filtered_stories.append(story)
    return filtered_stories
- ##story = NewsStory(12,'Why do apples grow on trees?', 'Fruit', 'Because they do', 'link')
- ##text = 'Apples, and trees" and nature stuff!'
- ##word = 'do'
- ##test3 = TitleTrigger(word)
- ##test4 = SubjectTrigger(word)
- ##test5 = SummaryTrigger(word)
- ##test6 = NotTrigger(TitleTrigger(word))
- ##test7 = AndTrigger(TitleTrigger(word),SubjectTrigger(word))
- ##test8 = OrTrigger(TitleTrigger(word),SummaryTrigger(word))
- ##print 'text',test3.is_word_in(text)
- ##print 'title',test3.evaluate(story)
- ##print 'subject',test4.evaluate(story)
- ##print 'summary',test5.evaluate(story)
- ##print 'not=', test6.evaluate(story)
- ##print 'and=', test7.evaluate(story)
- ##print 'or=', test8.evaluate(story)
- #======================
- # Part 4
- # User-Specified Triggers
- #======================
def readTriggerConfig(filename):
    """
    Returns a list of trigger objects
    that correspond to the rules set
    in the file filename

    Blank lines and lines whose first character is '#' are ignored.
    Every other line is split on whitespace and read as one of:

        <name> TITLE|SUBJECT|SUMMARY <word>
        <name> PHRASE <word> [<word> ...]
        <name> NOT <trigger name>
        <name> AND|OR <trigger name> <trigger name>
        ADD <trigger name> [<trigger name> ...]

    Definition lines only register a trigger under <name>; the returned
    list holds the triggers named on ADD lines, in the order they are
    listed. Names on an ADD line that have not been defined are skipped,
    and lines with an unrecognised keyword are ignored.

    Raises KeyError when NOT/AND/OR refers to a name that has not been
    defined on an earlier line, and IndexError when a line has too few
    fields for its keyword.
    """
    # Read everything up front so the file is closed before parsing.
    with open(filename, "r") as triggerfile:
        raw_lines = [line.rstrip() for line in triggerfile]

    triggerkeys = {}   # trigger name -> trigger object
    triggered = []     # triggers selected by ADD lines
    for line in raw_lines:
        if len(line) == 0 or line[0] == '#':
            continue
        parts = line.split()
        name, kind = parts[0], parts[1]
        if kind == 'TITLE':
            triggerkeys[name] = TitleTrigger(parts[2])
        elif kind == 'SUBJECT':
            triggerkeys[name] = SubjectTrigger(parts[2])
        elif kind == 'SUMMARY':
            triggerkeys[name] = SummaryTrigger(parts[2])
        elif kind == 'NOT':
            # Wrap the previously defined trigger object, not its name.
            triggerkeys[name] = NotTrigger(triggerkeys[parts[2]])
        elif kind == 'PHRASE':
            # The phrase is every remaining field, rejoined with single
            # spaces (repeated words included).
            triggerkeys[name] = PhraseTrigger(' '.join(parts[2:]))
        elif kind == 'AND':
            triggerkeys[name] = AndTrigger(triggerkeys[parts[2]],
                                           triggerkeys[parts[3]])
        elif kind == 'OR':
            triggerkeys[name] = OrTrigger(triggerkeys[parts[2]],
                                          triggerkeys[parts[3]])
        elif name == 'ADD':
            # Skip the ADD keyword itself; only the names after it count.
            for key in parts[1:]:
                if key in triggerkeys:
                    triggered.append(triggerkeys[key])
    return triggered
- # TODO: Problem 11
- # 'lines' has a list of lines you need to parse
- # Build a set of triggers from it and
- # return the appropriate ones
- ##t1 = TitleTrigger("Obama")
- ##t2 = SummaryTrigger("MIT")
- ##t3 = PhraseTrigger("Bombing")
- ##t4 = OrTrigger(t2, t3)
- ##triggerlist = [t1, t4]
- ##print 'trig=', triggerlist
- import thread
def main_thread(p):
    """
    Poll the news feeds forever and hand every new matching story to p.

    p: object offering newWindow(story); the __main__ section of this
       file passes a Popup.

    The trigger list is read once from "triggers.txt". Each round fetches
    both feeds, keeps the stories selected by filter_stories, shows the
    ones whose guid has not been shown before, and then sleeps for
    SLEEPTIME seconds. This function does not return.
    """
    triggerlist = readTriggerConfig("triggers.txt")
    # Guids already shown. A set gives constant-time membership tests;
    # this relies on guids being hashable (process() takes them from
    # entry.guid -- presumably strings, verify if other sources are added).
    guidShown = set()
    while True:
        print("Polling...")
        # Get stories from Google's Top Stories RSS news feed
        stories = process("http://news.google.com/?output=rss")
        # Get stories from Yahoo's Top Stories RSS news feed
        stories.extend(process("http://rss.news.yahoo.com/rss/topstories"))
        # Only select stories we're interested in
        stories = filter_stories(stories, triggerlist)
        # Don't show a story if we have already shown it before. The guid
        # is recorded as soon as it is seen, so a guid occurring twice in
        # the same poll is also shown only once.
        for story in stories:
            guid = story.get_guid()
            if guid in guidShown:
                continue
            guidShown.add(guid)
            p.newWindow(story)
        print("Sleeping...")
        time.sleep(SLEEPTIME)
# Seconds main_thread sleeps between two polls of the feeds.
SLEEPTIME = 60

if __name__ == '__main__':
    p = Popup()
    # The polling loop runs on its own thread and receives the popup as
    # its only argument; p.start() is then called on this thread.
    poll_args = (p,)
    thread.start_new_thread(main_thread, poll_args)
    p.start()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement