# 6.00 Problem Set 5
# RSS Feed Filter
import feedparser
import string
import time
from project_util import translate_html
from news_gui import Popup
#-----------------------------------------------------------------------
#
# Problem Set 5
#======================
# Code for retrieving and parsing
# Google and Yahoo News feeds
# Do not change this code
#======================
def process(url):
    """
    Download the RSS feed at `url` and turn each entry into a NewsStory.

    For every entry the guid and link are used as-is, while the title,
    the summary and the subject (the 'term' of the entry's first tag) are
    passed through translate_html.  If reading the subject raises
    AttributeError, the subject is the empty string.
    Returns a list of NewsStory-s, one per entry, in the order of
    feed.entries.
    """
    stories = []
    for item in feedparser.parse(url).entries:
        guid = item.guid
        title = translate_html(item.title)
        link = item.link
        summary = translate_html(item.summary)
        try:
            subject = translate_html(item.tags[0]['term'])
        except AttributeError:
            # No usable tag information on this entry.
            subject = ""
        stories.append(NewsStory(guid, title, subject, summary, link))
    return stories
#======================
# Part 1
# Data structure design
#======================
# Problem 1
# TODO: NewsStory
class NewsStory(object):
    """
    Record for a single news item.

    Holds the guid, title, subject, summary and link that were parsed
    from a feed entry and exposes one getter per field.
    """

    def __init__(self, guid, title, subject, summary, link):
        # Keep every parsed field exactly as it was handed in.
        (self.guid, self.title, self.subject,
         self.summary, self.link) = (guid, title, subject, summary, link)

    # Read accessors for the stored fields.
    def get_guid(self):
        return self.guid

    def get_title(self):
        return self.title

    def get_subject(self):
        return self.subject

    def get_summary(self):
        return self.summary

    def get_link(self):
        return self.link
#======================
# Part 2
# Triggers
#======================
class Trigger(object):
    """Common interface of all triggers; subclasses override evaluate."""

    def evaluate(self, story):
        """
        Returns True if an alert should be generated
        for the given news item, or False otherwise.

        The base class has no rule of its own and always raises
        NotImplementedError.
        """
        raise NotImplementedError


# Whole Word Triggers
# Problems 2-5
# TODO: WordTrigger
import string


class WordTrigger(Trigger):
    """
    Base class for triggers that look for one whole word in a text.

    The word is stored as given; is_word_in compares it verbatim, so
    subclasses that want case-insensitive matching lower-case both the
    word and the text themselves.
    """

    def __init__(self, word):
        # The word this trigger looks for.
        self.word = word

    def is_word_in(self, text):
        """
        Return True if self.word occurs in `text` as a whole word.

        Words are delimited by any whitespace and by every character in
        string.punctuation, so "soft" is found in "soft!" and "soft's"
        but not in "microsoft".  The comparison is case-sensitive.
        """
        # Replace each punctuation mark by a space and split on whitespace.
        # Working on a fresh string (instead of extending/removing items of
        # the token list while looping over it) guarantees no token is
        # skipped, and split() also separates words on tabs and newlines.
        for punc in string.punctuation:
            text = text.replace(punc, ' ')
        return self.word in text.split()
class TitleTrigger(WordTrigger):
    """Word trigger that inspects a story's title, ignoring case."""

    def __init__(self, word):
        # Store the word lower-cased; evaluate lower-cases the title to match.
        super(TitleTrigger, self).__init__(word.lower())

    def evaluate(self, story):
        """Return is_word_in's verdict for the lower-cased story.title."""
        lowered_title = story.title.lower()
        return self.is_word_in(lowered_title)
# TODO: SubjectTrigger
class SubjectTrigger(WordTrigger):
    """Word trigger that inspects a story's subject, ignoring case."""

    def __init__(self, word):
        # Store the word lower-cased; evaluate lower-cases the subject to match.
        super(SubjectTrigger, self).__init__(word.lower())

    def evaluate(self, story):
        """Return is_word_in's verdict for the lower-cased story.subject."""
        lowered_subject = story.subject.lower()
        return self.is_word_in(lowered_subject)
# TODO: SummaryTrigger
class SummaryTrigger(WordTrigger):
    """Word trigger that inspects a story's summary, ignoring case."""

    def __init__(self, word):
        # Store the word lower-cased; evaluate lower-cases the summary to match.
        super(SummaryTrigger, self).__init__(word.lower())

    def evaluate(self, story):
        """Return is_word_in's verdict for the lower-cased story.summary."""
        lowered_summary = story.summary.lower()
        return self.is_word_in(lowered_summary)
# Composite Triggers
# Problems 6-8
# TODO: NotTrigger
class NotTrigger(WordTrigger):
    """
    Inverts the verdict of another trigger.

    NOTE(review): this subclasses WordTrigger but never sets `word`, so the
    inherited is_word_in is not usable here -- consider deriving from
    Trigger instead.
    """

    def __init__(self, trigger):
        # The wrapped trigger whose result gets negated.
        self.trigger = trigger

    def evaluate(self, story):
        """Return True exactly when the wrapped trigger's result is falsy."""
        fired = self.trigger.evaluate(story)
        return not fired
# TODO: AndTrigger
class AndTrigger(WordTrigger):
    """
    Fires only when both of two triggers fire.

    NOTE(review): this subclasses WordTrigger but never sets `word`, so the
    inherited is_word_in is not usable here -- consider deriving from
    Trigger instead.
    """

    def __init__(self, trigger1, trigger2):
        # The two triggers that must both fire.
        self.trigger1, self.trigger2 = trigger1, trigger2

    def evaluate(self, story):
        """
        Return the first trigger's result if it is falsy, otherwise the
        second trigger's result (the second is not evaluated when the
        first did not fire).
        """
        first = self.trigger1.evaluate(story)
        if not first:
            return first
        return self.trigger2.evaluate(story)
# TODO: OrTrigger
class OrTrigger(WordTrigger):
    """
    Fires when at least one of two triggers fires.

    NOTE(review): this subclasses WordTrigger but never sets `word`, so the
    inherited is_word_in is not usable here -- consider deriving from
    Trigger instead.
    """

    def __init__(self, trigger1, trigger2):
        # The two alternative triggers.
        self.trigger1, self.trigger2 = trigger1, trigger2

    def evaluate(self, story):
        """
        Return the first trigger's result if it is truthy, otherwise the
        second trigger's result (the second is not evaluated when the
        first already fired).
        """
        first = self.trigger1.evaluate(story)
        if first:
            return first
        return self.trigger2.evaluate(story)
# Phrase Trigger
# Question 9
# TODO: PhraseTrigger
class PhraseTrigger(WordTrigger):
    """
    Fires when a phrase occurs verbatim (case-sensitive substring) in a
    story's subject, title or summary.
    """

    def __init__(self, phrase):
        # The phrase is kept in the `word` attribute, unchanged.
        self.word = phrase

    def is_phrase_in(self, text):
        """Return True if the stored phrase is a substring of `text`."""
        return self.word in text

    def evaluate(self, story):
        """
        Check subject, then title, then summary, stopping at the first
        field that contains the phrase.
        """
        if self.is_phrase_in(story.subject):
            return True
        if self.is_phrase_in(story.title):
            return True
        return self.is_phrase_in(story.summary)
#======================
# Part 3
# Filtering
#======================
def filter_stories(stories, triggerlist):
    """
    Takes in a list of NewsStory-s.
    Returns only those stories for whom
    a trigger in triggerlist fires.

    Each story appears at most once per occurrence in `stories`, in its
    original order; triggers are tried in list order and the remaining
    ones are skipped as soon as one fires.
    """
    return [
        candidate
        for candidate in stories
        if any(rule.evaluate(candidate) for rule in triggerlist)
    ]
#======================
# Part 4
# User-Specified Triggers
#======================
## Known problems (not yet fixed):
###  1. This code uses len(line) where the examples use len(line)-1.
###  2. There is an unhandled problem in main_thread.
def readTriggerConfig(filename):
    """
    Returns a list of trigger objects
    that correspond to the rules set
    in the file filename

    Blank lines and lines starting with '#' are skipped.  Every other
    line is split on whitespace and handled as one of:

        <name> SUBJECT|TITLE|SUMMARY <word>
        <name> PHRASE <phrase ...>          (rest of the line is the phrase)
        <name> NOT <trigger name>
        <name> AND|OR <trigger name> <trigger name>
        ADD <trigger name> [<trigger name> ...]

    A definition stores the new trigger under <name>; ADD appends the
    named triggers, in order, to the returned list.  Lines whose second
    token is none of the keywords above are ignored.

    Raises KeyError if a line refers to a trigger name that has not been
    defined on an earlier line, and IndexError if a definition line has
    too few tokens.
    """
    # Read the file, closing it deterministically, and drop blank lines
    # and comments.
    with open(filename, "r") as triggerfile:
        stripped = [line.strip() for line in triggerfile]
    rules = [line for line in stripped if line and not line.startswith('#')]

    # Triggers selected by ADD lines, in the order they were added.
    triggerlist = []
    # Every trigger defined so far, keyed by the name given in the file.
    named = {}

    for rule in rules:
        tokens = rule.split()

        # ADD is recognised by its position (first token), so a trigger
        # name or search word can never be mistaken for the keyword.
        if tokens[0] == 'ADD':
            triggerlist.extend(named[name] for name in tokens[1:])
            continue

        name, kind = tokens[0], tokens[1]
        # Title, subject and summary triggers take a single word.
        if kind == 'SUBJECT':
            named[name] = SubjectTrigger(tokens[2])
        elif kind == 'TITLE':
            named[name] = TitleTrigger(tokens[2])
        elif kind == 'SUMMARY':
            named[name] = SummaryTrigger(tokens[2])
        elif kind == 'PHRASE':
            # Everything after the keyword is the phrase, including its
            # last word and without any leading space.
            named[name] = PhraseTrigger(rule.split(None, 2)[2])
        # Composite triggers take previously defined triggers by name.
        elif kind == 'AND':
            named[name] = AndTrigger(named[tokens[2]], named[tokens[3]])
        elif kind == 'OR':
            named[name] = OrTrigger(named[tokens[2]], named[tokens[3]])
        elif kind == 'NOT':
            named[name] = NotTrigger(named[tokens[2]])

    return triggerlist
import thread
def main_thread(p):
    """
    Poll the news feeds forever and open a window for each new match.

    The triggers are read once from "triggers.txt".  On every round the
    Google and Yahoo top-story feeds are fetched, filtered through the
    triggers, and p.newWindow(story) is called for each remaining story
    whose guid has not been shown in an earlier round.  The loop then
    sleeps SLEEPTIME seconds; the function never returns.
    """
    triggerlist = readTriggerConfig("triggers.txt")
    # A parenthesised single-argument print emits the same text under the
    # print statement.
    print(triggerlist)

    shown_guids = []
    while True:
        print("Polling...")
        # Google's Top Stories feed first, then Yahoo's.
        google_stories = process("http://news.google.com/?output=rss")
        yahoo_stories = process("http://rss.news.yahoo.com/rss/topstories")

        # Keep only the stories some trigger fires on.
        matching = filter_stories(google_stories + yahoo_stories, triggerlist)

        # Decide what is new before recording anything from this round.
        fresh = [item for item in matching
                 if item.get_guid() not in shown_guids]
        for item in fresh:
            shown_guids.append(item.get_guid())
            p.newWindow(item)

        print("Sleeping...")
        time.sleep(SLEEPTIME)
# Seconds to wait between two polls of the feeds.
SLEEPTIME = 60

if __name__ == "__main__":
    # The polling loop runs on a second thread; the popup object's
    # start() is called on the main thread.
    popup = Popup()
    thread.start_new_thread(main_thread, (popup,))
    popup.start()