Advertisement
Guest User

Untitled

a guest
Oct 9th, 2012
302
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.85 KB | None | 0 0
  1. # 6.00 Problem Set 5
  2. # RSS Feed Filter
  3.  
  4. import feedparser
  5. import string
  6. import time
  7. from project_util import translate_html
  8. from news_gui import Popup
  9.  
  10. #-----------------------------------------------------------------------
  11. #
  12. # Problem Set 5
  13.  
  14. #======================
  15. # Code for retrieving and parsing
  16. # Google and Yahoo News feeds
  17. # Do not change this code
  18. #======================
  19.  
  20. def process(url):
  21. """
  22. Fetches news items from the rss url and parses them.
  23. Returns a list of NewsStory-s.
  24. """
  25. feed = feedparser.parse(url)
  26. entries = feed.entries
  27. ret = []
  28. for entry in entries:
  29. guid = entry.guid
  30. title = translate_html(entry.title)
  31. link = entry.link
  32. summary = translate_html(entry.summary)
  33. try:
  34. subject = translate_html(entry.tags[0]['term'])
  35. except AttributeError:
  36. subject = ""
  37. newsStory = NewsStory(guid, title, subject, summary, link)
  38. ret.append(newsStory)
  39. return ret
  40.  
  41. #======================
  42. # Part 1
  43. # Data structure design
  44. #======================
  45.  
  46. # Problem 1
  47.  
  48. # TODO: NewsStory
  49.  
  50. class NewsStory(object):
  51. '''
  52. Get to store guid, title, subject, summary, link.
  53. '''
  54. #initialize the object with the imformation that parsed
  55.  
  56. def __init__(self, guid, title, subject, summary, link):
  57. self.guid = guid
  58. self.title = title
  59. self.subject = subject
  60. self.summary = summary
  61. self.link = link
  62. #provide functions to access the information we stored
  63. def get_guid(self):
  64. return self.guid
  65.  
  66. def get_title(self):
  67. return self.title
  68.  
  69. def get_subject(self):
  70. return self.subject
  71.  
  72. def get_summary(self):
  73. return self.summary
  74.  
  75. def get_link(self):
  76. return self.link
  77.  
  78. #======================
  79. # Part 2
  80. # Triggers
  81. #======================
  82.  
  83. class Trigger(object):
  84. def evaluate(self, story):
  85. """
  86. Returns True if an alert should be generated
  87. for the given news item, or False otherwise.
  88. """
  89. raise NotImplementedError
  90.  
  91. # Whole Word Triggers
  92. # Problems 2-5
  93.  
  94. # TODO: WordTrigger
  95. import string
  96. class WordTrigger(Trigger):
  97. #store the word that wants to be triggered
  98. def __init__(self,word):
  99. self.word = word
  100.  
  101. #test if the word in text
  102. def is_word_in(self,text):
  103.  
  104. textlist = text.split(' ')
  105. #check all punctuation in the textlist
  106. #textlist should be the inner loop, because textlist.remove will allert each time inner loop runs
  107. for punc in string.punctuation:
  108. for i in textlist:
  109. if punc in i:
  110. textlist.extend(i.split(punc))
  111. textlist.remove(i)
  112. return self.word in textlist
  113.  
  114. class TitleTrigger(WordTrigger):
  115. def __init__(self,word):
  116. WordTrigger.__init__(self,word.lower())
  117. def evaluate (self,story):
  118. title = story.title.lower()
  119. return WordTrigger.is_word_in(self,title)
  120.  
  121.  
  122. # TODO: SubjectTrigger
  123. class SubjectTrigger(WordTrigger):
  124. def __init__(self, word):
  125. WordTrigger.__init__(self,word.lower())
  126.  
  127. def evaluate (self, story):
  128. subject = story.subject.lower()
  129. return WordTrigger.is_word_in(self,subject)
  130.  
  131.  
  132.  
  133. # TODO: SummaryTrigger
  134. class SummaryTrigger(WordTrigger):
  135. def __init__(self,word):
  136. WordTrigger.__init__(self,word.lower())
  137. def evaluate (self,story):
  138. summary = story.summary.lower()
  139. ## return WordTrigger.is_word_in(self,summary)
  140. return self.is_word_in(summary)
  141.  
  142. # Composite Triggers
  143. # Problems 6-8
  144.  
  145. # TODO: NotTrigger
  146. class NotTrigger(WordTrigger):
  147. def __init__(self,trigger):
  148. self.trigger = trigger
  149.  
  150. def evaluate (self, story):
  151.  
  152. return not self.trigger.evaluate(story)
  153.  
  154.  
  155.  
  156.  
  157. # TODO: AndTrigger
  158. class AndTrigger(WordTrigger):
  159. #take in two triggers
  160. def __init__(self,trigger1,trigger2):
  161. self.trigger1 = trigger1
  162. self.trigger2 = trigger2
  163. #return True only if both triggers individually return True
  164. def evaluate (self, story):
  165. return self.trigger1.evaluate(story) and self.trigger2.evaluate(story)
  166.  
  167.  
  168. # TODO: OrTrigger
  169. class OrTrigger(WordTrigger):
  170. #take in two triggers
  171. def __init__(self, trigger1, trigger2):
  172. self.trigger1 = trigger1
  173. self.trigger2 = trigger2
  174. #return True if either trigger returns True
  175. def evaluate(self,story):
  176. return self.trigger1.evaluate(story) or self.trigger2.evaluate(story)
  177.  
  178.  
  179. # Phrase Trigger
  180. # Question 9
  181.  
  182. # TODO: PhraseTrigger
  183. class PhraseTrigger(WordTrigger):
  184. def __init__(self, phrase):
  185. self.word = phrase
  186.  
  187. def is_phrase_in(self,text):
  188. if self.word in text:
  189. return True
  190. else:
  191. return False
  192. def evaluate (self, story):
  193. return self.is_phrase_in(story.subject) or self.is_phrase_in(story.title) or self.is_phrase_in(story.summary)
  194.  
  195. #======================
  196. # Part 3
  197. # Filtering
  198. #======================
  199.  
  200. def filter_stories(stories, triggerlist):
  201. """
  202. Takes in a list of NewsStory-s.
  203. Returns only those stories for whom
  204. a trigger in triggerlist fires.
  205. """
  206. # TODO: Problem 10
  207. # This is a placeholder (we're just returning all the stories, with no filtering)
  208. # Feel free to change this line!
  209.  
  210. #stories wanted stored here
  211. stories_wanted = []
  212. for story in stories:
  213. for trigger in triggerlist:
  214. if trigger.evaluate(story):
  215. stories_wanted.append(story)
  216. break
  217. return stories_wanted
  218.  
  219. #======================
  220. # Part 4
  221. # User-Specified Triggers
  222. #======================
  223. ## Known problems (not fixed yet):
  224. ### 1. len(line) in my code but len(line)-1 in the examples
  225. ### 2. unhandled problem in main_thread
  226.  
  227. def readTriggerConfig(filename):
  228. """
  229. Returns a list of trigger objects
  230. that correspond to the rules set
  231. in the file filename
  232. """
  233. # Here's some code that we give you
  234. # to read in the file and eliminate
  235. # blank lines and comments
  236. triggerfile = open(filename, "r")
  237. all = [ line.rstrip() for line in triggerfile.readlines() ]
  238. lines = []
  239. for line in all:
  240. if len(line) == 0 or line[0] == '#':
  241. continue
  242. lines.append(line)
  243.  
  244. # TODO: Problem 11
  245. # 'lines' has a list of lines you need to parse
  246. # Build a set of triggers from it and
  247. # return the appropriate ones
  248.  
  249. #return a triggerlist when finished
  250. triggerlist = []
  251. triggerdic ={}
  252. #parse each line in a list of lists
  253. for line in lines:
  254. #line is a string
  255. line = line.split()
  256. #line converted into a list of string
  257. #lines become a list of lists
  258.  
  259. #find ADD
  260. for line in lines:
  261. if 'ADD' in line:
  262. #parse to find triggers
  263. for i in range (1,len(line)-1):
  264. triggerlist.append(triggerdic[line[i]])
  265. #title, subject, summary triggers built with one argument
  266. elif 'SUBJECT' in line:
  267. #SubjectTrigger object definition
  268. subject_trigger = SubjectTrigger(line[2])
  269. #added into the dict{trigger's name:trigger}
  270. triggerdic[line[0]] = subject_trigger
  271. elif 'TITLE' in line:
  272. title_trigger = TitleTrigger(line[2])
  273. triggerdic[line[0]] = title_trigger
  274. elif 'SUMMARY' in line:
  275. summary_trigger = SummaryTrigger(line[2])
  276. triggerdic[line[0]] = summary_trigger
  277. elif 'PHRASE' in line:
  278. #add triggerwords together
  279. word = ''
  280. for string in range (2,len(line)-1):
  281. word = word +' '+ line[string]
  282. phrase_trigger = PhraseTrigger(word)
  283. triggerdic[line[0]] = phrase_trigger
  284. #not, and or triggers take triggers as arguments
  285. elif 'AND' in line:
  286. andtrigger = AndTrigger(triggerdic[line[2]],triggerdic[line[3]])
  287. triggerdic[line[0]] = andtrigger
  288. elif 'OR' in line:
  289. ortrigger = OrTrigger(triggerdic[line[2]],triggerdic[line[3]])
  290. triggerdic[line[0]] = ortrigger
  291. elif ' NOT' in line:
  292. not_trigger = NotTrigger(triggerdic[line[2]])
  293. triggerdic[line[0]] = not_trigger
  294.  
  295. return triggerlist
  296.  
  297.  
  298. import thread
  299.  
  300. def main_thread(p):
  301. # A sample trigger list - you'll replace
  302. # this with something more configurable in Problem 11
  303. ## t1 = SubjectTrigger("Obama")
  304. ## t2 = SummaryTrigger("MIT")
  305. ## t3 = PhraseTrigger("Supreme Court")
  306. ## t4 = OrTrigger(t2, t3)
  307. ## triggerlist = [t1, t4]
  308.  
  309. # TODO: Problem 11
  310. # After implementing readTriggerConfig, uncomment this line
  311. triggerlist = readTriggerConfig("triggers.txt")
  312. print triggerlist
  313. guidShown = []
  314.  
  315. while True:
  316. print "Polling..."
  317.  
  318. # Get stories from Google's Top Stories RSS news feed
  319. stories = process("http://news.google.com/?output=rss")
  320. # Get stories from Yahoo's Top Stories RSS news feed
  321. stories.extend(process("http://rss.news.yahoo.com/rss/topstories"))
  322.  
  323. # Only select stories we're interested in
  324. stories = filter_stories(stories, triggerlist)
  325.  
  326. # Don't print a story if we have already printed it before
  327. newstories = []
  328. for story in stories:
  329. if story.get_guid() not in guidShown:
  330. newstories.append(story)
  331.  
  332. for story in newstories:
  333. guidShown.append(story.get_guid())
  334. p.newWindow(story)
  335.  
  336. print "Sleeping..."
  337. time.sleep(SLEEPTIME)
  338.  
  339. SLEEPTIME = 60 #seconds -- how often we poll
  340. if __name__ == '__main__':
  341. p = Popup()
  342. thread.start_new_thread(main_thread, (p,))
  343. p.start()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement