Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on Feb 26th, 2012  |  syntax: None  |  size: 2.91 KB  |  hits: 30  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. import httplib
  2. import xml.etree.ElementTree as ET
  3.  
  # Module-level file name for the list of already-read feed items.
  read_items_filename = 'read.txt'
  5.  
  class Error(Exception):
      """Base class for the errors defined in this module."""


  class CannotRetrieveUrlError(Error):
      """Signals that a URL could not be retrieved.

      Attributes:
          url: the URL that was passed to the constructor.
          msg: human-readable message, "Cannot retrieve URL: <url>".
      """

      def __init__(self, url):
          self.url = url
          self.msg = "Cannot retrieve URL: " + url
          # Pass the message on to Exception so that str(e) and print(e)
          # show it; without this call the exception renders as an empty
          # string.
          super(CannotRetrieveUrlError, self).__init__(self.msg)
  12.  
  class FeedItem(object):
      """A single feed entry holding its title, description and guid."""

      def __init__(self, title, description, guid):
          # Store the three values unchanged on the instance.
          self.title, self.description, self.guid = title, description, guid
  18.  
  class Feed(object):
      """An RSS-style feed backed by an XML document.

      The channel's title, description and language are extracted when the
      object is created; the individual entries are produced on demand by
      items().

      Attributes:
          feed_xml: the XML document passed to the constructor.
          title: text of the channel's <title> element.
          desc: text of the channel's <description> element.
          lang: text of the channel's <language> element.
      """

      def __init__(self, feed_xml):
          """Store the XML document and parse the channel-level fields."""
          self.feed_xml = feed_xml
          self.title, self.desc, self.lang = self.parse_feed_info()

      def _channel(self):
          """Parse the stored XML and return its <channel> element."""
          return ET.fromstring(self.feed_xml).find('channel')

      def print_info(self):
          """Print the title, description and language of the feed."""
          print("=====================")
          print("Feed info:")
          print("Title: " + self.title)
          print("Desc: " + self.desc)
          print("Lang: " + self.lang)
          print("=====================")
          print("")

      def parse_feed_info(self):
          """Return the channel's (title, description, language) tuple.

          A missing or empty element contributes an empty string, so the
          values can always be concatenated by print_info().
          """
          channel = self._channel()
          # findtext() copes with absent elements, unlike find(tag).text,
          # which fails with AttributeError when find() returns None.
          return tuple(
              channel.findtext(tag, "")
              for tag in ('title', 'description', 'language')
          )

      def items(self):
          """Yield a FeedItem for every <item> element the feed contains.

          A field whose element is absent from the item is None; an element
          that is present but empty gives an empty string.
          """
          # './/item' matches <item> elements at any depth below the
          # channel, as the deprecated getiterator('item') did.
          for node in self._channel().findall('.//item'):
              yield FeedItem(
                  node.findtext('title'),
                  node.findtext('description'),
                  node.findtext('guid'),
              )
  57.  
  class Reader(object):
      """Fetches a feed over HTTP and prints the items not yet read.

      Attributes:
          read_items_filename: file listing already-read items, one per line.
          url: the "host[:port]" string handed to httplib.HTTPConnection.
          feed: the Feed built by fetch_feed(), or None before that call.
      """

      def __init__(self, url):
          # Use the module-level read_items_filename so that this attribute
          # names the very file get_already_read_items() works on.
          self.read_items_filename = read_items_filename
          self.url = url
          self.feed = None

      def fetch_feed(self):
          """Request "/" from self.url, build self.feed and print its info.

          Raises:
              CannotRetrieveUrlError: if the response status is not 200.
          """
          conn = httplib.HTTPConnection(self.url)
          try:
              conn.request("GET", "/")
              response = conn.getresponse()
              if response.status != 200:
                  raise CannotRetrieveUrlError(self.url)
              self.feed = Feed(response.read())
          finally:
              # Close the connection on every path, including the error
              # ones, so that it is never leaked.
              conn.close()
          self.feed.print_info()

      def get_already_read_items(self):
          """Return the list of already-read items stored in the file.

          Every line of self.read_items_filename, stripped of surrounding
          whitespace, is one entry. If the file does not exist it is created
          empty and an empty list is returned.

          Raises:
              IOError: on any I/O failure other than the file being absent.
          """
          import errno

          try:
              with open(self.read_items_filename, "rt") as handle:
                  return [line.strip() for line in handle]
          except IOError as exc:
              # Only a missing file means "nothing read yet". Any other
              # failure is re-raised so that an existing file is never
              # truncated by the creation step below.
              if exc.errno != errno.ENOENT:
                  raise
          open(self.read_items_filename, "wt").close()
          return []

      def read_feed(self):
          """Print the title of every feed item that has not been read yet.

          An item counts as read when its guid is one of the entries
          returned by get_already_read_items(). self.feed must have been
          set (see fetch_feed()) before this is called.
          """
          # NOTE(review): nothing in this class appends guids to the file,
          # so items are only skipped if the file is filled in elsewhere.
          already_read = set(self.get_already_read_items())
          for item in self.feed.items():
              if item.guid in already_read:
                  continue
              print(item.title)
  100.  
  def main():
      """Fetch and read the feed at localhost:9000.

      A CannotRetrieveUrlError raised along the way is printed instead of
      being propagated.
      """
      rss_reader = Reader('localhost:9000')
      try:
          rss_reader.fetch_feed()
          rss_reader.read_feed()
      except CannotRetrieveUrlError as err:
          print(err)


  # Run main() only when the file is executed as a script.
  if __name__ == '__main__':
      main()