Advertisement
Guest User

Untitled

a guest
May 29th, 2017
51
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.82 KB | None | 0 0
  1. from celery.decorators import task
  2. from lxml import etree
  3. from feeds import InvalidFeed
  4. from feeds.atom10 import Atom10Feed
  5. from feeds.rss20 import RSS20Feed
  6. import urllib2
  7.  
  # Feed classes that parse_feed tries, in this order, against a parsed document.
  8. FEED_TYPES = (Atom10Feed, RSS20Feed)
  # Value of the User-Agent header that get_resource sends with its request.
  9. USER_AGENT = "Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"
  10.  
  11. @task
  12. def get_resource(url, callback):
  13.     request = urllib2.Request(url=url,headers={'User-Agent': USER_AGENT})
  14.     stream  = urllib2.urlopen(request, timeout=5)
  15.     content = stream.read()
  16.     if callback:
  17.         callback.delay(content.get())
  18.     return
  19.  
  20. @task
  21. def parse_feed(content):
  22.     root = etree.fromstring(content)
  23.    
  24.     for type in FEED_TYPES:
  25.         try:
  26.             feed = type(root)
  27.         except InvalidFeed:
  28.             return None
  29.         else:
  30.             break
  31.  
  32.     return feed.__class__
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement