Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from celery.decorators import task
- from lxml import etree
- from feeds import InvalidFeed
- from feeds.atom10 import Atom10Feed
- from feeds.rss20 import RSS20Feed
- import urllib2
# Feed classes that parse_feed tries, in this order, against a parsed document.
- FEED_TYPES = (Atom10Feed, RSS20Feed)
# Value sent as the User-Agent header by get_resource (a Firefox 2 browser string).
- USER_AGENT = "Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"
@task
def get_resource(url, callback):
    """Download ``url`` and pass the response body to ``callback``.

    The request is sent with the module-level ``USER_AGENT`` header and a
    5 second timeout.

    Args:
        url: Address of the resource to fetch.
        callback: Object exposing ``delay(content)`` (presumably another
            Celery task -- confirm against callers), or a falsy value to
            skip the hand-off.

    Returns:
        None. The downloaded body is only delivered through ``callback``.

    Raises:
        Errors raised by ``urllib2.urlopen`` or by reading the response are
        not caught here and propagate to the caller.
    """
    request = urllib2.Request(url=url, headers={'User-Agent': USER_AGENT})
    stream = urllib2.urlopen(request, timeout=5)
    try:
        content = stream.read()
    finally:
        # Always release the connection, even if the read fails part-way.
        stream.close()

    if callback:
        # ``content`` is the raw body returned by ``read()``; it has no
        # ``.get()`` method, so it is passed through unchanged.
        callback.delay(content)
@task
def parse_feed(content):
    """Identify which known feed class accepts ``content``.

    The document is parsed with ``etree.fromstring`` and each class in
    ``FEED_TYPES`` is tried in order until one can be constructed from the
    parsed root.

    Args:
        content: Serialized XML document to inspect.

    Returns:
        The class of the first feed object built successfully, or ``None``
        when every candidate raises ``InvalidFeed`` (or ``FEED_TYPES`` is
        empty).

    Raises:
        Errors raised by ``etree.fromstring`` are not caught here and
        propagate to the caller.
    """
    root = etree.fromstring(content)
    for feed_type in FEED_TYPES:
        try:
            feed = feed_type(root)
        except InvalidFeed:
            # Not this format: keep going so the remaining types get a
            # chance instead of giving up after the first mismatch.
            continue
        return feed.__class__
    return None
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement