#!/usr/bin/env python
# From the book The Python Standard Library by Example
from Queue import Queue
from threading import Thread
import time
import urllib
import urlparse

import feedparser

# Set up some global variables
num_fetch_threads = 2
enclosure_queue = Queue()

# A real app wouldn't use hard-coded data...
feed_urls = ['http://www.gadgetgangster.com/news.feed?type=rss',
             ]
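
# Note: this script uses the Python 2 module names; under Python 3 the
# equivalents are queue.Queue, urllib.request.urlopen, and
# urllib.parse.urlparse.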

def downloadEnclosures(i, q):
    """This is the worker thread function.
    It processes items in the queue one after
    another. These daemon threads go into an
    infinite loop, and only exit when
    the main thread ends.
    """
    while True:
        print '%s: Looking for the next enclosure' % i
        url = q.get()  # blocks until an item is available
        parsed_url = urlparse.urlparse(url)
        print '%s: Downloading:' % i, parsed_url.netloc
        response = urllib.urlopen(url)
        data = response.read()
        # Save the downloaded file to the current directory
        outfile_name = url.rpartition('/')[-1]
        with open(outfile_name, 'wb') as outfile:
            outfile.write(data)
        q.task_done()

# Set up some threads to fetch the enclosures
for i in range(num_fetch_threads):
    worker = Thread(target=downloadEnclosures,
                    args=(i, enclosure_queue,))
    worker.setDaemon(True)
    worker.start()
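
# The daemon flag lets the interpreter exit even though the workers sit
# in an infinite loop; enclosure_queue.join() below is what actually
# waits for the queued work to finish.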

# Download the feed(s) and put the enclosure URLs into
# the queue.
for url in feed_urls:
    response = feedparser.parse(url, agent='fetch_podcasts.py')
    for entry in response['entries'][-5:]:
        for enclosure in entry.get('enclosures', []):
            parsed_url = urlparse.urlparse(enclosure['url'])
            print 'Queueing:', parsed_url.path
            enclosure_queue.put(enclosure['url'])

# Now wait for the queue to be empty, indicating that we have
# processed all the downloads.
print '*** Main thread waiting'
enclosure_queue.join()
print '*** Done'
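
# To run: python fetch_podcasts.py (Python 2). The only third-party
# dependency is feedparser (pip install feedparser); downloaded
# enclosures are saved to the current working directory.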