Advertisement
rfmonk

fetch_podcasts.py

Jan 13th, 2014
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.75 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # from book the Python Standard Library
  3.  
  4.  
  5. from Queue import Queue
  6. from threading import Thread
  7. import time
  8. import urllib
  9. import urlparse
  10.  
  11. import feedparser
  12.  
# Set up some global variables
num_fetch_threads = 2        # number of concurrent downloader threads to spawn
enclosure_queue = Queue()    # work queue of enclosure URLs awaiting download

# A real app wouldn't use hard-coded data...
feed_urls = ['http://www.gadgetgangster.com/news.feed?type=rss'
             ]
  21.  
  22. def downloadEnclosures(i, q):
  23.     """ This is the worker thread function.
  24.    It processes items in the queue one after
  25.    another. These daemon threads go into an
  26.    infinate loop, and only exit when
  27.    the main thread ends.
  28.    """
  29.     while True:
  30.         print '%s: Looking for the next enclosure' % i
  31.         url = q.get()
  32.         parsed_url = urlparse.urlparse(url)
  33.         data = response.read()
  34.         # Save the downloaded file to the current dir
  35.         outfile_name = url.rpartition('/')[-1]
  36.         with open(outputfile_name, 'wb') as outfile:
  37.             outfile.write(data)
  38.         q.task_done()
  39.  
  40. # Set up some threads to fetch the enclosures
  41. for i in range(num_fetch_threads):
  42.     worker = Thread(target=downloadEnclosures,
  43.                     args=(i, enclosure_queue,))
  44.     worker.setDaemon(True)
  45.     worker.start()
  46.  
  47. # Download the feed(s) and put the enclosure URLs into
  48. # the queue.
  49. for url in feed_urls:
  50.     response = feedparser.parse(url, agent='fetch_podcasts.py')
  51.     for entry in response['entries'][-5:]:
  52.         for enclosure in entry.get('enclosures', []):
  53.             parsed_url = urlparse.urlparse(enclosure['url'])
  54.             print 'Queueing:', parsed_url.path
  55.             enclosure_queue.put(enclosure['url'])
  56.  
  57. # Now wait for the queue to be empty, indicating that we have
  58. # processed all the downloads.
  59. print '*** Main thread waiting'
  60. enclosure_queue.join()
  61. print '*** Done'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement