mengyuxin

multi_download_Xkcd.py

Jan 6th, 2018
216
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.65 KB | None | 0 0
  1. #! python3
  2. # multi_download_Xkcd.py - Downloads XKCD comics using multiple threads.
  3.  
  4. import requests
  5. import os
  6. import bs4
  7. import threading
  8.  
  9. url = 'http://xkcd.com' # starting url
  10. os.makedirs('XKCD', exist_ok=True) # store comics in ./XKCD
  11.  
  12. def download_XKCD(start_comic, end_comic):
  13.     for url_number in range(start_comic, end_comic):
  14.         # Download the page.
  15.         print('Downloading page %s/%s ...' % (url, url_number))
  16.         res = requests.get('%s/%s' % (url, url_number))
  17.         res.raise_for_status()
  18.        
  19.         soup = bs4.BeautifulSoup(res.text, "lxml")
  20.  
  21.         # Find the URL of the comic image.
  22.         comic_elem = soup.select('#comic img')
  23.         if comic_elem == []:
  24.             print('Could not find comic image.')
  25.         else:
  26.             comic_URL = comic_elem[0].get('src')
  27.             # Download the image.
  28.             print('Downloading image http:%s ...' % (comic_URL))
  29.             res = requests.get('http:' + comic_URL)
  30.             res.raise_for_status()    
  31.  
  32.             # Save the image to ./XKCD.
  33.             image_file = open(os.path.join('XKCD', os.path.basename(comic_URL)), 'wb')
  34.             for chunk in res.iter_content(100000):
  35.                 image_file.write(chunk)
  36.             image_file.close()
  37.    
  38. # Create and start the Thread object.
  39. download_threads = []   # a list of all the thread object
  40. for i in range(0, 1400, 100):
  41.     download_thread = threading.Thread(target=download_XKCD, args=(i, i + 99))
  42.     download_threads.append(download_thread)
  43.     download_thread.start()
  44.  
  45. # Wait for all threads to end.
  46. for download_thread in download_threads:
  47.     download_thread.join()
  48. print('Done.')
Add Comment
Please, Sign In to add comment