Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! python3
- # multi_download_Xkcd.py - Downloads XKCD comics using multiple threads.
- import requests
- import os
- import bs4
- import threading
# Base address of the site; a comic number is appended to it to form each
# page URL.
url = 'http://xkcd.com'

# Downloaded images are written to ./XKCD. An already existing directory is
# reused rather than treated as an error.
os.makedirs('XKCD', exist_ok=True)
def download_XKCD(start_comic, end_comic):
    """Download a contiguous range of XKCD comics into the ./XKCD directory.

    For every number in ``range(start_comic, end_comic)`` the comic page
    ``<url>/<number>`` is fetched, the first ``#comic img`` element is looked
    up, and the image it points to is saved under its own file name.

    A failure on one comic (HTTP error status, connection problem, page with
    no usable image) is reported on stdout and that comic is skipped, so one
    bad number does not abort the rest of the batch.

    Args:
        start_comic: First comic number to download (inclusive).
        end_comic: Comic number to stop at (exclusive).
    """
    # Imported locally so the function does not depend on a file-level import.
    from urllib.parse import urljoin

    for url_number in range(start_comic, end_comic):
        page_url = '%s/%s' % (url, url_number)

        # Download the page.
        print('Downloading page %s ...' % page_url)
        try:
            res = requests.get(page_url)
            res.raise_for_status()
        except requests.exceptions.RequestException as err:
            # E.g. a number the site does not serve; keep going with the
            # remaining numbers instead of letting the thread die here.
            print('Skipping page %s: %s' % (page_url, err))
            continue

        soup = bs4.BeautifulSoup(res.text, "lxml")

        # Find the URL of the comic image.
        comic_elem = soup.select('#comic img')
        comic_src = comic_elem[0].get('src') if comic_elem else None
        if not comic_src:
            print('Could not find comic image.')
            continue

        # Resolve the src against the page URL: this yields the same address
        # as prefixing 'http:' for protocol-relative values ('//host/path')
        # and also copes with absolute or site-relative values.
        comic_URL = urljoin(page_url, comic_src)

        # Download the image.
        print('Downloading image %s ...' % comic_URL)
        try:
            res = requests.get(comic_URL)
            res.raise_for_status()
        except requests.exceptions.RequestException as err:
            print('Skipping image %s: %s' % (comic_URL, err))
            continue

        # Save the image to ./XKCD. The with-block closes the file even if a
        # write fails part-way through.
        image_path = os.path.join('XKCD', os.path.basename(comic_URL))
        with open(image_path, 'wb') as image_file:
            for chunk in res.iter_content(100000):
                image_file.write(chunk)
# Create and start one Thread object per batch of 100 comic numbers.
download_threads = []  # a list of all the Thread objects
for i in range(0, 1400, 100):
    # download_XKCD passes its arguments straight to range(), so the end
    # value is exclusive: the batch that starts at i has to end at i + 100,
    # otherwise the last number of every batch (99, 199, ...) is never
    # requested. The first batch starts at 1 because there is no comic
    # number 0 (assumes XKCD numbering starts at 1).
    batch_start = max(i, 1)
    batch_end = i + 100
    download_thread = threading.Thread(
        target=download_XKCD, args=(batch_start, batch_end)
    )
    download_threads.append(download_thread)
    download_thread.start()

# Wait for all threads to end.
for download_thread in download_threads:
    download_thread.join()
print('Done.')
Add Comment
Please, Sign In to add comment