mengyuxin

meng.downloadXkcd.py

Jan 3rd, 2018
242
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.17 KB | None | 0 0
  1. #! python3
  2. # downloadXkcd.py - Downloads every single XKCD comic.
  3.  
  4. import requests
  5. import os
  6. import bs4
  7.  
  # XKCD crawler: start at the front page and follow "Prev" links backwards.

BASE_URL = 'http://xkcd.com'  # starting url; also the prefix for Prev hrefs
SAVE_DIR = 'XKCD'             # comics are stored in ./XKCD
REQUEST_TIMEOUT = 30          # seconds; without it a stalled socket blocks forever
CHUNK_SIZE = 100000           # bytes handed to each file write


def resolve_image_url(page_url, src):
    """Return an absolute URL for an ``<img src>`` value found on *page_url*.

    Protocol-relative (``//host/path``), site-relative (``/path``) and
    already-absolute values are all handled. For a protocol-relative value
    on an ``http`` page the result equals ``'http:' + src``, which is what
    this script used to build by hand.
    """
    # Local import so this block does not depend on the file's import header.
    from urllib.parse import urljoin

    return urljoin(page_url, src)


def download_comic_image(page_url, src):
    """Fetch the image referenced by *src* and write it into ``SAVE_DIR``.

    The file is named after the last path component of *src*.

    Raises:
        requests.HTTPError: if the image request returns an error status.
    """
    image_url = resolve_image_url(page_url, src)
    print('Downloading image %s ...' % image_url)
    res = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    # Save the image to ./XKCD; the context manager closes the handle even
    # when a write fails part-way through.
    target = os.path.join(SAVE_DIR, os.path.basename(src))
    with open(target, 'wb') as image_file:
        for chunk in res.iter_content(CHUNK_SIZE):
            image_file.write(chunk)


def main():
    """Download every comic reachable by following Prev links from BASE_URL.

    The loop stops once the next URL ends with '#' (presumably the Prev link
    of the very first comic), or when a page has no usable Prev link at all.

    Raises:
        requests.HTTPError: if a page or image request returns an error status.
    """
    url = BASE_URL
    os.makedirs(SAVE_DIR, exist_ok=True)

    while not url.endswith('#'):
        # Download the page.
        print('Downloading page %s ...' % url)
        res = requests.get(url, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
        soup = bs4.BeautifulSoup(res.text, "lxml")

        # Find the URL of the comic image; an <img> without src counts as
        # "not found" instead of crashing on string concatenation.
        comic_elems = soup.select('#comic img')
        src = comic_elems[0].get('src') if comic_elems else None
        if not src:
            print('Could not find comic image.')
        else:
            download_comic_image(url, src)

        # Get the Prev button's url. Plain concatenation is kept on purpose:
        # urljoin() would drop a bare '#' and the stop condition above would
        # never become true.
        prev_links = soup.select('a[rel="prev"]')
        prev_href = prev_links[0].get('href') if prev_links else None
        if not prev_href:
            print('Could not find Prev link; stopping.')
            break
        url = BASE_URL + prev_href

    print('Done.')


if __name__ == '__main__':
    main()
Add Comment
Please, Sign In to add comment