Advertisement
pablopalacios

get_all_xkcd_comics

Jan 4th, 2014
54
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.59 KB | None | 0 0
  1. #!/usr/bin/python
  2. """Script to get all the xkcd comics"""
  3. import os
  4. import subprocess
  5. import urllib.request
  6. from bs4 import BeautifulSoup as BS
  7.  
  8. dir_name = 'xkcd'
  9. dir_origin = os.getcwd()
  10. os.mkdir(dir_name)
  11. os.chdir(dir_name)
  12.  
  13. root = 'http://xkcd.com'
  14. archive = BS(urllib.request.urlopen(root+'/archive/'))
  15. links = [link['href'] for link in archive.find('div',{'id':'middleContainer'}).find_all('a')]
  16.  
  17. for link in links:
  18.     page = BS(urllib.request.urlopen(root+link))
  19.     img = page.find('div',{'id':'comic'}).find('img')['src']
  20.     subprocess.call(['wget',img])
  21.  
  22. os.chdir(dir_origin)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement