xsot

xkcd download script

Mar 25th, 2011
228
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.60 KB | None | 0 0
  1. #!/usr/bin/python
  2. #Downloads all xkcd comics (as jpgs) and alt-texts
  3.  
  4. import urllib, os, re
  5. dl_folder = raw_input('Enter destinated folder: ')
  6. if not dl_folder:
  7.   dl_folder = 'xkcd'
  8.   print 'Folder is xkcd by default'
  9. if not os.path.exists(dl_folder):
  10.   print 'Folder does not exists\nFolder created'
  11.   os.mkdir(dl_folder)
  12. else: print 'Folder found'
  13. if not os.path.isfile('alt_text.txt'): alt = file('alt_text.txt','wt')
  14. alt = open('alt_text.txt','r+')
  15. print 'Searching for latest comic'
  16. for line in urllib.urlopen(r'http://www.xkcd.com/').readlines():
  17.   lastcomic = re.search(r'http://xkcd.com/(\d*?)/',line)
  18.   if lastcomic:
  19.     lastcomic = int(lastcomic.group(1))
  20.     print 'Latest comic: '+`lastcomic`
  21.     break
  22. raw_input('You may press CTRL-C to terminate the program at any time\n\nPress ENTER to continue')
  23. try:
  24.   for comic in xrange(1,lastcomic+1):
  25.     for line in urllib.urlopen(r'http://www.xkcd.com/'+`comic`).readlines():
  26.       img = re.search(r'(http://imgs.xkcd.com/comics/.*?(?:png|jpg))">?.*? title="(.*?)"',line)
  27.       if img:
  28.         dl_path = os.path.join(dl_folder,(4-len(`comic`))*'0'+`comic`+'.jpg')
  29.         if not os.path.isfile(dl_path):
  30.           alt.write(`comic`+': '+img.group(2).replace('"','"').replace(''',"'")+'\n')
  31.           urllib.urlretrieve(img.group(1),dl_path)
  32.           print 'Comic '+`comic`+' downloaded'
  33.         else: print 'Comic '+`comic`+' found in folder. Download skipped'
  34.         break
  35.     else: print ('Download of comic '+`comic`+' was unsuccessful','Comic 404 skipped')[comic==404]
  36. except KeyboardInterrupt: print 'Program terminated'
  37. alt.close()
Add Comment
Please, Sign In to add comment