Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
"""Download all xkcd comics (saved as .jpg files) and their alt-texts.

The script asks for a destination folder, looks up the number of the latest
comic on the xkcd front page, then walks every comic page from 1 upwards.
Each comic image is saved as a zero-padded ``NNNN.jpg`` in the destination
folder and its alt-text is appended to ``alt_text.txt`` in the current
working directory.  Comics whose image file already exists are skipped, so
the script can be re-run to fetch only what is missing.

Runtime target is Python 2 (it relies on ``raw_input``, ``xrange``,
``urllib.urlopen`` and ``urllib.urlretrieve``).
"""
import os
import re
import urllib

# Matches the permanent link on the front page; group 1 is the comic number.
# ``\d+`` (rather than a possibly-empty match) guarantees int() gets digits.
_LATEST_RE = re.compile(r'http://xkcd.com/(\d+)/')

# Matches the comic <img> tag; group 1 is the image URL, group 2 the raw
# (still HTML-escaped) title attribute, i.e. the alt-text.
_IMAGE_RE = re.compile(
    r'(http://imgs.xkcd.com/comics/.*?(?:png|jpg))">?.*? title="(.*?)"')


def comic_filename(number):
    """Return the file name for comic *number*: zero-padded to 4 digits + '.jpg'."""
    return '%04d.jpg' % number


def decode_alt_text(raw):
    """Replace the HTML entities ``&quot;`` and ``&#39;`` with literal quotes.

    Only these two entities are decoded; any other entity is left as-is.
    """
    return raw.replace('&quot;', '"').replace('&#39;', "'")


def find_latest_comic(lines):
    """Return the first comic number linked in *lines*, or None if absent."""
    for line in lines:
        match = _LATEST_RE.search(line)
        if match:
            return int(match.group(1))
    return None


def find_image(lines):
    """Return ``(image_url, raw_alt_text)`` from the first matching line.

    Returns None when no line in *lines* contains a comic image tag.
    """
    for line in lines:
        match = _IMAGE_RE.search(line)
        if match:
            return match.group(1), match.group(2)
    return None


def download_image(url, path):
    """Download *url* to *path*, removing a partial file if interrupted.

    A leftover partial file would be mistaken for a finished download on
    the next run (existing files are skipped), so it is deleted before the
    exception -- including KeyboardInterrupt -- is re-raised.
    """
    try:
        urllib.urlretrieve(url, path)
    except BaseException:
        if os.path.isfile(path):
            os.remove(path)
        raise


def fetch_comic(comic, dl_folder, alt):
    """Download comic number *comic* into *dl_folder* unless already present.

    *alt* is an open, writable file object; one ``N: alt-text`` line is
    written to it for every comic that is actually downloaded.
    """
    page = urllib.urlopen(r'http://www.xkcd.com/' + str(comic)).readlines()
    found = find_image(page)
    if found is None:
        # NOTE(review): comic 404 is special-cased, presumably because that
        # page intentionally does not exist -- confirm.
        if comic == 404:
            print('Comic 404 skipped')
        else:
            print('Download of comic ' + str(comic) + ' was unsuccessful')
        return

    image_url, raw_alt = found
    dl_path = os.path.join(dl_folder, comic_filename(comic))
    if os.path.isfile(dl_path):
        print('Comic ' + str(comic) + ' found in folder. Download skipped')
        return

    # Download first, record the alt-text second: a failed download must not
    # leave an alt-text entry that would be duplicated on the next run.
    download_image(image_url, dl_path)
    alt.write(str(comic) + ': ' + decode_alt_text(raw_alt) + '\n')
    print('Comic ' + str(comic) + ' downloaded')


def main():
    """Prompt for a folder, find the latest comic and download everything."""
    dl_folder = raw_input('Enter destinated folder: ')
    if not dl_folder:
        dl_folder = 'xkcd'
        print('Folder is xkcd by default')
    if not os.path.exists(dl_folder):
        print('Folder does not exists\nFolder created')
        os.mkdir(dl_folder)
    else:
        print('Folder found')

    print('Searching for latest comic')
    front_page = urllib.urlopen(r'http://www.xkcd.com/').readlines()
    lastcomic = find_latest_comic(front_page)
    if lastcomic is None:
        # Without a comic count there is nothing to iterate over.
        print('Could not determine the latest comic number')
        return
    print('Latest comic: ' + str(lastcomic))
    raw_input('You may press CTRL-C to terminate the program at any time\n\nPress ENTER to continue')

    # Append mode creates the file when missing and never overwrites the
    # alt-texts recorded by earlier runs; ``with`` closes it on any exit.
    with open('alt_text.txt', 'a') as alt:
        try:
            for comic in xrange(1, lastcomic + 1):
                fetch_comic(comic, dl_folder, alt)
        except KeyboardInterrupt:
            print('Program terminated')


if __name__ == '__main__':
    main()
Add Comment
Please, Sign In to add comment