Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import os
- import sys
- import json
- import shutil
- import datetime
- import requests
class Comic(object):
    """A single comic, populated from its decoded JSON metadata mapping.

    Each key of the mapping passed to the constructor becomes an instance
    attribute of the same name.  The ``day``, ``month`` and ``year`` entries
    are converted to ``int``; all other values are stored untouched.
    """

    def __init__(self, data):
        """Copy every item of *data* onto the instance.

        Values are written straight into the instance dictionary, so the
        attribute order matches the order of *data*.
        """
        attrs = vars(self)
        for key, value in data.items():
            if key in ('day', 'month', 'year'):
                attrs[key] = int(value)
            else:
                attrs[key] = value

    def __str__(self):
        """Return a human-readable, multi-line summary of the comic.

        Reads the ``year``, ``month``, ``day``, ``title``, ``num``, ``alt``,
        ``img`` and ``transcript`` attributes.
        """
        published = datetime.date(
            year=self.year, month=self.month, day=self.day)
        pieces = [
            'Title: {}\n'.format(self.title),
            'Index: {}\n'.format(self.num),
            'Date: {}\n'.format(published.strftime('%Y-%m-%d')),
            'Alt-Text: {}\n'.format(self.alt),
            'Permalink: https://xkcd.com/{}/\n'.format(self.num),
            'Image link: {}\n\n'.format(self.img),
            'Transcript\n----------\n\n{}'.format(self.transcript),
        ]
        return ''.join(pieces)

    def __repr__(self):
        """Return all instance attributes as a JSON document (4-space indent)."""
        return json.dumps(vars(self), indent=4)

    def filename(self, ext=''):
        """Return ``<num zero-padded to 4>-<safe_title><ext>``.

        Question marks are dropped from ``safe_title`` and forward slashes
        are replaced with hyphens before it is used in the name.
        """
        cleanup = {ord('?'): None, ord('/'): '-'}
        stem = self.safe_title.translate(cleanup)
        return '{:04d}-{}{}'.format(self.num, stem, ext)
# URL template for one comic's JSON metadata; {0} is the comic number.
base_url = r'https://xkcd.com/{0}/info.0.json'
# Images are stored in ./xkcd and metadata in ./xkcd/metadata, both relative
# to the directory the script is started from.
base_dir = os.path.join(os.getcwd(), 'xkcd')
meta_dir = os.path.join(base_dir, 'metadata')

if not os.path.isdir(base_dir):
    print('Creating xkcd dir at {}'.format(base_dir))
    os.makedirs(base_dir)
if not os.path.isdir(meta_dir):
    print('Creating metadata dir at {}'.format(meta_dir))
    os.makedirs(meta_dir)

with requests.Session() as sess:
    # Optional first command-line argument: comic number to start from.
    comic = 1 if len(sys.argv) == 1 else int(sys.argv[1])
    # A 404 response is the loop's stop signal, so number 404 is stepped over
    # (presumably because that comic number itself answers 404 -- confirm).
    if comic == 404:
        comic += 1
    print('Starting from comic #{}'.format(comic))

    while True:
        cdata = sess.get(base_url.format(comic))
        if cdata.status_code == 404:
            # Ran past the newest comic: nothing more to archive.
            break
        # Any other HTTP error would otherwise surface as a confusing JSON
        # decode failure below; fail with the real status instead.
        cdata.raise_for_status()

        com = Comic(cdata.json())
        print('Archiving {}'.format(com.filename()))

        # Keep the image's real extension instead of labelling everything
        # '.jpg'; fall back to '.jpg' when the URL has no recognisable one.
        img_ext = os.path.splitext(com.img)[1].lower()
        if img_ext not in ('.png', '.jpg', '.jpeg', '.gif'):
            img_ext = '.jpg'

        # The context manager releases the streamed connection when done.
        with sess.get(com.img, stream=True) as imgd:
            if imgd.ok:
                # Make the raw stream undo gzip/deflate transfer encoding so
                # the bytes written to disk are the actual image.
                imgd.raw.decode_content = True
                with open(os.path.join(base_dir, com.filename(img_ext)), 'wb') as imgf:
                    shutil.copyfileobj(imgd.raw, imgf)
            else:
                # Do not save an error page as an image; the metadata is
                # still written so the run can continue.
                print('Skipping image for {} (HTTP {})'.format(
                    com.filename(), imgd.status_code))

        with open(os.path.join(meta_dir, com.filename('.txt')), 'w', encoding='utf-8') as plainf:
            plainf.write(str(com))
        with open(os.path.join(meta_dir, com.filename('.json')), 'w', encoding='utf-8') as jsonf:
            jsonf.write(repr(com))

        comic += 1
        if comic == 404:
            comic += 1

print('Archiving complete.')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement