# archive-xkcd.py v2

#!/usr/bin/env python3

import os
import sys
import json
import shutil
import datetime
import requests

class Comic:
    """One xkcd comic, built from a decoded metadata mapping.

    Every key of the mapping becomes an instance attribute of the same name.
    The formatting methods read ``title``, ``safe_title``, ``num``, ``alt``,
    ``img``, ``transcript``, ``day``, ``month`` and ``year``.
    """

    def __init__(self, data):
        """Copy each item of *data* onto the instance.

        The ``day``, ``month`` and ``year`` values are passed through
        ``int()``; every other value is stored untouched.
        """
        date_fields = ('day', 'month', 'year')
        attrs = self.__dict__
        for key, value in data.items():
            attrs[key] = int(value) if key in date_fields else value

    def __str__(self):
        """Return the plain-text summary: header fields, then the transcript."""
        published = datetime.date(year=self.year, month=self.month, day=self.day)
        lines = [
            'Title: {}'.format(self.title),
            'Index: {}'.format(self.num),
            'Date: {:%Y-%m-%d}'.format(published),
            'Alt-Text: {}'.format(self.alt),
            'Permalink: https://xkcd.com/{}/'.format(self.num),
            'Image link: {}'.format(self.img),
            '',
            'Transcript',
            '----------',
            '',
            '{}'.format(self.transcript),
        ]
        return '\n'.join(lines)

    def __repr__(self):
        """Return every stored attribute as JSON indented by four spaces."""
        return json.dumps(vars(self), indent=4)

    def filename(self, ext=''):
        """Return ``<num zero-padded to 4>-<safe title><ext>``.

        Question marks are dropped from the safe title and forward slashes
        become hyphens; *ext* is appended verbatim.
        """
        # One pass over the title: '?' is removed, '/' is swapped for '-'.
        cleaned = self.safe_title.translate({ord('?'): None, ord('/'): '-'})
        return '{num:04d}-{name}{ext}'.format(num=self.num, name=cleaned, ext=ext)

# URL template for a comic's JSON metadata; {0} is filled with the comic number.
base_url = r'https://xkcd.com/{0}/info.0.json'
# Output tree: ./xkcd under the current working directory, with a
# 'metadata' subdirectory inside it.
base_dir = os.path.join(os.getcwd(), 'xkcd')
meta_dir = os.path.join(base_dir, 'metadata')

# Create whichever output directories are missing, parent first, announcing
# each one that has to be made.
for _path, _notice in (
    (base_dir, 'Creating xkcd dir at {}'),
    (meta_dir, 'Creating metadata dir at {}'),
):
    if os.path.isdir(_path):
        continue
    print(_notice.format(_path))
    os.makedirs(_path)

# Main archiving loop: walk comic numbers upward from the starting index
# (first command-line argument, default 1) until the metadata endpoint
# answers 404, saving each comic's image plus a plain-text and a JSON copy
# of its metadata.
with requests.Session() as sess:
    # Seconds to wait on connect/read, so a stalled connection cannot hang
    # the whole run indefinitely.
    timeout = 30
    # Extensions accepted from the image URL; anything else (or no
    # extension at all) falls back to the historical '.jpg'.
    image_exts = ('.png', '.jpg', '.jpeg', '.gif')

    comic = 1 if len(sys.argv) == 1 else int(sys.argv[1])
    # Number 404 is always skipped: presumably there is no comic there, and
    # its 404 response would be mistaken for the end of the archive.
    if comic == 404:
        comic += 1
    print('Starting from comic #{}'.format(comic))

    while True:
        cdata = sess.get(base_url.format(comic), timeout=timeout)
        if cdata.status_code == 404:
            # Ran past the newest comic: normal termination.
            break
        # Any other HTTP error (5xx, rate limiting, ...) is a real failure;
        # raise it here instead of failing later on an undecodable body.
        cdata.raise_for_status()

        com = Comic(cdata.json())
        print('Archiving {}'.format(com.filename()))

        # Name the image after its real type rather than always '.jpg'.
        ext = os.path.splitext(com.img)[1].lower()
        if ext not in image_exts:
            ext = '.jpg'

        # The context manager releases the streamed connection back to the
        # session pool even if writing the file fails.
        with sess.get(com.img, stream=True, timeout=timeout) as imgd:
            if imgd.ok:
                with open(os.path.join(base_dir, com.filename(ext)), 'wb') as imgf:
                    # iter_content applies any transfer decoding (gzip or
                    # deflate), which copying from .raw would skip.
                    for chunk in imgd.iter_content(chunk_size=64 * 1024):
                        imgf.write(chunk)
            else:
                # Do not write an HTTP error page to disk as if it were the
                # image; report it and still archive the metadata below.
                print(
                    'Skipping image for #{}: HTTP {}'.format(com.num, imgd.status_code),
                    file=sys.stderr,
                )

        with open(os.path.join(meta_dir, com.filename('.txt')), 'w', encoding='utf-8') as plainf:
            plainf.write(str(com))
        with open(os.path.join(meta_dir, com.filename('.json')), 'w', encoding='utf-8') as jsonf:
            jsonf.write(repr(com))

        comic += 1
        if comic == 404:
            comic += 1
    print('Archiving complete.')