Advertisement
Guest User

archive-xkcd.py v2

a guest
Feb 1st, 2018
198
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.35 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
import datetime
import json
import os
import shutil
import sys
import urllib.parse

import requests
  9.  
  10. class Comic(object):
  11.     def __init__(self, data):
  12.         for k, v in data.items():
  13.             self.__dict__[k] = v
  14.             if k in ('day', 'month', 'year'):
  15.                 self.__dict__[k] = int(v)
  16.     def __str__(self):
  17.         pdate = datetime.date(year=self.year, month=self.month, day=self.day)
  18.         out = 'Title: {}\n'.format(self.title)
  19.         out += 'Index: {}\n'.format(self.num)
  20.         out += 'Date: {}\n'.format(pdate.strftime('%Y-%m-%d'))
  21.         out += 'Alt-Text: {}\n'.format(self.alt)
  22.         out += 'Permalink: https://xkcd.com/{}/\n'.format(self.num)
  23.         out += 'Image link: {}\n\n'.format(self.img)
  24.         out += 'Transcript\n----------\n\n{}'.format(self.transcript)
  25.         return out
  26.     def __repr__(self):
  27.         return json.dumps(self.__dict__, indent = 4)
  28.     def filename(self, ext=''):
  29.         safer = self.safe_title.replace('?', '').replace('/', '-')
  30.         return '{:04d}-{}{}'.format(self.num, safer, ext)
  31.  
  32. base_url = r'https://xkcd.com/{0}/info.0.json'
  33. base_dir = os.path.join(os.getcwd(), 'xkcd')
  34. meta_dir = os.path.join(base_dir, 'metadata')
  35.  
  36. if not os.path.isdir(base_dir):
  37.     print('Creating xkcd dir at {}'.format(base_dir))
  38.     os.makedirs(base_dir)
  39. if not os.path.isdir(meta_dir):
  40.     print('Creating metadata dir at {}'.format(meta_dir))
  41.     os.makedirs(meta_dir)
  42.  
  43. with requests.Session() as sess:
  44.     comic = 1 if len(sys.argv) == 1 else int(sys.argv[1])
  45.     if comic == 404:
  46.         comic += 1
  47.     cdata = sess.get(base_url.format(comic))
  48.     print('Starting from comic #{}'.format(comic))
  49.     while cdata.status_code != 404:
  50.         com = Comic(cdata.json())
  51.         print('Archiving {}'.format(com.filename()))
  52.         imgd = sess.get(com.img, stream=True)
  53.         with open(os.path.join(base_dir, com.filename('.jpg')), 'wb') as imgf:
  54.             shutil.copyfileobj(imgd.raw, imgf)
  55.         with open(os.path.join(meta_dir, com.filename('.txt')), 'w', encoding='utf-8') as plainf:
  56.             plainf.write(str(com))
  57.         with open(os.path.join(meta_dir, com.filename('.json')), 'w', encoding='utf-8') as jsonf:
  58.             jsonf.write(repr(com))
  59.         comic += 1
  60.         if comic == 404:
  61.             comic += 1
  62.         cdata = sess.get(base_url.format(comic))
  63.     print('Archiving complete.')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement