#!/usr/bin/python
#coding: utf-8
"""
This bot reads data from xkcd.com and creates the appropriate pages. This bot
is designed specifically for explainxkcd.com. No changes are made if the pages
already exist.

Specific arguments:
-test           Don't actually make any changes
-latest         Create pages for the latest comic
-all            Create pages for all comics
-start:xxx      Specify the comic number to start at (inclusive).
                This requires -all. Defaults to 1.
-end:xxx        Specify the comic number to end at (inclusive).
                This requires -all. Defaults to 99999.

Example usage:
    # Make sure everything is set up properly, but don't actually do anything
    python xkcd.py -latest -test

    # Add pages for the latest comic (if they don't exist yet)
    python xkcd.py -latest

    # Add pages for all comics (if they don't exist yet)
    python xkcd.py -all

    # Add pages for comics 50-100 (if they don't exist yet)
    python xkcd.py -all -start:50 -end:100
"""
#
# Distributed under the terms of the MIT license.
#
#
# This script is based on pagefromfile.py, which has the following license:
#
## (C) Andre Engels, 2004
## (C) Pywikipedia bot team, 2005-2010
##
## Distributed under the terms of the MIT license.
#
__version__='$Id$'
#

import re
import wikipedia as pywikibot
import urllib
import json
import datetime
import upload


def formatComicPage(data, imageFilename):
    """Return the wiki text of the stub page for a single comic.

    data is the dictionary decoded from the comic's info.0.json; the keys
    read here are 'year', 'month', 'day', 'num' and 'alt'.
    imageFilename is the name of the wiki file to embed in the page.
    """
    # Parse the date
    date = datetime.date(int(data['year']), int(data['month']),
                         int(data['day']))

    # Format as "August 01, 2012", then replace "01" with "1"
    dateText = re.sub(r"\b0([0-9])\b", r"\1", date.strftime("%B %d, %Y"))

    # Return the contents of the comic page
    return """{{ComicHeader|%d|%s}}

[[File:%s|%s]]

== Image Text ==
%s

== Description ==
This comic doesn't have a description yet. Why don't you add one?
""" % (data['num'], dateText, imageFilename, data['alt'], data['alt'])


class XKCDRobot:
    """Creates the redirect page, image and stub page for xkcd comics.

    For every comic two wiki pages are handled: a page named after the comic
    number that redirects to the title, and a page named after the comic's
    'safe_title' that holds the stub. Existing pages are never modified.
    """

    # Comic numbers that are known not to exist on xkcd.com. Without this
    # list a run over all comics would treat the gap as the end of the
    # archive and stop there.
    MISSING_COMICS = frozenset([404])

    def __init__(self, test, createAll, createLatest, start, end):
        """Store the run options.

        test         -- when true, nothing is uploaded or written
        createAll    -- process every comic from start to end
        createLatest -- process the most recent comic
        start, end   -- inclusive comic number range used by createAll
        """
        self.test = test
        self.createAll = createAll
        self.createLatest = createLatest
        self.start = start
        self.end = end

    def run(self):
        """Run the tasks selected by the constructor options."""
        pywikibot.output(u"")
        if self.test:
            pywikibot.output(u"TESTING - Nothing will actually be done.")

        if self.createLatest:
            self.addLatestComic()

        if self.createAll:
            self.addAllComics()

    def addAllComics(self):
        """Add every comic from self.start to self.end (both inclusive).

        Stops early at the first comic whose data cannot be decoded, which
        is taken to mean the end of the archive has been reached.
        """
        pywikibot.output(u"Adding all comics from %d to %d..."
                         % (self.start, self.end))
        num = self.start
        while num <= self.end:
            if num in self.MISSING_COMICS:
                # A known gap in the numbering, not the end of the archive.
                pywikibot.output(u"Skipping comic %d, which does not exist."
                                 % num)
                num += 1
                continue

            pywikibot.output(u"Reading comic data for %d..." % num)
            try:
                data = self.getComicData(num)
            except ValueError:
                # Likely a 404, which means we're done
                break
            pywikibot.output(u"Read comic data (%s: '%s')."
                             % (data['num'], data['safe_title']))

            self.addComic(data)
            num += 1

    def addLatestComic(self):
        """Add the most recent comic."""
        pywikibot.output(u"Reading latest comic data...")
        data = self.getComicData()
        pywikibot.output(u"Read latest comic data (%s: %s)."
                         % (data['num'], data['safe_title']))
        self.addComic(data)

    def getComicData(self, num = None):
        """Fetch and decode the JSON description of a comic.

        num is the comic number; None (the default) selects the latest
        comic. Returns the decoded JSON object. Raises ValueError when the
        response body is not valid JSON.
        """
        if num is None:
            # Latest
            url = "http://xkcd.com/info.0.json"
        else:
            url = "http://xkcd.com/%d/info.0.json" % num

        response = urllib.urlopen(url)
        try:
            return json.load(response)
        finally:
            # Release the connection even when decoding fails.
            response.close()

    def addComic(self, data):
        """Create the redirect page and the comic page for one comic."""
        self.createRedirectPage(data)
        self.createComicPage(data)

    def createRedirectPage(self, data):
        """Create the page named after the comic number, if it is missing.

        The page redirects to the page named after the comic's safe_title.
        """
        page = self.getPage(str(data['num']))
        if not page.exists():
            pywikibot.output(u"Creating redirect page for %s..."
                             % data['num'])
            contents = "#REDIRECT [[%s]]" % data['safe_title']
            self.writePage(page, contents,
                           "Created page with redirect to '%s'."
                           % data['safe_title'])
            # This message has no placeholder, so it must not be formatted.
            pywikibot.output(u"Created redirect page.")
        else:
            pywikibot.output(u"Redirect page already exists for %s."
                             % data['num'])

    def createComicPage(self, data):
        """Upload the comic image and create the stub page, if missing.

        Nothing is done when the page named after the comic's safe_title
        already exists. In test mode the upload and the page write are
        skipped, but the progress messages are still printed.
        """
        page = self.getPage(data['safe_title'])
        if not page.exists():
            # Upload image first
            # NOTE(review): the extension is always ".png", whatever the
            # extension of data['img'] is - confirm this is intended.
            imageFilename = data['safe_title'].replace(" ", "_") + ".png"
            pywikibot.output(u"Uploading image '%s' for %s..."
                             % (imageFilename, data['num']))
            if not self.test:
                uploadBot = upload.UploadRobot(
                    data['img'],
                    description="Image for comic '%s'" % data['safe_title'],
                    useFilename = imageFilename,
                    verifyDescription = False,
                    targetSite = pywikibot.getSite(),
                    uploadByUrl=True)
                # Building the robot only records the settings; the upload
                # itself happens in run().
                uploadBot.run()
            pywikibot.output(u"Uploaded image.")

            pywikibot.output(u"Creating comic page '%s' for %s..."
                             % (data['safe_title'], data['num']))
            contents = formatComicPage(data, imageFilename)
            self.writePage(page, contents, "Created comic stub")
            pywikibot.output(u"Created comic page.")
        else:
            pywikibot.output(u"Comic page already exists for %s."
                             % data['num'])

    def getPage(self, name):
        """Return the Page object called name on the configured site."""
        mysite = pywikibot.getSite()
        page = pywikibot.Page(mysite, name)
        return page

    def writePage(self, page, contents, comment):
        """Save contents to page with the given edit comment.

        Nothing is written in test mode. Locked pages, edit conflicts and
        spam filter hits are reported and skipped instead of aborting.
        """
        try:
            if not self.test:
                page.put(contents, comment = comment)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page is locked; skipping.")
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping because of edit conflict')
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change because of spam blacklist entry %s'
                % (error.url))


def main():
    """Parse the command line arguments and run the bot."""
    createAll = False
    createLatest = False
    test = False
    start = 1
    end = 99999

    for arg in pywikibot.handleArgs():
        if arg.startswith("-start:"):
            start = int(arg[7:])
        elif arg.startswith("-end:"):
            end = int(arg[5:])
        elif arg == "-latest":
            createLatest = True
        elif arg == "-all":
            createAll = True
        elif arg == "-test":
            test = True
        else:
            pywikibot.output(u"Disregarding unknown argument %s." % arg)

    bot = XKCDRobot(test, createAll, createLatest, start, end)
    bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()