#!/usr/bin/python
#coding: utf-8
"""
This bot reads data from xkcd.com and creates the appropriate pages.
This bot is designed specifically for explainxkcd.com.
No changes are made if the pages already exist.
Specific arguments:
-test Don't actually make any changes
-latest Create pages for the latest comic
-all Create pages for all comics
-start:xxx Specify the comic number to start at (inclusive).
This requires -all. Defaults to 1.
-end:xxx Specify the comic number to end at (inclusive).
This requires -all. Defaults to 99999.
Example usage:
# Make sure everything is set up properly, but don't actually do anything
python xkcd.py -latest -test
# Add pages for the latest comic (if they don't exist yet)
python xkcd.py -latest
# Add pages for all comics (if they don't exist yet)
python xkcd.py -all
# Add pages for comics 50-100 (if they don't exist yet)
python xkcd.py -all -start:50 -end:100
"""
#
# Distributed under the terms of the MIT license.
#
#
# This script is based on pagefromfile.py, which has the following license:
#
## (C) Andre Engels, 2004
## (C) Pywikipedia bot team, 2005-2010
##
## Distributed under the terms of the MIT license.
#
__version__='$Id$'
#
import re
import wikipedia as pywikibot
import urllib
import json
import datetime
import upload
def formatComicPage(data, imageFilename):
    """Build the wikitext of a new comic stub page.

    data is the comic's JSON record; its 'year', 'month', 'day', 'num' and
    'alt' entries are read. imageFilename is the wiki file name embedded in
    the page. Returns the page text as a string.
    """
    published = datetime.date(*[int(data[part]) for part in ('year', 'month', 'day')])
    # strftime zero-pads the day ("August 01, 2012"); drop the leading zero
    # of a standalone two-digit number so it reads "August 1, 2012".
    paddedDate = published.strftime("%B %d, %Y")
    dateText = re.sub(r"\b0([0-9])\b", r"\1", paddedDate)
    # The alt text is used twice: as the image caption and as the body of
    # the "Image Text" section.
    template = """{{ComicHeader|%d|%s}}
[[File:%s|%s]]
== Image Text ==
%s
== Description ==
This comic doesn't have a description yet. Why don't you add one?
"""
    return template % (data['num'], dateText, imageFilename, data['alt'], data['alt'])
class XKCDRobot:
    """Create wiki pages for xkcd comics.

    For each comic the bot creates a redirect page named after the comic
    number, uploads the comic image, and creates a stub page named after the
    comic's 'safe_title'. Pages that already exist are left untouched.
    """

    # Comic numbers that do not exist on xkcd.com. Number 404 is never
    # served (the site answers with its "not found" page), so it must be
    # skipped instead of being mistaken for the end of the archive.
    _MISSING_COMICS = frozenset([404])

    def __init__(self, test, createAll, createLatest, start, end):
        """Store the run options.

        test         -- when true, nothing is uploaded or written
        createAll    -- when true, run() processes comics start..end
        createLatest -- when true, run() processes the latest comic
        start, end   -- inclusive comic number range used by addAllComics()
        """
        self.test = test
        self.createAll = createAll
        self.createLatest = createLatest
        self.start = start
        self.end = end

    def run(self):
        """Perform the actions selected in the constructor."""
        pywikibot.output(u"")
        if self.test:
            pywikibot.output(u"TESTING - Nothing will actually be done.")
        if self.createLatest:
            self.addLatestComic()
        if self.createAll:
            self.addAllComics()

    def addAllComics(self):
        """Add pages for every comic from self.start to self.end inclusive.

        Stops early at the first comic number whose data cannot be parsed
        as JSON, which is taken to mean the end of the archive.
        """
        pywikibot.output(u"Adding all comics from %d to %d..." % (self.start, self.end))
        num = self.start
        while num <= self.end:
            if num in self._MISSING_COMICS:
                # A known gap, not the end of the archive: keep going.
                pywikibot.output(u"Skipping comic %d, which does not exist." % num)
                num += 1
                continue
            pywikibot.output(u"Reading comic data for %d..." % num)
            try:
                data = self.getComicData(num)
            except ValueError:
                # Likely a 404, which means we're done
                break
            pywikibot.output(u"Read comic data (%s: '%s')." % (data['num'], data['safe_title']))
            self.addComic(data)
            num += 1

    def addLatestComic(self):
        """Add pages for the most recent comic."""
        pywikibot.output(u"Reading latest comic data...")
        data = self.getComicData()
        pywikibot.output(u"Read latest comic data (%s: %s)." % (data['num'], data['safe_title']))
        self.addComic(data)

    def getComicData(self, num = None):
        """Fetch and decode the JSON record of a comic.

        num -- comic number, or None for the latest comic.

        Returns the decoded JSON object. json.load raises ValueError when
        the response body is not valid JSON (e.g. an HTML error page).
        """
        if num is None:
            # Latest
            url = "http://xkcd.com/info.0.json"
        else:
            url = "http://xkcd.com/%d/info.0.json" % num
        response = urllib.urlopen(url)
        try:
            return json.load(response)
        finally:
            # Release the connection even when decoding fails.
            response.close()

    def addComic(self, data):
        """Create the redirect page and the comic page for one comic."""
        self.createRedirectPage(data)
        self.createComicPage(data)

    def createRedirectPage(self, data):
        """Create the page '<num>' redirecting to the comic's title page."""
        page = self.getPage(str(data['num']))
        if not page.exists():
            pywikibot.output(u"Creating redirect page for %s..." % data['num'])
            contents = "#REDIRECT [[%s]]" % data['safe_title']
            self.writePage(page, contents, "Created page with redirect to '%s'." % data['safe_title'])
            # No placeholder in this message, so it must not be %-formatted
            # (doing so raises TypeError and aborts the run).
            pywikibot.output(u"Created redirect page.")
        else:
            pywikibot.output(u"Redirect page already exists for %s." % data['num'])

    def createComicPage(self, data):
        """Upload the comic image and create the comic's stub page.

        Nothing is done when a page named data['safe_title'] already exists.
        """
        page = self.getPage(data['safe_title'])
        if not page.exists():
            # Upload image first
            # NOTE(review): the wiki file name always ends in ".png", even
            # when data['img'] points at another image type -- confirm.
            imageFilename = data['safe_title'].replace(" ", "_") + ".png"
            pywikibot.output(u"Uploading image '%s' for %s..." % (imageFilename, data['num']))
            if not self.test:
                uploadBot = upload.UploadRobot(
                    data['img'],
                    description="Image for comic '%s'" % data['safe_title'],
                    useFilename = imageFilename,
                    verifyDescription = False,
                    targetSite = pywikibot.getSite(),
                    uploadByUrl=True)
                # Constructing the robot does not upload anything; it has
                # to be run explicitly.
                uploadBot.run()
            pywikibot.output(u"Uploaded image.")
            pywikibot.output(u"Creating comic page '%s' for %s..." % (data['safe_title'], data['num']))
            contents = formatComicPage(data, imageFilename)
            self.writePage(page, contents, "Created comic stub")
            pywikibot.output(u"Created comic page.")
        else:
            pywikibot.output(u"Comic page already exists for %s." % data['num'])

    def getPage(self, name):
        """Return the pywikibot Page called name on the configured site."""
        mysite = pywikibot.getSite()
        page = pywikibot.Page(mysite, name)
        return page

    def writePage(self, page, contents, comment):
        """Save contents to page with the given edit comment.

        Nothing is written in test mode. Locked pages, edit conflicts and
        spam-filter rejections are reported and skipped rather than raised.
        """
        try:
            if not self.test:
                page.put(contents, comment = comment)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page is locked; skipping.")
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping because of edit conflict')
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change because of spam blacklist entry %s'
                % (error.url))
def main():
    """Parse the command-line arguments and run the bot.

    Recognised arguments are -test, -latest, -all, -start:N and -end:N;
    anything else left over by pywikibot.handleArgs() is reported and
    ignored.
    """
    # Simple on/off switches, all off by default.
    switches = {"-all": False, "-latest": False, "-test": False}
    startPrefix = "-start:"
    endPrefix = "-end:"
    firstComic = 1
    lastComic = 99999
    for argument in pywikibot.handleArgs():
        if argument.startswith(startPrefix):
            firstComic = int(argument[len(startPrefix):])
        elif argument.startswith(endPrefix):
            lastComic = int(argument[len(endPrefix):])
        elif argument in switches:
            switches[argument] = True
        else:
            pywikibot.output(u"Disregarding unknown argument %s." % argument)
    robot = XKCDRobot(switches["-test"], switches["-all"], switches["-latest"],
                      firstComic, lastComic)
    robot.run()
if __name__ == "__main__":
    # Run the bot only when this file is executed as a script.
    # pywikibot.stopme() is called whether main() returns normally or raises.
    try:
        main()
    finally:
        pywikibot.stopme()