Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- #coding: utf-8
- """
- This bot reads data from xkcd.com and creates the appropriate pages.
- This bot is designed specifically for explainxkcd.com.
- No changes are made if the pages already exist.
- Specific arguments:
- -test Don't actually make any changes
- -latest Create pages for the latest comic
- -all Create pages for all comics
- -start:xxx Specify the comic number to start at (inclusive).
- This requires -all. Defaults to 1.
- -end:xxx Specify the comic number to end at (inclusive).
- This requires -all. Defaults to 99999.
- Example usage:
- # Make sure everything is set up properly, but don't actually do anything
- python xkcd.py -latest -test
- # Add pages for the latest comic (if they don't exist yet)
- python xkcd.py -latest
- # Add pages for all comics (if they don't exist yet)
- python xkcd.py -all
- # Add pages for comics 50-100 (if they don't exist yet)
- python xkcd.py -all -start:50 -end:100
- """
- #
- # Distributed under the terms of the MIT license.
- #
- #
- # This script is based on pagefromfile.py, which has the following license:
- #
- ## (C) Andre Engels, 2004
- ## (C) Pywikipedia bot team, 2005-2010
- ##
- ## Distributed under the terms of the MIT license.
- #
- __version__='$Id$'
- #
- import re
- import wikipedia as pywikibot
- import urllib
- import json
- import datetime
- import upload
def formatComicPage(data, imageFilename):
    """Return the wiki text for a new comic stub page.

    data          -- mapping providing the 'year', 'month', 'day', 'num'
                     and 'alt' entries of one comic; year/month/day are
                     passed through int(), 'num' is formatted with %d.
    imageFilename -- name of the wiki file to embed on the page.

    The 'alt' text is used twice: as the caption of the embedded file and
    as the body of the "Image Text" section.
    """
    published = datetime.date(
        int(data['year']), int(data['month']), int(data['day']))

    # strftime pads the day ("August 01, 2012"); drop the leading zero so
    # the header reads "August 1, 2012".
    paddedDate = published.strftime("%B %d, %Y")
    headerDate = re.sub(r"\b0([0-9])\b", r"\1", paddedDate)

    template = """{{ComicHeader|%d|%s}}
[[File:%s|%s]]
== Image Text ==
%s
== Description ==
This comic doesn't have a description yet. Why don't you add one?
"""
    altText = data['alt']
    return template % (data['num'], headerDate, imageFilename, altText, altText)
class XKCDRobot:
    """Bot that creates wiki stub pages for xkcd comics.

    For every comic it processes, the bot makes sure two pages exist: a
    redirect page named after the comic number and a comic page named after
    the comic's 'safe_title'. Pages that already exist are left untouched.
    """

    def __init__(self, test, createAll, createLatest, start, end):
        """Store the run options.

        test         -- when true, no page is written and no image uploaded.
        createAll    -- when true, run() processes comics start..end.
        createLatest -- when true, run() processes the most recent comic.
        start, end   -- inclusive comic-number range used by addAllComics().
        """
        self.test = test
        self.createAll = createAll
        self.createLatest = createLatest
        self.start = start
        self.end = end

    def run(self):
        """Execute the actions selected in the constructor."""
        pywikibot.output(u"")
        if self.test:
            pywikibot.output(u"TESTING - Nothing will actually be done.")
        if self.createLatest:
            self.addLatestComic()
        if self.createAll:
            self.addAllComics()

    def addAllComics(self):
        """Add pages for every comic numbered self.start..self.end.

        Stops early at the first comic whose data cannot be decoded, which
        is taken to mean the end of the published comics has been reached.
        """
        pywikibot.output(u"Adding all comics from %d to %d..." % (self.start, self.end))
        num = self.start
        while num <= self.end:
            try:
                pywikibot.output(u"Reading comic data for %d..." % num)
                data = self.getComicData(num)
                pywikibot.output(u"Read comic data (%s: '%s')." % (data['num'], data['safe_title']))
            except ValueError:
                if num == 404:
                    # xkcd intentionally has no comic #404 (the URL serves a
                    # "not found" page), so this gap is not the end of the
                    # series; skip it instead of stopping at 403.
                    pywikibot.output(u"Comic 404 does not exist; skipping.")
                    num += 1
                    continue
                # Likely a 404, which means we're done
                break
            self.addComic(data)
            num += 1

    def addLatestComic(self):
        """Add pages for the most recently published comic."""
        pywikibot.output(u"Reading latest comic data...")
        data = self.getComicData()
        pywikibot.output(u"Read latest comic data (%s: %s)." % (data['num'], data['safe_title']))
        self.addComic(data)

    def getComicData(self, num=None):
        """Fetch and decode the JSON description of a comic.

        num -- comic number, or None for the latest comic.

        Returns the decoded JSON object. json.load raises ValueError when
        the response body is not valid JSON; addAllComics() relies on that
        to detect comics that do not exist.
        """
        if num is None:
            # Latest
            url = "http://xkcd.com/info.0.json"
        else:
            url = "http://xkcd.com/%d/info.0.json" % num
        response = urllib.urlopen(url)
        try:
            return json.load(response)
        finally:
            # Release the connection even if decoding fails.
            response.close()

    def addComic(self, data):
        """Create the redirect page and the comic page for one comic."""
        self.createRedirectPage(data)
        self.createComicPage(data)

    def createRedirectPage(self, data):
        """Create the '<number>' page redirecting to the comic's title page.

        Does nothing (apart from logging) when the page already exists.
        """
        page = self.getPage(str(data['num']))
        if page.exists():
            pywikibot.output(u"Redirect page already exists for %s." % data['num'])
            return

        pywikibot.output(u"Creating redirect page for %s..." % data['num'])
        contents = "#REDIRECT [[%s]]" % data['safe_title']
        self.writePage(page, contents, "Created page with redirect to '%s'." % data['safe_title'])
        # This message has no placeholder, so it must not be %-formatted
        # (doing so raised TypeError after the page had been written).
        pywikibot.output(u"Created redirect page.")

    def createComicPage(self, data):
        """Upload the comic image and create the comic's stub page.

        Does nothing (apart from logging) when the page already exists.
        In test mode neither the upload nor the page write is performed.
        """
        page = self.getPage(data['safe_title'])
        if page.exists():
            pywikibot.output(u"Comic page already exists for %s." % data['num'])
            return

        # Upload image first
        # NOTE(review): the target name always ends in ".png" regardless of
        # the extension of data['img'] - confirm this is intended.
        imageFilename = data['safe_title'].replace(" ", "_") + ".png"
        pywikibot.output(u"Uploading image '%s' for %s..." % (imageFilename, data['num']))
        if not self.test:
            uploadBot = upload.UploadRobot(
                data['img'],
                description="Image for comic '%s'" % data['safe_title'],
                useFilename=imageFilename,
                verifyDescription=False,
                targetSite=pywikibot.getSite(),
                uploadByUrl=True)
            # Constructing the robot only configures it; run() performs the
            # upload. NOTE(review): based on pywikipedia's upload.py API -
            # confirm against the installed version.
            uploadBot.run()
        pywikibot.output(u"Uploaded image.")

        pywikibot.output(u"Creating comic page '%s' for %s..." % (data['safe_title'], data['num']))
        contents = formatComicPage(data, imageFilename)
        self.writePage(page, contents, "Created comic stub")
        pywikibot.output(u"Created comic page.")

    def getPage(self, name):
        """Return the pywikibot Page called `name` on the current site."""
        mysite = pywikibot.getSite()
        return pywikibot.Page(mysite, name)

    def writePage(self, page, contents, comment):
        """Save `contents` to `page` with the given edit `comment`.

        Nothing is written in test mode. Locked pages, edit conflicts and
        spam-filter rejections are logged and skipped rather than raised.
        """
        if self.test:
            return
        try:
            page.put(contents, comment=comment)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping." % page.title())
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict' % page.title())
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (page.title(), error.url))
def main():
    """Read the command-line arguments and run the bot with them.

    Recognised arguments are -test, -latest, -all, -start:N and -end:N;
    anything else yielded by pywikibot.handleArgs() is reported and ignored.
    """
    test, createAll, createLatest = False, False, False
    start, end = 1, 99999
    startPrefix = "-start:"
    endPrefix = "-end:"

    for arg in pywikibot.handleArgs():
        if arg == "-test":
            test = True
        elif arg == "-all":
            createAll = True
        elif arg == "-latest":
            createLatest = True
        elif arg.startswith(startPrefix):
            # Everything after the prefix is the comic number.
            start = int(arg[len(startPrefix):])
        elif arg.startswith(endPrefix):
            end = int(arg[len(endPrefix):])
        else:
            pywikibot.output(u"Disregarding unknown argument %s." % arg)

    robot = XKCDRobot(test, createAll, createLatest, start, end)
    robot.run()
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Call pywikibot's shutdown hook whether main() succeeded or raised.
        pywikibot.stopme()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement