# I hereby place this script into the Public Domain!
"""Replay the revision history of a Wikipedia article into a Mercurial repo.

Each revision fetched from en.wikipedia.org is written to
``<article>/<article>.wiki`` and committed with the revision's edit summary,
user name and timestamp.  The repository lives in a directory named after the
article and is created when that directory does not exist yet.
"""
import calendar
import os
import sys
import time

import mwclient
import mercurial.ui
from mercurial import localrepo
from mercurial import commands

# Title of the article to import; also used as the repository directory name.
article = 'Love'
# Lower bound handed to the revision query as ``start``.  Use None to pass no
# bound at all.
start_time = '2011-01-01T00:00:00Z'

# Revision properties requested from the MediaWiki API.
_REVISION_PROPS = 'ids|timestamp|flags|comment|user|content'


def _hg_date(timestamp):
    """Format a UTC time tuple as Mercurial's "<unixtime> <offset>" date.

    A zone-less string such as the output of ``time.asctime`` would be read
    by Mercurial as local time, shifting every commit by the machine's UTC
    offset.  The explicit ``0`` offset pins the commit date to UTC.
    NOTE(review): assumes the API timestamps are UTC (they carry a ``Z``
    suffix in the query above) -- confirm against the mwclient version in use.
    """
    return '%d 0' % calendar.timegm(timestamp)


def _open_repo(ui, repo_dir):
    """Open the repository at *repo_dir*, creating it if the directory is absent."""
    return localrepo.localrepository(
        ui, path=repo_dir, create=not os.path.isdir(repo_dir))


def _write_content(path, content):
    """Overwrite *path* with *content* encoded as UTF-8."""
    with open(path, 'wb') as out:
        out.write(content.encode('utf8'))


def export_history(article, start_time=None, limit=50):
    """Commit the revisions of *article* to a Mercurial repo, oldest first.

    article    -- page title; also the repository directory and the stem of
                  the ``.wiki`` file that receives the page text.
    start_time -- passed to the revision query as ``start``.
    limit      -- passed to the revision query as ``limit``.
    """
    ui = mercurial.ui.ui()
    repo = _open_repo(ui, article)
    print("rep in %s" % repo.root)
    content_path = os.path.join(repo.root, article + '.wiki')

    site = mwclient.Site('en.wikipedia.org')
    page = site.Pages[article]
    revisions = page.revisions(start=start_time, limit=limit, dir='newer',
                               prop=_REVISION_PROPS)

    for rev in revisions:
        # Revisions with suppressed fields come back without the matching
        # key; skip or substitute instead of aborting the import half-way.
        content = rev.get('*')
        if content is None:
            print("skipping revision %s: no content returned"
                  % rev.get('revid'))
            continue

        # Mercurial refuses an empty commit message, hence the placeholder.
        comment = rev.get('comment', u'').encode('utf8') or "blank"
        user = rev.get('user', u'unknown').encode('utf8')

        print("writing revision from %s" % time.asctime(rev['timestamp']))
        _write_content(content_path, content)

        commands.addremove(ui, repo)
        commands.commit(ui, repo, message=comment, user=user,
                        date=_hg_date(rev['timestamp']))


if __name__ == '__main__':
    export_history(article, start_time)