Want more features on Pastebin? Sign Up, it's FREE!
Guest

wikiarticle2hg.py

By: a guest on Feb 4th, 2012  |  syntax: Python  |  size: 1.15 KB  |  views: 76  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. # I hereby place this script into the Public Domain!
  2. import os, sys
  3. import time
  4.  
  5. import mwclient
  6.  
  7. import mercurial.ui
  8. from mercurial import localrepo
  9. from mercurial import commands
  10.  
  11. article = 'Love'
  12. #start_time = None
  13. start_time = '2011-01-01T00:00:00Z'
  14.  
  15. # set up mercurial repo
  16. ui = mercurial.ui.ui()
  17. repo_dir = article
  18. repo = localrepo.localrepository(ui, path=repo_dir, create = not os.path.isdir(repo_dir))
  19. #if not os.path.isdir(article):
  20. #       os.mkdir(article)
  21. #os.chdir(article)
  22. print "rep in", repo.root
  23. content_path = os.path.join(repo.root, article + '.wiki')
  24.  
  25. site = mwclient.Site('en.wikipedia.org')
  26. page = site.Pages[article]
  27.  
  28. for rev in page.revisions(start=start_time, limit=50,dir='newer', prop='ids|timestamp|flags|comment|user|content'):
  29.         content = rev['*']
  30.         timestamp = time.asctime(rev['timestamp'])
  31.         comment = rev['comment'].encode('utf8')
  32.         if len(comment) == 0: comment = "blank"
  33.         print "writing revision from", timestamp
  34.        
  35.         f = open(content_path, 'wb')
  36.         f.write(content.encode('utf8'))
  37.         f.close()
  38.  
  39.         commands.addremove(ui, repo)
  40.         commands.commit(ui, repo, message=comment, user=rev['user'].encode('utf8'), date=timestamp)
clone this paste RAW Paste Data