SHARE
TWEET

wikiarticle2hg.py

a guest Feb 4th, 2012 86 Never
  # I hereby place this script into the Public Domain!
"""Mirror the revision history of a Wikipedia article into a Mercurial repo.

Each revision fetched from en.wikipedia.org is written to
``<article>/<article>.wiki`` and committed with the revision's edit summary,
user name and timestamp.
"""
import calendar
import os
import sys
import time

import mwclient

import mercurial.ui
from mercurial import localrepo
from mercurial import commands

article = 'Love'
# Timestamp of the first revision to fetch (ISO 8601, UTC).
# NOTE(review): setting this to None presumably starts at the article's
# very first revision -- confirm against the mwclient documentation.
start_time = '2011-01-01T00:00:00Z'

# set up mercurial repo
ui = mercurial.ui.ui()
repo_dir = article
# Decide on creation by looking for the .hg metadata directory rather than
# the working directory itself, so a pre-existing plain directory is turned
# into a repository instead of failing to open as one.
repo = localrepo.localrepository(
    ui,
    path=repo_dir,
    create=not os.path.isdir(os.path.join(repo_dir, '.hg')))
print("rep in %s" % repo.root)
content_path = os.path.join(repo.root, article + '.wiki')

site = mwclient.Site('en.wikipedia.org')
page = site.Pages[article]

# NOTE(review): in mwclient `limit` looks like the per-request batch size,
# not a cap on the total number of revisions -- confirm.
revisions = page.revisions(
    start=start_time,
    limit=50,
    dir='newer',
    prop='ids|timestamp|flags|comment|user|content')

for rev in revisions:
    content = rev['*']
    rev_time = rev['timestamp']
    # The revision time is a UTC time tuple.  Hand Mercurial an explicit
    # "<unixtime> <offset>" date so it is not re-interpreted in the local
    # timezone (which a bare asctime() string would be).
    commit_date = '%d 0' % calendar.timegm(rev_time)
    # Mercurial refuses empty commit messages, so substitute a placeholder.
    comment = rev['comment'].encode('utf8') or "blank"
    print("writing revision from %s" % time.asctime(rev_time))

    # Overwrite the working copy with this revision's wikitext.
    with open(content_path, 'wb') as f:
        f.write(content.encode('utf8'))

    # addremove picks up the content file the first time it appears.
    commands.addremove(ui, repo)
    commands.commit(ui, repo, message=comment,
                    user=rev['user'].encode('utf8'), date=commit_date)
RAW Paste Data
Top