Advertisement
Guest User

wikiarticle2hg.py

a guest
Feb 4th, 2012
468
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.15 KB | None | 0 0
  # I hereby place this script into the Public Domain!
"""Replay the revision history of one Wikipedia article into a Mercurial repo.

Each fetched revision is written to ``<article>.wiki`` inside a repository
directory named after the article and committed with the revision's edit
summary, author and timestamp.
"""
import calendar
import os
import sys
import time

import mwclient

import mercurial.ui
from mercurial import localrepo
from mercurial import commands

article = 'Love'
# ISO-8601 UTC timestamp of the first revision to fetch.
# NOTE(review): None presumably means "start at the earliest revision" --
# confirm against the mwclient documentation.
start_time = '2011-01-01T00:00:00Z'

# Set up the Mercurial repository.  Only create it when no ``.hg`` directory
# exists yet; testing for the bare directory would fail on a pre-existing
# directory that has not been initialised as a repository.
ui = mercurial.ui.ui()
repo_dir = article
repo = localrepo.localrepository(
    ui,
    path=repo_dir,
    create=not os.path.isdir(os.path.join(repo_dir, '.hg')),
)
print("rep in %s" % repo.root)
content_path = os.path.join(repo.root, article + '.wiki')

site = mwclient.Site('en.wikipedia.org')
page = site.Pages[article]

revisions = page.revisions(
    start=start_time,
    limit=50,
    dir='newer',
    prop='ids|timestamp|flags|comment|user|content',
)
for rev in revisions:
    timestamp = time.asctime(rev['timestamp'])

    # The wiki omits fields that have been hidden from public view, so none
    # of the optional keys can be indexed unconditionally.
    content = rev.get('*')
    if content is None:
        print("skipping revision from %s (no content available)" % timestamp)
        continue

    # The original substituted "blank" for empty summaries, presumably
    # because Mercurial refuses an empty commit message.
    comment = rev.get('comment', u'').encode('utf8')
    if not comment:
        comment = "blank"
    user = rev.get('user', u'unknown').encode('utf8')

    # rev['timestamp'] is a UTC time tuple.  Hand it to Mercurial as
    # "<unixtime> <offset>" so it is not re-interpreted in the local zone,
    # which is what happens with a zone-less asctime() string.
    hg_date = '%d 0' % calendar.timegm(rev['timestamp'])

    print("writing revision from %s" % timestamp)

    # ``with`` guarantees the file is flushed and closed before committing,
    # even if the write raises.
    with open(content_path, 'wb') as f:
        f.write(content.encode('utf8'))

    commands.addremove(ui, repo)
    commands.commit(ui, repo, message=comment, user=user, date=hg_date)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement