Share Pastebin
Guest
Public paste!

The Earwig

By: a guest | Mar 16th, 2010 | Syntax: Python | Size: 4.63 KB | Hits: 42 | Expires: Never
Copy text to clipboard
  1. import wikipedia, urllib, urllib2, codecs, os, re, string, time, simplejson, traceback
  2.  
  3. def main():
  4.     print """
  5. $$$$$$\ $$\                                    $$$$$$$\            $$\    
  6. $$  __$$\ $$ |                                    $$  __$$\           $$ |    
  7. $$ /  \__|$$$$$$$\  $$$$$$\  $$$$$$\  $$$$$$\ $$ |  $$ | $$$$$$\ $$$$$$\  
  8. \$$$$$$\ $$  __$$\ $$  __$$\ $$  __$$\ $$  __$$\ $$$$$$$\ |$$  __$$\\_$$  _|  
  9. \____$$\ $$ |  $$ |$$$$$$$$ |$$$$$$$$ |$$ /  $$ |$$  __$$\ $$ /  $$ | $$ |    
  10. $$\  $$ |$$ |  $$ |$$   ____|$$   ____|$$ |  $$ |$$ |  $$ |$$ |  $$ | $$ |$$\
  11. \$$$$$$  |$$ |  $$ |\$$$$$$$\ \$$$$$$$\ $$$$$$$  |$$$$$$$  |\$$$$$$  | \$$$$  |
  12. \______/ \__|  \__| \_______| \_______|$$  ____/ \_______/  \______/   \____/
  13.                                        $$ |                                  
  14.                                        $$ |                                  
  15.                                        \__|                                   """
  16.     print "\nv0.4\n"
  17.     while 1:
  18.         run()
  19.         print "Run complete, sleeping for one hour..."
  20.         time.sleep(3600)
  21.  
  22. def run():
  23.     wikipedia.output(u"Retrieving pages list...")
  24.     data = urllib2.urlopen("http://toolserver.org/~earwig/reports/enwiki/deadend_pages.txt").read() # Get the list of pages.
  25.     wikipedia.output(u"Page list retrieved, working on pages.")
  26.     site = wikipedia.getSite()
  27.     for pagename in string.split(data, "\n"): # For each page in the list.
  28.         pagename = pagename[2:-2]
  29.         if pagename:
  30.             pagedata = wikipedia.Page(site, pagename)
  31.         else:
  32.             continue
  33.         try:
  34.             process(pagedata) # Process it.
  35.         except wikipedia.Error:
  36.             wikipedia.output(u"Could not access page, it was probably deleted.")
  37.         checked(pagedata.aslink())
  38.  
  39. def process(page):
  40.     wikipedia.output(u"\nWorking on page %s." % page.aslink())
  41.     if ifChecked(page.aslink()):
  42.         wikipedia.output(u"Page has already been checked.")
  43.         return
  44.     content = page.get()
  45.     if not page.botMayEdit(username="SheepBot"):
  46.         wikipedia.output(u"Bot is not allowed to edit this page, skipping.")
  47.         return
  48.     tempy = re.search("\{\{((dead(_| )?end)|(internal(_| )?links)|(wi?k?i?fy?)|(de?(p|b))|((article|multiple|)( |_)?issues)|(ai)|(needs(_| )links))", string.lower(content))
  49.     if tempy:
  50.         wikipedia.output(u"Found wikify/dead end template in page, stopping.")
  51.         return
  52.     if "[[" not in content and "{{" not in content:
  53.         wikipedia.output(u"No links or templates found in page; adding dead end template.")
  54.         addTemplate(page)
  55.         return
  56.     try:
  57.         links = queryLinks(page.title())
  58.     except Exception:
  59.         traceback.print_exc()
  60.         wikipedia.output(u"Cannot read API query.")
  61.         return
  62.     if not links:
  63.         wikipedia.output(u"No non/category/image links found in page. Adding template.")
  64.         addTemplate(page)
  65.         return
  66.     else:
  67.         wikipedia.output(u"Links found from API are actual links. Stopping.")
  68.         return
  69.     wikipedia.output(u"Links found. Stopping.")
  70.     return
  71.  
  72. def checked(page):
  73.     f = codecs.open("edited_pages.txt", "a", 'utf-8')
  74.     f.write("\n%s" % page)
  75.     f.close()
  76.  
  77. def ifChecked(page):
  78.     f = codecs.open("edited_pages.txt", "r", 'utf-8')
  79.     checked_pages = f.read()
  80.     f.close()
  81.     if page in checked_pages:
  82.         return True
  83.     return False
  84.  
  85. def queryLinks(title):
  86.     params = {'action':'query', 'prop':'links', 'format':'json', 'pllimit':500, 'plnamespace':0}
  87.     params['titles'] = title
  88.     data = urllib.urlencode(params)
  89.     raw = urllib2.urlopen("http://en.wikipedia.org/w/api.php", data)
  90.     res = simplejson.loads(raw.read())
  91.     pageid = res['query']['pages'].keys()[0]
  92.     try:
  93.         links = res['query']['pages'][pageid]['links']
  94.         print links
  95.         return True
  96.     except KeyError:
  97.         return False
  98.  
  99. def addTemplate(page):
  100.     content = page.get()
  101.     newpage = "{{subst:dated|Dead end}}\n" + content
  102.     comment = "([[WP:BOT|Bot]]): Adding {{dead end}} template to article because of lack of wikilinks."
  103.     try:
  104.         wikipedia.showDiff(content, newpage)
  105.         page.put(newpage, comment)
  106.     except wikipedia.LockedPage:
  107.         wikipedia.output(u"Page protected, unable to save.")
  108.     except wikipedia.PageNotSaved:
  109.         wikipedia.output(u"Page unable to be saved.")
  110.     except wikipedia.Error:
  111.         wikipedia.output(u"MALFUNCTION, MALFUNCTION!")
  112.     wikipedia.output("Page saved successfully.")
  113.  
  114. if __name__ == '__main__':
  115.     try:
  116.         main()
  117.     finally:
  118.         wikipedia.stopme()