Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # © 2011 Betacommand
- # used with permission => Addihockey10
- import catlib
- import codecs
- import image
- import pagegenerators
- import re
- import sys
- import time
- import wikipedia
# Wikimedia Commons — the wiki all file pages, categories and logs below refer to.
site = wikipedia.Site('commons',"commons")
- def main():
- # define a list of namespaces where we don't want to replace the file
- skipns = [1,2,3,5,7,9,11,13,15,101,109]
- # grab a list of already confirmed orphaned files that there is no need to re-check
- checked = get_lines('globalusage.log')
- # grab the list of files from a category on commons
- gen = pagegenerators.CategorizedPageGenerator(catlib.Category(site,u'Category:Vector version available'),start=u'600px Amaranto2.png')
- for page in gen:
- # check to ensure that the page we are working on is in fact an image and hasn't already been checked
- if page.namespace() == 6 and page.title() not in checked:
- wikipedia.output(page.title())
- # get the file discription page contents
- text = page.get()
- # look for the template and the new file name.
- new = re.search(ur'\{\{ ?(Vector version available|vva|NowSVG|SupersededSVG|SVG available|vectorversionavailable) ?\|(.*?)\}\}',text,re.I).group(2)
- # depending on how the template functions it sometimes includes "File:" or "Image:" go ahead and strip that out
- new = re.sub(ur'([Ff]ile|[Ii]mage):','',new,re.I)
- new_image = wikipedia.Page(site,u'File:%s' % new)
- # confirm that the new file actually exists
- if new_image.exists():
- # work on all uses of the file
- for pg in page.globalUsage():
- # excluding the skipped namespaces
- if pg.namespace() not in skipns and not re.search(ur'(WikiProject Chemistry\/Image|Graphic Lab\/|[Aa]rchive|Featured picture candidates|[Aa]rchivio)',pg.title(),re.I):
- wikipedia.output(pg.title())
- # send the work off to image.py to do the replacements one page at a time as it doesn't handle cross-wiki too well
- bot = image.ImageRobot([pg],page.titleWithoutNamespace(),re.sub(u'_',u' ',new),loose=True)
- bot.run()
- # add in a second check to avoid self edit conflicts due to replace.py's put_async feature
- while not wikipedia.page_put_queue.empty():
- time.sleep(1)
- # just to be safe lets wait two more seconds
- time.sleep(2)
- # now that we have finished with a file lets double check its an orphan
- list = [x for x in page.globalUsage()]
- list2 = []
- for item in list:
- if item.namespace() not in skipns:
- list2.append(item)
- # file hasnt been used so lets exclude it from all further checks
- if len(list2) == 0:
- log(u'\n%s' % page.title(),'globalusage.log')
- else:
- # for some reason the file wasn't correctly orphaned lets log it for further human review later
- log(u'\n%s' % page.title(),'globalusage_error.log')
- else:
- # the given target file doesnt exist. logging this odd case for human review
- log(u'\n%s' % page.title(),'globalusage_missing.log')
# basic function to retrieve a file and convert it into a usable python
# list: each non-empty line is an item in the list
def get_lines(file):
    """Read *file* as UTF-8 and return its non-empty lines as a list.

    Returns an empty list when the file is missing or unreadable, so an
    absent log file simply means "nothing checked yet".

    Fixes over the original: catches IOError instead of a bare except
    (which also swallowed KeyboardInterrupt), always closes the handle,
    and removes *all* blank lines (`items.remove('')` only dropped the
    first one).
    """
    try:
        f = codecs.open(file, 'r', 'utf-8')
        try:
            text = f.read()
        finally:
            f.close()
    except IOError:
        return []
    # Drop empty strings produced by blank lines and the trailing newline.
    return [line for line in text.split('\n') if line != '']
# basic function for saving information to a file
def log(text, file):
    """Append *text* to *file* using UTF-8 encoding.

    Creates the file if it does not exist.  Uses try/finally so the
    handle is closed even when the write fails (the original leaked it).
    """
    f = codecs.open(file, 'a', 'utf-8')
    try:
        f.write(text)
    finally:
        f.close()
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always release pywikipedia's throttle/lock files, even when
        # main() exits via an exception or KeyboardInterrupt.
        wikipedia.stopme()
Add Comment
Please, Sign In to add comment