Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # © 2011 Betacommand
- # used with permission => Addihockey10
- import catlib
- import codecs
- import image
- import pagegenerators
- import re
- import sys
- import time
- import wikipedia
# Wikimedia Commons — the wiki all file pages, categories and logs below refer to.
site = wikipedia.Site('commons',"commons")
- def main():
- # define a list of namespaces where we don't want to replace the file
- skipns = [1,2,3,5,7,9,11,13,15,101,109]
- # grab a list of already confirmed orphaned files that there is no need to re-check
- checked = get_lines('globalusage.log')
- # grab the list of files from a category on commons
- gen = pagegenerators.CategorizedPageGenerator(catlib.Category(site,u'Category:Vector version available'),start=u'600px Amaranto2.png')
- for page in gen:
- # check to ensure that the page we are working on is in fact an image and hasn't already been checked
- if page.namespace() == 6 and page.title() not in checked:
- wikipedia.output(page.title())
- # get the file discription page contents
- text = page.get()
- # look for the template and the new file name.
- new = re.search(ur'\{\{ ?(Vector version available|vva|NowSVG|SupersededSVG|SVG available|vectorversionavailable) ?\|(.*?)\}\}',text,re.I).group(2)
- # depending on how the template functions it sometimes includes "File:" or "Image:" go ahead and strip that out
- new = re.sub(ur'([Ff]ile|[Ii]mage):','',new,re.I)
- new_image = wikipedia.Page(site,u'File:%s' % new)
- # confirm that the new file actually exists
- if new_image.exists():
- # work on all uses of the file
- for pg in page.globalUsage():
- # excluding the skipped namespaces
- if pg.namespace() not in skipns and not re.search(ur'(WikiProject Chemistry\/Image|Graphic Lab\/|[Aa]rchive|Featured picture candidates|[Aa]rchivio)',pg.title(),re.I):
- wikipedia.output(pg.title())
- # send the work off to image.py to do the replacements one page at a time as it doesn't handle cross-wiki too well
- bot = image.ImageRobot([pg],page.titleWithoutNamespace(),re.sub(u'_',u' ',new),loose=True)
- bot.run()
- # add in a second check to avoid self edit conflicts due to replace.py's put_async feature
- while not wikipedia.page_put_queue.empty():
- time.sleep(1)
- # just to be safe lets wait two more seconds
- time.sleep(2)
- # now that we have finished with a file lets double check its an orphan
- list = [x for x in page.globalUsage()]
- list2 = []
- for item in list:
- if item.namespace() not in skipns:
- list2.append(item)
- # file hasnt been used so lets exclude it from all further checks
- if len(list2) == 0:
- log(u'\n%s' % page.title(),'globalusage.log')
- else:
- # for some reason the file wasn't correctly orphaned lets log it for further human review later
- log(u'\n%s' % page.title(),'globalusage_error.log')
- else:
- # the given target file doesnt exist. logging this odd case for human review
- log(u'\n%s' % page.title(),'globalusage_missing.log')
# basic function to retrieve a file and convert it into a usable python
# list: each non-empty line is an item in the list
def get_lines(file):
    """Read *file* as UTF-8 and return its non-empty lines as a list.

    Returns an empty list when the file is missing or unreadable, so an
    absent log file simply means "nothing checked yet".

    Fixes over the original: catches IOError instead of a bare except
    (which also swallowed KeyboardInterrupt), always closes the handle,
    and removes *all* blank lines (`items.remove('')` only dropped the
    first one).
    """
    try:
        f = codecs.open(file, 'r', 'utf-8')
        try:
            text = f.read()
        finally:
            f.close()
    except IOError:
        return []
    # Drop empty strings produced by blank lines and the trailing newline.
    return [line for line in text.split('\n') if line != '']
# basic function for saving information to a file
def log(text, file):
    """Append *text* to *file* using UTF-8 encoding.

    Creates the file if it does not exist.  Uses try/finally so the
    handle is closed even when the write fails (the original leaked it).
    """
    f = codecs.open(file, 'a', 'utf-8')
    try:
        f.write(text)
    finally:
        f.close()
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always release pywikipedia's throttle/lock files, even when
        # main() exits via an exception or KeyboardInterrupt.
        wikipedia.stopme()
Add Comment
Please, Sign In to add comment