Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- """
- നീക്കം ചെയ്ത പ്രമാണങ്ങൾ താളുകളിൽ നിന്നും ഒഴിവാക്കുന്നതിന്
- നിർമ്മിച്ചത്: സുനിൽ വി.എസ്.
- തിയതി: 2010-11-27
- """
- import wikipedia
- import pagegenerators
- import unicodedata
- import codecs
- import re
- import pywikibot
# Namespace prefixes (Latin-script spellings in both capitalisations, plus two
# Malayalam-script names) that the main loop combines with a file title when
# searching page text for uses of that file.
# Plain u'' literals are used instead of ur'': none of the strings contains a
# backslash, so the values are identical on Python 2, and ur'' is rejected by
# Python 3.
imageNamespaces = [
    u'file',
    u'File',
    u'image',
    u'Image',
    u'ചിത്രം',
    u'പ്രമാണം',
]
# Normalise a piece of text so that spaces and underscores compare equal.
def getGenericText(pText):
    """Return pText with every space character turned into an underscore.

    Only the plain space (' ') is affected; tabs, newlines and other
    whitespace are left alone.  The substitution is one character for one
    character, so the result has the same length as pText and an index into
    the result is also a valid index into pText.
    """
    myPieces = pText.split(' ')
    return '_'.join(myPieces)
# Locate the end of a [[...]] link whose opening brackets sit just before pStart.
def _findImageLinkEnd(pText, pStart):
    """Return the index just past the ']]' that closes the link around pStart.

    pStart is the index of the first character after the opening '[['.
    Nested '[[...]]' pairs (for example links inside a caption) are skipped by
    tracking bracket depth.  Returns -1 when the text ends before the link is
    closed, so the caller never indexes past the end of pText.
    """
    myDepth = 2  # the two '[' characters immediately before pStart
    for myIndex in range(pStart, len(pText)):
        myChar = pText[myIndex]
        if myChar == '[':
            myDepth += 1
        elif myChar == ']':
            myDepth -= 1
            if myDepth == 0:
                return myIndex + 1
    return -1


#to remove imagetext from pagetext
def hideImageText(pText, pImageText, pOccurances):
    """Wrap uses of an image in HTML comments and return the new page text.

    pText        -- the page text to edit.
    pImageText   -- the 'Namespace:Title' string to look for, already passed
                    through getGenericText (spaces written as underscores).
    pOccurances  -- how many matches to process, counted from the start.

    Two contexts are handled:
      * the match starts a line (treated as a gallery entry): the whole line
        is commented out;
      * the match directly follows '[[': the whole link, up to its matching
        ']]', is commented out.
    The hidden span becomes '<!--' + span + '\\n-->\\n'; a newline that directly
    followed the span is absorbed into that replacement.

    A match in any other context, or a link whose brackets never close, is
    left untouched instead of corrupting the text or raising IndexError.
    """
    mySearchFrom = 0
    for _ in range(pOccurances):
        # getGenericText keeps the length unchanged, so indices found in the
        # normalised text can be used directly on pText.
        myLocation = getGenericText(pText).find(pImageText, mySearchFrom)
        if myLocation < 0:
            break
        # Unless this match gets hidden below, resume searching right after it.
        mySearchFrom = myLocation + len(pImageText)

        # The explicit lower bounds stop negative indices from wrapping round
        # to the end of the text when the match is at index 0 or 1.
        if myLocation >= 1 and pText[myLocation - 1] == '\n':
            #If it is from gallery
            wikipedia.output('Removing from Gallery')
            myStartLocation = myLocation
            myEndLocation = pText.find('\n', myLocation + 1)
            if myEndLocation < 0:
                # Last line of the text without a trailing newline.
                myEndLocation = len(pText)
        elif myLocation >= 2 and pText[myLocation - 2:myLocation] == '[[':
            #If it is image inclusion
            myStartLocation = myLocation - 2
            myEndLocation = _findImageLinkEnd(pText, myLocation)
            if myEndLocation < 0:
                continue  # unbalanced brackets: leave this match alone
            wikipedia.output('Removing image')
            wikipedia.output(pText[myStartLocation:myEndLocation])
        else:
            # Unknown context (e.g. a template parameter): do not touch it.
            continue

        myHidden = '<!--' + pText[myStartLocation:myEndLocation] + '\n-->\n'
        # Only swallow the following character when it is the newline that the
        # replacement re-adds; any other character must be preserved.
        myTailStart = myEndLocation
        if pText[myEndLocation:myEndLocation + 1] == '\n':
            myTailStart += 1
        pText = pText[:myStartLocation] + myHidden + pText[myTailStart:]
        # Continue after the inserted comment so the occurrence that was just
        # hidden is not found and wrapped a second time.
        mySearchFrom = myStartLocation + len(myHidden)
    return pText
#main program starts here
# For each file page listed by the deletion log generator: when the page does
# not exist locally and fileIsOnCommons() raises NoPage, comment the file out
# of every main-namespace page that uses it.
# NOTE(review): the pasted source had lost its indentation.  The nesting below
# assumes the removal loop belongs to the NoPage handler and that the final
# 'else' pairs with 'if not myImage.exists()' -- confirm against the original.
siteFamily = 'wikipedia'
siteLangCode = 'ml'
wikiSite = wikipedia.Site(code=siteLangCode, fam=siteFamily)
# Nothing in the visible script writes to this log; it is only opened and closed.
log = codecs.open('logs/remove-deleted-images.log', mode='at', encoding='utf-8')
try:
    for myImage in pagegenerators.LogpagesPageGenerator(number=100, mode='delete', namespace=[6]):
        wikipedia.output('\n' + myImage.title())
        if myImage.exists():
            wikipedia.output('File exists')
            continue

        try:
            myIsOnCommons = myImage.fileIsOnCommons()
        except pywikibot.NoPage:
            wikipedia.output('File does not exists, removing from linked pages')
        else:
            if myIsOnCommons:
                wikipedia.output('File is on Commons')
            # No NoPage was raised, so nothing is removed for this file.
            continue

        # These depend only on the image, so compute them once per image.
        myImageTitle = myImage.titleWithoutNamespace()
        #first letter might be lower
        myImageTitleLower = myImageTitle[:1].lower() + myImageTitle[1:]

        for myLinkedPage in myImage.usingPages():
            if myLinkedPage.namespace() != 0:
                continue #skip if not main namespace
            wikipedia.output('Removing from ' + myLinkedPage.title())
            myOriginalText = myLinkedPage.get()
            myText = myOriginalText
            for myImageNamespace in imageNamespaces:
                mySearchStringNormal = getGenericText(myImageNamespace + ':' + myImageTitle)
                mySearchStringLower = getGenericText(myImageNamespace + ':' + myImageTitleLower)
                wikipedia.output('Checking for ' + mySearchStringNormal + ' or ' + mySearchStringLower)
                myOccuranceNormal = getGenericText(myText).count(mySearchStringNormal)
                myOccuranceLower = 0
                if mySearchStringNormal != mySearchStringLower:
                    myOccuranceLower = getGenericText(myText).count(mySearchStringLower)
                if myOccuranceNormal > 0:
                    myText = hideImageText(myText, mySearchStringNormal, myOccuranceNormal)
                if myOccuranceLower > 0:
                    myText = hideImageText(myText, mySearchStringLower, myOccuranceLower)
            if myText == myOriginalText:
                # Nothing was hidden; do not save an unchanged page.
                continue
            # The summary uses the file name as-is: calling .title() on the
            # string would title-case it and misreport the file name.
            myLinkedPage.put(myText, comment=u'യന്ത്രം: നിലവിലില്ലാത്ത ' + myImageTitle + u' എന്ന ചിത്രം ഒഴിവാക്കുന്നു')
finally:
    # Run the shutdown steps even when the loop is interrupted by an error.
    try:
        wikipedia.stopme()
    finally:
        log.flush()
        log.close()
Add Comment
Please, Sign In to add comment