Guest User

Untitled

a guest
Jul 15th, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.56 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3.  
  4. """
  5. നീക്കം ചെയ്ത പ്രമാണങ്ങൾ താളുകളിൽ നിന്നും ഒഴിവാക്കുന്നതിന്
  6. നിർമ്മിച്ചത്: സുനിൽ വി.എസ്.
  7. തിയതി: 2010-11-27
  8. """
  9.  
  10. import wikipedia
  11. import pagegenerators
  12. import unicodedata
  13. import codecs
  14. import re
  15. import pywikibot
  16.  
  17. imageNamespaces = [ur'file',ur'File', ur'image', ur'Image', ur'ചിത്രം', ur'പ്രമാണം']
  18.  
  19. #to replace all whitespaces in text to underscore
  20. def getGenericText(pText):
  21. return pText.replace(' ','_')
  22.  
  23. #to remove imagetext from pagetext
  24. def hideImageText(pText,pImageText,pOccurances):
  25. for i in range(pOccurances):
  26. myLocation=getGenericText(pText).find(pImageText)
  27. myStartLocation=0
  28. myEndLocation=0
  29. #If it is from gallery
  30. if pText[myLocation-1]=='\n':
  31. wikipedia.output('Removing from Gallery')
  32. myEndLocation=pText.find('\n',myLocation+1)
  33. myStartLocation=myLocation
  34. #If it is image inclusion
  35. elif pText[myLocation-2:myLocation]=='[[':
  36. wikipedia.output ('Removing image')
  37. myLPCount=0 #Left paranthesis count ( [ )
  38. myRPCount=0 #Right paranthesis count ( ] )
  39. j=2
  40. while True:
  41. if pText[myLocation+j]=='[':
  42. myLPCount+=1
  43. if pText[myLocation+j]==']':
  44. myRPCount+=1
  45. j+=1
  46. if myRPCount-myLPCount==2:
  47. break
  48. myEndLocation=myLocation+j
  49. myStartLocation=myLocation-2
  50. wikipedia.output(pText[myLocation-2:myEndLocation])
  51.  
  52. myString=pText[myStartLocation:myEndLocation]
  53. myStringPart1=pText[0:myStartLocation]
  54. myStringPart2=pText[myEndLocation+1:]
  55. pText=myStringPart1+'<!--' + myString + '\n-->\n' + myStringPart2
  56. return pText
  57.  
  58. #main program starts here
  59.  
  60. siteFamily = 'wikipedia'
  61. siteLangCode = 'ml'
  62. wikiSite = wikipedia.Site(code=siteLangCode, fam=siteFamily)
  63. log = codecs.open('logs/remove-deleted-images.log', mode='at', encoding = 'utf-8')
  64.  
  65. for myImage in pagegenerators.LogpagesPageGenerator(number=100,mode='delete',namespace=[6]):
  66. wikipedia.output('\n' + myImage.title())
  67. if not myImage.exists():
  68. try:
  69. if myImage.fileIsOnCommons():
  70. wikipedia.output('File is on Commons')
  71. except pywikibot.NoPage:
  72. wikipedia.output('File does not exists, removing from linked pages')
  73. for myLinkedPage in myImage.usingPages():
  74. if myLinkedPage.namespace()<>0: continue #skip if not main namespace
  75. wikipedia.output('Removing from ' + myLinkedPage.title())
  76. myText=myLinkedPage.get()
  77. for myImageNamespace in imageNamespaces:
  78. myImageTitle=myImage.titleWithoutNamespace()
  79. mySearchStringNormal=getGenericText(myImageNamespace + ':' + myImageTitle)
  80. mySearchStringLower=getGenericText(myImageNamespace + ':' + myImageTitle.replace(myImageTitle[0],myImageTitle[0].lower(),1)) #first letter might be lower
  81. wikipedia.output('Checking for ' + mySearchStringNormal + ' or ' + mySearchStringLower)
  82. myOccuranceNormal=getGenericText(myText).count(mySearchStringNormal)
  83. myOccuranceLower=0
  84. if mySearchStringNormal<>mySearchStringLower:
  85. myOccuranceLower=getGenericText(myText).count(mySearchStringLower)
  86. if myOccuranceNormal>0:
  87. myText=hideImageText(myText,mySearchStringNormal,myOccuranceNormal)
  88. if myOccuranceLower>0:
  89. myText=hideImageText(myText,mySearchStringLower,myOccuranceLower)
  90. myLinkedPage.put(myText,comment=ur'യന്ത്രം: നിലവിലില്ലാത്ത ' + myImageTitle.title() + ur' എന്ന ചിത്രം ഒഴിവാക്കുന്നു')
  91. else:
  92. wikipedia.output('File exists')
  93. wikipedia.stopme()
  94. log.flush()
  95. log.close()
Add Comment
Please, Sign In to add comment