Advertisement
Wauteurz

WV BannerBot adapted, redacted

Jan 16th, 2018
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.66 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. # AUTHOR: Kizar ( *redacted* ); Adapted by Wauteurz ( *redacted* )
  3. # To purge category pages, specifically Voy:en:Category:Banner missing from Wikidata
  4. # python pwb.py touch -lang:en -family:wikivoyage -cat:"Banner missing from Wikidata"
  5. # python pwb.py newitem -lang:en -family:wikivoyage -namespace:0 -pageage:0 -lastedit:0 -cat:"Banner missing from Wikidata"
  6.  
  7. import sys, re, time
  8.  
  9. import pwb # only needed if you haven't installed the framework as side-package
  10. import pywikibot
  11. from pywikibot import pagegenerators
  12.  
  13. clean = re.compile('\\s*(.*\\S)?\\s*')
  14.  
  15. def has_regexp(article, regexp):
  16. old = re.compile(regexp, re.UNICODE | re.DOTALL) # Important, but do not replace . with any other character
  17. tmp_article = pywikibot.replaceExcept(article, old, r'88888888888888888888999998888888888888888888', [])
  18. if article != tmp_article:
  19. return True
  20. else:
  21. return False
  22.  
  23. def replace_regexp(article, regexp, chang):
  24. old = re.compile(regexp, re.UNICODE | re.DOTALL) # Important, but do not replace . with any other character
  25. return pywikibot.replaceExcept(article, old, chang, [])
  26.  
  27. def search_regexp(article, regexp):
  28. old = re.compile(regexp, re.UNICODE | re.DOTALL) # Important, but do not replace . with any other character
  29. return re.search(old, article)
  30.  
  31. def is_commons_img(img): # Returns true if image is in commons
  32. img_page = pywikibot.ImagePage(pywikibot.Site('commons', 'commons'), u'Image:'+img)
  33. if img_page.exists():
  34. return True
  35. else:
  36. return False
  37.  
  38. site = pywikibot.Site('en', 'wikivoyage') # Any site will work, this is just an example.
  39. repo = pywikibot.Site("wikidata", "wikidata").data_repository() # This is a DataSite object
  40.  
  41. cat = pywikibot.Category(site, 'Category:Banner missing from Wikidata')
  42. gen = pagegenerators.CategorizedPageGenerator(cat)
  43. for page in gen:
  44. if page.isRedirectPage() or page.isDisambig():
  45. continue
  46. pywikibot.output(page.title())
  47.  
  48. try:
  49. item = pywikibot.ItemPage.fromPage(page) # This can be used for any page object
  50. except:
  51. print("!!! Doesn't exist in Wikidata")
  52. continue
  53. item.get() # You need to call it to access any data.
  54.  
  55. if not 'P948' in item.claims:
  56. content = page.text
  57.  
  58. if has_regexp(content, r'\{\{\s*[Pp]agebanner\s*\|\s*([^|}]+)\s*[|}]' ) == True: # When has correct image
  59. # Check if the image is in Commons
  60. photo = search_regexp(content, r'\{\{\s*[Pp]agebanner\s*\|\s*([^|}]+)\s*[|}]' ).group(1)
  61. photo = clean.match(photo).group(1) # Clean the weird stuff
  62. pywikibot.output(photo)
  63. if photo != 'Pagebanner default.jpg' and photo != 'Disambiguation banner.png' and photo != 'TT Banner.jpg' and photo != 'Mena-asia_default_banner.jpg' and != 'Other Destinations Wikivoyage banner.jpg' and != 'Welcome banner.jpg' and != 'Australia-oceania default banner.jpg' and != 'Carribean default banner.jpg' and != 'Disambiguation banner1.jpg' and != 'Itinerary banner.jpg' and != 'NZ default banner.jpg' and != 'S-amer africa default banner.jpg' and != 'TT Banner.jpg' and != 'Whole world North gy.jpg':
  64. #if is_commons_img(photo):
  65. img_page = pywikibot.ImagePage(pywikibot.Site('commons', 'commons'), u'Image:'+photo)
  66. if img_page.exists():
  67. claim = pywikibot.Claim(repo, 'P948')
  68. claim.setTarget(img_page)
  69. item.addClaim(claim)
  70. else:
  71. print("!!! Doesn't exist in commons")
  72.  
  73. # Purging commands above
  74. page.save() # Purging cache
  75. time.sleep(2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement