Advertisement
Guest User

Untitled

a guest
Feb 16th, 2018
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.89 KB | None | 0 0
  1. # -*- coding: utf-8  -*-
  2. ########################
  3. ## AmpersandBot  code ##
  4. ##   by T. H. Kelly   ##
  5. ## aka  PinkAmpersand ##
  6. ########################
  7.  
  8. import pywikibot
  9. import requests
  10. import urllib.request
  11. from urllib.request import urlopen
  12. import json
  13. import unidecode
  14. from unidecode import unidecode
  15. import sys
  16.  
  17. max = 15000
  18. total = 0
  19.    
  20. # following part possibly still buggy. only thing that didn't
  21. # work in test run. has been modified since, in a way that
  22. # hopefully will work. not super important, but would like to fix.
  23.    
  24. def log(a,b,c):
  25.     with open("C:\\Users\\Tom\\Desktop\\python\\AmpersandBot\\" + a + ".txt", "a") as file:
  26.         file.write("[[" + b + "]] " + c + " \n")
  27.    
  28. # the function at the script's core
  29.    
  30. def Ukraine():
  31.     # parse layers of JSON to get to the links themselves
  32.     query = API_JSON.get("query")
  33.     backlinks = query.get("backlinks")
  34.     # the loop that does the actual heavy lifting
  35.     for link in backlinks:
  36.         title = link["title"]
  37.         site = pywikibot.Site("wikidata", "wikidata")
  38.         repo = site.data_repository()
  39.         item = pywikibot.ItemPage(repo, title)
  40.         ic = item.get()["claims"]
  41.         id = item.get()["descriptions"]
  42.         il = item.get()["labels"]
  43.         # various steps to weed through the ones we don't want and pick out the ones we do
  44.         if ic:
  45.             if "P31" in ic:
  46.                 p31val = str(ic["P31"][0].getTarget()) # get value of "instance of"
  47.                 p17val = str(ic["P17"][0].getTarget()) # "   "     "  "country"
  48.                 if p31val == "[[wikidata:Q21672098]]" and p17val == "[[wikidata:Q212]]":
  49.                     if id.get("en"):
  50.                         badDescs = ("Ukrainian village","village in Ukraine","village of Ukraine","administrative territorial entity of Ukraine")
  51.                         if id.get("en") in badDescs:
  52.                             updateItem(item,ic,il,title)
  53.                         else:
  54.                             print(title + " already properly described and labeled") # marker in cmd line for non-updated items
  55.                     else:
  56.                      updateItem(item,ic,il,title)
  57.                    
  58.                 else:
  59.                     log("P17errors",title,"")
  60.                     print("P17error: " + title)
  61.  
  62. def updateItem(item,ic,il,title):
  63.     def defineAs(a): # function for setting descriptions
  64.         item.editDescriptions(descriptions={'en': a}, summary=(u'added [en] description "' + a + '", using P17, P31, and P131 values'))
  65.     def labelAs(a): # function for setting labels
  66.         item.editLabels(labels={'en': a}, summary=(u'set [en] label to "' + a + '" based on automated romanization of Ukrainian label'))
  67.     if "P131" in ic: # does the item have a parent entity listed?
  68.         p131val = ic["P131"][0].getTarget() # the parent entity
  69.         if "en" in p131val.get()["labels"]: # does the entity have an English label?
  70.             p131label = p131val.get()["labels"]["en"] # the English label of the parent entity
  71.             level2c = p131val.claims # claims taken from the parent entity
  72.             if "P131" in level2c: # does *that* entity have a parent entity listed?
  73.                 level2val = level2c["P131"][0].getTarget() # the grandparent entity
  74.                 level3c = level2val.get()["claims"] # claims taken from the grandparent entity (doing it the other way doesn't work, for w/e reason)
  75.                 if str(level3c["P31"][0].getTarget()) == "[[wikidata:Q3348196]]": # is the grandparent entity an oblast of Ukraine
  76.                     if "en" in level2val.get()["labels"]: # and does it have an English label?
  77.                         level2label = level2val.get()["labels"]["en"]
  78.                         if total < max:
  79.                             global total
  80.                             total += 1
  81.                             defineAs("village in " + p131label + ", " + level2label + ", Ukraine")
  82.                             log("updates",title,"description") # for consultation after run
  83.                             print("updated " + title + " description (#" + str(total) + ")") # marker in cmd line for updated items
  84.                         else:
  85.                             sys.exit("total reached: " + str(max))
  86.                     else:
  87.                         log("lvl2noEn",title,"")
  88.                         print("lvl2noEn: " + title)
  89.                 else:
  90.                     log("notOblast",title,"")
  91.                     print ("notOblast" + title)
  92.             else:
  93.                 log("nolvl2",title,"")
  94.                 print("nolvl2: " + title)
  95.         else:
  96.             log("p131noEn",title,"")
  97.             print("p131noEn: " + title)
  98.     else:
  99.         log("noP131",title,"")
  100.         print("noP131: " + title)
  101.     if not "en" in il and "uk" in il:
  102.         try:
  103.             ukval = il["uk"]
  104.             ukroman = unidecode(ukval)
  105.             if total < max:
  106.                 global total
  107.                 total += 1
  108.                 labelAs(ukroman)
  109.                 log("updates",title,"label")
  110.                 print("updated " + title + " label (#" + str(total) + ")")
  111.             else:
  112.                 sys.exit("total reached: " + str(max))
  113.         except (pywikibot.exceptions.OtherPageSaveError, pywikibot.data.api.APIError, pywikibot.OtherPageSaveError):
  114.             log("dupeErrors",title,"")
  115.             print("dupeError: " + title)
  116.  
  117. # The framework of the script
  118.  
  119. # get JSON from API and make usable
  120. # TODO: move to predefined function, if it's not too hard.
  121. # would be useful for adapting script to other tasks
  122. blcont = "&"
  123. while blcont:
  124.     token = urlopen(u"https://www.wikidata.org/w/api.php?action=query&meta=tokens&format=json&type=login")
  125.     token_read = token.read()
  126.     token_str = str(token_read)
  127.     token_replace = token_str.replace("'",'"')
  128.     token_sr = token_replace.strip('b"').replace('}"','}')
  129.     token_j = json.loads(token_sr)
  130.     token_j2 = token_j["query"]["tokens"]["logintoken"]
  131.     login_url = u"https://www.wikidata.org/w/api.php?&action=clientlogin&username=AmpersandBot&password=diagonalcutters&loginreturnurl=http://tomkel.ly&logintoken="
  132.     def login():
  133.         urlopen(login_url + token_j2)
  134.     login()
  135.     API = urlopen(u"https://www.wikidata.org/w/api.php?action=query&list=backlinks&bltitle=Q21672098&bllimit=5000&indexpageids=&format=json" + blcont).read()
  136.     API_decode = API.decode(encoding="utf-8", errors="strict")
  137.     API_JSON = json.loads(API_decode)
  138.  
  139.     if API_JSON:
  140.         if "query" in API_JSON:
  141.             print ("query success")
  142.             Ukraine() # actually execute the whole damn thing
  143.         # load next batch of JSON results if extant
  144.         if "continue" in API_JSON:
  145.             cont = API_JSON.get("continue")
  146.             if cont["continue"] == "-||":
  147.                 blcont = "&continue=-||&blcontinue=" + cont["blcontinue"]
  148.                 print("loading next page of API")
  149.             else:
  150.                 sys.exit("API completely processed")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement