Advertisement
Guest User

Untitled

a guest
Feb 25th, 2020
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.33 KB | None | 0 0
import json
from collections import defaultdict

import requests
from requests.exceptions import HTTPError
  2.  
  3. def make_http_query(url, query_params):
  4.  
  5. try:
  6. req = requests.get(url, params=query_params)
  7. req.raise_for_status()
  8. return req
  9. except HTTPError as http_err:
  10. print("404: Data not found")
  11. except Exception as e:
  12. print(e)
  13. return None
  14.  
  15.  
  16. def query_wikidata_for_lemma(lemma, hyponym=False):
  17. h_nyms = defaultdict(float)
  18. lemma_senses = get_lemma_ids(lemma)
  19. for sense_id in lemma_senses:
  20. nym_counts = get_lemma_nyms(sense_id, hyponym)
  21. for x in nym_counts.keys():
  22. h_nyms[x.lower()] += nym_counts[x]
  23. return h_nyms
  24.  
  25. def get_lemma_ids(lemma):
  26. """Returns list of word IDs for each sense of a word.
  27.  
  28. Ex:
  29. get_lemma_ids('iphone')
  30. >>> ['Q2766', 'Q621427', 'Q1577319', 'Q48493', 'Q14850354', 'Q26831164', 'Q61504']
  31.  
  32. :param lemma: str, word/lemma to search for.
  33. :return: List[str]
  34. """
  35. lemma = lemma.lower()
  36. wiki_url = "https://www.wikidata.org/w/api.php"
  37. params = {'language':'en','format': 'json', 'action':'wbsearchentities', 'search':"{}".format(lemma)}
  38. req = make_http_query(wiki_url, params)
  39. if not req or not req.content:
  40. return []
  41. lemma_infos = json.loads(req.content)['search']
  42. lemma_senses = [sense['id'] for sense in lemma_infos if sense['label'].lower() == lemma]
  43. return lemma_senses
  44.  
  45. def get_lemma_nyms(lemma_id, hyponym):
  46. """Returns list of hypernyms/hyponyms for a particular lemma_id.
  47.  
  48. Ex:
  49. get_lemma_ids('Q2766') # iphone
  50. >>> ['smartphone', 'mobile phone', 'mobile computer', 'iPhone', 'smart device', 'information appliance']
  51. get_lemma_ids('Q2766', hypernym=True) # iphone
  52. >>> ['iPhone 6s Plus', 'iPhone 5S', 'iPhone X', ..., iPhone 5c', 'Apple iPhone 7 128GB Jet Black']
  53.  
  54. :param lemma: str, word/lemma to search for.
  55. :param hypernym: bool, if true return hypernyms (subclass lemmas)
  56. :return: List[str]
  57. """
  58. direction = 'Reverse' if hyponym else 'Forward'
  59. wiki_url = "https://query.wikidata.org/sparql"
  60. query_struct = """PREFIX gas: <http://www.bigdata.com/rdf/gas#>
  61. SELECT DISTINCT ?item1Label ?depth1
  62. WHERE {
  63. {
  64. SELECT ?item1 ?item2 ?depth1
  65. WHERE {
  66. SERVICE gas:service {
  67. gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS";
  68. gas:in wd:%s;
  69. gas:traversalDirection "%s" ;
  70. gas:out ?item1 ;
  71. gas:out1 ?depth1 ;
  72. gas:out2 ?item2 ;
  73. gas:linkType wdt:P279 ;
  74. }
  75. }
  76. }
  77. SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
  78. }
  79. """
  80. query = query_struct % (lemma_id, direction)
  81. req = requests.get(wiki_url, params={'format': 'json', 'query': query})
  82. if not req or not req.content:
  83. return {}
  84. query_results = json.loads(req.content)['results']
  85. query_bindings = query_results['bindings'] if 'bindings' in query_results else []
  86. if not query_bindings:
  87. return {}
  88. nyms = dict()
  89. for i in range(0, len(query_bindings), 2):
  90. depth = int(query_bindings[i]['depth1']['value'])
  91. item1Label = query_bindings[i]['item1Label']['value']
  92. nyms[item1Label] = depth
  93. return nyms
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement