Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from collections import defaultdict
def make_http_query(url, query_params, timeout=60):
    """Send a GET request and return the response, or ``None`` on failure.

    Failures are reported on stdout rather than raised, so callers only
    have to check the return value.

    :param url: str, endpoint to request.
    :param query_params: dict, sent as the URL query string.
    :param timeout: float, seconds to wait on the server before giving up.
        Without a timeout ``requests`` may block indefinitely.
    :return: the ``requests`` response when the server answered with a
        non-error status, otherwise ``None``.
    """
    try:
        req = requests.get(url, params=query_params, timeout=timeout)
        req.raise_for_status()
        return req
    except requests.exceptions.HTTPError as http_err:
        # Print the real status/reason; not every HTTP failure is a 404.
        print("HTTP error: {}".format(http_err))
    except Exception as e:
        # Deliberately best-effort: connection problems, timeouts, etc. are
        # reported and turned into a ``None`` result instead of a crash.
        print(e)
    return None
def query_wikidata_for_lemma(lemma, hyponym=False):
    """Combine the hypernym/hyponym values of every sense of a lemma.

    Each sense ID returned by ``get_lemma_ids`` is expanded through
    ``get_lemma_nyms``; the value reported for a label is added to the
    running total kept under the lower-cased label.

    :param lemma: str, word/lemma to search for.
    :param hyponym: bool, passed through unchanged to ``get_lemma_nyms``.
    :return: defaultdict(float) mapping lower-cased label to summed value.
    """
    totals = defaultdict(float)
    for sense in get_lemma_ids(lemma):
        for label, value in get_lemma_nyms(sense, hyponym).items():
            totals[label.lower()] += value
    return totals
def get_lemma_ids(lemma):
    """Return the Wikidata entity IDs whose English label equals the lemma.

    The lemma is lower-cased and looked up with the ``wbsearchentities``
    API action; only hits whose label matches the lemma exactly
    (case-insensitively) are kept.

    Ex:
        get_lemma_ids('iphone')
        -> ['Q2766', 'Q621427', 'Q1577319', ...]

    :param lemma: str, word/lemma to search for.
    :return: List[str] of entity IDs; empty when the request fails or
        nothing matches.
    """
    lemma = lemma.lower()
    wiki_url = "https://www.wikidata.org/w/api.php"
    params = {
        'language': 'en',
        'format': 'json',
        'action': 'wbsearchentities',
        'search': lemma,
    }
    req = make_http_query(wiki_url, params)
    if not req or not req.content:
        return []
    # An error payload has no 'search' key; treat it as "no results".
    lemma_infos = json.loads(req.content).get('search', [])
    # A hit may come without a 'label' field, so read it defensively
    # instead of indexing.
    return [
        sense['id']
        for sense in lemma_infos
        if sense.get('label', '').lower() == lemma
    ]
def get_lemma_nyms(lemma_id, hyponym):
    """Return the hypernyms or hyponyms of a Wikidata entity with their depth.

    Runs a breadth-first traversal over "subclass of" (P279) links on the
    Wikidata SPARQL endpoint, starting from ``lemma_id``. The traversal
    direction is "Forward" by default and "Reverse" when ``hyponym`` is true.

    Ex:
        get_lemma_nyms('Q2766', False)  # iphone
        -> {'smartphone': <depth>, 'mobile phone': <depth>, ...}

    :param lemma_id: str, Wikidata entity ID such as 'Q2766'.
    :param hyponym: bool, if true traverse in reverse to collect hyponyms,
        otherwise collect hypernyms.
    :return: Dict[str, int] mapping each English label to the traversal
        depth reported for it; empty when the request fails or yields no
        rows. If two rows share a label, the later row wins.
    """
    direction = 'Reverse' if hyponym else 'Forward'
    wiki_url = "https://query.wikidata.org/sparql"
    query_struct = """PREFIX gas: <http://www.bigdata.com/rdf/gas#>
SELECT DISTINCT ?item1Label ?depth1
WHERE {
  {
    SELECT ?item1 ?item2 ?depth1
    WHERE {
      SERVICE gas:service {
        gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS";
        gas:in wd:%s;
        gas:traversalDirection "%s" ;
        gas:out ?item1 ;
        gas:out1 ?depth1 ;
        gas:out2 ?item2 ;
        gas:linkType wdt:P279 ;
      }
    }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
"""
    query = query_struct % (lemma_id, direction)
    # Go through the shared helper so HTTP/network failures are handled the
    # same way as in get_lemma_ids (reported, then an empty result).
    req = make_http_query(wiki_url, {'format': 'json', 'query': query})
    if not req or not req.content:
        return {}
    query_results = json.loads(req.content).get('results', {})
    nyms = {}
    # Visit every binding: the previous stride-2 loop dropped every other
    # row even though the query returns one row per label/depth pair.
    for binding in query_results.get('bindings', []):
        nyms[binding['item1Label']['value']] = int(binding['depth1']['value'])
    return nyms
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement