Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from nltk.corpus import wordnet as wn
- from collections import defaultdict
- from nltk.corpus.reader.wordnet import Synset
- import cPickle as pickle
def synset2pos(x):
    """Return the ``'<offset>-<pos>'`` key string for synset *x*.

    ``offset`` and ``pos`` are read from *x* either as plain attributes
    (the form this script originally relied on) or as zero-argument
    methods (the accessor form of newer NLTK releases). Both spellings
    produce the same key.
    """
    # Without the callable check, a method-style ``pos`` would make the
    # string concatenation below raise TypeError.
    offset = x.offset() if callable(x.offset) else x.offset
    pos = x.pos() if callable(x.pos) else x.pos
    return str(offset) + '-' + pos
# Load the synset table from the 'wn.pk' pickle cache. If loading fails,
# rebuild the table from the NLTK WordNet corpus and write the cache again.
# NOTE(review): unpickling can run arbitrary code, so 'wn.pk' must only ever
# be a file written by this script.
try:
    # The context manager closes the handle even when unpickling fails.
    with open('wn.pk', 'rb') as fin:
        wordnet = pickle.load(fin)
except Exception:
    # A missing, unreadable or corrupt cache all lead to a rebuild.
    # Catching Exception rather than using a bare ``except`` lets
    # KeyboardInterrupt and SystemExit propagate.
    wordnet = defaultdict(dict)
    for i in wn.all_synsets():
        offset = synset2pos(i)
        hyper = [synset2pos(j) for j in i.hypernyms()]
        hypo = [synset2pos(j) for j in i.hyponyms()]
        # ``lemma_names`` is an attribute in older NLTK and a method in
        # newer releases; store the names themselves, never a bound method.
        lemmas = i.lemma_names() if callable(i.lemma_names) else i.lemma_names
        wordnet[offset] = {'hypers': hyper, 'hypos': hypo, 'lemmas': lemmas}
    with open('wn.pk', 'wb') as fout:
        pickle.dump(wordnet, fout)
# Add three hand-written entries to the table, in this order. Some of the ids
# they reference ('12345678-n', '23456789-n') get no entry of their own here.
wordnet.update([
    ('99999999-n', {
        'hypers': ['12345678-n', '23456789-n'],
        'hypos': ['98765432-n', '87654321-n'],
        'lemmas': ['foobar', 'barfoo'],
    }),
    ('98765432-n', {
        'hypers': ['99999999-n', '23456789-n'],
        'hypos': ['87654321-n'],
        'lemmas': ['barbar'],
    }),
    ('87654321-n', {
        'hypers': ['98765432-n', '23456789-n'],
        'hypos': [],
        'lemmas': ['blacksheep'],
    }),
])
# Collect the key of every entry whose 'lemmas' list contains the query
# word, print that list of keys, then print an empty line.
query = 'dog'
matches = [key for key, entry in wordnet.items() if query in entry['lemmas']]
# The parenthesized forms print the same text under the print statement.
print(matches)
print('')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement