Advertisement
Guest User

Untitled

a guest
Sep 23rd, 2019
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.30 KB | None | 0 0
  1. import os
  2.  
  3. nltkdata_wn = '/home/alvas/nltk_data/corpora/wordnet/'
  4. wn31 = "http://wordnetcode.princeton.edu/wn3.1.dict.tar.gz"
  5. if not os.path.exists(nltkdata_wn+'wn3.0'):
  6.     os.mkdir(nltkdata_wn+'wn3.0')
  7. os.system('mv '+nltkdata_wn+"* "+nltkdata_wn+"wn3.0/")
  8. if not os.path.exists('wn3.1.dict.tar.gz'):
  9.     os.system('wget '+wn31)
  10. os.system("tar zxf wn3.1.dict.tar.gz -C "+nltkdata_wn)
  11. os.system("mv "+nltkdata_wn+"dict/* "+nltkdata_wn)
  12. os.rmdir('/home/alvas/nltk_data/corpora/wordnet/dict')
  13.  
  14. # Creating lexnames file.
  15.  
  16. dbfiles = nltkdata_wn+'dbfiles'
  17. with open(nltkdata_wn+'lexnames', 'w') as fout:
  18.     for i,j in enumerate(sorted(os.listdir(dbfiles))):
  19.         pos = j.partition('.')[0]
  20.         if pos == "noun":
  21.             syncat = 1
  22.         elif pos == "verb":
  23.             syncat = 2
  24.         elif pos == "adj":
  25.             syncat = 3
  26.         elif pos == "adv":
  27.             syncat = 4
  28.         elif j == "cntlist":
  29.             syncat = "cntlist"
  30.         fout.write("\t".join([str(i).zfill(2),j,str(syncat)])+"\n")
  31.        
  32. from nltk.corpus import wordnet as wn
  33.  
  34. # Checking generated lexnames file.
  35. for i, line in enumerate(open('/home/alvas/nltk_data/corpora/wordnet/lexnames','r')):
  36.     index, lexname, _ = line.split()
  37.     ##print line.split(), int(index), i
  38.     assert int(index) == i
  39.    
  40. # Testing wordnet function.
  41. print wn.synsets('dog')
  42. for i in wn.all_synsets():
  43.     print i, i.pos(), i.definition()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement