Advertisement
alvations

Snippets from XWN

Apr 29th, 2013
254
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.69 KB | None | 0 0
  1. import codecs
  2.  
  3. '''
  4. Snippets from xWN, a wordnet toy-tool developed with Open Multilingual WordNet http://www.casta-net.jp/~kuribayashi/multi/
  5. '''
  6.  
  7. # This function reads a .tab WN file and returns the WN in dic format.
  8. def readWNfile(wnfile, option="ss"):
  9.   reader = codecs.open(wnfile, "r", "utf8").readlines()
  10.   wn = {}
  11.   for l in reader:
  12.     if l[0] == "#": continue
  13.     if l.split("\t")[0][-1] == "n":
  14.       if option=="ss":
  15.         k = l.split("\t")[0] #ss as key
  16.         v = l.split("\t")[2][:-1] #word
  17.       else:
  18.         v = l.split("\t")[0] #ss as value
  19.         k = l.split("\t")[2][:-1] #word as key
  20.       try:
  21.         temp = wn[k]
  22.         wn[k] = temp + ";" + v
  23.       except KeyError:
  24.         wn[k] = v
  25.     else:
  26.       continue  
  27.   return wn
  28.  
  29. def createDicfrom2WNs(wnfile1, wnfile2, outfile=None, delimiter=None):
  30.   wn1 = readWNfile(wnfile1,"ss")
  31.   wn2 = readWNfile(wnfile2,"ss")
  32.   newdic = {}
  33.   # Load WNs' entries into dictionary.
  34.   for i in wn1:
  35.     if i in wn2:
  36.       if len(wn1[i]) or len(wn2[i]) is 1:
  37.         for j in wn1[i].split(";"):
  38.           print j, wn2[i]
  39.           newdic[j] = wn2[i]
  40.     # Configure output file.
  41.   outfile = wnfile1[-7:-4]+"-"+wnfile2[-7:-4]+".dic" if outfile==None else outfile
  42.   out = codecs.open(outfile,"w","utf8")
  43.   delimiter = "\t" if delimiter ==None else delimiter
  44.   # Loop through dictionary and output entries.
  45.   for k in sorted(newdic):
  46.     for v in sorted(newdic[k].split(";")):
  47.         print>>out, k + delimiter + v
  48.   return newdic
  49.  
  50. eng_wnfile = '/media/E418A6B618A686E0/xling/wordnet/wn-data-eng.tab'
  51. spa_wnfile = '/media/E418A6B618A686E0/xling/wordnet/wn-data-ind.tab'
  52.  
  53. createDicfrom2WNs(eng_wnfile,spa_wnfile, delimiter=" @ ")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement