Advertisement
Guest User

Untitled

a guest
Aug 18th, 2017
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.46 KB | None | 0 0
  1. '''
  2. Created on 8/17/17
  3. Sample code for
  4. @author: Noushin
  5. '''
  6.  
  7. class Line:
  8.   def __init__(self, wordIndex, token, lemma, POS, NER, head, depRel):
  9.       self.wordIndex = wordIndex
  10.       self.token = token
  11.       self.lemma =lemma
  12.       self.POS = POS
  13.       self.NER = NER
  14.       self.head = head
  15.       self.depRel = depRel
  16.      
  17.      
  18.      
  19.      
  20.  
  21.  
  22. def selectional_preferencer(indata, out_dict):
  23.     with open(indata, "r") as file:
  24.         lines = file.readlines()
  25.     output = open('{0}_output.txt'.format(out_dict), 'w')
  26.  
  27.     verbs = set(["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"])
  28.     subjects = set(["nsubj", "nsubjpass", "csubj", "csubjpass"])
  29.     sentence_index = 1
  30.     verb_dict_temp = {}
  31.     verb_obj = []
  32.     main_dict = {}
  33.     for line in lines:
  34.         if not line.strip():
  35.  
  36.             if verb_obj:
  37.                 main_dict[sentence_index] = verb_obj
  38.             verb_dict_temp.clear()
  39.             verb_obj = []
  40.             sentence_index += 1
  41.             continue
  42.         else:
  43.  
  44.             word_index = line.strip().split("\t")[0]
  45.             lemma = line.strip().split("\t")[2]
  46.             if line.strip().split("\t")[3] in verbs:
  47.                 verb_dict_temp[int(word_index)] = lemma
  48.                 # verb_obj.append(lemma+": verb")
  49.             if line.strip().split("\t")[-1] == "dobj":
  50.                 if line.strip().split("\t")[4] == "O" and line.strip().split("\t")[3] != "PRP":
  51.                     dobj = line.strip().split("\t")[1]
  52.                     index_v = int(line.strip().split("\t")[-2])
  53.                     try:
  54.                         verb_of_obj = verb_dict_temp[index_v]
  55.                         verb_obj.append(verb_of_obj + ": object : " + dobj)
  56.                     except:  # since sometimes the index does not exist in verb dictionary
  57.                         pass
  58.  
  59.                 elif line.strip().split("\t")[4] != "O":
  60.                     NER_type = line.strip().split("\t")[4]
  61.                     index_ner = int(line.strip().split("\t")[-2])
  62.                     try:
  63.                         verb_of_obj = verb_dict_temp[index_ner]
  64.                         # verb_obj.append(str(verb_of_obj + ":" + NER_type))
  65.                         verb_obj.append(verb_of_obj + ":" + NER_type)
  66.                     except:
  67.                         pass
  68.  
  69.             if line.strip().split("\t")[-1] in subjects:
  70.                 if line.strip().split("\t")[4] == "O" and line.strip().split("\t")[3] != "PRP":
  71.                     dobj = line.strip().split("\t")[1]
  72.                     index_v = int(line.strip().split("\t")[-2])
  73.                     try:
  74.                         verb_of_obj = verb_dict_temp[index_v]
  75.                         verb_obj.append(verb_of_obj + ": subject : " + dobj)
  76.                     except:  # since sometimes the index does not exist in verb dictionary
  77.                         pass
  78.  
  79.                 elif line.strip().split("\t")[4] != "O":
  80.                     NER_type = line.strip().split("\t")[4]
  81.                     index_ner = int(line.strip().split("\t")[-2])
  82.                     try:
  83.                         verb_of_obj = verb_dict_temp[index_ner]
  84.                         # verb_obj.append(str(verb_of_obj + ":" + NER_type))
  85.                         verb_obj.append(verb_of_obj + ":" + NER_type)
  86.                     except:
  87.                         pass
  88.  
  89.     output.write(str(main_dict))
  90.     output.close()
  91.     return (main_dict)
  92.  
  93.  
  94. selectional_preferencer("simple_test.conll", "select")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement