Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''
- this is just a suggestion on how it should work
- '''
_SUBJECT_RELATIONS = frozenset({"nsubj", "nsubjpass", "csubj", "csubjpass"})


def _sentence_relations(rows):
    """Return the relation strings for one sentence, given its split rows."""
    # Index every row first so a head can be resolved whether it appears
    # before or after its dependent in the sentence.
    lemma_by_index = {}
    for row in rows:
        try:
            lemma_by_index[int(row[0])] = row[2]
        except ValueError:
            # An index that is not a plain integer can never equal a head index.
            continue

    relations = []
    for row in rows:
        token, pos, ner, head, dep_rel = row[1], row[3], row[4], row[5], row[6]
        is_object = dep_rel == "dobj"
        is_subject = dep_rel in _SUBJECT_RELATIONS
        if not (is_object or is_subject) or pos == "PRP":
            continue

        try:
            head_lemma = lemma_by_index.get(int(head))
        except ValueError:
            head_lemma = None
        if head_lemma is None:
            # The head has no row in this sentence (presumably 0 for the
            # root), so there is nothing to pair the argument with.
            continue

        if is_object:
            relations.append(head_lemma + ": object : " + token)
        # NOTE(review): the NER tag is compared with "=" here; confirm this is
        # the intended tag value, it is kept exactly as the original wrote it.
        if is_subject or ner == "=":
            relations.append(head_lemma + ":" + ner)
    return relations


def selectional_preferencer(indata, out_dict):
    """Collect head/argument pairs for subjects and direct objects.

    ``indata`` is read as tab-separated rows with at least seven columns:
    word index, token, lemma, POS tag, NER tag, head index and dependency
    relation.  Blank lines separate sentences.

    For every row whose POS tag is not ``PRP`` and whose relation is ``dobj``
    or one of ``nsubj``/``nsubjpass``/``csubj``/``csubjpass``, the lemma of the
    row's head is looked up inside the same sentence (whatever the head's POS
    tag) and recorded as:

    * ``"<head lemma>: object : <token>"`` for a direct object;
    * ``"<head lemma>:<NER tag>"`` for a subject, or for an object whose NER
      tag is ``"="``.

    Rows whose head cannot be found in the sentence are skipped.

    Args:
        indata: Path of the input file.
        out_dict: Prefix of the output file; ``str()`` of the result is
            written to ``"<out_dict>_output.txt"``.

    Returns:
        A dict mapping the 1-based sentence number to the list of strings
        recorded for that sentence.  Sentences with nothing recorded are
        omitted; every blank line advances the sentence number.

    Raises:
        IndexError: If a non-blank row has fewer than seven columns.
    """
    main_dict = {}
    sentence_index = 1
    rows = []

    with open(indata, "r") as file:
        for line in file:
            line = line.strip()
            if line:
                rows.append(line.split("\t"))
                continue
            relations = _sentence_relations(rows)
            if relations:
                main_dict[sentence_index] = relations
            rows = []
            sentence_index += 1

    # The last sentence is not necessarily followed by a blank line; flush it
    # so it is not silently lost.
    relations = _sentence_relations(rows)
    if relations:
        main_dict[sentence_index] = relations

    with open('{0}_output.txt'.format(out_dict), 'w') as output:
        output.write(str(main_dict))
    return main_dict
# Run the extraction on the sample file only when executed as a script, so
# importing this module does not trigger file I/O.
if __name__ == "__main__":
    selectional_preferencer("d1dHm8uS.txt", "select")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement