Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
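'''
Created on 8/17/17
Sample code for extracting verb selectional preferences (subject/object
fillers per verb) from a tab-separated CoNLL-style dependency parse.
@author: Noushin
'''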
# Penn Treebank verb tags; only tokens carrying one of these can act as the
# governing verb of a subject/object.
_VERB_TAGS = frozenset(("VB", "VBD", "VBG", "VBN", "VBP", "VBZ"))
# Dependency relations that mark a token as the subject of its head.
_SUBJECT_RELS = frozenset(("nsubj", "nsubjpass", "csubj", "csubjpass"))


def _tally_sentence(rows, preferences):
    """Add one sentence's verb-argument counts to *preferences* in place.

    *rows* is a list of column lists (one per token) laid out as
    index, token, lemma, POS, NER, head, dependency relation.
    """
    # Index every verb of the sentence up front, so an argument that appears
    # before its verb resolves exactly like one that appears after it, and a
    # non-verb head can never be mistaken for a verb.
    verb_lemmas = {int(row[0]): row[2] for row in rows if row[3] in _VERB_TAGS}

    for row in rows:
        token, pos, ner, head, dep_rel = row[1], row[3], row[4], row[5], row[6]
        is_subject = dep_rel in _SUBJECT_RELS
        is_object = dep_rel == "dobj"
        if not (is_subject or is_object):
            continue
        if pos == "PRP":
            # Personal pronouns are never counted as fillers.
            continue

        verb = verb_lemmas.get(int(head))
        if verb is None:
            # Head is the root (0), outside the sentence, or not a verb.
            continue

        is_entity = ner != "O"
        if is_subject and is_entity:
            # Named-entity subjects are generalised to their entity type.
            key, filler = verb + "-subject", ner
        elif is_object and not is_entity:
            # Plain direct objects are counted by surface token.
            key, filler = verb + "-object", token
        else:
            # Non-entity subjects and entity objects are deliberately not
            # recorded (those branches were disabled in the original code).
            continue

        fillers = preferences.setdefault(key, {})
        fillers[filler] = fillers.get(filler, 0) + 1


def selectional_preferencer(indata, out_dict):
    """Count verb selectional preferences in a CoNLL-style parse file.

    The input is read as blank-line-separated sentences whose non-blank lines
    hold at least seven tab-separated columns: word index, token, lemma, POS
    tag, NER tag, head index and dependency relation.  For every non-pronoun
    subject or direct object whose head is a verb in the same sentence:

    * a subject with a NER tag other than ``"O"`` is counted under
      ``"<verb lemma>-subject"`` by its NER tag;
    * a direct object whose NER tag is ``"O"`` is counted under
      ``"<verb lemma>-object"`` by its token.

    The resulting mapping ``{key: {filler: count}}`` is printed, written as
    ``str([mapping])`` to ``"<out_dict>_output.txt"`` and returned.

    Args:
        indata: Path of the tab-separated input file.
        out_dict: Prefix of the output file name.

    Returns:
        A one-element list containing the ``{key: {filler: count}}`` dict.

    Raises:
        IndexError: If a non-blank line has fewer than seven columns.
        ValueError: If a verb's word index, or the head index of a
            non-pronoun subject/object, is not an integer.
    """
    preferences = {}
    sentence = []
    with open(indata, "r") as conll:
        for raw_line in conll:
            line = raw_line.strip()
            if line:
                sentence.append(line.split("\t"))
                continue
            # A blank line closes the current sentence (possibly empty).
            _tally_sentence(sentence, preferences)
            sentence = []
    # The last sentence may not be followed by a blank line.
    _tally_sentence(sentence, preferences)

    main_list = [preferences]
    print(preferences)
    with open('{0}_output.txt'.format(out_dict), 'w') as output:
        output.write(str(main_list))
    return main_list
if __name__ == "__main__":
    # Run the extractor on the small sample parse when executed as a script;
    # the guard keeps a plain import of this module free of file I/O.
    selectional_preferencer("simple_test.conll", "selecttt")
    # Alternative input used during development:
    # selectional_preferencer("dev-muc3-0001-0100.conll", "select_pe")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement