Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def delete_triple(triples: list,
                  complete_triples: List) -> List:
    """Return the entries of ``complete_triples`` that are not in ``triples``.

    The entries of ``triples`` are placed in a set, so they must be hashable.
    The relative order of the surviving entries follows ``complete_triples``.
    A tqdm progress bar is shown while ``complete_triples`` is scanned.

    Args:
        triples: Entries to remove.
        complete_triples: Full collection to filter.

    Returns:
        A new list holding every entry of ``complete_triples`` that does not
        occur in ``triples``.
    """
    # One-off set construction keeps each membership test O(1).
    excluded = set(triples)
    return [
        candidate
        for candidate in tqdm.tqdm(complete_triples, ncols=80)
        if candidate not in excluded
    ]
def _write_id_table(path: str, id_map: dict) -> None:
    """Write ``id_map`` to ``path``: a count line, then ``name<TAB>id`` rows."""
    with open(path, "w", encoding="utf-8") as out:
        out.write(str(len(id_map)) + "\n")
        for name, idx in tqdm.tqdm(id_map.items(), ncols=100):
            out.write(str(name) + "\t" + str(idx) + "\n")


def fb5m_delete_triples_explicit(triples: List[Tuple], save_folder: str = "/home/qzheng/data/kb_bias/subgraph/ypd_fb5m") -> None:
    """Remove ``triples`` from the FB5M triples and dump the rest as id files.

    The full triple set comes from ``load_fb5m_triples()``; the given
    ``triples`` are removed with ``delete_triple``. Of what remains, only
    triples whose object starts with ``"m."`` or ``"g."`` are kept. Entities
    and relations receive consecutive integer ids in order of first
    appearance, and three files are written inside ``save_folder``:

    * ``relation2id.txt`` -- relation count, then ``relation<TAB>id`` rows.
    * ``entity2id.txt``   -- entity count, then ``entity<TAB>id`` rows.
    * ``train2id.txt``    -- triple count, then shuffled
      ``subject_id object_id relation_id`` rows (space separated).

    NOTE(review): this layout looks like the OpenKE benchmark format --
    confirm against the consumer of these files.

    Args:
        triples: Triples to delete, indexable as (subject, relation, object).
        save_folder: Output directory; created (with parents) if missing.
    """
    complete_triples = load_fb5m_triples()
    remaining = delete_triple(triples, complete_triples)

    rel_id_dict = {}
    ent_id_dict = {}
    # Id rows for exactly the triples that pass the object filter. Using the
    # same filtered set for the id maps and for train2id.txt keeps the header
    # count right and avoids a KeyError on objects that were never assigned
    # an id.
    id_rows = []
    for triple in tqdm.tqdm(remaining, ncols=100):
        sub, rel, obj = triple[0], triple[1], triple[2]
        if not obj.startswith(("m.", "g.")):
            continue
        # len() is evaluated before insertion, so new keys get the next id.
        sub_id = ent_id_dict.setdefault(sub, len(ent_id_dict))
        obj_id = ent_id_dict.setdefault(obj, len(ent_id_dict))
        rel_id = rel_id_dict.setdefault(rel, len(rel_id_dict))
        id_rows.append((sub_id, obj_id, rel_id))

    # makedirs creates missing parents and does not go through a shell.
    os.makedirs(save_folder, exist_ok=True)

    # os.path.join puts the files inside save_folder whether or not the
    # caller supplied a trailing path separator.
    _write_id_table(os.path.join(save_folder, "relation2id.txt"), rel_id_dict)
    print("entitiy num {}".format(len(ent_id_dict)))
    _write_id_table(os.path.join(save_folder, "entity2id.txt"), ent_id_dict)

    random.shuffle(id_rows)
    with open(os.path.join(save_folder, "train2id.txt"), "w", encoding="utf-8") as f1:
        f1.write(str(len(id_rows)) + "\n")
        for sub_id, obj_id, rel_id in tqdm.tqdm(id_rows, ncols=100):
            f1.write(str(sub_id) + " " + str(obj_id) + " " + str(rel_id) + "\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement