Advertisement
Guest User

Untitled

a guest
Feb 28th, 2020
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.96 KB | None | 0 0
  1. def delete_triple(triples: list,
  2.                   complete_triples: List) -> List:
  3.  
  4.     new_triples = []
  5.     triples = set(triples)
  6.     for x in tqdm.tqdm(complete_triples, ncols=80):
  7.         if x in triples:
  8.             continue
  9.         new_triples.append(x)
  10.  
  11.     return list(new_triples)
  12.  
  13.  
  14. def fb5m_delete_triples_explicit(triples:List[Tuple], save_folder: str = "/home/qzheng/data/kb_bias/subgraph/ypd_fb5m") -> None:
  15.     complete_triples = load_fb5m_triples()
  16.     new_triples = delete_triple(triples, complete_triples)
  17.  
  18.     rel_id_dict = {}
  19.     ent_id_dict = {}
  20.     for x in tqdm.tqdm(new_triples, ncols=100):
  21.         sub = x[0]
  22.         rel = x[1]
  23.         obj = x[2]
  24.         if (obj.startswith("m.") or obj.startswith("g.")) is False: continue
  25.         if sub not in ent_id_dict:
  26.             ent_id_dict[sub] = len(ent_id_dict)
  27.  
  28.         if obj not in ent_id_dict:
  29.             ent_id_dict[obj] = len(ent_id_dict)
  30.  
  31.         if rel not in rel_id_dict:
  32.             rel_id_dict[rel] = len(rel_id_dict)
  33.  
  34.     if os.path.isdir(save_folder) is False:
  35.         os.system("mkdir " + save_folder)
  36.  
  37.     with open(save_folder + "relation2id.txt", "w") as f1:
  38.         f1.write(str(len(rel_id_dict)) + '\n')
  39.         for x in tqdm.tqdm(rel_id_dict, ncols=100):
  40.             f1.write(str(x) + "\t" + str(rel_id_dict[x]) + "\n")
  41.  
  42.     print("entitiy num {}".format(len(ent_id_dict)))
  43.     with open(save_folder + "entity2id.txt", "w") as f1:
  44.         f1.write(str(len(ent_id_dict)) + '\n')
  45.         for x in tqdm.tqdm(ent_id_dict, ncols=100):
  46.             f1.write(str(x) + "\t" + str(ent_id_dict[x]) + "\n")
  47.  
  48.     with open(save_folder + "train2id.txt", "w") as f1:
  49.         f1.write(str(len(new_triples)) + '\n')
  50.         random.shuffle(new_triples)
  51.         for x in tqdm.tqdm(new_triples, ncols=100):
  52.             sub = x[0]
  53.             rel = x[1]
  54.             obj = x[2]
  55.             f1.write(str(ent_id_dict[sub]) + " " + str(ent_id_dict[obj]) + " " + str(rel_id_dict[rel]) + "\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement