Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import random as rand

import networkx as nx
import pandas as pd
from networkx.algorithms import bipartite
from sklearn.metrics import roc_auc_score
def searchForPattern(A, x, Neighbor_of_V, Neighbor_of_U, y_true,
                     drug_ids=None, ise_ids=None):
    """Classify the candidate link between U-side node ``A`` and V-side node ``x``.

    ``Neighbor_of_U`` maps each U-side node (a drug in the calling script) to
    the set of its V-side neighbours, and ``Neighbor_of_V`` is the reverse
    mapping.  Both mappings are MUTATED when candidate edges are confirmed
    against ``y_true`` (see below).

    Args:
        A: U-side node of the pair being scored.
        x: V-side node of the pair being scored.
        Neighbor_of_V: dict, V-side node -> set of U-side neighbours.
        Neighbor_of_U: dict, U-side node -> set of V-side neighbours.
        y_true: flat 0/1 sequence laid out row-major over
            ``sorted(set(drug_ids)) x sorted(set(ise_ids))``.
        drug_ids: iterable of U-side identifiers used to index ``y_true``.
            Defaults to the module-level ``DRUGS`` list.
        ise_ids: iterable of V-side identifiers used to index ``y_true``.
            Defaults to the module-level ``ISE`` list.

    Returns:
        ``"DL"`` when ``A`` and ``x`` are already directly linked;
        ``"NL"`` when no pattern can be found;
        otherwise a set of ``(A, e)`` tuples, where each ``e`` is a U-side
        neighbour of ``x`` reachable from ``A`` through a shared V-side node.
    """
    # Position lookups into y_true; built lazily because only the
    # edge-confirmation branch below needs them.
    drug_pos = ise_pos = None
    n_ise = 0

    # The original re-invoked itself after confirming edges; that tail call
    # is expressed as a loop here so deep retries cannot exhaust the stack.
    while True:
        nei_A = Neighbor_of_U[A]
        nei_x = Neighbor_of_V[x]
        if A in nei_x:
            return "DL"

        # V-side nodes two hops away from x (excluding x itself).
        nei_nei_x = set().union(*(Neighbor_of_U[n] for n in nei_x))
        nei_nei_x.discard(x)

        # U-side nodes two hops away from A (excluding A itself).
        nei_nei_A = set().union(*(Neighbor_of_V[n] for n in nei_A))
        nei_nei_A.discard(A)

        # V-side nodes adjacent to A that also sit two hops from x.
        shared = nei_A & nei_nei_x
        if shared:
            reachable = set().union(*(Neighbor_of_V[e] for e in shared))
            return {(A, e) for e in reachable if e in nei_x}

        # No connecting pattern yet: propose edges that would create one.
        candidates = {(u, x) for u in nei_nei_A}
        candidates.update((A, v) for v in nei_nei_x)
        if not candidates:
            # Nothing to try.  Retrying on unchanged state (as the original
            # recursion did) would never terminate.
            return "NL"

        if drug_pos is None:
            drugs = DRUGS if drug_ids is None else drug_ids
            ises = ISE if ise_ids is None else ise_ids
            drug_pos = {d: i for i, d in enumerate(sorted(set(drugs)))}
            ise_pos = {s: i for i, s in enumerate(sorted(set(ises)))}
            n_ise = len(ise_pos)

        added = False
        for a, b in candidates:
            if y_true[drug_pos[a] * n_ise + ise_pos[b]] != 1:
                return "NL"
            if b not in Neighbor_of_U[a] or a not in Neighbor_of_V[b]:
                added = True
            Neighbor_of_U[a].add(b)
            Neighbor_of_V[b].add(a)

        if not added:
            # Every candidate was already present, so another pass would see
            # exactly the same neighbourhoods; stop instead of spinning.
            return "NL"
def sample(ISE, DRUGS, Neighbor_of_U, Neighbor_of_V, node_pair_list,
           sample_size=200, max_index=800):
    """Remove a random subset of edges from the graph structures, in place.

    Row ``i`` of the parallel lists ``DRUGS`` / ``ISE`` describes one edge.
    Row indices are drawn without replacement from ``range(1, max_index)``
    (row 0 is never drawn, as in the original code), and each drawn edge is
    removed from ``Neighbor_of_U``, ``Neighbor_of_V`` and ``node_pair_list``.

    Args:
        ISE: list of V-side identifiers, one per edge row.
        DRUGS: list of U-side identifiers, one per edge row.
        Neighbor_of_U: dict, U-side node -> set of V-side neighbours.
        Neighbor_of_V: dict, V-side node -> set of U-side neighbours.
        node_pair_list: collection of ``(drug, ise)`` tuples.
        sample_size: number of rows to draw (default 200).
        max_index: exclusive upper bound on drawn row indices (default 800).

    Returns:
        The same three objects that were passed in, after mutation:
        ``(node_pair_list, Neighbor_of_U, Neighbor_of_V)``.
    """
    # Clip to the data actually available so short inputs do not raise
    # IndexError (row lookup) or ValueError (sample larger than population).
    upper = min(max_index, len(ISE), len(DRUGS))
    population = range(1, max(upper, 1))
    draw = min(sample_size, len(population))

    for i in rand.sample(population, draw):
        drug = DRUGS[i]
        ise = ISE[i]
        # The same (drug, ise) edge may appear on several rows; tolerate an
        # edge that an earlier draw already removed instead of raising.
        Neighbor_of_U[drug].discard(ise)
        Neighbor_of_V[ise].discard(drug)
        pair = (drug, ise)
        if pair in node_pair_list:
            node_pair_list.remove(pair)
    return node_pair_list, Neighbor_of_U, Neighbor_of_V
def calc_weight(pattern, GU_new, GV_new, Neighbor_of_U):
    """Turn the result of ``searchForPattern`` into a numeric link score.

    Args:
        pattern: ``'DL'``, ``'NL'``, or an iterable of ``(u, v)`` node pairs.
        GU_new: graph-like object exposing ``degree(node)`` for ``u`` and ``v``.
        GV_new: graph-like object exposing ``degree(node)`` for the
            neighbours collected from ``Neighbor_of_U``.
        Neighbor_of_U: dict mapping each ``u`` / ``v`` to its neighbour set.

    Returns:
        ``1.0`` for ``'DL'``, ``0.0`` for ``'NL'``; otherwise the sum over all
        pairs of ``2 / (deg(u) + deg(v)) * sum(1 / deg(n))`` taken over the
        pooled neighbours ``n`` of ``u`` and ``v``.  Terms whose denominator
        is zero contribute nothing.
    """
    if pattern == 'DL':
        return 1.0
    if pattern == 'NL':
        return 0.0

    score = 0.0
    for u, v in pattern:
        degree_sum = GU_new.degree(u) + GU_new.degree(v)
        if degree_sum == 0:
            # Undefined weight for this pair: skip it, but keep what earlier
            # pairs contributed (the original reset the running total here).
            continue

        # NOTE(review): the original names this "common" neighbours but pools
        # them with a union rather than an intersection; kept as-is, confirm
        # which one is intended.
        neighbours = Neighbor_of_U[u] | Neighbor_of_U[v]

        cn_score = 0.0
        for cn in neighbours:
            cn_degree = GV_new.degree(cn)
            # A zero-degree neighbour is skipped individually; aborting the
            # whole sum on it made the result depend on set iteration order.
            if cn_degree:
                cn_score += 1 / cn_degree

        score += (2 / degree_sum) * cn_score
    return score
if __name__ == '__main__':
    # Script entry point: load the edge list, hide a random subset of edges,
    # score every (drug, ISE) pair and report the ROC AUC against the
    # original, un-sampled edge set.
    #
    # DRUGS and ISE stay module-level names on purpose: searchForPattern
    # reads them as globals when indexing into the ground-truth vector.
    filename = "/home/gaudel/Desktop/monopharmacy.csv"
    df_edge_list = pd.read_csv(filename).drop("SEN", axis=1).head(5000)
    ISE_in = df_edge_list["ISE"].values.tolist()
    ISE = [str(a) for a in ISE_in]
    DRUGS = df_edge_list["DRUGS"].values.tolist()

    # Adjacency in both directions: drug -> ISEs and ISE -> drugs.
    Neighbor_of_U = {}
    Neighbor_of_V = {}
    for drug, ise in zip(DRUGS, ISE):
        Neighbor_of_U.setdefault(drug, set()).add(ise)
        Neighbor_of_V.setdefault(ise, set()).add(drug)

    node_pair_list = set(zip(DRUGS, ISE))

    # Sorted unique identifiers define the row-major layout of _true and of
    # the score vector w; compute them once instead of once per loop pass.
    unique_drugs = set(DRUGS)
    unique_ise = set(ISE)
    drug_ids = sorted(unique_drugs)
    ise_ids = sorted(unique_ise)

    # Ground truth must be captured BEFORE sample() removes edges from
    # node_pair_list in place.
    _true = [
        1 if (drug, ise) in node_pair_list else 0
        for drug in drug_ids
        for ise in ise_ids
    ]

    B = nx.Graph()
    B.add_nodes_from(unique_drugs, bipartite=0)
    B.add_nodes_from(unique_ise, bipartite=1)
    edge_list, Neighbor_of_U, Neighbor_of_V = sample(
        ISE, DRUGS, Neighbor_of_U, Neighbor_of_V, node_pair_list
    )  # hides a random subset of edges; mutates all three structures
    B.add_edges_from(edge_list)

    # One-mode projections; each node is passed once rather than once per
    # CSV row, which yields the same projected graph.
    GU = bipartite.weighted_projected_graph(B, unique_drugs)
    GV = bipartite.weighted_projected_graph(B, unique_ise)

    # Keep only ISE-ISE projection edges with weight above 0.
    GV_new = nx.Graph()
    GV_new.add_nodes_from(unique_ise)
    GV_new.add_edges_from(
        (u, v, data) for u, v, data in GV.edges(data=True) if data['weight'] > 0
    )

    # Keep only drug-drug projection edges with weight above 18.
    # NOTE(review): the origin of the threshold 18 is not visible here.
    GU_new = nx.Graph()
    GU_new.add_nodes_from(unique_drugs)
    GU_new.add_edges_from(
        (u, v, data) for u, v, data in GU.edges(data=True) if data['weight'] > 18
    )

    # Score every pair in the same order _true was built.
    w = []
    for c in drug_ids:
        for d in ise_ids:
            pattern = searchForPattern(c, d, Neighbor_of_V, Neighbor_of_U, _true)
            score = calc_weight(pattern, GU_new, GV_new, Neighbor_of_U)
            w.append(score)
            print(c, d, ' : ', score)

    auc = roc_auc_score(y_true=_true, y_score=w)
    print('auc', auc)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement