Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.70 KB | None | 0 0
  1. import networkx as nx
  2. import random as rand
  3. from networkx.algorithms import bipartite
  4. from sklearn.metrics import roc_auc_score
  5. import pandas as pd
  6.  
  7.  
  8. def searchForPattern(A,x,Neighbor_of_V,Neighbor_of_U,y_true):
  9.  
  10.  
  11.  
  12. search_pattern = set()
  13. pattern = set()
  14.  
  15. nei_A = Neighbor_of_U[A]
  16.  
  17. nei_x = Neighbor_of_V[x]
  18.  
  19. if A in nei_x:
  20. return "DL"
  21.  
  22. else:
  23. nei_nei_x = set()
  24. for n in nei_x:
  25. nei_nei_x=nei_nei_x.union(Neighbor_of_U[n])
  26.  
  27. if x in nei_nei_x:
  28. nei_nei_x.remove(x)
  29.  
  30. nei_nei_A = set()
  31. for n in nei_A:
  32. nei_nei_A = nei_nei_A.union(Neighbor_of_V[n])
  33.  
  34. if A in nei_nei_A:
  35. nei_nei_A.remove(A)
  36.  
  37. V_part_for_projection = set()
  38.  
  39. V_part_for_projection = nei_A.intersection(nei_nei_x)
  40.  
  41. nei_V_part_for_projection = set()
  42.  
  43. for e in V_part_for_projection:
  44. nei_V_part_for_projection=nei_V_part_for_projection.union(Neighbor_of_V[e])
  45.  
  46. if len(V_part_for_projection) > 0:
  47.  
  48. for e in nei_V_part_for_projection:
  49. if e in nei_x:
  50.  
  51. tup = (A,e)
  52.  
  53. pattern.add(tup)
  54.  
  55. return pattern
  56.  
  57. else:
  58.  
  59. for U in nei_nei_A:
  60. # print('U',U)
  61. tup = (U,x)
  62. search_pattern.add(tup)
  63. # print('upper',search_pattern)
  64.  
  65. for V in nei_nei_x:
  66. # print('V',V)
  67. tup = (A,V)
  68. search_pattern.add(tup)
  69.  
  70. for pt in search_pattern:
  71. a,b = pt
  72. a_index = sorted(list(set(DRUGS))).index(a)+1
  73. # print(a_index)
  74. b_index = sorted(list(set(ISE))).index(b)+1
  75. # print(b_index)
  76.  
  77. index = (a_index-1)*(b_index)+b_index
  78.  
  79. c = len(set(ISE))
  80. index = (a_index-1)*c+b_index - 1
  81.  
  82. if y_true[index] == 1:
  83. Neighbor_of_U[a].add(b)
  84. Neighbor_of_V[b].add(a)
  85. # DRUGS.append(a)
  86. # ISE.append(b)
  87.  
  88. else:
  89. return 'NL'
  90. pattern=searchForPattern(A,x,Neighbor_of_V,Neighbor_of_U,y_true)
  91. return pattern
  92.  
  93.  
  94. def sample(ISE,DRUGS,Neighbor_of_U,Neighbor_of_V,node_pair_list):
  95. for i in rand.sample(range(1,800),200):
  96. ise_remove = ISE[i]
  97. drugs_remove = DRUGS[i]
  98.  
  99. Neighbor_of_U[drugs_remove].remove(ise_remove)
  100. Neighbor_of_V[ise_remove].remove(drugs_remove)
  101.  
  102. tup = (drugs_remove,ise_remove)
  103.  
  104. node_pair_list.remove(tup)
  105.  
  106. return node_pair_list, Neighbor_of_U, Neighbor_of_V
  107.  
  108.  
  109. def calc_weight(pattern,GU_new,GV_new,Neighbor_of_U):
  110. score = 0.0
  111.  
  112. if pattern=='DL':
  113. score = 1.0
  114. elif pattern == 'NL':
  115. score = 0.0
  116.  
  117. else:
  118.  
  119. for pt in pattern:
  120. u,v = pt
  121. deg_u = GU_new.degree(u)
  122. deg_v = GU_new.degree(v)
  123. common_nei = set()
  124. common_nei = Neighbor_of_U[u].union(Neighbor_of_U[v])
  125. # print(common_nei)
  126. if len(common_nei) == 0:
  127. score = 0.0
  128. else:
  129. cn_score = 0.0
  130.  
  131. try:
  132. for cn in common_nei:
  133. # print(GV_new.degree(cn))
  134. cn_score += (1/GV_new.degree(cn))
  135.  
  136. except ZeroDivisionError:
  137. score = 0.0
  138.  
  139. try:
  140. score += (2/(deg_u+deg_v))*cn_score
  141. except ZeroDivisionError:
  142. score = 0.0
  143.  
  144.  
  145.  
  146. return(score)
  147.  
  148.  
  149. if __name__ == '__main__':
  150. filename = "/home/gaudel/Desktop/monopharmacy.csv"
  151. df_edge_list = pd.read_csv(filename).drop("SEN",axis=1).head(5000)
  152. ISE_in = df_edge_list["ISE"].values.tolist()
  153. ISE = [str(a) for a in ISE_in]
  154. DRUGS = df_edge_list["DRUGS"].values.tolist()
  155.  
  156. Neighbor_of_U = {}
  157. for u,v in zip(DRUGS,ISE):
  158. if u not in Neighbor_of_U:
  159. Neighbor_of_U[u] = set()
  160. Neighbor_of_U[u].add(v)
  161.  
  162. Neighbor_of_V = {}
  163. for u,v in zip(ISE,DRUGS):
  164. if u not in Neighbor_of_V:
  165. Neighbor_of_V[u] = set()
  166. Neighbor_of_V[u].add(v)
  167.  
  168. node_pair_list = set()
  169. for u,v in zip(DRUGS,ISE):
  170. tup = (u,v)
  171. node_pair_list.add(tup)
  172. node_pair_list
  173.  
  174. _true = []
  175. for x in sorted(list(set(DRUGS))):
  176. for y in sorted(list(set(ISE))):
  177. tup = (x,y)
  178. # print(tup)
  179. if tup in node_pair_list:
  180. _true.append(1)
  181. else:
  182. _true.append(0)
  183.  
  184.  
  185. w= []
  186.  
  187. B = nx.Graph()
  188. B.add_nodes_from(list(set(DRUGS)),bipartite=0)
  189. B.add_nodes_from(list(set(ISE)),bipartite=1)
  190.  
  191. edge_list,Neighbor_of_U,Neighbor_of_V = sample(ISE,DRUGS,Neighbor_of_U,Neighbor_of_V,node_pair_list) ### calling sample function
  192.  
  193. B.add_edges_from(edge_list)
  194.  
  195. GU = bipartite.weighted_projected_graph(B,DRUGS)
  196. GV = bipartite.weighted_projected_graph(B,ISE)
  197.  
  198. GU_new = nx.Graph()
  199. GU_new.add_nodes_from(set(DRUGS))
  200. GV_new = nx.Graph()
  201. ise_filter_edges = []
  202.  
  203. for edges in GV.edges(data=True):
  204. if edges[2]['weight']>0:
  205. ise_filter_edges.append(edges)
  206.  
  207. GV_new.add_nodes_from(set(ISE))
  208. GV_new.add_edges_from(ise_filter_edges)
  209. # print(GV.edges(data=True))
  210.  
  211. drugs_filter_edges = []
  212. for edges in GU.edges(data=True):
  213. if edges[2]['weight']>18:
  214. drugs_filter_edges.append(edges)
  215.  
  216.  
  217. GU_new.add_nodes_from(DRUGS)
  218. GU_new.add_edges_from(drugs_filter_edges)
  219. # GU_new.edges(data=True)
  220.  
  221. # GU_new.add_edges_from(filtered_edge)
  222. # print(GU.edges(data=True))
  223.  
  224. for c in sorted(list(set(DRUGS))):
  225. for d in sorted(list(set(ISE))):
  226. pattern=searchForPattern(c,d,Neighbor_of_V,Neighbor_of_U,_true) ## search for pattern function
  227. score=calc_weight(pattern,GU_new,GV_new,Neighbor_of_U) ## calling calc_weight function
  228. w.append(score)
  229. print(c,d,' : ',score)
  230. #print(_true)
  231. #print(w)
  232. auc = roc_auc_score(y_true=_true,y_score=w)
  233. print('auc',auc)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement