Advertisement
Guest User

Untitled

a guest
Aug 19th, 2017
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.75 KB | None | 0 0
  1. # coding: utf-8
  2.  
  3. # In[1]:
  4.  
  5. import sys
  6. import itertools
  7. import traceback
  8.  
  9. import psycopg2
  10. import psycopg2.extras
  11. import psycopg2.extensions
  12. import networkx as nx
  13.  
  14. conn_pg = psycopg2.connect(host='cassius.dc2.lan', database='kb_spider', user='blackduck', port=5432,
  15. password = 'i907$LTIfX3ar5bX', cursor_factory=psycopg2.extras.DictCursor)
  16. print (conn_pg)
  17.  
  18. d_scores = {}
  19.  
  20.  
  21. # In[2]:
  22.  
  23. def get_edges(edge_stmt, bindings):
  24. """Get edges of candidate pairs for building the graph"""
  25. edges = []
  26.  
  27. with conn_pg.cursor() as cur:
  28. cur.execute(edge_stmt, bindings)
  29.  
  30. for row in cur:
  31. edge = [row['p1_uuid'], row['p2_uuid']]
  32. edges.append(edge)
  33.  
  34. if 'dws_jaccard' in row:
  35. # Fow dws candidates, confidence score is based on a weighted average of Jaccard and Containment scores
  36. try:
  37. confidence = 0.7 * float(row['dns_jaccard']) + 0.3 * float(row['dns_containment'])
  38. except:
  39. input(row)
  40. elif 'des_sim' in row:
  41. confidence = float(row['des_sim'])
  42.  
  43. d_scores[frozenset(edge)] = confidence
  44.  
  45. return edges
  46.  
  47.  
# Query for candidate duplicate project pairs and their dws_*/dns_* Jaccard and
# containment scores from automerge.project_pair_sig_similarity.
# A pair is selected when either score family meets the bound thresholds (the
# %s placeholders) or the hard-coded floor of jaccard >= 0.6 and
# containment >= 0.9. Only category 1 rows whose two projects both have
# state_code 1002 are returned.
# NOTE(review): the meaning of category 1 and state_code 1002 is not documented
# here -- confirm against the schema.
edge_stmt_dws = """ SELECT cand.p1_uuid, cand.p2_uuid, cand.dws_jaccard, cand.dws_containment, cand.dns_jaccard, cand.dns_containment
FROM automerge.project_pair_sig_similarity cand
JOIN public.project AS p1 ON cand.p1_uuid = p1.uuid::text
JOIN public.project AS p2 ON cand.p2_uuid = p2.uuid::text
WHERE cand.category IN (1)
AND (
(cand.dws_jaccard >= %s AND cand.dws_containment >= %s)
OR (cand.dws_jaccard >= 0.6 AND cand.dws_containment >= 0.9)
OR (cand.dns_jaccard >= %s AND cand.dns_containment >= %s)
OR (cand.dns_jaccard >= 0.6 AND cand.dns_containment >= 0.9)
)
AND p1.state_code = 1002
AND p2.state_code = 1002;
"""
# Values for the four %s placeholders, in order: minimum dws_jaccard,
# dws_containment, dns_jaccard and dns_containment.
bindings = (0.7, 0.7, 0.9, 0.9)
  63.  
  64.  
  65. ## Data shape: These projects are duplicated
  66. # pid1 | pid2
  67. # 1 | 2
  68. # 2 | 1
  69. # 3 | 2
  70. # 4 | 5
  71. # 6 | 7
  72.  
  73. # I want
  74. # group1: 1,2,3
  75. # group2: 4,5
  76. # goup3: 6
  77.  
  78. graph = nx.Graph()
  79. graph.add_edges_from(get_edges(edge_stmt_dws, bindings))
  80. print("Number of edges: " + str(graph.number_of_edges()))
  81. print("Number of nodes: " + str(graph.number_of_nodes()))
  82. print("Number of connected_components: " + str(nx.number_connected_components(graph)))
  83.  
  84. # In[ ]:
  85.  
  86. # In[ ]:
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement