techpaste222

Untitled

Jul 15th, 2025
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.57 KB | Source Code | 0 0
import os

from arango.client import ArangoClient
  2.  
  3. client = ArangoClient(hosts="http://localhost:8529")
  4. db = client.db("unitres", username="root", password="12345")
  5.  
  6. edge_weights = {
  7.     "has_email": 2,
  8.     "has_mobile": 7,
  9.     "has_device": 9,
  10.     "has_ip": 5,
  11. }
  12.  
  13. def clear(db):
  14.     # Drop all non-system collections
  15.     for coll in db.collections():
  16.         if not coll['system']:
  17.             db.delete_collection(coll['name'])
  18.  
  19. def start_set(db):
  20.     # Create collections and edge collections if not present
  21.     vertex = ['case','email','device','mobile','ip','supernode']
  22.     edges  = ['has_email','has_device','has_mobile','has_ip','is_P1']
  23.     for name in vertex:
  24.         if not db.has_collection(name):
  25.             db.create_collection(name)
  26.     for name in edges:
  27.         if not db.has_collection(name):
  28.             db.create_collection(name, edge=True)
  29.  
  30.     # Insert the supernode (qnode) if not exists
  31.     db.aql.execute("""
  32.    UPSERT { _key: 'qnode' }
  33.    INSERT { _key: 'qnode' }
  34.    UPDATE { }
  35.    IN supernode
  36.    """)
  37.  
  38.     # Create indexes (persistent is the modern equivalent of hash)
  39.     # db.collection('email').add_index({'type': 'persistent', 'fields': ['email_id', 'disposable'], 'unique': True})
  40.     # db.collection('device').add_index({'type': 'persistent', 'fields': ['fingerprint'], 'unique': True})
  41.     # db.collection('mobile').add_index({'type': 'persistent', 'fields': ['mobile_number', 'prepaid', 'imei'], 'unique': True})
  42.     # db.collection('ip').add_index({'type': 'persistent', 'fields': ['ip_address'], 'unique': True})
  43.  
  44. # 2. Insert a case and all their attributes, connect with weighted edges
  45. def insert(visitor_id, session_id, lead_id, case_id, risk_score, mobile_number, email_id, ip_address):
  46.     cursor = db.aql.execute(f"""
  47.    INSERT {{
  48.        visitor_id: '{visitor_id}',
  49.        session_id: '{session_id}',
  50.        lead_id: '{lead_id}',
  51.        case_id: '{case_id}',
  52.        risk_score: '{risk_score}'
  53.        }} IN case
  54.        RETURN NEW._key
  55.    """)
  56.     graph_case_id = next(cursor, None)
  57.     attribute_specs = [
  58.         ('email',   'has_email',  {'email': email_id}),
  59.         ('device',  'has_device', {'fingerprint': visitor_id}),
  60.         ('mobile',  'has_mobile', {'mobile': mobile_number}),
  61.         ('ip',      'has_ip',     {'ip': ip_address})
  62.     ]
  63.     for coll, edge_coll, attrs in attribute_specs:
  64.         attrs_with_key = dict(attrs, _key=graph_case_id)
  65.         node_key_cursor = db.aql.execute(
  66.             f"""
  67.            UPSERT {attrs}
  68.                INSERT {attrs_with_key}
  69.                UPDATE {{}}
  70.                IN {coll}
  71.                RETURN NEW._key
  72.            """
  73.         )
  74.         node_key = next(node_key_cursor, None)
  75.         db.aql.execute(f"""INSERT {{ _from: 'case/{graph_case_id}', _to: '{coll}/{node_key}', weight: {edge_weights[edge_coll]} }} IN {edge_coll}""")
  76.     return graph_case_id
  77.  
  78. # 3. Efficient, scalable EFR score using K_SHORTEST_PATHS
  79. def get_efr(graph_case_id):
  80.     hops = 6
  81.     query = f"""
  82.    LET paths = (
  83.    FOR v, e, p IN 1..{hops} ANY 'case/{graph_case_id}'
  84.        has_mobile, has_email, has_ip, has_device, is_P1
  85.        //OPTIONS {{ uniqueVertices: "path" }}
  86.        FILTER v._id == 'supernode/qnode'
  87.        RETURN SUM(p.edges[*].weight)/2
  88.    )
  89.    RETURN LENGTH(paths) > 0 ? MIN(paths) : 0
  90.    """
  91.     cursor = db.aql.execute(query)
  92.     return next(cursor, 0)
  93.  
  94. # 4. Update risk_score/case_id, connect to supernode if P1
  95. def update_caseid_riskscore(graph_case_id, case_id, new_risk_score):
  96.     db.aql.execute(f"""UPDATE '{graph_case_id}' WITH {{ risk_score: '{new_risk_score}', case_id: '{case_id}' }} IN case""")
  97.     if new_risk_score == 'P1':
  98.         db.aql.execute(f"""INSERT {{ _from: 'case/{graph_case_id}', _to: 'supernode/qnode' }} IN is_P1""")
  99.  
  100. # 5. Get subgraph (for visualization/debugging)
  101. def get_subgraph(graph_case_id, k):
  102.     return db.aql.execute(f"""
  103.        FOR v, e, p IN 1..{k} ANY 'case/{graph_case_id}'
  104.        has_mobile, has_email, has_ip, has_device, is_P1
  105.        OPTIONS {{order: "bfs"}}
  106.        RETURN p
  107.    """)
  108.  
  109. # --- Example usage and tests ---
  110. if __name__ == "__main__":
  111.     clear(db)
  112.     start_set(db)
  113.     # visitor_id, session_id, lead_id, case_id, risk_score, mobile_number, email_id, ip_address
  114.     # p1 = insert('vid1','sid1','lid1', None, None,'+919999999999', '[email protected]', '192.168.1.1')
  115.     # p2 = insert('vid2','sid2','lid2', None, None, '+919999999999', '[email protected]', '192.168.1.1')
  116.     # p3 = insert('vid3','sid3','lid3', None, None,'+919999999998', 1, 'imei123', '[email protected]', 1, '192.167.1.1')
  117.     # p4 = insert('vid4','sid4','lid4', None, None,'+949999999992', 1, 'imei123', '[email protected]', 0, '192.1688.1.1')
  118.  
  119.     # print('p1:', p1)
  120.     # print('p2:', p2)
  121.     # print('p3:', p3)
  122.     # print('p4:', p4)
  123.  
  124.     # print('EFR p1:', get_efr(p1))
  125.     # print('EFR p2:', get_efr(p2))
  126.     # print('EFR p3:', get_efr(p3))
  127.     # print('EFR p4:', get_efr(p4))
  128.  
  129.     # update_caseid_riskscore(p1, 678, 'P1')
  130.     # update_caseid_riskscore(p2, 675, 'P1')
  131.     # print('EFR p1 after update:', get_efr(p1))
  132.  
  133.     # Visualize the subgraph
  134.     # subgraph = get_subgraph(p1, 6)
  135.     # print(next(subgraph, None))
  136.  
  137.     # import random
  138.     # # insert('f5b2f01ef2c249e005db7f521d9a6dc9','202.189.251.191-1752232265528','911111111111',None,None,'911111111111','[email protected]','202.189.251.191')
  139.     # for i in range(46,1047):
  140.     #     globals()[f'p{i}'] = insert(f'vid{i}',
  141.     #                                 f'sid{i}',
  142.     #                                 f'lid{i}',
  143.     #                                 None,
  144.     #                                 None,
  145.     #                                 f"919999999{random.randint(100,999)}",
  146.     #                                 f'user{random.randint(100,999)}@example.com',
  147.     #                                 f"202.189.{random.randint(100,256)}.{random.randint(0,256)}")
  148.     #     print(globals()[f'p{i}'])
  149.     #     update_caseid_riskscore(globals()[f'p{i}'],f'c{i}','P'+str(random.randint(1,5)))
  150.  
  151.     import random
  152.     import string
  153.  
  154.     # --- Device Fingerprint Generator ---
  155.     def generate_device_fingerprint(length=32):
  156.         chars = string.ascii_lowercase + string.digits
  157.         return ''.join(random.choices(chars, k=length))
  158.  
  159.     # --- Data Population Loop ---
  160.     for i in range(48, 1047):
  161.         device_fingerprint = generate_device_fingerprint()
  162.         ip_address = f"202.189.{random.randint(100, 255)}.{random.randint(100, 255)}"
  163.         timestamp = random.randint(1_600_000_000_000, 1_700_000_000_000)  # Example timestamp
  164.         session_id = f"{ip_address}-{timestamp}"
  165.         mobile_number = f"9199999{random.randint(100, 999)}"
  166.         lead_id = mobile_number
  167.         email_id = f'user{random.randint(100,999)}@gmail.com'
  168.         visitor_id = device_fingerprint
  169.  
  170.         # Insert person and attributes
  171.         globals()[f'p{i}'] = insert(
  172.             visitor_id,         # visitor_id (device fingerprint)
  173.             session_id,         # session_id (ip address + timestamp)
  174.             lead_id,            # lead_id (mobile number)
  175.             None,               # case_id (None at insert)
  176.             None,               # risk_score (None at insert)
  177.             mobile_number,      # mobile_number
  178.             email_id,           # email_id
  179.             ip_address          # ip_address
  180.         )
  181.         print(globals()[f'p{i}'])
  182.         # Update case_id and risk_score
  183.         # new_case_id = f'c{i}'
  184.         # new_risk_category = f'P{random.randint(1, 5)}'
  185.         update_caseid_riskscore(globals()[f'p{i}'],i, f'P{random.randint(1, 5)}')
  186.  
Advertisement
Add Comment
Please, Sign In to add comment