Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os

from arango.client import ArangoClient
# Connection settings can be overridden through environment variables so that
# real credentials never need to live in the source; the defaults reproduce the
# original local development setup.
client = ArangoClient(hosts=os.environ.get("ARANGO_HOSTS", "http://localhost:8529"))
db = client.db(
    os.environ.get("ARANGO_DB", "unitres"),
    username=os.environ.get("ARANGO_USERNAME", "root"),
    password=os.environ.get("ARANGO_PASSWORD", "12345"),
)

# Weight written onto each attribute edge, keyed by edge collection name.
edge_weights = {
    "has_email": 2,
    "has_mobile": 7,
    "has_device": 9,
    "has_ip": 5,
}
def clear(db):
    """Drop every non-system collection of ``db``.

    Args:
        db: Database handle exposing ``collections()`` (returning entries
            with ``'name'`` and ``'system'`` keys) and ``delete_collection()``.
    """
    user_collections = [
        coll['name'] for coll in db.collections() if not coll['system']
    ]
    for name in user_collections:
        db.delete_collection(name)
def start_set(db):
    """Create the vertex/edge collections and seed the supernode document.

    Collections that already exist are left untouched, and the ``qnode``
    document is only inserted into ``supernode`` when it is missing, so the
    function can be called repeatedly.

    Args:
        db: Database handle used to create collections and run AQL.
    """
    vertex_collections = ('case', 'email', 'device', 'mobile', 'ip', 'supernode')
    edge_collections = ('has_email', 'has_device', 'has_mobile', 'has_ip', 'is_P1')

    for name in vertex_collections:
        if not db.has_collection(name):
            db.create_collection(name)
    for name in edge_collections:
        if not db.has_collection(name):
            db.create_collection(name, edge=True)

    # Insert the supernode (qnode) only if it does not exist yet.
    db.aql.execute("""
        UPSERT { _key: 'qnode' }
        INSERT { _key: 'qnode' }
        UPDATE { }
        IN supernode
    """)

    # Unique indexes are currently disabled (persistent is the modern
    # equivalent of hash).
    # NOTE(review): the field names below (email_id, mobile_number,
    # ip_address, ...) differ from the attribute names insert() writes
    # (email, mobile, ip) -- reconcile them before re-enabling.
    # db.collection('email').add_index({'type': 'persistent', 'fields': ['email_id', 'disposable'], 'unique': True})
    # db.collection('device').add_index({'type': 'persistent', 'fields': ['fingerprint'], 'unique': True})
    # db.collection('mobile').add_index({'type': 'persistent', 'fields': ['mobile_number', 'prepaid', 'imei'], 'unique': True})
    # db.collection('ip').add_index({'type': 'persistent', 'fields': ['ip_address'], 'unique': True})
# 2. Insert a case and all of its attributes, connected by weighted edges.
def insert(visitor_id, session_id, lead_id, case_id, risk_score, mobile_number, email_id, ip_address):
    """Store a new case and link it to its email, device, mobile and IP nodes.

    A ``case`` document is always created. For each attribute, an existing
    node with the same value is reused, otherwise a new one is created (keyed
    with the key of the case that introduced it). A weighted edge from the
    case to the attribute node is then added, using ``edge_weights``.

    All values travel as AQL bind variables, so quotes or other special
    characters in the input cannot alter the query. Case fields are stored
    as strings as before, except that ``None`` is now stored as ``null``
    instead of the literal string ``'None'``.

    Args:
        visitor_id: Visitor identifier; also used as the device fingerprint.
        session_id: Session identifier.
        lead_id: Lead identifier.
        case_id: Case identifier, or ``None`` if not yet assigned.
        risk_score: Risk score, or ``None`` if not yet assigned.
        mobile_number: Value for the ``mobile`` attribute node.
        email_id: Value for the ``email`` attribute node.
        ip_address: Value for the ``ip`` attribute node.

    Returns:
        The ``_key`` of the newly created ``case`` document.
    """
    def _text_or_null(value):
        # Keep the historical string storage, but do not turn None into 'None'.
        return None if value is None else str(value)

    cursor = db.aql.execute(
        """
        INSERT {
            visitor_id: @visitor_id,
            session_id: @session_id,
            lead_id: @lead_id,
            case_id: @case_id,
            risk_score: @risk_score
        } IN case
        RETURN NEW._key
        """,
        bind_vars={
            'visitor_id': _text_or_null(visitor_id),
            'session_id': _text_or_null(session_id),
            'lead_id': _text_or_null(lead_id),
            'case_id': _text_or_null(case_id),
            'risk_score': _text_or_null(risk_score),
        },
    )
    graph_case_id = next(cursor, None)

    # (vertex collection, edge collection, attribute name, attribute value)
    attribute_specs = [
        ('email', 'has_email', 'email', email_id),
        ('device', 'has_device', 'fingerprint', visitor_id),
        ('mobile', 'has_mobile', 'mobile', mobile_number),
        ('ip', 'has_ip', 'ip', ip_address),
    ]
    for coll, edge_coll, field, value in attribute_specs:
        # The UPSERT search expression has to be an object literal, so the
        # attribute name is formatted into the query text. That is safe
        # because it only ever comes from the fixed list above; the value and
        # the collection name are bound.
        node_key_cursor = db.aql.execute(
            f"""
            UPSERT {{ {field}: @value }}
            INSERT {{ {field}: @value, _key: @key }}
            UPDATE {{}}
            IN @@coll
            RETURN NEW._key
            """,
            bind_vars={'value': value, 'key': graph_case_id, '@coll': coll},
        )
        node_key = next(node_key_cursor, None)
        db.aql.execute(
            """
            INSERT { _from: @from_id, _to: @to_id, weight: @weight } IN @@edge_coll
            """,
            bind_vars={
                'from_id': f'case/{graph_case_id}',
                'to_id': f'{coll}/{node_key}',
                'weight': edge_weights[edge_coll],
                '@edge_coll': edge_coll,
            },
        )
    return graph_case_id
# 3. EFR score: cheapest weighted route from a case to the supernode.
def get_efr(graph_case_id):
    """Return the EFR score of a case.

    The query traverses 1 to 6 hops from the case over the attribute and
    ``is_P1`` edges in either direction, keeps the paths that end at
    ``supernode/qnode``, scores each path as half the sum of its edge
    weights and yields the smallest score. When no such path exists the
    score is 0.

    Args:
        graph_case_id: ``_key`` of the ``case`` document to start from.

    Returns:
        The minimum path score, or 0 if the supernode is not reachable.
    """
    hops = 6
    # The start vertex is bound rather than formatted into the query text;
    # the hop count is a local integer constant, so interpolating it is safe.
    query = f"""
        LET paths = (
            FOR v, e, p IN 1..{hops} ANY @start
                has_mobile, has_email, has_ip, has_device, is_P1
                //OPTIONS {{ uniqueVertices: "path" }}
                FILTER v._id == 'supernode/qnode'
                RETURN SUM(p.edges[*].weight)/2
        )
        RETURN LENGTH(paths) > 0 ? MIN(paths) : 0
    """
    cursor = db.aql.execute(query, bind_vars={'start': f'case/{graph_case_id}'})
    return next(cursor, 0)
# 4. Update risk_score/case_id, connect to supernode if P1
def update_caseid_riskscore(graph_case_id, case_id, new_risk_score):
    """Set a case's ``case_id`` and ``risk_score``; link P1 cases to the supernode.

    Both fields are stored as strings (``None`` becomes ``null``). When the
    new risk score is ``'P1'`` an ``is_P1`` edge from the case to
    ``supernode/qnode`` is ensured; calling this repeatedly for the same case
    does not create duplicate edges.

    Args:
        graph_case_id: ``_key`` of the ``case`` document to update.
        case_id: New case identifier.
        new_risk_score: New risk score label, e.g. ``'P1'``.
    """
    db.aql.execute(
        """
        UPDATE @key WITH { risk_score: @risk_score, case_id: @case_id } IN case
        """,
        bind_vars={
            # Document keys are strings; coerce in case a caller passes a number.
            'key': str(graph_case_id),
            'risk_score': None if new_risk_score is None else str(new_risk_score),
            'case_id': None if case_id is None else str(case_id),
        },
    )
    if new_risk_score == 'P1':
        # UPSERT instead of INSERT keeps the edge unique across repeated calls.
        # NOTE(review): an existing is_P1 edge is not removed when a case
        # later moves away from P1 -- confirm whether that is intended.
        db.aql.execute(
            """
            UPSERT { _from: @from_id, _to: 'supernode/qnode' }
            INSERT { _from: @from_id, _to: 'supernode/qnode' }
            UPDATE {}
            IN is_P1
            """,
            bind_vars={'from_id': f'case/{graph_case_id}'},
        )
# 5. Get subgraph (for visualization/debugging)
def get_subgraph(graph_case_id, k):
    """Return a cursor over all paths of 1 to ``k`` hops starting at a case.

    The traversal follows the attribute and ``is_P1`` edges in either
    direction, in breadth-first order.

    Args:
        graph_case_id: ``_key`` of the ``case`` document to start from.
        k: Maximum traversal depth; coerced with ``int()`` before use.

    Returns:
        The cursor returned by ``db.aql.execute``; each item is one path.
    """
    # int() guarantees that only a number reaches the query text; the start
    # vertex is passed as a bind variable.
    return db.aql.execute(
        f"""
        FOR v, e, p IN 1..{int(k)} ANY @start
            has_mobile, has_email, has_ip, has_device, is_P1
            OPTIONS {{order: "bfs"}}
            RETURN p
        """,
        bind_vars={'start': f'case/{graph_case_id}'},
    )
# --- Example usage and tests ---
if __name__ == "__main__":
    # Start from an empty database, then recreate the collections and supernode.
    clear(db)
    start_set(db)

    # The commented-out calls below are kept as usage examples.
    # NOTE(review): the p3/p4 examples pass 11 positional arguments, but
    # insert() accepts 8 -- they would raise TypeError if re-enabled as-is.
    # visitor_id, session_id, lead_id, case_id, risk_score, mobile_number, email_id, ip_address
    # p1 = insert('vid1','sid1','lid1', None, None,'+919999999999', '[email protected]', '192.168.1.1')
    # p2 = insert('vid2','sid2','lid2', None, None, '+919999999999', '[email protected]', '192.168.1.1')
    # p3 = insert('vid3','sid3','lid3', None, None,'+919999999998', 1, 'imei123', '[email protected]', 1, '192.167.1.1')
    # p4 = insert('vid4','sid4','lid4', None, None,'+949999999992', 1, 'imei123', '[email protected]', 0, '192.1688.1.1')
    # print('p1:', p1)
    # print('p2:', p2)
    # print('p3:', p3)
    # print('p4:', p4)
    # print('EFR p1:', get_efr(p1))
    # print('EFR p2:', get_efr(p2))
    # print('EFR p3:', get_efr(p3))
    # print('EFR p4:', get_efr(p4))
    # update_caseid_riskscore(p1, 678, 'P1')
    # update_caseid_riskscore(p2, 675, 'P1')
    # print('EFR p1 after update:', get_efr(p1))
    # Visualize the subgraph
    # subgraph = get_subgraph(p1, 6)
    # print(next(subgraph, None))
    # import random
    # # insert('f5b2f01ef2c249e005db7f521d9a6dc9','202.189.251.191-1752232265528','911111111111',None,None,'911111111111','[email protected]','202.189.251.191')
    # for i in range(46,1047):
    #     globals()[f'p{i}'] = insert(f'vid{i}',
    #                                 f'sid{i}',
    #                                 f'lid{i}',
    #                                 None,
    #                                 None,
    #                                 f"919999999{random.randint(100,999)}",
    #                                 f'user{random.randint(100,999)}@example.com',
    #                                 f"202.189.{random.randint(100,256)}.{random.randint(0,256)}")
    #     print(globals()[f'p{i}'])
    #     update_caseid_riskscore(globals()[f'p{i}'],f'c{i}','P'+str(random.randint(1,5)))
- import random
- import string
# --- Device Fingerprint Generator ---
def generate_device_fingerprint(length: int = 32) -> str:
    """Return a random string of lowercase letters and digits.

    Intended for generating sample data only; it uses ``random`` and is not
    suitable for anything security-sensitive.

    Args:
        length: Number of characters to generate.

    Returns:
        A string of ``length`` characters drawn from ``a-z`` and ``0-9``.
    """
    chars = string.ascii_lowercase + string.digits
    return ''.join(random.choices(chars, k=length))
# --- Data Population Loop ---
# Guarded so that importing this module never writes sample data.
if __name__ == "__main__":
    for i in range(48, 1047):
        # The device fingerprint doubles as the visitor id.
        visitor_id = generate_device_fingerprint()
        ip_address = f"202.189.{random.randint(100, 255)}.{random.randint(100, 255)}"
        timestamp = random.randint(1_600_000_000_000, 1_700_000_000_000)  # Example timestamp
        session_id = f"{ip_address}-{timestamp}"
        mobile_number = f"9199999{random.randint(100, 999)}"
        # The mobile number doubles as the lead id.
        lead_id = mobile_number
        email_id = f'user{random.randint(100,999)}@gmail.com'

        # Insert the case with its attributes; case_id and risk_score are
        # not known at insert time.
        graph_case_id = insert(
            visitor_id,
            session_id,
            lead_id,
            None,
            None,
            mobile_number,
            email_id,
            ip_address,
        )
        print(graph_case_id)

        # Assign the loop index as case id and a random risk category P1..P5.
        update_caseid_riskscore(graph_case_id, i, f'P{random.randint(1, 5)}')
Advertisement
Add Comment
Please, Sign In to add comment