Advertisement
vmamontov

processing_with_recursive

Jan 30th, 2023 (edited)
832
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.39 KB | None | 0 0
  1. import pandas as pd
  2.  
  3.  
  4. # some functions
  5. # =================================================
  6.  
  7.  
  8. def id_emails(raw):
  9.     data = {}
  10.     for id, email in raw:
  11.         prev = data.get(id, set())
  12.         prev.add(email)
  13.         data[id] = prev
  14.  
  15.     return data
  16.  
  17.  
  18. def add2dict(res, n, id, email):
  19.     if n not in res:
  20.         res[n] = [[id, email]]
  21.     else:
  22.         res[n].append([id, email])
  23.  
  24.  
  25. def recur(n, id, email):
  26.  
  27.     add2dict(res, n, id, email)
  28.  
  29.     temp = {k: v for k, v in data.items() if k != id}
  30.  
  31.     for id_, email_ in temp.items():
  32.         if email in email_:
  33.             res[n].append([id_, email])
  34.             email_ = email_ - {email}
  35.             for em in email_:
  36.                 recur(n, id_, em)
  37.  
  38.  
  39. def check(raw, item):
  40.     for v in raw.values():
  41.         if v == sorted(item):
  42.             return True
  43.         else:
  44.             return False
  45.  
  46.  
  47. def create_unique_data(res):
  48.     i, raw = 0, {}
  49.  
  50.     for k, v in res.items():
  51.         if i == 0:
  52.             raw[i] = sorted(v)
  53.         else:
  54.             if check(raw, v) == False:
  55.                 raw[i] = sorted(v)
  56.         i += 1
  57.     return raw
  58. # =================================================
  59.  
  60.  
  61. # 0. user data
  62. raw = [['001', 'm001@com'],
  63.        ['0404', 'other@com'],
  64.        ['001', 'm100@com'],
  65.        ['020', 'B020@com'],
  66.        ['020', 'm001@com'],
  67.        ['300', 'B020@com'],
  68.        ]
  69.  
  70. data = id_emails(raw)
  71.  
  72. # data = {'001': {'m001@com', 'm100@com'},
  73. #         '020': {'B020@com', 'm001@com'},
  74. #         '300': {'B020@com'},
  75. #         '0404': {'other@com'}}
  76.  
  77.  
  78. # 1. processing
  79. n, res = 0, {}
  80. for key in data.keys():
  81.     for email in data[key]:
  82.         recur(n, key, email)
  83.     n += 1
  84.  
  85. # 2. deleting duplicates
  86. uniq_data = create_unique_data(res)
  87.  
  88.  
  89. # # print result raw data:
  90. # for k, v in res.items():
  91. #     print(k, v)
  92.  
  93. # # print normal result:
  94. # for k, v in uniq_data.items():
  95. #     print(k, v)
  96.  
  97.  
  98.  
  99. # 3. converting
  100. df = pd.DataFrame.from_dict({(i,j): uniq_data[i][j]
  101.                                 for i in uniq_data.keys()
  102.                                 for j in range(len(uniq_data[i]))},
  103.                                 orient='index')
  104.  
  105. df = df.reset_index()
  106.  
  107. df.columns = ['multiindex', 'id', 'email']
  108. df['group'] = df['multiindex'].apply(lambda x: x[0])
  109. df['number_in_group'] = df['multiindex'].apply(lambda x: x[1])
  110. # df = df[['group', 'id', 'email']]
  111. df
  112.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement