Advertisement
Guest User

Untitled

a guest
Aug 25th, 2019
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.91 KB | None | 0 0
  1. import pandas as pd
  2. import hashlib
  3.  
# Raise pandas' display limits: number of columns shown, total line width,
# and per-cell text width.
# NOTE(review): presumably so the print(...head()) in main() is not
# truncated -- confirm.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 128)

# Names of the CSV columns that hold personal data. __do_pii_hash reads them
# by position (0-3: first name, last name, date of birth, post code) and
# main() drops all of them before writing any output file.
_SENSITIVE_COL_NAMES = ['FIRST', 'LAST', 'DOB', 'POSTCODE']
  9.  
  10.  
  11. def __hash_sensitive_data(first, last, dob, post_code):
  12. to_hash = __prep_hash(dob, first, last, post_code)
  13. return __hash_generic_string(to_hash)
  14.  
  15.  
  16. def __hash_generic_string(user_uuid):
  17. salt = 'SALT_VALUE'
  18. m = hashlib.sha256()
  19. m.update(salt.encode('utf-8'))
  20. m.update(user_uuid.encode('utf-8'))
  21. return m.hexdigest()
  22.  
  23.  
  24. def __prep_hash(dob, first, last, post_code):
  25. s_first = __fix_char_length(first)
  26. s_last = __fix_char_length(last)
  27. blob = "{}::{}::{}::{}".format(s_first, s_last, dob, post_code.replace(" ", ""))
  28. return blob
  29.  
  30.  
  31. def __fix_char_length(generic_str):
  32. configured_length = 5
  33. if len(generic_str) > configured_length:
  34. return generic_str[:configured_length]
  35. else:
  36. return generic_str
  37.  
  38.  
  39. def __do_pii_hash(df_row):
  40. return __hash_sensitive_data(df_row[_SENSITIVE_COL_NAMES[0]].upper(), df_row[_SENSITIVE_COL_NAMES[1]].upper(),
  41. df_row[_SENSITIVE_COL_NAMES[2]].upper(), df_row[_SENSITIVE_COL_NAMES[3]].upper())
  42.  
  43.  
  44. def __do_uuid_hash(df_row):
  45. return __hash_generic_string(df_row['USER_ID'].upper())
  46.  
  47.  
  48. def main() -> None:
  49. file_name = 'retro/resources/SENSITIVE-OUTPUT-FULL.csv'
  50. csv_df = pd.read_csv(file_name)
  51.  
  52. csv_df['HASHED_UUID'] = csv_df.apply(__do_uuid_hash, axis=1)
  53. csv_df['HASHED_PII'] = csv_df.apply(__do_pii_hash, axis=1)
  54. pii_stripped = csv_df.drop(_SENSITIVE_COL_NAMES, axis=1).copy(deep=True)
  55. print(pii_stripped.head())
  56. pii_stripped.to_csv('retro/resources/AVIVA-RETRO-10K-CLEARSCORE-VERSION.csv', index=False)
  57. uuid_stripped = pii_stripped.drop('USER_ID', axis=1).copy(deep=True)
  58. uuid_stripped.to_csv('retro/resources/AVIVA-RETRO-10K-AVIVA-VERSION.csv', index=False)
  59.  
  60.  
  61. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement