m8_

check values across dataframes

m8_
Sep 3rd, 2019
288
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.31 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3.  
  4. # create data mapping fields to code names
  5. map_data = [['Gender','gender_codes'],['Race','race_codes'],
  6.          ['Ethnicity','ethnicity_codes']]
  7. # create dataframe
  8. map_df = pd.DataFrame(map_data,columns=['Field Name','Code Name'])
  9.  
  10. # create data of codes
  11. codes_data = {'gender_codes':['1','2',np.nan,np.nan,np.nan,np.nan,np.nan],
  12.               'race_codes':['1','2','3','4',np.nan,np.nan,np.nan],
  13.               'ethnicity_codes':['1','2','3','4','5','6','7']}
  14. # create dataframe
  15. codes_df = pd.DataFrame(codes_data)
  16.  
  17.  
  18. # create data of data to check
  19. data = [['Alex','99','1',np.nan],
  20.         ['Cindy',np.nan,'4','5'],
  21.         ['Tom','1','99','1']]
  22. # create dataframe
  23. data_df = pd.DataFrame(data,columns=['Name','Gender','Race','Ethnicity'])
  24.  
  25. # code column to list
  26. gender_codes = codes_df["gender_codes"].tolist()
  27.  
  28. # remove nan string
  29. gender_codes = [gender_codes
  30.                for gender_codes in gender_codes
  31.                if  str(gender_codes) != "nan"]
  32.  
  33. # check each value against code list
  34. result_df = data_df.loc[(~data_df.Gender.isin(gender_codes))]
  35. # filter dataframe
  36. result_df = result_df.filter(items = ["Name","Gender"])
  37. # rename column
  38. result_df.rename(columns = {"Gender":"Value"}, inplace = True)
  39. # add new column
  40. result_df['Column'] = 'Gender'
Advertisement
Add Comment
Please, Sign In to add comment