Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd
import numpy as np


def _invalid_values(frame, field, valid_codes):
    """Return the rows of *frame* whose *field* value is not an allowed code.

    Args:
        frame: DataFrame holding a ``Name`` column and the column *field*.
        field: Name of the column to validate.
        valid_codes: Collection of allowed values for *field*.

    Returns:
        A new DataFrame with columns ``Name``, ``Value`` (the offending
        value) and ``Column`` (the name of the checked field). The original
        row index of each offending row is preserved.
    """
    # Missing values (NaN) are reported as invalid too, because NaN is not
    # part of the allowed code list.
    is_invalid = ~frame[field].isin(valid_codes)
    # Build the report through a chain of non-mutating calls so that no
    # in-place edit is ever made on a slice of the caller's frame.
    return (
        frame.loc[is_invalid, ["Name", field]]
        .rename(columns={field: "Value"})
        .assign(Column=field)
    )


# Mapping of data field names to the code-list column that validates them.
map_data = [
    ['Gender', 'gender_codes'],
    ['Race', 'race_codes'],
    ['Ethnicity', 'ethnicity_codes'],
]
map_df = pd.DataFrame(map_data, columns=['Field Name', 'Code Name'])

# Allowed codes per code list. Shorter lists are padded with NaN so that all
# columns have the same length.
codes_data = {
    'gender_codes': ['1', '2', np.nan, np.nan, np.nan, np.nan, np.nan],
    'race_codes': ['1', '2', '3', '4', np.nan, np.nan, np.nan],
    'ethnicity_codes': ['1', '2', '3', '4', '5', '6', '7'],
}
codes_df = pd.DataFrame(codes_data)

# Records whose coded fields are to be checked.
data = [
    ['Alex', '99', '1', np.nan],
    ['Cindy', np.nan, '4', '5'],
    ['Tom', '1', '99', '1'],
]
data_df = pd.DataFrame(data, columns=['Name', 'Gender', 'Race', 'Ethnicity'])

# Allowed gender codes, with the NaN padding removed.
gender_codes = codes_df["gender_codes"].dropna().tolist()

# Rows whose Gender value is not an allowed code, reported as
# Name / Value / Column.
result_df = _invalid_values(data_df, "Gender", gender_codes)
Advertisement
Add Comment
Please, Sign In to add comment