Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- d1 = pd.DataFrame({
- 'ID':['GFJH325','DGJR434','TGJF756','HJMG124','FGJF807',
- 'HKUU675','VNVC345','GHPH638','EGHF352','VGBM572'],
- 'T_MEMBER':['TAA','TAB','TAC','TAA','TAC',
- 'TAC','TAB','TAA','TAA','TAA'],
- 'T_CATEGORY':['Client', 'Client', 'Client', 'House', 'Client',
- 'Client', 'House', 'Client', 'Client', 'Client',],
- 'SOURSE':['FHGK', 'JFYT', 'FHGK', 'FHGK', 'DBTR',
- 'DBTR', 'FHGK', 'FHGK', 'DBTR', 'DBTR',],
- 'T_CUR':['EUR', 'EUR', 'EUR', 'USD', 'USD', 'EUR', 'EUR', 'USD', 'USD', 'EUR']
- })
- d2 = pd.DataFrame({
- 'FIRM_ID':['TAA','TAA','TAB','TAB','TAC','TAC',
- 'TAA','TAA','TAB','TAB',
- 'TAC','TAC','TAC',
- 'TAB','TAB','TAB'],
- 'MEMBER':['CAA','CAB','CAA','CAB','CAA','CAB',
- 'CAA','CAA','CAB','CAB',
- 'CAB','CAB','CAB',
- 'CAA','CAA','CAA'],
- 'RANK':['1', '1', '1', '1', '1', '1',
- '2', '2', '2', '2',
- '3', '3', '3',
- '4','4','4',],
- 'TRANSACTION_GROUP':['EUR', 'USD', 'EUR', 'USD', 'EUR','USD',
- 'E-C', 'E-C', 'U-C', 'U-C',
- 'E-C-FHGK', 'E-C-FHGK', 'E-C-FHGK',
- 'E-C-FHGK-JFYT','E-C-FHGK-JFYT','E-C-FHGK-JFYT',],
- 'FIELD_NAME':['CUR', 'CUR', 'CUR', 'CUR', 'CUR', 'CUR',
- 'CUR', 'CATEGORY', 'CUR', 'CATEGORY',
- 'CUR', 'CATEGORY', 'SOURSE',
- 'CUR', 'CATEGORY', 'SOURSE',],
- 'OPERATOR':['==', '==', '==', '==', '==', '==',
- '==', '==', '==', '==',
- '==', '==', '==',
- '==', '==', 'in'],
- 'TG_EXP_VALUES':['EUR', 'USD', 'EUR', 'USD', 'EUR', 'USD',
- 'EUR', 'Client', 'USD', 'Client',
- 'EUR', 'Client', 'FHGK',
- 'EUR', 'Client', 'FHGK,JFYT']
- })
- ID SOURSE T_CATEGORY T_CUR T_MEMBER C_MEMBER
- 0 GFJH325 FHGK Client EUR TAA CAA
- 1 DGJR434 JFYT Client EUR TAB CAA
- 2 TGJF756 FHGK Client EUR TAC CAB
- 3 HJMG124 FHGK House USD TAA CAB
- 4 FGJF807 DBTR Client USD TAC CAB
- 5 HKUU675 DBTR Client EUR TAC CAA
- 6 VNVC345 FHGK House EUR TAB CAA
- 7 GHPH638 FHGK Client USD TAA CAB
- 8 EGHF352 DBTR Client USD TAA CAB
- 9 VGBM572 DBTR Client EUR TAA CAA
- def build_query_string(tg, additional_mapping ={}):
- tg = tg.copy()
- tg.drop_duplicates(inplace=True)
- tg.TG_EXP_VALUES = "'" + tg.TG_EXP_VALUES + "'"
- FIELD_NAME = tg.FIELD_NAME.map(additional_mapping).fillna(tg.FIELD_NAME)
- tg['q'] = '(' + FIELD_NAME +' '+ tg.OPERATOR +' ('+ tg.TG_EXP_VALUES + '))'
- return tg['q'].str.cat(sep = ' & ')
- def add_column(df_base, dataframe, maps, left_on, right_on, column, all_columns,subset):
- ranks = np.sort(pd.Series(dataframe.RANK.values.ravel()).unique())[::-1]
- df_base = df_base.copy()
- df_base[column] = np.nan
- for rank in ranks:
- df_base_w_null = df_base[df_base[column].isna()]
- if len(df_base_w_null)==0:
- break
- df_rank = dataframe[dataframe.RANK == rank]
- query_df = pd.DataFrame({'query':df_rank[['TRANSACTION_GROUP','FIELD_NAME','OPERATOR','TG_EXP_VALUES']]
- .groupby(['TRANSACTION_GROUP'])
- .apply(lambda x: build_query_string(x,maps))}).reset_index()
- h = []
- for q in query_df.itertuples():
- df_map=df_rank[df_rank.TRANSACTION_GROUP==q.TRANSACTION_GROUP][all_columns].drop_duplicates()
- tmp = df_base_w_null.query(q.query).copy()
- del tmp[column]
- if len(tmp)!=0 and len(df_map)!=0:
- tmp[column] = tmp.reset_index().merge(df_map, how='left',left_on=left_on,
- right_on=right_on).set_index('index')[column]
- h.append(tmp)
- if len(h) != 0:
- df_base.update(pd.concat(h).drop_duplicates(subset=subset, keep="last")[column])
- return df_base[column]
- maps={'CATEGORY':'T_CATEGORY',
- 'CUR':'T_CUR'}
- column = 'MEMBER'
- left_on_buy = ['T_MEMBER',]
- right_on = ['FIRM_ID',]
- all_columns = ['FIRM_ID','MEMBER']
- subset = 'ID'
- d1['C_MEMBER'] = add_column(d1, d2, maps, left_on_buy,right_on, column, all_columns,subset)
Add Comment
Please, Sign In to add comment