daily pastebin goal
68%
SHARE
TWEET

Untitled

a guest Dec 13th, 2018 60 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. d1 = pd.DataFrame({
  2.     'ID':['GFJH325','DGJR434','TGJF756','HJMG124','FGJF807',
  3.           'HKUU675','VNVC345','GHPH638','EGHF352','VGBM572'],
  4.     'T_MEMBER':['TAA','TAB','TAC','TAA','TAC',
  5.                'TAC','TAB','TAA','TAA','TAA'],
  6.     'T_CATEGORY':['Client', 'Client', 'Client', 'House', 'Client',
  7.                 'Client', 'House', 'Client', 'Client', 'Client',],
  8.     'SOURSE':['FHGK', 'JFYT', 'FHGK', 'FHGK', 'DBTR',
  9.               'DBTR', 'FHGK', 'FHGK', 'DBTR', 'DBTR',],
  10.     'T_CUR':['EUR', 'EUR', 'EUR', 'USD', 'USD', 'EUR', 'EUR', 'USD', 'USD', 'EUR']
  11. })  
  12. d2 = pd.DataFrame({    
  13.     'FIRM_ID':['TAA','TAA','TAB','TAB','TAC','TAC',
  14.                'TAA','TAA','TAB','TAB',
  15.                'TAC','TAC','TAC',
  16.                'TAB','TAB','TAB'],
  17.     'MEMBER':['CAA','CAB','CAA','CAB','CAA','CAB',
  18.               'CAA','CAA','CAB','CAB',
  19.               'CAB','CAB','CAB',
  20.               'CAA','CAA','CAA'],
  21.     'RANK':['1', '1', '1', '1', '1', '1',
  22.             '2', '2', '2', '2',
  23.             '3', '3', '3',
  24.             '4','4','4',],
  25.     'TRANSACTION_GROUP':['EUR', 'USD', 'EUR', 'USD', 'EUR','USD',
  26.                          'E-C', 'E-C', 'U-C', 'U-C',
  27.                          'E-C-FHGK', 'E-C-FHGK', 'E-C-FHGK',
  28.                          'E-C-FHGK-JFYT','E-C-FHGK-JFYT','E-C-FHGK-JFYT',],
  29.     'FIELD_NAME':['CUR', 'CUR', 'CUR', 'CUR', 'CUR', 'CUR',
  30.                   'CUR', 'CATEGORY', 'CUR', 'CATEGORY',
  31.                   'CUR', 'CATEGORY', 'SOURSE',
  32.                   'CUR', 'CATEGORY', 'SOURSE',],
  33.     'OPERATOR':['==', '==', '==', '==', '==', '==',
  34.                 '==', '==', '==', '==',
  35.                 '==', '==', '==',
  36.                 '==', '==', 'in'],
  37.     'TG_EXP_VALUES':['EUR', 'USD', 'EUR', 'USD', 'EUR', 'USD',
  38.                      'EUR', 'Client', 'USD', 'Client',
  39.                      'EUR', 'Client', 'FHGK',
  40.                      'EUR', 'Client', 'FHGK,JFYT']
  41. })
  42.    
  43. ID SOURSE T_CATEGORY T_CUR T_MEMBER C_MEMBER
  44. 0  GFJH325   FHGK     Client   EUR      TAA      CAA
  45. 1  DGJR434   JFYT     Client   EUR      TAB      CAA
  46. 2  TGJF756   FHGK     Client   EUR      TAC      CAB
  47. 3  HJMG124   FHGK      House   USD      TAA      CAB
  48. 4  FGJF807   DBTR     Client   USD      TAC      CAB
  49. 5  HKUU675   DBTR     Client   EUR      TAC      CAA
  50. 6  VNVC345   FHGK      House   EUR      TAB      CAA
  51. 7  GHPH638   FHGK     Client   USD      TAA      CAB
  52. 8  EGHF352   DBTR     Client   USD      TAA      CAB
  53. 9  VGBM572   DBTR     Client   EUR      TAA      CAA
  54.    
  55. def build_query_string(tg, additional_mapping ={}):
  56.     tg = tg.copy()
  57.     tg.drop_duplicates(inplace=True)
  58.     tg.TG_EXP_VALUES = "'" + tg.TG_EXP_VALUES + "'"
  59.     FIELD_NAME = tg.FIELD_NAME.map(additional_mapping).fillna(tg.FIELD_NAME)
  60.     tg['q'] = '(' + FIELD_NAME +' '+ tg.OPERATOR +' ('+ tg.TG_EXP_VALUES + '))'
  61.     return tg['q'].str.cat(sep = ' & ')
  62.  
  63.  
  64. def add_column(df_base, dataframe, maps, left_on, right_on, column, all_columns,subset):
  65.     ranks = np.sort(pd.Series(dataframe.RANK.values.ravel()).unique())[::-1]
  66.     df_base = df_base.copy()
  67.     df_base[column] = np.nan    
  68.     for rank in ranks:
  69.         df_base_w_null = df_base[df_base[column].isna()]
  70.         if len(df_base_w_null)==0:
  71.             break
  72.         df_rank = dataframe[dataframe.RANK == rank]
  73.         query_df = pd.DataFrame({'query':df_rank[['TRANSACTION_GROUP','FIELD_NAME','OPERATOR','TG_EXP_VALUES']]
  74.                     .groupby(['TRANSACTION_GROUP'])
  75.                     .apply(lambda x: build_query_string(x,maps))}).reset_index()
  76.         h = []
  77.         for q in query_df.itertuples():
  78.             df_map=df_rank[df_rank.TRANSACTION_GROUP==q.TRANSACTION_GROUP][all_columns].drop_duplicates()
  79.             tmp = df_base_w_null.query(q.query).copy()
  80.             del tmp[column]
  81.             if len(tmp)!=0 and len(df_map)!=0:
  82.                 tmp[column] = tmp.reset_index().merge(df_map, how='left',left_on=left_on,
  83.                                                         right_on=right_on).set_index('index')[column]
  84.                 h.append(tmp)
  85.         if len(h) != 0:
  86.             df_base.update(pd.concat(h).drop_duplicates(subset=subset, keep="last")[column])
  87.     return df_base[column]
  88.  
  89.  
  90. maps={'CATEGORY':'T_CATEGORY',
  91.       'CUR':'T_CUR'}
  92. column = 'MEMBER'
  93. left_on_buy = ['T_MEMBER',]
  94. right_on = ['FIRM_ID',]
  95. all_columns = ['FIRM_ID','MEMBER']
  96. subset = 'ID'
  97. d1['C_MEMBER'] = add_column(d1, d2, maps, left_on_buy,right_on, column, all_columns,subset)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top