Guest User

Untitled

a guest
Jan 27th, 2018
95
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import pandas as pd
  2. import numpy as np
  3.  
  4. n_rows = 10000  # number of rows in dataframe
  5. n_cat  = 5      # number of categories per column; gives a total of n_cat*n_cat unique combinations
  6.  
  7. # Dataframe en dictionary used for replacement
  8. convert_df = pd.DataFrame(columns=['c1','c2','value'])
  9. convert_dict = {}
  10.  
  11. for i in range(n_cat):
  12.     for j in range(n_cat):
  13.         value = int(0.5*(i+j)*(i+j+1)+j)
  14.         convert_dict[(i,j)] = value
  15.         convert_df.loc[j+i*n_cat] = {'c1':i, 'c2':j, 'value':value}
  16.  
  17. def convert_with_df(df, convert_df):
  18.     df = df.merge(convert_df, on=["c1", "c2"], how="left")
  19.     return df
  20.  
  21. def convert_with_dict(df, convert_dict):
  22.     # Added copy to preserve original df.
  23.     df = df.copy()
  24.     df['value'] = list(map(convert_dict.get, map(tuple, df[['c1', 'c2']].values)))
  25.     return df
  26.  
  27. # Create some data and dataframe
  28. data = np.random.randint(low=0, high=n_cat, size=[n_rows, 2])
  29. df1 = pd.DataFrame(data, columns=['c1','c2'])
  30.  
  31. # Quick check: uncomment the two lines below to run both conversions.
  32. #df2 = convert_with_df  (df1, convert_df)
  33. #df3 = convert_with_dict(df1, convert_dict)
  34.  
  35. # --------------------------
  36. # Timing results in iPython:
  37. # In [97]: timeit convert_with_df  (df1, convert_df)
  38. # 6.73 ms ± 79.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
  39.  
  40. # In [98]: timeit convert_with_dict(df1, convert_dict)
  41. # 24.5 ms ± 176 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
RAW Paste Data