Untitled

a guest
Jan 27th, 2018
95
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. import pandas as pd
2. import numpy as np
3.
n_rows = 10000  # number of rows in the benchmark dataframe
n_cat = 5       # categories per column; gives n_cat * n_cat unique (c1, c2) pairs

# Lookup table used for the replacement, kept in two equivalent forms:
# a DataFrame (for the merge-based variant) and a dictionary keyed by
# (c1, c2) tuples (for the dict-based variant).
#
# Each pair (i, j) maps to (i + j) * (i + j + 1) / 2 + j. The product of two
# consecutive integers is always even, so integer floor division is exact and
# avoids the float round-trip.
_lookup_rows = [
    (i, j, (i + j) * (i + j + 1) // 2 + j)
    for i in range(n_cat)
    for j in range(n_cat)
]

# Build the frame in a single call rather than growing it row by row with
# .loc: this avoids repeated reallocation and lets pandas infer integer
# columns from the data.
convert_df = pd.DataFrame(_lookup_rows, columns=['c1', 'c2', 'value'])
convert_dict = {(c1, c2): value for c1, c2, value in _lookup_rows}
16.
def convert_with_df(df, convert_df):
    """Attach lookup columns to *df* with a left merge on ``c1`` and ``c2``.

    Args:
        df: Frame containing the key columns ``c1`` and ``c2``.
        convert_df: Lookup frame containing ``c1`` and ``c2`` plus the
            column(s) to attach (e.g. ``value``). Each (c1, c2) pair must
            appear at most once.

    Returns:
        A new frame with the rows of *df* in their original order and with
        their original index, plus the non-key columns of *convert_df*.
        Rows without a match get missing values in the attached columns.

    Raises:
        pandas.errors.MergeError: If *convert_df* contains duplicate
            (c1, c2) keys, which would otherwise silently multiply rows.
    """
    merged = df.merge(
        convert_df,
        on=["c1", "c2"],
        how="left",
        validate="many_to_one",
    )
    # merge() hands back a fresh RangeIndex. A many-to-one left merge yields
    # exactly one output row per input row, in input order, so the caller's
    # index can be put back as-is.
    merged.index = df.index
    return merged
20.
def convert_with_dict(df, convert_dict):
    """Attach a ``value`` column to a copy of *df* using a dictionary lookup.

    Args:
        df: Frame containing the key columns ``c1`` and ``c2``.
        convert_dict: Mapping from ``(c1, c2)`` tuples to the value to attach.

    Returns:
        A copy of *df* (same index) with an added ``value`` column. Pairs
        absent from *convert_dict* are looked up with ``dict.get`` and so
        produce ``None`` before pandas stores the column.
    """
    # Work on a copy so the caller's frame is left untouched.
    result = df.copy()
    # Zip the two columns instead of turning every row of a 2-D array into a
    # tuple: each column keeps its own scalar type (no coercion to a common
    # dtype) and the keys are plain Python scalars, which hash and compare
    # the same as the NumPy scalars used before.
    keys = zip(result['c1'].tolist(), result['c2'].tolist())
    result['value'] = list(map(convert_dict.get, keys))
    return result
26.
# Random benchmark input: n_rows rows of (c1, c2) category codes, each drawn
# uniformly from the integers 0 .. n_cat - 1.
data = np.random.randint(0, n_cat, size=(n_rows, 2))
df1 = pd.DataFrame(data, columns=['c1', 'c2'])

# Quick manual check of both variants (uncomment to run):
# df2 = convert_with_df(df1, convert_df)
# df3 = convert_with_dict(df1, convert_dict)
34.
35. # --------------------------
36. # Timing results in iPython:
37. # In [97]: timeit convert_with_df  (df1, convert_df)
38. # 6.73 ms ± 79.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
39.
40. # In [98]: timeit convert_with_dict(df1, convert_dict)
41. # 24.5 ms ± 176 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
RAW Paste Data