Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
n_rows = 10000  # number of rows in the test dataframe
n_cat = 5  # number of categories per column; gives n_cat * n_cat unique combinations

# Lookup table used for the replacement, kept in two equivalent forms:
#   convert_dict maps a (c1, c2) tuple to its value
#   convert_df   holds the same mapping as rows with columns c1, c2, value
# The value is the Cantor pairing of (i, j). (i + j) * (i + j + 1) is always
# even, so the integer division is exact and no float round-trip is needed.
convert_dict = {
    (i, j): (i + j) * (i + j + 1) // 2 + j
    for i in range(n_cat)
    for j in range(n_cat)
}

# Build the frame in one shot rather than enlarging it row by row with .loc,
# which reallocates on every insert. Rows are emitted in (i, j) order, so the
# default 0..n_cat*n_cat-1 index matches the j + i * n_cat labels.
convert_df = pd.DataFrame(
    [(c1, c2, value) for (c1, c2), value in convert_dict.items()],
    columns=['c1', 'c2', 'value'],
)
def convert_with_df(df, convert_df):
    """Attach the lookup columns of ``convert_df`` to ``df`` with a left join.

    Rows are matched on the ``c1`` and ``c2`` columns. Every row of ``df`` is
    kept; rows without a match get missing values in the joined columns.
    A new frame is returned and ``df`` itself is not modified.
    """
    join_keys = ["c1", "c2"]
    merged = df.merge(convert_df, how="left", on=join_keys)
    return merged
def convert_with_dict(df, convert_dict):
    """Return a copy of ``df`` with a ``value`` column looked up in a dict.

    Args:
        df: DataFrame containing the columns ``c1`` and ``c2``.
        convert_dict: Mapping from ``(c1, c2)`` tuples to the value to assign.

    Returns:
        A new DataFrame with the added ``value`` column. Pairs that are not
        keys of ``convert_dict`` produce a missing value (``dict.get``
        returns ``None`` for them).
    """
    # Work on a copy to preserve the caller's frame.
    result = df.copy()
    # Per-column tolist() yields native Python scalars, so zip builds the key
    # tuples directly; this avoids materialising a 2-D array and converting
    # each of its rows into a tuple of NumPy scalars.
    keys = zip(result['c1'].tolist(), result['c2'].tolist())
    result['value'] = list(map(convert_dict.get, keys))
    return result
# Random test frame: n_rows rows, two columns of integers in [0, n_cat).
data = np.random.randint(0, n_cat, size=(n_rows, 2))
df1 = pd.DataFrame(data=data, columns=['c1', 'c2'])
- # Sanity check: uncomment to run both converters on df1.
- #df2 = convert_with_df (df1, convert_df)
- #df3 = convert_with_dict(df1, convert_dict)
- # --------------------------
- # Timing results in iPython:
- # In [97]: timeit convert_with_df (df1, convert_df)
- # 6.73 ms ± 79.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
- # In [98]: timeit convert_with_dict(df1, convert_dict)
- # 24.5 ms ± 176 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement