Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Compare three ways of copying the ``category2`` value that a lookup frame
# (df2) holds for each ``category1`` key onto a data frame (df1) -- in effect
# a left merge on ``category1``.
import random
import time

import pandas


def make_datasets(sample_size=10000, cat1_pop=range(1, 2001),
                  cat2_pop=('a', 'b', 'c', 'd', 'e')):
    """Create the fake dataset.

    Args:
        sample_size: number of rows in the data frame ``df1``.
        cat1_pop: population of ``category1`` keys; must support ``len()``
            and indexing (``random.choice`` is applied to it).
        cat2_pop: population the ``category2`` labels are drawn from.

    Returns:
        ``(df1, df2)``. ``df1`` has ``sample_size`` rows with a random
        ``category1``. ``df2`` has exactly one row per value in ``cat1_pop``
        (columns ``category1`` and ``category2``), i.e. it carries
        information about every unique value that can occur in
        ``df1.category1``.
    """
    cat1_vals = [random.choice(cat1_pop) for _ in range(sample_size)]
    cat2_vals = [random.choice(cat2_pop) for _ in range(len(cat1_pop))]
    df1 = pandas.DataFrame({'category1': cat1_vals})
    df2 = pandas.DataFrame({'category1': cat1_pop,
                            'category2': cat2_vals})
    return df1, df2


def map_with_apply(df1, df2):
    """Method 1: ``Series.apply`` with a lambda that filters ``df2`` per row.

    The author's measurement for the default dataset: 4.7313s. Every row
    triggers a full boolean scan of ``df2``, which is why it is slow.
    Raises ``IndexError`` if a ``category1`` value is missing from ``df2``.
    """
    return df1.category1.apply(
        lambda x: df2[df2.category1 == x]['category2'].values[0])


def map_with_map(df1, df2):
    """Method 2: ``Series.map`` with the same per-row lambda as method 1.

    The author's measurement for the default dataset: 4.5687s.
    Raises ``IndexError`` if a ``category1`` value is missing from ``df2``.
    """
    return df1.category1.map(
        lambda x: df2[df2.category1 == x]['category2'].values[0])


def map_with_dict(df1, df2):
    """Method 3: build a mapping dictionary once, then ``Series.map`` it.

    The author's measurement for the default dataset: 0.0058s.

    The dictionary is keyed on the actual ``category1`` values of ``df2``
    rather than on ``row index + 1``, so the result stays correct when
    ``df2`` is reordered, filtered or does not start at 1. Keys of ``df1``
    that are absent from ``df2`` map to NaN.
    """
    cat_mapping = dict(zip(df2['category1'], df2['category2']))
    return df1.category1.map(cat_mapping)


if __name__ == '__main__':
    # creating fake dataset
    df1, df2 = make_datasets()

    # The goal is to map the category2 information from df2 to df1.
    # Essentially to merge the dataset.
    df1['category2_1'] = map_with_apply(df1, df2)
    df1['category2_2'] = map_with_map(df1, df2)
    df1['category2_3'] = map_with_dict(df1, df2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement