Advertisement
Guest User

Untitled

a guest
Feb 23rd, 2017
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.12 KB | None | 0 0
  1. import pandas
  2. import random
  3. import time
  4.  
  5. # creating fake dataset
  6. sample_size = 10000
  7. cat1_pop = range(1, 2001)
  8. cat2_pop = ['a', 'b', 'c', 'd', 'e']
  9. cat1_vals = [random.choice(cat1_pop) for _ in range(sample_size)]
  10. cat2_vals = [random.choice(cat2_pop) for _ in range(len(cat1_pop))]
  11.  
  12. df1 = pandas.DataFrame({'category1': cat1_vals})
  13.  
  14. # df2 contains information about every unique value in df1.category1
  15. df2 = pandas.DataFrame({'category1': cat1_pop,
  16. 'category2': cat2_vals})
  17.  
  18.  
  19. # The goal is to map the category2 information from df2 to df1. Essentially to merge the dataset.
  20.  
  21. # Method 1
  22. # using apply with lambda
  23. ## time taken: 4.7313s
  24. df1['category2_1'] = df1.category1.apply(lambda x: df2[df2.category1 == x]['category2'].values[0])
  25.  
  26. # Method 2
  27. # using map with lambda
  28. ## time taken: 4.5687s
  29. df1['category2_2'] = df1.category1.map(lambda x: df2[df2.category1 == x]['category2'].values[0])
  30.  
  31.  
  32. # Method 3
  33. # by creating a mapping dictionary and then map
  34. ## time taken: 0.0058s
  35. cat_mapping = {}
  36. for idx, cat2 in df2['category2'].iteritems():
  37. cat_mapping[idx+1] = cat2
  38. df1['category2_3'] = df1.category1.map(cat_mapping)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement