Advertisement
Guest User

Untitled

a guest
Jun 27th, 2019
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.88 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn.preprocessing import MultiLabelBinarizer
  3. mlb = MultiLabelBinarizer()
  4.  
  5. # load sample data
  6. df = pd.DataFrame( {'user_id':['1','1','2','2','2','3'], 'fruits':['banana','orange','orange','apple','banana','mango']})
  7.  
  8. # collect fruits for each user
  9. transformed_df= df.groupby('user_id').agg({'fruits':lambda x: list(x)}).reset_index()
  10.  
  11. print(transformed_df)
  12. user_id fruits
  13. 0 1 [banana, orange]
  14. 1 2 [orange, apple, banana]
  15. 2 3 [mango]
  16.  
  17. # perform MultiLabelBinarizer
  18. final_df = transformed_df.join(pd.DataFrame(mlb.fit_transform(transformed_df.pop('fruits')),columns=mlb.classes_,index=transformed_df.index))
  19.  
  20. print(final_df)
  21. user_id apple banana mango orange
  22. 0 1 0 1 0 1
  23. 1 2 1 1 0 1
  24. 2 3 0 0 1 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement