Advertisement
Guest User

Untitled

a guest
Apr 19th, 2019
158
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.91 KB | None | 0 0
  1. df = pd.DataFrame({'Categories' : ['comedy | action | romcom', 'horror | thriller', 'comedy | coming-of-age'],
  2. 'Score' : [7.8, 5.2, 3.4]})
  3.  
  4.  
  5. #Now how to get the cat list
  6.  
  7. cat_list = []
  8.  
  9. for row in df.itertuples(index=True, name='Pandas'):
  10. categories = getattr(row, 'Categories').split('|')
  11. categories = [x.strip() for x in categories]
  12. for cat in categories:
  13. cat_list.append(cat)
  14.  
  15. cat_list = list(set(cat_list)) #lazily remove dups
  16.  
  17.  
  18. #Below will get an average of something for every cat
  19.  
  20.  
  21. meanslist = []
  22.  
  23. for cat in cat_list:
  24. mean = []
  25. for row in df.itertuples(index=True, name='Pandas'):
  26. categories = getattr(row, 'Categories')
  27. if re.search(cat, (categories + ' \||\| ' + categories)):
  28. mean.append(getattr(row, "Score"))
  29.  
  30. mean = sum(mean)/len(mean)
  31. meanslist.append([cat, mean])
  32.  
  33. print(meanslist)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement