Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd


def category_means(frame, cat_col='Categories', score_col='Score', sep='|'):
    """Return the mean score for every category found in *frame*.

    Each value in ``frame[cat_col]`` is a string holding one or more
    category names separated by *sep* (surrounding whitespace is ignored).
    A row contributes its ``frame[score_col]`` value once to every distinct
    category it lists.

    Categories are compared by exact name, not by regex or substring search,
    so ``'action'`` is never confused with ``'action-comedy'`` and names
    containing regex metacharacters (``'c++'``) are handled correctly.

    Args:
        frame: a DataFrame, or any mapping of column name to a sequence.
        cat_col: name of the column holding the separated category strings.
        score_col: name of the column holding the numeric scores.
        sep: separator between category names inside one cell.

    Returns:
        A list of ``[category, mean]`` pairs, sorted by category name so the
        output order is deterministic.
    """
    scores_by_cat = {}
    for raw_cats, score in zip(frame[cat_col], frame[score_col]):
        # A set so that a category repeated within one row counts only once.
        names = {part.strip() for part in raw_cats.split(sep)}
        # Stray separators ("a | ") would otherwise create an empty category.
        names.discard('')
        for name in names:
            scores_by_cat.setdefault(name, []).append(score)

    # Every list has at least one score, so the division cannot hit zero.
    return [
        [cat, sum(scores) / len(scores)]
        for cat, scores in sorted(scores_by_cat.items())
    ]


# Example data: a '|'-separated category list and a score per row.
df = pd.DataFrame({
    'Categories': [
        'comedy | action | romcom',
        'horror | thriller',
        'comedy | coming-of-age',
    ],
    'Score': [7.8, 5.2, 3.4],
})

# Average score for every category, computed in a single pass over the rows.
meanslist = category_means(df)

# The distinct category names (sorted, duplicates removed).
cat_list = [cat for cat, _ in meanslist]

print(meanslist)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement