Advertisement
elena1234

Grouping, Aggregating and Transform in Python

Apr 25th, 2022 (edited)
900
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.26 KB | Source Code | 0 0
  1. # Group by Year
  2. df_year_quantity = df_category_date_quantity.groupby(df_category_date_quantity.TransactionDate.dt.year).agg({'Quantity': 'sum'}).reset_index()
  3. df_year_quantity.head()
  4.  
  5. # Group by Year and Month
  6. df_category_date_quantity.assign(yr = df_category_date_quantity['TransactionDate'].dt.year, mnth = df_category_date_quantity['TransactionDate'].dt.month).groupby(['yr', 'mnth']).agg({'Quantity': 'sum'}).reset_index()
  7.  
  8. # Grouping by Category, Year and Month using 'assign'
  9. df_category_year_month_quantity = df_category_date_quantity.assign(category = df_category_date_quantity['ACM Application L2 Ver2'],yr = df_category_date_quantity['TransactionDate'].dt.year, mnth = df_category_date_quantity['TransactionDate'].dt.month).groupby(['category', 'yr', 'mnth']).agg({'Quantity': 'sum'}).reset_index()
  10. df_category_year_month_quantity
  11.  
  12.  
  13. ####################################################################
  14. directors = df.groupby('director').agg({'title' : 'count', 'revenue_musd' : ['sum', 'mean'], 'vote_count' : 'mean'})
  15. directors
  16.  
  17. directors.nlargest(20, ('title', 'count'))
  18. directors.nlargest(20, ('revenue_musd', 'sum'))
  19. directors.nlargest(20, ('revenue_musd', 'mean'))
  20. directors.nlargest(20, ('vote_count', 'mean'))
  21.  
  22. titanic.groupby('sex').agg(survived_total = ('survived_column', 'sum'), survival_rate = ('survived_column', 'mean'), mean_age = ('age', 'mean')) # we don't have multi indexes, we have separаted columns
  23.  
  24. cars.groupby(['model_year', 'origin']).agg(mean_mpg = ('mpg', 'mean'),
  25.                                            min_mpg = ('mpg', 'min'),
  26.                                            max_mpg = ('mpg', 'max'))
  27.  
  28. ##############################################################
  29. df.groupby('Franchise').budget_musd.mean()
  30. df.groupby('Franchise').revenue_musd.mean().nlargest(n=20)
  31. df.groupby('Franchise').ROI.median()
  32. df.groupby('Franchise').popularity.mean().sort_values(ascending = False)
  33.  
  34. new_df = df.groupby('Franchise')[['budget_musd', 'ROI']].sum()
  35. new_df.plot(kind = 'bar', subplots = True, figsize = (8,15), fontsize = 13) # plotting after grouping and applying function
  36. plt.show()
  37.  
  38. ################################################################
  39. # if you want a new column in a dataframe for specific groups, use transform method
  40. titanic['group_survived_rate'] = titanic.groupby(['sex','pclass']).survived.transform('mean')
  41. titanic.head()
  42.  
  43. ####################################################################
  44. # Use apply on groups
  45. def five_oldest_survived(group):
  46.     return group[group.survived ==1].nlargest(5, 'age')
  47.  
  48. titanic.groupby('sex').apply(five_oldest_survived)
  49.  
  50. def  two_most_fuel_efficient_cars(group):
  51.     return group.nlargest(2, 'mpg').loc[:, ["name", "mpg"]]
  52.  
  53. df_the_most_fuel_eff_cars = cars.groupby(['model_year', 'origin']).apply(two_most_fuel_efficient_cars)
  54.  
  55. #####################################################################
  56. # How to expand column after grouping and create visualization
  57. df_year_origin = cars.groupby(['model_year', 'origin']).mpg.mean().unstack().round(2)
  58. cars.groupby(['model_year', 'origin']).mpg.mean().unstack().T # transposing rows and columns
  59.  
  60. df_year_origin.plot(kind = 'bar',  subplots = True, sharex = False, figsize = (8,17), fontsize = 12)
  61. plt.ylabel('avg_mpg per year')
  62. plt.show()
  63.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement