Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np  # not used below; kept because the original script imported it
import pandas as pd

PURCHASES_CSV = '/datasets/game_purchases.csv'


def add_cohort_months(purchases):
    """Return a copy of *purchases* with cohort date columns attached.

    The input needs ``player_id`` and ``purchase_datetime`` columns.
    ``purchase_datetime`` may hold text or datetimes; it is parsed with
    ``pd.to_datetime`` so the per-player minimum is a chronological
    comparison rather than a string comparison.

    Columns set on the returned copy:
        purchase_datetime        parsed timestamps
        first_purchase_datetime  earliest purchase of the row's player
        purchase_month           first day of the purchase's month
        first_purchase_month     first day of the player's first month

    The caller's frame is left unmodified.
    """
    enriched = purchases.copy()
    enriched['purchase_datetime'] = pd.to_datetime(enriched['purchase_datetime'])
    enriched['first_purchase_datetime'] = (
        enriched.groupby('player_id')['purchase_datetime'].transform('min')
    )
    # Truncate through a monthly Period and convert back to a timestamp:
    # this gives month-start datetimes without relying on the
    # ``astype('datetime64[M]')`` cast, which recent pandas versions reject.
    for source, target in (
        ('purchase_datetime', 'purchase_month'),
        ('first_purchase_datetime', 'first_purchase_month'),
    ):
        enriched[target] = enriched[source].dt.to_period('M').dt.to_timestamp()
    return enriched


def group_by_cohorts(purchases):
    """Aggregate purchases per (first_purchase_month, purchase_month) pair.

    Expects the columns produced by ``add_cohort_months`` plus
    ``purchase_id``.  The result has one row per pair with:
        purchase_id           number of distinct purchases
        player_id             number of distinct players
        purchases_per_player  ratio of the two counts
        cohort_lifetime       whole months between the two month columns
    """
    grouped = (
        purchases
        .groupby(['first_purchase_month', 'purchase_month'])
        .agg({'purchase_id': 'nunique', 'player_id': 'nunique'})
        .reset_index()
    )
    grouped['purchases_per_player'] = grouped['purchase_id'] / grouped['player_id']

    # Exact calendar-month difference.  Dividing a timedelta by
    # np.timedelta64(1, 'M') treats a month as an average-length duration
    # and needs rounding; integer year/month arithmetic does not.
    cohort_start = grouped['first_purchase_month'].dt
    current = grouped['purchase_month'].dt
    grouped['cohort_lifetime'] = (
        (current.year - cohort_start.year) * 12
        + (current.month - cohort_start.month)
    ).astype('int')
    return grouped


def build_lifetime_pivot(grouped):
    """Pivot the grouped table: cohorts as rows, cohort lifetime as columns.

    Cell values are the mean ``purchases_per_player`` for that cohort at
    that lifetime; combinations with no data come out as NaN.
    """
    return grouped.pivot_table(
        index='first_purchase_month',
        columns='cohort_lifetime',
        values='purchases_per_player',
        aggfunc='mean',
    )


if __name__ == '__main__':
    purchases = add_cohort_months(pd.read_csv(PURCHASES_CSV))
    purchases_grouped_by_cohorts = group_by_cohorts(purchases)
    lifetime_pivot = build_lifetime_pivot(purchases_grouped_by_cohorts)
    print(lifetime_pivot)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement