Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np  # not used below; retained because the original script imported it
import pandas as pd

# Weekly cohort retention: users are grouped by the week of their first
# activity, and for every following week we compute the share of the cohort
# that was still active.

ACTIVITY_CSV_PATH = '/datasets/work_user_activity.csv'


def _week_start(dates):
    """Return the Monday 00:00 of the week each timestamp in *dates* falls in.

    The time of day is dropped on purpose: without it two events from the
    same calendar week would get different "week" values, and week
    differences would no longer be whole multiples of seven days.
    """
    dates = pd.to_datetime(dates)
    return dates.dt.normalize() - pd.to_timedelta(dates.dt.dayofweek, unit='D')


def add_cohort_columns(user_activity):
    """Return a copy of *user_activity* enriched with cohort columns.

    Reads the ``user_id`` and ``activity_date`` columns and adds:

    * ``first_activity_date`` - earliest ``activity_date`` of the user;
    * ``activity_week`` - Monday of the week of ``activity_date``;
    * ``first_activity_week`` - Monday of the week of the first activity;
    * ``cohort_lifetime`` - whole weeks between the two Mondays (int).

    The input frame is not modified.
    """
    activity = user_activity.copy()
    activity['activity_date'] = pd.to_datetime(activity['activity_date'])

    first_activity_date = activity.groupby(['user_id'])['activity_date'].min()
    first_activity_date.name = 'first_activity_date'
    activity = activity.join(first_activity_date, on='user_id')

    activity['activity_week'] = _week_start(activity['activity_date'])
    activity['first_activity_week'] = _week_start(activity['first_activity_date'])

    # Both columns are midnight Mondays, so the gap is an exact multiple of
    # seven days and integer division cannot lose a week to rounding.
    week_gap = activity['activity_week'] - activity['first_activity_week']
    activity['cohort_lifetime'] = (week_gap.dt.days // 7).astype(int)
    return activity


def build_cohorts(user_activity):
    """Aggregate enriched activity into one row per (cohort week, lifetime).

    Expects the columns produced by :func:`add_cohort_columns`. The result
    has ``user_id`` (unique active users in that week), ``cohort_users``
    (unique users at lifetime 0) and ``retention`` (their ratio).
    """
    cohorts = (
        user_activity
        .groupby(['first_activity_week', 'cohort_lifetime'])
        .agg({'user_id': 'nunique'})
        .reset_index()
    )
    is_first_week = cohorts['cohort_lifetime'] == 0
    initial_users_count = (
        cohorts.loc[is_first_week, ['first_activity_week', 'user_id']]
        .rename(columns={'user_id': 'cohort_users'})
    )
    cohorts = cohorts.merge(initial_users_count, on='first_activity_week')
    cohorts['retention'] = cohorts['user_id'] / cohorts['cohort_users']
    return cohorts


def build_retention_pivot(cohorts):
    """Pivot *cohorts* into a cohort-week x lifetime table of retention."""
    return cohorts.pivot_table(
        index='first_activity_week',
        columns='cohort_lifetime',
        values='retention',
        aggfunc='sum',
    )


if __name__ == '__main__':
    user_activity = pd.read_csv(ACTIVITY_CSV_PATH)
    user_activity = add_cohort_columns(user_activity)
    cohorts = build_cohorts(user_activity)
    retention_pivot = build_retention_pivot(cohorts)
    print(retention_pivot)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement