Advertisement
Guest User

Untitled

a guest
Dec 13th, 2019
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.59 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3.  
  4. user_activity = pd.read_csv('/datasets/work_user_activity.csv')
  5. user_activity['activity_date'] = pd.to_datetime(user_activity['activity_date'])
  6. first_activity_date = user_activity.groupby(['user_id'])['activity_date'].min()
  7. first_activity_date.name = 'first_activity_date'
  8. user_activity = user_activity.join(first_activity_date,on='user_id')
  9. user_activity['activity_week'] = pd.to_datetime(user_activity['activity_date'], unit='d') - pd.to_timedelta(user_activity['activity_date'].dt.dayofweek, unit='d')
  10. user_activity['first_activity_week'] = pd.to_datetime(user_activity['first_activity_date'], unit='d') - pd.to_timedelta(user_activity['first_activity_date'].dt.dayofweek, unit='d')
  11.  
  12. user_activity['cohort_lifetime'] = user_activity['activity_week'] - user_activity['first_activity_week']
  13. user_activity['cohort_lifetime'] = user_activity['cohort_lifetime'] / np.timedelta64(1,'W')
  14. user_activity['cohort_lifetime'] = user_activity['cohort_lifetime'].astype(int)
  15.  
  16.  
  17. cohorts = user_activity.groupby(['first_activity_week','cohort_lifetime']).agg({'user_id':'nunique'}).reset_index()
  18.  
  19. initial_users_count = cohorts[cohorts['cohort_lifetime'] == 0][['first_activity_week','user_id']]
  20.  
  21. initial_users_count = initial_users_count.rename(columns={'user_id':'cohort_users'})
  22.  
  23. cohorts = cohorts.merge(initial_users_count,on='first_activity_week')
  24.  
  25. cohorts['retention'] = cohorts['user_id'] / cohorts['cohort_users']
  26.  
  27. retention_pivot = cohorts.pivot_table(index='first_activity_week',columns='cohort_lifetime',values='retention',aggfunc='sum')
  28.  
  29. print(retention_pivot)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement