Advertisement
Guest User

Untitled

a guest
Dec 13th, 2019
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.62 KB | None | 0 0
  1. import pandas as pd
  2. user_activity = pd.read_csv('/datasets/work_user_activity.csv')
  3. user_activity['activity_date'] = pd.to_datetime(user_activity['activity_date'])
  4. first_activity_date = user_activity.groupby(['user_id'])['activity_date'].min()
  5. first_activity_date.name = 'first_activity_date'
  6. user_activity = user_activity.join(first_activity_date,on='user_id')
  7. user_activity['activity_week'] = pd.to_datetime(user_activity['activity_date'], unit='d') - pd.to_timedelta(user_activity['activity_date'].dt.dayofweek, unit='d')
  8. # Тут ваш код
  9.  
  10. user_activity['first_activity_week'] = pd.to_datetime(user_activity['first_activity_date'], unit='d') - pd.to_timedelta(user_activity['first_activity_date'].dt.dayofweek, unit='d')
  11. # Тут ваш код
  12. import numpy as np
  13. user_activity['cohort_lifetime'] = user_activity['activity_week'] - user_activity['first_activity_week']
  14. user_activity['cohort_lifetime'] = user_activity['cohort_lifetime'] / np.timedelta64(1,'W')
  15. user_activity['cohort_lifetime'] = user_activity['cohort_lifetime'].astype(int)
  16.  
  17. cohorts = user_activity.groupby(['first_activity_week','cohort_lifetime']).agg({'user_id':'nunique'}).reset_index()
  18. initial_users_count = cohorts[cohorts['cohort_lifetime'] == 0][['first_activity_week','user_id']]
  19. initial_users_count = initial_users_count.rename(columns={'user_id':'cohort_users'})
  20. cohorts = cohorts.merge(initial_users_count,on='first_activity_week')
  21. cohorts['retention'] = cohorts['user_id']/cohorts['cohort_users']
  22. retention_pivot = cohorts.pivot_table(index='first_activity_week',columns='cohort_lifetime',values='retention',aggfunc='sum')
  23. print(retention_pivot)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement