Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the master order list.  `encoding`, `dayfirst` and
# `infer_datetime_format` are not `read_excel` parameters (current pandas
# rejects them with a TypeError), so day-first parsing is requested from
# `to_datetime` below instead.
df = pd.read_excel('LPWHistoricalData_MasterListWITHID.xlsx')
df.head()  # notebook-style preview; no visible effect when run as a script

# Parse the order dates day-first.  This only matters if the column arrives
# as text; values that are already datetimes pass through unchanged.
df['First_Order_Date'] = pd.to_datetime(df['First_Order_Date'], dayfirst=True)

# Label every row with the year-week of its own date.  `.dt.strftime` is
# vectorised and yields NaN for missing dates instead of raising.
df['OrderPeriod'] = df['First_Order_Date'].dt.strftime('%Y-%W')

# Label every row with the year-week of the earliest date seen for its ID.
# `transform('min')` broadcasts the per-ID minimum back onto each row, so no
# index round-trip is needed.
earliest_per_id = df.groupby('ID')['First_Order_Date'].transform('min')
df['CohortGroup'] = earliest_per_id.dt.strftime('%Y-%W')

grouped = df.groupby(['CohortGroup', 'OrderPeriod'])

# Count the non-null ID rows per (CohortGroup, OrderPeriod).
# NOTE(review): this counts rows, not distinct IDs -- switch to 'nunique' if
# distinct users are what is wanted; confirm against the data.
# The count column keeps the name 'ID'; later steps refer to it by that name.
cohorts = grouped.agg({'ID': 'count'})
cohorts.head()  # notebook-style preview; no visible effect when run as a script
def cohort_period(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 1-based `CohortPeriod` column numbering the rows of `df` in order.

    The Nth row of the frame is labelled N, so when this is applied to one
    cohort group at a time the label is the Nth period of that group.
    The column is written onto `df` itself, and the same object is returned.

    Example
    -------
    Say you want to get the 3rd period for every user:

        df.sort_values(['UserId', 'OrderTime'], inplace=True)
        df = df.groupby('UserId', group_keys=False).apply(cohort_period)
        df[df.CohortPeriod == 3]
    """
    df['CohortPeriod'] = np.arange(1, len(df) + 1)
    return df
# Snapshot of the raw counts per (CohortGroup, OrderPeriod).
cohorts.to_csv("GregWeekly1.csv")

# Number the periods inside each cohort group.  `group_keys=False` keeps the
# original (CohortGroup, OrderPeriod) index; with the pandas >= 2.0 default
# the group key would be prepended as an extra index level and the
# `reset_index` below would fail trying to insert `CohortGroup` twice.
cohorts = cohorts.groupby(level=0, group_keys=False).apply(cohort_period)
cohorts.head()  # notebook-style preview; no visible effect when run as a script
cohorts.to_csv("GregWeekly2.csv")

# Flatten the index so CohortGroup and OrderPeriod become ordinary columns,
# then spread the counts into a CohortGroup x OrderPeriod table.
cohorts.reset_index(inplace=True)
let_see = cohorts.pivot(index='CohortGroup', columns='OrderPeriod', values='ID')
# NOTE(review): the pivot is keyed on the calendar OrderPeriod, so the
# CohortPeriod column computed above is not used here.  The earlier
# alternative was:
# cohorts.set_index(['CohortGroup', 'CohortPeriod'], inplace=True)
let_see.to_csv("GregWeekly3.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement