# WeeklyAnalysis.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Load the master list. Only the path is passed: `encoding`, `dayfirst` and
# `infer_datetime_format` are not `read_excel` parameters, and current pandas
# raises TypeError on unknown keyword arguments.
df = pd.read_excel('LPWHistoricalData_MasterListWITHID.xlsx')

# NOTE(review): the original call asked for dayfirst=True on read_excel. If
# First_Order_Date is stored as day-first text rather than real Excel dates,
# pass dayfirst=True to pd.to_datetime below -- confirm against the workbook.
df['First_Order_Date'] = pd.to_datetime(df['First_Order_Date'])

# Year-week label ('%Y-%W') of each row's First_Order_Date. The vectorised
# .dt accessor keeps missing dates missing instead of raising on NaT.
df['OrderPeriod'] = df['First_Order_Date'].dt.strftime('%Y-%W')

# Cohort = year-week of the earliest First_Order_Date seen for each ID.
# Indexing by ID lets the per-ID result align back onto every row of that ID.
df.set_index('ID', inplace=True)
first_order = df.groupby(level=0)['First_Order_Date'].min()
df['CohortGroup'] = first_order.dt.strftime('%Y-%W')
df.reset_index(inplace=True)

grouped = df.groupby(['CohortGroup', 'OrderPeriod'])

# Number of rows with a non-null ID per (CohortGroup, OrderPeriod).
# This is a row count, not a distinct-ID count.
cohorts = grouped.agg({'ID': 'count'})

def cohort_period(df):
    """
    Return a copy of `df` with a 1-based `CohortPeriod` column added.

    Rows are numbered 1, 2, ..., len(df) in their existing order, so when the
    function is applied to one group at a time the value is the Nth row
    (period) within that group. The input frame is left untouched: mutating
    the chunk handed over by `groupby(...).apply` is discouraged by pandas.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame (typically a single group) whose rows are already in the
        desired period order.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns plus `CohortPeriod`.
        An existing `CohortPeriod` column is overwritten in the copy.

    Example
    -------
    Say you want to get the 3rd month for every user:
        df.sort_values(['UserId', 'OrderTime'], inplace=True)
        df = df.groupby('UserId', group_keys=False).apply(cohort_period)
        df[df.CohortPeriod == 3]
    """
    return df.assign(CohortPeriod=np.arange(1, len(df) + 1))

# Snapshot of the per-(CohortGroup, OrderPeriod) counts before numbering.
cohorts.to_csv("GregWeekly1.csv")

# Number the rows within each cohort (outer index level) via cohort_period.
# group_keys=False keeps the existing (CohortGroup, OrderPeriod) index; with
# the pandas >= 2.0 default the group key is prepended as an extra
# 'CohortGroup' level and the reset_index below fails on the duplicate name.
cohorts = cohorts.groupby(level=0, group_keys=False).apply(cohort_period)
cohorts.to_csv("GregWeekly2.csv")

# Turn the index levels back into columns so they can drive the pivot:
# one row per CohortGroup, one column per OrderPeriod, cells hold the ID count.
cohorts.reset_index(inplace=True)
let_see = cohorts.pivot(index='CohortGroup', columns='OrderPeriod', values='ID')

let_see.to_csv("GregWeekly3.csv")