Colby

import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_excel("Downloads/The Other List.xlsx", sheet_name=None)


def split_string(s):
    start, *rest = s.split(".")
    return pd.Series([int(start), ".".join(rest).strip()], index=["date", "drinks"])


months = ["January", "February", "March", "April", "May", "June", "July"]


def unit_split(string):
    amount, unit, *beverage = string.split()
    return pd.Series(
        [float(amount), unit, " ".join(beverage)], index=["volume", "unit", "beverage"]
    )


def extract_month(month):
    column = data[month].T.melt().dropna()["value"]
    days = column[column.str.match(r"^\d+\. ", na=False)].apply(split_string)
    df = days[((days["date"] == 1).cumsum() == 1)].copy()
    df["date"] = pd.to_datetime(df["date"].apply(lambda x: f"2023 {month} {x}"))
    return df


series = (
    pd.concat([extract_month(month) for month in months])
    .set_index("date")["drinks"]
    .apply(lambda x: [z for y in x.split("  ") if (z := y.strip())])
    .explode()
)

series[series.str.match("N/A")] = "0 ml Water"
series[series.str.match("1 Kamikazi")] = "375 ml Kamikazi Blu-Dacious"
series[series.str.match("1L Tacozilla")] = "1000 ml Tacozilla 4.2%"
series[series.str.match("Green")] = "1.5 oz Green Tea Shot"
series[series.str.match("1 Jello shot")] = "1 oz Jello shot"

df = series.apply(unit_split)
regex = "(\d+\.?\d*)%"
df["abv"] = df["beverage"].str.extract(regex, expand=False).apply(float)
abvs = {
    "PBR": 4.8,
    "Blue Moon": 5.4,
    "Coors Light": 4.2,
    "Busch Light": 4.1,
    "Modelo": 4.4,
    "Citizens Cider": 5.2,
    "Maker's Mark": 45,
    "Barefoot Moscato": 9,
    "Modelo Negra": 5.4,
    "Budweiser": 5,
    "Jack Daniels": 40,
    "Guinness": 4.2,
    "Corona Extra": 4.6,
    "Corona": 4.8,
    "Bud Light": 4.2,
    "Miller High Life": 4.6,
    "Kamikazi Blu-Dacious": 15,
    "Smirnoff Ice Red White and Berry": 5.4,
    "Boston Lager": 5,
    "Heineken": 5.4,
    "Coors Banquet": 5,
    "Green Tea Shot": 30,
    "Jello shot": 30,
    "Water": 0,
}
df.loc[df["abv"].isna(), "abv"] = df.loc[df["abv"].isna(), "beverage"].map(abvs)
df["unit_volume"] = df["unit"].map({"ml": 1, "oz": 28.41306})
df["alcohol"] = df["volume"] * df["abv"] * df["unit_volume"] / 100 / 17.7
daily = df.groupby(df.index)["alcohol"].sum()
ax = daily.plot()
ax.set_xlabel("Date")
ax.set_ylabel("Daily standard drinks")
plt.savefig("drinks_daily.png", bbox_inches="tight")
plt.close()

ax = daily.rolling("7d").sum().plot()
ax.set_xlabel("Date")
ax.set_ylabel("Weekly standard drinks (rolling)")
plt.savefig("drinks_rolling.png", bbox_inches="tight")
plt.close()

weekdays = {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"}
by_day = daily.groupby(daily.index.dayofweek).mean().to_frame()
ax = by_day.rename(index=weekdays)["alcohol"].plot.bar(rot=0)
ax.set_xlabel("Weekday")
ax.set_ylabel("Average number of standard drinks")
plt.savefig("drinks_weekday.png", bbox_inches="tight")
plt.close()