Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import matplotlib.pyplot as plt
- data = pd.read_excel("Downloads/The Other List.xlsx", sheet_name=None)
- def split_string(s):
- start, *rest = s.split(".")
- return pd.Series([int(start), ".".join(rest).strip()], index=["date", "drinks"])
- months = ["January", "February", "March", "April", "May", "June", "July"]
- def unit_split(string):
- amount, unit, *beverage = string.split()
- return pd.Series(
- [float(amount), unit, " ".join(beverage)], index=["volume", "unit", "beverage"]
- )
- def extract_month(month):
- column = data[month].T.melt().dropna()["value"]
- days = column[column.str.match(r"^\d+\. ", na=False)].apply(split_string)
- df = days[((days["date"] == 1).cumsum() == 1)].copy()
- df["date"] = pd.to_datetime(df["date"].apply(lambda x: f"2023 {month} {x}"))
- return df
- series = (
- pd.concat([extract_month(month) for month in months])
- .set_index("date")["drinks"]
- .apply(lambda x: [z for y in x.split(" ") if (z := y.strip())])
- .explode()
- )
- series[series.str.match("N/A")] = "0 ml Water"
- series[series.str.match("1 Kamikazi")] = "375 ml Kamikazi Blu-Dacious"
- series[series.str.match("1L Tacozilla")] = "1000 ml Tacozilla 4.2%"
- series[series.str.match("Green")] = "1.5 oz Green Tea Shot"
- series[series.str.match("1 Jello shot")] = "1 oz Jello shot"
- df = series.apply(unit_split)
- regex = "(\d+\.?\d*)%"
- df["abv"] = df["beverage"].str.extract(regex, expand=False).apply(float)
- abvs = {
- "PBR": 4.8,
- "Blue Moon": 5.4,
- "Coors Light": 4.2,
- "Busch Light": 4.1,
- "Modelo": 4.4,
- "Citizens Cider": 5.2,
- "Maker's Mark": 45,
- "Barefoot Moscato": 9,
- "Modelo Negra": 5.4,
- "Budweiser": 5,
- "Jack Daniels": 40,
- "Guinness": 4.2,
- "Corona Extra": 4.6,
- "Corona": 4.8,
- "Bud Light": 4.2,
- "Miller High Life": 4.6,
- "Kamikazi Blu-Dacious": 15,
- "Smirnoff Ice Red White and Berry": 5.4,
- "Boston Lager": 5,
- "Heineken": 5.4,
- "Coors Banquet": 5,
- "Green Tea Shot": 30,
- "Jello shot": 30,
- "Water": 0,
- }
- df.loc[df["abv"].isna(), "abv"] = df.loc[df["abv"].isna(), "beverage"].map(abvs)
- df["unit_volume"] = df["unit"].map({"ml": 1, "oz": 28.41306})
- df["alcohol"] = df["volume"] * df["abv"] * df["unit_volume"] / 100 / 17.7
- daily = df.groupby(df.index)["alcohol"].sum()
- ax = daily.plot()
- ax.set_xlabel("Date")
- ax.set_ylabel("Daily standard drinks")
- plt.savefig("drinks_daily.png", bbox_inches="tight")
- plt.close()
- ax = daily.rolling("7d").sum().plot()
- ax.set_xlabel("Date")
- ax.set_ylabel("Weekly standard drinks (rolling)")
- plt.savefig("drinks_rolling.png", bbox_inches="tight")
- plt.close()
- weekdays = {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"}
- by_day = daily.groupby(daily.index.dayofweek).mean().to_frame()
- ax = by_day.rename(index=weekdays)["alcohol"].plot.bar(rot=0)
- ax.set_xlabel("Weekday")
- ax.set_ylabel("Average number of standard drinks")
- plt.savefig("drinks_weekday.png", bbox_inches="tight")
- plt.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement