Advertisement
Guest User

Colby

a guest
Jul 4th, 2023
46
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.01 KB | None | 0 0
  1. import pandas as pd
  2. import matplotlib.pyplot as plt
  3.  
  4. data = pd.read_excel("Downloads/The Other List.xlsx", sheet_name=None)
  5.  
  6.  
  7. def split_string(s):
  8.     start, *rest = s.split(".")
  9.     return pd.Series([int(start), ".".join(rest).strip()], index=["date", "drinks"])
  10.  
  11.  
  12. months = ["January", "February", "March", "April", "May", "June", "July"]
  13.  
  14.  
  15. def unit_split(string):
  16.     amount, unit, *beverage = string.split()
  17.     return pd.Series(
  18.         [float(amount), unit, " ".join(beverage)], index=["volume", "unit", "beverage"]
  19.     )
  20.  
  21.  
  22. def extract_month(month):
  23.     column = data[month].T.melt().dropna()["value"]
  24.     days = column[column.str.match(r"^\d+\. ", na=False)].apply(split_string)
  25.     df = days[((days["date"] == 1).cumsum() == 1)].copy()
  26.     df["date"] = pd.to_datetime(df["date"].apply(lambda x: f"2023 {month} {x}"))
  27.     return df
  28.  
  29.  
  30. series = (
  31.     pd.concat([extract_month(month) for month in months])
  32.     .set_index("date")["drinks"]
  33.     .apply(lambda x: [z for y in x.split("  ") if (z := y.strip())])
  34.     .explode()
  35. )
  36.  
  37. series[series.str.match("N/A")] = "0 ml Water"
  38. series[series.str.match("1 Kamikazi")] = "375 ml Kamikazi Blu-Dacious"
  39. series[series.str.match("1L Tacozilla")] = "1000 ml Tacozilla 4.2%"
  40. series[series.str.match("Green")] = "1.5 oz Green Tea Shot"
  41. series[series.str.match("1 Jello shot")] = "1 oz Jello shot"
  42.  
  43. df = series.apply(unit_split)
  44. regex = "(\d+\.?\d*)%"
  45. df["abv"] = df["beverage"].str.extract(regex, expand=False).apply(float)
  46. abvs = {
  47.     "PBR": 4.8,
  48.     "Blue Moon": 5.4,
  49.     "Coors Light": 4.2,
  50.     "Busch Light": 4.1,
  51.     "Modelo": 4.4,
  52.     "Citizens Cider": 5.2,
  53.     "Maker's Mark": 45,
  54.     "Barefoot Moscato": 9,
  55.     "Modelo Negra": 5.4,
  56.     "Budweiser": 5,
  57.     "Jack Daniels": 40,
  58.     "Guinness": 4.2,
  59.     "Corona Extra": 4.6,
  60.     "Corona": 4.8,
  61.     "Bud Light": 4.2,
  62.     "Miller High Life": 4.6,
  63.     "Kamikazi Blu-Dacious": 15,
  64.     "Smirnoff Ice Red White and Berry": 5.4,
  65.     "Boston Lager": 5,
  66.     "Heineken": 5.4,
  67.     "Coors Banquet": 5,
  68.     "Green Tea Shot": 30,
  69.     "Jello shot": 30,
  70.     "Water": 0,
  71. }
  72. df.loc[df["abv"].isna(), "abv"] = df.loc[df["abv"].isna(), "beverage"].map(abvs)
  73. df["unit_volume"] = df["unit"].map({"ml": 1, "oz": 28.41306})
  74. df["alcohol"] = df["volume"] * df["abv"] * df["unit_volume"] / 100 / 17.7
  75. daily = df.groupby(df.index)["alcohol"].sum()
  76. ax = daily.plot()
  77. ax.set_xlabel("Date")
  78. ax.set_ylabel("Daily standard drinks")
  79. plt.savefig("drinks_daily.png", bbox_inches="tight")
  80. plt.close()
  81.  
  82. ax = daily.rolling("7d").sum().plot()
  83. ax.set_xlabel("Date")
  84. ax.set_ylabel("Weekly standard drinks (rolling)")
  85. plt.savefig("drinks_rolling.png", bbox_inches="tight")
  86. plt.close()
  87.  
  88. weekdays = {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"}
  89. by_day = daily.groupby(daily.index.dayofweek).mean().to_frame()
  90. ax = by_day.rename(index=weekdays)["alcohol"].plot.bar(rot=0)
  91. ax.set_xlabel("Weekday")
  92. ax.set_ylabel("Average number of standard drinks")
  93. plt.savefig("drinks_weekday.png", bbox_inches="tight")
  94. plt.close()
  95.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement