# University project: chi-squared test of independence between item type and month

# -*- coding: utf-8 -*-
import numpy as np # for creating an array
import pandas as pd # for dataframe
import scipy # for statistical cal
from scipy.stats import chi2

# Simulate 1000 collected items, each with a type and a month, and test
# whether type and month are independent using a chi-squared test of
# independence on the resulting 5 x 3 contingency table.

# Fixed seed so the simulated sample, and every number printed below, is
# reproducible from run to run.
np.random.seed(10)

type_labels = ["paper", "cans", "glass", "others", "plastic"]
month_labels = ["January", "February", "March"]
sample_size = 1000

# The two variables are drawn independently of each other. The order of the
# draws (types first, then months) is significant: it determines the seeded
# sample.
type_bottle = np.random.choice(a=type_labels,
                               p=[0.05, 0.15, 0.25, 0.05, 0.5],
                               size=sample_size)

month = np.random.choice(a=month_labels,
                         p=[0.4, 0.2, 0.4],
                         size=sample_size)

bottles = pd.DataFrame({"types": type_bottle,
                        "months": month})

# Contingency table of counts, including the marginal totals.
bottles_tab = pd.crosstab(bottles["types"], bottles["months"], margins=True)

# crosstab sorts its labels alphabetically and names the margins "All".
# Rename only the margins, then reorder *by label*: assigning a hand-written
# label list positionally would attach the wrong names to the counts.
bottles_tab = bottles_tab.rename(index={"All": "col_totals"},
                                 columns={"All": "row_totals"})
bottles_tab = bottles_tab.reindex(index=type_labels + ["col_totals"],
                                  columns=month_labels + ["row_totals"],
                                  fill_value=0)

# Observed counts: the table without its margins.
observed = bottles_tab.loc[type_labels, month_labels]
print(bottles_tab)

# Expected count of a cell under independence:
#     row total * column total / grand total
grand_total = bottles_tab.loc["col_totals", "row_totals"]
expected = np.outer(bottles_tab.loc[type_labels, "row_totals"],
                    bottles_tab.loc["col_totals", month_labels]) / grand_total

# Reuse the labels of `observed` so the element-wise arithmetic below aligns
# cell by cell; differently spelled labels would silently produce NaN cells
# that .sum() then skips.
expected = pd.DataFrame(expected,
                        index=observed.index,
                        columns=observed.columns)
print(expected)

# Pearson chi-squared statistic: sum over all cells of (O - E)^2 / E.
chi_squared_stat = (((observed - expected) ** 2) / expected).sum().sum()

print(chi_squared_stat)

# Degrees of freedom for an r x c table: (r - 1) * (c - 1).
degrees_of_freedom = (observed.shape[0] - 1) * (observed.shape[1] - 1)

# Critical value at the 5% significance level.
critical_value = chi2.ppf(q=0.95,
                          df=degrees_of_freedom)
print("Critical value:", critical_value)

# Upper-tail probability; sf() is the survival function (1 - cdf) and avoids
# the precision loss of computing 1 - cdf() explicitly.
p_value = chi2.sf(chi_squared_stat,
                  df=degrees_of_freedom)
print("P value:", p_value)

# Cross-check the manual computation against SciPy's built-in test.
contingency_result = scipy.stats.chi2_contingency(observed=observed)
print(contingency_result)