Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""Chi-squared test of independence on a simulated bottle-collection sample.

Draws a reproducible random sample of (bottle type, month) pairs, builds the
contingency table with margins, derives the expected counts under
independence, and prints the Pearson chi-squared statistic, the 5% critical
value and the p-value.  The final expression cross-checks the manual result
against ``scipy.stats.chi2_contingency``.
"""
import numpy as np  # for creating an array
import pandas as pd  # for dataframe
import scipy  # for statistical calculations
from scipy.stats import chi2

np.random.seed(10)  # fixed seed so the simulated sample is reproducible

# Category labels in the order the tables should be presented.
type_labels = ["paper", "cans", "glass", "others", "plastic"]
month_labels = ["January", "February", "March"]
sample_size = 1000

type_bottle = np.random.choice(a=type_labels,
                               p=[0.05, 0.15, 0.25, 0.05, 0.5],
                               size=sample_size)
month = np.random.choice(a=month_labels,
                         p=[0.4, 0.2, 0.4],
                         size=sample_size)

bottles = pd.DataFrame({"types": type_bottle,
                        "months": month})

bottles_tab = pd.crosstab(bottles.types, bottles.months, margins=True)
# crosstab returns its labels sorted alphabetically, so overwriting them
# positionally would attach the wrong name to each count.  Rename only the
# margin labels, then reorder by label so every count keeps its category.
bottles_tab = bottles_tab.rename(index={"All": "col_totals"},
                                 columns={"All": "row_totals"})
bottles_tab = bottles_tab.reindex(index=type_labels + ["col_totals"],
                                  columns=month_labels + ["row_totals"],
                                  fill_value=0)

# Observed counts: the table without its margin row and margin column.
observed = bottles_tab.loc[type_labels, month_labels]
bottles_tab  # no effect as a script; shown as the cell output in a notebook

# Expected count under independence = row total * column total / grand total.
grand_total = bottles_tab.loc["col_totals", "row_totals"]
expected = np.outer(bottles_tab.loc[type_labels, "row_totals"],
                    bottles_tab.loc["col_totals", month_labels]) / grand_total
# Reuse the labels of `observed`: pandas arithmetic aligns on labels, so any
# spelling difference would turn the affected cells into NaN and silently
# drop them from the statistic.
expected = pd.DataFrame(expected,
                        index=observed.index,
                        columns=observed.columns)
expected  # no effect as a script; shown as the cell output in a notebook

chi_squared_stat = (((observed - expected) ** 2) / expected).sum().sum()
print(chi_squared_stat)

# Degrees of freedom for an r x c table: (r - 1) * (c - 1), i.e. 8 here.
degrees_of_freedom = (len(type_labels) - 1) * (len(month_labels) - 1)

critical_value = chi2.ppf(q=0.95,
                          df=degrees_of_freedom)
print("Critical value:", critical_value)

# The survival function is 1 - cdf, computed without the precision loss of
# the explicit subtraction when the tail probability is small.
p_value = chi2.sf(x=chi_squared_stat,
                  df=degrees_of_freedom)
print("P value:", p_value)

# Library cross-check of the manual computation above.
scipy.stats.chi2_contingency(observed=observed)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement