# xDDD

load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwa.rda"))
load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/osoby.rda"))
load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwaDict.rda"))
load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/osobyDict.rda"))
library(weights)
library(SDMTools)
library(ggplot2)
# 1. 90% confidence interval for the average number of cigarettes smoked by
#    men per day in the last available year.
# Keep only rows where sex, the 2015 person weight and hp44 are all present.
extract <- na.omit(osoby[, c("plec_all", "waga_2015_osoby", "hp44")])
extract <- extract[order(extract$hp44), ]

# Restrict to men (plec_all == 1, per the task statement above).
extract <- extract[extract$plec_all == 1, ]

# wtd.t.test(x = extract$hp44, weight = extract$waga_2015_osoby)

# Weighted mean and weighted standard deviation of hp44.
mean1 <- wt.mean(extract$hp44, extract$waga_2015_osoby)
sd1 <- wt.sd(extract$hp44, extract$waga_2015_osoby)
n <- nrow(extract)
# NOTE: despite its name, SE holds the margin of error z * sd / sqrt(n),
# not the bare standard error. qnorm(.95) leaves 5% in each tail, which
# gives a two-sided 90% interval. The value is deliberately not rounded
# before it is used, so the bounds are not distorted; round only for display.
SE <- qnorm(.95) * sd1 / sqrt(n)
a <- mean1 - SE
b <- mean1 + SE

# Lower bound, point estimate, upper bound.
answer <- c(a, mean1, b)
answer
# Individual answers (sorted by hp44) with the interval bounds:
# red = lower bound, blue = upper bound.
ggplot(extract, aes(y = hp44, x = seq_len(nrow(extract)))) +
  geom_point() +
  geom_hline(yintercept = a, col = "red") +
  geom_hline(yintercept = b, col = "blue")

# WITH 90% CONFIDENCE I CAN SAY THAT THE AVERAGE NR OF CIGARETTES SMOKED PER
# DAY BY MEN IN 2015 WAS BETWEEN 16.2482 AND 16.7102 (VALUES FROM THE ORIGINAL
# RUN), I.E. ROUGHLY 16-17 CIGARETTES PER DAY. THE VARIABLE IS A COUNT, BUT
# ITS MEAN DOES NOT HAVE TO BE A WHOLE NUMBER.


# 2. Hypothesis: more than 58% of men owned a phone in 2007.

# Proportion under the null hypothesis; shared by the test and the plot so
# the two cannot drift apart.
p_null_phones <- .58

# Keep rows where sex, the 2007 person weight and dc24 are all present,
# then restrict to men (plec_all == 1).
phones <- na.omit(osoby[, c("plec_all", "waga_2007_osoby", "dc24")])
phones <- phones[phones$plec_all == 1, ]
# NOTE: the test below works on raw (unweighted) counts; the weight column
# only influences which rows survive na.omit().
proportions <- table(phones$dc24)
n <- sum(proportions)
# NOTE(review): assumes the first category of dc24 means "owns a phone" --
# confirm against osobyDict.
x <- proportions[1]
x / n
proportions[2]
# One-sided test of H0: p = 0.58 against H1: p > 0.58, without continuity
# correction.
prop.test(
  x = x, n = n, p = p_null_phones,
  alternative = "greater", conf.level = .95, correct = FALSE
)
# WITH P-VALUE BEING MUCH LESS THAN ALPHA I CAN SAY, THAT MORE THAN 58% MEN OWNED PHONE IN 2007
# Bar chart of the answers; the red line marks the count that corresponds
# to the null-hypothesis share of all men in the sample.
ggplot(phones) +
  geom_bar(aes(x = dc24)) +
  geom_hline(yintercept = n * p_null_phones, col = "red") +
  ylab("Nr of men") +
  xlab("Owned a phone? (Y|N)")


# 3. Hypothesis: more than 53% of people who had chosen Prawo i
#    Sprawiedliwosc in the question about political parties attend at least
#    4 devotions or religious meetings per month (in the last year).

# Proportion under the null hypothesis; shared by the test and the plot.
p_null_pis <- .53

# Keep rows where the 2015 person weight, fp39 and fp106 are all present.
religious <- na.omit(osoby[, c("waga_2015_osoby", "fp39", "fp106")])
religious <- religious[order(religious$fp39), ]
# pis == 2
religious <- religious[religious$fp106 == 2, ]
# NOTE: the test below works on raw (unweighted) counts.
pis <- table(religious$fp39)
n1 <- sum(pis)
# Pick the "at least 4" categories by their value rather than by their
# position in the table: positions shift when one of the values 0-3 does not
# occur in the data, and a fixed upper position silently drops anything past
# it. which() also discards categories whose label is not numeric.
visits <- suppressWarnings(as.numeric(names(pis)))
x1 <- sum(pis[which(visits >= 4)])
# NOTE: p0 is the observed sample proportion, not the null value.
p0 <- x1 / n1
# One-sided test of H0: p = 0.53 against H1: p > 0.53.
prop.test(
  x = x1, n = n1, p = p_null_pis,
  alternative = "greater", conf.level = .95
)

# Individual answers (sorted by fp39); the vertical lines place the observed
# and the hypothesised share on the respondent axis.
ggplot(religious, aes(y = fp39, x = seq_len(nrow(religious)))) +
  geom_point() +
  geom_vline(xintercept = nrow(religious) * p0, col = "blue") +
  geom_vline(xintercept = nrow(religious) * p_null_pis, col = "red") +
  ylab("Nr of attended devotions per month") +
  xlab("Nr of people who voted for PiS") +
  ggtitle("Support for PiS and attendance for devotions")

# WITH P-VALUE BEING LESS THAN ALPHA I CAN REJECT THE NULL HYPOTHESIS IN FAVOUR
# OF THE ALTERNATIVE, THAT MORE THAN 53% OF PEOPLE WHO VOTED FOR PIS IN 2015
# ATTENDED AT LEAST 4 DEVOTIONS OR RELIGIOUS MEETINGS PER MONTH
# RED LINE - OUR NULL HYPOTHESIS
# BLUE LINE - REAL VALUE