Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwa.rda"))
- load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/osoby.rda"))
- load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwaDict.rda"))
- load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/osobyDict.rda"))
- library(weights)
- library(SDMTools)
- library(ggplot2)
- # 1. 90% confidence interval for the average number of cigarettes smoked by men per day in the last available year
- # Keep only rows where sex, the 2015 weight and hp44 are all present.
- extract <- na.omit(osoby[, c("plec_all", "waga_2015_osoby", "hp44")])
- # Sort by hp44 so the scatter plot below shows the values in ascending order.
- extract <- extract[order(extract$hp44), ]
- # plec_all == 1 is the code this script uses for men.
- extract <- extract[extract$plec_all == 1, ]
- # wtd.t.test(x = extract$hp44, weight = extract$waga_2015_osoby)
- # Weighted mean and standard deviation of hp44, using the 2015 person weights.
- mean1 <- wt.mean(extract$hp44, extract$waga_2015_osoby)
- sd1 <- wt.sd(extract$hp44, extract$waga_2015_osoby)
- n <- nrow(extract)
- # NOTE: despite its name, SE holds the margin of error (z * sd / sqrt(n)),
- # not the standard error; qnorm(.95) is the two-sided 90% critical value.
- SE <- round(qnorm(.95) * sd1 / sqrt(n), digits = 4)
- a <- mean1 - SE
- b <- mean1 + SE
- # Lower bound, point estimate, upper bound.
- answer <- c(a, mean1, b)
- answer
- # One point per respondent; the red and blue lines mark the interval bounds.
- # seq_len() is used instead of 1:nrow() so an empty extract yields no points.
- ggplot(extract, aes(y = hp44, x = seq_len(nrow(extract)))) +
-   geom_point() +
-   geom_hline(yintercept = a, col = "red") +
-   geom_hline(yintercept = b, col = "blue")
- # WITH 90% CONFIDENCE I CAN SAY THAT THE AVERAGE NR OF CIGARETTES SMOKED PER DAY BY MEN IN 2015 WAS BETWEEN
- # 16.2482 AND 16.7102, I.E. ABOUT 16 WHOLE CIGARETTES (THE VARIABLE IS A DISCRETE COUNT, NOT A BINOMIAL ONE)
- # 2. More than 58% of men owned a phone in 2007
- # Keep only rows where sex, the 2007 weight and dc24 are all present.
- phones <- na.omit(osoby[, c("plec_all", "waga_2007_osoby", "dc24")])
- # plec_all == 1 is the code this script uses for men.
- phones <- phones[phones$plec_all == 1, ]
- # Counts per answer to dc24.
- proportions <- table(phones$dc24)
- n <- sum(proportions)
- # Assumes the first table cell is the "owns a phone" answer - TODO confirm
- # against the data dictionary (osobyDict).
- x <- proportions[1]
- x / n
- proportions[2]
- # Null-hypothesis share, defined once so the test and the plot cannot drift apart.
- p_null <- .58
- prop.test(
-   x = x, n = n, p = p_null, alternative = "greater",
-   conf.level = .95, correct = FALSE
- )
- # WITH P-VALUE BEING MUCH LESS THAN ALPHA I CAN SAY, THAT MORE THAN 58% MEN OWNED PHONE IN 2007
- # Bar chart of the answers; the red line marks the head count that would
- # correspond to the hypothesised 58% share.
- ggplot(phones) +
-   geom_bar(aes(x = dc24)) +
-   geom_hline(yintercept = n * p_null, col = "red") +
-   ylab("Nr of men") +
-   xlab("Owned a phone? (Y|N)")
- # 3. More than 53% of people who had chosen Prawo i Sprawiedliwosc (PiS) in the question about political parties
- #    attend at least 4 devotions or religious meetings per month (in the last year)
- # Keep only rows where the 2015 weight, fp39 and fp106 are all present.
- religious <- na.omit(osoby[, c("waga_2015_osoby", "fp39", "fp106")])
- # Sort by fp39 so the scatter plot below shows attendance in ascending order.
- religious <- religious[order(religious$fp39), ]
- # pis == 2
- religious <- religious[religious$fp106 == 2, ]
- # Number of respondents per reported attendance value.
- pis <- table(religious$fp39)
- n1 <- sum(pis)
- # Select the cells by their attendance VALUE rather than by position:
- # positional indexing (cells 5 to 30) is only right when the values 0..3 are
- # all present and silently drops any cell beyond the 30th.
- # Assumes the fp39 values are numeric counts - TODO confirm.
- visits <- as.numeric(names(pis))
- x1 <- sum(pis[visits >= 4])
- # Observed share of PiS voters attending at least 4 times per month.
- p0 <- x1 / n1
- # Null-hypothesis share, defined once so the test and the plot stay in sync.
- p_null_pis <- .53
- prop.test(x = x1, n = n1, p = p_null_pis, alternative = "greater", conf.level = .95)
- # NOTE(review): respondents are sorted ascending by attendance, so the
- # vertical lines sit at row counts nrow * share; confirm that this is the
- # intended reading of the plot.
- ggplot() +
-   geom_point(aes(y = religious$fp39, x = seq_len(nrow(religious)))) +
-   geom_vline(xintercept = nrow(religious) * p0, col = "blue") +
-   geom_vline(xintercept = nrow(religious) * p_null_pis, col = "red") +
-   ylab("Nr of attended devotions per month") +
-   xlab("Nr of people who voted for PiS") +
-   ggtitle("Support for PiS and attendance for devotions")
- # WITH P-VALUE BEING LESS THAN ALPHA I CAN REJECT THE NULL HYPOTHESIS IN FAVOUR OF THE ALTERNATIVE, THAT MORE THAN
- # 53% OF PEOPLE WHO VOTED FOR PIS IN 2015 ATTENDED AT LEAST 4 DEVOTIONS OR RELIGIOUS MEETINGS PER MONTH
- # RED LINE - OUR NULL HYPOTHESIS
- # BLUE LINE - REAL VALUE
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement