SHARE
TWEET

xDDD

a guest Jan 24th, 2020 101 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwa.rda"))
  2. load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/osoby.rda"))
  3. load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwaDict.rda"))
  4. load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/osobyDict.rda"))
  5. library(weights)
  6. library(SDMTools)
  7. library(ggplot2)
  8. #1. 90% confidence interval for an average number of cigarettes smoked by men per day in last available year
  9. extract <- na.omit(osoby[,c('plec_all', 'waga_2015_osoby','hp44')])
  10. extract <- extract[order(extract$hp44),]
  11.  
  12. extract <- extract[extract$plec_all==1,]
  13.  
  14. #wtd.t.test(x=extract$hp44, weight = extract$waga_2015_osoby)
  15.  
  16. mean1 <- wt.mean(extract$hp44, extract$waga_2015_osoby)
  17. sd1 <- wt.sd(extract$hp44, extract$waga_2015_osoby)
  18. n <- nrow(extract)
  19. SE <- round(qnorm(.95)*sd1/sqrt(n), digits = 4)
  20. a <- mean1 - SE
  21. b <- mean1 + SE
  22.  
  23. answer <- c(a, mean1, b)
  24. answer
  25. ggplot(extract, aes(y=hp44, x=rep(1:nrow(extract)))) +
  26.   geom_point() +
  27.   geom_hline(yintercept = a, col="red") +
  28.   geom_hline(yintercept = b, col="blue")
  29.  
  30. #WITH 90% CONFIDENCE I CAN SAY THAT AN AVERAGE NR OF CIGARETTES SMOKED PER DAY BY MEN IN 2015 WAS BETWEEN
  31. # 16.2482 AND 16.7102, WHICH GIVES 16 AS IT IS BINOMIAL VARIABLE
  32.  
  33.  
  34.  
  35. #2. More than 58% men owned phone in 2007
  36.  
  37. phones <- na.omit(osoby[,c('plec_all', 'waga_2007_osoby', 'dc24')])
  38. phones <- phones[phones$plec_all ==1, ]
  39. proportions <- table(phones$dc24)
  40. n <- proportions[1] + proportions[2]
  41. n <- sum(proportions)
  42. x <- proportions[1]
  43. x/n
  44. proportions[2]
  45. prop.test(x=x, n=n, p=.58, alternative = "greater", conf.level = .95, correct = F)
  46. #WITH P-VALUE BEING MUCH LESS THAN ALPHA I CAN SAY, THAT MORE THAN 58% MEN OWNED PHONE IN 2007
  47. ggplot(phones) +
  48.   geom_bar(aes(x=dc24)) +
  49.   geom_hline(yintercept = n*.59, col="red") +
  50.   ylab("Nr of men") +
  51.   xlab("Owned a phone? (Y|N)")
  52.  
  53.  
  54.  
  55.  
  56. #3. More than 50% of people who had chosen prawo i sprawiedliwosc in question about political parties attend at least
  57. #   4 devotions or religious meetings per month(in last year)
  58. religious <- na.omit(osoby[,c('waga_2015_osoby', 'fp39','fp106')])
  59. religious <- religious[order(religious$fp39),]
  60. #pis == 2
  61. religious <- religious[religious$fp106==2,]
  62. pis <- table(religious$fp39)
  63. n1 <- sum(pis)
  64. x1 <- pis[5:30]
  65. x1 <- x1[!is.na(x1)]
  66. x1 <- sum(x1)
  67. p0 <- x1/n1
  68. prop.test(x=x1,n=n1,p=.53,alternative = "greater", conf.level = .95)
  69.  
  70. ggplot() +
  71.   geom_point(aes(y=religious$fp39, x=1:nrow(religious))) +
  72.   geom_vline(xintercept = nrow(religious) * p0, col="blue") +
  73.   geom_vline(xintercept = nrow(religious) * .53, col = "red") +
  74.   ylab("Nr of attended devotions per month") +
  75.   xlab("Nr of people who voted for PiS") +
  76.   ggtitle("Support for PiS and attendance for devotions")
  77.  
  78. # WITH P-SCORE BEING LESS THAN ALPHA I CAN ACCEPT ALTERNATIVE HYPOTHESIS, THAT MORE THAN 53% OF PEOPLE WHO VOTED
  79. # FOR PIS IN 2015 ATTENDED AT LEAST 4 DEVOTIONS OR RELIGIOUS MEETINGS PER MONTH
  80. # RED LINE - OUR NULL HYPOTHESIS
  81. # BLUE LINE - REAL VALUE
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top