Advertisement
Guest User

xDDD

a guest
Jan 24th, 2020
174
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.01 KB | None | 0 0
  1. load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwa.rda"))
  2. load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/osoby.rda"))
  3. load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwaDict.rda"))
  4. load(url("https://github.com/pbiecek/Diagnoza/raw/master/data/osobyDict.rda"))
  5. library(weights)
  6. library(SDMTools)
  7. library(ggplot2)
  8. #1. 90% confidence interval for an average number of cigarettes smoked by men per day in last available year
  9. extract <- na.omit(osoby[,c('plec_all', 'waga_2015_osoby','hp44')])
  10. extract <- extract[order(extract$hp44),]
  11.  
  12. extract <- extract[extract$plec_all==1,]
  13.  
  14. #wtd.t.test(x=extract$hp44, weight = extract$waga_2015_osoby)
  15.  
  16. mean1 <- wt.mean(extract$hp44, extract$waga_2015_osoby)
  17. sd1 <- wt.sd(extract$hp44, extract$waga_2015_osoby)
  18. n <- nrow(extract)
  19. SE <- round(qnorm(.95)*sd1/sqrt(n), digits = 4)
  20. a <- mean1 - SE
  21. b <- mean1 + SE
  22.  
  23. answer <- c(a, mean1, b)
  24. answer
  25. ggplot(extract, aes(y=hp44, x=rep(1:nrow(extract)))) +
  26.   geom_point() +
  27.   geom_hline(yintercept = a, col="red") +
  28.   geom_hline(yintercept = b, col="blue")
  29.  
  30. #WITH 90% CONFIDENCE I CAN SAY THAT AN AVERAGE NR OF CIGARETTES SMOKED PER DAY BY MEN IN 2015 WAS BETWEEN
  31. # 16.2482 AND 16.7102, WHICH GIVES 16 AS IT IS BINOMIAL VARIABLE
  32.  
  33.  
  34.  
  35. #2. More than 58% men owned phone in 2007
  36.  
  37. phones <- na.omit(osoby[,c('plec_all', 'waga_2007_osoby', 'dc24')])
  38. phones <- phones[phones$plec_all ==1, ]
  39. proportions <- table(phones$dc24)
  40. n <- proportions[1] + proportions[2]
  41. n <- sum(proportions)
  42. x <- proportions[1]
  43. x/n
  44. proportions[2]
  45. prop.test(x=x, n=n, p=.58, alternative = "greater", conf.level = .95, correct = F)
  46. #WITH P-VALUE BEING MUCH LESS THAN ALPHA I CAN SAY, THAT MORE THAN 58% MEN OWNED PHONE IN 2007
  47. ggplot(phones) +
  48.   geom_bar(aes(x=dc24)) +
  49.   geom_hline(yintercept = n*.59, col="red") +
  50.   ylab("Nr of men") +
  51.   xlab("Owned a phone? (Y|N)")
  52.  
  53.  
  54.  
  55.  
  56. #3. More than 50% of people who had chosen prawo i sprawiedliwosc in question about political parties attend at least
  57. #   4 devotions or religious meetings per month(in last year)
  58. religious <- na.omit(osoby[,c('waga_2015_osoby', 'fp39','fp106')])
  59. religious <- religious[order(religious$fp39),]
  60. #pis == 2
  61. religious <- religious[religious$fp106==2,]
  62. pis <- table(religious$fp39)
  63. n1 <- sum(pis)
  64. x1 <- pis[5:30]
  65. x1 <- x1[!is.na(x1)]
  66. x1 <- sum(x1)
  67. p0 <- x1/n1
  68. prop.test(x=x1,n=n1,p=.53,alternative = "greater", conf.level = .95)
  69.  
  70. ggplot() +
  71.   geom_point(aes(y=religious$fp39, x=1:nrow(religious))) +
  72.   geom_vline(xintercept = nrow(religious) * p0, col="blue") +
  73.   geom_vline(xintercept = nrow(religious) * .53, col = "red") +
  74.   ylab("Nr of attended devotions per month") +
  75.   xlab("Nr of people who voted for PiS") +
  76.   ggtitle("Support for PiS and attendance for devotions")
  77.  
  78. # WITH P-SCORE BEING LESS THAN ALPHA I CAN ACCEPT ALTERNATIVE HYPOTHESIS, THAT MORE THAN 53% OF PEOPLE WHO VOTED
  79. # FOR PIS IN 2015 ATTENDED AT LEAST 4 DEVOTIONS OR RELIGIOUS MEETINGS PER MONTH
  80. # RED LINE - OUR NULL HYPOTHESIS
  81. # BLUE LINE - REAL VALUE
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement