Advertisement
Guest User

Untitled

a guest
May 2nd, 2016
48
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.54 KB | None | 0 0
  1. title: "Final project Stats"
  2. output: html_document
  3. ---
  4.  
  5. ---
  6. title: "Final project- life expectancy"
  7. output: html_document
  8. ---
  # Setup: load the World Bank extract and the mice imputation package.
  #
  # NOTE(review): the original first statement,
  #   read.csv(Life expectancy data, header = FALSE)
  # was not valid R (an unquoted name containing spaces) and stopped the whole
  # script from parsing. The data are read from WDI1.csv below, so it is dropped.

  # Directory holding both WDI1.csv and the locally installed mice package.
  downloads_dir <- "/Users/FreddieCamerlynck/Downloads"
  setwd(downloads_dir)

  # read.csv() already returns a data frame; no extra data.frame() wrapper needed.
  tempdata <- read.csv(file = "WDI1.csv", header = TRUE)

  # Install mice into the local library only when it is missing, instead of
  # re-installing it on every run of the script.
  if (!requireNamespace("mice", lib.loc = downloads_dir, quietly = TRUE)) {
    install.packages("mice", lib = downloads_dir)
  }
  library(mice, lib.loc = downloads_dir)

  # Matrix copy of the raw table; the sections below index it by row and
  # column position.
  data <- as.matrix(tempdata)

  # Years 1995-2014 (20 values), paired below with the 20 columns 40:59 of `data`.
  year <- 1995:2014
  18.  
  ## Low Income Countries
  # Each series is one row of `data`, columns 40:59 (one value per entry of
  # `year`). Rows 8 and 13 are deliberately left out (commented).
  LILEt <- as.numeric(data[3, 40:59])
  LILEf <- as.numeric(data[1, 40:59])
  LILEm <- as.numeric(data[2, 40:59])
  LIGDP <- as.numeric(data[4, 40:59])
  LIHE <- as.numeric(data[5, 40:59])
  LIMR <- as.numeric(data[6, 40:59])
  LIBR <- as.numeric(data[7, 40:59])
  # LIEPC <- as.numeric(data[8, 40:59])
  LIDR <- as.numeric(data[9, 40:59])
  LIFD <- as.numeric(data[10, 40:59])
  LICO2 <- as.numeric(data[11, 40:59])
  LIFR <- as.numeric(data[12, 40:59])
  # LIEU <- as.numeric(data[13, 40:59])

  LIcountriestemp <- as.data.frame(cbind(
    year, LILEt, LILEf, LILEm, LIGDP, LIHE, LIMR, LIBR, LIDR, LIFD, LICO2, LIFR
  ))

  # Single imputation of the missing values.
  # NOTE(review): no seed is passed to mice(), so the imputed values may differ
  # between runs.
  impLI <- mice(LIcountriestemp, m = 1)
  LIdata <- complete(impLI)

  cor(LIdata)
  pairs(LIdata)

  # Full model for LILEf, then backward stepwise selection.
  # The diagnostic plots originally referenced `LILmF` / `LILmM` / `LILmT`,
  # which do not exist (R is case-sensitive); the fitted objects are
  # `LIlmF` / `LIlmM` / `LIlmT`.
  LIlmF <- lm(
    LILEf ~ year + LIGDP + LIHE + LIMR + LIBR + LIDR + LIFD + LICO2 + LIFR,
    data = LIdata
  )
  summary(LIlmF)
  plot(LIlmF)
  LIbackF <- step(LIlmF, direction = "backward")
  LIbackF$call

  # Same procedure for LILEm.
  LIlmM <- lm(
    LILEm ~ year + LIGDP + LIHE + LIMR + LIBR + LIDR + LIFD + LICO2 + LIFR,
    data = LIdata
  )
  summary(LIlmM)
  plot(LIlmM)
  LIbackM <- step(LIlmM, direction = "backward")
  LIbackM$call

  # Same procedure for LILEt. The reduced model's call is printed (the original
  # printed LIlmT$call, i.e. the full model, unlike the two blocks above).
  LIlmT <- lm(
    LILEt ~ year + LIGDP + LIHE + LIMR + LIBR + LIDR + LIFD + LICO2 + LIFR,
    data = LIdata
  )
  summary(LIlmT)
  plot(LIlmT)
  LIbackT <- step(LIlmT, direction = "backward")
  LIbackT$call

  # t.test(x, y, data = ...) silently ignores `data`, so the original call
  # compared the raw, un-imputed vectors. Pass the imputed columns explicitly.
  t.test(LIdata$LILEf, LIdata$LILEm)

  # Fit the ANOVA once, then show both the fit and its table.
  LIaov <- aov(LILEt ~ LILEf + LILEm, data = LIdata)
  LIaov
  summary(LIaov)
  63.  
  64.  
  ## Lower Middle Income Countries
  # Each series is one row of `data`, columns 40:59 (one value per entry of
  # `year`). Row 26 is deliberately left out (commented).
  LMILEf <- as.numeric(data[14, 40:59])
  LMILEm <- as.numeric(data[15, 40:59])
  LMILEt <- as.numeric(data[16, 40:59])
  LMIGDP <- as.numeric(data[17, 40:59])
  LMIHE <- as.numeric(data[18, 40:59])
  LMIMR <- as.numeric(data[19, 40:59])
  LMIBR <- as.numeric(data[20, 40:59])
  LMIEPC <- as.numeric(data[21, 40:59])
  LMIDR <- as.numeric(data[22, 40:59])
  LMIFD <- as.numeric(data[23, 40:59])
  LMICO2 <- as.numeric(data[24, 40:59])
  LMIFR <- as.numeric(data[25, 40:59])
  # NOTE(review): this disabled line uses columns 40:56, not 40:59 as elsewhere.
  # LMIEU <- as.numeric(data[26, 40:56])

  LMIcountriestemp <- as.data.frame(cbind(
    year, LMILEt, LMILEf, LMILEm, LMIGDP, LMIHE, LMIMR, LMIBR, LMIEPC, LMIDR,
    LMIFD, LMICO2, LMIFR
  ))

  # Single imputation of the missing values.
  # NOTE(review): no seed is passed to mice(), so the imputed values may differ
  # between runs.
  impLMI <- mice(LMIcountriestemp, m = 1)
  LMIdata <- complete(impLMI)

  cor(LMIdata)
  pairs(LMIdata)

  # Full model for LMILEf, then backward stepwise selection.
  LMIlmF <- lm(
    LMILEf ~ year + LMIGDP + LMIHE + LMIMR + LMIBR + LMIEPC + LMIDR + LMIFD +
      LMICO2 + LMIFR,
    data = LMIdata
  )
  summary(LMIlmF)
  plot(LMIlmF)
  LMIbackF <- step(LMIlmF, direction = "backward")
  LMIbackF$call

  # Same procedure for LMILEm.
  LMIlmM <- lm(
    LMILEm ~ year + LMIGDP + LMIHE + LMIMR + LMIBR + LMIEPC + LMIDR + LMIFD +
      LMICO2 + LMIFR,
    data = LMIdata
  )
  summary(LMIlmM)
  plot(LMIlmM)
  LMIbackM <- step(LMIlmM, direction = "backward")
  LMIbackM$call

  # Same procedure for LMILEt.
  LMIlmT <- lm(
    LMILEt ~ year + LMIGDP + LMIHE + LMIMR + LMIBR + LMIEPC + LMIDR + LMIFD +
      LMICO2 + LMIFR,
    data = LMIdata
  )
  summary(LMIlmT)
  plot(LMIlmT)
  LMIbackT <- step(LMIlmT, direction = "backward")
  LMIbackT$call

  # t.test(x, y, data = ...) silently ignores `data`, so the original call
  # compared the raw, un-imputed vectors. Pass the imputed columns explicitly.
  t.test(LMIdata$LMILEf, LMIdata$LMILEm)

  # Fit the ANOVA once, then show both the fit and its table.
  LMIaov <- aov(LMILEt ~ LMILEf + LMILEm, data = LMIdata)
  LMIaov
  summary(LMIaov)
  108.  
  109.  
  ## Middle Income Countries
  # Each series is one row of `data`, columns 40:59 (one value per entry of
  # `year`). Row 39 is deliberately left out (commented).
  MILEf <- as.numeric(data[27, 40:59])
  MILEm <- as.numeric(data[28, 40:59])
  MILEt <- as.numeric(data[29, 40:59])
  MIGDP <- as.numeric(data[30, 40:59])
  MIHE <- as.numeric(data[31, 40:59])
  MIMR <- as.numeric(data[32, 40:59])
  MIBR <- as.numeric(data[33, 40:59])
  MIEPC <- as.numeric(data[34, 40:59])
  MIDR <- as.numeric(data[35, 40:59])
  MIFD <- as.numeric(data[36, 40:59])
  MICO2 <- as.numeric(data[37, 40:59])
  MIFR <- as.numeric(data[38, 40:59])
  # MIEU <- as.numeric(data[39, 40:59])

  MIcountriestemp <- as.data.frame(cbind(
    year, MILEt, MILEf, MILEm, MIGDP, MIHE, MIMR, MIBR, MIEPC, MIDR, MIFD,
    MICO2, MIFR
  ))

  # Single imputation of the missing values.
  # NOTE(review): no seed is passed to mice(), so the imputed values may differ
  # between runs.
  impMI <- mice(MIcountriestemp, m = 1)
  MIdata <- complete(impMI)

  cor(MIdata)
  pairs(MIdata)

  # Full model for MILEf, then backward stepwise selection.
  MIlmF <- lm(
    MILEf ~ year + MIGDP + MIHE + MIMR + MIBR + MIEPC + MIDR + MIFD + MICO2 +
      MIFR,
    data = MIdata
  )
  summary(MIlmF)
  plot(MIlmF)
  MIbackF <- step(MIlmF, direction = "backward")
  MIbackF$call

  # Same procedure for MILEm.
  MIlmM <- lm(
    MILEm ~ year + MIGDP + MIHE + MIMR + MIBR + MIEPC + MIDR + MIFD + MICO2 +
      MIFR,
    data = MIdata
  )
  summary(MIlmM)
  plot(MIlmM)
  MIbackM <- step(MIlmM, direction = "backward")
  MIbackM$call

  # Same procedure for MILEt.
  MIlmT <- lm(
    MILEt ~ year + MIGDP + MIHE + MIMR + MIBR + MIEPC + MIDR + MIFD + MICO2 +
      MIFR,
    data = MIdata
  )
  summary(MIlmT)
  plot(MIlmT)
  MIbackT <- step(MIlmT, direction = "backward")
  MIbackT$call

  # t.test(x, y, data = ...) silently ignores `data`, so the original call
  # compared the raw, un-imputed vectors. Pass the imputed columns explicitly.
  t.test(MIdata$MILEf, MIdata$MILEm)

  # Fit the ANOVA once, then show both the fit and its table.
  MIaov <- aov(MILEt ~ MILEf + MILEm, data = MIdata)
  MIaov
  summary(MIaov)
  153.  
  154.  
  ## Upper Middle Income Countries
  # Each series is one row of `data`, columns 40:59 (one value per entry of
  # `year`). Row 65 is deliberately left out (commented).
  UMILEf <- as.numeric(data[53, 40:59])
  UMILEm <- as.numeric(data[54, 40:59])
  UMILEt <- as.numeric(data[55, 40:59])
  UMIGDP <- as.numeric(data[56, 40:59])
  UMIHE <- as.numeric(data[57, 40:59])
  UMIMR <- as.numeric(data[58, 40:59])
  UMIBR <- as.numeric(data[59, 40:59])
  UMIEPC <- as.numeric(data[60, 40:59])
  UMIDR <- as.numeric(data[61, 40:59])
  UMIFD <- as.numeric(data[62, 40:59])
  UMICO2 <- as.numeric(data[63, 40:59])
  UMIFR <- as.numeric(data[64, 40:59])
  # UMIEU <- as.numeric(data[65, 40:59])

  UMIcountriestemp <- as.data.frame(cbind(
    year, UMILEt, UMILEf, UMILEm, UMIGDP, UMIHE, UMIMR, UMIBR, UMIEPC, UMIDR,
    UMIFD, UMICO2, UMIFR
  ))

  # Single imputation of the missing values.
  # NOTE(review): no seed is passed to mice(), so the imputed values may differ
  # between runs.
  impUMI <- mice(UMIcountriestemp, m = 1)
  UMIdata <- complete(impUMI)

  cor(UMIdata)
  pairs(UMIdata)

  # Full model for UMILEf, then backward stepwise selection.
  UMIlmF <- lm(
    UMILEf ~ year + UMIGDP + UMIHE + UMIMR + UMIBR + UMIEPC + UMIDR + UMIFD +
      UMICO2 + UMIFR,
    data = UMIdata
  )
  summary(UMIlmF)
  plot(UMIlmF)
  UMIbackF <- step(UMIlmF, direction = "backward")
  UMIbackF$call

  # Same procedure for UMILEm.
  UMIlmM <- lm(
    UMILEm ~ year + UMIGDP + UMIHE + UMIMR + UMIBR + UMIEPC + UMIDR + UMIFD +
      UMICO2 + UMIFR,
    data = UMIdata
  )
  summary(UMIlmM)
  plot(UMIlmM)
  UMIbackM <- step(UMIlmM, direction = "backward")
  UMIbackM$call

  # Same procedure for UMILEt.
  UMIlmT <- lm(
    UMILEt ~ year + UMIGDP + UMIHE + UMIMR + UMIBR + UMIEPC + UMIDR + UMIFD +
      UMICO2 + UMIFR,
    data = UMIdata
  )
  summary(UMIlmT)
  plot(UMIlmT)
  UMIbackT <- step(UMIlmT, direction = "backward")
  UMIbackT$call

  # t.test(x, y, data = ...) silently ignores `data`, so the original call
  # compared the raw, un-imputed vectors. Pass the imputed columns explicitly.
  t.test(UMIdata$UMILEf, UMIdata$UMILEm)

  # Fit the ANOVA once, then show both the fit and its table.
  UMIaov <- aov(UMILEt ~ UMILEf + UMILEm, data = UMIdata)
  UMIaov
  summary(UMIaov)
  198.  
  199.  
  ## High Income Countries
  # Each series is one row of `data`, columns 40:59 (one value per entry of
  # `year`). Rows 49 and 52 are deliberately left out (commented), so this
  # section has no HIFD predictor.
  HILEf <- as.numeric(data[40, 40:59])
  HILEm <- as.numeric(data[41, 40:59])
  HILEt <- as.numeric(data[42, 40:59])
  HIGDP <- as.numeric(data[43, 40:59])
  HIHE <- as.numeric(data[44, 40:59])
  HIMR <- as.numeric(data[45, 40:59])
  HIBR <- as.numeric(data[46, 40:59])
  HIEPC <- as.numeric(data[47, 40:59])
  HIDR <- as.numeric(data[48, 40:59])
  # HIFD <- as.numeric(data[49, 40:59])
  HICO2 <- as.numeric(data[50, 40:59])
  HIFR <- as.numeric(data[51, 40:59])
  # HIEU <- as.numeric(data[52, 40:59])

  HIcountriestemp <- as.data.frame(cbind(
    year, HILEt, HILEf, HILEm, HIGDP, HIHE, HIMR, HIBR, HIEPC, HIDR, HICO2, HIFR
  ))

  # Single imputation of the missing values.
  # NOTE(review): no seed is passed to mice(), so the imputed values may differ
  # between runs.
  impHI <- mice(HIcountriestemp, m = 1)
  HIdata <- complete(impHI)

  cor(HIdata)
  pairs(HIdata)

  # Full model for HILEf, then backward stepwise selection.
  HIlmF <- lm(
    HILEf ~ year + HIGDP + HIHE + HIMR + HIBR + HIEPC + HIDR + HICO2 + HIFR,
    data = HIdata
  )
  summary(HIlmF)
  plot(HIlmF)
  HIbackF <- step(HIlmF, direction = "backward")
  HIbackF$call

  # Same procedure for HILEm.
  HIlmM <- lm(
    HILEm ~ year + HIGDP + HIHE + HIMR + HIBR + HIEPC + HIDR + HICO2 + HIFR,
    data = HIdata
  )
  summary(HIlmM)
  plot(HIlmM)
  HIbackM <- step(HIlmM, direction = "backward")
  HIbackM$call

  # Same procedure for HILEt.
  HIlmT <- lm(
    HILEt ~ year + HIGDP + HIHE + HIMR + HIBR + HIEPC + HIDR + HICO2 + HIFR,
    data = HIdata
  )
  summary(HIlmT)
  plot(HIlmT)
  HIbackT <- step(HIlmT, direction = "backward")
  HIbackT$call

  # t.test(x, y, data = ...) silently ignores `data`, so the original call
  # compared the raw, un-imputed vectors. Pass the imputed columns explicitly.
  t.test(HIdata$HILEf, HIdata$HILEm)

  # Fit the ANOVA once, then show both the fit and its table.
  HIaov <- aov(HILEt ~ HILEf + HILEm, data = HIdata)
  HIaov
  summary(HIaov)
  243.  
  ## ANOVA between regions

  # EXAMPLE: let's do an analysis of variance to test if all three population
  # means are equal. We save the output in an object called "doganova" so we
  # can reuse it without redoing the analysis:
  #   doganova = aov(Heart.Rate ~ Group, data = dogs)
  # Then look at the output from the anova, including the anova table:
  #   summary(doganova)

  # Side-by-side copy of the five imputed tables. The original line was missing
  # the comma between LIdata and LMIdata, which is a syntax error.
  alldata <- cbind(LIdata, LMIdata, MIdata, UMIdata, HIdata)

  # One column per region for each of the three LE* series.
  allT <- cbind(LILEt, LMILEt, MILEt, UMILEt, HILEt)
  allF <- cbind(LILEf, LMILEf, MILEf, UMILEf, HILEf)
  allM <- cbind(LILEm, LMILEm, MILEm, UMILEm, HILEm)
  # NOTE(review): allT / allF / allM are global matrices, not columns of
  # `alldata`, so `data = alldata` does not supply them. With a matrix on the
  # left-hand side aov() fits a multi-response model; region never enters as a
  # grouping factor here.
  regionanova <- aov(allT ~ allF + allM, data = alldata)
  summary(regionanova)


  # Second attempt: each cbind() wraps the element-wise SUM of the five regional
  # series, so this models the summed LEt values on the summed LEf / LEm values.
  allT <- cbind(LILEt + LMILEt + MILEt + UMILEt + HILEt)
  allF <- cbind(LILEf + LMILEf + MILEf + UMILEf + HILEf)
  allM <- cbind(LILEm + LMILEm + MILEm + UMILEm + HILEm)
  regionanova2 <- aov(allT ~ allF + allM, data = alldata)
  summary(regionanova2)

  # One-way ANOVA in the shape of the example above: one row per region-year,
  # with the region as the grouping factor, testing whether the mean of the
  # LEt series is equal across the five regions.
  regionLEt <- rbind(
    data.frame(region = "LI", LEt = LIdata$LILEt),
    data.frame(region = "LMI", LEt = LMIdata$LMILEt),
    data.frame(region = "MI", LEt = MIdata$MILEt),
    data.frame(region = "UMI", LEt = UMIdata$UMILEt),
    data.frame(region = "HI", LEt = HIdata$HILEt)
  )
  regionLEt$region <- factor(
    regionLEt$region,
    levels = c("LI", "LMI", "MI", "UMI", "HI")
  )
  regionanovaT <- aov(LEt ~ region, data = regionLEt)
  summary(regionanovaT)


  # Remove explanatory variables based on redundancy and looking at correlation
  # Ask prof about how to compare the multiple regression models (ANOVA?)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement