Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- title: "Final project Stats"
- output: html_document
- ---
- ---
- title: "Final project- life expectancy"
- output: html_document
- ---
- # Setup: load the WDI extract and make the `mice` imputation package available.
- # The stray call `read.csv(Life expectancy data, header = FALSE)` that used to
- # open this section was not valid R (unquoted, space-separated file name) and
- # its result was never stored, so it has been dropped.
- data_dir <- "/Users/FreddieCamerlynck/Downloads"
- # Read through an explicit path instead of setwd(), so the session's working
- # directory is not changed as a side effect of running the analysis.
- tempdata <- read.csv(file = file.path(data_dir, "WDI1.csv"), header = TRUE)
- # Install `mice` into the private library only when it cannot be loaded from
- # there, instead of re-installing it on every run.
- if (!requireNamespace("mice", lib.loc = data_dir, quietly = TRUE)) install.packages("mice", lib = data_dir)
- library(mice, lib.loc = data_dir)
- # Matrix copy of the table; the sections below pick series by row/column position.
- data <- as.matrix(tempdata)
- # 20 values, one per data column in the 40:59 range used below.
- year <- 1995:2014
- ## Low Income Countries
- # Each series is one row of `data`, restricted to columns 40:59 (20 values,
- # matching `year`) and converted from matrix cells to numeric.
- # NOTE(review): the suffixes presumably mean LEt/LEf/LEm = life expectancy
- # total/female/male -- confirm the row meanings against WDI1.csv.
- LILEt <- as.numeric(data[3, 40:59])
- LILEf <- as.numeric(data[1, 40:59])
- LILEm <- as.numeric(data[2, 40:59])
- LIGDP <- as.numeric(data[4, 40:59])
- LIHE <- as.numeric(data[5, 40:59])
- LIMR <- as.numeric(data[6, 40:59])
- LIBR <- as.numeric(data[7, 40:59])
- # LIEPC <- as.numeric(data[8, 40:59])
- LIDR <- as.numeric(data[9, 40:59])
- LIFD <- as.numeric(data[10, 40:59])
- LICO2 <- as.numeric(data[11, 40:59])
- LIFR <- as.numeric(data[12, 40:59])
- # LIEU <- as.numeric(data[13, 40:59])
- LIcountriestemp <- as.data.frame(cbind(year, LILEt, LILEf, LILEm, LIGDP, LIHE, LIMR, LIBR, LIDR, LIFD, LICO2, LIFR))
- # Single imputation (m = 1) of missing values with mice; LIdata is the completed table.
- impLI <- mice(LIcountriestemp, m = 1)
- LIdata <- complete(impLI)
- cor(LIdata)
- pairs(LIdata)
- # Female series: full model, diagnostic plots, then backward selection with step().
- LIlmF <- lm(LILEf ~ year + LIGDP + LIHE + LIMR + LIBR + LIDR + LIFD + LICO2 + LIFR, data = LIdata)
- summary(LIlmF)
- # Fixed: the model object is `LIlmF`; `LILmF` (capital L) does not exist.
- plot(LIlmF)
- LIbackF <- step(LIlmF, direction = "backward")
- LIbackF$call
- # Male series: same procedure.
- LIlmM <- lm(LILEm ~ year + LIGDP + LIHE + LIMR + LIBR + LIDR + LIFD + LICO2 + LIFR, data = LIdata)
- summary(LIlmM)
- # Fixed: `LILmM` -> `LIlmM`.
- plot(LIlmM)
- LIbackM <- step(LIlmM, direction = "backward")
- LIbackM$call
- # Total series: same procedure.
- LIlmT <- lm(LILEt ~ year + LIGDP + LIHE + LIMR + LIBR + LIDR + LIFD + LICO2 + LIFR, data = LIdata)
- summary(LIlmT)
- # Fixed: `LILmT` -> `LIlmT`.
- plot(LIlmT)
- LIbackT <- step(LIlmT, direction = "backward")
- # Fixed: show the call of the step-selected model, as the other sections do,
- # rather than the call of the full model.
- LIbackT$call
- # The default t.test method has no `data` argument (it was silently swallowed
- # by `...`), so take the columns from the imputed table explicitly.
- t.test(LIdata$LILEf, LIdata$LILEm)
- # Fit the ANOVA model once, then print both the fit and its table.
- LIaov <- aov(LILEt ~ LILEf + LILEm, data = LIdata)
- LIaov
- summary(LIaov)
- ## Lower Middle Income Countries
- # Each series is one row of `data`, restricted to columns 40:59 (20 values,
- # matching `year`) and converted from matrix cells to numeric.
- LMILEf <- as.numeric(data[14, 40:59])
- LMILEm <- as.numeric(data[15, 40:59])
- LMILEt <- as.numeric(data[16, 40:59])
- LMIGDP <- as.numeric(data[17, 40:59])
- LMIHE <- as.numeric(data[18, 40:59])
- LMIMR <- as.numeric(data[19, 40:59])
- LMIBR <- as.numeric(data[20, 40:59])
- LMIEPC <- as.numeric(data[21, 40:59])
- LMIDR <- as.numeric(data[22, 40:59])
- LMIFD <- as.numeric(data[23, 40:59])
- LMICO2 <- as.numeric(data[24, 40:59])
- LMIFR <- as.numeric(data[25, 40:59])
- # LMIEU <- as.numeric(data[26, 40:56])
- LMIcountriestemp <- as.data.frame(cbind(year, LMILEt, LMILEf, LMILEm, LMIGDP, LMIHE, LMIMR, LMIBR, LMIEPC, LMIDR, LMIFD, LMICO2, LMIFR))
- # Single imputation (m = 1) of missing values with mice; LMIdata is the completed table.
- impLMI <- mice(LMIcountriestemp, m = 1)
- LMIdata <- complete(impLMI)
- cor(LMIdata)
- pairs(LMIdata)
- # Female series: full model, diagnostic plots, then backward selection with step().
- LMIlmF <- lm(LMILEf ~ year + LMIGDP + LMIHE + LMIMR + LMIBR + LMIEPC + LMIDR + LMIFD + LMICO2 + LMIFR, data = LMIdata)
- summary(LMIlmF)
- plot(LMIlmF)
- LMIbackF <- step(LMIlmF, direction = "backward")
- LMIbackF$call
- # Male series: same procedure.
- LMIlmM <- lm(LMILEm ~ year + LMIGDP + LMIHE + LMIMR + LMIBR + LMIEPC + LMIDR + LMIFD + LMICO2 + LMIFR, data = LMIdata)
- summary(LMIlmM)
- plot(LMIlmM)
- LMIbackM <- step(LMIlmM, direction = "backward")
- LMIbackM$call
- # Total series: same procedure.
- LMIlmT <- lm(LMILEt ~ year + LMIGDP + LMIHE + LMIMR + LMIBR + LMIEPC + LMIDR + LMIFD + LMICO2 + LMIFR, data = LMIdata)
- summary(LMIlmT)
- plot(LMIlmT)
- LMIbackT <- step(LMIlmT, direction = "backward")
- LMIbackT$call
- # The default t.test method has no `data` argument (it was silently swallowed
- # by `...`), so take the columns from the imputed table explicitly.
- t.test(LMIdata$LMILEf, LMIdata$LMILEm)
- # Fit the ANOVA model once, then print both the fit and its table.
- LMIaov <- aov(LMILEt ~ LMILEf + LMILEm, data = LMIdata)
- LMIaov
- summary(LMIaov)
- ## Middle Income Countries
- # Each series is one row of `data`, restricted to columns 40:59 (20 values,
- # matching `year`) and converted from matrix cells to numeric.
- MILEf <- as.numeric(data[27, 40:59])
- MILEm <- as.numeric(data[28, 40:59])
- MILEt <- as.numeric(data[29, 40:59])
- MIGDP <- as.numeric(data[30, 40:59])
- MIHE <- as.numeric(data[31, 40:59])
- MIMR <- as.numeric(data[32, 40:59])
- MIBR <- as.numeric(data[33, 40:59])
- MIEPC <- as.numeric(data[34, 40:59])
- MIDR <- as.numeric(data[35, 40:59])
- MIFD <- as.numeric(data[36, 40:59])
- MICO2 <- as.numeric(data[37, 40:59])
- MIFR <- as.numeric(data[38, 40:59])
- # MIEU <- as.numeric(data[39, 40:59])
- MIcountriestemp <- as.data.frame(cbind(year, MILEt, MILEf, MILEm, MIGDP, MIHE, MIMR, MIBR, MIEPC, MIDR, MIFD, MICO2, MIFR))
- # Single imputation (m = 1) of missing values with mice; MIdata is the completed table.
- impMI <- mice(MIcountriestemp, m = 1)
- MIdata <- complete(impMI)
- cor(MIdata)
- pairs(MIdata)
- # Female series: full model, diagnostic plots, then backward selection with step().
- MIlmF <- lm(MILEf ~ year + MIGDP + MIHE + MIMR + MIBR + MIEPC + MIDR + MIFD + MICO2 + MIFR, data = MIdata)
- summary(MIlmF)
- plot(MIlmF)
- MIbackF <- step(MIlmF, direction = "backward")
- MIbackF$call
- # Male series: same procedure.
- MIlmM <- lm(MILEm ~ year + MIGDP + MIHE + MIMR + MIBR + MIEPC + MIDR + MIFD + MICO2 + MIFR, data = MIdata)
- summary(MIlmM)
- plot(MIlmM)
- MIbackM <- step(MIlmM, direction = "backward")
- MIbackM$call
- # Total series: same procedure.
- MIlmT <- lm(MILEt ~ year + MIGDP + MIHE + MIMR + MIBR + MIEPC + MIDR + MIFD + MICO2 + MIFR, data = MIdata)
- summary(MIlmT)
- plot(MIlmT)
- MIbackT <- step(MIlmT, direction = "backward")
- MIbackT$call
- # The default t.test method has no `data` argument (it was silently swallowed
- # by `...`), so take the columns from the imputed table explicitly.
- t.test(MIdata$MILEf, MIdata$MILEm)
- # Fit the ANOVA model once, then print both the fit and its table.
- MIaov <- aov(MILEt ~ MILEf + MILEm, data = MIdata)
- MIaov
- summary(MIaov)
- ## Upper Middle Income Countries
- # Each series is one row of `data`, restricted to columns 40:59 (20 values,
- # matching `year`) and converted from matrix cells to numeric.
- UMILEf <- as.numeric(data[53, 40:59])
- UMILEm <- as.numeric(data[54, 40:59])
- UMILEt <- as.numeric(data[55, 40:59])
- UMIGDP <- as.numeric(data[56, 40:59])
- UMIHE <- as.numeric(data[57, 40:59])
- UMIMR <- as.numeric(data[58, 40:59])
- UMIBR <- as.numeric(data[59, 40:59])
- UMIEPC <- as.numeric(data[60, 40:59])
- UMIDR <- as.numeric(data[61, 40:59])
- UMIFD <- as.numeric(data[62, 40:59])
- UMICO2 <- as.numeric(data[63, 40:59])
- UMIFR <- as.numeric(data[64, 40:59])
- # UMIEU <- as.numeric(data[65, 40:59])
- UMIcountriestemp <- as.data.frame(cbind(year, UMILEt, UMILEf, UMILEm, UMIGDP, UMIHE, UMIMR, UMIBR, UMIEPC, UMIDR, UMIFD, UMICO2, UMIFR))
- # Single imputation (m = 1) of missing values with mice; UMIdata is the completed table.
- impUMI <- mice(UMIcountriestemp, m = 1)
- UMIdata <- complete(impUMI)
- cor(UMIdata)
- pairs(UMIdata)
- # Female series: full model, diagnostic plots, then backward selection with step().
- UMIlmF <- lm(UMILEf ~ year + UMIGDP + UMIHE + UMIMR + UMIBR + UMIEPC + UMIDR + UMIFD + UMICO2 + UMIFR, data = UMIdata)
- summary(UMIlmF)
- plot(UMIlmF)
- UMIbackF <- step(UMIlmF, direction = "backward")
- UMIbackF$call
- # Male series: same procedure.
- UMIlmM <- lm(UMILEm ~ year + UMIGDP + UMIHE + UMIMR + UMIBR + UMIEPC + UMIDR + UMIFD + UMICO2 + UMIFR, data = UMIdata)
- summary(UMIlmM)
- plot(UMIlmM)
- UMIbackM <- step(UMIlmM, direction = "backward")
- UMIbackM$call
- # Total series: same procedure.
- UMIlmT <- lm(UMILEt ~ year + UMIGDP + UMIHE + UMIMR + UMIBR + UMIEPC + UMIDR + UMIFD + UMICO2 + UMIFR, data = UMIdata)
- summary(UMIlmT)
- plot(UMIlmT)
- UMIbackT <- step(UMIlmT, direction = "backward")
- UMIbackT$call
- # The default t.test method has no `data` argument (it was silently swallowed
- # by `...`), so take the columns from the imputed table explicitly.
- t.test(UMIdata$UMILEf, UMIdata$UMILEm)
- # Fit the ANOVA model once, then print both the fit and its table.
- UMIaov <- aov(UMILEt ~ UMILEf + UMILEm, data = UMIdata)
- UMIaov
- summary(UMIaov)
- ## High Income Countries
- # Each series is one row of `data`, restricted to columns 40:59 (20 values,
- # matching `year`) and converted from matrix cells to numeric.
- # Unlike the other sections, the FD series (row 49) is left out here.
- HILEf <- as.numeric(data[40, 40:59])
- HILEm <- as.numeric(data[41, 40:59])
- HILEt <- as.numeric(data[42, 40:59])
- HIGDP <- as.numeric(data[43, 40:59])
- HIHE <- as.numeric(data[44, 40:59])
- HIMR <- as.numeric(data[45, 40:59])
- HIBR <- as.numeric(data[46, 40:59])
- HIEPC <- as.numeric(data[47, 40:59])
- HIDR <- as.numeric(data[48, 40:59])
- # HIFD <- as.numeric(data[49, 40:59])
- HICO2 <- as.numeric(data[50, 40:59])
- HIFR <- as.numeric(data[51, 40:59])
- # HIEU <- as.numeric(data[52, 40:59])
- HIcountriestemp <- as.data.frame(cbind(year, HILEt, HILEf, HILEm, HIGDP, HIHE, HIMR, HIBR, HIEPC, HIDR, HICO2, HIFR))
- # Single imputation (m = 1) of missing values with mice; HIdata is the completed table.
- impHI <- mice(HIcountriestemp, m = 1)
- HIdata <- complete(impHI)
- cor(HIdata)
- pairs(HIdata)
- # Female series: full model, diagnostic plots, then backward selection with step().
- HIlmF <- lm(HILEf ~ year + HIGDP + HIHE + HIMR + HIBR + HIEPC + HIDR + HICO2 + HIFR, data = HIdata)
- summary(HIlmF)
- plot(HIlmF)
- HIbackF <- step(HIlmF, direction = "backward")
- HIbackF$call
- # Male series: same procedure.
- HIlmM <- lm(HILEm ~ year + HIGDP + HIHE + HIMR + HIBR + HIEPC + HIDR + HICO2 + HIFR, data = HIdata)
- summary(HIlmM)
- plot(HIlmM)
- HIbackM <- step(HIlmM, direction = "backward")
- HIbackM$call
- # Total series: same procedure.
- HIlmT <- lm(HILEt ~ year + HIGDP + HIHE + HIMR + HIBR + HIEPC + HIDR + HICO2 + HIFR, data = HIdata)
- summary(HIlmT)
- plot(HIlmT)
- HIbackT <- step(HIlmT, direction = "backward")
- HIbackT$call
- # The default t.test method has no `data` argument (it was silently swallowed
- # by `...`), so take the columns from the imputed table explicitly.
- t.test(HIdata$HILEf, HIdata$HILEm)
- # Fit the ANOVA model once, then print both the fit and its table.
- HIaov <- aov(HILEt ~ HILEf + HILEm, data = HIdata)
- HIaov
- summary(HIaov)
- ## ANOVA between regions
- # EXAMPLE (template, not run): an analysis of variance testing whether all
- # three population means are equal. The output is saved in an object called
- # "doganova" so it can be reused without redoing the analysis:
- #   doganova = aov(Heart.Rate ~ Group, data = dogs)
- # Then look at the output from the anova, including the anova table:
- #   summary(doganova)
- # Fixed: a comma was missing between LIdata and LMIdata, which made this line
- # (and therefore the whole script) fail to parse.
- alldata <- cbind(LIdata, LMIdata, MIdata, UMIdata, HIdata)
- # First attempt: one matrix per series, with one column per income group.
- allT <- cbind(LILEt, LMILEt, MILEt, UMILEt, HILEt)
- allF <- cbind(LILEf, LMILEf, MILEf, UMILEf, HILEf)
- allM <- cbind(LILEm, LMILEm, MILEm, UMILEm, HILEm)
- # `alldata` has no columns named allT/allF/allM, so the formula resolves them
- # from the workspace objects built just above; `data = alldata` is kept as
- # written but does not supply any of the model variables.
- regionanova <- aov(allT ~ allF + allM, data = alldata)
- summary(regionanova)
- # Second attempt: element-wise sums over the five income groups, giving
- # single-column matrices that overwrite allT/allF/allM.
- allT <- cbind(LILEt + LMILEt + MILEt + UMILEt + HILEt)
- allF <- cbind(LILEf + LMILEf + MILEf + UMILEf + HILEf)
- allM <- cbind(LILEm + LMILEm + MILEm + UMILEm + HILEm)
- # NOTE(review): neither model has a grouping factor like `Group` in the
- # template, so neither tests for a difference in means between regions; that
- # would presumably need a long table (value + region factor) -- confirm intent.
- regionanova2 <- aov(allT ~ allF + allM, data = alldata)
- summary(regionanova2)
- # TODO: remove redundant explanatory variables, using the correlation matrices to identify them.
- # TODO: ask the professor how to compare the multiple regression models (ANOVA?).
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement