Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # plotting
- library(ggplot2)
- # Necessary for multinomial regression:
- library(nnet)
- rm(list=ls())
- plasma <- read.delim("plasma.txt")
- head(plasma)
- summary(plasma)
- plasma$sex <- factor(plasma$sex,
- levels = c(1, 2),
- labels = c("Male", "Female"))
- plasma$smokstat <- factor(plasma$smokstat,
- levels = c(1, 2, 3),
- labels = c("Never smoked", "Former smoker", "Current smoker"))
- plasma$bmicat <- factor(plasma$bmicat,
- levels = c(1, 2, 3, 4),
- labels = c("Underweight", "Normal weight", "Overweight", "Obese"))
- plasma$vituse <- factor(plasma$vituse,
- levels = c(1, 2, 3),
- labels = c("Frequently", "Not_frequently", "Never"))
- #low category
- (I_plasma_low <- as.numeric(plasma$betaplasma < 75))
- sum(I_plasma_low)
- #medium category
- (I_plasma_med <- as.numeric(plasma$betaplasma >= 75 & plasma$betaplasma < 300))
- sum(I_plasma_med)
- #high category
- (I_plasma_high <- as.numeric(plasma$betaplasma >= 300))
- sum(I_plasma_high)
- #changing reference categories
- plasma$bmicat <- relevel(plasma$bmicat, "Normal weight")
- plasma$sex <- relevel(plasma$sex, "Female")
- plasma$smokstat <- relevel(plasma$smokstat, "Never smoked")
- plasma$vituse <- relevel(plasma$vituse, "Never")
- ############# new stuff
- #adding the values
- plasma$betaplasma_cat <- I_plasma_low + 2*I_plasma_med + 3*I_plasma_high
- #changing 1, 2, 3 to low, medium, high
- plasma$betaplasma_cat <- factor(plasma$betaplasma_cat,
- levels = c(1, 2, 3),
- labels = c("low", "medium", "high"))
- #relevel to medium being reference category
- plasma$betaplasma_cat <- relevel(plasma$betaplasma_cat, ref="medium")
- #presenting table
- table(plasma$betaplasma_cat)
- #############
- #####Null and full model for the step models
- # null model for comparisons####
- (model.null <- multinom(betaplasma_cat ~ 1, data = plasma))
- (sum.null <- summary(model.null))
- # full model for upper limit in stepwise####
- (model.full <- multinom(betaplasma_cat ~ age + sex + smokstat + quetelet + bmicat + vituse + calories + fat, data = plasma))
- (sum.full <- summary(model.full))
- ############# EVERYTHING FOR AIC AS CRITERION
- #####
- ##### Stepwise procedure using AIC####
- model.AIC <- step(model.null,
- scope = list(upper = model.full),
- direction = "both")
- (sum.AIC <- summary(model.AIC))
- ####### TEST for the AIC model -- beta values, standard error, z-value, P-value
- (beta.AIC <- sum.AIC$coefficients)
- (se.beta.AIC <- sum.AIC$standard.errors)
- (z.value.AIC <- beta.AIC/se.beta.AIC)
- (P.value.AIC <- pnorm(abs(z.value.AIC), lower.tail = FALSE))
- #odds ratios for the AIC model####
- (OR.AIC <- exp(beta.AIC))
- OR.AIC["low", ]
- OR.AIC["high", ]
- # Confidence intervals for OR, AIC model:
- ci.AIC <- exp(confint(model.AIC))
- # a 3-dimensional matrix!
- ci.AIC
- ## beta values, P values, odds ratio as well as confidence interval for the odds ratio
- (AIC.test.beta <- cbind(
- beta = round(beta.AIC["low", ], digits = 2),
- P.value = round(P.value.AIC["low", ], digits = 3),
- Odds_Ratio = round(OR.AIC["low", ], digits = 2),
- round(ci.AIC[, , "low"], digits = 2)))
- #####
- ##### Stepwise procedure using BIC####
- model.BIC <- step(model.null,
- scope = list(upper = model.full, lower = model.null),
- direction = "both",
- k = log(nrow(plasma)))
- (sum.BIC <- summary(model.BIC))
- model.grades
- ####### TEST for the BIC model -- beta values, standard error, z-value, P-value
- (beta.BIC <- sum.BIC$coefficients)
- (se.beta.BIC <- sum.BIC$standard.errors)
- (z.value.BIC <- beta.BIC/se.beta.BIC)
- (P.value.BIC <- pnorm(abs(z.value.BIC), lower.tail = FALSE))
- #odds ratios for the AIC model####
- (OR.BIC <- exp(beta.BIC))
- OR.BIC["low", ]
- OR.BIC["high", ]
- # Confidence intervals for OR, AIC model:
- ci.BIC <- exp(confint(model.BIC))
- # a 3-dimensional matrix!
- ci.BIC
- ## beta values, P values, odds ratio as well as confidence interval for the odds ratio
- (BIC.test.beta <- cbind(
- beta = round(beta.BIC["low", ], digits = 2),
- P.value = round(P.value.BIC["low", ], digits = 3),
- Odds_Ratio = round(OR.BIC["low", ], digits = 2),
- round(ci.BIC[, , "low"], digits = 2)))
- ###################
- ###Likelihood ratio test for the AIC model
- anova(update(model.AIC, . ~ . - quetelet), model.AIC)
- anova(update(model.AIC, . ~ . - vituse), model.AIC)
- anova(update(model.AIC, . ~ . - age), model.AIC)
- anova(update(model.AIC, . ~ . - sex), model.AIC) #[2,"Pr(Chi)"]
- ###Likelihood ratio test for the BIC model
- anova(update(model.BIC, . ~ . - quetelet), model.BIC)
- anova(update(model.BIC, . ~ . - vituse), model.BIC)
- ###################
- #PLOTTING AIC
- ######### AVERAGE AGE, VARYING SEX, VARYING BMI
- # range of betaplasma levels, repeated twice,
- # for public and private school
- x0 <- data.frame(quetelet = rep(seq(16, 51), 2))
- # the same ses, socst and science for all:
- #x0$vituse <- "Never"
- x0$vituse <- "Frequently"
- x0$age <- mean(plasma$age)
- x0$sex <- c(rep("Female", 51 - 16 + 1),
- rep("Male", 51 - 16 + 1))
- pred.x0 <- cbind(
- x0,
- predict(model.AIC, newdata = x0, type = "probs"))
- head(pred.x0)
- ggplot(pred.x0, aes(x = quetelet)) +
- geom_line(aes(y = low, color = "3. Low"), size = 2) +
- geom_line(aes(y = medium, color = "2. Medium"), size = 2) +
- geom_line(aes(y = high, color = "1. High"), size = 2) +
- expand_limits(y = c(0, 1)) +
- facet_grid(~ sex) +
- labs(title = "Multinomial: Average age with varying sex and BMI, frequent vituse, for AIC model",
- y = "probability",
- fill = "Program") +
- theme(text = element_text(size = 14))
- ggplot(pred.x0, aes(x = quetelet)) +
- geom_ribbon(aes(ymin = 0, ymax = low, fill = "3. Low")) +
- geom_ribbon(aes(ymin = low, ymax = low + medium,
- fill = "2. Medium")) +
- geom_ribbon(aes(ymin = low + medium, ymax = 1,
- fill = "1. High")) +
- facet_grid(~ sex) +
- labs(title = "Multinomial: Average age with varying sex and BMI, frequent vituse, for AIC model",
- y = "probability",
- fill = "Level of concentration") +
- theme(text = element_text(size = 14))
- ###################
- #PLOTTING BIC
- ######### AVERAGE AGE, VARYING SEX, VARYING BMI, FREQUENT VITUSE
- # range of betaplasma levels, repeated twice,
- # for public and private school
- x0.BIC <- data.frame(quetelet = rep(seq(16, 51), 3))
- # the same ses, socst and science for all:
- x0.BIC$vituse <- c(rep("Frequently", 51 - 16 + 1),
- rep("Not_frequently", 51 - 16 + 1),
- rep("Never", 51 - 16 + 1))
- pred.x0.BIC <- cbind(
- x0.BIC,
- predict(model.BIC, newdata = x0.BIC, type = "probs"))
- head(pred.x0.BIC)
- ggplot(pred.x0.BIC, aes(x = quetelet)) +
- geom_line(aes(y = low, color = "3. Low"), size = 2) +
- geom_line(aes(y = medium, color = "2. Medium"), size = 2) +
- geom_line(aes(y = high, color = "1. High"), size = 2) +
- facet_grid(~ vituse) +
- expand_limits(y = c(0, 1)) +
- labs(title = "Multinomial: Varying vituse and BMI, for BIC model",
- y = "probability",
- fill = "Program") +
- theme(text = element_text(size = 14))
- ggplot(pred.x0.BIC, aes(x = quetelet)) +
- geom_ribbon(aes(ymin = 0, ymax = low, fill = "3. Low")) +
- geom_ribbon(aes(ymin = low, ymax = low + medium,
- fill = "2. Medium")) +
- geom_ribbon(aes(ymin = low + medium, ymax = 1,
- fill = "1. High")) +
- facet_grid(~ vituse) +
- labs(title = "Multinomial: Varying vituse and BMI, for BIC model",
- y = "probability",
- fill = "Level of concentration") +
- theme(text = element_text(size = 14))
- ################################################
- ######LIKELIHOOD RATIO TEST FOR THE MODELS######
- ################################################
- # LR (deviance) tests####
- # AIC against null model
- anova(model.null, model.AIC)
- # BIC against null model
- anova(model.null, model.BIC)
- # AIC against BIC model
- anova(model.BIC, model.AIC)
- # AIC against full model
- anova(model.AIC, model.full)
- # BIC against full model
- anova(model.BIC, model.full)
- #AIC and BIC for the models
- info <- cbind(aic = AIC(model.null, model.AIC, model.BIC, model.full),
- bic = BIC(model.null, model.AIC, model.BIC, model.full))
- #R^2 for the models (McF?)
- info$r2 <- round(100*c(
- 0,
- 1 - model.AIC$deviance/model.null$deviance,
- 1 - model.BIC$deviance/model.null$deviance,
- 1 - model.full$deviance/model.null$deviance), digits = 1)
- #R^2 adjusted for the models (McF?)
- info$r2.adj <- round(100*c(
- 0,
- 1 - (model.AIC$deviance + (model.AIC$edf - model.null$edf))/model.null$deviance,
- 1 - (model.BIC$deviance + (model.BIC$edf - model.null$edf))/model.null$deviance,
- 1 - (model.full$deviance + (model.full$edf - model.null$edf))/model.null$deviance),
- digits = 1)
- #The AIC, BIC, R^2 and R^2 adjusted for all models
- (info)
- ################################################
- ######CONFUSION MATRICES FOR THE AIC MODEL######
- ################################################
- # estimated probs####
- predict(model.AIC, type = "prob")
- # predicted category
- predict(model.AIC)
- predict(model.AIC, type = "class")
- pred.final.AIC <- cbind(
- plasma,
- predict(model.AIC, type = "probs"),
- yhat = predict(model.AIC))
- head(pred.final.AIC)
- # confusion matrix####
- table(pred.final.AIC$betaplasma_cat)
- table(pred.final.AIC$yhat)
- (conf.final <- table(pred.final.AIC$betaplasma_cat, pred.final.AIC$yhat))
- # Sensitivities:
- sens.AIC <- round(100*diag(conf.final)/table(pred.final.AIC$betaplasma_cat), digits = 1)
- # precisions:
- prec.AIC <-round(100*diag(conf.final)/table(pred.final.AIC$yhat), digits = 1)
- # accuracy:
- acc.AIC <- 100*sum(diag(conf.final))/sum(conf.final)
- (Conf.stats.table.AIC <- data.frame(value = c("Medium", "Low", "High"), Sens_Spec = as.numeric(sens.AIC), Prec = as.numeric(prec.AIC), Acc = as.numeric(acc.AIC)))
- ################################################
- ######CONFUSION MATRICES FOR THE BIC MODEL######
- ################################################
- # estimated probs####
- predict(model.BIC, type = "prob")
- # predicted category
- predict(model.BIC)
- predict(model.BIC, type = "class")
- pred.final.BIC <- cbind(
- plasma,
- predict(model.BIC, type = "probs"),
- yhat = predict(model.BIC))
- head(pred.final.BIC)
- # confusion matrix####
- table(pred.final.BIC$betaplasma_cat)
- table(pred.final.BIC$yhat)
- (conf.final.BIC <- table(pred.final.BIC$betaplasma_cat, pred.final.BIC$yhat))
- # Sensitivities:
- sens.BIC <- round(100*diag(conf.final.BIC)/table(pred.final.BIC$betaplasma_cat), digits = 1)
- # precisions:
- prec.BIC <-round(100*diag(conf.final.BIC)/table(pred.final.BIC$yhat), digits = 1)
- # accuracy:
- acc.BIC <- 100*sum(diag(conf.final.BIC))/sum(conf.final.BIC)
- (Conf.stats.table.BIC <- data.frame(value = c("Medium", "Low", "High"), Sens_Spec = as.numeric(sens.BIC), Prec = as.numeric(prec.BIC), Acc = as.numeric(acc.BIC)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement