Advertisement
gl0Ppy

Untitled

May 12th, 2022
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 10.96 KB | None | 0 0
  1.  
  2. # plotting
  3. library(ggplot2)
  4. # Necessary for multinomial regression:
  5. library(nnet)
  6.  
  7. rm(list=ls())
  8.  
  9. plasma <- read.delim("plasma.txt")
  10. head(plasma)
  11. summary(plasma)
  12.  
  13. plasma$sex <- factor(plasma$sex,
  14.                       levels = c(1, 2),
  15.                       labels = c("Male", "Female"))
  16. plasma$smokstat <- factor(plasma$smokstat,
  17.                            levels = c(1, 2, 3),
  18.                            labels = c("Never smoked", "Former smoker", "Current smoker"))
  19. plasma$bmicat <- factor(plasma$bmicat,
  20.                          levels = c(1, 2, 3, 4),
  21.                          labels = c("Underweight", "Normal weight", "Overweight", "Obese"))
  22. plasma$vituse <- factor(plasma$vituse,
  23.                          levels = c(1, 2, 3),
  24.                          labels = c("Frequently", "Not_frequently", "Never"))
  25.  
  26. #low category
  27. (I_plasma_low <- as.numeric(plasma$betaplasma < 75))
  28. sum(I_plasma_low)
  29.  
  30. #medium category
  31. (I_plasma_med <- as.numeric(plasma$betaplasma >= 75 & plasma$betaplasma < 300))
  32. sum(I_plasma_med)
  33.  
  34. #high category
  35. (I_plasma_high <- as.numeric(plasma$betaplasma >= 300))
  36. sum(I_plasma_high)
  37.  
  38. #changing reference categories
  39. plasma$bmicat <- relevel(plasma$bmicat, "Normal weight")
  40. plasma$sex <- relevel(plasma$sex, "Female")
  41. plasma$smokstat <- relevel(plasma$smokstat, "Never smoked")
  42. plasma$vituse <- relevel(plasma$vituse, "Never")
  43.  
  44. ############# new stuff
  45. #adding the values
  46. plasma$betaplasma_cat <- I_plasma_low + 2*I_plasma_med + 3*I_plasma_high
  47.  
  48. #changing 1, 2, 3 to low, medium, high
  49. plasma$betaplasma_cat <- factor(plasma$betaplasma_cat,
  50.                         levels = c(1, 2, 3),
  51.                         labels = c("low", "medium", "high"))
  52.  
  53. #relevel to medium being reference category
  54. plasma$betaplasma_cat <- relevel(plasma$betaplasma_cat, ref="medium")
  55.  
  56. #presenting table
  57. table(plasma$betaplasma_cat)
  58.  
  59. #############
  60.  
  61.  
  62. #####Null and full model for the step models
  63. # null model for comparisons####
  64. (model.null <- multinom(betaplasma_cat ~ 1, data = plasma))
  65. (sum.null <- summary(model.null))
  66.  
  67. # full model for upper limit in stepwise####
  68. (model.full <- multinom(betaplasma_cat ~ age + sex + smokstat + quetelet + bmicat + vituse + calories + fat, data = plasma))
  69. (sum.full <- summary(model.full))
  70.  
  71.  
  72. ############# EVERYTHING FOR AIC AS CRITERION
  73.  
  74. #####
  75. ##### Stepwise procedure using AIC####
  76. model.AIC <- step(model.null,
  77.                     scope = list(upper = model.full),
  78.                     direction = "both")
  79. (sum.AIC <- summary(model.AIC))
  80.  
  81. ####### TEST for the AIC model -- beta values, standard error, z-value, P-value
  82. (beta.AIC <- sum.AIC$coefficients)
  83. (se.beta.AIC <- sum.AIC$standard.errors)
  84. (z.value.AIC <- beta.AIC/se.beta.AIC)
  85. (P.value.AIC <- pnorm(abs(z.value.AIC), lower.tail = FALSE))
  86.  
  87. #odds ratios for the AIC model####
  88. (OR.AIC <- exp(beta.AIC))
  89. OR.AIC["low", ]
  90. OR.AIC["high", ]
  91.  
  92. # Confidence intervals for OR, AIC model:
  93. ci.AIC <- exp(confint(model.AIC))
  94. # a 3-dimensional matrix!
  95. ci.AIC
  96.  
  97.  
  98. ## beta values, P values, odds ratio as well as confidence interval for the odds ratio
  99. (AIC.test.beta <- cbind(
  100.   beta = round(beta.AIC["low", ], digits = 2),
  101.   P.value = round(P.value.AIC["low", ], digits = 3),
  102.   Odds_Ratio = round(OR.AIC["low", ], digits = 2),
  103.   round(ci.AIC[, , "low"], digits = 2)))
  104.  
  105.  
  106. #####
  107. ##### Stepwise procedure using BIC####
  108. model.BIC <- step(model.null,
  109.                      scope = list(upper = model.full, lower = model.null),
  110.                      direction = "both",
  111.                      k = log(nrow(plasma)))
  112. (sum.BIC <- summary(model.BIC))
  113. model.grades
  114.  
  115. ####### TEST for the BIC model -- beta values, standard error, z-value, P-value
  116. (beta.BIC <- sum.BIC$coefficients)
  117. (se.beta.BIC <- sum.BIC$standard.errors)
  118. (z.value.BIC <- beta.BIC/se.beta.BIC)
  119. (P.value.BIC <- pnorm(abs(z.value.BIC), lower.tail = FALSE))
  120.  
  121. #odds ratios for the AIC model####
  122. (OR.BIC <- exp(beta.BIC))
  123. OR.BIC["low", ]
  124. OR.BIC["high", ]
  125.  
  126. # Confidence intervals for OR, AIC model:
  127. ci.BIC <- exp(confint(model.BIC))
  128. # a 3-dimensional matrix!
  129. ci.BIC
  130.  
  131.  
  132. ## beta values, P values, odds ratio as well as confidence interval for the odds ratio
  133. (BIC.test.beta <- cbind(
  134.   beta = round(beta.BIC["low", ], digits = 2),
  135.   P.value = round(P.value.BIC["low", ], digits = 3),
  136.   Odds_Ratio = round(OR.BIC["low", ], digits = 2),
  137.   round(ci.BIC[, , "low"], digits = 2)))
  138.  
  139.  
  140. ###################
  141.  
  142. ###Likelihood ratio test for the AIC model
  143. anova(update(model.AIC, . ~ . - quetelet), model.AIC)
  144. anova(update(model.AIC, . ~ . - vituse), model.AIC)
  145. anova(update(model.AIC, . ~ . - age), model.AIC)
  146. anova(update(model.AIC, . ~ . - sex), model.AIC) #[2,"Pr(Chi)"]
  147.  
  148. ###Likelihood ratio test for the BIC model
  149. anova(update(model.BIC, . ~ . - quetelet), model.BIC)
  150. anova(update(model.BIC, . ~ . - vituse), model.BIC)
  151.  
  152.  
  153.  
  154. ###################
  155. #PLOTTING AIC
  156. ######### AVERAGE AGE, VARYING SEX, VARYING BMI
  157. # range of betaplasma levels, repeated twice,
  158. # for public and private school
  159. x0 <- data.frame(quetelet = rep(seq(16, 51), 2))
  160. # the same ses, socst and science for all:
  161. #x0$vituse <- "Never"
  162. x0$vituse <- "Frequently"
  163. x0$age <- mean(plasma$age)
  164. x0$sex <- c(rep("Female", 51 - 16 + 1),
  165.                rep("Male", 51 - 16 + 1))
  166.  
  167.  
  168. pred.x0 <- cbind(
  169.   x0,
  170.   predict(model.AIC, newdata = x0, type = "probs"))
  171. head(pred.x0)
  172.  
  173.  
  174. ggplot(pred.x0, aes(x = quetelet)) +
  175.   geom_line(aes(y = low, color = "3. Low"), size = 2) +
  176.   geom_line(aes(y = medium, color = "2. Medium"), size = 2) +
  177.   geom_line(aes(y = high, color = "1. High"), size = 2) +
  178.   expand_limits(y = c(0, 1)) +
  179.   facet_grid(~ sex) +
  180.   labs(title = "Multinomial: Average age with varying sex and BMI, frequent vituse, for AIC model",
  181.        y = "probability",
  182.        fill = "Program") +
  183.   theme(text = element_text(size = 14))
  184.  
  185. ggplot(pred.x0, aes(x = quetelet)) +
  186.   geom_ribbon(aes(ymin = 0, ymax = low, fill = "3. Low")) +
  187.   geom_ribbon(aes(ymin = low, ymax = low + medium,
  188.                   fill = "2. Medium")) +
  189.   geom_ribbon(aes(ymin = low + medium, ymax = 1,
  190.                   fill = "1. High")) +
  191.   facet_grid(~ sex) +
  192.   labs(title = "Multinomial: Average age with varying sex and BMI, frequent vituse, for AIC model",
  193.        y = "probability",
  194.        fill = "Level of concentration") +
  195.   theme(text = element_text(size = 14))
  196.  
  197.  
  198.  
  199. ###################
  200. #PLOTTING BIC
  201. ######### AVERAGE AGE, VARYING SEX, VARYING BMI, FREQUENT VITUSE
  202. # range of betaplasma levels, repeated twice,
  203. # for public and private school
  204. x0.BIC <- data.frame(quetelet = rep(seq(16, 51), 3))
  205. # the same ses, socst and science for all:
  206. x0.BIC$vituse <- c(rep("Frequently", 51 - 16 + 1),
  207.             rep("Not_frequently", 51 - 16 + 1),
  208.             rep("Never", 51 - 16 + 1))
  209.  
  210.  
  211. pred.x0.BIC <- cbind(
  212.   x0.BIC,
  213.   predict(model.BIC, newdata = x0.BIC, type = "probs"))
  214. head(pred.x0.BIC)
  215.  
  216.  
  217. ggplot(pred.x0.BIC, aes(x = quetelet)) +
  218.   geom_line(aes(y = low, color = "3. Low"), size = 2) +
  219.   geom_line(aes(y = medium, color = "2. Medium"), size = 2) +
  220.   geom_line(aes(y = high, color = "1. High"), size = 2) +
  221.   facet_grid(~ vituse) +
  222.   expand_limits(y = c(0, 1)) +
  223.   labs(title = "Multinomial: Varying vituse and BMI, for BIC model",
  224.        y = "probability",
  225.        fill = "Program") +
  226.   theme(text = element_text(size = 14))
  227.  
  228. ggplot(pred.x0.BIC, aes(x = quetelet)) +
  229.   geom_ribbon(aes(ymin = 0, ymax = low, fill = "3. Low")) +
  230.   geom_ribbon(aes(ymin = low, ymax = low + medium,
  231.                   fill = "2. Medium")) +
  232.   geom_ribbon(aes(ymin = low + medium, ymax = 1,
  233.                   fill = "1. High")) +
  234.   facet_grid(~ vituse) +
  235.   labs(title = "Multinomial: Varying vituse and BMI, for BIC model",
  236.        y = "probability",
  237.        fill = "Level of concentration") +
  238.   theme(text = element_text(size = 14))
  239.  
  240. ################################################
  241. ######LIKELIHOOD RATIO TEST FOR THE MODELS######
  242. ################################################
  243.  
  244. # LR (deviance) tests####
  245. # AIC against null model
  246. anova(model.null, model.AIC)
  247. # BIC against null model
  248. anova(model.null, model.BIC)
  249. # AIC against BIC model
  250. anova(model.BIC, model.AIC)
  251. # AIC against full model
  252. anova(model.AIC, model.full)
  253. # BIC against full model
  254. anova(model.BIC, model.full)
  255.  
  256. #AIC and BIC for the models
  257. info <- cbind(aic = AIC(model.null, model.AIC, model.BIC, model.full),
  258.               bic = BIC(model.null, model.AIC, model.BIC, model.full))
  259. #R^2 for the models (McF?)
  260. info$r2 <- round(100*c(
  261.   0,
  262.   1 - model.AIC$deviance/model.null$deviance,
  263.   1 - model.BIC$deviance/model.null$deviance,
  264.   1 - model.full$deviance/model.null$deviance), digits = 1)
  265. #R^2 adjusted for the models (McF?)
  266. info$r2.adj <- round(100*c(
  267.   0,
  268.   1 - (model.AIC$deviance + (model.AIC$edf - model.null$edf))/model.null$deviance,
  269.   1 - (model.BIC$deviance + (model.BIC$edf - model.null$edf))/model.null$deviance,
  270.   1 - (model.full$deviance + (model.full$edf - model.null$edf))/model.null$deviance),
  271.   digits = 1)
  272.  
  273. #The AIC, BIC, R^2 and R^2 adjusted for all models
  274. (info)
  275.  
  276.  
  277.  
  278. ################################################
  279. ######CONFUSION MATRICES FOR THE AIC MODEL######
  280. ################################################
  281.  
  282. # estimated probs####
  283. predict(model.AIC, type = "prob")
  284. # predicted category
  285. predict(model.AIC)
  286. predict(model.AIC, type = "class")
  287.  
  288. pred.final.AIC <- cbind(
  289.   plasma,
  290.   predict(model.AIC, type = "probs"),
  291.   yhat = predict(model.AIC))
  292. head(pred.final.AIC)
  293.  
  294. # confusion matrix####
  295. table(pred.final.AIC$betaplasma_cat)
  296. table(pred.final.AIC$yhat)
  297.  
  298. (conf.final <- table(pred.final.AIC$betaplasma_cat, pred.final.AIC$yhat))
  299. # Sensitivities:
  300. sens.AIC <- round(100*diag(conf.final)/table(pred.final.AIC$betaplasma_cat), digits = 1)
  301. # precisions:
  302. prec.AIC <-round(100*diag(conf.final)/table(pred.final.AIC$yhat), digits = 1)
  303. # accuracy:
  304. acc.AIC <- 100*sum(diag(conf.final))/sum(conf.final)
  305.  
  306. (Conf.stats.table.AIC <- data.frame(value = c("Medium", "Low", "High"), Sens_Spec = as.numeric(sens.AIC), Prec = as.numeric(prec.AIC), Acc = as.numeric(acc.AIC)))
  307.  
  308.  
  309. ################################################
  310. ######CONFUSION MATRICES FOR THE BIC MODEL######
  311. ################################################
  312.  
  313.  
  314. # estimated probs####
  315. predict(model.BIC, type = "prob")
  316. # predicted category
  317. predict(model.BIC)
  318. predict(model.BIC, type = "class")
  319.  
  320. pred.final.BIC <- cbind(
  321.   plasma,
  322.   predict(model.BIC, type = "probs"),
  323.   yhat = predict(model.BIC))
  324. head(pred.final.BIC)
  325.  
  326. # confusion matrix####
  327. table(pred.final.BIC$betaplasma_cat)
  328. table(pred.final.BIC$yhat)
  329.  
  330. (conf.final.BIC <- table(pred.final.BIC$betaplasma_cat, pred.final.BIC$yhat))
  331. # Sensitivities:
  332. sens.BIC <- round(100*diag(conf.final.BIC)/table(pred.final.BIC$betaplasma_cat), digits = 1)
  333. # precisions:
  334. prec.BIC <-round(100*diag(conf.final.BIC)/table(pred.final.BIC$yhat), digits = 1)
  335. # accuracy:
  336. acc.BIC <- 100*sum(diag(conf.final.BIC))/sum(conf.final.BIC)
  337.  
  338. (Conf.stats.table.BIC <- data.frame(value = c("Medium", "Low", "High"), Sens_Spec = as.numeric(sens.BIC), Prec = as.numeric(prec.BIC), Acc = as.numeric(acc.BIC)))
  339.  
  340.  
  341.  
  342.  
  343.  
  344.  
  345.  
  346.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement