Advertisement
Guest User

Untitled

a guest
Jan 15th, 2019
130
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.77 KB | None | 0 0
  1. library(glmnet)
  2. library(ggplot2)
  3. library(dplyr)
  4.  
  5.  
  6. X <- scale(na.omit(psych::bfi[,1:25]), scale = F)
  7. y <- X[,1]
  8. X <- X[,-1]
  9.  
  10.  
  11.  
  12.  
  13.  
  14.  
  15. func_cv <- function(train_size){
  16.  
  17. index <- sample(1:nrow(X), size = train_size, replace = F)
  18.  
  19. y_train <- y[index]
  20. x_train <- X[index,]
  21.  
  22. y_test  <- y[-index]
  23. X_test <-  X[ - index,]
  24.  
  25.  
  26.  
  27. fit_reg <- cv.glmnet(x = x_train, y = y_train)
  28. pred_reg <- predict(fit_reg, newx = as.matrix(X_test), s = fit_reg$lambda.1se)
  29.  
  30.  
  31.  
  32. fit_nonreg <- bestglm::bestglm(cbind.data.frame(x_train, y_train))
  33.  
  34. fit_full <- lm(y_train ~ x_train)
  35. pred_full <-  fit_full$coefficients %*% t(cbind(1, X_test))
  36.  
  37. selected_names <- names(fit_nonreg$BestModel$coefficients)[-1]
  38. selected_beta <- fit_nonreg$BestModel$coefficients
  39.  
  40.  
  41.  
  42. X_test_selected <- cbind(1,  X_test[, selected_names])
  43.  
  44. pred_non <- selected_beta %*% t(X_test_selected)
  45.  
  46. mse_nonreg <- mean((pred_non - y_test)^2)
  47.  
  48. mse_reg  <- mean((pred_reg - y_test)^2)
  49.  
  50. mse_full <- mean((pred_full - y_test)^2)
  51.  
  52. data.frame(mse = c(mse_nonreg, mse_reg, mse_full),
  53.            method = c("non_regularized_bic",
  54.                       "regularized_l1", "full_model"),
  55.            proportion_np = 25 / train_size)
  56.  
  57. }
  58.  
  59.  
  60. # note p = 25
  61. train_size <- c(50, 100, 200)
  62.  
  63.  
  64. simulation <- lapply(train_size, function(x)    
  65.   do.call(rbind.data.frame, replicate(200,
  66.                                       func_cv(x), simplify = F)) )
  67.  
  68.  
  69.  
  70.  
  71.  
  72. do.call(rbind.data.frame, simulation) %>%
  73.   group_by(proportion_np, method) %>%
  74.   summarise(mse_mu = mean(mse)) %>%
  75.   ggplot(aes(x = as.factor(proportion_np), y = mse_mu, group = method, color  = method)) +
  76.   geom_line() +
  77.   theme_bw() +
  78.   scale_x_discrete(breaks = 25 / train_size) +
  79.   ylab("MSE on New Data") +
  80.   xlab("p/n") +
  81.   theme(legend.position = "top")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement