Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(glmnet)
- library(ggplot2)
- library(dplyr)
- X <- scale(na.omit(psych::bfi[,1:25]), scale = F)
- y <- X[,1]
- X <- X[,-1]
- func_cv <- function(train_size){
- index <- sample(1:nrow(X), size = train_size, replace = F)
- y_train <- y[index]
- x_train <- X[index,]
- y_test <- y[-index]
- X_test <- X[ - index,]
- fit_reg <- cv.glmnet(x = x_train, y = y_train)
- pred_reg <- predict(fit_reg, newx = as.matrix(X_test), s = fit_reg$lambda.1se)
- fit_nonreg <- bestglm::bestglm(cbind.data.frame(x_train, y_train))
- fit_full <- lm(y_train ~ x_train)
- pred_full <- fit_full$coefficients %*% t(cbind(1, X_test))
- selected_names <- names(fit_nonreg$BestModel$coefficients)[-1]
- selected_beta <- fit_nonreg$BestModel$coefficients
- X_test_selected <- cbind(1, X_test[, selected_names])
- pred_non <- selected_beta %*% t(X_test_selected)
- mse_nonreg <- mean((pred_non - y_test)^2)
- mse_reg <- mean((pred_reg - y_test)^2)
- mse_full <- mean((pred_full - y_test)^2)
- data.frame(mse = c(mse_nonreg, mse_reg, mse_full),
- method = c("non_regularized_bic",
- "regularized_l1", "full_model"),
- proportion_np = 25 / train_size)
- }
- # note p = 25
- train_size <- c(50, 100, 200)
- simulation <- lapply(train_size, function(x)
- do.call(rbind.data.frame, replicate(200,
- func_cv(x), simplify = F)) )
- do.call(rbind.data.frame, simulation) %>%
- group_by(proportion_np, method) %>%
- summarise(mse_mu = mean(mse)) %>%
- ggplot(aes(x = as.factor(proportion_np), y = mse_mu, group = method, color = method)) +
- geom_line() +
- theme_bw() +
- scale_x_discrete(breaks = 25 / train_size) +
- ylab("MSE on New Data") +
- xlab("p/n") +
- theme(legend.position = "top")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement