Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(magrittr) # %>%, %<>%
- library(data.table) # data.table, setnames
- library(dplyr) # filter, mutate
- library(purrr) # map_dbl
- # data generation
- logit_f <- function(x, b) 1 / (1 + exp(x %*% b))
- dat <- matrix(rnorm(1000), 100, 10) %>%
- cbind(map_dbl(logit_f(cbind(1, .), runif(11, -5, 5)), ~rbinom(1, 1, .))) %>%
- data.table %>% setnames(c(paste0("X", 1:10), "Y"))
- # k-fold cv index
- cv_index_f <- function(numSampleSize, numFold){
- cvIndex <- rep(1:numFold, each = floor(numSampleSize / numFold))
- m <- numSampleSize %% numFold
- if (m > 0)
- cvIndex %<>% c(1:m)
- return(cvIndex %>% sample(length(.)))
- }
- # cv function
- cv_f <- function(formula, data, threshold = 0.5, kfold = NULL){
- y <- model.frame(formula, data) %>% model.response
- if (is.null(kfold) || kfold == 0)
- kfold <- nrow(data)
- cvIndex <- cv_index_f(nrow(data), kfold)
- predictRes <- vector('numeric', nrow(data))
- for (i in 1:kfold)
- {
- model <- filter(data, cvIndex != i) %>%
- glm(formula, data = ., binomial)
- predictRes[cvIndex == i] <- filter(data, cvIndex == i) %>%
- predict(model, ., type='response')
- }
- return(mean((predictRes > threshold) == y))
- }
- cv_f(Y ~ X1 + X2 + X3, dat)
- cv_f(Y ~ ., dat)
- dat %<>% setnames("Y", "sss")
- cv_f(sss ~ ., dat)
Advertisement
Add Comment
Please, Sign In to add comment