celestialgod

cv function

Mar 13th, 2016
175
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.28 KB | None | 0 0
  1. library(magrittr)   # %>%, %<>%
  2. library(data.table) # data.table, setnames
  3. library(dplyr)      # filter, mutate
  4. library(purrr)      # map_dbl
  5. # data generation
  6. logit_f <- function(x, b) 1 / (1 + exp(x %*% b))
  7. dat <- matrix(rnorm(1000), 100, 10) %>%
  8.   cbind(map_dbl(logit_f(cbind(1, .), runif(11, -5, 5)), ~rbinom(1, 1, .))) %>%
  9.   data.table %>% setnames(c(paste0("X", 1:10), "Y"))
  10.  
  11. # k-fold cv index
  12. cv_index_f <- function(numSampleSize, numFold){
  13.   cvIndex <- rep(1:numFold, each = floor(numSampleSize / numFold))
  14.   m <- numSampleSize %% numFold
  15.   if (m > 0)
  16.     cvIndex %<>% c(1:m)
  17.   return(cvIndex %>% sample(length(.)))
  18. }
  19.  
  20. # cv function
  21. cv_f <- function(formula, data, threshold = 0.5, kfold = NULL){
  22.   y <- model.frame(formula, data) %>% model.response
  23.   if (is.null(kfold) || kfold == 0)
  24.     kfold <- nrow(data)
  25.   cvIndex <- cv_index_f(nrow(data), kfold)
  26.   predictRes <- vector('numeric', nrow(data))
  27.   for (i in 1:kfold)
  28.   {
  29.     model <- filter(data, cvIndex != i) %>%
  30.       glm(formula, data = ., binomial)
  31.     predictRes[cvIndex == i] <- filter(data, cvIndex == i) %>%
  32.       predict(model, ., type='response')
  33.   }
  34.   return(mean((predictRes > threshold) == y))  
  35. }
  36. cv_f(Y ~ X1 + X2 + X3, dat)
  37. cv_f(Y ~ ., dat)
  38. dat %<>% setnames("Y", "sss")
  39. cv_f(sss ~ ., dat)
Advertisement
Add Comment
Please, Sign In to add comment