Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(dplyr)
- library(purrr)
- set.seed(2017-06-27)
- # Imagine this function gives us measurements from nature
- data_generator <- function(n = 500){
- data_frame(var1 = rnorm(500)) %>%
- mutate(var2 = var1 + rnorm(500)) %>%
- mutate(var3 = var1 * var2 + rnorm(500)) %>%
- mutate(resp = abs(var1 + var2 + var3)) %>%
- mutate(resp = abs(resp)/max(resp)) %>%
- mutate(resp = as.numeric(resp > median(resp)))
- }
- # Collect some data
- dataset1 <- data_generator()
- # Build a model
- model <- glm(resp ~ ., data = dataset1, family = binomial())
- model %>%
- summary()
- # Coefficients:
- # Estimate Std. Error z value Pr(>|z|)
- # (Intercept) -0.10570 0.10664 -0.991 0.32160
- # var1 0.78714 0.15443 5.097 3.45e-07 ***
- # var2 0.11033 0.09607 1.148 0.25082
- # var3 0.18189 0.06002 3.031 0.00244 **
- # Function to calculate RMSE
- rmse <- . %>%
- mutate(pred = predict(model, newdata = ., type = "response")) %>%
- mutate(error = resp - pred) %>%
- select(error) %>%
- unlist() %>%
- `^`(2) %>% mean() %>% sqrt()
- # Function to do sampling with replacement
- bootstrapped_rmse <- . %>%
- sample_frac(replace = TRUE) %>%
- rmse()
- # 100 bootstrap replicates (do 1000 or 10000)
- rmse_ci <- 1:100 %>%
- map(~ dataset1) %>%
- map_dbl(bootstrapped_rmse)
- # Here's your 95% CI
- quantile(rmse_ci, c(0.025, 0.975))
- # 2.5% 97.5%
- # 0.4473360 0.4744052
- # Collect more data
- dataset2 <- data_generator()
- # Inspect RMSE
- dataset2 %>%
- rmse()
- # 0.4580332
- # It's in the CI which suggests the model might be good.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement