Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Simulation: can lm() separate the effects of two binary predictors when
# one of them (iv2) can only be 1 if the other (iv1) is 1?
#
# The upshot is that in this case it seemed to work fine - maybe it depends
# on the degree of collinearity between the variables?

library(tibble)
library(dplyr)
library(broom)
library(ggplot2) # provides ggplot(), aes() and geom_density() used below

set.seed(nchar("them regression models, eh?")^3)

# Background conditions ----
sample_size <- 10000
prop <- 0.1 # probability that each raw indicator is drawn as 1

# Simulated data ----
# The random draws happen in the order iv1, iv2, dv; keep that order so the
# seed above keeps producing the same data set.
dat <-
  tibble(
    iv1 = sample(
      c(1, 0),
      size = sample_size, replace = TRUE, prob = c(prop, 1 - prop)
    ),
    iv2 = sample(
      c(1, 0),
      size = sample_size, replace = TRUE, prob = c(prop, 1 - prop)
    )
  ) %>%
  mutate(
    # force iv2 to 0 wherever iv1 is 0, so iv2 is nested inside iv1
    iv2 = if_else(iv1 == 0, 0, iv2),
    dv = rnorm(sample_size, mean = iv1 + (iv2 * 0.5), sd = 1)
  )
# when iv1 is 0, iv2 is 0;
# the real parameters are 1 for iv1 and 0.5 for iv2

# Descriptives ----
dat %>%
  summarise(cor = cor(iv1, iv2))
# they're weakly correlated

dat %>%
  count(iv1, iv2)
# we've got a good enough number of observations in each group

# Models ----
# Each model is fitted once and reused for both the coefficient table
# (tidy) and the fit statistics (glance).
fit_both <- lm(dv ~ iv1 + iv2, data = dat)
tidy(fit_both)
# this separates out the two effects correctly
glance(fit_both)

fit_iv2 <- lm(dv ~ iv2, data = dat)
tidy(fit_iv2)
# the estimate for iv2 picks up the effect of both iv1 and iv2
glance(fit_iv2)
# and the model fits pretty badly

fit_iv1 <- lm(dv ~ iv1, data = dat)
tidy(fit_iv1)
# this picks up the effect of iv1 correctly
glance(fit_iv1)
# and it fits almost as well as the model with both variables
# ... I guess because there are so few cases with iv2

# Distribution of the outcome ----
ggplot(dat, aes(x = dv)) +
  geom_density()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement