# Simulation: can a linear model separate the effects of two binary predictors
# when one of them (iv2) can only be 1 if the other (iv1) is also 1?
#
# The upshot is that in this case it seemed to work fine - maybe it depends
# on the degree of collinearity between the variables?

library(tibble)
library(dplyr)
library(broom)
library(ggplot2) # needed for the density plot at the end of the script

set.seed(nchar("them regression models, eh?")^3)

# Background conditions ----

sample_size <- 10000
prop <- 0.1 # probability that each raw indicator is drawn as 1

# Simulated data ----

# The draw order (iv1, then iv2, then dv) is kept fixed so that the seed
# above always reproduces the same data set.
dat <-
  tibble(
    iv1 = sample(
      c(1, 0),
      size = sample_size, replace = TRUE, prob = c(prop, 1 - prop)
    ),
    iv2 = sample(
      c(1, 0),
      size = sample_size, replace = TRUE, prob = c(prop, 1 - prop)
    )
  ) %>%
  mutate(
    # iv2 is forced to 0 whenever iv1 is 0, so iv2 is nested within iv1
    iv2 = if_else(iv1 == 0, 0, iv2),
    dv = rnorm(sample_size, mean = iv1 + (iv2 * 0.5), sd = 1)
  )

# Whenever iv1 is 0, iv2 is 0 as well.
# The true parameter for iv1 is 1 and the true parameter for iv2 is 0.5.

dat %>%
  summarise(cor = cor(iv1, iv2))

# they're weakly correlated

dat %>%
  count(iv1, iv2)

# we've got a good enough number of observations in each group

# Models ----

# Each model is fitted once and reused for both the coefficient table
# (tidy) and the fit summary (glance).
fit_both <- lm(dv ~ iv1 + iv2, data = dat)
fit_iv2 <- lm(dv ~ iv2, data = dat)
fit_iv1 <- lm(dv ~ iv1, data = dat)

tidy(fit_both)
# this separates out the two effects correctly

glance(fit_both)

tidy(fit_iv2)

# the estimate for iv2 picks up the effect of both iv1 and iv2

glance(fit_iv2)

# and the model fits pretty badly

tidy(fit_iv1)
# this picks up the effect of iv1 correctly

glance(fit_iv1)

# and it fits almost as well as the model with both variables
# ... I guess because there are so few cases with iv2

# Distribution of the outcome ----

dat %>%
  ggplot(aes(x = dv)) +
  geom_density()
