Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2019
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.42 KB | None | 0 0
  # Simulation: can lm() separate the effects of two nested binary predictors?
  #
  # iv2 is forced to 0 whenever iv1 is 0, so the two predictors are correlated
  # by construction. The upshot is that in this case it seemed to work fine -
  # maybe it depends on the degree of collinearity between the variables?

  library(tibble)
  library(dplyr)
  library(broom)
  library(ggplot2) # provides ggplot(), aes() and geom_density() used below

  set.seed(nchar("them regression models, eh?")^3)

  # Background conditions ----

  sample_size <- 10000
  prop <- 0.1 # probability that each raw indicator is drawn as 1

  # Simulated data ----

  # Draw order (iv1, then iv2, then dv) determines the random stream, so keep
  # it fixed if the results are to stay reproducible under the seed above.
  dat <-
    tibble(
      iv1 = sample(
        c(1, 0),
        size = sample_size,
        replace = TRUE,
        prob = c(prop, 1 - prop)
      ),
      iv2 = sample(
        c(1, 0),
        size = sample_size,
        replace = TRUE,
        prob = c(prop, 1 - prop)
      )
    ) %>%
    mutate(
      # nest iv2 inside iv1: iv2 can only be 1 where iv1 is 1
      iv2 = if_else(iv1 == 0, 0, iv2),
      dv = rnorm(sample_size, mean = iv1 + (iv2 * 0.5), sd = 1)
    )

  # when iv1 is 0, iv2 is 0 as well;
  # the true coefficient is 1 for iv1 and 0.5 for iv2

  # Checks on the predictors ----

  dat %>%
    summarise(cor = cor(iv1, iv2))

  # they're weakly correlated

  dat %>%
    count(iv1, iv2)

  # we've got a good enough number of observations in each group

  # Models ----

  # Fit each model once and reuse it for both the coefficient table (tidy)
  # and the fit statistics (glance).
  fit_both <- lm(dv ~ iv1 + iv2, data = dat)
  fit_iv2 <- lm(dv ~ iv2, data = dat)
  fit_iv1 <- lm(dv ~ iv1, data = dat)

  tidy(fit_both)
  # this separates out the two effects correctly

  glance(fit_both)

  tidy(fit_iv2)

  # the estimate for iv2 picks up the effect of both iv1 and iv2

  glance(fit_iv2)

  # and the model fits pretty badly

  tidy(fit_iv1)
  # this picks up the effect of iv1 correctly

  glance(fit_iv1)

  # and it fits almost as well as the model with both variables
  # ... I guess because there are so few cases with iv2

  # Distribution of the outcome ----

  dat %>%
    ggplot(aes(x = dv)) +
    geom_density()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement