SHARE
TWEET

ANOVA and t-test

a guest Oct 10th, 2019 93 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Session 2 - Other statistical tests (30 minutes)
  2.  
  3. # Lecture: Understanding two-sample t-tests and ANOVA, the curse of dimensionality
  4. # Discussion: Similarities to regression
  5. # Demonstration: Executing t-test and ANOVA as linear models
  6. # Hands-on Exercises: Exploring model forms in R
  7.  
  8. # ---- ANOVA ----
  9. # ANALYSIS OF VARIANCE: sums of squares SST (total), SSM (model), SSR (residual), where SST = SSM + SSR
  10. # classical approach
  11. PlantGrowth # print the data set used below (columns used: weight, group)
  12.  
  13. Plant_lm <- lm(weight ~ group, data = PlantGrowth) # the ANOVA expressed as a linear model
  14. Plant_lm # printing the fit shows one coefficient per term:
  15. # (Intercept)    grouptrt1    grouptrt2
  16. #       5.032       -0.371        0.494
  17.  
  18. # What do these coefficients mean? Compare them with the mean weight of each group:
  19. PlantGrowth %>%
  20.   group_by(group) %>%
  21.   summarise(avg = mean(weight))
  22. # 1 ctrl   5.03   <- the intercept (5.032)
  23. # 2 trt1   4.66   <- intercept + grouptrt1 (5.032 - 0.371)
  24. # 3 trt2   5.53   <- intercept + grouptrt2 (5.032 + 0.494)
  25.  
  26. # name the coefficients (read from the fit, so they cannot drift from the model):
  27. b_0 <- coef(Plant_lm)[["(Intercept)"]] # 5.032: mean of ctrl, the reference group
  28. b_1 <- coef(Plant_lm)[["grouptrt1"]] # -0.371: difference in mean between ctrl & trt1
  29. b_2 <- coef(Plant_lm)[["grouptrt2"]] # 0.494: difference in mean between ctrl & trt2
  30.  
  31. # Put both model predictions on every row: NULL model = global mean, ANOVA model = group mean
  32. Plant_stats <- PlantGrowth %>%
  33.   mutate(global_mean = mean(weight)) %>%
  34.   group_by(group) %>%
  35.   mutate(group_mean = mean(weight))
  36.  
  37. # Sums of squares: SST (total), SSR (residual), SSM (model)
  38. SST <- with(Plant_stats, sum((weight - global_mean)^2)) # observations vs. global mean
  39. SSR <- with(Plant_stats, sum((weight - group_mean)^2)) # observations vs. their group mean
  40. SSM <- with(Plant_stats, sum((global_mean - group_mean)^2)) # group means vs. global mean
  41.  
  42. SSM + SSR # the model and residual sums of squares together ...
  43. SST # ... should print the same value as the total
  44.  
  45. N <- nrow(PlantGrowth) # The number of observations
  46. K <- 3 # The number of coefficients (b_0, b_1, b_2)
  47. MSM <- SSM/(K - 1) # The MEAN squared model: SSM per model degree of freedom (K - 1)
  48. MSR <- SSR/(N - K) # The MEAN squared residuals: SSR per residual degree of freedom (N - K)
  49.  
  50. # Consequence (as stated in the session): MSR will decrease as sample size increases,
  51. # so the ratio will increase. NOTE(review): SSR grows with N as well - confirm this claim.
  52. MSM/MSR # 4.846088, the F statistic: put this on an F distribution
  53.  
  54. # Upper-tail F probability. (F is the t dist squared only when df1 = 1; here df1 = K - 1 = 2.)
  55. pf(MSM/MSR, df1 = (K - 1), df2 = (N - K), lower.tail = FALSE) # 0.01590996
  56.  
  57. # Typical:
  58. anova(Plant_lm) # 0.01591
  59. # Same p-value as the manual calculation above (0.01590996).
  60. # A low p-value indicates that it is unlikely to see this data
  61. # if there were no influence of X on Y: here there is about a 1.6% chance
  62. # of observing results like these purely by chance alone.
  63.  
  64. # Can we do this with a two-sample t-test??
  65. # ---- Two-Sample t-test ----
  66.  
  67. # Do different treatments result in extra sleep?
  68. sleep # print the data set used below (columns used: extra, group)
  69.  
  70. # typical (equal variances requested via var.equal = TRUE):
  71. t.test(extra ~ group, data = sleep, var.equal = TRUE)
  72. # p-value = 0.07919 (compare with the pf() result at the end of this section: 0.07918671)
  73.  
  74. # Put both model predictions on every row: NULL model = global mean, ANOVA model = group mean
  75. sleep_stats <- sleep %>%
  76.   mutate(global_mean = mean(extra)) %>%
  77.   group_by(group) %>%
  78.   mutate(group_mean = mean(extra))
  79.  
  80. # What is b_1? 1.58, the difference between the two group means
  81. lm(extra ~ group, data = sleep)
  82. # mean in group 1    mean in group 2
  83. #            0.75               2.33
  84. 2.33 - 0.75 # b_1 = mean in group 2 minus mean in group 1
  85.  
  86. # Now handle the two-group comparison exactly like the ANOVA:
  87. # Sums of squares: SST (total), SSR (residual), SSM (model)
  88. SST <- with(sleep_stats, sum((extra - global_mean)^2)) # observations vs. global mean
  89. SSR <- with(sleep_stats, sum((extra - group_mean)^2)) # observations vs. their group mean
  90. SSM <- with(sleep_stats, sum((global_mean - group_mean)^2)) # group means vs. global mean
  91.  
  92. SSM + SSR # the model and residual sums of squares together ...
  93. SST # ... should print the same value as the total
  94.  
  95. N <- nrow(sleep) # The number of observations
  96. K <- 2 # The number of coefficients (b_0, b_1)
  97. MSM <- SSM/(K - 1) # The MEAN squared model: SSM per model degree of freedom (K - 1)
  98. MSR <- SSR/(N - K) # The MEAN squared residuals: SSR per residual degree of freedom (N - K)
  99.  
  100. # Consequence (as stated in the session): MSR will decrease as sample size increases,
  101. # so the ratio will increase. NOTE(review): SSR grows with N as well - confirm this claim.
  102. MSM/MSR # approx. 3.4626, the F statistic (4.846088 was the PlantGrowth value): put this on an F distribution
  103.  
  104. # The F dist is just the T dist squared (this holds here because df1 = K - 1 = 1):
  105. pf(MSM/MSR, df1 = (K - 1), df2 = (N - K), lower.tail = FALSE) # 0.07918671
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top