Advertisement
Guest User

ANOVA and t-test

a guest
Oct 10th, 2019
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.17 KB | None | 0 0
  1. # Session 2 - Other statistical tests (30 minutes)
  2.  
  3. # Lecture: Understanding two-sample t-tests and ANOVA, the curse of dimensionality
  4. # Discussion: Similarities to regression
  5. # Demonstration: Executing t-test and ANOVA as linear models
  6. # Hands-on Exercises: Exploring model forms in R
  7.  
  8. # ---- ANOVA ----
  9. # ANALYSIS OF VARIANCE (i.e. SST, SSM, SSR, also... SST = SSM + SSR)
  10. # classical approach
  11. PlantGrowth
  12.  
  13. Plant_lm <- lm(weight ~ group, data = PlantGrowth)
  14. Plant_lm
  15. # (Intercept) grouptrt1 grouptrt2
  16. # 5.032 -0.371 0.494
  17.  
  18. # What do these coefficients mean?
  19. PlantGrowth %>%
  20. group_by(group) %>%
  21. summarise(avg = mean(weight))
  22. # 1 ctrl 5.03
  23. # 2 trt1 4.66
  24. # 3 trt2 5.53
  25.  
  26. # name the coefficients:
  27. b_0 <- 5.032
  28. b_1 <- -0.371 # Difference in mean between ctrl & trt1
  29. b_2 <- 0.494 # Difference in mean between ctrl & trt2
  30.  
  31. # Let's define the two models (ANOVA, NULL)
  32. PlantGrowth %>%
  33. mutate(global_mean = mean(weight)) %>%
  34. group_by(group) %>%
  35. mutate(group_mean = mean(weight)) -> Plant_stats
  36.  
  37. # Calculate the Variances: SSR, SSM, SST
  38. SST <- sum(( Plant_stats$weight - Plant_stats$global_mean )^2)
  39. SSR <- sum(( Plant_stats$weight - Plant_stats$group_mean )^2)
  40. SSM <- sum(( Plant_stats$global_mean - Plant_stats$group_mean )^2)
  41.  
  42. SSM + SSR
  43. SST
  44.  
  45. N <- nrow(PlantGrowth)
  46. K <- 3 # The number of coefficients (b_0, b_1, b_2)
  47. MSM <- SSM/(K - 1) # The MEAN squared model
  48. MSR <- SSR/(N - K) # The MEAN squared residuals
  49.  
  50. # Consequence is ... MSR will decrease as sample size increases
  51. # ratio will increase
  52. MSM/MSR # 4.846088 # put this on an F distribution
  53.  
  54. # The F dist is just the T dist squared:
  55. pf(MSM/MSR, df1 = (K - 1), df2 = (N - K), lower=FALSE) # 0.01590996
  56.  
  57. # Typical:
  58. anova(Plant_lm) # 0.01591
  59. # Same as calculating above
  60. # low p-value indicates that it is unlikely to see this data
  61. # Is there was no influcence of X on Y, here 1.5% chance of observing this
  62. # Results purely by chance alone.
  63.  
  64. # Can we do this with a two-sample t-test??
  65. # ---- Two-Sample t-test ----
  66.  
  67. # Do different treatments result in extra sleep?
  68. sleep
  69.  
  70. # typical:
  71. t.test(extra ~ group, data = sleep, var.equal = TRUE)
  72. # p-value = 0.07919
  73.  
  74. # Let's define the two models (ANOVA, NULL)
  75. sleep %>%
  76. mutate(global_mean = mean(extra)) %>%
  77. group_by(group) %>%
  78. mutate(group_mean = mean(extra)) -> sleep_stats
  79.  
  80. # What is b_1? 1.58
  81. lm(extra ~ group, data = sleep)
  82. # mean in group 1 mean in group 2
  83. # 0.75 2.33
  84. 2.33 - 0.75 # b_1
  85.  
  86. # Let's treat this as an ANOVA:
  87. # Calculate the Variances: SSR, SSM, SST
  88. SST <- sum(( sleep_stats$extra - sleep_stats$global_mean )^2)
  89. SSR <- sum(( sleep_stats$extra - sleep_stats$group_mean )^2)
  90. SSM <- sum(( sleep_stats$global_mean - sleep_stats$group_mean )^2)
  91.  
  92. SSM + SSR
  93. SST
  94.  
  95. N <- nrow(sleep)
  96. K <- 2 # The number of coefficients (b_0, b_1, b_2)
  97. MSM <- SSM/(K - 1) # The MEAN squared model
  98. MSR <- SSR/(N - K) # The MEAN squared residuals
  99.  
  100. # Consequence is ... MSR will decrease as sample size increases
  101. # ratio will increase
  102. MSM/MSR # 4.846088 # put this on an F distribution
  103.  
  104. # The F dist is just the T dist squared:
  105. pf(MSM/MSR, df1 = (K - 1), df2 = (N - K), lower=FALSE) # 0.07918671
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement