• API
• FAQ
• Tools
• Archive
SHARE
TWEET ANOVA and t-test a guest Oct 10th, 2019 93 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Session 2 - Other statistical tests (30 minutes)

# Lecture: Understanding two-sample t-tests and ANOVA, the curse of dimensionality
# Discussion: Similarities to regression
# Demonstration: Executing t-test and ANOVA as linear models
# Hands-on Exercises: Exploring model forms in R

# ---- ANOVA ----
# ANALYSIS OF VARIANCE (i.e. SST, SSM, SSR, also... SST = SSM + SSR)
# classical approach
library(dplyr) # provides %>%, group_by(), summarise(), mutate(), ungroup()

PlantGrowth

Plant_lm <- lm(weight ~ group, data = PlantGrowth)
Plant_lm
# (Intercept)    grouptrt1    grouptrt2
# 5.032       -0.371        0.494

# What do these coefficients mean?
PlantGrowth %>%
  group_by(group) %>%
  summarise(avg = mean(weight))
# 1 ctrl   5.03
# 2 trt1   4.66
# 3 trt2   5.53

# name the coefficients (read from the fit instead of retyping the numbers):
Plant_coefs <- coef(Plant_lm)
b_0 <- Plant_coefs[["(Intercept)"]] # 5.032, the mean of ctrl
b_1 <- Plant_coefs[["grouptrt1"]]   # -0.371, mean of trt1 minus mean of ctrl
b_2 <- Plant_coefs[["grouptrt2"]]   # 0.494, mean of trt2 minus mean of ctrl

# Let's define the two models:
#   NULL model  - every observation is predicted by the global mean
#   ANOVA model - every observation is predicted by its group mean
Plant_stats <- PlantGrowth %>%
  mutate(global_mean = mean(weight)) %>%
  group_by(group) %>%
  mutate(group_mean = mean(weight)) %>%
  ungroup() # drop the grouping so later steps are not silently per-group

# Calculate the sums of squares: SST, SSR, SSM
# SST: total variation of the data around the global mean
SST <- sum((Plant_stats$weight - Plant_stats$global_mean)^2)
# SSR: residual variation of the data around the group means
SSR <- sum((Plant_stats$weight - Plant_stats$group_mean)^2)
# SSM: variation explained by the model (group means vs. global mean)
SSM <- sum((Plant_stats$global_mean - Plant_stats$group_mean)^2)

# These two values should be equal (SST = SSM + SSR):
SSM + SSR
SST

N <- nrow(PlantGrowth)
K <- length(Plant_coefs) # 3, the number of coefficients (b_0, b_1, b_2)
MSM <- SSM / (K - 1) # The MEAN squared model
MSR <- SSR / (N - K) # The MEAN squared residuals

# MSR estimates the residual variance, whatever the sample size.
# With a real group effect SSM (and so MSM) grows as the sample size grows,
# so the ratio will increase.
MSM / MSR # 4.846088 # put this on an F distribution

# Upper tail of the F distribution with (K - 1, N - K) degrees of freedom.
# (F equals the squared t statistic only when K - 1 = 1, i.e. two groups.)
pf(MSM / MSR, df1 = K - 1, df2 = N - K, lower.tail = FALSE) # 0.01590996

# Typical:
anova(Plant_lm) # 0.01591
# Same as calculating above.
# A low p-value indicates that it is unlikely to see this data if there
# were no influence of X on Y: here there is about a 1.6% chance of
# observing an F ratio at least this large purely by chance alone.

# Can we do this with a two-sample t-test??
# ---- Two-Sample t-test ----
library(dplyr) # provides %>%, group_by(), mutate(), ungroup()

# Do different treatments result in extra sleep?
sleep

# typical:
t.test(extra ~ group, data = sleep, var.equal = TRUE)
# t = -1.8608, df = 18, p-value = 0.07919
# mean in group 1 mean in group 2
# 0.75            2.33

# Let's define the two models:
#   NULL model  - every observation is predicted by the global mean
#   ANOVA model - every observation is predicted by its group mean
sleep_stats <- sleep %>%
  mutate(global_mean = mean(extra)) %>%
  group_by(group) %>%
  mutate(group_mean = mean(extra)) %>%
  ungroup() # drop the grouping so later steps are not silently per-group

# What is b_1? 1.58
sleep_lm <- lm(extra ~ group, data = sleep)
sleep_lm
# (Intercept)       group2
# 0.75         1.58
2.33 - 0.75 # b_1, the difference between the two group means

# Let's treat this as an ANOVA:
# Calculate the sums of squares: SST, SSR, SSM
# SST: total variation of the data around the global mean
SST <- sum((sleep_stats$extra - sleep_stats$global_mean)^2)
# SSR: residual variation of the data around the group means
SSR <- sum((sleep_stats$extra - sleep_stats$group_mean)^2)
# SSM: variation explained by the model (group means vs. global mean)
SSM <- sum((sleep_stats$global_mean - sleep_stats$group_mean)^2)

# These two values should be equal (SST = SSM + SSR):
SSM + SSR
SST

N <- nrow(sleep)
K <- 2 # The number of coefficients (b_0, b_1)
MSM <- SSM / (K - 1) # The MEAN squared model
MSR <- SSR / (N - K) # The MEAN squared residuals

# MSR estimates the residual variance, whatever the sample size.
# With a real group effect SSM (and so MSM) grows as the sample size grows,
# so the ratio will increase.
MSM / MSR # approx. 3.46, i.e. (-1.8608)^2 # put this on an F distribution

# With one model degree of freedom (K - 1 = 1) the F statistic is the
# squared t statistic, so this reproduces the t-test p-value:
pf(MSM / MSR, df1 = K - 1, df2 = N - K, lower.tail = FALSE) # 0.07918671
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top