Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
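# Goal: study whether, for different population distributions (Beta is used
# as the example), the sample size affects how the sampling distribution of
# the sample mean converges to the normal distribution, and how large the
# sample must be before the difference from normality can be ignored.
# Method: simulate Beta random samples in R and compare the convergence for
# the same population family under different parameters and sample sizes.
# For each n (n = 10, 20, 30, ..., 120), draw n observations and compute their
# mean; repeat this 500 times and run a normality test on the 500 means to
# obtain a p-value. Repeat the whole procedure 1000 times and report the
# proportion of the 1000 p-values below the 0.05 significance level.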
library(magrittr)
library(plyr)
#' Rejection rate of the Shapiro-Wilk test applied to Beta sample means
#'
#' For every sample size in `n`, draws `N` independent samples of that size
#' from Beta(`shape1`, `shape2`), computes the `N` sample means and tests them
#' for normality with `shapiro.test()`. The whole experiment is repeated `R`
#' times and the proportion of p-values below 0.05 is returned per sample size.
#'
#' @param shape1,shape2 Shape parameters passed to `rbeta()`.
#' @param n Numeric vector of sample sizes (each at least 1).
#' @param N Number of sample means fed to each normality test; must be
#'   between 3 and 5000, the range accepted by `shapiro.test()`.
#' @param R Number of repetitions of the whole experiment (at least 1).
#' @return A numeric vector of rejection proportions, one per element of `n`,
#'   named by the sample sizes.
type_I_error_f <- function(shape1 = 8, shape2 = 8,
                           n = seq(10, 120, by = 10), N = 500, R = 1000) {
  stopifnot(
    is.numeric(n), length(n) > 0, all(n >= 1),
    is.numeric(N), length(N) == 1, N >= 3, N <= 5000,
    is.numeric(R), length(R) == 1, R >= 1
  )

  # TRUE when normality of the N sample means is rejected at the 5% level.
  rejects_normality <- function(size) {
    # One column per sample: `size` rows, N columns.
    draws <- matrix(rbeta(size * N, shape1, shape2), nrow = size)
    shapiro.test(colMeans(draws))$p.value < 0.05
  }

  one_replication <- function(i) {
    vapply(n, rejects_normality, logical(1))
  }

  rejections <- vapply(seq_len(R), one_replication, logical(length(n)))
  # vapply() returns a plain vector when length(n) == 1; force the
  # length(n) x R layout so rowMeans() works for every input size.
  rejections <- matrix(rejections, nrow = length(n), ncol = R)

  setNames(rowMeans(rejections), n)
}
# Recorded output below comes from individual runs of a random simulation;
# values will differ from run to run unless a seed is set.
type_I_error_f() # default => a symmetric distribution
# 10 20 30 40 50 60 70 80 90 100 110 120
# 0.043 0.043 0.059 0.048 0.041 0.041 0.057 0.042 0.058 0.043 0.040 0.043
type_I_error_f(8, 3) # a non-symmetric distribution
# 10 20 30 40 50 60 70 80 90 100 110 120
# 0.271 0.159 0.129 0.099 0.095 0.082 0.068 0.068 0.079 0.073 0.076 0.062
## A non-symmetric distribution needs larger samples before the sample mean
## is approximately normal.
library(snowfall)

#' Parallel rejection rate of the Shapiro-Wilk test on Beta sample means
#'
#' Same experiment as the sequential version: for every sample size in `n`,
#' `N` sample means of Beta(`shape1`, `shape2`) draws are tested for normality
#' with `shapiro.test()`, and the proportion of p-values below 0.05 over `R`
#' repetitions is returned. The `R` repetitions are distributed over a
#' snowfall cluster.
#'
#' @param shape1,shape2 Shape parameters passed to `rbeta()`.
#' @param n Numeric vector of sample sizes (each at least 1).
#' @param N Number of sample means fed to each normality test; must be
#'   between 3 and 5000, the range accepted by `shapiro.test()`.
#' @param R Number of repetitions of the whole experiment (at least 1).
#' @param cpus Number of worker processes requested from `sfInit()`.
#' @return A numeric vector of rejection proportions, one per element of `n`,
#'   named by the sample sizes.
type_I_error_par_f <- function(shape1 = 8, shape2 = 8,
                               n = seq(10, 120, by = 10), N = 500, R = 1000,
                               cpus = 4) {
  stopifnot(
    is.numeric(n), length(n) > 0, all(n >= 1),
    is.numeric(N), length(N) == 1, N >= 3, N <= 5000,
    is.numeric(R), length(R) == 1, R >= 1,
    is.numeric(cpus), length(cpus) == 1, cpus >= 1
  )

  sfInit(parallel = TRUE, cpus = cpus)
  # Shut the cluster down even when a worker or a later step fails.
  on.exit(sfStop(), add = TRUE)
  sfExport(list = c("shape1", "shape2", "n", "N"))
  # NOTE(review): no parallel RNG streams are configured here; snowfall offers
  # sfClusterSetupRNG() if reproducible worker streams are needed - confirm.

  # The worker uses only base/stats functions, so no extra packages have to
  # be loaded on the cluster nodes.
  one_replication <- function(i) {
    vapply(n, function(size) {
      # One column per sample: `size` rows, N columns.
      draws <- matrix(rbeta(size * N, shape1, shape2), nrow = size)
      shapiro.test(colMeans(draws))$p.value
    }, numeric(1))
  }

  pvalue_list <- sfLapply(seq_len(R), one_replication)
  # Assemble an explicit length(n) x R matrix so that a single sample size or
  # a single repetition does not collapse to a vector and break rowMeans().
  pvalues <- matrix(unlist(pvalue_list), nrow = length(n), ncol = R)

  setNames(rowMeans(pvalues < 0.05), n)
}
# Recorded output below comes from individual runs of a random simulation;
# values will differ from run to run.
type_I_error_par_f()
# 10 20 30 40 50 60 70 80 90 100 110 120
# 0.043 0.035 0.052 0.052 0.049 0.050 0.055 0.055 0.052 0.055 0.058 0.053
type_I_error_par_f(8, 3)
# 10 20 30 40 50 60 70 80 90 100 110 120
# 0.251 0.142 0.098 0.079 0.095 0.088 0.089 0.078 0.084 0.065 0.062 0.068
Advertisement
Add Comment
Please, Sign In to add comment