Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #functions used in ExactProb
- #Minimalist chi square, default equal probability for each bin
- chiStat <- function(v,p=rep(1/length(v),length(v))){sum(((v - sum(v)*p)^2)/(sum(v)*p))}
- #multinomial prob based on set of probabilities, defaults to equal probabilities
- exactMult <- function(v,p=rep(1/length(v),length(v))){
- n <- factorial(sum(v))
- d <- prod(factorial(v))
- p <- prod(p^v)
- return( (n/d)*p )
- }
- #This generates all the permutations given n number of balls in m bins and
- #then calculates the exact probability according to the multinomial
- #distribution and the CDF of the chi-square statistic
- exactProb <- function(n,m,p=rep(1/m,m)){
- library(partitions)
- AllDat <- t(compositions(n,m))
- ExactProb <- apply(AllDat,1,exactMult,p=p)
- chiStat <- function(v,p){sum(((v - sum(v)*p)^2)/(sum(v)*p))}
- Chi <- apply(AllDat,1,chiStat,p=p)
- #order according to chi-stat
- MyData <- data.frame(as.matrix(AllDat),ExactProb, Chi)[order(Chi),]
- MyData$cumprob <- cumsum(MyData$ExactProb)
- return(MyData)
- }
- #My wrapping all up in a global function to return items in list
- #given the initial data
- SmallSampChi <- function(d,p=rep(1/length(d),length(d))){
- n <- sum(d)
- m <- length(d)
- cdf <- exactProb(n=n,m=m,p=p) #generate exact probability
- chiSamp <- chiStat(d,p) #Chi stat for sample
- #p-value to the right of the test statistic
- pvalue <- sum(cdf[cdf[,'Chi'] >= chiSamp,'ExactProb'])
- #return object
- t <- list(cdf,p,d,chiSamp,pvalue)
- names(t) <- c("CDF","probabilities","data","Chi-Square Statistic","p-value")
- return(t)
- }
- #now with an example dataset, three events all on the third day
- d <- c(0,0,3,0,0) #format N observations in M bins, 3 in third bin
- p <- c(0.03,0.12,0.6,0.2,0.05) #arbitrary PMF in comments
- t <- SmallSampChi(d=d,p=p)
- plot(t$CDF$Chi,t$CDF$cumprob,type='s',xlab='Chi-Square value',ylab='Exact CDF')
- abline(v=t$'Chi-Square Statistic',col='#FF000099')
- a <- .05
- t$CDF[t$CDF[,'cumprob'] > (1-a),1:5]
- # X1 X2 X3 X4 X5
- #6 1 1 1 0 0
- #20 0 0 0 3 0
- #12 1 1 0 1 0
- #17 1 0 0 2 0
- #23 0 2 0 0 1
- #24 1 0 1 0 1
- #27 1 0 0 1 1
- #22 1 1 0 0 1
- #3 1 2 0 0 0
- #4 0 3 0 0 0
- #33 0 0 1 0 2
- #34 0 0 0 1 2
- #32 0 1 0 0 2
- #31 1 0 0 0 2
- #5 2 0 1 0 0
- #11 2 0 0 1 0
- #2 2 1 0 0 0
- #21 2 0 0 0 1
- #35 0 0 0 0 3
- #1 3 0 0 0 0
- p_alt <- rep(1/5,5)
- t$CDF$AltProb <- apply(as.matrix(t$CDF[,1:length(p_alt)]),1,exactMult,p=p_alt)
- sum(t$CDF[t$CDF[,'cumprob'] > (1-a),'AltProb']) #power of alt
- #[1] 0.536
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement