Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require(fields) #Used for image.plot
- ###Detect RStudio (modifies plot settings:no live plot)
- using.RStudio=nzchar(Sys.getenv("RSTUDIO_USER_IDENTITY"))
- ###General Settings###
- runs<-1000 # Number of Simulations to Run
- max.samps<-15 # Maximum number of Samples to test
- initial.samp.size<-5 # must be at least 2
- samp.interval<-5 # must be at least 1
- effect<-0 # Difference between population means
- pvalue<-.05
- #Live Plot Settings
- live.plot=F # If false, p values traces from last 10 simulations will be plotted
- sleep.time<-.1 # In seconds, use to slow down live plot
- live.plot.in.own.window=F # If live.plot=T, also plot p value trace in its own device
- ###Investigator Bias Settings###
- ##Sequential Hypothesis Testing
- # Set true (T) to add samples to the previous sample.
- # Otherwise (set to F) each sample is independant
- sequential=T
- ##Multiple Statistical Tests
- # If True, choose lowest of t-test and Mann-Whitney U-test p-values.
- # Otherwise only perform t-test
- also.perform.Utest=T
- ##Multiple Outcomes/Publication Bias, e.g. only report significant comparisons
- #Set greater than 1 if multiple outcome measures will be tested.
- #Probabilities and effect sizes will be for any of the tests significant at each sample size
- #History of each is saved for sequential testing
- number.of.outcomes= 3
- ##Drop Outliers
- # Set to true to drop outliers as defined by outlier.cutoff
- # In case of sequential, outliers are kept for next iteration
- # Outlier bias are multipliers of outlier cutoff for treatment group
- # Set > 1 to be less strict
- # Set < 1 for more strict
- drop.outliers=T
- outlier.method="MAD" # Defaults to "MAD" (median absolute deviation), can also use "SD" (number of standard deviations from mean)
- outlier.cutoff<-5 # Number of sample deviations from sample mean/median (upper and lower)
- outlier.bias.high<-1 # Bias multiplier (see above)
- outlier.bias.low<-1 # Bias multiplier (see above)
- n.cutoff<-2 # Replace values after dropping if sample size is less than the cutoff, min=2, max=sample size
- ###Population Distributions###
- #Treatment Group
- treatment.sd1<-1 # Treatment Group Primary Standard Deviation
- treatment.shape1<-2 # Treatment Group Primary Shape Parameter (set to 2 for normal)
- treatment.sd2<-1 # Treatment Group Secondary Standard Deviation (for bimodal)
- treatment.shape2<-2 # Treatment Group Secondary Shape Parameter (set to 2 for normal, for bimodal)
- #Control Group
- control.sd1<-1 # Control Group Primary Standard Deviation
- control.shape1<-2 # Control Group Primary Shape Parameter (set to 2 for normal)
- control.sd2<-1 # Control Group Secondary Standard Deviation (for bimodal)
- control.shape2<-2 # Control Group Secondary Shape Parameter (set to 2 for normal, for bimodal)
- ###Bimodal Settings###
- larger.bimodal.proportion<- .5 #Set to 1 for unimodal
- bimodal.offset<-8 #Difference Between Primary and Secondary Means for bimodal Populations
- bimodal.treatment.only=F # Simulate treatment effect on subset of treatment group, control will be unimodal.
- ### Advanced: Random Parameter Generation Settings ###
- random.pop.parameters=F # Generate Population Parameters (ignores settings above)
- random.bias=F # Generate Bias Parameters (ignores settings above)
- equal.pop.parameters=T # Sets Treatment and Control Group Population Parameters equal
- random.shape.only=T # Use sd settings above
- batch.size=1 # Set >1 to run batch of random simulations
- ###About Shape (p) Parameter###
- #Generalized normal distribution with Shape(p) Parameter#
- #Modified from package "normalp" to return numeric(0) rather than errors
- rnormp2<-function (n, mu = 0, sigmap = 1, p = 2, method = c("def", "chiodi"))
- {
- if (!is.numeric(n) || !is.numeric(mu) || !is.numeric(sigmap) ||
- !is.numeric(p))
- return(numeric(0))
- if (p < 1)
- stop("p must be at least equal to one")
- if (sigmap <= 0)
- return(numeric(0))
- method <- match.arg(method)
- if (method == "def") {
- qg <- rgamma(n, shape = 1/p, scale = p)
- z <- qg^(1/p)
- z <- ifelse(runif(n) < 0.5, -z, z)
- x <- mu + z * sigmap
- }
- if (method == "chiodi") {
- i <- 0
- x <- c(rep(0, n))
- while (i < n) {
- u <- runif(1, -1, 1)
- v <- runif(1, -1, 1)
- z <- abs(u)^p + abs(v)^(p/(p - 1))
- if (z <= 1) {
- i <- i + 1
- x[i] <- u * (-p * log(z)/z)^(1/p)
- x[i] <- mu + x[i] * sigmap
- }
- }
- }
- x
- }
- # shape < 1 : invalid #
- #plot(seq(-5,5,by=.01),dnormp(seq(-5,5,by=.01), mu=0, sigmap=1, p=.5))
- #lines(seq(-5,5,by=.01), dnorm(seq(-5,5,by=.01), 0,1), lwd=2, col="Blue")
- # 1<= shape < 2 : heavier tails, 1 is laplace dist #
- #plot(seq(-5,5,by=.01),dnormp(seq(-5,5,by=.01), mu=0, sigmap=1, p=1.5))
- #lines(seq(-5,5,by=.01), dnorm(seq(-5,5,by=.01), 0,1), lwd=2, col="Blue")
- # shape = 2 : normal dist #
- #plot(seq(-5,5,by=.01),dnormp(seq(-5,5,by=.01), mu=0, sigmap=1, p=2))
- #lines(seq(-5,5,by=.01), dnorm(seq(-5,5,by=.01), 0,1), lwd=2, col="Blue")
- # shape > 2 :lighter tails, inf is uniform dist #
- #plot(seq(-5,5,by=.01),dnormp(seq(-5,5,by=.01), mu=0, sigmap=1, p=10))
- #lines(seq(-5,5,by=.01), dnorm(seq(-5,5,by=.01), 0,1), lwd=2, col="Blue")
- ###Start Script###
- if (batch.size > 1){
- batchmode=T
- batch.results=NULL
- batch.params=NULL
- }else{
- batchmode=F
- }
- for(b in 1:batch.size){
- ###Randomly Set Bias###
- if(random.bias==T){
- ###Investigator Bias Settings###
- ##Sequential Hypothesis Testing
- seq.samp<-round(runif(1,0,1))
- if(seq.samp==1){
- sequential=T
- }else{
- sequential=F
- }
- ##Multiple Statistical Tests
- Utest.samp<-round(runif(1,0,1))
- if(Utest.samp==1){
- also.perform.Utest=T
- }else{
- also.perform.Utest=F
- }
- ##Multiple Outcomes/Publication Bias, e.g. only report significant comparisons
- number.of.outcomes= round(runif(1, 1, 3),0)
- ##Drop Outliers
- outlier.samp<-round(runif(1,0,1))
- if(outlier.samp==1){
- drop.outliers=T
- }else{
- drop.outliers=F
- }
- outlier.cutoff<-runif(1.5,1000)
- outlier.bias.high<-runif(1,.5,1.5)
- outlier.bias.low<-runif(1,.5,1.5)
- } #End Random Bias Settings
- ###Randomly Set Population Parameters###
- if(random.pop.parameters==T){
- ###Population Distributions###
- #Treatment Group
- if(random.shape.only!=T){
- treatment.sd1<-runif(1, .1, 5)
- }
- treatment.shape1<-runif(1, 1, 10)
- if(random.shape.only!=T){
- treatment.sd2<-runif(1, .1, 5)
- }
- treatment.shape2<-runif(1, 1, 10)
- #Control Group
- if(equal.pop.parameters==T){
- if(random.shape.only!=T){
- control.sd1<-treatment.sd1
- }
- control.shape1<-treatment.shape1
- if(random.shape.only!=T){
- control.sd2<-treatment.sd2
- }
- control.shape2<-treatment.shape2
- }else{
- if(random.shape.only!=T){
- control.sd1<-runif(1, .1, 5)
- }
- control.shape1<-runif(1, 1, 10)
- if(random.shape.only!=T){
- control.sd2<-runif(1, .1, 5)
- }
- control.shape2<-runif(1, 1, 10)
- }
- ###Bimodal Settings###
- larger.bimodal.proportion<- runif(1,.5,1)
- bimodal.offset<-runif(1, .1, 10)
- if(equal.pop.parameters==T){
- bimodal.treatment.only=F
- }else{
- bimod.samp<-round(runif(1,0,1))
- if(bimod.samp==1){
- bimodal.treatment.only=T
- }else{
- bimodal.treatment.only=F
- }
- }
- } #End Random Population Parameter Generation
- ###Start Sampling###
- if(initial.samp.size<2|| samp.interval<1){
- print("Simulation not Run: minimum sample size =2, minimum sample interval=1")
- }else{
- ###Create Population Histogram and Live Chart###
- smaller.bimodal.proportion<- (1-larger.bimodal.proportion)
- bimodal.pop<-c(rep(1,larger.bimodal.proportion*1000),
- rep(0,smaller.bimodal.proportion*1000))
- if(larger.bimodal.proportion==1){
- treatment.sd2<-NA
- treatment.shape2<-NA
- control.sd2<-NA
- control.shape2<-NA
- bimodal.offset<-NA
- bimodal.treatment.only=F
- }
- if(drop.outliers!=T){
- outlier.cutoff<-NA
- outlier.bias.high<-NA
- outlier.bias.low<-NA
- }
- if(outlier.method!="MAD" && outlier.method!="SD"){
- outlier.method="MAD"
- }
- if(bimodal.treatment.only==T){
- control.sd2<-NA
- }
- if(bimodal.treatment.only==T){
- control<-rnormp2(100000,0,control.sd1,control.shape1)
- }else{
- control<-c(rnormp2(larger.bimodal.proportion*100000,0,control.sd1,control.shape1),
- rnormp2(smaller.bimodal.proportion*100000,0+bimodal.offset,control.sd2,control.shape2))
- }
- treatment<-c(rnormp2(larger.bimodal.proportion*100000,0+effect,treatment.sd1,treatment.shape1),
- rnormp2(smaller.bimodal.proportion*100000,0+effect+bimodal.offset,treatment.sd2,treatment.shape2))
- if(batchmode!=T){
- if(using.RStudio!=T){
- dev.new(width=600, height=500)
- }
- }
- if(using.RStudio==T){
- live.plot.in.own.window=F
- live.plot=F
- }
- layout(matrix(c(1,2,3,4,5,6), 3, 2, byrow = TRUE))
- hist(treatment, col=rgb(0,0,1,.5), freq=F,
- breaks=seq(min(c(treatment,control))-1,max(c(treatment,control))+1,by=.1),
- xlab="Value", ylim=c(0,1.2),
- main=c("Population Distributions",
- paste("Difference in Means=", round(effect,2)),
- paste("Bimodal Proportions=", round(larger.bimodal.proportion,2), ",", round(smaller.bimodal.proportion,2),
- " Bimodal Offset=", round(bimodal.offset,2))
- ),
- sub=paste("Bimodal Effect on Treatment Only=",bimodal.treatment.only)
- )
- hist(control, col=rgb(1,0,0,.5), freq=F,
- breaks=seq(min(c(treatment,control))-1,max(c(treatment,control))+1,by=.1),
- add=T)
- legend(x="topleft", legend=c(paste("Treatment:",
- "mu=", 0+effect,
- ", sd1=", round(treatment.sd1,2),
- ", p1=", round(treatment.shape1,2),
- ", sd2=", round(treatment.sd2,2),
- ", p2=", round(treatment.shape2,2)
- ),
- paste("Control:",
- "mu=",0,
- ", sd1=", round(control.sd1,2),
- ", p1=", round(control.shape1,2),
- ", sd2=", round(control.sd2,2),
- ", p2=", round(control.shape2,2))
- ),
- col=c(rgb(0,0,1,.5),rgb(1,0,0,.5)), lwd=6, bty="n")
- plot(initial.samp.size,1, type="l", lwd=2, col="White",
- #xlim=c(initial.samp.size,(max.samps+samp.interval)),
- xlim=c(initial.samp.size,(max.samps)),
- xlab="Sample Size (per group)",
- ylim=c(0,1), ylab="P-value",
- main=c(paste("Representative P-value Traces (Significant: P <",pvalue,")"),
- paste("Outlier Cutoff(",outlier.method,") =",round(outlier.cutoff,2),
- ", Outlier Bias (High,Low) =", "(", round(outlier.bias.high,2), ",", round(outlier.bias.low,2), ")",
- sep=""),
- paste("# of Outcomes=", number.of.outcomes)
- ),
- sub=paste("Sequential Sampling=", sequential, ", U-test=", also.perform.Utest)
- )
- abline(h=pvalue, lwd=2)
- ###Start Simulation###
- all.results=NULL
- pb <- txtProgressBar(min = 0, max = runs*max.samps, style = 3)
- for(r in 1:runs){
- ###Start Run###
- run.result=NULL
- experiment.results=vector("list", number.of.outcomes)
- for(t in 1:number.of.outcomes){
- if(bimodal.treatment.only==T){
- random.sample<-rep(1,initial.samp.size)
- }else{
- random.sample<-bimodal.pop[runif(initial.samp.size,1,1000)]
- }
- control<-c(rnormp2(length(which(random.sample==1)),0,control.sd1,control.shape1),
- rnormp2(length(which(random.sample==0)),0+bimodal.offset,control.sd2,control.shape2))
- random.sample<-bimodal.pop[runif(initial.samp.size,1,1000)]
- treatment<-c(rnormp2(length(which(random.sample==1)),0+effect,treatment.sd1,treatment.shape1),
- rnormp2(length(which(random.sample==0)),0+effect+bimodal.offset,treatment.sd2,treatment.shape2))
- experiment.result<-rbind(control,treatment)
- experiment.results[[t]]<-experiment.result
- }
- x=initial.samp.size
- while(x <(max.samps+samp.interval)){
- sample.result=NULL
- n.cutoff=min(n.cutoff,x)
- test.results=NULL
- for(t in 1:number.of.outcomes){
- control<-experiment.results[[t]][1,]
- treatment<-experiment.results[[t]][2,]
- if(also.perform.Utest==T){
- pval1<-t.test(treatment,control)$p.value
- pval2<-wilcox.test(treatment,control)$p.value
- pval<-min(pval1,pval2)
- #Drop Outliers
- if(drop.outliers==T){
- if(pval>pvalue){
- control.temp<-control
- treatment.temp<-treatment
- if(outlier.method=="SD"){
- high.cutoff<-mean(control.temp, na.rm=T)+outlier.cutoff*sd(control.temp, na.rm=T)
- low.cutoff<-mean(control.temp, na.rm=T)-outlier.cutoff*sd(control.temp, na.rm=T)
- high.outliers<-which(control.temp>high.cutoff)
- low.outliers<-which(control.temp<low.cutoff)
- control.temp[high.outliers]<-NA
- control.temp[low.outliers]<-NA
- high.cutoff<-mean(treatment.temp, na.rm=T)+outlier.bias.high*outlier.cutoff*sd(treatment.temp, na.rm=T)
- low.cutoff<-mean(treatment.temp, na.rm=T)-outlier.bias.low*outlier.cutoff*sd(treatment.temp, na.rm=T)
- high.outliers<-which(treatment.temp>high.cutoff)
- low.outliers<-which(treatment.temp<low.cutoff)
- treatment.temp[high.outliers]<-NA
- treatment.temp[low.outliers]<-NA
- }
- if(outlier.method=="MAD"){
- high.cutoff<-median(control.temp, na.rm=T)+outlier.cutoff*mad(control.temp, na.rm=T)
- low.cutoff<-median(control.temp, na.rm=T)-outlier.cutoff*mad(control.temp, na.rm=T)
- high.outliers<-which(control.temp>high.cutoff)
- low.outliers<-which(control.temp<low.cutoff)
- control.temp[high.outliers]<-NA
- control.temp[low.outliers]<-NA
- high.cutoff<-median(treatment.temp, na.rm=T)+outlier.bias.high*outlier.cutoff*mad(treatment.temp, na.rm=T)
- low.cutoff<-median(treatment.temp, na.rm=T)-outlier.bias.low*outlier.cutoff*mad(treatment.temp, na.rm=T)
- high.outliers<-which(treatment.temp>high.cutoff)
- low.outliers<-which(treatment.temp<low.cutoff)
- treatment.temp[high.outliers]<-NA
- treatment.temp[low.outliers]<-NA
- }
- if(
- length(treatment.temp[which(!is.na(treatment.temp))]) > (n.cutoff - 1) &&
- length(control.temp[which(!is.na(control.temp))]) > (n.cutoff - 1)
- ){
- pval1<-t.test(treatment.temp,control.temp)$p.value
- pval2<-wilcox.test(treatment.temp,control.temp)$p.value
- pval<-min(pval1,pval2)
- control<-control.temp
- treatment<-treatment.temp
- #Replace Outliers for Small N
- }else{
- while(
- length(treatment.temp[which(!is.na(treatment.temp))]) < n.cutoff &&
- length(control.temp[which(!is.na(control.temp))]) < n.cutoff
- ){
- if(bimodal.treatment.only==T){
- random.sample<-rep(1,length(which(is.na(control.temp))))
- }else{
- random.sample<-bimodal.pop[runif(length(which(is.na(control.temp))),1,1000)]
- }
- control.temp[which(is.na(control.temp))]<-c(rnormp2(length(which(random.sample==1)),
- 0,control.sd1,control.shape1),
- rnormp2(length(which(random.sample==0)),
- 0+bimodal.offset,control.sd2,control.shape2))
- random.sample<-bimodal.pop[runif(length(which(is.na(treatment.temp))),1,1000)]
- treatment.temp[which(is.na(treatment.temp))]<-c(rnormp2(length(which(random.sample==1)),
- 0+effect,treatment.sd1,treatment.shape1),
- rnormp2(length(which(random.sample==0)),
- 0+effect+bimodal.offset,treatment.sd2,treatment.shape2))
- if(outlier.method=="SD"){
- high.cutoff<-mean(control.temp, na.rm=T)+outlier.cutoff*sd(control.temp, na.rm=T)
- low.cutoff<-mean(control.temp, na.rm=T)-outlier.cutoff*sd(control.temp, na.rm=T)
- high.outliers<-which(control.temp>high.cutoff)
- low.outliers<-which(control.temp<low.cutoff)
- control.temp[high.outliers]<-NA
- control.temp[low.outliers]<-NA
- high.cutoff<-mean(treatment.temp, na.rm=T)+outlier.bias.high*outlier.cutoff*sd(treatment.temp, na.rm=T)
- low.cutoff<-mean(treatment.temp, na.rm=T)-outlier.bias.low*outlier.cutoff*sd(treatment.temp, na.rm=T)
- high.outliers<-which(treatment.temp>high.cutoff)
- low.outliers<-which(treatment.temp<low.cutoff)
- treatment.temp[high.outliers]<-NA
- treatment.temp[low.outliers]<-NA
- }
- if(outlier.method=="MAD"){
- high.cutoff<-median(control.temp, na.rm=T)+outlier.cutoff*mad(control.temp, na.rm=T)
- low.cutoff<-median(control.temp, na.rm=T)-outlier.cutoff*mad(control.temp, na.rm=T)
- high.outliers<-which(control.temp>high.cutoff)
- low.outliers<-which(control.temp<low.cutoff)
- control.temp[high.outliers]<-NA
- control.temp[low.outliers]<-NA
- high.cutoff<-median(treatment.temp, na.rm=T)+outlier.bias.high*outlier.cutoff*mad(treatment.temp, na.rm=T)
- low.cutoff<-median(treatment.temp, na.rm=T)-outlier.bias.low*outlier.cutoff*mad(treatment.temp, na.rm=T)
- high.outliers<-which(treatment.temp>high.cutoff)
- low.outliers<-which(treatment.temp<low.cutoff)
- treatment.temp[high.outliers]<-NA
- treatment.temp[low.outliers]<-NA
- }
- } #end while sample size too small loop
- pval1<-t.test(treatment.temp,control.temp)$p.value
- pval2<-wilcox.test(treatment.temp,control.temp)$p.value
- pval<-min(pval1,pval2)
- control<-control.temp
- treatment<-treatment.temp
- } # end replace pvals
- } # end if pval...
- } #end drop outliers
- # Do if U-test==F
- }else{
- pval<-t.test(treatment,control)$p.value
- #Drop Outliers
- if(drop.outliers==T){
- if(pval>pvalue){
- control.temp<-control
- treatment.temp<-treatment
- if(outlier.method=="SD"){
- high.cutoff<-mean(control.temp, na.rm=T)+outlier.cutoff*sd(control.temp, na.rm=T)
- low.cutoff<-mean(control.temp, na.rm=T)-outlier.cutoff*sd(control.temp, na.rm=T)
- high.outliers<-which(control.temp>high.cutoff)
- low.outliers<-which(control.temp<low.cutoff)
- control.temp[high.outliers]<-NA
- control.temp[low.outliers]<-NA
- high.cutoff<-mean(treatment.temp, na.rm=T)+outlier.bias.high*outlier.cutoff*sd(treatment.temp, na.rm=T)
- low.cutoff<-mean(treatment.temp, na.rm=T)-outlier.bias.low*outlier.cutoff*sd(treatment.temp, na.rm=T)
- high.outliers<-which(treatment.temp>high.cutoff)
- low.outliers<-which(treatment.temp<low.cutoff)
- treatment.temp[high.outliers]<-NA
- treatment.temp[low.outliers]<-NA
- }
- if(outlier.method=="MAD"){
- high.cutoff<-median(control.temp, na.rm=T)+outlier.cutoff*mad(control.temp, na.rm=T)
- low.cutoff<-median(control.temp, na.rm=T)-outlier.cutoff*mad(control.temp, na.rm=T)
- high.outliers<-which(control.temp>high.cutoff)
- low.outliers<-which(control.temp<low.cutoff)
- control.temp[high.outliers]<-NA
- control.temp[low.outliers]<-NA
- high.cutoff<-median(treatment.temp, na.rm=T)+outlier.bias.high*outlier.cutoff*mad(treatment.temp, na.rm=T)
- low.cutoff<-median(treatment.temp, na.rm=T)-outlier.bias.low*outlier.cutoff*mad(treatment.temp, na.rm=T)
- high.outliers<-which(treatment.temp>high.cutoff)
- low.outliers<-which(treatment.temp<low.cutoff)
- treatment.temp[high.outliers]<-NA
- treatment.temp[low.outliers]<-NA
- }
- if(
- length(treatment.temp[which(!is.na(treatment.temp))]) > (n.cutoff - 1) &&
- length(control.temp[which(!is.na(control.temp))]) > (n.cutoff - 1)
- ){
- pval<-t.test(control.temp,treatment.temp)$p.value
- control<-control.temp
- treatment<-treatment.temp
- #Replace Outliers for Small N
- }else{
- while(
- length(treatment.temp[which(!is.na(treatment.temp))]) < n.cutoff &&
- length(control.temp[which(!is.na(control.temp))]) < n.cutoff
- ){
- if(bimodal.treatment.only==T){
- random.sample<-rep(1,length(which(is.na(control.temp))))
- }else{
- random.sample<-bimodal.pop[runif(length(which(is.na(control.temp))),1,1000)]
- }
- control.temp[which(is.na(control.temp))]<-c(rnormp2(length(which(random.sample==1)),
- 0,control.sd1,control.shape1),
- rnormp2(length(which(random.sample==0)),
- 0+bimodal.offset,control.sd2,control.shape2))
- random.sample<-bimodal.pop[runif(length(which(is.na(treatment.temp))),1,1000)]
- treatment.temp[which(is.na(treatment.temp))]<-c(rnormp2(length(which(random.sample==1)),
- 0+effect,treatment.sd1,treatment.shape1),
- rnormp2(length(which(random.sample==0)),
- 0+effect+bimodal.offset,treatment.sd2,treatment.shape2))
- if(outlier.method=="SD"){
- high.cutoff<-mean(control.temp, na.rm=T)+outlier.cutoff*sd(control.temp, na.rm=T)
- low.cutoff<-mean(control.temp, na.rm=T)-outlier.cutoff*sd(control.temp, na.rm=T)
- high.outliers<-which(control.temp>high.cutoff)
- low.outliers<-which(control.temp<low.cutoff)
- control.temp[high.outliers]<-NA
- control.temp[low.outliers]<-NA
- high.cutoff<-mean(treatment.temp, na.rm=T)+outlier.bias.high*outlier.cutoff*sd(treatment.temp, na.rm=T)
- low.cutoff<-mean(treatment.temp, na.rm=T)-outlier.bias.low*outlier.cutoff*sd(treatment.temp, na.rm=T)
- high.outliers<-which(treatment.temp>high.cutoff)
- low.outliers<-which(treatment.temp<low.cutoff)
- treatment.temp[high.outliers]<-NA
- treatment.temp[low.outliers]<-NA
- }
- if(outlier.method=="MAD"){
- high.cutoff<-median(control.temp, na.rm=T)+outlier.cutoff*mad(control.temp, na.rm=T)
- low.cutoff<-median(control.temp, na.rm=T)-outlier.cutoff*mad(control.temp, na.rm=T)
- high.outliers<-which(control.temp>high.cutoff)
- low.outliers<-which(control.temp<low.cutoff)
- control.temp[high.outliers]<-NA
- control.temp[low.outliers]<-NA
- high.cutoff<-median(treatment.temp, na.rm=T)+outlier.bias.high*outlier.cutoff*mad(treatment.temp, na.rm=T)
- low.cutoff<-median(treatment.temp, na.rm=T)-outlier.bias.low*outlier.cutoff*mad(treatment.temp, na.rm=T)
- high.outliers<-which(treatment.temp>high.cutoff)
- low.outliers<-which(treatment.temp<low.cutoff)
- treatment.temp[high.outliers]<-NA
- treatment.temp[low.outliers]<-NA
- }
- } #end while sample size too small loop
- pval<-t.test(treatment.temp,control.temp)$p.value
- control<-control.temp
- treatment<-treatment.temp
- } # end replace pvals
- }# end if pval>pvalue
- } #end drop outliers
- }#end If U-test==F
- measured.effect=mean(treatment, na.rm=T)-mean(control, na.rm=T)
- test.result<-cbind(pval,measured.effect)
- test.results<-rbind(test.results,test.result)
- }
- pval<-min(test.results[,1])
- measured.effect<-test.results[which(test.results[,1]==min(test.results[,1]))[1],2]
- sample.result<-cbind(sample.result,r, x,pval,measured.effect)
- run.result<-rbind(run.result,sample.result)
- ##Update Live Plot
- if(live.plot==T){
- lines(run.result[,2],run.result[,3], type="l", lwd=3, col=rainbow(runs)[r],
- xlim=c(initial.samp.size,max.samps+initial.samp.size),ylim=c(0,1)
- )
- if(live.plot.in.own.window==T && x==initial.samp.size && r==1){
- dev.new()
- plot(initial.samp.size,1, type="l", lwd=2, col="White",
- xlim=c(initial.samp.size,(max.samps+samp.interval)), xlab="Sample Size (per group)",
- ylim=c(0,1), ylab="P-value",
- main=c(paste("Representative P-value Traces (Significant: P <",pvalue,")"),
- paste("Outlier Cutoff(SDs) =",round(outlier.cutoff,2),
- ", Outlier Bias (High,Low) =", "(", round(outlier.bias.high,2), ",", round(outlier.bias.low,2), ")"),
- paste("# of Outcomes=", number.of.outcomes)
- ),
- sub=paste("Sequential Sampling=", sequential, ", U-test=", also.perform.Utest)
- )
- abline(h=pvalue, lwd=2)
- lines(run.result[,2],run.result[,3], type="l", lwd=3, col=rainbow(runs)[r],
- xlim=c(initial.samp.size,max.samps+initial.samp.size),ylim=c(0,1)
- )
- dev.set(which=dev.prev())
- }
- if(live.plot.in.own.window==T && x*r>initial.samp.size){
- dev.set(which=dev.next())
- lines(run.result[,2],run.result[,3], type="l", lwd=3, col=rainbow(runs)[r],
- xlim=c(initial.samp.size,max.samps+initial.samp.size),ylim=c(0,1)
- )
- dev.set(which=dev.prev())
- }
- # End Update Live Plot
- Sys.sleep(sleep.time)
- }
- #Generate New Samples
- if(sequential==T){
- ##Take Sequential Samples##
- for(t in 1:number.of.outcomes){
- #Control
- if(bimodal.treatment.only==T){
- random.sample<-rep(1,samp.interval)
- }else{
- random.sample<-bimodal.pop[runif(samp.interval,1,1000)]
- }
- control<-append(experiment.results[[t]][1,],c(rnormp2(length(which(random.sample==1)),
- 0,control.sd1,control.shape1),
- rnormp2(length(which(random.sample==0)),
- 0+bimodal.offset,control.sd2,control.shape2)
- ))
- #Treatment
- random.sample<-bimodal.pop[runif(samp.interval,1,1000)]
- treatment<-append(experiment.results[[t]][2,],c(rnormp2(length(which(random.sample==1)),
- 0+effect,treatment.sd1,treatment.shape1),
- rnormp2(length(which(random.sample==0)),
- 0+effect+bimodal.offset,treatment.sd2,treatment.shape2)
- ))
- experiment.result<-rbind(control,treatment)
- experiment.results[[t]]<-experiment.result
- }#End Sequential
- }else{
- ##Take Independant Samples (non sequential)##
- experiment.results=vector("list", number.of.outcomes)
- for(t in 1:number.of.outcomes){
- #Control
- if(bimodal.treatment.only==T){
- random.sample<-rep(1,x)
- }else{
- random.sample<-bimodal.pop[runif(x,1,1000)]
- }
- control<-c(rnormp2(length(which(random.sample==1)),
- 0,control.sd1,control.shape1),
- rnormp2(length(which(random.sample==0)),
- 0+bimodal.offset,control.sd2,control.shape2)
- )
- #Treatment
- random.sample<-bimodal.pop[runif(x,1,1000)]
- treatment<-c(rnormp2(length(which(random.sample==1)),
- 0+effect,treatment.sd1,treatment.shape1),
- rnormp2(length(which(random.sample==0)),
- 0+effect+bimodal.offset,treatment.sd2,treatment.shape2)
- )
- experiment.result<-rbind(control,treatment)
- experiment.results[[t]]<-experiment.result
- } #End independant
- } #End sampling
- setTxtProgressBar(pb, x + max.samps*(r-1))
- x<-x+samp.interval
- } # End Run
- all.results<-rbind(all.results,run.result)
- if(live.plot==F && r==runs){
- colnum<-1
- for(k in (runs-9):runs){
- xvar<-all.results[which(all.results[,1]==k),2]
- yvar<-all.results[which(all.results[,1]==k),3]
- lines(xvar,yvar, type="l", lwd=3, col=rainbow(10)[colnum],
- xlim=c(initial.samp.size,max.samps+initial.samp.size),ylim=c(0,1)
- )
- colnum<-colnum+1
- }
- }
- } # End Simulation
- close(pb)
- ###Extract Significant Results###
- rownames(all.results)<-seq(1,nrow(all.results))
- sigs<-which(all.results[,3]<pvalue)
- percent.sigs<-100*hist(all.results[sigs,2], plot=F,
- breaks=seq(initial.samp.size-1, max.samps+2*samp.interval,by=(samp.interval))
- )$counts/runs
- ###Extract First Significant Result for each Simulation Run###
- first.sigs=NULL
- first.sigs.filtered=NULL
- for(i in 1:runs){
- temp<-all.results[which(all.results[,1]==i),]
- if(length(temp[which(temp[,3]<pvalue),2])==0){
- first.sig<-Inf
- }else{
- first.sig<-min(temp[which(temp[,3]<pvalue),2])
- }
- first.sigs<-rbind(first.sigs,first.sig)
- }
- first.sigs.filtered<-first.sigs[which(first.sigs!=Inf),]
- percent.atLeastOne.sig<-100*cumsum(
- hist(first.sigs.filtered,
- breaks=seq(initial.samp.size-1, max.samps+2*samp.interval,by=(samp.interval)),
- plot=F
- )$counts
- )/runs
- ###Create Summary Plots###
- if(bimodal.treatment.only==T){
- plot(all.results[sigs,2],all.results[sigs,4],
- xlab="Sample Size (per group)",xlim=c(0, max.samps+samp.interval),
- ylab="Measured Effect",
- ylim=c(min(c(-1,all.results[sigs,4]),na.rm=T),max(c(1,all.results[sigs,4]),na.rm=T)),
- main=c("Measured Effect Sizes of Significant Results",
- paste("Significance Level: P <", pvalue),
- paste("True Difference=", round(effect,2), "+",
- round(bimodal.offset,2), "for", round(100*smaller.bimodal.proportion,2), "% of Treatment Group")),
- col="Cyan")
- abline(h=effect, lwd=2)
- abline(h=bimodal.offset, lwd=1, lty=2)
- }else{
- plot(all.results[sigs,2],all.results[sigs,4],
- xlab="Sample Size (per group)",xlim=c(0, max.samps+samp.interval),
- ylab="Measured Effect", ylim=c(min(c(-1,all.results[sigs,4]),na.rm=T),max(c(1,all.results[sigs,4]),na.rm=T)),
- main=c("Measured Effect Sizes of Significant Results",
- paste("Significance Level: P <", pvalue),
- paste("True Difference=", effect)),
- col="Cyan")
- abline(h=effect, lwd=2)
- }
- pval.max.percents=NULL
- for(s in 1:length(unique(all.results[,2]))){
- size<-unique(all.results[,2])[s]
- temp<-max(100*hist(all.results[which(all.results[,2]==size),3],
- breaks=seq(0,1,by=.005),
- plot=F)$counts/runs)
- pval.max.percents<-rbind(pval.max.percents,temp)
- }
- ymax<-min(1.5*max(pval.max.percents),100)
- Sys.sleep(.1)
- plot(seq(0,.995,by=.005),
- 100*hist(all.results[which(all.results[,2]==initial.samp.size),3],
- breaks=seq(0,1,by=.005),
- plot=F)$counts/runs,
- ylab="Percent of P-values", ylim=c(0,ymax),
- xlab="P-value", xlim=c(0,1),
- type="l", col=rainbow(length(unique(all.results[,2])))[1],
- main="Distribution of All P-values"
- )
- for(s in 2:length(unique(all.results[,2]))){
- size<-unique(all.results[,2])[s]
- lines(seq(0,.995,by=.005),
- 100*hist(all.results[which(all.results[,2]==size),3],
- breaks=seq(0,1,by=.005),
- plot=F)$counts/runs,
- type="l",col=rainbow(length(unique(all.results[,2])))[s]
- )
- }
- abline(h=100/length(seq(0,.995,by=.005)))
- image.plot(matrix(seq(initial.samp.size,max.samps,by=samp.interval)),
- col=rainbow(length(unique(all.results[,2]))),
- smallplot=c(.6,.85,.7,.75),
- legend.only=T, horizontal=T, legend.shrink=.5, add=T,
- legend.args=list(text="Sample Size", side=3, line=.2, font=2, cex=.5, bty="o", bg="White"),
- axis.args=list(
- at=c(initial.samp.size,max.samps),
- labels=as.character(c(initial.samp.size,max.samps
- )
- )
- )
- )
- xsamps<-unique(all.results[,2])
- if(sequential==T){
- plot(xsamps,
- percent.atLeastOne.sig[1:length(xsamps)],
- ylab=expression("Pr(# Significant)">=1), ylim=c(0,max(percent.atLeastOne.sig)+10),
- xlab="Sample Size (per Group)",xlim=c(0, max.samps+samp.interval),
- type="s", col="Cyan", lwd=5,
- main=c("Probability of at Least 1 Significant Result",
- paste("Number of Simulations=",runs),
- paste("Initial Sample Size=",initial.samp.size, " Sampling Interval=",samp.interval)
- )
- )
- abline(h=seq(0,max(percent.atLeastOne.sig)+10,by=10),
- lwd=1, col=rgb(0,0,0,.5))
- abline(v=seq(0,max.samps+initial.samp.size,by=10), lwd=1, col=rgb(0,0,0,.5))
- abline(h=pvalue*100,
- lwd=2, col=rgb(0,0,0,1))
- }else{
- plot(xsamps,
- percent.sigs[1:length(xsamps)],
- ylim=c(0,max(percent.sigs)+10), ylab="% Significant Results",
- xlim=c(0, max.samps+samp.interval), xlab="Sample Size (per Group)",
- type="l", lwd=4, col="Cyan",
- main=c("Percent of Significant Results for Different Sample Sizes",
- paste("Number of Simulations=",runs),
- paste("Initial Sample Size=",initial.samp.size, " Sampling Interval=",samp.interval)
- )
- )
- abline(h=seq(0,max(percent.sigs)+10,by=10),
- lwd=1, col=rgb(0,0,0,.5))
- abline(v=seq(0,max.samps+initial.samp.size,by=10), lwd=1, col=rgb(0,0,0,.5))
- abline(h=pvalue*100, lwd=2)
- }
- Sys.sleep(.1)
- plot(seq(0,.995,by=.005),
- 100*hist(all.results[which(all.results[,2]==initial.samp.size),3],
- breaks=seq(0,1,by=.005),
- plot=F)$counts/runs,
- ylab="Percent of P-values", ylim=c(0,ymax),
- xlab="P-value", xlim=c(0,pvalue),
- type="l", col=rainbow(length(unique(all.results[,2])))[1],
- main="Distribution of Significant P-values"
- )
- for(s in 2:length(unique(all.results[,2]))){
- size<-unique(all.results[,2])[s]
- lines(seq(0,.995,by=.005),
- 100*hist(all.results[which(all.results[,2]==size),3],
- breaks=seq(0,1,by=.005),
- plot=F)$counts/runs,
- type="l",col=rainbow(length(unique(all.results[,2])))[s]
- )
- }
- abline(h=100/length(seq(0,.995,by=.005)))
- image.plot(matrix(seq(initial.samp.size,max.samps,by=samp.interval)),
- col=rainbow(length(unique(all.results[,2]))),
- smallplot=c(.6,.85,.7,.75),
- legend.only=T, horizontal=T, legend.shrink=.3, add=T,
- legend.args=list(text="Sample Size", side=3, line=.2, font=2, cex=.5),
- axis.args=list(
- at=c(initial.samp.size,max.samps),
- labels=as.character(c(initial.samp.size,max.samps
- )
- )
- )
- )
- } #End Script
- if(batchmode==T){
- effect.info=NULL
- for(samp.point in 1:length(sort(unique(all.results[sigs,2])))){
- sp<-sort(unique(all.results[sigs,2]))[samp.point]
- temp<-all.results[sigs,4][which(all.results[sigs,2]==sp)]
- effect.temp2<-cbind(
- max.pos<-max(temp[which(temp>0)]),
- mean.pos<-mean(temp[which(temp>0)]),
- min.pos<-min(temp[which(temp>0)]),
- max.neg<-max(temp[which(temp<0)]),
- mean.neg<-mean(temp[which(temp<0)]),
- min.neg<-min(temp[which(temp<0)])
- )
- effect.info<-rbind(effect.info,effect.temp2)
- }# End Batch
- batch.result<-cbind(b,
- sort(unique(all.results[sigs,2])),
- percent.sigs[1:length(xsamps)],
- percent.atLeastOne.sig[1:length(xsamps)],
- effect.info
- )
- colnames(batch.result)<-c("Batch", "Samp.Size", "Percent.Sig",
- "Percent.atLeastOne.sig","max.pos","mean.pos","min.pos",
- "max.neg","mean.neg","min.neg")
- batch.results<-rbind(batch.results,batch.result)
- sim.params<-cbind(rep(b,12),
- rbind(
- treatment.sd1,
- treatment.shape1,
- treatment.sd2,
- treatment.shape2,
- control.sd1,
- control.shape1,
- control.sd2,
- control.shape2,
- larger.bimodal.proportion,
- bimodal.offset,
- bimodal.treatment.only,
- sequential,
- also.perform.Utest,
- number.of.outcomes
- )
- )
- batch.params<-rbind(batch.params,sim.params)
- }
- print(paste("Batch #", b, " of", batch.size))
- }
- if(batchmode==T){
- one.sig=NULL
- for (i in 1:length(unique(batch.results[,2]))){
- temp1<-unique(batch.results[,2])[i]
- temp2<-batch.results[which(batch.results[,2]==temp1),]
- temp3<-cbind(temp1,
- temp2[which(temp2[,4]==max(temp2[,4])),1],
- temp2[which(temp2[,4]==max(temp2[,4])),4]
- )
- one.sig<-rbind(one.sig,temp3)
- }
- colnames(one.sig)<-c("Samp.Size", "Batch", "Percent.atleastOne.Significant")
- rownames(one.sig)<-1:nrow(one.sig)
- sampsize.sig=NULL
- for (i in 1:length(unique(batch.results[,2]))){
- temp1<-unique(batch.results[,2])[i]
- temp2<-batch.results[which(batch.results[,2]==temp1),]
- temp3<-cbind(temp1,
- temp2[which(temp2[,3]==max(temp2[,3])),1],
- temp2[which(temp2[,3]==max(temp2[,3])),3]
- )
- sampsize.sig<-rbind(sampsize.sig,temp3)
- }
- colnames(sampsize.sig)<-c("Samp.Size", "Batch", "Percent.Sig")
- rownames(sampsize.sig)<-1:nrow(sampsize.sig)
- one.sig
- sampsize.sig
- batch.params[which(batch.params[,1]==90),]
- } # End Batches
- verify=F
- if(verify==T){
- #control<-c(rnormp2(larger.bimodal.proportion*10000,0,control.sd1,control.shape1),
- #rnormp2(smaller.bimodal.proportion*10000,0+bimodal.offset,control.sd2,control.shape2))
- #treatment<-c(rnormp2(larger.bimodal.proportion*10000,0+effect,treatment.sd1,treatment.shape1),
- #rnormp2(smaller.bimodal.proportion*10000,0+effect+bimodal.offset,treatment.sd2,treatment.shape2))
- #hist(control)
- require(sciplot)
- effect=0.1
- nsamp=1000
- drop.outliers=T
- outlier.cutoff<-2
- out=NULL
- for(i in 1:nsamp){
- dev.new()
- par(mfrow=c(1,2))
- #a<-c(rnorm(8,10,1),rnorm(2,12,1))
- #b<-c(rnorm(8,10,1),rnorm(2,12,1))
- random.sample<-bimodal.pop[runif(initial.samp.size,1,1000)]
- random.sample
- control<-c(rnormp2(length(which(random.sample==1)),0,control.sd1,control.shape1),
- rnormp2(length(which(random.sample==0)),0+bimodal.offset,control.sd2,control.shape2))
- random.sample<-bimodal.pop[runif(initial.samp.size,1,1000)]
- random.sample
- treatment<-c(rnormp2(length(which(random.sample==1)),0+effect,treatment.sd1,treatment.shape1),
- rnormp2(length(which(random.sample==0)),0+effect+bimodal.offset,treatment.sd2,treatment.shape2))
- pval<-t.test(control,treatment)$p.value
- if(drop.outliers==T){
- if(pval>pvalue){
- control.temp<-control
- treatment.temp<-treatment
- high.cutoff<-mean(control.temp)+outlier.cutoff*sd(control.temp)
- low.cutoff<-mean(control.temp)-outlier.cutoff*sd(control.temp)
- high.outliers<-which(control.temp>high.cutoff)
- low.outliers<-which(control.temp<low.cutoff)
- control.temp[high.outliers]<-NA
- control.temp[low.outliers]<-NA
- high.cutoff<-mean(treatment.temp)+outlier.cutoff*sd(treatment.temp)
- low.cutoff<-mean(treatment.temp)-outlier.cutoff*sd(treatment.temp)
- high.outliers<-which(treatment.temp>high.cutoff)
- low.outliers<-which(treatment.temp<low.cutoff)
- treatment.temp[high.outliers]<-NA
- treatment.temp[low.outliers]<-NA
- if(
- length(treatment.temp[which(!is.na(treatment.temp))])>1 &&
- length(control.temp[which(!is.na(control.temp))])>1
- ){
- pval<-t.test(control.temp,treatment.temp)$p.value
- control<-control.temp
- treatment<-treatment.temp
- }
- }
- } #end drop outliers
- adjust=6
- a<-control+adjust
- b<-treatment+adjust
- boxplot(a,b,col=c("Blue","Red"),ylim=c(0,3*adjust))
- stripchart(a, add=T, at=1,
- ylim=c(0,3*adjust),
- vertical=T, pch=16)
- stripchart(b, add=T, at=2,
- vertical=T, pch=16)
- x.fact<-c(rep("a",length(a)),rep("b",length(b)))
- vals<-c(a,b)
- bargraph.CI(x.fact,vals, col=c("Blue","Red"), ylim=c(0,3*adjust),
- main=round(t.test(a,b)$p.value,3)
- )
- control
- treatment
- out<-rbind(out,t.test(a,b)$p.value)
- Sys.sleep(.5)
- dev.off()
- }
- length(which(out<pvalue))/nsamp
- }
- #one.sig
- #sampsize.sig
- #batch.results
- #batch.params[which(batch.params[,1]==40),]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement