# Sequential sampling (optional stopping): null distribution of t-test p-values

# Simulate sequential sampling ("optional stopping") for two groups drawn
# from the same standard normal distribution.
#
# At every step, nDataPerStep new observations are appended to each group
# and a two-sample t-test (stats::t.test defaults) is run on all data
# collected so far. Sampling stops as soon as the p-value drops below
# sig_lvl, or after maxSteps steps, whichever comes first.
#
# Arguments:
#   maxSteps      Maximum number of sampling steps (single finite number
#                 >= 1; a fractional value is truncated).
#   nDataPerStep  Observations added to each group per step (single finite
#                 number >= 2, because t.test needs at least two
#                 observations per group).
#   sig_lvl       Stopping threshold for the p-value (single non-missing
#                 number). A value of 0 never stops early.
#
# Returns a list with:
#   a, b  Numeric vectors holding all observations drawn for each group.
#   p     Numeric vector with one p-value per completed step; the last
#         element is the p-value at which sampling ended.
seq_sampling_model <- function(maxSteps     = 100,
                               nDataPerStep = 10,
                               sig_lvl      = 0.05) {

  # Fail early with a clear message instead of a cryptic error from the
  # loop (maxSteps < 1) or from t.test (fewer than 2 observations).
  stopifnot(
    is.numeric(maxSteps), length(maxSteps) == 1L,
    is.finite(maxSteps), maxSteps >= 1,
    is.numeric(nDataPerStep), length(nDataPerStep) == 1L,
    is.finite(nDataPerStep), nDataPerStep >= 2,
    is.numeric(sig_lvl), length(sig_lvl) == 1L, !is.na(sig_lvl)
  )

  # Truncate a fractional step count, as the sequence 1:maxSteps would.
  n_steps <- as.integer(maxSteps)

  a <- numeric(0)
  b <- numeric(0)
  p <- rep(NA_real_, n_steps)
  steps_run <- 0L

  for (step in seq_len(n_steps)) {
    # Draw order (a first, then b) determines the random-number stream.
    a <- c(a, rnorm(nDataPerStep))
    b <- c(b, rnorm(nDataPerStep))
    p[step] <- t.test(a, b)$p.value
    steps_run <- step

    if (p[step] < sig_lvl) {
      break
    }
  }

  # Keep only the p-values of the steps that were actually run.
  list(a = a, b = b, p = p[seq_len(steps_run)])
}


# Run simulation
# Both groups come from the same distribution, so every "significant"
# result below is a false positive.
set.seed(0)
nSim         <- 1e2
maxSteps     <- 100
nDataPerStep <- 10
sig_lvl      <- 0.05

# Save the *last* p-value of each simulated experiment
res1 <- replicate(
  nSim,
  tail(
    seq_sampling_model(
      maxSteps     = maxSteps,
      nDataPerStep = nDataPerStep,
      sig_lvl      = sig_lvl
    )$p,
    1
  )
)

# Save the *lowest* p-value of each simulated experiment
# (an independent set of nSim runs, not the runs used for res1)
res2 <- replicate(
  nSim,
  min(
    seq_sampling_model(
      maxSteps     = maxSteps,
      nDataPerStep = nDataPerStep,
      sig_lvl      = sig_lvl
    )$p
  )
)

# Plot the null (no difference between a and b) distribution of p-values.
# mean(res < sig_lvl) is a proportion, so scale by 100 to match the "%" label.
pct_last <- round(100 * mean(res1 < sig_lvl), 1)
pct_min  <- round(100 * mean(res2 < sig_lvl), 1)

par(mfrow = c(2, 1))
hist(res1, main = paste0("Last: ", pct_last, "% significant"))
hist(res2, main = paste0("Min: ",  pct_min,  "% significant"))