Untitled

library(scales)
library(vcdExtra)
library(data.table)
library(MASS)
library(lme4)
library(questionr)
library(plyr)
library(dplyr)
library(parallel)
library(Rlab)
library(gtools)
library(sjstats)

#mediation simulation ####
start.time = Sys.time()

#calculate the number of cores
no_cores = detectCores() - 1

#initiate cluster
cl = makeCluster(no_cores, type = "FORK")

mediationSim = function() {

  #draw number of departments
  d = sample(c(1000,1100,1200,1300,1400,1500), size = 1)
  dept = 1:d

  #draw number of obs per department
  s = sample(seq(10,100, by = 2), size = d, replace = T)
  j = s/2
  x = NULL
  for (i in j){
    x_prob = runif(1)
    xtemp = rbern(i, x_prob)
    x = c(x, xtemp)
  }

  #combine variables into a data frame
  s = as.data.frame(s)
  dept = as.data.frame(dept)
  dept = dept[rep(row.names(dept), s$s), 1] #repeat the letter for each department based on number of observations in each department (s)
  dat = data.table(dept, x)

  #compute group means for the x variable
  x_group_mean = dat[, list(x_group_mean = mean(x)), by = "dept"]
  dat = merge(dat, x_group_mean, by = "dept")

  #draw random intercept for path a
  dat$a_int = rep(rnorm(d, mean = 0, sd = 2), times = j*2)

  #draw random slope for path a
  WG_a = runif(1) #true within group effect
  dat$a_slope = rep(mapply(rnorm, n = d, mean = WG_a, sd = .5), times = j*2)

  #between group effect for path a
  BG_a = runif(1)

  #create mediator variable
  dat$m_prob = 1 / (1 + exp(-(dat$a_int + BG_a*dat$x_group_mean + dat$a_slope*dat$x)))
  dat$m = rbern(n=length(dat$dept), prob=dat$m_prob)

  #compute group means for the m variable
  m_group_mean = dat[, list(m_group_mean = mean(m)), by = "dept"]
  dat = merge(dat, m_group_mean, by = "dept")

  #create random intercept for path b
  dat$b_int = rep(rnorm(d, mean = 0, sd = 2), times = j*2)

  #draw random slope for path b
  WG_b = runif(1)
  dat$b_slope = rep(mapply(rnorm, n = d, mean = WG_b, sd = .5), times = j*2)

  #between group effect for path b
  BG_b = runif(1)

  #path c slope
  WG_c = runif(1)

  #create y variable
  dat$y_prob = 1 / (1 + exp(-(dat$b_int + BG_b*dat$m_group_mean + dat$b_slope*dat$m + WG_c*dat$x)))
  dat$y = rbern(n=length(dat$dept), prob=dat$y_prob)

  #compute grand means
  dat$x_grand_mean = mean(dat$x)
  dat$m_grand_mean = mean(dat$m)

  #create gmc variables
  dat$x_gmc = dat$x - dat$x_grand_mean
  dat$m_gmc = dat$m - dat$m_grand_mean

  #create cwc variables
  dat$x_cwc = dat$x - dat$x_group_mean
  dat$m_cwc = dat$m - dat$m_group_mean

  #ICC ####
  icc1_m = glmer(m ~ (1|dept), data = dat, family = "binomial")
  #icc1 = as.data.frame(VarCorr(icc1_m))$vcov / (as.data.frame(VarCorr(icc1_m))$vcov + (pi^2/3))
  icc1 = as.data.frame(icc(icc1_m))[1,1]

  icc2_m = glmer(y ~ (1|dept), data = dat, family = "binomial")
  icc2 = as.data.frame(icc(icc2_m))[1,1]

  #CWC ####
  #estimate model for path a
  path_a_cwc = summary(glmer(m ~ x_cwc + x_group_mean + (1+x_cwc|dept), data = dat, family = "binomial"))
  WG_a_est_cwc = summary(path_a_cwc)$coef[2,1]
  WG_a_SE_cwc = summary(path_a_cwc)$coef[2,2]
  #WG_a_recov_cwc = ifelse(WG_a < WG_a_est_cwc+1.96*WG_a_SE_cwc & WG_a > WG_a_est_cwc-1.96*WG_a_SE_cwc, 1, 0)

  BG_a_est_cwc = summary(path_a_cwc)$coef[3,1]
  BG_a_SE_cwc = summary(path_a_cwc)$coef[3,2]
  #BG_a_recov_cwc = ifelse(BG_a < BG_a_est_cwc+1.96*BG_a_SE_cwc & BG_a > BG_a_est_cwc-1.96*BG_a_SE_cwc, 1, 0)

  #estimate model for path b + c'
  path_b_cwc = glmer(y ~ x_cwc + m_cwc + m_group_mean + (1+m_cwc|dept), data = dat, family = "binomial")
  WG_b_est_cwc = summary(path_b_cwc)$coef[3,1]
  WG_b_SE_cwc = summary(path_b_cwc)$coef[3,2]
  #WG_b_recov_cwc = ifelse(WG_b < WG_b_est_cwc+1.96*WG_b_SE_cwc & WG_b > WG_b_est_cwc-1.96*WG_b_SE_cwc, 1, 0)

  BG_b_est_cwc = summary(path_b_cwc)$coef[4,1]
  BG_b_SE_cwc = summary(path_b_cwc)$coef[4,2]
  #BG_b_recov_cwc = ifelse(BG_b < BG_b_est_cwc+1.96*BG_b_SE_cwc & BG_b > BG_b_est_cwc-1.96*BG_b_SE_cwc, 1, 0)

  WG_c_est_cwc = summary(path_b_cwc)$coef[2,1]
  WG_c_SE_cwc = summary(path_b_cwc)$coef[2,2]
  #WG_c_recov_cwc = ifelse(WG_c < WG_c_est_cwc+1.96*WG_c_SE_cwc & WG_c > WG_c_est_cwc-1.96*WG_c_SE_cwc, 1, 0)

  #GMC ####
  #estimate model for path a
  path_a_gmc = summary(glmer(m ~ x_gmc + x_group_mean + (1+x_gmc|dept), data = dat, family = "binomial"))
  WG_a_est_gmc = summary(path_a_gmc)$coef[2,1]
  WG_a_SE_gmc = summary(path_a_gmc)$coef[2,2]
  #WG_a_recov_gmc = ifelse(WG_a < WG_a_est_gmc+1.96*WG_a_SE_gmc & WG_a > WG_a_est_gmc-1.96*WG_a_SE_gmc, 1, 0)

  BG_a_est_gmc = summary(path_a_gmc)$coef[3,1]
  BG_a_SE_gmc = summary(path_a_gmc)$coef[3,2]
  #BG_a_recov_gmc = ifelse(BG_a < BG_a_est_gmc+1.96*BG_a_SE_gmc & BG_a > BG_a_est_gmc-1.96*BG_a_SE_gmc, 1, 0)

  #estimate model for path b + c'
  path_b_gmc = glmer(y ~ x_gmc + m_gmc + m_group_mean + (1+m_gmc|dept), data = dat, family = "binomial")
  WG_b_est_gmc = summary(path_b_gmc)$coef[3,1]
  WG_b_SE_gmc = summary(path_b_gmc)$coef[3,2]
  #WG_b_recov_gmc = ifelse(WG_b < WG_b_est_gmc+1.96*WG_b_SE_gmc & WG_b > WG_b_est_gmc-1.96*WG_b_SE_gmc, 1, 0)

  BG_b_est_gmc = summary(path_b_gmc)$coef[4,1]
  BG_b_SE_gmc = summary(path_b_gmc)$coef[4,2]
  #BG_b_recov_gmc = ifelse(BG_b < BG_b_est_gmc+1.96*BG_b_SE_gmc & BG_b > BG_b_est_gmc-1.96*BG_b_SE_gmc, 1, 0)

  WG_c_est_gmc = summary(path_b_gmc)$coef[2,1]
  WG_c_SE_gmc = summary(path_b_gmc)$coef[2,2]
  #WG_c_recov_gmc = ifelse(WG_c < WG_c_est_gmc+1.96*WG_c_SE_gmc & WG_c > WG_c_est_gmc-1.96*WG_c_SE_gmc, 1, 0)

  #no centering ####
  #estimate model for path a
  path_a = summary(glmer(m ~ x + x_group_mean + (1+x|dept), data = dat, family = "binomial"))
  WG_a_est = summary(path_a)$coef[2,1]
  WG_a_SE = summary(path_a)$coef[2,2]
  #WG_a_recov = ifelse(WG_a < WG_a_est+1.96*WG_a_SE & WG_a > WG_a_est-1.96*WG_a_SE, 1, 0)

  BG_a_est = summary(path_a)$coef[3,1]
  BG_a_SE = summary(path_a)$coef[3,2]
  #BG_a_recov = ifelse(BG_a < BG_a_est+1.96*BG_a_SE & BG_a > BG_a_est-1.96*BG_a_SE, 1, 0)

  #estimate model for path b + c'
  path_b = glmer(y ~ x + m + m_group_mean + (1+m|dept), data = dat, family = "binomial")
  WG_b_est = summary(path_b)$coef[3,1]
  WG_b_SE = summary(path_b)$coef[3,2]
  #WG_b_recov = ifelse(WG_b < WG_b_est+1.96*WG_b_SE & WG_b > WG_b_est-1.96*WG_b_SE, 1, 0)

  BG_b_est = summary(path_b)$coef[4,1]
  BG_b_SE = summary(path_b)$coef[4,2]
  #BG_b_recov = ifelse(BG_b < BG_b_est+1.96*BG_b_SE & BG_b > BG_b_est-1.96*BG_b_SE, 1, 0)

  WG_c_est = summary(path_b)$coef[2,1]
  WG_c_SE = summary(path_b)$coef[2,2]
  #WG_c_recov = ifelse(WG_c < WG_c_est+1.96*WG_c_SE & WG_c > WG_c_est-1.96*WG_c_SE, 1, 0)

  res = c(d, icc1, icc2, BG_a, BG_b, WG_a, WG_b, WG_c,
          BG_a_est, BG_a_SE, BG_b_est, BG_b_SE,
          WG_a_est, WG_a_SE, WG_b_est, WG_b_SE, WG_c_est, WG_c_SE,
          BG_a_est_gmc, BG_a_SE_gmc, BG_b_est_gmc, BG_b_SE_gmc,
          WG_a_est_gmc, WG_a_SE_gmc, WG_b_est_gmc, WG_b_SE_gmc, WG_c_est_gmc, WG_c_SE_gmc,
          BG_a_est_cwc, BG_a_SE_cwc, BG_b_est_cwc, BG_b_SE_cwc,
          WG_a_est_cwc, WG_a_SE_cwc, WG_b_est_cwc, WG_b_SE_cwc, WG_c_est_cwc, WG_c_SE_cwc)

  return(res)

}

clusterExport(cl, c("mediationSim", "rbern"))
clusterEvalQ(cl, library(scales))
clusterEvalQ(cl, library(MASS))
clusterEvalQ(cl, library(vcdExtra))
clusterEvalQ(cl, library(data.table))
clusterEvalQ(cl, library(lme4))
clusterEvalQ(cl, library(questionr))
clusterEvalQ(cl, library(plyr))
clusterEvalQ(cl, library(dplyr))
clusterEvalQ(cl, library(Rlab))
clusterEvalQ(cl, library(gtools))
clusterEvalQ(cl, library(sjstats))

mediationres = do.call(rbind, parLapply(cl, 1:5, function(i) mediationSim()))
stopCluster(cl)

end.time = Sys.time()
time.taken = end.time - start.time
time.taken