Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #----
- # Project title: time vs. age based models
- #---Project contributors:
- #----Donald Williams
- #----Marilu Isiordia
- #----Erin Winters
- #----Philippe Rast
- # (order does not reflect authorship)
- #----
- #----citation
- # Hoffman, L. (2012). Considering alternative metrics of time:
- # Does anybody really know what "time" is. Advances in longitudinal
- # methods in the social and behavioral sciences. Charlotte,
- # NC: Information Age Publishing, 255-287.
- #----
- library(reshape2)
- library(lme4)
- library(MASS)
- library(parallel)
- library(ggplot2)
- library(hydroGOF)
- #######################################
- ### Simulation for (10.9; pg. 279) ####
- #######################################
- # 1) time based:
- #-----a) balanced design
- #-----b) consecutive years (50, 51, 52,...)
- #-----c) cohort effectrm(list = ls())
- # function to generate data
- # y_time = generated with time as the predictor
- # y_age = generated with age as the predictor
- # function for each subjects' age being unique
- unique_value <- function(waves, age_min, age_max) {
- repeat {
- # returns unduplicated whole numbers
- # given a number of waves
- i <- round(sort(runif(waves, age_min, age_max)), 0)
- x <- sum(duplicated(i))
- # exit if the condition is met
- if (x < 1) break
- }
- return(i)
- }
- # function to generate data
- sim_data <- function(N, waves, age_min, age_max,
- b0, b1, b2, u_int, u_slp, e_var, u_cor, ...){
- # simulate data for 10.5 in Hoffman(2015)
- # Args:
- # N: total number of subjects
- # time_pts: waves
- # age_min: minimum age in study
- # age_max: maximum age in study
- # b0: fixed intercept
- # b1: fixed effect of time
- # (Age - AgeT1)_ij
- # b2: fixed effect of age at T1
- # (AgeT1)_ij
- # u_int: varying ("random") intercept
- # variance
- # u_slp: varying ("random") slope
- # variance
- # u_cor: correlation between u_int & u_slp
- # e_var: residual variance
- # Returns:
- # a data.frame with the columns id, time, age, age_t1, y_age
- # y_time; as well as GM centered variables: time, age, age_t1
- # subject id variable
- id <- rep(1:N, rep(waves, N))
- # temporary holder for age
- holder <- replicate(N, unique_value(waves, age_min, age_max))
- # melt columns into one columns, and select column 3 (age values)
- age <- reshape2::melt(holder)[,3]
- # center age
- c_age <- age - mean(age)
- # age at which each subject entered the study
- age_t1 <- rep(age[seq(1, length(age), by = waves)], each = waves)
- # center age at t1
- c_age_t1 <- age_t1 - mean(age_t1)
- # time variable; time point 1 == 0
- time <- unlist(lapply(unique(id), function(x) seq(sum(id == x)))) - 1
- c_time <- time - mean(time)
- #fixed effects
- #--b0 = intercept
- #--b1 = time/age effect
- #--b2 = T1_age effect
- #random effects
- #--u_int = intercept
- #--u_slp = slope
- #--e_var = residual variation
- re_matrix <- matrix(c(1, u_cor, u_cor, 1), nrow = 2)
- u <- c(sqrt(u_int), sqrt(u_slp))
- re_matrix <- diag(u) %*% re_matrix %*% diag(u)
- eff <- MASS::mvrnorm(N, mu = c(0, 0), Sigma = re_matrix)
- colnames(eff) <- c("ran_int", "ran_slp")
- e_var <- rnorm(length(id), mean = 0, sd = sqrt(e_var))
- # generate outcomes
- y_time <- b0 + b1 * time + b2 * age_t1 + eff[,"ran_slp"][id] * time +
- eff[,"ran_int"][id] + e_var
- y_age <- b0 + b1 * c_age + b2 * age_t1 + eff[,"ran_slp"][id] * c_age +
- eff[,"ran_int"][id] + e_var
- # returned data frame
- data.frame(id, time, c_time, age, c_age, age_t1, c_age_t1, y_time, y_age)
- }
- ##################################
- ## run this code first
- ## N = 50 , 3 waves
- ##################################
- mat <- matrix(nrow = 500, ncol = 2)
- for (i in 1:500) {
- dat <- sim_data(N =50, waves = 3, age_min = 50, age_max = 80,
- b0 = 5, b1 = -.5, b2 = 2, u_int = 75, u_slp = 1, u_cor = 0, e_var = 15)
- options(warn=-1)
- m1 <- lme4::lmer(y_time ~ time + age_t1 + (time|id), dat)
- mat[i,1] <- unlist(VarCorr(m1))[4]
- m2 <- lme4::lmer(y_age ~ c_age + age_t1 + (c_age|id), dat)
- mat[i,2] <- unlist(VarCorr(m2))[4]
- options(warn=0)
- }
- # run the above to seee the bias
- mean(mat[,1])
- mean(mat[,2])
- sd(mat[,1])
- sd(mat[,2])
- mse(mat[,1], rep(1, 500))
- mse(mat[,2], rep(1, 500))
- est <- c(mean(mat[,1]), mean(mat[,2]))
- std <- c(sd(mat[,1]), sd(mat[,2]))
- mse <- c(mse(mat[,1], rep(1, 500)), mse(mat[,2], rep(1, 500)))
- model <- as.factor(c("t_on_t", "a_on_a"))
- df_plot <- data.frame(est, std, mse, model)
- ggplot(df_plot, aes(x = as.factor(model), y = est, group = model)) +
- geom_hline(yintercept = 1.0, color = "red", linetype = 2) +
- geom_point(size = 5) +
- geom_linerange(ymin = est - std, ymax = est + std, size = 1.25) +
- scale_y_continuous(limits = c(0,2.5))+
- theme_classic() +
- ggtitle("N = 50, Waves = 3", "Age range of 30 years,
- 1/15 slope to variance ratio")
- ##################################
- ##################################
- ## run this code next
- ## n = 250 waves = 7
- ##################################
- ##################################
- mat <- matrix(nrow = 500, ncol = 2)
- for (i in 1:500) {
- dat <- sim_data(N =250, waves = 7, age_min = 50, age_max = 80,
- b0 = 5, b1 = -.5, b2 = 2, u_int = 75, u_slp = 1, u_cor = 0, e_var = 15)
- options(warn=-1)
- m1 <- lme4::lmer(y_time ~ time + age_t1 + (time|id), dat)
- mat[i,1] <- unlist(VarCorr(m1))[4]
- m2 <- lme4::lmer(y_age ~ c_age + age_t1 + (c_age|id), dat)
- mat[i,2] <- unlist(VarCorr(m2))[4]
- options(warn=0)
- }
- # run the above to seee the bias
- mean(mat[,1])
- mean(mat[,2])
- sd(mat[,1])
- sd(mat[,2])
- mse(mat[,1], rep(1, 500))
- mse(mat[,2], rep(1, 500))
- est <- c(mean(mat[,1]), mean(mat[,2]))
- std <- c(sd(mat[,1]), sd(mat[,2]))
- mse <- c(mse(mat[,1], rep(1, 500)), mse(mat[,2], rep(1, 500)))
- model <- as.factor(c("t_on_t", "a_on_a"))
- df_plot <- data.frame(est, std, mse, model)
- ggplot(df_plot, aes(x = as.factor(model), y = est, group = model)) +
- geom_hline(yintercept = 1.0, color = "red", linetype = 2)+
- geom_point(size = 5) +
- geom_linerange(ymin = est - std, ymax = est + std, size = 1.25) +
- scale_y_continuous(limits = c(0,2.5))+
- theme_classic() +
- ggtitle("N = 250, Waves = 7", "Age range of 30 years,
- 1/15 slope to variance ratio")
- ##################################
- ##################################
- ## run this code next
- ## n = 1000 waves = 7
- ##################################
- mat <- matrix(nrow = 500, ncol = 2)
- ##################################
- for (i in 1:500) {
- dat <- sim_data(N =1000, waves = 7, age_min = 50, age_max = 80,
- b0 = 5, b1 = -.5, b2 = 2, u_int = 75, u_slp = 1, u_cor = 0, e_var = 15)
- options(warn=-1)
- m1 <- lme4::lmer(y_time ~ time + age_t1 + (time|id), dat)
- mat[i,1] <- unlist(VarCorr(m1))[4]
- m2 <- lme4::lmer(y_age ~ c_age + age_t1 + (c_age|id), dat)
- mat[i,2] <- unlist(VarCorr(m2))[4]
- options(warn=0)
- }
- # run the above to seee the bias
- mean(mat[,1])
- mean(mat[,2])
- sd(mat[,1])
- sd(mat[,2])
- mse(mat[,1], rep(1, 500))
- mse(mat[,2], rep(1, 500))
- est <- c(mean(mat[,1]), mean(mat[,2]))
- std <- c(sd(mat[,1]), sd(mat[,2]))
- mse <- c(mse(mat[,1], rep(1, 500)), mse(mat[,2], rep(1, 500)))
- model <- as.factor(c("t_on_t", "a_on_a"))
- df_plot <- data.frame(est, std, mse, model)
- ggplot(df_plot, aes(x = as.factor(model), y = est, group = model)) +
- geom_hline(yintercept = 1.0, color = "red", linetype = 2)+
- geom_point(size = 5) +
- geom_linerange(ymin = est - std, ymax = est + std, size = 1.25) +
- scale_y_continuous(limits = c(0,2.5))+
- theme_classic() +
- ggtitle("N = 1000, Waves = 7", "Age range of 30 years,
- 1/15 slope to variance ratio")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement