Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2017
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.67 KB | None | 0 0
  1. #----
  2. # Project title: time vs. age based models
  3. #---Project contributors:
  4. #----Donald Williams
  5. #----Marilu Isiordia
  6. #----Erin Winters
  7. #----Philippe Rast
  8. # (order does not reflect authorship)
  9. #----
  10. #----citation
  11. # Hoffman, L. (2012). Considering alternative metrics of time:
  12. # Does anybody really know what "time" is. Advances in longitudinal
  13. # methods in the social and behavioral sciences. Charlotte,
  14. # NC: Information Age Publishing, 255-287.
  15. #----
  16. library(reshape2)
  17. library(lme4)
  18. library(MASS)
  19. library(parallel)
  20. library(ggplot2)
  21. library(hydroGOF)
  22. #######################################
  23. ### Simulation for (10.9; pg. 279) ####
  24. #######################################
  25. # 1) time based:
  26. #-----a) balanced design
  27. #-----b) consecutive years (50, 51, 52,...)
  28. #-----c) cohort effectrm(list = ls())
  29. # function to generate data
  30. # y_time = generated with time as the predictor
  31. # y_age = generated with age as the predictor
  32.  
  33. # function for each subjects' age being unique
  34. unique_value <- function(waves, age_min, age_max) {
  35. repeat {
  36. # returns unduplicated whole numbers
  37. # given a number of waves
  38. i <- round(sort(runif(waves, age_min, age_max)), 0)
  39. x <- sum(duplicated(i))
  40. # exit if the condition is met
  41. if (x < 1) break
  42. }
  43. return(i)
  44. }
  45.  
  46. # function to generate data
  47. sim_data <- function(N, waves, age_min, age_max,
  48. b0, b1, b2, u_int, u_slp, e_var, u_cor, ...){
  49.  
  50. # simulate data for 10.5 in Hoffman(2015)
  51. # Args:
  52. # N: total number of subjects
  53. # time_pts: waves
  54. # age_min: minimum age in study
  55. # age_max: maximum age in study
  56. # b0: fixed intercept
  57. # b1: fixed effect of time
  58. # (Age - AgeT1)_ij
  59. # b2: fixed effect of age at T1
  60. # (AgeT1)_ij
  61. # u_int: varying ("random") intercept
  62. # variance
  63. # u_slp: varying ("random") slope
  64. # variance
  65. # u_cor: correlation between u_int & u_slp
  66. # e_var: residual variance
  67. # Returns:
  68. # a data.frame with the columns id, time, age, age_t1, y_age
  69. # y_time; as well as GM centered variables: time, age, age_t1
  70.  
  71.  
  72. # subject id variable
  73. id <- rep(1:N, rep(waves, N))
  74.  
  75. # temporary holder for age
  76. holder <- replicate(N, unique_value(waves, age_min, age_max))
  77. # melt columns into one columns, and select column 3 (age values)
  78. age <- reshape2::melt(holder)[,3]
  79. # center age
  80. c_age <- age - mean(age)
  81.  
  82. # age at which each subject entered the study
  83. age_t1 <- rep(age[seq(1, length(age), by = waves)], each = waves)
  84. # center age at t1
  85. c_age_t1 <- age_t1 - mean(age_t1)
  86.  
  87. # time variable; time point 1 == 0
  88. time <- unlist(lapply(unique(id), function(x) seq(sum(id == x)))) - 1
  89. c_time <- time - mean(time)
  90. #fixed effects
  91. #--b0 = intercept
  92. #--b1 = time/age effect
  93. #--b2 = T1_age effect
  94.  
  95. #random effects
  96. #--u_int = intercept
  97. #--u_slp = slope
  98. #--e_var = residual variation
  99. re_matrix <- matrix(c(1, u_cor, u_cor, 1), nrow = 2)
  100. u <- c(sqrt(u_int), sqrt(u_slp))
  101. re_matrix <- diag(u) %*% re_matrix %*% diag(u)
  102. eff <- MASS::mvrnorm(N, mu = c(0, 0), Sigma = re_matrix)
  103. colnames(eff) <- c("ran_int", "ran_slp")
  104. e_var <- rnorm(length(id), mean = 0, sd = sqrt(e_var))
  105.  
  106. # generate outcomes
  107. y_time <- b0 + b1 * time + b2 * age_t1 + eff[,"ran_slp"][id] * time +
  108. eff[,"ran_int"][id] + e_var
  109. y_age <- b0 + b1 * c_age + b2 * age_t1 + eff[,"ran_slp"][id] * c_age +
  110. eff[,"ran_int"][id] + e_var
  111. # returned data frame
  112. data.frame(id, time, c_time, age, c_age, age_t1, c_age_t1, y_time, y_age)
  113. }
  114.  
  115. ##################################
  116. ## run this code first
  117. ## N = 50 , 3 waves
  118. ##################################
  119. mat <- matrix(nrow = 500, ncol = 2)
  120.  
  121.  
  122. for (i in 1:500) {
  123.  
  124. dat <- sim_data(N =50, waves = 3, age_min = 50, age_max = 80,
  125. b0 = 5, b1 = -.5, b2 = 2, u_int = 75, u_slp = 1, u_cor = 0, e_var = 15)
  126. options(warn=-1)
  127. m1 <- lme4::lmer(y_time ~ time + age_t1 + (time|id), dat)
  128. mat[i,1] <- unlist(VarCorr(m1))[4]
  129. m2 <- lme4::lmer(y_age ~ c_age + age_t1 + (c_age|id), dat)
  130. mat[i,2] <- unlist(VarCorr(m2))[4]
  131. options(warn=0)
  132. }
  133.  
  134. # run the above to seee the bias
  135. mean(mat[,1])
  136. mean(mat[,2])
  137. sd(mat[,1])
  138. sd(mat[,2])
  139. mse(mat[,1], rep(1, 500))
  140. mse(mat[,2], rep(1, 500))
  141.  
  142. est <- c(mean(mat[,1]), mean(mat[,2]))
  143. std <- c(sd(mat[,1]), sd(mat[,2]))
  144. mse <- c(mse(mat[,1], rep(1, 500)), mse(mat[,2], rep(1, 500)))
  145. model <- as.factor(c("t_on_t", "a_on_a"))
  146. df_plot <- data.frame(est, std, mse, model)
  147.  
  148. ggplot(df_plot, aes(x = as.factor(model), y = est, group = model)) +
  149. geom_hline(yintercept = 1.0, color = "red", linetype = 2) +
  150. geom_point(size = 5) +
  151. geom_linerange(ymin = est - std, ymax = est + std, size = 1.25) +
  152. scale_y_continuous(limits = c(0,2.5))+
  153. theme_classic() +
  154. ggtitle("N = 50, Waves = 3", "Age range of 30 years,
  155. 1/15 slope to variance ratio")
  156.  
  157.  
  158. ##################################
  159. ##################################
  160. ## run this code next
  161. ## n = 250 waves = 7
  162. ##################################
  163. ##################################
  164.  
  165.  
  166. mat <- matrix(nrow = 500, ncol = 2)
  167.  
  168.  
  169. for (i in 1:500) {
  170.  
  171. dat <- sim_data(N =250, waves = 7, age_min = 50, age_max = 80,
  172. b0 = 5, b1 = -.5, b2 = 2, u_int = 75, u_slp = 1, u_cor = 0, e_var = 15)
  173. options(warn=-1)
  174. m1 <- lme4::lmer(y_time ~ time + age_t1 + (time|id), dat)
  175. mat[i,1] <- unlist(VarCorr(m1))[4]
  176. m2 <- lme4::lmer(y_age ~ c_age + age_t1 + (c_age|id), dat)
  177. mat[i,2] <- unlist(VarCorr(m2))[4]
  178. options(warn=0)
  179. }
  180.  
  181. # run the above to seee the bias
  182. mean(mat[,1])
  183. mean(mat[,2])
  184. sd(mat[,1])
  185. sd(mat[,2])
  186. mse(mat[,1], rep(1, 500))
  187. mse(mat[,2], rep(1, 500))
  188.  
  189. est <- c(mean(mat[,1]), mean(mat[,2]))
  190. std <- c(sd(mat[,1]), sd(mat[,2]))
  191. mse <- c(mse(mat[,1], rep(1, 500)), mse(mat[,2], rep(1, 500)))
  192. model <- as.factor(c("t_on_t", "a_on_a"))
  193. df_plot <- data.frame(est, std, mse, model)
  194.  
  195. ggplot(df_plot, aes(x = as.factor(model), y = est, group = model)) +
  196. geom_hline(yintercept = 1.0, color = "red", linetype = 2)+
  197. geom_point(size = 5) +
  198. geom_linerange(ymin = est - std, ymax = est + std, size = 1.25) +
  199. scale_y_continuous(limits = c(0,2.5))+
  200. theme_classic() +
  201. ggtitle("N = 250, Waves = 7", "Age range of 30 years,
  202. 1/15 slope to variance ratio")
  203.  
  204. ##################################
  205. ##################################
  206. ## run this code next
  207. ## n = 1000 waves = 7
  208. ##################################
  209. mat <- matrix(nrow = 500, ncol = 2)
  210. ##################################
  211.  
  212. for (i in 1:500) {
  213.  
  214. dat <- sim_data(N =1000, waves = 7, age_min = 50, age_max = 80,
  215. b0 = 5, b1 = -.5, b2 = 2, u_int = 75, u_slp = 1, u_cor = 0, e_var = 15)
  216. options(warn=-1)
  217. m1 <- lme4::lmer(y_time ~ time + age_t1 + (time|id), dat)
  218. mat[i,1] <- unlist(VarCorr(m1))[4]
  219. m2 <- lme4::lmer(y_age ~ c_age + age_t1 + (c_age|id), dat)
  220. mat[i,2] <- unlist(VarCorr(m2))[4]
  221. options(warn=0)
  222. }
  223.  
  224. # run the above to seee the bias
  225. mean(mat[,1])
  226. mean(mat[,2])
  227. sd(mat[,1])
  228. sd(mat[,2])
  229. mse(mat[,1], rep(1, 500))
  230. mse(mat[,2], rep(1, 500))
  231.  
  232. est <- c(mean(mat[,1]), mean(mat[,2]))
  233. std <- c(sd(mat[,1]), sd(mat[,2]))
  234. mse <- c(mse(mat[,1], rep(1, 500)), mse(mat[,2], rep(1, 500)))
  235. model <- as.factor(c("t_on_t", "a_on_a"))
  236. df_plot <- data.frame(est, std, mse, model)
  237.  
  238. ggplot(df_plot, aes(x = as.factor(model), y = est, group = model)) +
  239. geom_hline(yintercept = 1.0, color = "red", linetype = 2)+
  240. geom_point(size = 5) +
  241. geom_linerange(ymin = est - std, ymax = est + std, size = 1.25) +
  242. scale_y_continuous(limits = c(0,2.5))+
  243. theme_classic() +
  244. ggtitle("N = 1000, Waves = 7", "Age range of 30 years,
  245. 1/15 slope to variance ratio")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement