Advertisement
Guest User

Untitled

a guest
Oct 19th, 2018
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 19.04 KB | None | 0 0
  1. library(foreach)
  2.  
  3. ########################
  4. ## to create matrices ##
  5. ########################
  6. create_matrices <- function(varnames, hyp){
  7.  
  8.   #varnames <- variable.names(object) #provides the variable names of the linear model object, including intercept
  9.   if(is.null(varnames)) stop("Please input proper linear model object")
  10.   varnames <- gsub("(\\(Intercept\\))", "Intercept", varnames) #remove parentheses around intercept so these don't conflict later
  11.  
  12.   hyp2 <- gsub("[ \n]", "", hyp) #removes all whitespace
  13.   hyp2 <- gsub("(\\(Intercept\\))", "Intercept", hyp2) #If hyp re. intercept input with surrounding parentheses, remove
  14.   if(!grepl("^[0-9a-zA-Z><=,().-]+$", hyp2)) stop("Impermissable characters in hypotheses") #Self-explanatory. NEW parentehese
  15.   if(grepl("[><=]{2,}", hyp2)) stop("Do not use combined comparison signs e.g., '>=' or '=='")
  16.  
  17.   step1 <- unlist(strsplit(hyp2, split = "[<>=,()]")) #split by special characters and unlist
  18.   input_vars <- step1[grep("[a-zA-Z]+", step1)] #extract subunits that contain at least one letter
  19.   #if(!all(input_vars %in% varnames)) stop("Hypothesis variable(s) not in object, check spelling") #Checks if input variables exist in lm-object
  20.  
  21.   framer <- function(x){ #As function because same code used once more later
  22.     pos_comparisons <- unlist(gregexpr("[<>=]", x)) #Gives the positions of all comparison signs
  23.     leftside <- rep(NA, length(pos_comparisons) + 1) #empty vector for loop below
  24.     rightside <- rep(NA, length(pos_comparisons) + 1) #empty vector for loop below
  25.     pos1 <- c(-1, pos_comparisons) #positions to extract data to the leftside of comparisons
  26.     pos2 <- c(pos_comparisons, nchar(x) + 1) #positions to extract data to the rightside of comparisons
  27.     for(i in seq_along(pos1)){
  28.       leftside[i] <- substring(x, pos1[i] + 1, pos1[i+1] - 1) #Extract all variables or outcomes to the leftside of a comparison sign
  29.       rightside[i] <- substring(x, pos2[i] + 1, pos2[i+1] - 1) #Extract all variables or outcomes to the rightside of a comparison sign
  30.     }
  31.     leftside <- leftside[-length(leftside)] #remove last element which is a NA due to loop formatting
  32.     rightside <- rightside[-length(rightside)] #remove last element which is a NA due to loop formatting
  33.     comparisons <- substring(x, pos_comparisons, pos_comparisons) #Extract comparison signs
  34.     data.frame(left = leftside, comp = comparisons, right = rightside, stringsAsFactors = FALSE) #hypotheses as a dataframe
  35.   }
  36.  
  37.   framed <- framer(hyp2) #hypotheses as a dataframe
  38.  
  39.   if(any(grepl(",", framed$left)) || any(grepl(",", framed$right))){ #Larger loop that deals with commas if the specified hypothesis contains any
  40.     if(nrow(framed) > 1){
  41.       for(r in 1:(nrow(framed)-1)){ #If a hypothesis has been specified with commas e.g., "X1 > 0, X2 > 0" or "(X1, X2) > X3"
  42.         if(all.equal(framed$right[r], framed$left[r+1])){ #The right hand side of the hypothesis df will be equal to the next row left side
  43.           if(substring(framed$right[r], 1, 1) == "(") { #If the first row begins with a ( as when "X1 > (X2, X3)" and opposed to "(X2, X3) > X1"
  44.             framed$right[r] <- sub("),.+", ")", framed$right[r])#If so, remove everything to the right of the parenthesis on the right hand side
  45.             framed$left[r+1] <- sub(".+),", "", framed$left[r +1])#and everything to the left of the parenthesis on the left hand side to correct the df
  46.           } else{
  47.             framed$right[r] <- sub(",.+", "", framed$right[r]) #else, remove everything to the right of the comma on the right hand side
  48.             framed$left[r+1] <- sub("[^,]+,", "", framed$left[r+1]) #and everything to the left of the comma on the left hand side to correct the df
  49.           }
  50.         }
  51.       }
  52.     }
  53.    
  54.     commas_left <- framed$left[grep(",", framed$left)] #At this point all remaining elements that contain commas should also have parentheses, check this
  55.     commas_right <- framed$right[grep(",", framed$right)] #Necessary to use is isTRUE below in case one of these contains no commas, and 'any' for several rows
  56.     if(isTRUE(any(!grepl("\\(.+)", commas_left))) || isTRUE(any(!grepl("\\(.+)", commas_right))) || #Check so rows contain parenthesis
  57.        isTRUE(any(grepl(").+", commas_left))) || isTRUE(any(grepl(").+", commas_right))) || #Check so parentheses are not followed by anything
  58.        isTRUE(any(grepl(".+\\(", commas_left))) || isTRUE(any(grepl(".+\\(", commas_right)))) { #chekc so parentheses are not preceded by anything
  59.       stop("Incorrect hypothesis syntax or extra character, check specification")
  60.     }
  61.    
  62.    
  63.     framed$left <- gsub("[()]", "", framed$left) #drop remaining parentheses
  64.     framed$right <- gsub("[()]", "", framed$right)
  65.     commas <- unique(c(grep(",", framed$left), grep(",", framed$right))) #Gives us the unique rows that still contain commas (multiple comparisons) from left or right columns
  66.    
  67.     if(length(commas) > 0){ #If there are any multiple comparisons e.g., (X1, X2) below loop separates these in
  68.       multiples <- vector("list", length = length(commas)) #Empty vector to store results for each row in loop below
  69.      
  70.       for(r in seq_along(commas)){ #for each row containing commas
  71.         several <- framed[commas,][r, ] #select row r
  72.        
  73.         if(several$comp == "="){ #If e.g., (X1, X2) = X3, convert to X1 = X2 = X3
  74.          
  75.           several <- c(several$left, several$right)
  76.           separate <- unlist(strsplit(several, split = ",")) #split by special characters and unlist
  77.           if(any(grepl("^$", several))) stop("Misplaced comma in hypothesis") #if empty element
  78.           converted_equality <- paste(separate, collapse = "=") #convert to X1 = X2 = X3 shape
  79.           multiples[[r]] <- framer(converted_equality) #hypotheses as a dataframe
  80.          
  81.         } else{ #If inequality comparison
  82.           leftvars <- unlist(strsplit(several$left, split = ",")) #separate left hand var
  83.           rightvars <- unlist(strsplit(several$right, split = ",")) #separate right hand vars
  84.           if(any(grepl("^$", leftvars)) || any(grepl("^$", rightvars))) stop("Misplaced comma in hypothesis") #if empty element
  85.          
  86.           left <- rep(leftvars, length.out = length(rightvars)*length(leftvars)) #repeat each leftvars the number of rightvars
  87.           right <- rep(rightvars, each = length(leftvars)) #complement for rightvars
  88.           comp <- rep(several$comp, length(left)) #repeat the comparison a corresponding number of times
  89.          
  90.           multiples[[r]] <- data.frame(left = left, comp = comp, right = right, stringsAsFactors = FALSE) #save as df to be able to combine with 'framed'
  91.         }
  92.       }
  93.      
  94.       framed <- framed[-commas,] #remove old unfixed rows with commas
  95.       multiples <- do.call(rbind, multiples) #make list into dataframe
  96.       framed <- rbind(multiples, framed) #recombine into one dataframe
  97.     }
  98.   } #end comma loop
  99.  
  100.   equality <- framed[framed$comp == "=",]
  101.   inequality <- framed[!framed$comp == "=",]
  102.  
  103.   #********Equality
  104.   if(nrow(equality) == 0) { #If there are no '=' comparisons set to NULL
  105.     list_equality <- NULL
  106.   } else{
  107.     outcomes <- suppressWarnings(apply(equality[, -2], 2, as.numeric)) #Convert left/right to numeric, non-numeric values (variables) coerced to NA
  108.     outcomes <- matrix(outcomes, ncol = 2, byrow = FALSE) #Conversion to matrix in case there was only one row in outcomes
  109.     if(any(rowSums(is.na(outcomes)) == 0)) stop("Value compared with value rather than variable, e.g., '2 = 2', check hypotheses")
  110.     rows <- which(rowSums(is.na(outcomes)) < 2) #which rows contain a numeric value (comparing variable to value), that is not two NA-values
  111.     specified <- t(outcomes[rows,]) #transpose so that specified comparison values are extracted in correct order below, e.g, in case when "X1 = 0, 2 = X2"
  112.     specified <- specified[!is.na(specified)] #extract specified comparison values
  113.     r_e <- ifelse(rowSums(is.na(outcomes)) == 2, 0, specified) #If variable = variable -> 0, if variable = value -> value
  114.     r_e <- matrix(r_e) #convert to matrix
  115.    
  116.     var_locations <- apply(equality[, -2], 2, function(x) ifelse(x %in% varnames, match(x, varnames), 0)) #convert non-variables to 0 and others are given their locations in varnames
  117.     var_locations <- matrix(var_locations, ncol = 2) #Necessary if only one comparison row
  118.    
  119.     R_e <- matrix(rep(0, nrow(equality)*length(varnames)), ncol = length(varnames)) #Create empty variable matrix
  120.    
  121.     for(i in seq_along(r_e)){ # for each row i in R_e, replace the columns specified in var_locations row i
  122.       if(!all(var_locations[i, ] > 0)){ #If only one variable is specified (i.e., other one is set to zero)
  123.         R_e[i, var_locations[i,]] <- 1 #Set this variable to 1 in R_e row i
  124.       } else{ #If two variables specified
  125.         R_e[i, var_locations[i,]] <- c(1, -1) #Set one column to 1 and the other to -1 in R_e row i
  126.       }
  127.     }
  128.     list_equality <- list(R_e = R_e, r_e = r_e) #Note column 1 in R_e is for intercept
  129.   }
  130.  
  131.   #********Inequality
  132.   if(nrow(inequality) == 0) { #If there are no '>' or '<' comparisons set to NULL
  133.     list_inequality <- NULL
  134.   } else{
  135.     outcomes <- suppressWarnings(apply(inequality[, -2], 2, as.numeric)) #Convert left/right to numeric, non-numeric values (variables) coerced to NA
  136.     outcomes <- matrix(outcomes, ncol = 2, byrow = FALSE) #Conversion to matrix in case there was only one row in outcomes
  137.     if(any(rowSums(is.na(outcomes)) == 0)) stop("Value compared with value rather than variable, e.g., '2 > 2', check hypotheses")
  138.     cols <- which(rowSums(is.na(outcomes)) < 2) #which columns contain a numeric value (comparing variable to value), that is not two NA-values
  139.     specified <- t(outcomes[cols,]) #transpose so that specified comparison values are extracted in correct order below
  140.     specified <- specified[!is.na(specified)] #extract specified comparison values
  141.     r_i <- ifelse(rowSums(is.na(outcomes)) == 2, 0, specified) #If variable = variable -> 0, if variable = value -> value
  142.     r_i <- matrix(r_i) #convert to matrix
  143.    
  144.     leq <- which(inequality$comp == "<") #gives the rows that contain '<' (lesser or equal) comparisons
  145.     var_locations <- apply(inequality[, -2], 2, function(x) ifelse(x %in% varnames, match(x, varnames), 0)) #convert non-variables to 0 and others are given their locations
  146.     var_locations <- matrix(var_locations, ncol = 2) #Necessary if only one comparison row
  147.    
  148.     R_i <- matrix(rep(0, nrow(inequality)*length(varnames)), ncol = length(varnames)) #Create empty variable matrix
  149.    
  150.     for(i in seq_along(r_i)){ # for each row i in R_i, replace the columns specified in var_locations row i
  151.       if(!all(var_locations[i, ] > 0)){ #If only one variable is specified (i.e., other one is set to zero)
  152.        
  153.         if(var_locations[i, 1] == 0){ #If first value is not the variable (i.e, a comparison value)
  154.           if(i %in% leq){#Then if comparison is 'lesser or equal'
  155.             value <-  1  #set variable value to 1
  156.           } else{ #else if comparison 'larger or equal'
  157.             r_i[i] <- r_i[i]*-1 #invert comparison value
  158.             value <- -1 # set variable value to -1
  159.           }
  160.         } else{ #If first value is the variable (i.e., the second is a comparison value)
  161.           if(i %in% leq){ #then if comparison is 'lesser or equal'
  162.             r_i[i] <- r_i[i]*-1 #invert comparison value
  163.             value <-  -1  #set variable value to -1
  164.           } else{
  165.             value <- 1 #else if comparison 'larger or equal' set to 1
  166.           }
  167.         }
  168.        
  169.         R_i[i, var_locations[i,]] <- value #Set this variable to 1/-1 in R_i row i
  170.        
  171.       } else{ #If two variables specified
  172.         value <- if(i %in% leq) c(-1, 1) else c(1, -1) #If comparison is 'leq' take var2 - var1, if 'larger or equal' take var1 - var2
  173.         R_i[i, var_locations[i,]] <- value #Set one column to 1 and the other to -1 in R_i row i
  174.       }
  175.     }
  176.    
  177.     list_inequality<- list(R_i = R_i, r_i = r_i) #Note column 1 in R_i is for intercept
  178.   }
  179.  
  180.   matrices <- list(equality = list_equality, inequality = list_inequality); matrices #final output
  181.  
  182. }
  183.  
  184. ###############################
  185. ##### functions to fit ########
  186. ###############################
  187.  
  188. sampling_helper = function(X,  nu, delta,  n_samples){
  189.   # number of variables
  190.   p <- ncol(X)
  191.   # number of observations
  192.   n <- nrow(X)
  193.   # number of partial correlations
  194.   pcors <- (p * (p - 1)) / 2  
  195.   # names for the partial correlations
  196.   mat_name <- matrix(unlist(lapply(colnames(X), function(x) paste(x, colnames(X), sep = ""))), p , p)
  197.   mat_name <- mat_name[upper.tri(mat_name)]
  198.   # center the data
  199.   Xhat <- X - rep(1,n)%*%t(apply(X,2,mean))
  200.   # scatter matrix
  201.   S <- t(Xhat)%*%Xhat
  202.   # storage
  203.   pcor_store <- prior_store <- matrix(NA, nrow = n_samples, ncol = pcors)
  204.   inv_cov_store <-  array(NA, c(p, p, n_samples))
  205.   # initial values
  206.   Psi <- b_inv <- diag(p)
  207.  
  208. for(i in 1:n_samples){
  209.   # draw from posterior
  210.   post <- post_helper(S = S, n = n, nu = nu, p = p, delta = delta, Psi = Psi, b_inv = b_inv)
  211.   # store partials
  212.   pcor_store[i,] <- post$pcors_post
  213.   # store the inverse
  214.   inv_cov_store[,,i] <- post$sigma_inv
  215.   # draw from prior and store
  216.   prior_store[i,] <- prior_helper(nu = nu, delta = delta, p = p)
  217.   # Psi
  218.   Psi <- post$Psi
  219. }
  220.  
  221. # transform posterior samples
  222. fisher_z_post_store <- apply(pcor_store, 2, fisher_z)
  223. fisher_z_prior_store <- apply(prior_store, 2, fisher_z)
  224.  
  225. colnames(fisher_z_prior_store) <- mat_name
  226. colnames(fisher_z_post_store) <- mat_name
  227.  
  228. # returned list
  229. list(fisher_z_post = fisher_z_post_store,
  230.      pcor_post = pcor_store,
  231.      inv_cov_post = inv_cov_store,
  232.      pcor_prior = prior_store,
  233.      fisher_z_prior = fisher_z_prior_store)
  234. }
  235.  
  236. prior_helper <- function(nu, p, delta){
  237.   # sample from inverse Wishart
  238.   inv_wish_prior <- solve(rWishart(1, df =  nu, diag(p))[,,1])
  239.   # sample from Wishart
  240.   sigma_inv_prior <- rWishart(1, df = p - 1 + delta, inv_wish_prior)[,,1]
  241.   # partical correlation matrix
  242.   pcor_mat_prior <- - diag(1/sqrt(diag(sigma_inv_prior)))%*%sigma_inv_prior%*%diag(1/sqrt(diag(sigma_inv_prior)))
  243.   pcors_prior <- pcor_mat_prior[upper.tri(pcor_mat_prior)]
  244.   pcors_prior
  245. }
  246.  
  247.  
  248. post_helper <- function(S, n, nu, p, delta, Psi, b_inv){
  249.   # precision matrix
  250.   sigma_inv <- rWishart(1, delta + n - 1, solve(Psi+S))[,,1]
  251.   # Psi
  252.   Psi <- rWishart(1, nu + delta + p - 2, solve(sigma_inv + b_inv))[,,1]
  253.   # partial correlation matrix
  254.   pcor_mat <- - diag(1/sqrt(diag(sigma_inv)))%*%sigma_inv%*%diag(1/sqrt(diag(sigma_inv)))
  255.   pcors_post = pcor_mat[upper.tri(pcor_mat)]
  256.   # returned list
  257.   list(pcors_post = pcors_post, sigma_inv = sigma_inv, Psi = Psi)
  258. }
  259.  
  260. sampling <- function(X, nu, delta, n_samples = 20000, cores = 4){
  261.   # register parallel
  262.   doSNOW::registerDoSNOW(parallel::makeCluster(cores))
  263.   # samples for each "chain"
  264.   samps <- rep(round(n_samples / cores), cores)
  265.   chains = cores
  266.   # sample from priors and posteriors
  267.   samples <- foreach(i = 1:chains, .export = c("fisher_z", "sampling_helper",
  268.                                               "prior_helper", "post_helper")) %dopar%{
  269.   sampling_helper(X = X, nu = nu, delta = delta,  n_samples = samps[i])
  270.   }
  271. }
  272. fisher_z <- function(rho){
  273.   .5*log((1+rho)/(1-rho))
  274. }
  275.  
  276. sd_helper <- function(post_samples, prior_at_zero){
  277.   prior_at_zero /  dnorm(0, mean(post_samples), sd(post_samples))
  278. }
  279.  
  280. equality_test <- function(X, nu, delta, type, pcor_names, hyp = NULL, n_samples, cores){
  281.  
  282.   if(is.null(colnames(X))){
  283.     stop("The variables must be named. These names cannot included numbers")
  284.    
  285.   }
  286.  
  287.   if(type == "equality"){
  288.     if(is.null(hyp)){
  289.       stop("equality constrained hypotheses must be specified")
  290.       }
  291.     samples <- sampling(X, nu = nu, delta = delta, n_samples = n_samples, cores)
  292.    
  293.     ERr1 <- create_matrices(varnames = pcor_names, hyp = hyp)
  294.    
  295.     prior <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_prior[,pcor_names]))
  296.     post <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_post[,pcor_names]))
  297.    
  298.     Sigma0 <- cov(prior)
  299.     Sigma1 <- cov(post)
  300.    
  301.     mu0 <- ERr1$equality$R_e %*% colMeans(prior)
  302.     mu1 <- ERr1$equality$R_e %*% colMeans(post)
  303.    
  304.     s0 <- ERr1$equality$R_e %*% Sigma0 %*% t(ERr1$equality$R_e)
  305.     s1 <- ERr1$equality$R_e %*% Sigma1 %*% t(ERr1$equality$R_e)
  306.    
  307.     BF <- mvtnorm::dmvnorm(t(ERr1$equality$r_e),  mu0, sigma = s0, log = T) -
  308.           mvtnorm::dmvnorm(t(ERr1$equality$r_e),  mu1, sigma = s1, log = T)
  309.    
  310.     results <- list(BF = exp(BF))
  311.   }
  312.   if(type == "inequality"){
  313.     if(is.null(hyp)){
  314.       stop("inequality constrained hypotheses must be specified")
  315.     }
  316.     samples <- sampling(X, nu = nu, delta = delta, n_samples = n_samples, cores)
  317.    
  318.     ERr1 <- create_matrices(varnames = pcor_names, hyp = hyp)
  319.    
  320.     prior <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_prior[,pcor_names]))
  321.     post <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_post[,pcor_names]))
  322.    
  323.     Sigma0 <- cov(prior)
  324.     Sigma1 <- cov(post)
  325.    
  326.     mu0 <- ERr1$inequality$R_i %*% colMeans(prior)
  327.     mu1 <- ERr1$inequality$R_i %*% colMeans(post)
  328.    
  329.     s0 <- ERr1$inequality$R_i %*% Sigma0 %*% t(ERr1$inequality$R_i)
  330.     s1 <- ERr1$inequality$R_i %*% Sigma1 %*% t(ERr1$inequality$R_i)
  331.    
  332.     prior_prob <- mvtnorm::pmvnorm(lower = as.vector(ERr1$inequality$r_i), upper = Inf,mean = as.vector(mu0),  sigma = s0)[1]
  333.     post_prob <- mvtnorm::pmvnorm(lower = as.vector(ERr1$inequality$r_i), upper = Inf,mean = as.vector(mu1),  sigma = s1)[1]
  334.    
  335.     results <- list(BF = post_prob / prior_prob)
  336.   }
  337.   if(type == "SD_ratio"){
  338.     mat_bf <- mat_mu <- matrix(0, p, p)
  339.    
  340.     samples <- sampling(X, nu = nu, delta = delta, n_samples = n_samples, cores)
  341.    
  342.     prior <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_prior))
  343.     post <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_post))
  344.    
  345.     mat_bf[upper.tri(mat_bf)] <- apply(post, 2, sd_helper, prior_at_zero)
  346.     mat_bf[lower.tri(mat_bf)] <- t(mat_bf)[lower.tri(mat_bf)]
  347.    
  348.     mat_mu[upper.tri(mat_mu)] <- apply(post, 2, mean)
  349.     mat_mu[lower.tri(mat_mu)] <- t(mat_mu)[lower.tri(mat_mu)]
  350.     results <- list(mat_bf = mat_bf, mat_mu = mat_mu)
  351.     }
  352.   results
  353. }
  354.  
  355.  
  356.  
  357.  
  358.  
  359. main <- bdgraph.sim(p = 20, n = 2000, graph = "AR1")
  360. X <- main$data
  361. colnames(X) <- LETTERS[1:20] # has to be named
  362.  
  363.  
  364.  
  365. hyp <- c("(BA, CA, DB) = EC")
  366. pcor_names <- c("BA", "CA", "DB", "EC")
  367.  
  368.  
  369. # sd ratio
  370. fit <-  equality_test(X = X, nu = 200, delta = 1, hyp = hyp,  
  371.                           pcor_names = pcor_names, type = "SD_ratio",
  372.                           n_samples = 2500, cores = 4)
  373.  
  374.  
  375. fit
  376.  
  377. GGMnonreg::Diagnostic(ifelse(eq_test$mat_bf > 3, 1, 0), main$G)
  378.  
  379.  
  380. # here all are equal to zero
  381. hyp <- c("(BA, CA, DB) = EC")
  382. pcor_names <- c("BA", "CA", "DB", "EC")
  383.  
  384. main <- bdgraph.sim(p = 20, n = 2000, prob = 0)
  385. X <- main$data
  386. colnames(X) <- LETTERS[1:20]
  387.  
  388.  
  389. # equality
  390. fit <-  equality_test(X = X, nu = 200, delta = 1, hyp = hyp,  
  391.                       pcor_names = pcor_names, type = "equality",
  392.                       n_samples = 2500, cores = 4)
  393.  
  394. fit
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement