Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(foreach)
- ########################
- ## to create matrices ##
- ########################
- create_matrices <- function(varnames, hyp){
- #varnames <- variable.names(object) #provides the variable names of the linear model object, including intercept
- if(is.null(varnames)) stop("Please input proper linear model object")
- varnames <- gsub("(\\(Intercept\\))", "Intercept", varnames) #remove parentheses around intercept so these don't conflict later
- hyp2 <- gsub("[ \n]", "", hyp) #removes all whitespace
- hyp2 <- gsub("(\\(Intercept\\))", "Intercept", hyp2) #If hyp re. intercept input with surrounding parentheses, remove
- if(!grepl("^[0-9a-zA-Z><=,().-]+$", hyp2)) stop("Impermissable characters in hypotheses") #Self-explanatory. NEW parentehese
- if(grepl("[><=]{2,}", hyp2)) stop("Do not use combined comparison signs e.g., '>=' or '=='")
- step1 <- unlist(strsplit(hyp2, split = "[<>=,()]")) #split by special characters and unlist
- input_vars <- step1[grep("[a-zA-Z]+", step1)] #extract subunits that contain at least one letter
- #if(!all(input_vars %in% varnames)) stop("Hypothesis variable(s) not in object, check spelling") #Checks if input variables exist in lm-object
- framer <- function(x){ #As function because same code used once more later
- pos_comparisons <- unlist(gregexpr("[<>=]", x)) #Gives the positions of all comparison signs
- leftside <- rep(NA, length(pos_comparisons) + 1) #empty vector for loop below
- rightside <- rep(NA, length(pos_comparisons) + 1) #empty vector for loop below
- pos1 <- c(-1, pos_comparisons) #positions to extract data to the leftside of comparisons
- pos2 <- c(pos_comparisons, nchar(x) + 1) #positions to extract data to the rightside of comparisons
- for(i in seq_along(pos1)){
- leftside[i] <- substring(x, pos1[i] + 1, pos1[i+1] - 1) #Extract all variables or outcomes to the leftside of a comparison sign
- rightside[i] <- substring(x, pos2[i] + 1, pos2[i+1] - 1) #Extract all variables or outcomes to the rightside of a comparison sign
- }
- leftside <- leftside[-length(leftside)] #remove last element which is a NA due to loop formatting
- rightside <- rightside[-length(rightside)] #remove last element which is a NA due to loop formatting
- comparisons <- substring(x, pos_comparisons, pos_comparisons) #Extract comparison signs
- data.frame(left = leftside, comp = comparisons, right = rightside, stringsAsFactors = FALSE) #hypotheses as a dataframe
- }
- framed <- framer(hyp2) #hypotheses as a dataframe
- if(any(grepl(",", framed$left)) || any(grepl(",", framed$right))){ #Larger loop that deals with commas if the specified hypothesis contains any
- if(nrow(framed) > 1){
- for(r in 1:(nrow(framed)-1)){ #If a hypothesis has been specified with commas e.g., "X1 > 0, X2 > 0" or "(X1, X2) > X3"
- if(all.equal(framed$right[r], framed$left[r+1])){ #The right hand side of the hypothesis df will be equal to the next row left side
- if(substring(framed$right[r], 1, 1) == "(") { #If the first row begins with a ( as when "X1 > (X2, X3)" and opposed to "(X2, X3) > X1"
- framed$right[r] <- sub("),.+", ")", framed$right[r])#If so, remove everything to the right of the parenthesis on the right hand side
- framed$left[r+1] <- sub(".+),", "", framed$left[r +1])#and everything to the left of the parenthesis on the left hand side to correct the df
- } else{
- framed$right[r] <- sub(",.+", "", framed$right[r]) #else, remove everything to the right of the comma on the right hand side
- framed$left[r+1] <- sub("[^,]+,", "", framed$left[r+1]) #and everything to the left of the comma on the left hand side to correct the df
- }
- }
- }
- }
- commas_left <- framed$left[grep(",", framed$left)] #At this point all remaining elements that contain commas should also have parentheses, check this
- commas_right <- framed$right[grep(",", framed$right)] #Necessary to use is isTRUE below in case one of these contains no commas, and 'any' for several rows
- if(isTRUE(any(!grepl("\\(.+)", commas_left))) || isTRUE(any(!grepl("\\(.+)", commas_right))) || #Check so rows contain parenthesis
- isTRUE(any(grepl(").+", commas_left))) || isTRUE(any(grepl(").+", commas_right))) || #Check so parentheses are not followed by anything
- isTRUE(any(grepl(".+\\(", commas_left))) || isTRUE(any(grepl(".+\\(", commas_right)))) { #chekc so parentheses are not preceded by anything
- stop("Incorrect hypothesis syntax or extra character, check specification")
- }
- framed$left <- gsub("[()]", "", framed$left) #drop remaining parentheses
- framed$right <- gsub("[()]", "", framed$right)
- commas <- unique(c(grep(",", framed$left), grep(",", framed$right))) #Gives us the unique rows that still contain commas (multiple comparisons) from left or right columns
- if(length(commas) > 0){ #If there are any multiple comparisons e.g., (X1, X2) below loop separates these in
- multiples <- vector("list", length = length(commas)) #Empty vector to store results for each row in loop below
- for(r in seq_along(commas)){ #for each row containing commas
- several <- framed[commas,][r, ] #select row r
- if(several$comp == "="){ #If e.g., (X1, X2) = X3, convert to X1 = X2 = X3
- several <- c(several$left, several$right)
- separate <- unlist(strsplit(several, split = ",")) #split by special characters and unlist
- if(any(grepl("^$", several))) stop("Misplaced comma in hypothesis") #if empty element
- converted_equality <- paste(separate, collapse = "=") #convert to X1 = X2 = X3 shape
- multiples[[r]] <- framer(converted_equality) #hypotheses as a dataframe
- } else{ #If inequality comparison
- leftvars <- unlist(strsplit(several$left, split = ",")) #separate left hand var
- rightvars <- unlist(strsplit(several$right, split = ",")) #separate right hand vars
- if(any(grepl("^$", leftvars)) || any(grepl("^$", rightvars))) stop("Misplaced comma in hypothesis") #if empty element
- left <- rep(leftvars, length.out = length(rightvars)*length(leftvars)) #repeat each leftvars the number of rightvars
- right <- rep(rightvars, each = length(leftvars)) #complement for rightvars
- comp <- rep(several$comp, length(left)) #repeat the comparison a corresponding number of times
- multiples[[r]] <- data.frame(left = left, comp = comp, right = right, stringsAsFactors = FALSE) #save as df to be able to combine with 'framed'
- }
- }
- framed <- framed[-commas,] #remove old unfixed rows with commas
- multiples <- do.call(rbind, multiples) #make list into dataframe
- framed <- rbind(multiples, framed) #recombine into one dataframe
- }
- } #end comma loop
- equality <- framed[framed$comp == "=",]
- inequality <- framed[!framed$comp == "=",]
- #********Equality
- if(nrow(equality) == 0) { #If there are no '=' comparisons set to NULL
- list_equality <- NULL
- } else{
- outcomes <- suppressWarnings(apply(equality[, -2], 2, as.numeric)) #Convert left/right to numeric, non-numeric values (variables) coerced to NA
- outcomes <- matrix(outcomes, ncol = 2, byrow = FALSE) #Conversion to matrix in case there was only one row in outcomes
- if(any(rowSums(is.na(outcomes)) == 0)) stop("Value compared with value rather than variable, e.g., '2 = 2', check hypotheses")
- rows <- which(rowSums(is.na(outcomes)) < 2) #which rows contain a numeric value (comparing variable to value), that is not two NA-values
- specified <- t(outcomes[rows,]) #transpose so that specified comparison values are extracted in correct order below, e.g, in case when "X1 = 0, 2 = X2"
- specified <- specified[!is.na(specified)] #extract specified comparison values
- r_e <- ifelse(rowSums(is.na(outcomes)) == 2, 0, specified) #If variable = variable -> 0, if variable = value -> value
- r_e <- matrix(r_e) #convert to matrix
- var_locations <- apply(equality[, -2], 2, function(x) ifelse(x %in% varnames, match(x, varnames), 0)) #convert non-variables to 0 and others are given their locations in varnames
- var_locations <- matrix(var_locations, ncol = 2) #Necessary if only one comparison row
- R_e <- matrix(rep(0, nrow(equality)*length(varnames)), ncol = length(varnames)) #Create empty variable matrix
- for(i in seq_along(r_e)){ # for each row i in R_e, replace the columns specified in var_locations row i
- if(!all(var_locations[i, ] > 0)){ #If only one variable is specified (i.e., other one is set to zero)
- R_e[i, var_locations[i,]] <- 1 #Set this variable to 1 in R_e row i
- } else{ #If two variables specified
- R_e[i, var_locations[i,]] <- c(1, -1) #Set one column to 1 and the other to -1 in R_e row i
- }
- }
- list_equality <- list(R_e = R_e, r_e = r_e) #Note column 1 in R_e is for intercept
- }
- #********Inequality
- if(nrow(inequality) == 0) { #If there are no '>' or '<' comparisons set to NULL
- list_inequality <- NULL
- } else{
- outcomes <- suppressWarnings(apply(inequality[, -2], 2, as.numeric)) #Convert left/right to numeric, non-numeric values (variables) coerced to NA
- outcomes <- matrix(outcomes, ncol = 2, byrow = FALSE) #Conversion to matrix in case there was only one row in outcomes
- if(any(rowSums(is.na(outcomes)) == 0)) stop("Value compared with value rather than variable, e.g., '2 > 2', check hypotheses")
- cols <- which(rowSums(is.na(outcomes)) < 2) #which columns contain a numeric value (comparing variable to value), that is not two NA-values
- specified <- t(outcomes[cols,]) #transpose so that specified comparison values are extracted in correct order below
- specified <- specified[!is.na(specified)] #extract specified comparison values
- r_i <- ifelse(rowSums(is.na(outcomes)) == 2, 0, specified) #If variable = variable -> 0, if variable = value -> value
- r_i <- matrix(r_i) #convert to matrix
- leq <- which(inequality$comp == "<") #gives the rows that contain '<' (lesser or equal) comparisons
- var_locations <- apply(inequality[, -2], 2, function(x) ifelse(x %in% varnames, match(x, varnames), 0)) #convert non-variables to 0 and others are given their locations
- var_locations <- matrix(var_locations, ncol = 2) #Necessary if only one comparison row
- R_i <- matrix(rep(0, nrow(inequality)*length(varnames)), ncol = length(varnames)) #Create empty variable matrix
- for(i in seq_along(r_i)){ # for each row i in R_i, replace the columns specified in var_locations row i
- if(!all(var_locations[i, ] > 0)){ #If only one variable is specified (i.e., other one is set to zero)
- if(var_locations[i, 1] == 0){ #If first value is not the variable (i.e, a comparison value)
- if(i %in% leq){#Then if comparison is 'lesser or equal'
- value <- 1 #set variable value to 1
- } else{ #else if comparison 'larger or equal'
- r_i[i] <- r_i[i]*-1 #invert comparison value
- value <- -1 # set variable value to -1
- }
- } else{ #If first value is the variable (i.e., the second is a comparison value)
- if(i %in% leq){ #then if comparison is 'lesser or equal'
- r_i[i] <- r_i[i]*-1 #invert comparison value
- value <- -1 #set variable value to -1
- } else{
- value <- 1 #else if comparison 'larger or equal' set to 1
- }
- }
- R_i[i, var_locations[i,]] <- value #Set this variable to 1/-1 in R_i row i
- } else{ #If two variables specified
- value <- if(i %in% leq) c(-1, 1) else c(1, -1) #If comparison is 'leq' take var2 - var1, if 'larger or equal' take var1 - var2
- R_i[i, var_locations[i,]] <- value #Set one column to 1 and the other to -1 in R_i row i
- }
- }
- list_inequality<- list(R_i = R_i, r_i = r_i) #Note column 1 in R_i is for intercept
- }
- matrices <- list(equality = list_equality, inequality = list_inequality); matrices #final output
- }
- ###############################
- ##### functions to fit ########
- ###############################
- sampling_helper = function(X, nu, delta, n_samples){
- # number of variables
- p <- ncol(X)
- # number of observations
- n <- nrow(X)
- # number of partial correlations
- pcors <- (p * (p - 1)) / 2
- # names for the partial correlations
- mat_name <- matrix(unlist(lapply(colnames(X), function(x) paste(x, colnames(X), sep = ""))), p , p)
- mat_name <- mat_name[upper.tri(mat_name)]
- # center the data
- Xhat <- X - rep(1,n)%*%t(apply(X,2,mean))
- # scatter matrix
- S <- t(Xhat)%*%Xhat
- # storage
- pcor_store <- prior_store <- matrix(NA, nrow = n_samples, ncol = pcors)
- inv_cov_store <- array(NA, c(p, p, n_samples))
- # initial values
- Psi <- b_inv <- diag(p)
- for(i in 1:n_samples){
- # draw from posterior
- post <- post_helper(S = S, n = n, nu = nu, p = p, delta = delta, Psi = Psi, b_inv = b_inv)
- # store partials
- pcor_store[i,] <- post$pcors_post
- # store the inverse
- inv_cov_store[,,i] <- post$sigma_inv
- # draw from prior and store
- prior_store[i,] <- prior_helper(nu = nu, delta = delta, p = p)
- # Psi
- Psi <- post$Psi
- }
- # transform posterior samples
- fisher_z_post_store <- apply(pcor_store, 2, fisher_z)
- fisher_z_prior_store <- apply(prior_store, 2, fisher_z)
- colnames(fisher_z_prior_store) <- mat_name
- colnames(fisher_z_post_store) <- mat_name
- # returned list
- list(fisher_z_post = fisher_z_post_store,
- pcor_post = pcor_store,
- inv_cov_post = inv_cov_store,
- pcor_prior = prior_store,
- fisher_z_prior = fisher_z_prior_store)
- }
- prior_helper <- function(nu, p, delta){
- # sample from inverse Wishart
- inv_wish_prior <- solve(rWishart(1, df = nu, diag(p))[,,1])
- # sample from Wishart
- sigma_inv_prior <- rWishart(1, df = p - 1 + delta, inv_wish_prior)[,,1]
- # partical correlation matrix
- pcor_mat_prior <- - diag(1/sqrt(diag(sigma_inv_prior)))%*%sigma_inv_prior%*%diag(1/sqrt(diag(sigma_inv_prior)))
- pcors_prior <- pcor_mat_prior[upper.tri(pcor_mat_prior)]
- pcors_prior
- }
- post_helper <- function(S, n, nu, p, delta, Psi, b_inv){
- # precision matrix
- sigma_inv <- rWishart(1, delta + n - 1, solve(Psi+S))[,,1]
- # Psi
- Psi <- rWishart(1, nu + delta + p - 2, solve(sigma_inv + b_inv))[,,1]
- # partial correlation matrix
- pcor_mat <- - diag(1/sqrt(diag(sigma_inv)))%*%sigma_inv%*%diag(1/sqrt(diag(sigma_inv)))
- pcors_post = pcor_mat[upper.tri(pcor_mat)]
- # returned list
- list(pcors_post = pcors_post, sigma_inv = sigma_inv, Psi = Psi)
- }
- sampling <- function(X, nu, delta, n_samples = 20000, cores = 4){
- # register parallel
- doSNOW::registerDoSNOW(parallel::makeCluster(cores))
- # samples for each "chain"
- samps <- rep(round(n_samples / cores), cores)
- chains = cores
- # sample from priors and posteriors
- samples <- foreach(i = 1:chains, .export = c("fisher_z", "sampling_helper",
- "prior_helper", "post_helper")) %dopar%{
- sampling_helper(X = X, nu = nu, delta = delta, n_samples = samps[i])
- }
- }
- fisher_z <- function(rho){
- .5*log((1+rho)/(1-rho))
- }
- sd_helper <- function(post_samples, prior_at_zero){
- prior_at_zero / dnorm(0, mean(post_samples), sd(post_samples))
- }
- equality_test <- function(X, nu, delta, type, pcor_names, hyp = NULL, n_samples, cores){
- if(is.null(colnames(X))){
- stop("The variables must be named. These names cannot included numbers")
- }
- if(type == "equality"){
- if(is.null(hyp)){
- stop("equality constrained hypotheses must be specified")
- }
- samples <- sampling(X, nu = nu, delta = delta, n_samples = n_samples, cores)
- ERr1 <- create_matrices(varnames = pcor_names, hyp = hyp)
- prior <- do.call(rbind, lapply(samples, function(x) x$fisher_z_prior[,pcor_names]))
- post <- do.call(rbind, lapply(samples, function(x) x$fisher_z_post[,pcor_names]))
- Sigma0 <- cov(prior)
- Sigma1 <- cov(post)
- mu0 <- ERr1$equality$R_e %*% colMeans(prior)
- mu1 <- ERr1$equality$R_e %*% colMeans(post)
- s0 <- ERr1$equality$R_e %*% Sigma0 %*% t(ERr1$equality$R_e)
- s1 <- ERr1$equality$R_e %*% Sigma1 %*% t(ERr1$equality$R_e)
- BF <- mvtnorm::dmvnorm(t(ERr1$equality$r_e), mu0, sigma = s0, log = T) -
- mvtnorm::dmvnorm(t(ERr1$equality$r_e), mu1, sigma = s1, log = T)
- results <- list(BF = exp(BF))
- }
- if(type == "inequality"){
- if(is.null(hyp)){
- stop("inequality constrained hypotheses must be specified")
- }
- samples <- sampling(X, nu = nu, delta = delta, n_samples = n_samples, cores)
- ERr1 <- create_matrices(varnames = pcor_names, hyp = hyp)
- prior <- do.call(rbind, lapply(samples, function(x) x$fisher_z_prior[,pcor_names]))
- post <- do.call(rbind, lapply(samples, function(x) x$fisher_z_post[,pcor_names]))
- Sigma0 <- cov(prior)
- Sigma1 <- cov(post)
- mu0 <- ERr1$inequality$R_i %*% colMeans(prior)
- mu1 <- ERr1$inequality$R_i %*% colMeans(post)
- s0 <- ERr1$inequality$R_i %*% Sigma0 %*% t(ERr1$inequality$R_i)
- s1 <- ERr1$inequality$R_i %*% Sigma1 %*% t(ERr1$inequality$R_i)
- prior_prob <- mvtnorm::pmvnorm(lower = as.vector(ERr1$inequality$r_i), upper = Inf,mean = as.vector(mu0), sigma = s0)[1]
- post_prob <- mvtnorm::pmvnorm(lower = as.vector(ERr1$inequality$r_i), upper = Inf,mean = as.vector(mu1), sigma = s1)[1]
- results <- list(BF = post_prob / prior_prob)
- }
- if(type == "SD_ratio"){
- mat_bf <- mat_mu <- matrix(0, p, p)
- samples <- sampling(X, nu = nu, delta = delta, n_samples = n_samples, cores)
- prior <- do.call(rbind, lapply(samples, function(x) x$fisher_z_prior))
- post <- do.call(rbind, lapply(samples, function(x) x$fisher_z_post))
- mat_bf[upper.tri(mat_bf)] <- apply(post, 2, sd_helper, prior_at_zero)
- mat_bf[lower.tri(mat_bf)] <- t(mat_bf)[lower.tri(mat_bf)]
- mat_mu[upper.tri(mat_mu)] <- apply(post, 2, mean)
- mat_mu[lower.tri(mat_mu)] <- t(mat_mu)[lower.tri(mat_mu)]
- results <- list(mat_bf = mat_bf, mat_mu = mat_mu)
- }
- results
- }
- main <- bdgraph.sim(p = 20, n = 2000, graph = "AR1")
- X <- main$data
- colnames(X) <- LETTERS[1:20] # has to be named
- hyp <- c("(BA, CA, DB) = EC")
- pcor_names <- c("BA", "CA", "DB", "EC")
- # sd ratio
- fit <- equality_test(X = X, nu = 200, delta = 1, hyp = hyp,
- pcor_names = pcor_names, type = "SD_ratio",
- n_samples = 2500, cores = 4)
- fit
- GGMnonreg::Diagnostic(ifelse(eq_test$mat_bf > 3, 1, 0), main$G)
- # here all are equal to zero
- hyp <- c("(BA, CA, DB) = EC")
- pcor_names <- c("BA", "CA", "DB", "EC")
- main <- bdgraph.sim(p = 20, n = 2000, prob = 0)
- X <- main$data
- colnames(X) <- LETTERS[1:20]
- # equality
- fit <- equality_test(X = X, nu = 200, delta = 1, hyp = hyp,
- pcor_names = pcor_names, type = "equality",
- n_samples = 2500, cores = 4)
- fit
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement