Untitled

library(foreach)

########################
## to create matrices ##
########################
create_matrices <- function(varnames, hyp){

  #varnames <- variable.names(object) #provides the variable names of the linear model object, including intercept
  if(is.null(varnames)) stop("Please input proper linear model object")
  varnames <- gsub("(\\(Intercept\\))", "Intercept", varnames) #remove parentheses around intercept so these don't conflict later

  hyp2 <- gsub("[ \n]", "", hyp) #removes all whitespace
  hyp2 <- gsub("(\\(Intercept\\))", "Intercept", hyp2) #If hyp re. intercept input with surrounding parentheses, remove
  if(!grepl("^[0-9a-zA-Z><=,().-]+$", hyp2)) stop("Impermissable characters in hypotheses") #Self-explanatory. NEW parentehese
  if(grepl("[><=]{2,}", hyp2)) stop("Do not use combined comparison signs e.g., '>=' or '=='")

  step1 <- unlist(strsplit(hyp2, split = "[<>=,()]")) #split by special characters and unlist
  input_vars <- step1[grep("[a-zA-Z]+", step1)] #extract subunits that contain at least one letter
  #if(!all(input_vars %in% varnames)) stop("Hypothesis variable(s) not in object, check spelling") #Checks if input variables exist in lm-object

  framer <- function(x){ #As function because same code used once more later
    pos_comparisons <- unlist(gregexpr("[<>=]", x)) #Gives the positions of all comparison signs
    leftside <- rep(NA, length(pos_comparisons) + 1) #empty vector for loop below
    rightside <- rep(NA, length(pos_comparisons) + 1) #empty vector for loop below
    pos1 <- c(-1, pos_comparisons) #positions to extract data to the leftside of comparisons
    pos2 <- c(pos_comparisons, nchar(x) + 1) #positions to extract data to the rightside of comparisons
    for(i in seq_along(pos1)){
      leftside[i] <- substring(x, pos1[i] + 1, pos1[i+1] - 1) #Extract all variables or outcomes to the leftside of a comparison sign
      rightside[i] <- substring(x, pos2[i] + 1, pos2[i+1] - 1) #Extract all variables or outcomes to the rightside of a comparison sign
    }
    leftside <- leftside[-length(leftside)] #remove last element which is a NA due to loop formatting
    rightside <- rightside[-length(rightside)] #remove last element which is a NA due to loop formatting
    comparisons <- substring(x, pos_comparisons, pos_comparisons) #Extract comparison signs
    data.frame(left = leftside, comp = comparisons, right = rightside, stringsAsFactors = FALSE) #hypotheses as a dataframe
  }

  framed <- framer(hyp2) #hypotheses as a dataframe

  if(any(grepl(",", framed$left)) || any(grepl(",", framed$right))){ #Larger loop that deals with commas if the specified hypothesis contains any
    if(nrow(framed) > 1){
      for(r in 1:(nrow(framed)-1)){ #If a hypothesis has been specified with commas e.g., "X1 > 0, X2 > 0" or "(X1, X2) > X3"
        if(all.equal(framed$right[r], framed$left[r+1])){ #The right hand side of the hypothesis df will be equal to the next row left side
          if(substring(framed$right[r], 1, 1) == "(") { #If the first row begins with a ( as when "X1 > (X2, X3)" and opposed to "(X2, X3) > X1"
            framed$right[r] <- sub("),.+", ")", framed$right[r])#If so, remove everything to the right of the parenthesis on the right hand side
            framed$left[r+1] <- sub(".+),", "", framed$left[r +1])#and everything to the left of the parenthesis on the left hand side to correct the df
          } else{
            framed$right[r] <- sub(",.+", "", framed$right[r]) #else, remove everything to the right of the comma on the right hand side
            framed$left[r+1] <- sub("[^,]+,", "", framed$left[r+1]) #and everything to the left of the comma on the left hand side to correct the df
          }
        }
      }
    }

    commas_left <- framed$left[grep(",", framed$left)] #At this point all remaining elements that contain commas should also have parentheses, check this
    commas_right <- framed$right[grep(",", framed$right)] #Necessary to use is isTRUE below in case one of these contains no commas, and 'any' for several rows
    if(isTRUE(any(!grepl("\\(.+)", commas_left))) || isTRUE(any(!grepl("\\(.+)", commas_right))) || #Check so rows contain parenthesis
       isTRUE(any(grepl(").+", commas_left))) || isTRUE(any(grepl(").+", commas_right))) || #Check so parentheses are not followed by anything
       isTRUE(any(grepl(".+\\(", commas_left))) || isTRUE(any(grepl(".+\\(", commas_right)))) { #chekc so parentheses are not preceded by anything
      stop("Incorrect hypothesis syntax or extra character, check specification")
    }


    framed$left <- gsub("[()]", "", framed$left) #drop remaining parentheses
    framed$right <- gsub("[()]", "", framed$right)
    commas <- unique(c(grep(",", framed$left), grep(",", framed$right))) #Gives us the unique rows that still contain commas (multiple comparisons) from left or right columns

    if(length(commas) > 0){ #If there are any multiple comparisons e.g., (X1, X2) below loop separates these in
      multiples <- vector("list", length = length(commas)) #Empty vector to store results for each row in loop below

      for(r in seq_along(commas)){ #for each row containing commas
        several <- framed[commas,][r, ] #select row r

        if(several$comp == "="){ #If e.g., (X1, X2) = X3, convert to X1 = X2 = X3

          several <- c(several$left, several$right)
          separate <- unlist(strsplit(several, split = ",")) #split by special characters and unlist
          if(any(grepl("^$", several))) stop("Misplaced comma in hypothesis") #if empty element
          converted_equality <- paste(separate, collapse = "=") #convert to X1 = X2 = X3 shape
          multiples[[r]] <- framer(converted_equality) #hypotheses as a dataframe

        } else{ #If inequality comparison
          leftvars <- unlist(strsplit(several$left, split = ",")) #separate left hand var
          rightvars <- unlist(strsplit(several$right, split = ",")) #separate right hand vars
          if(any(grepl("^$", leftvars)) || any(grepl("^$", rightvars))) stop("Misplaced comma in hypothesis") #if empty element

          left <- rep(leftvars, length.out = length(rightvars)*length(leftvars)) #repeat each leftvars the number of rightvars
          right <- rep(rightvars, each = length(leftvars)) #complement for rightvars
          comp <- rep(several$comp, length(left)) #repeat the comparison a corresponding number of times

          multiples[[r]] <- data.frame(left = left, comp = comp, right = right, stringsAsFactors = FALSE) #save as df to be able to combine with 'framed'
        }
      }

      framed <- framed[-commas,] #remove old unfixed rows with commas
      multiples <- do.call(rbind, multiples) #make list into dataframe
      framed <- rbind(multiples, framed) #recombine into one dataframe
    }
  } #end comma loop

  equality <- framed[framed$comp == "=",]
  inequality <- framed[!framed$comp == "=",]

  #********Equality
  if(nrow(equality) == 0) { #If there are no '=' comparisons set to NULL
    list_equality <- NULL
  } else{
    outcomes <- suppressWarnings(apply(equality[, -2], 2, as.numeric)) #Convert left/right to numeric, non-numeric values (variables) coerced to NA
    outcomes <- matrix(outcomes, ncol = 2, byrow = FALSE) #Conversion to matrix in case there was only one row in outcomes
    if(any(rowSums(is.na(outcomes)) == 0)) stop("Value compared with value rather than variable, e.g., '2 = 2', check hypotheses")
    rows <- which(rowSums(is.na(outcomes)) < 2) #which rows contain a numeric value (comparing variable to value), that is not two NA-values
    specified <- t(outcomes[rows,]) #transpose so that specified comparison values are extracted in correct order below, e.g, in case when "X1 = 0, 2 = X2"
    specified <- specified[!is.na(specified)] #extract specified comparison values
    r_e <- ifelse(rowSums(is.na(outcomes)) == 2, 0, specified) #If variable = variable -> 0, if variable = value -> value
    r_e <- matrix(r_e) #convert to matrix

    var_locations <- apply(equality[, -2], 2, function(x) ifelse(x %in% varnames, match(x, varnames), 0)) #convert non-variables to 0 and others are given their locations in varnames
    var_locations <- matrix(var_locations, ncol = 2) #Necessary if only one comparison row

    R_e <- matrix(rep(0, nrow(equality)*length(varnames)), ncol = length(varnames)) #Create empty variable matrix

    for(i in seq_along(r_e)){ # for each row i in R_e, replace the columns specified in var_locations row i
      if(!all(var_locations[i, ] > 0)){ #If only one variable is specified (i.e., other one is set to zero)
        R_e[i, var_locations[i,]] <- 1 #Set this variable to 1 in R_e row i
      } else{ #If two variables specified
        R_e[i, var_locations[i,]] <- c(1, -1) #Set one column to 1 and the other to -1 in R_e row i
      }
    }
    list_equality <- list(R_e = R_e, r_e = r_e) #Note column 1 in R_e is for intercept
  }

  #********Inequality
  if(nrow(inequality) == 0) { #If there are no '>' or '<' comparisons set to NULL
    list_inequality <- NULL
  } else{
    outcomes <- suppressWarnings(apply(inequality[, -2], 2, as.numeric)) #Convert left/right to numeric, non-numeric values (variables) coerced to NA
    outcomes <- matrix(outcomes, ncol = 2, byrow = FALSE) #Conversion to matrix in case there was only one row in outcomes
    if(any(rowSums(is.na(outcomes)) == 0)) stop("Value compared with value rather than variable, e.g., '2 > 2', check hypotheses")
    cols <- which(rowSums(is.na(outcomes)) < 2) #which columns contain a numeric value (comparing variable to value), that is not two NA-values
    specified <- t(outcomes[cols,]) #transpose so that specified comparison values are extracted in correct order below
    specified <- specified[!is.na(specified)] #extract specified comparison values
    r_i <- ifelse(rowSums(is.na(outcomes)) == 2, 0, specified) #If variable = variable -> 0, if variable = value -> value
    r_i <- matrix(r_i) #convert to matrix

    leq <- which(inequality$comp == "<") #gives the rows that contain '<' (lesser or equal) comparisons
    var_locations <- apply(inequality[, -2], 2, function(x) ifelse(x %in% varnames, match(x, varnames), 0)) #convert non-variables to 0 and others are given their locations
    var_locations <- matrix(var_locations, ncol = 2) #Necessary if only one comparison row

    R_i <- matrix(rep(0, nrow(inequality)*length(varnames)), ncol = length(varnames)) #Create empty variable matrix

    for(i in seq_along(r_i)){ # for each row i in R_i, replace the columns specified in var_locations row i
      if(!all(var_locations[i, ] > 0)){ #If only one variable is specified (i.e., other one is set to zero)

        if(var_locations[i, 1] == 0){ #If first value is not the variable (i.e, a comparison value)
          if(i %in% leq){#Then if comparison is 'lesser or equal'
            value <-  1  #set variable value to 1
          } else{ #else if comparison 'larger or equal'
            r_i[i] <- r_i[i]*-1 #invert comparison value
            value <- -1 # set variable value to -1
          }
        } else{ #If first value is the variable (i.e., the second is a comparison value)
          if(i %in% leq){ #then if comparison is 'lesser or equal'
            r_i[i] <- r_i[i]*-1 #invert comparison value
            value <-  -1  #set variable value to -1
          } else{
            value <- 1 #else if comparison 'larger or equal' set to 1
          }
        }

        R_i[i, var_locations[i,]] <- value #Set this variable to 1/-1 in R_i row i

      } else{ #If two variables specified
        value <- if(i %in% leq) c(-1, 1) else c(1, -1) #If comparison is 'leq' take var2 - var1, if 'larger or equal' take var1 - var2
        R_i[i, var_locations[i,]] <- value #Set one column to 1 and the other to -1 in R_i row i
      }
    }

    list_inequality<- list(R_i = R_i, r_i = r_i) #Note column 1 in R_i is for intercept
  }

  matrices <- list(equality = list_equality, inequality = list_inequality); matrices #final output

}

###############################
##### functions to fit ########
###############################

sampling_helper = function(X,  nu, delta,  n_samples){
  # number of variables
  p <- ncol(X)
  # number of observations
  n <- nrow(X)
  # number of partial correlations
  pcors <- (p * (p - 1)) / 2
  # names for the partial correlations
  mat_name <- matrix(unlist(lapply(colnames(X), function(x) paste(x, colnames(X), sep = ""))), p , p)
  mat_name <- mat_name[upper.tri(mat_name)]
  # center the data
  Xhat <- X - rep(1,n)%*%t(apply(X,2,mean))
  # scatter matrix
  S <- t(Xhat)%*%Xhat
  # storage
  pcor_store <- prior_store <- matrix(NA, nrow = n_samples, ncol = pcors)
  inv_cov_store <-  array(NA, c(p, p, n_samples))
  # initial values
  Psi <- b_inv <- diag(p)

for(i in 1:n_samples){
  # draw from posterior
  post <- post_helper(S = S, n = n, nu = nu, p = p, delta = delta, Psi = Psi, b_inv = b_inv)
  # store partials
  pcor_store[i,] <- post$pcors_post
  # store the inverse
  inv_cov_store[,,i] <- post$sigma_inv
  # draw from prior and store
  prior_store[i,] <- prior_helper(nu = nu, delta = delta, p = p)
  # Psi
  Psi <- post$Psi
}

# transform posterior samples
fisher_z_post_store <- apply(pcor_store, 2, fisher_z)
fisher_z_prior_store <- apply(prior_store, 2, fisher_z)

colnames(fisher_z_prior_store) <- mat_name
colnames(fisher_z_post_store) <- mat_name

# returned list
list(fisher_z_post = fisher_z_post_store,
     pcor_post = pcor_store,
     inv_cov_post = inv_cov_store,
     pcor_prior = prior_store,
     fisher_z_prior = fisher_z_prior_store)
}

prior_helper <- function(nu, p, delta){
  # sample from inverse Wishart
  inv_wish_prior <- solve(rWishart(1, df =  nu, diag(p))[,,1])
  # sample from Wishart
  sigma_inv_prior <- rWishart(1, df = p - 1 + delta, inv_wish_prior)[,,1]
  # partical correlation matrix
  pcor_mat_prior <- - diag(1/sqrt(diag(sigma_inv_prior)))%*%sigma_inv_prior%*%diag(1/sqrt(diag(sigma_inv_prior)))
  pcors_prior <- pcor_mat_prior[upper.tri(pcor_mat_prior)]
  pcors_prior
}


post_helper <- function(S, n, nu, p, delta, Psi, b_inv){
  # precision matrix
  sigma_inv <- rWishart(1, delta + n - 1, solve(Psi+S))[,,1]
  # Psi
  Psi <- rWishart(1, nu + delta + p - 2, solve(sigma_inv + b_inv))[,,1]
  # partial correlation matrix
  pcor_mat <- - diag(1/sqrt(diag(sigma_inv)))%*%sigma_inv%*%diag(1/sqrt(diag(sigma_inv)))
  pcors_post = pcor_mat[upper.tri(pcor_mat)]
  # returned list
  list(pcors_post = pcors_post, sigma_inv = sigma_inv, Psi = Psi)
}

sampling <- function(X, nu, delta, n_samples = 20000, cores = 4){
  # register parallel
  doSNOW::registerDoSNOW(parallel::makeCluster(cores))
  # samples for each "chain"
  samps <- rep(round(n_samples / cores), cores)
  chains = cores
  # sample from priors and posteriors
  samples <- foreach(i = 1:chains, .export = c("fisher_z", "sampling_helper",
                                              "prior_helper", "post_helper")) %dopar%{
  sampling_helper(X = X, nu = nu, delta = delta,  n_samples = samps[i])
  }
}
fisher_z <- function(rho){
  .5*log((1+rho)/(1-rho))
}

sd_helper <- function(post_samples, prior_at_zero){
  prior_at_zero /  dnorm(0, mean(post_samples), sd(post_samples))
}

equality_test <- function(X, nu, delta, type, pcor_names, hyp = NULL, n_samples, cores){

  if(is.null(colnames(X))){
    stop("The variables must be named. These names cannot included numbers")

  }

  if(type == "equality"){
    if(is.null(hyp)){
      stop("equality constrained hypotheses must be specified")
      }
    samples <- sampling(X, nu = nu, delta = delta, n_samples = n_samples, cores)

    ERr1 <- create_matrices(varnames = pcor_names, hyp = hyp)

    prior <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_prior[,pcor_names]))
    post <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_post[,pcor_names]))

    Sigma0 <- cov(prior)
    Sigma1 <- cov(post)

    mu0 <- ERr1$equality$R_e %*% colMeans(prior)
    mu1 <- ERr1$equality$R_e %*% colMeans(post)

    s0 <- ERr1$equality$R_e %*% Sigma0 %*% t(ERr1$equality$R_e)
    s1 <- ERr1$equality$R_e %*% Sigma1 %*% t(ERr1$equality$R_e)

    BF <- mvtnorm::dmvnorm(t(ERr1$equality$r_e),  mu0, sigma = s0, log = T) -
          mvtnorm::dmvnorm(t(ERr1$equality$r_e),  mu1, sigma = s1, log = T)

    results <- list(BF = exp(BF))
  }
  if(type == "inequality"){
    if(is.null(hyp)){
      stop("inequality constrained hypotheses must be specified")
    }
    samples <- sampling(X, nu = nu, delta = delta, n_samples = n_samples, cores)

    ERr1 <- create_matrices(varnames = pcor_names, hyp = hyp)

    prior <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_prior[,pcor_names]))
    post <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_post[,pcor_names]))

    Sigma0 <- cov(prior)
    Sigma1 <- cov(post)

    mu0 <- ERr1$inequality$R_i %*% colMeans(prior)
    mu1 <- ERr1$inequality$R_i %*% colMeans(post)

    s0 <- ERr1$inequality$R_i %*% Sigma0 %*% t(ERr1$inequality$R_i)
    s1 <- ERr1$inequality$R_i %*% Sigma1 %*% t(ERr1$inequality$R_i)

    prior_prob <- mvtnorm::pmvnorm(lower = as.vector(ERr1$inequality$r_i), upper = Inf,mean = as.vector(mu0),  sigma = s0)[1]
    post_prob <- mvtnorm::pmvnorm(lower = as.vector(ERr1$inequality$r_i), upper = Inf,mean = as.vector(mu1),  sigma = s1)[1]

    results <- list(BF = post_prob / prior_prob)
  }
  if(type == "SD_ratio"){
    mat_bf <- mat_mu <- matrix(0, p, p)

    samples <- sampling(X, nu = nu, delta = delta, n_samples = n_samples, cores)

    prior <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_prior))
    post <- do.call(rbind, lapply(samples, function(x)  x$fisher_z_post))

    mat_bf[upper.tri(mat_bf)] <- apply(post, 2, sd_helper, prior_at_zero)
    mat_bf[lower.tri(mat_bf)] <- t(mat_bf)[lower.tri(mat_bf)]

    mat_mu[upper.tri(mat_mu)] <- apply(post, 2, mean)
    mat_mu[lower.tri(mat_mu)] <- t(mat_mu)[lower.tri(mat_mu)]
    results <- list(mat_bf = mat_bf, mat_mu = mat_mu)
    }
  results
}


main <- bdgraph.sim(p = 20, n = 2000, graph = "AR1")
X <- main$data
colnames(X) <- LETTERS[1:20] # has to be named


hyp <- c("(BA, CA, DB) = EC")
pcor_names <- c("BA", "CA", "DB", "EC")


# sd ratio
fit <-  equality_test(X = X, nu = 200, delta = 1, hyp = hyp,
                          pcor_names = pcor_names, type = "SD_ratio",
                          n_samples = 2500, cores = 4)


fit

GGMnonreg::Diagnostic(ifelse(eq_test$mat_bf > 3, 1, 0), main$G)


# here all are equal to zero
hyp <- c("(BA, CA, DB) = EC")
pcor_names <- c("BA", "CA", "DB", "EC")

main <- bdgraph.sim(p = 20, n = 2000, prob = 0)
X <- main$data
colnames(X) <- LETTERS[1:20]


# equality
fit <-  equality_test(X = X, nu = 200, delta = 1, hyp = hyp,
                      pcor_names = pcor_names, type = "equality",
                      n_samples = 2500, cores = 4)

fit