paper_oat

pass = function(data, var, type){
  # Function for passing variables to different types
  # Arguments:
  # data = the dataset
  # var = the variable (e.g, var = "GY" or var = c("GY", "NGP"))
  var = var
  data[var] = lapply(data[var], type)
  return(data)
}


theme_journal = function () {
  ## Helper ggplot theme function
  theme_bw() %+replace%
    theme(axis.ticks.length = unit(.2, "cm"),
          axis.text = element_text(size = 12, colour = "black"),
          axis.title = element_text(size = 12, colour = "black"),
          axis.ticks = element_line(colour = "black"),
          panel.border = element_rect(colour = "black", fill=NA, size=0.5),
          panel.grid =  element_blank())
}

theme_journal2 = function(){
  ## Helper ggplot theme function
  theme(axis.ticks.length = unit(.2, "cm"),
        axis.text = element_text(family = "serif", size = 8, colour = "black"),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.25 ),
        axis.title = element_text(family = "serif", size = 8, colour = "black"),
        axis.ticks = element_line(colour = "black"),
        plot.margin = margin(0.2, 0.2, 0.2, 0.7, "cm"),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5),
        panel.grid.major.x = element_blank(), panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
        panel.spacing =  unit(0, "lines"))
}

mixed_oat = function(dataset, colvar = 6, nvariables = 16){
  #############################################################################
  # Function for mixed model applied in a dataset with several variables
  # Dataset mus be the following structure of collumns:
  # Year (factor), Fungicide (factor), Cultivar (factor), Block (factor) and
  # the response variables.. In or case, 16 numeric variables.
  # Arguments:
  # dataset = the data
  # colvar = the number of the column with the first variable
  # nvariables = the total number of variables to be analysed
  #############################################################################
  nlin = 0
  # function for labeling significant F value for fixed effects.
  sign = function(data){
    pval =  fixed[["Pr(>F)"]][1:3]
    sig = symnum(
      pval, corr = FALSE, na = FALSE,
      cutpoints = c(0, 0.001, 0.01, 0.05, 1),
      symbols = c("***", "**", "*", "ns")
    )
    return(sig)
  }
  # function for labeling significant Chi-square in the LRT test.
  sign2 = function(data){
    pval =  LRT[["Pr(>Chisq)"]][1:5]
    sig = symnum(
      pval, corr = FALSE, na = FALSE,
      cutpoints = c(0, 0.001, 0.01, 0.05, 1),
      symbols = c("***", "**", "*", "ns")
    )
    return(sig)
  }
  # Creating dataframes for saving the values
  PREDOAT = dataset[,c( "ANO", "FUNG", "CULTIVAR", "Bloco")]
  LRT_res = data.frame(matrix(ncol = 6, nrow = nvariables))
  names(LRT_res) = c("VAR", "B/A", "CULT", "CUL:ANO", "CUL:FUN", "C:F:A")
  VARCOMP = data.frame(matrix(ncol = 7, nrow = nvariables))
  names(VARCOMP) = c("VAR", "C:A:F", "C:A", "C:F", "C", "B/A", "RES")
  plots = list()
  ###################################################
  pb <- winProgressBar(title = "Anova with mixed models",
                       label = "processing data",
                       min = 0,
                       width = 400,
                       max = nvariables,
                       initial = 0)
  for (i in colvar:ncol(dataset)) {
    varname = names(dataset)[i]
    nlin = nlin + 1

    Complete = as.formula(paste0(paste0(varname,"  ~ ",
                                        "ANO * FUNG +
                                        (1|BWE) +
                                        (1|CULTIVAR) +
                                        (1|CULTIVAR:ANO) +
                                        (1|CULTIVAR:FUNG) +
                                        (1|CULTIVAR:ANO:FUNG)")))
    Complete = lmerTest::lmer(Complete, data = dataset)
    random = broom::tidy(Complete, effects = "ran_pars")
    VARCOMP[nlin,1] =  varname
    VARCOMP[nlin,2] =  as.numeric(random[1,3]^2)
    VARCOMP[nlin,3] =  as.numeric(random[2,3]^2)
    VARCOMP[nlin,4] =  as.numeric(random[3,3]^2)
    VARCOMP[nlin,5] =  as.numeric(random[4,3]^2)
    VARCOMP[nlin,6] =  as.numeric(random[5,3]^2)
    VARCOMP[nlin,7] =  as.numeric(random[6,3]^2)
    if (i == colvar){
      # saving the residuals for future QQ-plot
      df = cbind(aveias[,c(1:4)], var = aveias[(match(varname, names(aveias)))]) %>%
        mutate(res = residuals(Complete)) %>%
        group_by(ANO, FUNG, CULTIVAR) %>%
        summarise_all(., funs(mean))
      plots[[paste(varname)]] =   ggplot(df, aes(sample = res)) +
        stat_qq(size = 1) +
        stat_qq_line(col = "red") +
        ggtitle(paste(varname)) +
        theme_journal() +
        theme(aspect.ratio = 1,
              axis.title = element_text(size = 8),
              axis.text = element_text(size = 8),
              plot.title = element_text(size = 10))

      ############ fixed effects #################
      fixed = anova(Complete, ddf="Kenward-Roger")
      fixed2 = round(fixed[5], 3)
      fixed2[["F value"]] = paste0(fixed2[["F value"]], sign(fixed))
      names(fixed2)[which(colnames(fixed2) == "F value")] = varname
      fixed2 = data.frame(fixed2)

      # Prdicted values of the model
      PREDOAT = mutate(PREDOAT,
                       var = predict(Complete))
      names(PREDOAT)[which(colnames(PREDOAT) == "var")] = varname
      data = get_model_data(Complete, type = "re")
      # BLUP for three-way interaction
      blupCAF = data.frame(do.call('rbind', strsplit(as.character(data[[1]]$term),':',fixed = TRUE)))
      colnames(blupCAF) = c("CULT", "ANO", "FUNG")
      blupCAF = mutate(blupCAF,
                       var = get_model_data(Complete, type = "re")[[1]]$estimate)
      names(blupCAF)[which(colnames(blupCAF) == "var")] = varname
      # BLUP for cultivar x year interaction
      blupCA = data.frame(do.call('rbind', strsplit(as.character(data[[2]]$term),':',fixed = TRUE)))
      colnames(blupCA) = c("CULT", "ANO")
      blupCA = mutate(blupCA,
                      var = get_model_data(Complete, type = "re")[[2]]$estimate)
      names(blupCA)[which(colnames(blupCA) == "var")] = varname
      # BLUP for cultivar x fungicide interaction
      blupCF = data.frame(do.call('rbind', strsplit(as.character(data[[3]]$term),':',fixed = TRUE)))
      colnames(blupCF) = c("CULT", "FUNG")
      blupCF = mutate(blupCF,
                      var = get_model_data(Complete, type = "re")[[3]]$estimate)
      names(blupCF)[which(colnames(blupCF) == "var")] = varname
      # BLUP for cultivars
      blupC = data.frame(do.call('rbind', strsplit(as.character(data[[4]]$term),':',fixed = TRUE)))
      colnames(blupC) = c("CULT")
      blupC = mutate(blupC,
                     var = get_model_data(Complete, type = "re")[[4]]$estimate)
      names(blupC)[which(colnames(blupC) == "var")] = varname
    }

    if (i > colvar){
      # saving the residuals for future QQ-plot
      df = cbind(aveias[,c(1:4)], var = aveias[(match(varname, names(aveias)))]) %>%
        mutate(res = residuals(Complete)) %>%
        group_by(ANO, FUNG, CULTIVAR) %>%
        summarise_all(., funs(mean))
      plots[[paste(varname)]] =   ggplot(df, aes(sample = res)) +
        stat_qq(size = 1) +
        stat_qq_line(col = "red")+
        ggtitle(paste(varname))+
        theme_journal()+
        theme(aspect.ratio = 1,
              axis.title = element_text(size = 8),
              axis.text = element_text(size = 8),
              plot.title = element_text(size = 10))

      # Significance of fixed effects
      fixed = anova(Complete, ddf="Kenward-Roger")
      fixed3 = round(fixed[5], 3)
      fixed2[["F value"]] = paste0(fixed3[["F value"]], sign(fixed))
      names(fixed2)[which(colnames(fixed2) == "F value")] = varname

      PREDOAT = mutate(PREDOAT,
                       var = predict(Complete))
      names(PREDOAT)[which(colnames(PREDOAT) == "var")] = varname
      ###########
      blupCAF = mutate(blupCAF,
                       var = get_model_data(Complete, type = "re")[[1]]$estimate)
      names(blupCAF)[which(colnames(blupCAF) == "var")] = varname
      ###########
      blupCA = mutate(blupCA,
                      var = get_model_data(Complete, type = "re")[[2]]$estimate)
      names(blupCA)[which(colnames(blupCA) == "var")] = varname
      ###########
      blupCF = mutate(blupCF,
                      var = get_model_data(Complete, type = "re")[[3]]$estimate)
      names(blupCF)[which(colnames(blupCF) == "var")] = varname
      ###########
      blupC = mutate(blupC,
                     var = get_model_data(Complete, type = "re")[[4]]$estimate)
      names(blupC)[which(colnames(blupC) == "var")] = varname
    }

    # Deviance analysis for random effects
    LRT = ranova(Complete)[2:6,]
    LRT_res[nlin,1] =  varname
    LRT_res[nlin,2:6] = paste0(round(t(LRT)[4,],4), sign2(LRT))

    setWinProgressBar(pb, nlin, title=paste("Analizyng the variable", varname," - ",
                                            round(nlin/nvariables*100,1),"% Concluded -"))

  }
  # Creating the final files and removing temporary dataframes
  fixed = fixed2 %>% t() %>% as.data.frame()
  PREDOAT = PREDOAT %>%
    group_by(ANO, FUNG, CULTIVAR) %>%
    summarise_all(funs(mean(., na.rm = T))) %>%
    subset(select = -c(Bloco))
  close(pb)
  return(list(blupC = blupC,
              blupCA = blupCA,
              blupCAF = blupCAF,
              blupCF = blupCF,
              fixed = fixed,
              LRT_res = LRT_res,
              resplot = plots,
              PREDOAT = PREDOAT,
              VARCOMP = VARCOMP))

}


distdend = function(data,
                    scale = FALSE,
                    selvar = FALSE,
                    results = TRUE,
                    dendrogram = TRUE,
                    pvclust = FALSE,
                    verbose = TRUE,
                    nboot = 1000,
                    alpha = 0.95,
                    distmethod = "euclidean",
                    clustmethod = "average",
                    type = "rectangle",
                    nclust = NULL,
                    ...){
  # Function for computing distances and selecting variables for hierarchical clustering
  # Arguments:
  # data: the dataset (must be a two-way table with cultivars in the rows and variables in the collum)
  # scale: scale the variables (zero mean and unit variance)?
  # selvar: run the algorithm for selecting variables?
  # results: save the results to an object?
  # dendogram: should a dendrogram be created?
  # pvclust: should a bootstrap be used for indentifying the number of clusters? (see ?pvclust)
  # verbose: should the procedure run silently?
  # nboot: the number of bootstrap permutations if 'pvclust = TRUE'
  # alpha: the confidence level for idenfifying the groups
  # distmethod, clustmethod: the distance metric and the clustering method to be used.
  # nclust: the number of clusters to cut the dendrogram.
    if (scale == TRUE & selvar == TRUE){
    stop("The variable selection algorithm can only be executed with non-standardized data. Select 'scale = FALSE'. ")
  }
  if (scale == TRUE){
    data = data.frame(scale(data))
  } else{data = data}

  if (selvar == TRUE){
    n = (ncol(data)-1)
    statistics = data.frame(matrix(nrow = n, ncol = 6))
    ModelEstimates = list()
    modelcode = 1
    namesv = "-"
    original = data
    dein = factoextra::get_dist(original, method = distmethod, diag = T, upper = T)
    for (i in 1:n){
      de = factoextra::get_dist(data, method = distmethod, diag = T, upper = T)
      hc = hclust(de, method = clustmethod)
      d2 = cophenetic(hc)
      cof = cor(d2, de)
      mant = ade4::mantel.rtest(de, dein, nrepet = 1000)
      mantc = mant$obs
      mantp = mant$pvalue
      evect = data.frame(t(prcomp(data)$rotation))
      var = abs(evect)[nrow(evect),]
      names = apply(var, 1, function(x) which(x == max(x)))
      npred = ncol(data)
      statistics[i,1] = paste("Model",modelcode)
      statistics[i,2] = namesv
      statistics[i,3] = cof
      statistics[i,4] = npred
      statistics[i,5] = mantc
      statistics[i,6] = mantp
      mat = as.matrix(de)
      mat = as.data.frame(mat)
      Results = list(nvars = npred,
                     excluded = namesv,
                     namevars = names(data),
                     distance = mat,
                     cormantel = mantc,
                     pvmant = mantp)
      namesv = names(data[names])
      data2 = data.frame(data[-(match(c(namesv), names(data)))])
      data = data2
      ModelEstimates[[paste("Model",modelcode)]] = Results

      names(statistics) = c("Model", "excluded", "cophenetic", "remaining", "cormantel", "pvmantel")
      if(verbose == TRUE){
        cat(paste("Calculating model ",modelcode, " with ", npred,
                  " variables.", "'",namesv,"'", "excluded in this step (",
                  round(modelcode/n*100,1),"%).", "\n"))
      }
      modelcode = modelcode + 1

    }
    cat("Done!","\n")
    cat("\n\n")
    cat("--------------------------------------------------------------------------","\n")
    cat("Summary of the adjusted models","\n")
    cat("--------------------------------------------------------------------------","\n")
    print(statistics)
    cat("--------------------------------------------------------------------------")

    cofgrap = ggplot2::ggplot(statistics, ggplot2::aes(x = remaining, y = cophenetic))+
      ggplot2::geom_point(size = 3)+
      ggplot2::theme_bw()+
      ggplot2::geom_line(size = 1)+
      ggplot2::theme(axis.ticks.length = unit(.2, "cm"),
                     axis.text = ggplot2::element_text(size = 12, colour = "black"),
                     axis.title = ggplot2::element_text(size = 12, colour = "black"),
                     axis.ticks = ggplot2::element_line(colour = "black"),
                     plot.margin = margin(0.5, 0.5, 0.2, 0.6, "cm"),
                     axis.title.y = ggplot2::element_text(margin = margin(r=16)),
                     legend.title = ggplot2::element_blank(),
                     legend.text = ggplot2::element_text(size=12),
                     panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
                     panel.grid.major.x = ggplot2::element_blank(),
                     panel.grid.major.y = ggplot2::element_blank(),
                     panel.grid.minor.x = ggplot2::element_blank(),
                     panel.grid.minor.y = ggplot2::element_blank())+
      ggplot2::labs(x = "Number of variables",
                    y = "Cophenetic correlation coefficient")

    model = statistics$Model[which.max(statistics$cophenetic)]
    predvar = ModelEstimates[[model]]$namevars
    data = data.frame(original[(match(c(predvar), names(original)))])
    cat("\n\n")
    cat("Suggested variables to be used in the analysis","\n")
    cat("--------------------------------------------------------------------------","\n")
    cat("The distance were calculated based on the variables in ", model,".", "\n",
        "The variables included in this model were...","\n",
        predvar,"\n")
    cat("--------------------------------------------------------------------------")

  } else{data = data}

  if(pvclust == T & distmethod == "gower"){
    stop("The pvclust procedure cannot be used using Gower's distance.")
  } else{
    if(sum(sapply(data,is.numeric))!= ncol(data) & distmethod != "gower" ){
      stop("All variables need to be numeric! ")
    } else{
      if (pvclust == TRUE){
        set.seed(123)

        if (distmethod == "pearson" | distmethod == "kendall" | distmethod == "spearman" ){
          distmethod2 = "correlation"
        } else{distmethod2 = distmethod}

        dend = pvclust::pvclust(data.frame(t(data)),
                                method.dist = distmethod2,
                                method.hclust = clustmethod,
                                nboot = nboot)

        plot(dend, hang = -1, cex = 0.5)
        pvclust::pvrect(dend, alpha = alpha)
        pval = dend$edges

      }

      if (distmethod == "gower"){
        if(sum(sapply(data,is.numeric))== ncol(data) & distmethod == "gower" ){
          stop("You are using the Gower's distance, but all variables are numeric !. Use the pass () function to transform the variables. ")
        }
        de = StatMatch::gower.dist(datapc)
        rownames(de) = rownames(datapc)
        colnames(de) = rownames(datapc)
        de = as.dist(de)
      } else{

        de = factoextra::get_dist(data, method = distmethod, diag = T, upper = T)
      }
      mat = as.matrix(de)
      mat = as.data.frame(mat)

      if(is.null(nclust)==F){
        hc = factoextra::hcut(de, hc_method = clustmethod, k = nclust)
      } else{
        hc = hclust(de, method = clustmethod)
      }
      out  = factoextra::fviz_dend(hc,
                                   main = "",
                                   k = nclust,
                                   type = type,
                                   ...)
      d2 = cophenetic(hc)
      cof = cor(d2, de)
      k = 1.25
      pcorte = mean(hc$height) + k * sd(hc$height)

      if(is.null(nclust)==F){
        ctree = cutree(hc,nclust)
        cl.stats = fpc::cluster.stats(d = hc, clustering = ctree)
        cl.names = list()
        cl.code = 0
        for(k in 1:nclust){
          res = rownames(data)[ctree == k]
          cl.code = cl.code + 1
          cl.names[[paste("Cluster",cl.code)]] = res
        }
      } else {
        cl.stats = NULL
        cl.names = NULL}
      if (pvclust == TRUE){
        pval = pval
        dend = dend
      } else{
        pval = NULL
        dend = NULL
      }

      if (results == TRUE){
        if(dendrogram == TRUE){
          if(selvar == TRUE){
            return(structure(list(statistics = statistics,
                                  models = ModelEstimates,
                                  cofgrap = cofgrap,
                                  graphic = out,
                                  hc = hc,
                                  distances = mat,
                                  cl.stats = cl.stats,
                                  cl.names = cl.names,
                                  cophenetic = cof,
                                  cut = pcorte,
                                  pval = pval,
                                  dend = dend),
                             class = "distdend"))
          }else{
            return(structure(list(graphic = out,
                                  hc = hc,
                                  distances = mat,
                                  cl.stats = cl.stats,
                                  cl.names = cl.names,
                                  cophenetic = cof,
                                  cut = pcorte,
                                  pval = pval,
                                  dend = dend),
                             class = "distdend"))
          }
        } else{
          if(selvar == TRUE){
            return(structure(list(statistics = statistics,
                                  models = ModelEstimates,
                                  distances = mat,
                                  cl.stats = cl.stats,
                                  cl.names = cl.names,
                                  cophenetic = cof,
                                  cut = pcorte,
                                  pval = pval,
                                  dend = dend),
                             class = "distdend"))
          }else{
            return(structure(list(distances = mat,
                                  cl.stats = cl.stats,
                                  cl.names = cl.names,
                                  cophenetic = cof,
                                  cut = pcorte,
                                  pval = pval,
                                  dend = dend),
                             class = "distdend"))
          }
        }
      } else{
        if(dendrogram == TRUE){
          return(out)
        } else{
          stop("Invalid arguments. At least one of  'dendrogram' or' results' argument must be 'TRUE'. Nothing was generated. ")
        }
      }

    }
  }
}


pairs.mantel = function(data = list(x, y, ...),
                        nrepet = 1000,
                        names = NULL,
                        prob = 0.05,
                        diag = FALSE,
                        export = FALSE,
                        main = "auto",
                        file.type = "pdf",
                        file.name = NULL,
                        width = 8,
                        height = 7,
                        resolution = 300,
                        size.point = 0.5,
                        shape.point = 19,
                        alpha.point = 1,
                        fill.point = NULL,
                        col.point = "black",
                        minsize = 2,
                        maxsize = 3,
                        signcol = "green",
                        alpha = 0.15,
                        diagcol = "gray",
                        col.up.panel = "gray",
                        col.lw.panel = "gray",
                        col.dia.panel = "black",
                        pan.spacing = 0.15,
                        digits = 2){
  # Function for computing Mantel's correlation in a list of correlation matrices or a list of a 'distdend' objects.
  # A correlation-like plot is returned
  w = c(21:25)
  if(is.null(fill.point)==TRUE && any(w == shape.point)){
    stop("If 'shape.point' is a value between 21 and 25, you must provide a color for fill the shape using the argument 'fill.point.'")
  }

  if (length(which(sapply(data, function(x)  psych::isCorrelation(x))==FALSE)) != length(data)){
    if(length(which(sapply(data, function(x) identical(nrow(x), ncol(x)))==TRUE)) != length(data)){
      stop("All matrices in the list must be a square matrix. Please, check and fix.")
    }
    if (length(unique(unique(sapply(data, function(x) dim(x)))[1,1:length(data)])) != 1){
      stop("All matrices in the list must be the same dimension. Please, check and fix.")
    }

    for(i in 1:length(data)){
      if(i == 1){
        Dataset = data.frame(var = as.vector(t(data[[1]])[lower.tri(data[[1]],diag=F)]))
        if(is.null(names)){
          names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix 1")
        } else {names(Dataset)[which(colnames(Dataset) == "var")] = names[1]}
      }
      if(i >= 2){
        Dataset = dplyr::mutate(Dataset,
                                var = as.vector(t(data[[i]])[lower.tri(data[[i]],diag=F)]))
        if(is.null(names)){
          names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix ", i)
        } else{names(Dataset)[which(colnames(Dataset) == "var")] = names[i]}
      }
    }

    dim = nrow(data[[1]])
  } else {


    if(length(which(sapply(data, function(x) identical(nrow(x[["distances"]]), ncol(x[["distances"]])))==TRUE)) != length(data)){
      stop("All matrices in the list must be a square matrix. Please, check and fix.")
    }
    if (length(unique(unique(sapply(data, function(x) dim(x[["distances"]])))[1,1:length(data)])) != 1){
      stop("All matrices in the list must be the same dimension. Please, check and fix.")
    }


    for(i in 1:length(data)){
      if(i == 1){
        Dataset = data.frame(var = as.vector(t(data[[1]][["distances"]])[lower.tri(data[[1]][["distances"]],diag=F)]))
        if(is.null(names)){
          names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix 1")
        } else {names(Dataset)[which(colnames(Dataset) == "var")] = names[1]}
      }
      if(i >= 2){
        Dataset = dplyr::mutate(Dataset,
                                var = as.vector(t(data[[i]][["distances"]])[lower.tri(data[[i]][["distances"]],diag=F)]))
        if(is.null(names)){
          names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix ", i)
        } else{names(Dataset)[which(colnames(Dataset) == "var")] = names[i]}
      }
    }

    dim = nrow(data[[1]][["distances"]])
  }

  my_custom_cor = function(data, mapping, color = I("black"), sizeRange = c(minsize, maxsize), ...) {
    # get the x and y data to use the other code
    x = GGally::eval_data_col(data, mapping$x)
    y = GGally::eval_data_col(data, mapping$y)

    D = matrix(nrow = dim, ncol = dim)
    D[lower.tri(D, diag = F)] = x
    D[upper.tri(D, diag = F)] = x
    diag(D) = 0

    D2 = matrix(nrow = dim, ncol = dim)
    D2[lower.tri(D2, diag = F)] = y
    D2[upper.tri(D2, diag = F)] = y
    diag(D2) = 0

    ct <- ade4::mantel.randtest(as.dist(D), as.dist(D2), nrepet = nrepet)

    sig = symnum(
      ct$pvalue, corr = FALSE, na = FALSE,
      cutpoints = c(0, 0.001, 0.01, 0.05, 1),
      symbols = c("***", "**", "*", "")
    )


    r = unname(ct$obs)
    rt = format(r, digits = digits)[1]

    # since we can't print it to get the strsize, just use the max size range
    cex = max(sizeRange)

    # helper function to calculate a useable size
    percent_of_range = function(percent, range) {
      percent * diff(range) + min(range, na.rm = TRUE)
    }

    # plot the cor value
    GGally:: ggally_text(
      label = as.character(rt),
      mapping = aes(),
      xP = 0.5, yP = 0.5,
      size = I(percent_of_range(cex * abs(r), sizeRange)),
      color = color,
      ...
    ) +
      # add the sig stars
      geom_text(
        aes_string(
          x = 0.8,
          y = 0.8
        ),
        label = sig,
        size = I(cex),
        color = color,
        ...
      ) +
      # remove all the background stuff and wrap it with a dashed line
      theme_classic() +
      theme(
        panel.background = element_rect(color = col.up.panel),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_blank()
      )
  }

  my_custom_smooth = function(data, mapping, ...) {
    x = GGally::eval_data_col(data, mapping$x)
    y = GGally::eval_data_col(data, mapping$y)

    D = matrix(nrow = dim, ncol = dim)
    D[lower.tri(D, diag = F)] = x
    D[upper.tri(D, diag = F)] = x
    diag(D) = 0

    D2 = matrix(nrow = dim, ncol = dim)
    D2[lower.tri(D2, diag = F)] = y
    D2[upper.tri(D2, diag = F)] = y
    diag(D2) = 0

    ct <- ade4::mantel.randtest(as.dist(D), as.dist(D2), nrepet = nrepet)

    pval <- unname(ct$pvalue)
    p = ggplot(data = data, mapping = mapping)

    if(is.null(fill.point)==FALSE){
      p = p + geom_point(color = I(col.point), fill = fill.point, shape = shape.point, size = size.point, alpha = alpha.point)
    } else{
      p = p + geom_point(color = I(col.point), shape = shape.point, size = size.point, alpha = alpha.point)
    }
    p = p + theme_classic() +
      theme(
        panel.background = element_rect(fill = "white", color = col.lw.panel),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_blank()
      )
    if (pval < prob) {
      p = p + theme(
        panel.background = element_rect(fill = scales::alpha(signcol, alpha)))

    }
    p
  }


  ggally_mysmooth = function(data, mapping, ...){
    ggplot(data = data, mapping=mapping) +
      geom_density(fill=alpha(diagcol, 1))+
      theme_classic() +
      theme(panel.background = element_rect(fill=alpha('white', 1), color = col.dia.panel))
  }
  if(main == "auto"){
    title = paste0("Mantel's test with ",  nrepet, " resamples")
  } else{title = main}
  if(diag == TRUE){
    diag = list(continuous = ggally_mysmooth)
  } else{diag = NULL}
  p1 = GGally::ggpairs(
    Dataset,
    title =   title,
    diag = diag,
    lower = list(continuous = my_custom_cor),
    upper = list(continuous = my_custom_smooth),
    axisLabels="none")
  ggplot2::theme_set(theme_gray()+theme(panel.spacing=grid::unit(pan.spacing,"lines")))


  if (export  ==  F|FALSE) {
    print(p1)
  } else

    if (file.type == "pdf"){
      if (is.null(file.name)){
        pdf("Pairs of Mantel's test.pdf",width = width, height = height)
      } else
        pdf(paste0(file.name, ".pdf"), width = width, height = height)
      print(p1)
      dev.off()
    }

  if (file.type == "tiff"){
    if (is.null(file.name)){
      tiff(filename = "Pairs of Mantel's test.tiff",width = width, height = height, units = "in", compression = "lzw", res = resolution)
    } else
      tiff(filename = paste0(file.name, ".tiff"), width = width, height = height, units = "in", compression = "lzw", res = resolution)
    print(p1)
    dev.off()
  }

}


make_mat = function(data, row, col, value){
  # Function for make a two-way table
  # Arguments:
  # data: the dataset in a long-format column (e.g., environment, cultivar, GY,...)
  # row: the name of the column in the data that will fill the row values in the output
  # col: the name of the column in the data that will fill the column values in the output
  # value: the response variable
  nam = cbind(c(row, col, value))
  data = data.frame(data[(match(c(nam), names(data)))])
  return(data.frame(tapply(data[, 3], data[, c(1, 2)], mean)))
}


PCA_oat = function(PC, x="PC1", y="PC2") {
  # function for obtaining the coordinates of the first two PCs for biplot representation using ggplot function
  # Arguments:
  # PC = a "princomp" object
  data = data.frame(PC$x)[1:2] %>% rownames_to_column(var = "NAME")
  datapc <- data.frame(varnames = rownames(PC$rotation), PC$rotation)
  mult = min(
    (max(data[,y]) - min(data[,y])/(max(datapc[,y])-min(datapc[,y]))),
    (max(data[,x]) - min(data[,x])/(max(datapc[,x])-min(datapc[,x])))
  )
  datapc = transform(datapc,
                     v1 = .7 * mult * (get(x)),
                     v2 = .7 * mult * (get(y))) %>%
    subset(select = c(v1, v2)) %>%
    rownames_to_column(var = "NAME")
  names(datapc)[2:3] = c(x, y)
  datapc = rbind(datapc, data)
  datapc[1:6, 1] = c("2015", "2015", "2016", "2016", "2017", "2017")
  datapc$TYPE = c(rep(c("NF", "WF"),3),
                  rep("Cultivar", 22))
  datapc = datapc %>%
    select(TYPE, everything())
  eigs <- PC$sdev^2
  PC1VAR = eigs[1] / sum(eigs) * 100
  PC2VAR = eigs[2] / sum(eigs) * 100
  return(list(datapc = datapc,
              PC1VAR = PC1VAR,
              PC2VAR = PC2VAR))
}


colindiag = function(x, n = NULL){
  # Function for multicoolinearity diagnosis
  # Arguments:
  # x: A correlation matrix or a dataframe with only variables. If a correlation matrix is used,
  # the sample size used to compute the correlations must be declared.

  if(!is.matrix(x) && !is.data.frame(x)){
    stop("The object 'x' must be a correlation matrix or a data.frame")
  }
  if(is.matrix(x) && is.null(n)){
    stop("You have a matrix but the sample size used to compute the correlations (n) was not declared.")
  }
  if(is.data.frame(x)){
    if(sum(sapply(x, is.numeric)==T) !=ncol(x)){
      stop("All variables must be numeric.")
    }
  }

  if(is.matrix(x)){
    cor.x = x
  }
  if(is.data.frame(x)){
    cor.x = cor(x)
    n = nrow(x)
  }
  eigen = eigen(cor.x)
  Det = det(cor.x)
  NC = max(eigen$values)/min(eigen$values)
  Aval = data.frame(eigen$values)
  names(Aval) = "Eigenvalues"
  Avet = data.frame(t(eigen$vectors))
  names(Avet) = colnames(x)
  AvAvet = cbind(Aval, Avet)
  VIF = data.frame(diag(solve(cor.x)))
  names(VIF) = "VIF"
  results = data.frame(linear = as.vector(t(cor.x)[lower.tri(cor.x,diag=F)]))
  results = dplyr::mutate(results,
                          t = linear*(sqrt(n-2))/(sqrt(1-linear^2)),
                          prob = 2*(1-pt(abs(t), df = n-2)))
  names = colnames(x)
  combnam = combn(names,2, paste, collapse = " x ")
  rownames(results) = names(sapply(combnam, names))
  largest_corr = paste0(rownames(results)[which.max(abs(results$linear))], " = ",
                        round(results[which.max(abs(results$linear)),1],3))
  smallest_corr = paste0(rownames(results)[which.min(abs(results$linear))], " = ",
                         round(results[which.min(abs(results$linear)),1],3))
  ncorhigh = sum(results$linear >= abs(0.8))
  if (NC > 1000){
    cat(paste0("Severe multicollinearity in the matrix! Pay attention on the variables listed bellow\n",
               "CN = ", round(NC,3), "\n"))
  }
  if (NC < 100){
    cat(paste0("Weak multicollinearity in the matrix\n",
               "NC = ", round(NC,3), "\n"))
  }
  if (NC > 100 & NC < 1000 ){
    cat(paste0("The multicollinearity in the matrix should be investigated.\n",
               "NC = ", round(NC,3), "\n",
               "Largest VIF = ", max(VIF), "\n"))
  }
  ultimo = data.frame(Peso=t(AvAvet[c(nrow(AvAvet)),])[-c(1),])
  abs = data.frame(Peso = abs(ultimo[,"Peso"]))
  rownames(abs) = rownames(ultimo)
  ultimo = abs[order(abs[,"Peso"], decreasing = T), , drop = FALSE]
  pesovarname = paste(rownames(ultimo), collapse = ' > ')
  final = list(cormat = data.frame(cor.x),
               corlist = results,
               evalevet = AvAvet,
               VIF = VIF,
               CN = NC,
               det = Det,
               largest_corr = largest_corr,
               smallest_corr = smallest_corr,
               weight_var = pesovarname)

  cat(paste0("Matrix determinant: ", round(Det,7)),  "\n")
  cat(paste0("Largest correlation: ", largest_corr),  "\n")
  cat(paste0("Smallest correlation: ", smallest_corr),  "\n")
  cat(paste0("Number of correlations with r >= |0.8|: ", ncorhigh),  "\n")
  cat(paste0("Variables with largest weight in the last eigenvalues: ","\n",
             pesovarname),  "\n")

  return(invisible(final))
}


CANONICAL = function(x, y){
# Function for computing canonical correlations using CCA function.
# Arguments:
# x and y: A dataframe with the variables in the first (smallest) and second (largest) group, respectively.
  can = CCA(x, y, Type = 2)
  results = list(coeff = data.frame(rbind(can$Coef.X,
                                          colnames(can$Coef.Y),
                                          can$Coef.Y)),
                 sig = data.frame(cbind(cbind(can[["Var.UV"]], can[["Corr.UV"]]),
                                        can[["SigTest"]][c(3,5,6)])))

  return(results)
}

cat("Download of functions completed. Check the R environment.")