Advertisement
Olivoto

paper_oat

Sep 16th, 2018
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 34.71 KB | None | 0 0
  1. pass = function(data, var, type){
  2.   # Function for passing variables to different types
  3.   # Arguments:
  4.   # data = the dataset
  5.   # var = the variable (e.g, var = "GY" or var = c("GY", "NGP"))
  6.   var = var
  7.   data[var] = lapply(data[var], type)
  8.   return(data)
  9. }
  10.  
  11.  
  12. theme_journal = function () {
  13.   ## Helper ggplot theme function
  14.   theme_bw() %+replace%
  15.     theme(axis.ticks.length = unit(.2, "cm"),
  16.           axis.text = element_text(size = 12, colour = "black"),
  17.           axis.title = element_text(size = 12, colour = "black"),
  18.           axis.ticks = element_line(colour = "black"),
  19.           panel.border = element_rect(colour = "black", fill=NA, size=0.5),
  20.           panel.grid =  element_blank())
  21. }
  22.  
  23. theme_journal2 = function(){
  24.   ## Helper ggplot theme function
  25.   theme(axis.ticks.length = unit(.2, "cm"),
  26.         axis.text = element_text(family = "serif", size = 8, colour = "black"),
  27.         axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.25 ),
  28.         axis.title = element_text(family = "serif", size = 8, colour = "black"),
  29.         axis.ticks = element_line(colour = "black"),
  30.         plot.margin = margin(0.2, 0.2, 0.2, 0.7, "cm"),
  31.         panel.border = element_rect(colour = "black", fill=NA, size=0.5),
  32.         panel.grid.major.x = element_blank(), panel.grid.major.y = element_blank(),
  33.         panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
  34.         panel.spacing =  unit(0, "lines"))
  35. }
  36.  
  37. mixed_oat = function(dataset, colvar = 6, nvariables = 16){
  38.   #############################################################################
  39.   # Function for mixed model applied in a dataset with several variables
  40.   # Dataset mus be the following structure of collumns:
  41.   # Year (factor), Fungicide (factor), Cultivar (factor), Block (factor) and
  42.   # the response variables.. In or case, 16 numeric variables.
  43.   # Arguments:
  44.   # dataset = the data
  45.   # colvar = the number of the column with the first variable
  46.   # nvariables = the total number of variables to be analysed
  47.   #############################################################################
  48.   nlin = 0
  49.   # function for labeling significant F value for fixed effects.
  50.   sign = function(data){
  51.     pval =  fixed[["Pr(>F)"]][1:3]
  52.     sig = symnum(
  53.       pval, corr = FALSE, na = FALSE,
  54.       cutpoints = c(0, 0.001, 0.01, 0.05, 1),
  55.       symbols = c("***", "**", "*", "ns")
  56.     )
  57.     return(sig)
  58.   }
  59.   # function for labeling significant Chi-square in the LRT test.
  60.   sign2 = function(data){
  61.     pval =  LRT[["Pr(>Chisq)"]][1:5]
  62.     sig = symnum(
  63.       pval, corr = FALSE, na = FALSE,
  64.       cutpoints = c(0, 0.001, 0.01, 0.05, 1),
  65.       symbols = c("***", "**", "*", "ns")
  66.     )
  67.     return(sig)
  68.   }
  69.   # Creating dataframes for saving the values
  70.   PREDOAT = dataset[,c( "ANO", "FUNG", "CULTIVAR", "Bloco")]
  71.   LRT_res = data.frame(matrix(ncol = 6, nrow = nvariables))
  72.   names(LRT_res) = c("VAR", "B/A", "CULT", "CUL:ANO", "CUL:FUN", "C:F:A")
  73.   VARCOMP = data.frame(matrix(ncol = 7, nrow = nvariables))
  74.   names(VARCOMP) = c("VAR", "C:A:F", "C:A", "C:F", "C", "B/A", "RES")
  75.   plots = list()
  76.   ###################################################
  77.   pb <- winProgressBar(title = "Anova with mixed models",
  78.                        label = "processing data",
  79.                        min = 0,
  80.                        width = 400,
  81.                        max = nvariables,
  82.                        initial = 0)
  83.   for (i in colvar:ncol(dataset)) {
  84.     varname = names(dataset)[i]
  85.     nlin = nlin + 1
  86.    
  87.     Complete = as.formula(paste0(paste0(varname,"  ~ ",
  88.                                         "ANO * FUNG +
  89.                                        (1|BWE) +
  90.                                        (1|CULTIVAR) +
  91.                                        (1|CULTIVAR:ANO) +
  92.                                        (1|CULTIVAR:FUNG) +
  93.                                        (1|CULTIVAR:ANO:FUNG)")))
  94.     Complete = lmerTest::lmer(Complete, data = dataset)
  95.     random = broom::tidy(Complete, effects = "ran_pars")
  96.     VARCOMP[nlin,1] =  varname
  97.     VARCOMP[nlin,2] =  as.numeric(random[1,3]^2)
  98.     VARCOMP[nlin,3] =  as.numeric(random[2,3]^2)
  99.     VARCOMP[nlin,4] =  as.numeric(random[3,3]^2)
  100.     VARCOMP[nlin,5] =  as.numeric(random[4,3]^2)
  101.     VARCOMP[nlin,6] =  as.numeric(random[5,3]^2)
  102.     VARCOMP[nlin,7] =  as.numeric(random[6,3]^2)
  103.     if (i == colvar){
  104.       # saving the residuals for future QQ-plot
  105.       df = cbind(aveias[,c(1:4)], var = aveias[(match(varname, names(aveias)))]) %>%
  106.         mutate(res = residuals(Complete)) %>%
  107.         group_by(ANO, FUNG, CULTIVAR) %>%
  108.         summarise_all(., funs(mean))
  109.       plots[[paste(varname)]] =   ggplot(df, aes(sample = res)) +
  110.         stat_qq(size = 1) +
  111.         stat_qq_line(col = "red") +
  112.         ggtitle(paste(varname)) +
  113.         theme_journal() +
  114.         theme(aspect.ratio = 1,
  115.               axis.title = element_text(size = 8),
  116.               axis.text = element_text(size = 8),
  117.               plot.title = element_text(size = 10))
  118.      
  119.       ############ fixed effects #################
  120.       fixed = anova(Complete, ddf="Kenward-Roger")
  121.       fixed2 = round(fixed[5], 3)
  122.       fixed2[["F value"]] = paste0(fixed2[["F value"]], sign(fixed))
  123.       names(fixed2)[which(colnames(fixed2) == "F value")] = varname
  124.       fixed2 = data.frame(fixed2)
  125.      
  126.       # Prdicted values of the model
  127.       PREDOAT = mutate(PREDOAT,
  128.                        var = predict(Complete))
  129.       names(PREDOAT)[which(colnames(PREDOAT) == "var")] = varname
  130.       data = get_model_data(Complete, type = "re")
  131.       # BLUP for three-way interaction
  132.       blupCAF = data.frame(do.call('rbind', strsplit(as.character(data[[1]]$term),':',fixed = TRUE)))
  133.       colnames(blupCAF) = c("CULT", "ANO", "FUNG")
  134.       blupCAF = mutate(blupCAF,
  135.                        var = get_model_data(Complete, type = "re")[[1]]$estimate)
  136.       names(blupCAF)[which(colnames(blupCAF) == "var")] = varname
  137.       # BLUP for cultivar x year interaction
  138.       blupCA = data.frame(do.call('rbind', strsplit(as.character(data[[2]]$term),':',fixed = TRUE)))
  139.       colnames(blupCA) = c("CULT", "ANO")
  140.       blupCA = mutate(blupCA,
  141.                       var = get_model_data(Complete, type = "re")[[2]]$estimate)
  142.       names(blupCA)[which(colnames(blupCA) == "var")] = varname
  143.       # BLUP for cultivar x fungicide interaction
  144.       blupCF = data.frame(do.call('rbind', strsplit(as.character(data[[3]]$term),':',fixed = TRUE)))
  145.       colnames(blupCF) = c("CULT", "FUNG")
  146.       blupCF = mutate(blupCF,
  147.                       var = get_model_data(Complete, type = "re")[[3]]$estimate)
  148.       names(blupCF)[which(colnames(blupCF) == "var")] = varname
  149.       # BLUP for cultivars
  150.       blupC = data.frame(do.call('rbind', strsplit(as.character(data[[4]]$term),':',fixed = TRUE)))
  151.       colnames(blupC) = c("CULT")
  152.       blupC = mutate(blupC,
  153.                      var = get_model_data(Complete, type = "re")[[4]]$estimate)
  154.       names(blupC)[which(colnames(blupC) == "var")] = varname
  155.     }
  156.    
  157.     if (i > colvar){
  158.       # saving the residuals for future QQ-plot
  159.       df = cbind(aveias[,c(1:4)], var = aveias[(match(varname, names(aveias)))]) %>%
  160.         mutate(res = residuals(Complete)) %>%
  161.         group_by(ANO, FUNG, CULTIVAR) %>%
  162.         summarise_all(., funs(mean))
  163.       plots[[paste(varname)]] =   ggplot(df, aes(sample = res)) +
  164.         stat_qq(size = 1) +
  165.         stat_qq_line(col = "red")+
  166.         ggtitle(paste(varname))+
  167.         theme_journal()+
  168.         theme(aspect.ratio = 1,
  169.               axis.title = element_text(size = 8),
  170.               axis.text = element_text(size = 8),
  171.               plot.title = element_text(size = 10))
  172.      
  173.       # Significance of fixed effects
  174.       fixed = anova(Complete, ddf="Kenward-Roger")
  175.       fixed3 = round(fixed[5], 3)
  176.       fixed2[["F value"]] = paste0(fixed3[["F value"]], sign(fixed))
  177.       names(fixed2)[which(colnames(fixed2) == "F value")] = varname
  178.      
  179.       PREDOAT = mutate(PREDOAT,
  180.                        var = predict(Complete))
  181.       names(PREDOAT)[which(colnames(PREDOAT) == "var")] = varname
  182.       ###########
  183.       blupCAF = mutate(blupCAF,
  184.                        var = get_model_data(Complete, type = "re")[[1]]$estimate)
  185.       names(blupCAF)[which(colnames(blupCAF) == "var")] = varname
  186.       ###########
  187.       blupCA = mutate(blupCA,
  188.                       var = get_model_data(Complete, type = "re")[[2]]$estimate)
  189.       names(blupCA)[which(colnames(blupCA) == "var")] = varname
  190.       ###########
  191.       blupCF = mutate(blupCF,
  192.                       var = get_model_data(Complete, type = "re")[[3]]$estimate)
  193.       names(blupCF)[which(colnames(blupCF) == "var")] = varname
  194.       ###########
  195.       blupC = mutate(blupC,
  196.                      var = get_model_data(Complete, type = "re")[[4]]$estimate)
  197.       names(blupC)[which(colnames(blupC) == "var")] = varname
  198.     }
  199.    
  200.     # Deviance analysis for random effects
  201.     LRT = ranova(Complete)[2:6,]
  202.     LRT_res[nlin,1] =  varname
  203.     LRT_res[nlin,2:6] = paste0(round(t(LRT)[4,],4), sign2(LRT))
  204.    
  205.     setWinProgressBar(pb, nlin, title=paste("Analizyng the variable", varname," - ",
  206.                                             round(nlin/nvariables*100,1),"% Concluded -"))
  207.    
  208.   }
  209.   # Creating the final files and removing temporary dataframes
  210.   fixed = fixed2 %>% t() %>% as.data.frame()
  211.   PREDOAT = PREDOAT %>%
  212.     group_by(ANO, FUNG, CULTIVAR) %>%
  213.     summarise_all(funs(mean(., na.rm = T))) %>%
  214.     subset(select = -c(Bloco))
  215.   close(pb)
  216.   return(list(blupC = blupC,
  217.               blupCA = blupCA,
  218.               blupCAF = blupCAF,
  219.               blupCF = blupCF,
  220.               fixed = fixed,
  221.               LRT_res = LRT_res,
  222.               resplot = plots,
  223.               PREDOAT = PREDOAT,
  224.               VARCOMP = VARCOMP))
  225.  
  226. }
  227.  
  228.  
  229. distdend = function(data,
  230.                     scale = FALSE,
  231.                     selvar = FALSE,
  232.                     results = TRUE,
  233.                     dendrogram = TRUE,
  234.                     pvclust = FALSE,
  235.                     verbose = TRUE,
  236.                     nboot = 1000,
  237.                     alpha = 0.95,
  238.                     distmethod = "euclidean",
  239.                     clustmethod = "average",
  240.                     type = "rectangle",
  241.                     nclust = NULL,
  242.                     ...){
  243.   # Function for computing distances and selecting variables for hierarchical clustering
  244.   # Arguments:
  245.   # data: the dataset (must be a two-way table with cultivars in the rows and variables in the collum)
  246.   # scale: scale the variables (zero mean and unit variance)?
  247.   # selvar: run the algorithm for selecting variables?
  248.   # results: save the results to an object?
  249.   # dendogram: should a dendrogram be created?
  250.   # pvclust: should a bootstrap be used for indentifying the number of clusters? (see ?pvclust)
  251.   # verbose: should the procedure run silently?
  252.   # nboot: the number of bootstrap permutations if 'pvclust = TRUE'
  253.   # alpha: the confidence level for idenfifying the groups
  254.   # distmethod, clustmethod: the distance metric and the clustering method to be used.
  255.   # nclust: the number of clusters to cut the dendrogram.
  256.     if (scale == TRUE & selvar == TRUE){
  257.     stop("The variable selection algorithm can only be executed with non-standardized data. Select 'scale = FALSE'. ")
  258.   }
  259.   if (scale == TRUE){
  260.     data = data.frame(scale(data))
  261.   } else{data = data}
  262.  
  263.   if (selvar == TRUE){
  264.     n = (ncol(data)-1)
  265.     statistics = data.frame(matrix(nrow = n, ncol = 6))
  266.     ModelEstimates = list()
  267.     modelcode = 1
  268.     namesv = "-"
  269.     original = data
  270.     dein = factoextra::get_dist(original, method = distmethod, diag = T, upper = T)
  271.     for (i in 1:n){
  272.       de = factoextra::get_dist(data, method = distmethod, diag = T, upper = T)
  273.       hc = hclust(de, method = clustmethod)
  274.       d2 = cophenetic(hc)
  275.       cof = cor(d2, de)
  276.       mant = ade4::mantel.rtest(de, dein, nrepet = 1000)
  277.       mantc = mant$obs
  278.       mantp = mant$pvalue
  279.       evect = data.frame(t(prcomp(data)$rotation))
  280.       var = abs(evect)[nrow(evect),]
  281.       names = apply(var, 1, function(x) which(x == max(x)))
  282.       npred = ncol(data)
  283.       statistics[i,1] = paste("Model",modelcode)
  284.       statistics[i,2] = namesv
  285.       statistics[i,3] = cof
  286.       statistics[i,4] = npred
  287.       statistics[i,5] = mantc
  288.       statistics[i,6] = mantp
  289.       mat = as.matrix(de)
  290.       mat = as.data.frame(mat)
  291.       Results = list(nvars = npred,
  292.                      excluded = namesv,
  293.                      namevars = names(data),
  294.                      distance = mat,
  295.                      cormantel = mantc,
  296.                      pvmant = mantp)
  297.       namesv = names(data[names])
  298.       data2 = data.frame(data[-(match(c(namesv), names(data)))])
  299.       data = data2
  300.       ModelEstimates[[paste("Model",modelcode)]] = Results
  301.      
  302.       names(statistics) = c("Model", "excluded", "cophenetic", "remaining", "cormantel", "pvmantel")
  303.       if(verbose == TRUE){
  304.         cat(paste("Calculating model ",modelcode, " with ", npred,
  305.                   " variables.", "'",namesv,"'", "excluded in this step (",
  306.                   round(modelcode/n*100,1),"%).", "\n"))
  307.       }
  308.       modelcode = modelcode + 1
  309.      
  310.     }
  311.     cat("Done!","\n")
  312.     cat("\n\n")
  313.     cat("--------------------------------------------------------------------------","\n")
  314.     cat("Summary of the adjusted models","\n")
  315.     cat("--------------------------------------------------------------------------","\n")  
  316.     print(statistics)
  317.     cat("--------------------------------------------------------------------------")
  318.    
  319.     cofgrap = ggplot2::ggplot(statistics, ggplot2::aes(x = remaining, y = cophenetic))+
  320.       ggplot2::geom_point(size = 3)+
  321.       ggplot2::theme_bw()+
  322.       ggplot2::geom_line(size = 1)+
  323.       ggplot2::theme(axis.ticks.length = unit(.2, "cm"),
  324.                      axis.text = ggplot2::element_text(size = 12, colour = "black"),
  325.                      axis.title = ggplot2::element_text(size = 12, colour = "black"),
  326.                      axis.ticks = ggplot2::element_line(colour = "black"),
  327.                      plot.margin = margin(0.5, 0.5, 0.2, 0.6, "cm"),
  328.                      axis.title.y = ggplot2::element_text(margin = margin(r=16)),
  329.                      legend.title = ggplot2::element_blank(),
  330.                      legend.text = ggplot2::element_text(size=12),
  331.                      panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
  332.                      panel.grid.major.x = ggplot2::element_blank(),
  333.                      panel.grid.major.y = ggplot2::element_blank(),
  334.                      panel.grid.minor.x = ggplot2::element_blank(),
  335.                      panel.grid.minor.y = ggplot2::element_blank())+
  336.       ggplot2::labs(x = "Number of variables",
  337.                     y = "Cophenetic correlation coefficient")
  338.    
  339.     model = statistics$Model[which.max(statistics$cophenetic)]
  340.     predvar = ModelEstimates[[model]]$namevars
  341.     data = data.frame(original[(match(c(predvar), names(original)))])
  342.     cat("\n\n")
  343.     cat("Suggested variables to be used in the analysis","\n")
  344.     cat("--------------------------------------------------------------------------","\n")
  345.     cat("The distance were calculated based on the variables in ", model,".", "\n",
  346.         "The variables included in this model were...","\n",
  347.         predvar,"\n")
  348.     cat("--------------------------------------------------------------------------")
  349.    
  350.   } else{data = data}
  351.  
  352.   if(pvclust == T & distmethod == "gower"){
  353.     stop("The pvclust procedure cannot be used using Gower's distance.")
  354.   } else{  
  355.     if(sum(sapply(data,is.numeric))!= ncol(data) & distmethod != "gower" ){
  356.       stop("All variables need to be numeric! ")
  357.     } else{
  358.       if (pvclust == TRUE){
  359.         set.seed(123)
  360.        
  361.         if (distmethod == "pearson" | distmethod == "kendall" | distmethod == "spearman" ){
  362.           distmethod2 = "correlation"
  363.         } else{distmethod2 = distmethod}
  364.        
  365.         dend = pvclust::pvclust(data.frame(t(data)),
  366.                                 method.dist = distmethod2,
  367.                                 method.hclust = clustmethod,
  368.                                 nboot = nboot)
  369.        
  370.         plot(dend, hang = -1, cex = 0.5)
  371.         pvclust::pvrect(dend, alpha = alpha)
  372.         pval = dend$edges
  373.        
  374.       }
  375.      
  376.       if (distmethod == "gower"){
  377.         if(sum(sapply(data,is.numeric))== ncol(data) & distmethod == "gower" ){
  378.           stop("You are using the Gower's distance, but all variables are numeric !. Use the pass () function to transform the variables. ")
  379.         }
  380.         de = StatMatch::gower.dist(datapc)
  381.         rownames(de) = rownames(datapc)
  382.         colnames(de) = rownames(datapc)
  383.         de = as.dist(de)
  384.       } else{
  385.        
  386.         de = factoextra::get_dist(data, method = distmethod, diag = T, upper = T)
  387.       }
  388.       mat = as.matrix(de)
  389.       mat = as.data.frame(mat)
  390.      
  391.       if(is.null(nclust)==F){
  392.         hc = factoextra::hcut(de, hc_method = clustmethod, k = nclust)
  393.       } else{
  394.         hc = hclust(de, method = clustmethod)
  395.       }
  396.       out  = factoextra::fviz_dend(hc,
  397.                                    main = "",
  398.                                    k = nclust,
  399.                                    type = type,
  400.                                    ...)
  401.       d2 = cophenetic(hc)
  402.       cof = cor(d2, de)
  403.       k = 1.25
  404.       pcorte = mean(hc$height) + k * sd(hc$height)
  405.      
  406.       if(is.null(nclust)==F){
  407.         ctree = cutree(hc,nclust)
  408.         cl.stats = fpc::cluster.stats(d = hc, clustering = ctree)
  409.         cl.names = list()
  410.         cl.code = 0
  411.         for(k in 1:nclust){
  412.           res = rownames(data)[ctree == k]
  413.           cl.code = cl.code + 1
  414.           cl.names[[paste("Cluster",cl.code)]] = res
  415.         }
  416.       } else {
  417.         cl.stats = NULL
  418.         cl.names = NULL}
  419.       if (pvclust == TRUE){
  420.         pval = pval
  421.         dend = dend
  422.       } else{
  423.         pval = NULL
  424.         dend = NULL
  425.       }
  426.      
  427.       if (results == TRUE){
  428.         if(dendrogram == TRUE){
  429.           if(selvar == TRUE){
  430.             return(structure(list(statistics = statistics,
  431.                                   models = ModelEstimates,
  432.                                   cofgrap = cofgrap,
  433.                                   graphic = out,
  434.                                   hc = hc,
  435.                                   distances = mat,
  436.                                   cl.stats = cl.stats,
  437.                                   cl.names = cl.names,
  438.                                   cophenetic = cof,
  439.                                   cut = pcorte,
  440.                                   pval = pval,
  441.                                   dend = dend),
  442.                              class = "distdend"))
  443.           }else{
  444.             return(structure(list(graphic = out,
  445.                                   hc = hc,
  446.                                   distances = mat,
  447.                                   cl.stats = cl.stats,
  448.                                   cl.names = cl.names,
  449.                                   cophenetic = cof,
  450.                                   cut = pcorte,
  451.                                   pval = pval,
  452.                                   dend = dend),
  453.                              class = "distdend"))
  454.           }
  455.         } else{
  456.           if(selvar == TRUE){
  457.             return(structure(list(statistics = statistics,
  458.                                   models = ModelEstimates,
  459.                                   distances = mat,
  460.                                   cl.stats = cl.stats,
  461.                                   cl.names = cl.names,
  462.                                   cophenetic = cof,
  463.                                   cut = pcorte,
  464.                                   pval = pval,
  465.                                   dend = dend),
  466.                              class = "distdend"))
  467.           }else{
  468.             return(structure(list(distances = mat,
  469.                                   cl.stats = cl.stats,
  470.                                   cl.names = cl.names,
  471.                                   cophenetic = cof,
  472.                                   cut = pcorte,
  473.                                   pval = pval,
  474.                                   dend = dend),
  475.                              class = "distdend"))
  476.           }
  477.         }
  478.       } else{
  479.         if(dendrogram == TRUE){
  480.           return(out)
  481.         } else{
  482.           stop("Invalid arguments. At least one of  'dendrogram' or' results' argument must be 'TRUE'. Nothing was generated. ")
  483.         }
  484.       }
  485.      
  486.     }
  487.   }
  488. }
  489.  
  490.  
  491.  
  492. pairs.mantel = function(data = list(x, y, ...),
  493.                         nrepet = 1000,
  494.                         names = NULL,
  495.                         prob = 0.05,
  496.                         diag = FALSE,
  497.                         export = FALSE,
  498.                         main = "auto",
  499.                         file.type = "pdf",
  500.                         file.name = NULL,
  501.                         width = 8,
  502.                         height = 7,
  503.                         resolution = 300,
  504.                         size.point = 0.5,
  505.                         shape.point = 19,
  506.                         alpha.point = 1,
  507.                         fill.point = NULL,
  508.                         col.point = "black",
  509.                         minsize = 2,
  510.                         maxsize = 3,
  511.                         signcol = "green",
  512.                         alpha = 0.15,
  513.                         diagcol = "gray",
  514.                         col.up.panel = "gray",
  515.                         col.lw.panel = "gray",
  516.                         col.dia.panel = "black",
  517.                         pan.spacing = 0.15,
  518.                         digits = 2){
  519.   # Function for computing Mantel's correlation in a list of correlation matrices or a list of a 'distdend' objects.
  520.   # A correlation-like plot is returned
  521.   w = c(21:25)
  522.   if(is.null(fill.point)==TRUE && any(w == shape.point)){
  523.     stop("If 'shape.point' is a value between 21 and 25, you must provide a color for fill the shape using the argument 'fill.point.'")
  524.   }
  525.  
  526.   if (length(which(sapply(data, function(x)  psych::isCorrelation(x))==FALSE)) != length(data)){
  527.     if(length(which(sapply(data, function(x) identical(nrow(x), ncol(x)))==TRUE)) != length(data)){
  528.       stop("All matrices in the list must be a square matrix. Please, check and fix.")
  529.     }
  530.     if (length(unique(unique(sapply(data, function(x) dim(x)))[1,1:length(data)])) != 1){
  531.       stop("All matrices in the list must be the same dimension. Please, check and fix.")
  532.     }
  533.    
  534.     for(i in 1:length(data)){
  535.       if(i == 1){
  536.         Dataset = data.frame(var = as.vector(t(data[[1]])[lower.tri(data[[1]],diag=F)]))
  537.         if(is.null(names)){
  538.           names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix 1")
  539.         } else {names(Dataset)[which(colnames(Dataset) == "var")] = names[1]}
  540.       }
  541.       if(i >= 2){
  542.         Dataset = dplyr::mutate(Dataset,
  543.                                 var = as.vector(t(data[[i]])[lower.tri(data[[i]],diag=F)]))
  544.         if(is.null(names)){
  545.           names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix ", i)
  546.         } else{names(Dataset)[which(colnames(Dataset) == "var")] = names[i]}
  547.       }
  548.     }
  549.    
  550.     dim = nrow(data[[1]])
  551.   } else {
  552.    
  553.    
  554.     if(length(which(sapply(data, function(x) identical(nrow(x[["distances"]]), ncol(x[["distances"]])))==TRUE)) != length(data)){
  555.       stop("All matrices in the list must be a square matrix. Please, check and fix.")
  556.     }
  557.     if (length(unique(unique(sapply(data, function(x) dim(x[["distances"]])))[1,1:length(data)])) != 1){
  558.       stop("All matrices in the list must be the same dimension. Please, check and fix.")
  559.     }
  560.    
  561.    
  562.     for(i in 1:length(data)){
  563.       if(i == 1){
  564.         Dataset = data.frame(var = as.vector(t(data[[1]][["distances"]])[lower.tri(data[[1]][["distances"]],diag=F)]))
  565.         if(is.null(names)){
  566.           names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix 1")
  567.         } else {names(Dataset)[which(colnames(Dataset) == "var")] = names[1]}
  568.       }
  569.       if(i >= 2){
  570.         Dataset = dplyr::mutate(Dataset,
  571.                                 var = as.vector(t(data[[i]][["distances"]])[lower.tri(data[[i]][["distances"]],diag=F)]))
  572.         if(is.null(names)){
  573.           names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix ", i)
  574.         } else{names(Dataset)[which(colnames(Dataset) == "var")] = names[i]}
  575.       }
  576.     }
  577.    
  578.     dim = nrow(data[[1]][["distances"]])
  579.   }
  580.  
  581.   my_custom_cor = function(data, mapping, color = I("black"), sizeRange = c(minsize, maxsize), ...) {
  582.     # get the x and y data to use the other code
  583.     x = GGally::eval_data_col(data, mapping$x)
  584.     y = GGally::eval_data_col(data, mapping$y)
  585.    
  586.     D = matrix(nrow = dim, ncol = dim)
  587.     D[lower.tri(D, diag = F)] = x
  588.     D[upper.tri(D, diag = F)] = x
  589.     diag(D) = 0
  590.    
  591.     D2 = matrix(nrow = dim, ncol = dim)
  592.     D2[lower.tri(D2, diag = F)] = y
  593.     D2[upper.tri(D2, diag = F)] = y
  594.     diag(D2) = 0
  595.    
  596.     ct <- ade4::mantel.randtest(as.dist(D), as.dist(D2), nrepet = nrepet)
  597.    
  598.     sig = symnum(
  599.       ct$pvalue, corr = FALSE, na = FALSE,
  600.       cutpoints = c(0, 0.001, 0.01, 0.05, 1),
  601.       symbols = c("***", "**", "*", "")
  602.     )
  603.    
  604.    
  605.     r = unname(ct$obs)
  606.     rt = format(r, digits = digits)[1]
  607.    
  608.     # since we can't print it to get the strsize, just use the max size range
  609.     cex = max(sizeRange)
  610.    
  611.     # helper function to calculate a useable size
  612.     percent_of_range = function(percent, range) {
  613.       percent * diff(range) + min(range, na.rm = TRUE)
  614.     }
  615.    
  616.     # plot the cor value
  617.     GGally:: ggally_text(
  618.       label = as.character(rt),
  619.       mapping = aes(),
  620.       xP = 0.5, yP = 0.5,
  621.       size = I(percent_of_range(cex * abs(r), sizeRange)),
  622.       color = color,
  623.       ...
  624.     ) +
  625.       # add the sig stars
  626.       geom_text(
  627.         aes_string(
  628.           x = 0.8,
  629.           y = 0.8
  630.         ),
  631.         label = sig,
  632.         size = I(cex),
  633.         color = color,
  634.         ...
  635.       ) +
  636.       # remove all the background stuff and wrap it with a dashed line
  637.       theme_classic() +
  638.       theme(
  639.         panel.background = element_rect(color = col.up.panel),
  640.         axis.line = element_blank(),
  641.         axis.ticks = element_blank(),
  642.         axis.text.y = element_blank(),
  643.         axis.text.x = element_blank()
  644.       )
  645.   }
  646.  
  647.   my_custom_smooth = function(data, mapping, ...) {
  648.     x = GGally::eval_data_col(data, mapping$x)
  649.     y = GGally::eval_data_col(data, mapping$y)
  650.    
  651.     D = matrix(nrow = dim, ncol = dim)
  652.     D[lower.tri(D, diag = F)] = x
  653.     D[upper.tri(D, diag = F)] = x
  654.     diag(D) = 0
  655.    
  656.     D2 = matrix(nrow = dim, ncol = dim)
  657.     D2[lower.tri(D2, diag = F)] = y
  658.     D2[upper.tri(D2, diag = F)] = y
  659.     diag(D2) = 0
  660.    
  661.     ct <- ade4::mantel.randtest(as.dist(D), as.dist(D2), nrepet = nrepet)
  662.    
  663.     pval <- unname(ct$pvalue)
  664.     p = ggplot(data = data, mapping = mapping)
  665.    
  666.     if(is.null(fill.point)==FALSE){
  667.       p = p + geom_point(color = I(col.point), fill = fill.point, shape = shape.point, size = size.point, alpha = alpha.point)
  668.     } else{
  669.       p = p + geom_point(color = I(col.point), shape = shape.point, size = size.point, alpha = alpha.point)
  670.     }
  671.     p = p + theme_classic() +
  672.       theme(
  673.         panel.background = element_rect(fill = "white", color = col.lw.panel),
  674.         axis.line = element_blank(),
  675.         axis.ticks = element_blank(),
  676.         axis.text.y = element_blank(),
  677.         axis.text.x = element_blank()
  678.       )
  679.     if (pval < prob) {
  680.       p = p + theme(
  681.         panel.background = element_rect(fill = scales::alpha(signcol, alpha)))
  682.      
  683.     }
  684.     p
  685.   }
  686.  
  687.  
  688.   ggally_mysmooth = function(data, mapping, ...){
  689.     ggplot(data = data, mapping=mapping) +
  690.       geom_density(fill=alpha(diagcol, 1))+
  691.       theme_classic() +
  692.       theme(panel.background = element_rect(fill=alpha('white', 1), color = col.dia.panel))
  693.   }
  694.   if(main == "auto"){
  695.     title = paste0("Mantel's test with ",  nrepet, " resamples")
  696.   } else{title = main}
  697.   if(diag == TRUE){
  698.     diag = list(continuous = ggally_mysmooth)
  699.   } else{diag = NULL}
  700.   p1 = GGally::ggpairs(
  701.     Dataset,
  702.     title =   title,
  703.     diag = diag,
  704.     lower = list(continuous = my_custom_cor),
  705.     upper = list(continuous = my_custom_smooth),
  706.     axisLabels="none")
  707.   ggplot2::theme_set(theme_gray()+theme(panel.spacing=grid::unit(pan.spacing,"lines")))
  708.  
  709.  
  710.   if (export  ==  F|FALSE) {
  711.     print(p1)
  712.   } else
  713.    
  714.     if (file.type == "pdf"){
  715.       if (is.null(file.name)){
  716.         pdf("Pairs of Mantel's test.pdf",width = width, height = height)
  717.       } else
  718.         pdf(paste0(file.name, ".pdf"), width = width, height = height)
  719.       print(p1)
  720.       dev.off()
  721.     }
  722.  
  723.   if (file.type == "tiff"){
  724.     if (is.null(file.name)){
  725.       tiff(filename = "Pairs of Mantel's test.tiff",width = width, height = height, units = "in", compression = "lzw", res = resolution)
  726.     } else
  727.       tiff(filename = paste0(file.name, ".tiff"), width = width, height = height, units = "in", compression = "lzw", res = resolution)
  728.     print(p1)
  729.     dev.off()
  730.   }
  731.  
  732. }
  733.  
  734.  
  735. make_mat = function(data, row, col, value){
  736.   # Function for make a two-way table
  737.   # Arguments:
  738.   # data: the dataset in a long-format column (e.g., environment, cultivar, GY,...)
  739.   # row: the name of the column in the data that will fill the row values in the output
  740.   # col: the name of the column in the data that will fill the column values in the output
  741.   # value: the response variable
  742.   nam = cbind(c(row, col, value))
  743.   data = data.frame(data[(match(c(nam), names(data)))])
  744.   return(data.frame(tapply(data[, 3], data[, c(1, 2)], mean)))
  745. }
  746.  
  747.  
  748. PCA_oat = function(PC, x="PC1", y="PC2") {
  749.   # function for obtaining the coordinates of the first two PCs for biplot representation using ggplot function
  750.   # Arguments:
  751.   # PC = a "princomp" object
  752.   data = data.frame(PC$x)[1:2] %>% rownames_to_column(var = "NAME")
  753.   datapc <- data.frame(varnames = rownames(PC$rotation), PC$rotation)
  754.   mult = min(
  755.     (max(data[,y]) - min(data[,y])/(max(datapc[,y])-min(datapc[,y]))),
  756.     (max(data[,x]) - min(data[,x])/(max(datapc[,x])-min(datapc[,x])))
  757.   )
  758.   datapc = transform(datapc,
  759.                      v1 = .7 * mult * (get(x)),
  760.                      v2 = .7 * mult * (get(y))) %>%
  761.     subset(select = c(v1, v2)) %>%
  762.     rownames_to_column(var = "NAME")
  763.   names(datapc)[2:3] = c(x, y)
  764.   datapc = rbind(datapc, data)
  765.   datapc[1:6, 1] = c("2015", "2015", "2016", "2016", "2017", "2017")
  766.   datapc$TYPE = c(rep(c("NF", "WF"),3),
  767.                   rep("Cultivar", 22))
  768.   datapc = datapc %>%
  769.     select(TYPE, everything())
  770.   eigs <- PC$sdev^2
  771.   PC1VAR = eigs[1] / sum(eigs) * 100
  772.   PC2VAR = eigs[2] / sum(eigs) * 100
  773.   return(list(datapc = datapc,
  774.               PC1VAR = PC1VAR,
  775.               PC2VAR = PC2VAR))
  776. }
  777.  
  778.  
  779.  
  780. colindiag = function(x, n = NULL){
  781.   # Function for multicoolinearity diagnosis
  782.   # Arguments:
  783.   # x: A correlation matrix or a dataframe with only variables. If a correlation matrix is used,
  784.   # the sample size used to compute the correlations must be declared.
  785.  
  786.   if(!is.matrix(x) && !is.data.frame(x)){
  787.     stop("The object 'x' must be a correlation matrix or a data.frame")
  788.   }
  789.   if(is.matrix(x) && is.null(n)){
  790.     stop("You have a matrix but the sample size used to compute the correlations (n) was not declared.")
  791.   }
  792.   if(is.data.frame(x)){
  793.     if(sum(sapply(x, is.numeric)==T) !=ncol(x)){
  794.       stop("All variables must be numeric.")
  795.     }
  796.   }
  797.  
  798.   if(is.matrix(x)){
  799.     cor.x = x
  800.   }
  801.   if(is.data.frame(x)){
  802.     cor.x = cor(x)
  803.     n = nrow(x)
  804.   }
  805.   eigen = eigen(cor.x)
  806.   Det = det(cor.x)
  807.   NC = max(eigen$values)/min(eigen$values)
  808.   Aval = data.frame(eigen$values)
  809.   names(Aval) = "Eigenvalues"
  810.   Avet = data.frame(t(eigen$vectors))
  811.   names(Avet) = colnames(x)
  812.   AvAvet = cbind(Aval, Avet)
  813.   VIF = data.frame(diag(solve(cor.x)))
  814.   names(VIF) = "VIF"
  815.   results = data.frame(linear = as.vector(t(cor.x)[lower.tri(cor.x,diag=F)]))
  816.   results = dplyr::mutate(results,
  817.                           t = linear*(sqrt(n-2))/(sqrt(1-linear^2)),
  818.                           prob = 2*(1-pt(abs(t), df = n-2)))
  819.   names = colnames(x)
  820.   combnam = combn(names,2, paste, collapse = " x ")
  821.   rownames(results) = names(sapply(combnam, names))
  822.   largest_corr = paste0(rownames(results)[which.max(abs(results$linear))], " = ",
  823.                         round(results[which.max(abs(results$linear)),1],3))
  824.   smallest_corr = paste0(rownames(results)[which.min(abs(results$linear))], " = ",
  825.                          round(results[which.min(abs(results$linear)),1],3))
  826.   ncorhigh = sum(results$linear >= abs(0.8))
  827.   if (NC > 1000){
  828.     cat(paste0("Severe multicollinearity in the matrix! Pay attention on the variables listed bellow\n",
  829.                "CN = ", round(NC,3), "\n"))
  830.   }
  831.   if (NC < 100){
  832.     cat(paste0("Weak multicollinearity in the matrix\n",
  833.                "NC = ", round(NC,3), "\n"))
  834.   }
  835.   if (NC > 100 & NC < 1000 ){
  836.     cat(paste0("The multicollinearity in the matrix should be investigated.\n",
  837.                "NC = ", round(NC,3), "\n",
  838.                "Largest VIF = ", max(VIF), "\n"))
  839.   }
  840.   ultimo = data.frame(Peso=t(AvAvet[c(nrow(AvAvet)),])[-c(1),])
  841.   abs = data.frame(Peso = abs(ultimo[,"Peso"]))
  842.   rownames(abs) = rownames(ultimo)
  843.   ultimo = abs[order(abs[,"Peso"], decreasing = T), , drop = FALSE]
  844.   pesovarname = paste(rownames(ultimo), collapse = ' > ')
  845.   final = list(cormat = data.frame(cor.x),
  846.                corlist = results,
  847.                evalevet = AvAvet,
  848.                VIF = VIF,
  849.                CN = NC,
  850.                det = Det,
  851.                largest_corr = largest_corr,
  852.                smallest_corr = smallest_corr,
  853.                weight_var = pesovarname)
  854.  
  855.   cat(paste0("Matrix determinant: ", round(Det,7)),  "\n")
  856.   cat(paste0("Largest correlation: ", largest_corr),  "\n")
  857.   cat(paste0("Smallest correlation: ", smallest_corr),  "\n")
  858.   cat(paste0("Number of correlations with r >= |0.8|: ", ncorhigh),  "\n")
  859.   cat(paste0("Variables with largest weight in the last eigenvalues: ","\n",
  860.              pesovarname),  "\n")
  861.  
  862.   return(invisible(final))
  863. }
  864.  
  865.  
  866.  
  867. CANONICAL = function(x, y){
  868. # Function for computing canonical correlations using CCA function.
  869. # Arguments:
  870. # x and y: A dataframe with the variables in the first (smallest) and second (largest) group, respectively.
  871.   can = CCA(x, y, Type = 2)
  872.   results = list(coeff = data.frame(rbind(can$Coef.X,
  873.                                           colnames(can$Coef.Y),
  874.                                           can$Coef.Y)),
  875.                  sig = data.frame(cbind(cbind(can[["Var.UV"]], can[["Corr.UV"]]),
  876.                                         can[["SigTest"]][c(3,5,6)])))
  877.  
  878.   return(results)
  879. }
  880.  
  881. cat("Download of functions completed. Check the R environment.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement