Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- pass = function(data, var, type){
- # Function for passing variables to different types
- # Arguments:
- # data = the dataset
- # var = the variable (e.g, var = "GY" or var = c("GY", "NGP"))
- var = var
- data[var] = lapply(data[var], type)
- return(data)
- }
- theme_journal = function () {
- ## Helper ggplot theme function
- theme_bw() %+replace%
- theme(axis.ticks.length = unit(.2, "cm"),
- axis.text = element_text(size = 12, colour = "black"),
- axis.title = element_text(size = 12, colour = "black"),
- axis.ticks = element_line(colour = "black"),
- panel.border = element_rect(colour = "black", fill=NA, size=0.5),
- panel.grid = element_blank())
- }
- theme_journal2 = function(){
- ## Helper ggplot theme function
- theme(axis.ticks.length = unit(.2, "cm"),
- axis.text = element_text(family = "serif", size = 8, colour = "black"),
- axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.25 ),
- axis.title = element_text(family = "serif", size = 8, colour = "black"),
- axis.ticks = element_line(colour = "black"),
- plot.margin = margin(0.2, 0.2, 0.2, 0.7, "cm"),
- panel.border = element_rect(colour = "black", fill=NA, size=0.5),
- panel.grid.major.x = element_blank(), panel.grid.major.y = element_blank(),
- panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
- panel.spacing = unit(0, "lines"))
- }
- mixed_oat = function(dataset, colvar = 6, nvariables = 16){
- #############################################################################
- # Function for mixed model applied in a dataset with several variables
- # Dataset mus be the following structure of collumns:
- # Year (factor), Fungicide (factor), Cultivar (factor), Block (factor) and
- # the response variables.. In or case, 16 numeric variables.
- # Arguments:
- # dataset = the data
- # colvar = the number of the column with the first variable
- # nvariables = the total number of variables to be analysed
- #############################################################################
- nlin = 0
- # function for labeling significant F value for fixed effects.
- sign = function(data){
- pval = fixed[["Pr(>F)"]][1:3]
- sig = symnum(
- pval, corr = FALSE, na = FALSE,
- cutpoints = c(0, 0.001, 0.01, 0.05, 1),
- symbols = c("***", "**", "*", "ns")
- )
- return(sig)
- }
- # function for labeling significant Chi-square in the LRT test.
- sign2 = function(data){
- pval = LRT[["Pr(>Chisq)"]][1:5]
- sig = symnum(
- pval, corr = FALSE, na = FALSE,
- cutpoints = c(0, 0.001, 0.01, 0.05, 1),
- symbols = c("***", "**", "*", "ns")
- )
- return(sig)
- }
- # Creating dataframes for saving the values
- PREDOAT = dataset[,c( "ANO", "FUNG", "CULTIVAR", "Bloco")]
- LRT_res = data.frame(matrix(ncol = 6, nrow = nvariables))
- names(LRT_res) = c("VAR", "B/A", "CULT", "CUL:ANO", "CUL:FUN", "C:F:A")
- VARCOMP = data.frame(matrix(ncol = 7, nrow = nvariables))
- names(VARCOMP) = c("VAR", "C:A:F", "C:A", "C:F", "C", "B/A", "RES")
- plots = list()
- ###################################################
- pb <- winProgressBar(title = "Anova with mixed models",
- label = "processing data",
- min = 0,
- width = 400,
- max = nvariables,
- initial = 0)
- for (i in colvar:ncol(dataset)) {
- varname = names(dataset)[i]
- nlin = nlin + 1
- Complete = as.formula(paste0(paste0(varname," ~ ",
- "ANO * FUNG +
- (1|BWE) +
- (1|CULTIVAR) +
- (1|CULTIVAR:ANO) +
- (1|CULTIVAR:FUNG) +
- (1|CULTIVAR:ANO:FUNG)")))
- Complete = lmerTest::lmer(Complete, data = dataset)
- random = broom::tidy(Complete, effects = "ran_pars")
- VARCOMP[nlin,1] = varname
- VARCOMP[nlin,2] = as.numeric(random[1,3]^2)
- VARCOMP[nlin,3] = as.numeric(random[2,3]^2)
- VARCOMP[nlin,4] = as.numeric(random[3,3]^2)
- VARCOMP[nlin,5] = as.numeric(random[4,3]^2)
- VARCOMP[nlin,6] = as.numeric(random[5,3]^2)
- VARCOMP[nlin,7] = as.numeric(random[6,3]^2)
- if (i == colvar){
- # saving the residuals for future QQ-plot
- df = cbind(aveias[,c(1:4)], var = aveias[(match(varname, names(aveias)))]) %>%
- mutate(res = residuals(Complete)) %>%
- group_by(ANO, FUNG, CULTIVAR) %>%
- summarise_all(., funs(mean))
- plots[[paste(varname)]] = ggplot(df, aes(sample = res)) +
- stat_qq(size = 1) +
- stat_qq_line(col = "red") +
- ggtitle(paste(varname)) +
- theme_journal() +
- theme(aspect.ratio = 1,
- axis.title = element_text(size = 8),
- axis.text = element_text(size = 8),
- plot.title = element_text(size = 10))
- ############ fixed effects #################
- fixed = anova(Complete, ddf="Kenward-Roger")
- fixed2 = round(fixed[5], 3)
- fixed2[["F value"]] = paste0(fixed2[["F value"]], sign(fixed))
- names(fixed2)[which(colnames(fixed2) == "F value")] = varname
- fixed2 = data.frame(fixed2)
- # Prdicted values of the model
- PREDOAT = mutate(PREDOAT,
- var = predict(Complete))
- names(PREDOAT)[which(colnames(PREDOAT) == "var")] = varname
- data = get_model_data(Complete, type = "re")
- # BLUP for three-way interaction
- blupCAF = data.frame(do.call('rbind', strsplit(as.character(data[[1]]$term),':',fixed = TRUE)))
- colnames(blupCAF) = c("CULT", "ANO", "FUNG")
- blupCAF = mutate(blupCAF,
- var = get_model_data(Complete, type = "re")[[1]]$estimate)
- names(blupCAF)[which(colnames(blupCAF) == "var")] = varname
- # BLUP for cultivar x year interaction
- blupCA = data.frame(do.call('rbind', strsplit(as.character(data[[2]]$term),':',fixed = TRUE)))
- colnames(blupCA) = c("CULT", "ANO")
- blupCA = mutate(blupCA,
- var = get_model_data(Complete, type = "re")[[2]]$estimate)
- names(blupCA)[which(colnames(blupCA) == "var")] = varname
- # BLUP for cultivar x fungicide interaction
- blupCF = data.frame(do.call('rbind', strsplit(as.character(data[[3]]$term),':',fixed = TRUE)))
- colnames(blupCF) = c("CULT", "FUNG")
- blupCF = mutate(blupCF,
- var = get_model_data(Complete, type = "re")[[3]]$estimate)
- names(blupCF)[which(colnames(blupCF) == "var")] = varname
- # BLUP for cultivars
- blupC = data.frame(do.call('rbind', strsplit(as.character(data[[4]]$term),':',fixed = TRUE)))
- colnames(blupC) = c("CULT")
- blupC = mutate(blupC,
- var = get_model_data(Complete, type = "re")[[4]]$estimate)
- names(blupC)[which(colnames(blupC) == "var")] = varname
- }
- if (i > colvar){
- # saving the residuals for future QQ-plot
- df = cbind(aveias[,c(1:4)], var = aveias[(match(varname, names(aveias)))]) %>%
- mutate(res = residuals(Complete)) %>%
- group_by(ANO, FUNG, CULTIVAR) %>%
- summarise_all(., funs(mean))
- plots[[paste(varname)]] = ggplot(df, aes(sample = res)) +
- stat_qq(size = 1) +
- stat_qq_line(col = "red")+
- ggtitle(paste(varname))+
- theme_journal()+
- theme(aspect.ratio = 1,
- axis.title = element_text(size = 8),
- axis.text = element_text(size = 8),
- plot.title = element_text(size = 10))
- # Significance of fixed effects
- fixed = anova(Complete, ddf="Kenward-Roger")
- fixed3 = round(fixed[5], 3)
- fixed2[["F value"]] = paste0(fixed3[["F value"]], sign(fixed))
- names(fixed2)[which(colnames(fixed2) == "F value")] = varname
- PREDOAT = mutate(PREDOAT,
- var = predict(Complete))
- names(PREDOAT)[which(colnames(PREDOAT) == "var")] = varname
- ###########
- blupCAF = mutate(blupCAF,
- var = get_model_data(Complete, type = "re")[[1]]$estimate)
- names(blupCAF)[which(colnames(blupCAF) == "var")] = varname
- ###########
- blupCA = mutate(blupCA,
- var = get_model_data(Complete, type = "re")[[2]]$estimate)
- names(blupCA)[which(colnames(blupCA) == "var")] = varname
- ###########
- blupCF = mutate(blupCF,
- var = get_model_data(Complete, type = "re")[[3]]$estimate)
- names(blupCF)[which(colnames(blupCF) == "var")] = varname
- ###########
- blupC = mutate(blupC,
- var = get_model_data(Complete, type = "re")[[4]]$estimate)
- names(blupC)[which(colnames(blupC) == "var")] = varname
- }
- # Deviance analysis for random effects
- LRT = ranova(Complete)[2:6,]
- LRT_res[nlin,1] = varname
- LRT_res[nlin,2:6] = paste0(round(t(LRT)[4,],4), sign2(LRT))
- setWinProgressBar(pb, nlin, title=paste("Analizyng the variable", varname," - ",
- round(nlin/nvariables*100,1),"% Concluded -"))
- }
- # Creating the final files and removing temporary dataframes
- fixed = fixed2 %>% t() %>% as.data.frame()
- PREDOAT = PREDOAT %>%
- group_by(ANO, FUNG, CULTIVAR) %>%
- summarise_all(funs(mean(., na.rm = T))) %>%
- subset(select = -c(Bloco))
- close(pb)
- return(list(blupC = blupC,
- blupCA = blupCA,
- blupCAF = blupCAF,
- blupCF = blupCF,
- fixed = fixed,
- LRT_res = LRT_res,
- resplot = plots,
- PREDOAT = PREDOAT,
- VARCOMP = VARCOMP))
- }
- distdend = function(data,
- scale = FALSE,
- selvar = FALSE,
- results = TRUE,
- dendrogram = TRUE,
- pvclust = FALSE,
- verbose = TRUE,
- nboot = 1000,
- alpha = 0.95,
- distmethod = "euclidean",
- clustmethod = "average",
- type = "rectangle",
- nclust = NULL,
- ...){
- # Function for computing distances and selecting variables for hierarchical clustering
- # Arguments:
- # data: the dataset (must be a two-way table with cultivars in the rows and variables in the collum)
- # scale: scale the variables (zero mean and unit variance)?
- # selvar: run the algorithm for selecting variables?
- # results: save the results to an object?
- # dendogram: should a dendrogram be created?
- # pvclust: should a bootstrap be used for indentifying the number of clusters? (see ?pvclust)
- # verbose: should the procedure run silently?
- # nboot: the number of bootstrap permutations if 'pvclust = TRUE'
- # alpha: the confidence level for idenfifying the groups
- # distmethod, clustmethod: the distance metric and the clustering method to be used.
- # nclust: the number of clusters to cut the dendrogram.
- if (scale == TRUE & selvar == TRUE){
- stop("The variable selection algorithm can only be executed with non-standardized data. Select 'scale = FALSE'. ")
- }
- if (scale == TRUE){
- data = data.frame(scale(data))
- } else{data = data}
- if (selvar == TRUE){
- n = (ncol(data)-1)
- statistics = data.frame(matrix(nrow = n, ncol = 6))
- ModelEstimates = list()
- modelcode = 1
- namesv = "-"
- original = data
- dein = factoextra::get_dist(original, method = distmethod, diag = T, upper = T)
- for (i in 1:n){
- de = factoextra::get_dist(data, method = distmethod, diag = T, upper = T)
- hc = hclust(de, method = clustmethod)
- d2 = cophenetic(hc)
- cof = cor(d2, de)
- mant = ade4::mantel.rtest(de, dein, nrepet = 1000)
- mantc = mant$obs
- mantp = mant$pvalue
- evect = data.frame(t(prcomp(data)$rotation))
- var = abs(evect)[nrow(evect),]
- names = apply(var, 1, function(x) which(x == max(x)))
- npred = ncol(data)
- statistics[i,1] = paste("Model",modelcode)
- statistics[i,2] = namesv
- statistics[i,3] = cof
- statistics[i,4] = npred
- statistics[i,5] = mantc
- statistics[i,6] = mantp
- mat = as.matrix(de)
- mat = as.data.frame(mat)
- Results = list(nvars = npred,
- excluded = namesv,
- namevars = names(data),
- distance = mat,
- cormantel = mantc,
- pvmant = mantp)
- namesv = names(data[names])
- data2 = data.frame(data[-(match(c(namesv), names(data)))])
- data = data2
- ModelEstimates[[paste("Model",modelcode)]] = Results
- names(statistics) = c("Model", "excluded", "cophenetic", "remaining", "cormantel", "pvmantel")
- if(verbose == TRUE){
- cat(paste("Calculating model ",modelcode, " with ", npred,
- " variables.", "'",namesv,"'", "excluded in this step (",
- round(modelcode/n*100,1),"%).", "\n"))
- }
- modelcode = modelcode + 1
- }
- cat("Done!","\n")
- cat("\n\n")
- cat("--------------------------------------------------------------------------","\n")
- cat("Summary of the adjusted models","\n")
- cat("--------------------------------------------------------------------------","\n")
- print(statistics)
- cat("--------------------------------------------------------------------------")
- cofgrap = ggplot2::ggplot(statistics, ggplot2::aes(x = remaining, y = cophenetic))+
- ggplot2::geom_point(size = 3)+
- ggplot2::theme_bw()+
- ggplot2::geom_line(size = 1)+
- ggplot2::theme(axis.ticks.length = unit(.2, "cm"),
- axis.text = ggplot2::element_text(size = 12, colour = "black"),
- axis.title = ggplot2::element_text(size = 12, colour = "black"),
- axis.ticks = ggplot2::element_line(colour = "black"),
- plot.margin = margin(0.5, 0.5, 0.2, 0.6, "cm"),
- axis.title.y = ggplot2::element_text(margin = margin(r=16)),
- legend.title = ggplot2::element_blank(),
- legend.text = ggplot2::element_text(size=12),
- panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
- panel.grid.major.x = ggplot2::element_blank(),
- panel.grid.major.y = ggplot2::element_blank(),
- panel.grid.minor.x = ggplot2::element_blank(),
- panel.grid.minor.y = ggplot2::element_blank())+
- ggplot2::labs(x = "Number of variables",
- y = "Cophenetic correlation coefficient")
- model = statistics$Model[which.max(statistics$cophenetic)]
- predvar = ModelEstimates[[model]]$namevars
- data = data.frame(original[(match(c(predvar), names(original)))])
- cat("\n\n")
- cat("Suggested variables to be used in the analysis","\n")
- cat("--------------------------------------------------------------------------","\n")
- cat("The distance were calculated based on the variables in ", model,".", "\n",
- "The variables included in this model were...","\n",
- predvar,"\n")
- cat("--------------------------------------------------------------------------")
- } else{data = data}
- if(pvclust == T & distmethod == "gower"){
- stop("The pvclust procedure cannot be used using Gower's distance.")
- } else{
- if(sum(sapply(data,is.numeric))!= ncol(data) & distmethod != "gower" ){
- stop("All variables need to be numeric! ")
- } else{
- if (pvclust == TRUE){
- set.seed(123)
- if (distmethod == "pearson" | distmethod == "kendall" | distmethod == "spearman" ){
- distmethod2 = "correlation"
- } else{distmethod2 = distmethod}
- dend = pvclust::pvclust(data.frame(t(data)),
- method.dist = distmethod2,
- method.hclust = clustmethod,
- nboot = nboot)
- plot(dend, hang = -1, cex = 0.5)
- pvclust::pvrect(dend, alpha = alpha)
- pval = dend$edges
- }
- if (distmethod == "gower"){
- if(sum(sapply(data,is.numeric))== ncol(data) & distmethod == "gower" ){
- stop("You are using the Gower's distance, but all variables are numeric !. Use the pass () function to transform the variables. ")
- }
- de = StatMatch::gower.dist(datapc)
- rownames(de) = rownames(datapc)
- colnames(de) = rownames(datapc)
- de = as.dist(de)
- } else{
- de = factoextra::get_dist(data, method = distmethod, diag = T, upper = T)
- }
- mat = as.matrix(de)
- mat = as.data.frame(mat)
- if(is.null(nclust)==F){
- hc = factoextra::hcut(de, hc_method = clustmethod, k = nclust)
- } else{
- hc = hclust(de, method = clustmethod)
- }
- out = factoextra::fviz_dend(hc,
- main = "",
- k = nclust,
- type = type,
- ...)
- d2 = cophenetic(hc)
- cof = cor(d2, de)
- k = 1.25
- pcorte = mean(hc$height) + k * sd(hc$height)
- if(is.null(nclust)==F){
- ctree = cutree(hc,nclust)
- cl.stats = fpc::cluster.stats(d = hc, clustering = ctree)
- cl.names = list()
- cl.code = 0
- for(k in 1:nclust){
- res = rownames(data)[ctree == k]
- cl.code = cl.code + 1
- cl.names[[paste("Cluster",cl.code)]] = res
- }
- } else {
- cl.stats = NULL
- cl.names = NULL}
- if (pvclust == TRUE){
- pval = pval
- dend = dend
- } else{
- pval = NULL
- dend = NULL
- }
- if (results == TRUE){
- if(dendrogram == TRUE){
- if(selvar == TRUE){
- return(structure(list(statistics = statistics,
- models = ModelEstimates,
- cofgrap = cofgrap,
- graphic = out,
- hc = hc,
- distances = mat,
- cl.stats = cl.stats,
- cl.names = cl.names,
- cophenetic = cof,
- cut = pcorte,
- pval = pval,
- dend = dend),
- class = "distdend"))
- }else{
- return(structure(list(graphic = out,
- hc = hc,
- distances = mat,
- cl.stats = cl.stats,
- cl.names = cl.names,
- cophenetic = cof,
- cut = pcorte,
- pval = pval,
- dend = dend),
- class = "distdend"))
- }
- } else{
- if(selvar == TRUE){
- return(structure(list(statistics = statistics,
- models = ModelEstimates,
- distances = mat,
- cl.stats = cl.stats,
- cl.names = cl.names,
- cophenetic = cof,
- cut = pcorte,
- pval = pval,
- dend = dend),
- class = "distdend"))
- }else{
- return(structure(list(distances = mat,
- cl.stats = cl.stats,
- cl.names = cl.names,
- cophenetic = cof,
- cut = pcorte,
- pval = pval,
- dend = dend),
- class = "distdend"))
- }
- }
- } else{
- if(dendrogram == TRUE){
- return(out)
- } else{
- stop("Invalid arguments. At least one of 'dendrogram' or' results' argument must be 'TRUE'. Nothing was generated. ")
- }
- }
- }
- }
- }
- pairs.mantel = function(data = list(x, y, ...),
- nrepet = 1000,
- names = NULL,
- prob = 0.05,
- diag = FALSE,
- export = FALSE,
- main = "auto",
- file.type = "pdf",
- file.name = NULL,
- width = 8,
- height = 7,
- resolution = 300,
- size.point = 0.5,
- shape.point = 19,
- alpha.point = 1,
- fill.point = NULL,
- col.point = "black",
- minsize = 2,
- maxsize = 3,
- signcol = "green",
- alpha = 0.15,
- diagcol = "gray",
- col.up.panel = "gray",
- col.lw.panel = "gray",
- col.dia.panel = "black",
- pan.spacing = 0.15,
- digits = 2){
- # Function for computing Mantel's correlation in a list of correlation matrices or a list of a 'distdend' objects.
- # A correlation-like plot is returned
- w = c(21:25)
- if(is.null(fill.point)==TRUE && any(w == shape.point)){
- stop("If 'shape.point' is a value between 21 and 25, you must provide a color for fill the shape using the argument 'fill.point.'")
- }
- if (length(which(sapply(data, function(x) psych::isCorrelation(x))==FALSE)) != length(data)){
- if(length(which(sapply(data, function(x) identical(nrow(x), ncol(x)))==TRUE)) != length(data)){
- stop("All matrices in the list must be a square matrix. Please, check and fix.")
- }
- if (length(unique(unique(sapply(data, function(x) dim(x)))[1,1:length(data)])) != 1){
- stop("All matrices in the list must be the same dimension. Please, check and fix.")
- }
- for(i in 1:length(data)){
- if(i == 1){
- Dataset = data.frame(var = as.vector(t(data[[1]])[lower.tri(data[[1]],diag=F)]))
- if(is.null(names)){
- names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix 1")
- } else {names(Dataset)[which(colnames(Dataset) == "var")] = names[1]}
- }
- if(i >= 2){
- Dataset = dplyr::mutate(Dataset,
- var = as.vector(t(data[[i]])[lower.tri(data[[i]],diag=F)]))
- if(is.null(names)){
- names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix ", i)
- } else{names(Dataset)[which(colnames(Dataset) == "var")] = names[i]}
- }
- }
- dim = nrow(data[[1]])
- } else {
- if(length(which(sapply(data, function(x) identical(nrow(x[["distances"]]), ncol(x[["distances"]])))==TRUE)) != length(data)){
- stop("All matrices in the list must be a square matrix. Please, check and fix.")
- }
- if (length(unique(unique(sapply(data, function(x) dim(x[["distances"]])))[1,1:length(data)])) != 1){
- stop("All matrices in the list must be the same dimension. Please, check and fix.")
- }
- for(i in 1:length(data)){
- if(i == 1){
- Dataset = data.frame(var = as.vector(t(data[[1]][["distances"]])[lower.tri(data[[1]][["distances"]],diag=F)]))
- if(is.null(names)){
- names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix 1")
- } else {names(Dataset)[which(colnames(Dataset) == "var")] = names[1]}
- }
- if(i >= 2){
- Dataset = dplyr::mutate(Dataset,
- var = as.vector(t(data[[i]][["distances"]])[lower.tri(data[[i]][["distances"]],diag=F)]))
- if(is.null(names)){
- names(Dataset)[which(colnames(Dataset) == "var")] = paste0("Matrix ", i)
- } else{names(Dataset)[which(colnames(Dataset) == "var")] = names[i]}
- }
- }
- dim = nrow(data[[1]][["distances"]])
- }
- my_custom_cor = function(data, mapping, color = I("black"), sizeRange = c(minsize, maxsize), ...) {
- # get the x and y data to use the other code
- x = GGally::eval_data_col(data, mapping$x)
- y = GGally::eval_data_col(data, mapping$y)
- D = matrix(nrow = dim, ncol = dim)
- D[lower.tri(D, diag = F)] = x
- D[upper.tri(D, diag = F)] = x
- diag(D) = 0
- D2 = matrix(nrow = dim, ncol = dim)
- D2[lower.tri(D2, diag = F)] = y
- D2[upper.tri(D2, diag = F)] = y
- diag(D2) = 0
- ct <- ade4::mantel.randtest(as.dist(D), as.dist(D2), nrepet = nrepet)
- sig = symnum(
- ct$pvalue, corr = FALSE, na = FALSE,
- cutpoints = c(0, 0.001, 0.01, 0.05, 1),
- symbols = c("***", "**", "*", "")
- )
- r = unname(ct$obs)
- rt = format(r, digits = digits)[1]
- # since we can't print it to get the strsize, just use the max size range
- cex = max(sizeRange)
- # helper function to calculate a useable size
- percent_of_range = function(percent, range) {
- percent * diff(range) + min(range, na.rm = TRUE)
- }
- # plot the cor value
- GGally:: ggally_text(
- label = as.character(rt),
- mapping = aes(),
- xP = 0.5, yP = 0.5,
- size = I(percent_of_range(cex * abs(r), sizeRange)),
- color = color,
- ...
- ) +
- # add the sig stars
- geom_text(
- aes_string(
- x = 0.8,
- y = 0.8
- ),
- label = sig,
- size = I(cex),
- color = color,
- ...
- ) +
- # remove all the background stuff and wrap it with a dashed line
- theme_classic() +
- theme(
- panel.background = element_rect(color = col.up.panel),
- axis.line = element_blank(),
- axis.ticks = element_blank(),
- axis.text.y = element_blank(),
- axis.text.x = element_blank()
- )
- }
- my_custom_smooth = function(data, mapping, ...) {
- x = GGally::eval_data_col(data, mapping$x)
- y = GGally::eval_data_col(data, mapping$y)
- D = matrix(nrow = dim, ncol = dim)
- D[lower.tri(D, diag = F)] = x
- D[upper.tri(D, diag = F)] = x
- diag(D) = 0
- D2 = matrix(nrow = dim, ncol = dim)
- D2[lower.tri(D2, diag = F)] = y
- D2[upper.tri(D2, diag = F)] = y
- diag(D2) = 0
- ct <- ade4::mantel.randtest(as.dist(D), as.dist(D2), nrepet = nrepet)
- pval <- unname(ct$pvalue)
- p = ggplot(data = data, mapping = mapping)
- if(is.null(fill.point)==FALSE){
- p = p + geom_point(color = I(col.point), fill = fill.point, shape = shape.point, size = size.point, alpha = alpha.point)
- } else{
- p = p + geom_point(color = I(col.point), shape = shape.point, size = size.point, alpha = alpha.point)
- }
- p = p + theme_classic() +
- theme(
- panel.background = element_rect(fill = "white", color = col.lw.panel),
- axis.line = element_blank(),
- axis.ticks = element_blank(),
- axis.text.y = element_blank(),
- axis.text.x = element_blank()
- )
- if (pval < prob) {
- p = p + theme(
- panel.background = element_rect(fill = scales::alpha(signcol, alpha)))
- }
- p
- }
- ggally_mysmooth = function(data, mapping, ...){
- ggplot(data = data, mapping=mapping) +
- geom_density(fill=alpha(diagcol, 1))+
- theme_classic() +
- theme(panel.background = element_rect(fill=alpha('white', 1), color = col.dia.panel))
- }
- if(main == "auto"){
- title = paste0("Mantel's test with ", nrepet, " resamples")
- } else{title = main}
- if(diag == TRUE){
- diag = list(continuous = ggally_mysmooth)
- } else{diag = NULL}
- p1 = GGally::ggpairs(
- Dataset,
- title = title,
- diag = diag,
- lower = list(continuous = my_custom_cor),
- upper = list(continuous = my_custom_smooth),
- axisLabels="none")
- ggplot2::theme_set(theme_gray()+theme(panel.spacing=grid::unit(pan.spacing,"lines")))
- if (export == F|FALSE) {
- print(p1)
- } else
- if (file.type == "pdf"){
- if (is.null(file.name)){
- pdf("Pairs of Mantel's test.pdf",width = width, height = height)
- } else
- pdf(paste0(file.name, ".pdf"), width = width, height = height)
- print(p1)
- dev.off()
- }
- if (file.type == "tiff"){
- if (is.null(file.name)){
- tiff(filename = "Pairs of Mantel's test.tiff",width = width, height = height, units = "in", compression = "lzw", res = resolution)
- } else
- tiff(filename = paste0(file.name, ".tiff"), width = width, height = height, units = "in", compression = "lzw", res = resolution)
- print(p1)
- dev.off()
- }
- }
- make_mat = function(data, row, col, value){
- # Function for make a two-way table
- # Arguments:
- # data: the dataset in a long-format column (e.g., environment, cultivar, GY,...)
- # row: the name of the column in the data that will fill the row values in the output
- # col: the name of the column in the data that will fill the column values in the output
- # value: the response variable
- nam = cbind(c(row, col, value))
- data = data.frame(data[(match(c(nam), names(data)))])
- return(data.frame(tapply(data[, 3], data[, c(1, 2)], mean)))
- }
- PCA_oat = function(PC, x="PC1", y="PC2") {
- # function for obtaining the coordinates of the first two PCs for biplot representation using ggplot function
- # Arguments:
- # PC = a "princomp" object
- data = data.frame(PC$x)[1:2] %>% rownames_to_column(var = "NAME")
- datapc <- data.frame(varnames = rownames(PC$rotation), PC$rotation)
- mult = min(
- (max(data[,y]) - min(data[,y])/(max(datapc[,y])-min(datapc[,y]))),
- (max(data[,x]) - min(data[,x])/(max(datapc[,x])-min(datapc[,x])))
- )
- datapc = transform(datapc,
- v1 = .7 * mult * (get(x)),
- v2 = .7 * mult * (get(y))) %>%
- subset(select = c(v1, v2)) %>%
- rownames_to_column(var = "NAME")
- names(datapc)[2:3] = c(x, y)
- datapc = rbind(datapc, data)
- datapc[1:6, 1] = c("2015", "2015", "2016", "2016", "2017", "2017")
- datapc$TYPE = c(rep(c("NF", "WF"),3),
- rep("Cultivar", 22))
- datapc = datapc %>%
- select(TYPE, everything())
- eigs <- PC$sdev^2
- PC1VAR = eigs[1] / sum(eigs) * 100
- PC2VAR = eigs[2] / sum(eigs) * 100
- return(list(datapc = datapc,
- PC1VAR = PC1VAR,
- PC2VAR = PC2VAR))
- }
- colindiag = function(x, n = NULL){
- # Function for multicoolinearity diagnosis
- # Arguments:
- # x: A correlation matrix or a dataframe with only variables. If a correlation matrix is used,
- # the sample size used to compute the correlations must be declared.
- if(!is.matrix(x) && !is.data.frame(x)){
- stop("The object 'x' must be a correlation matrix or a data.frame")
- }
- if(is.matrix(x) && is.null(n)){
- stop("You have a matrix but the sample size used to compute the correlations (n) was not declared.")
- }
- if(is.data.frame(x)){
- if(sum(sapply(x, is.numeric)==T) !=ncol(x)){
- stop("All variables must be numeric.")
- }
- }
- if(is.matrix(x)){
- cor.x = x
- }
- if(is.data.frame(x)){
- cor.x = cor(x)
- n = nrow(x)
- }
- eigen = eigen(cor.x)
- Det = det(cor.x)
- NC = max(eigen$values)/min(eigen$values)
- Aval = data.frame(eigen$values)
- names(Aval) = "Eigenvalues"
- Avet = data.frame(t(eigen$vectors))
- names(Avet) = colnames(x)
- AvAvet = cbind(Aval, Avet)
- VIF = data.frame(diag(solve(cor.x)))
- names(VIF) = "VIF"
- results = data.frame(linear = as.vector(t(cor.x)[lower.tri(cor.x,diag=F)]))
- results = dplyr::mutate(results,
- t = linear*(sqrt(n-2))/(sqrt(1-linear^2)),
- prob = 2*(1-pt(abs(t), df = n-2)))
- names = colnames(x)
- combnam = combn(names,2, paste, collapse = " x ")
- rownames(results) = names(sapply(combnam, names))
- largest_corr = paste0(rownames(results)[which.max(abs(results$linear))], " = ",
- round(results[which.max(abs(results$linear)),1],3))
- smallest_corr = paste0(rownames(results)[which.min(abs(results$linear))], " = ",
- round(results[which.min(abs(results$linear)),1],3))
- ncorhigh = sum(results$linear >= abs(0.8))
- if (NC > 1000){
- cat(paste0("Severe multicollinearity in the matrix! Pay attention on the variables listed bellow\n",
- "CN = ", round(NC,3), "\n"))
- }
- if (NC < 100){
- cat(paste0("Weak multicollinearity in the matrix\n",
- "NC = ", round(NC,3), "\n"))
- }
- if (NC > 100 & NC < 1000 ){
- cat(paste0("The multicollinearity in the matrix should be investigated.\n",
- "NC = ", round(NC,3), "\n",
- "Largest VIF = ", max(VIF), "\n"))
- }
- ultimo = data.frame(Peso=t(AvAvet[c(nrow(AvAvet)),])[-c(1),])
- abs = data.frame(Peso = abs(ultimo[,"Peso"]))
- rownames(abs) = rownames(ultimo)
- ultimo = abs[order(abs[,"Peso"], decreasing = T), , drop = FALSE]
- pesovarname = paste(rownames(ultimo), collapse = ' > ')
- final = list(cormat = data.frame(cor.x),
- corlist = results,
- evalevet = AvAvet,
- VIF = VIF,
- CN = NC,
- det = Det,
- largest_corr = largest_corr,
- smallest_corr = smallest_corr,
- weight_var = pesovarname)
- cat(paste0("Matrix determinant: ", round(Det,7)), "\n")
- cat(paste0("Largest correlation: ", largest_corr), "\n")
- cat(paste0("Smallest correlation: ", smallest_corr), "\n")
- cat(paste0("Number of correlations with r >= |0.8|: ", ncorhigh), "\n")
- cat(paste0("Variables with largest weight in the last eigenvalues: ","\n",
- pesovarname), "\n")
- return(invisible(final))
- }
- CANONICAL = function(x, y){
- # Function for computing canonical correlations using CCA function.
- # Arguments:
- # x and y: A dataframe with the variables in the first (smallest) and second (largest) group, respectively.
- can = CCA(x, y, Type = 2)
- results = list(coeff = data.frame(rbind(can$Coef.X,
- colnames(can$Coef.Y),
- can$Coef.Y)),
- sig = data.frame(cbind(cbind(can[["Var.UV"]], can[["Corr.UV"]]),
- can[["SigTest"]][c(3,5,6)])))
- return(results)
- }
- cat("Download of functions completed. Check the R environment.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement