yayopoint

crosstable

Sep 1st, 2018 (edited)
422
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 8.77 KB | None | 0 0
  1. library(magrittr)
  2. library(stringr)
  3.  
  4. # Función utilizada en Rkward para unir tablas.
  5. bind.tables <- function (...) {
  6.   if (is.list(..1)) tables <- ..1
  7.   else tables <- list(...)
  8.   output <- unlist(tables)
  9.   dim (output) <- c(dim(tables[[1]]), length(tables))
  10.   dimnames(output) <- c (dimnames(tables[[1]]), list("statistic"=names(tables)))
  11.   output
  12. }
  13.  
  14. crosstable <- function(..., data=parent.frame(), row.vars = NULL, col.vars = NULL, stats = "freq", col.total = FALSE, row.total = FALSE, stats.on.cols = TRUE, digits = 2) {
  15.   # This function will use table or xtabs to make an R table
  16.   # can recive as arguments a couple of vectors or an already made table
  17.   if (is.table(..1))
  18.     table1 <- ..1
  19.   else if (class(..1)=="formula")
  20.     table1 <- xtabs(..1, data=data)
  21.   else
  22.     table1 <- table(...)
  23.  
  24.   # We should build the arguments col.vars or row.vars if not defined
  25.   if(xor(is.null(col.vars), is.null(row.vars))) {
  26.     if (is.null(row.vars)) {
  27.       if (is.character(col.vars))
  28.         col.vars <- which(names(dimnames(table1)) %in% col.vars)
  29.       row.vars <- 1:length(dim(table1))
  30.       row.vars <- row.vars[-col.vars]
  31.     } else if (is.null(col.vars)) {
  32.       if (is.character(row.vars))
  33.         row.vars <- which(names(dimnames(table1)) %in% row.vars)
  34.       col.vars <- 1:length(dim(table1))
  35.       col.vars <- col.vars[-row.vars]
  36.     }
  37.   } else if(is.null(col.vars) & is.null(row.vars)) {
  38.     col.vars <- length(dim(table1))
  39.     row.vars <- 1:(length(dim(table1))-1)
  40.   }
  41.  
  42.   # The table to print will be a list of tables made with prop.table
  43.   # the main table is frequency other tables are percentajes
  44.   tablePrint <- lapply(stats, function(x) {
  45.     if (x == "count")
  46.       table1
  47.     else if (x == "total")
  48.       prop.table(table1)*100
  49.     else if (x == "row")
  50.       prop.table(table1, col.vars)*100
  51.     else if (x == "column")
  52.       prop.table(table1, row.vars)*100
  53.   })
  54.  
  55.   # This will define the names of the stats columns
  56.   f <- stats %in% c("column","row","total")
  57.   tableNames <- stats
  58.   tableNames[f] <- paste("% of", tableNames[f])
  59.  
  60.   names(tablePrint) <- tableNames
  61.   tablePrint <- bind.tables(tablePrint)
  62.  
  63.   # The table is the main table to perform chisq.
  64.   # The tablePrint is an attribute only to show.
  65.   structure(table1,
  66.             tablePrint = tablePrint,
  67.             arguments = list(row.vars = row.vars, col.vars = col.vars, stats = stats, stats.on.cols = stats.on.cols, digits = digits),
  68.             class="crosstable")
  69. }
  70.  
  71. print.crosstable <- function(x) {
  72.   invisible(x)
  73.   #-------------------#
  74.   # GENERAL VARIABLES #
  75.   #-------------------#
  76.   tablePrint <- attr(x, "tablePrint")
  77.   arguments <- attr(x, "arguments")
  78.  
  79.   tableDimNames <- dimnames(tablePrint)
  80.   tablePrint.names <- names(dimnames(tablePrint))
  81.  
  82.   if(arguments$stats.on.cols) {
  83.     col.vars <- c(arguments$col.vars, (length(dim(x))+1):length(dim(tablePrint)))
  84.     row.vars <- arguments$row.vars
  85.   }else{
  86.     col.vars <- arguments$col.vars
  87.     row.vars <- c(arguments$row.vars, (length(dim(x))+1):length(dim(tablePrint)))
  88.   }
  89.  
  90.   row.names <- tablePrint.names[row.vars]
  91.   row.dim <- dim(tablePrint)[row.vars]
  92.   col.names <- tablePrint.names[col.vars]
  93.   col.dim <- dim(tablePrint)[col.vars]
  94.  
  95.   #-------------------------#
  96.   # MAKE THE TABLE TO PRINT #
  97.   #-------------------------#
  98.   dimTable <- dim(tablePrint)
  99.  
  100.   # Index of original table to make
  101.   s <- sapply(1:prod(dimTable[(length(dimTable)-1):length(dimTable)]),rep,prod(dimTable[-((length(dimTable)-1):length(dimTable))]))
  102.   dim(s) <- dimTable
  103.   s <- apply(s, col.vars[length(col.vars)], c)
  104.   s <- sort(s, index.return=TRUE)$ix
  105.  
  106.   tablePrint %<>%
  107.     round(arguments$digits) %>%
  108.     apply(col.vars[length(col.vars)], format)
  109.  
  110.   #Now we sort the table using the s index
  111.   tablePrint <- as.vector(tablePrint)[s]
  112.   dim(tablePrint) <- dimTable
  113.  
  114.   f <- arguments$stats %in% c("column","row","total")
  115.   tableFormat <- rep(" ", length(arguments$stats))
  116.   tableFormat[f] <- "%"
  117.  
  118.   # The Sweep function should add % to the cells
  119.   "%p%" <- function(x,y) paste0(x,y)
  120.   tablePrint <- sweep(tablePrint, length(dimTable), tableFormat, "%p%")
  121.   dim(tablePrint) <- dimTable
  122.  
  123.   #colHeaders
  124.   colHeaders <- tableDimNames[col.vars]
  125.   # Table Width for columns based on the length of var labels
  126.   tableWidth <- 0
  127.   for (i in length(colHeaders):1) {
  128.     # Ajustment of the column width by the variable label
  129.     if (nchar(names(colHeaders)[i]) > sum(nchar(colHeaders[[i]]))) {
  130.       dif <- nchar(names(colHeaders)[i]) - sum(nchar(colHeaders[[i]]))
  131.       extraChar <- round(dif*(nchar(colHeaders[[i]])/sum(nchar(colHeaders[[i]]))))
  132.       if (sum(extraChar) < dif)
  133.         extraChar <- extraChar + ceiling((dif-sum(extraChar))*(nchar(colHeaders[[i]])/sum(nchar(colHeaders[[i]]))))
  134.       colWidth <- nchar(colHeaders[[i]]) + extraChar
  135.     } else {
  136.       colWidth <- nchar(colHeaders[[i]])
  137.     }
  138.     # Ajustment of the lower level vars by the upper level vars
  139.     if(sum(colWidth) > sum(tableWidth) & sum(tableWidth) > 0) {
  140.       sapply(1:length(colWidth),
  141.              function(x) {
  142.                if(colWidth[x] > sum(tableWidth)) {
  143.                  dif <- colWidth[x] - sum(tableWidth)
  144.                  extraChar <- round(dif*(tableWidth/sum(tableWidth)))
  145.                  if (sum(extraChar) < dif)
  146.                    extraChar <- extraChar + ceiling((dif-sum(extraChar))*(tableWidth/sum(tableWidth)))
  147.                  tableWidth + extraChar
  148.                } else {
  149.                  tableWidth
  150.                }
  151.              }
  152.       ) -> tableWidth
  153.     } else if (sum(tableWidth) == 0) {
  154.       tableWidth <- colWidth
  155.     } else {
  156.       tableWidth <- rep(tableWidth, length(colWidth))
  157.     }
  158.   }
  159.  
  160.   # Turn table in to ftable and format it to be printed
  161.   tablePrint <- ftable(tablePrint, col.vars = col.vars)
  162.   tableWidth <- pmax(tableWidth, nchar(apply(tablePrint,2,max)))
  163.   tablePrint <- mapply(function(x,y) format(tablePrint[,x], justify ="right", width=y), 1:prod(col.dim), tableWidth)
  164.   dimnames(tablePrint) <- NULL
  165.  
  166.   # A matrix with row headers.
  167.   mapply(
  168.     function(x) {
  169.       rvalue <- character(0)
  170.       for (i in tableDimNames[[row.names[x]]]) {
  171.         rvalue <- c(rvalue,i,rep("", prod(c(row.dim,1)[(x+1):(length(row.dim)+1)])-1))
  172.       }
  173.       rep(rvalue, prod(row.dim[1:x])/row.dim[x])
  174.     }
  175.     , 1:length(row.dim)
  176.   )  -> rowHeaders
  177.   for(i in 1:ncol(rowHeaders)) rowHeaders[,i] <- format(rowHeaders[,i], width=nchar(row.names)[i])
  178.   dim(rowHeaders) <- c(prod(row.dim),length(row.dim))
  179.   rowHeaders.nchar <- nchar(rowHeaders[1,])
  180.  
  181.   # Start of the table
  182.   cat("|", strrep("-", sum(rowHeaders.nchar+1,tableWidth+1)-1), "|\n", sep="")
  183.  
  184.   #Print the table col headers
  185.   colHeaders <- tableDimNames[col.vars]
  186.   for (i in 1:length(col.dim)) {
  187.     iColRep <- prod(c(1,col.dim)[1:i])
  188.     iColDim <- iColRep*length(colHeaders[[i]])
  189.     sapply(
  190.       1:iColRep,
  191.       function(x)
  192.         sum(tableWidth[1:(length(tableWidth)/iColRep)+(length(tableWidth)/iColRep)*(x-1)])
  193.     ) -> iColWidthLab
  194.     sapply(
  195.       1:iColDim,
  196.       function(x)
  197.         sum(tableWidth[1:(length(tableWidth)/iColDim)+(length(tableWidth)/iColDim)*(x-1)])
  198.     ) -> iColWidth
  199.     nSep <- prod(c(col.dim,1)[(i+1):(length(col.dim)+1)])
  200.    
  201.     cat("|", paste0(strrep(" ", c(rowHeaders.nchar)), "|"), sep="")
  202.     cat(paste0(stringr::str_pad(rep(names(colHeaders)[i],iColRep),iColWidthLab+nSep*col.dim[i]-1, "both"), "|"), "\n", sep="")
  203.     cat("|", paste0(strrep(" ", c(rowHeaders.nchar)), "|"), strrep("-", sum(tableWidth+1)-1), "|\n", sep="")
  204.    
  205.     if (i==length(col.dim)) {
  206.       cat("|", paste0(stringr::str_pad(row.names,rowHeaders.nchar,"both", " "),"|"), sep="")
  207.     } else {
  208.       cat("|", paste0(strrep(" ", c(rowHeaders.nchar)), "|"), sep="")
  209.     }
  210.     cat(paste0(stringr::str_pad(rep(colHeaders[[i]],iColRep),iColWidth+nSep-1, "both"), "|"), "\n", sep="")
  211.     if (i<length(col.dim))
  212.       cat("|", paste0(strrep(" ", c(rowHeaders.nchar)), "|"), strrep("-", sum(tableWidth+1)-1), "|\n", sep="")
  213.   }
  214.  
  215.   # Printing the final table
  216.   cat("|", paste0(strrep("-", c(rowHeaders.nchar,tableWidth)), "|"),"\n", sep="")
  217.   for (i in 1:prod(row.dim)) {
  218.     cat("|", paste0(rowHeaders[i,],"|"), sep="")
  219.     cat(paste0(tablePrint[i,], "|"), sep="")
  220.     cat("\n")
  221.   }
  222.  
  223.   # Close of the table
  224.   cat("|", strrep("-", sum(rowHeaders.nchar+1,tableWidth+1)-1), "|\n", sep="")
  225. }
  226.  
  227. ## Prueba con dos variables generadas aleatoriamente
  228. gender <- sample(c(1,2), 131, replace=TRUE) %>%
  229.   factor(levels=c(1,2), labels=c("Man", "Woman"))
  230. strata <- sample(c(1,2,3), 131, replace=TRUE) %>%
  231.   factor(levels=c(1,2,3), labels=c("Low", "Middle", "High"))
  232. party <- sample(c(1,2), 131, replace=TRUE) %>%
  233.   factor(levels=c(1,2), labels=c("Right", "Left"))
  234.  
  235. crosstable(gender, strata, party, stats=c("count", "column", "row"))
Add Comment
Please, Sign In to add comment