Advertisement
daroczig

ggMosaicChart

Aug 4th, 2011
181
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 6.64 KB | None | 0 0
  1. require(ggplot2)
  2. require(gtools)
  3. require(descr) # OR: require(gmodels)
  4.  
  5. # helpers functions
  6. splitString <- function (text, width=15) {
  7. # To split string at given (character count) length.
  8. # Internal function!
  9.     if(is.null(text)) return(NULL)
  10.     result <- NULL
  11.     gapwidth <- nchar(" ")
  12.     availwidth <- width
  13.     for (ii in 1:length(text)) {
  14.         strings <- strsplit(text[ii], " ")[[1]]
  15.         newstring <- strings[1]
  16.         if (length(strings) > 1) {
  17.             linewidth <- nchar(newstring)
  18.             for (i in 2:length(strings)) {
  19.                 w <- nchar(strings[i])
  20.                 if (linewidth + gapwidth + w < availwidth) {
  21.                     sep <- " "
  22.                     linewidth <- linewidth + gapwidth + w
  23.                 }
  24.                 else {
  25.                     sep <- "\n"
  26.                     linewidth <- w
  27.                 }
  28.                 newstring <- paste(newstring, strings[i], sep = sep)
  29.             }}
  30.         result <- c(result,newstring)
  31.     }
  32.     result
  33. # easier method (DONE):  wrapper <- function(x, ...) paste(strwrap(x, ...), collapse = "\n")   TODO!!!
  34. # Usage: splitString(c("Lorem ipsum novum bellum optime fortissime est.","A kutyafüle is az oldalamat fúrja"),15)
  35. }
  36.  
  37. WidthString <- function(text, sep='\n') {
  38. # To find *the longest word's length* and *linebreaks count* in factor levels with given separetor.
  39. # Works with factor or data.frame (of factors) input.
  40. # Internal function!
  41.     if(is.null(text)) return(NULL)
  42.     maxlength <- 0
  43.     linebreaks <- 0
  44.     if (is.data.frame(text)) {
  45.         for (i in 1:length(text)) {
  46.             for (ii in 1:length(levels(text[,i]))) {
  47.                 maxlength <- max(maxlength, max(nchar(strsplit(levels(text[,i])[ii], sep)[[1]])))
  48.                 linebreaks <- max(linebreaks, length(strsplit(levels(text[,i])[ii], sep)[[1]]))
  49.             }
  50.         }
  51.     }
  52.     if (is.factor(text)) {
  53.         for (i in 1:length(levels(text))) {
  54.             maxlength <- max(maxlength, max(nchar(strsplit(levels(text)[i], sep)[[1]])))
  55.             linebreaks <- max(linebreaks, length(strsplit(levels(text)[i], sep)[[1]]))
  56.         }  
  57.     }
  58.     return(list(maxlength=maxlength, linebreaks=linebreaks))
  59. # Usage:
  60. # WidthString(demoData[,c('value')])
  61. # Returns:
  62. # $maxlength
  63. # [1] 32
  64. # $linebreaks
  65. # [1] 1
  66. }
  67.  
  68. getValueLabels <- function(data) {
  69. # Internal function!
  70.     as.character(attr(data,"variable.labels",exact=FALSE)) 
  71. }
  72.  
  73.  
  74. ggMosaicChart <- function(data, dependent, independent, fileprefix='mosaicchart_') {
  75.     ct <- suppressWarnings(CrossTable(data[,independent], data[,dependent], expected=TRUE, chisq=TRUE,
  76.             prop.chisq=TRUE,resid=TRUE,asresid=TRUE))
  77.     #df <- as.data.frame(unclass(ct$t))
  78.     df <- ct$t
  79.     dff <- as.data.frame(cbind(apply(df,1,sum)))
  80.     dff <- as.data.frame(cbind(apply(ct$t,1,sum)))
  81.     names(dff) <- c('segpct')
  82.     dff$segpct <- dff$segpct/sum(dff$segpct)
  83.     dff$xmax <- cumsum(dff$segpct)*100
  84.     dff$xmin <- dff$xmax - dff$segpct*100
  85.     dff$segpct <- NULL
  86.     dfm <- suppressWarnings(cbind(df,dff))
  87.     names(dfm) <- c('segment','variable','value','xmax','xmin')
  88.     dfm$segment <- as.character(dfm$segment)
  89.     dfm$variable <- as.character(dfm$variable)
  90.     dfm$value <- as.numeric(dfm$value)
  91.     dfm1 <- ddply(dfm, .(segment), transform, ym1 = cumsum(value))
  92.     dfm1 <- ddply(dfm1, .(segment), transform, ym2 = max(ym1))
  93.     dfm1 <- ddply(dfm1, .(segment), transform, ymax = (cumsum(value)/ym2)*100)
  94.     dfm1 <- ddply(dfm1, .(segment), transform, ymin = ymax - (value/ym2)*100)
  95.     dfm1$xtext <- with(dfm1, xmin + (xmax - xmin)/2)
  96.     dfm1$ytext <- with(dfm1, ymin + (ymax - ymin)/2)
  97.     dfm1$sig <- as.numeric(ct$chisq$residuals)
  98.     dfm1$rp <- as.numeric(t(ct$prop.row)[,order(dimnames(ct$prop.row)[[1]])])
  99.     dfm1$rc <- as.numeric(t(ct$prop.col)[,order(dimnames(ct$prop.col)[[1]])])
  100.     t <- odd(order(intersect(levels(data[,independent]),unique(dfm1$segment))))
  101.     temp <- NULL
  102.     if (length(t) > 0) {
  103.     for (i in 1:length(t)) {
  104.         if (t[i] == TRUE) {
  105.             temp <- c(temp,rep(0,length(unique(dfm1$variable))))
  106.         } else {
  107.             temp <- c(temp,rep(1,length(unique(dfm1$variable))))
  108.         }
  109.     }}
  110.     dfm1$odd <- temp
  111.     dfm1$segment <- splitString(dfm1$segment, 30)
  112.     sigg <- seq(min(dfm1$sig),max(dfm1$sig),(((min(dfm1$sig)-max(dfm1$sig))*(-1))/ceiling(abs(min(dfm1$sig)-max(dfm1$sig)))))
  113.     sigg[which(sigg < 0)] <- ceiling(sigg[which(sigg < 0)])
  114.     sigg[which(sigg > 0)] <- floor(sigg[which(sigg > 0)])
  115.     sigg <- unique(round(c((min(dfm1$sig)+0.01),sigg,(max(dfm1$sig))-0.01),2))
  116.     pval <- round(as.numeric(ct$chisq$p.value),3)
  117.     if (pval==0) {
  118.         pval <- '0.000'
  119.     }
  120.     p <- ggplot(dfm1, aes(ymin = ymin, ymax = ymax,xmin = xmin, xmax = xmax,order = -as.numeric(segment))) +
  121.             geom_rect(colour = I("black"), aes(fill=sig), alpha=0.5) +
  122.             scale_fill_gradient2(midpoint=0,mid="white",low="#339900",high="#660099",breaks=sigg,name="Pearson\nresiudals") +
  123.             theme_bw(base_size=16) +
  124.             opts(title = substitute(" "*c*" = "*a*", p = "*e*" (n: observations, r: row percent, c: column percent)",list(a=as.numeric(ct$chisq$statistic),c=quote(chi^{2}),d=quote(alpha),e=pval))) +
  125.             geom_text(aes(x = xtext, y = ytext, label = ifelse((((xmax-xmin)<=15) & ((xmax-xmin) > 5) & ((ymax-ymin) > 10)),  paste(variable,"\nn: ",value, "\nc: ",round(rp*100,2),"%\n r: ",round(rc*100,2),"%", sep = ""),  paste('',sep=''))), size = 4) +
  126.             geom_text(aes(x = xtext, y = ytext, label = ifelse((((xmax-xmin)>15) & ((ymax-ymin) > 15)),  paste(variable,"\nn: ", value, "\nc: ",round(rp*100,2),"%\nr: ",round(rc*100,2),"%", sep = ""),  paste('', sep = ""))), size = 4) +
  127.             geom_text(aes(x = xtext, y = ytext, label = ifelse((((xmax-xmin)>15) & ((ymax-ymin) <= 15) & ((ymax-ymin) > 5)),  paste(variable,"\n",value, " / ",round(rp*100,2),"% / ",round(rc*100,2),"%", sep = ""),  paste('', sep = ""))), size = 4) +
  128.             geom_text(aes(x = xtext, y = ytext, label = ifelse((((xmax-xmin)<=15) & ((xmax-xmin) > 10) & ((ymax-ymin) > 5) & ((ymax-ymin) <= 10)),  paste(value, " / ",round(rp*100,2),"% / ",round(rc*100,2),"%", sep = ""),  paste('',sep=''))), size = 3) +
  129.             geom_text(aes(x = xtext, y = 105, label = ifelse((odd == 0), paste(segment,sep=""),paste('',sep=''))), size = 5) +
  130.             geom_text(aes(x = xtext, y = -5, label = ifelse((odd == 1), paste(segment,sep=""),paste('',sep=''))), size = 5) +
  131.             ylab(splitString(as.character(attr(data,"variable.labels",exact=FALSE)[dependent]),70)) +
  132.             xlab(splitString(as.character(attr(data,"variable.labels",exact=FALSE)[independent]),70))
  133.     p <- p + scale_x_continuous(limits=c(-9,110))
  134.     ggsave(paste(fileprefix,independent,'-',dependent,'.png',sep=''),p,width=13,height=12)
  135. }
  136.  
  137. # run demo in working directory
  138. setwd('')
  139.  
  140. # make an example data set: (but is is easier to import a labeled SPSS sav file)
  141. df <- mtcars
  142. df$gear <- factor(df$gear, levels=3:5, labels=c('three', 'four', 'five'))
  143. df$cyl <- factor(df$cyl, levels=c(4,6,8), labels=c('four', 'six', 'eight'))
  144. attr(df, 'variable.labels') <- names(df)
  145. names(attr(df, 'variable.labels')) <- names(df)
  146.  
  147. # make plot to a local png
  148. ggMosaicChart(df, 'gear', 'cyl')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement