Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require(ggplot2)
- require(gtools)
- require(descr) # OR: require(gmodels)
- # helpers functions
- splitString <- function (text, width=15) {
- # To split string at given (character count) length.
- # Internal function!
- if(is.null(text)) return(NULL)
- result <- NULL
- gapwidth <- nchar(" ")
- availwidth <- width
- for (ii in 1:length(text)) {
- strings <- strsplit(text[ii], " ")[[1]]
- newstring <- strings[1]
- if (length(strings) > 1) {
- linewidth <- nchar(newstring)
- for (i in 2:length(strings)) {
- w <- nchar(strings[i])
- if (linewidth + gapwidth + w < availwidth) {
- sep <- " "
- linewidth <- linewidth + gapwidth + w
- }
- else {
- sep <- "\n"
- linewidth <- w
- }
- newstring <- paste(newstring, strings[i], sep = sep)
- }}
- result <- c(result,newstring)
- }
- result
- # easier method (DONE): wrapper <- function(x, ...) paste(strwrap(x, ...), collapse = "\n") TODO!!!
- # Usage: splitString(c("Lorem ipsum novum bellum optime fortissime est.","A kutyafüle is az oldalamat fúrja"),15)
- }
- WidthString <- function(text, sep='\n') {
- # To find *the longest word's length* and *linebreaks count* in factor levels with given separetor.
- # Works with factor or data.frame (of factors) input.
- # Internal function!
- if(is.null(text)) return(NULL)
- maxlength <- 0
- linebreaks <- 0
- if (is.data.frame(text)) {
- for (i in 1:length(text)) {
- for (ii in 1:length(levels(text[,i]))) {
- maxlength <- max(maxlength, max(nchar(strsplit(levels(text[,i])[ii], sep)[[1]])))
- linebreaks <- max(linebreaks, length(strsplit(levels(text[,i])[ii], sep)[[1]]))
- }
- }
- }
- if (is.factor(text)) {
- for (i in 1:length(levels(text))) {
- maxlength <- max(maxlength, max(nchar(strsplit(levels(text)[i], sep)[[1]])))
- linebreaks <- max(linebreaks, length(strsplit(levels(text)[i], sep)[[1]]))
- }
- }
- return(list(maxlength=maxlength, linebreaks=linebreaks))
- # Usage:
- # WidthString(demoData[,c('value')])
- # Returns:
- # $maxlength
- # [1] 32
- # $linebreaks
- # [1] 1
- }
- getValueLabels <- function(data) {
- # Internal function!
- as.character(attr(data,"variable.labels",exact=FALSE))
- }
- ggMosaicChart <- function(data, dependent, independent, fileprefix='mosaicchart_') {
- ct <- suppressWarnings(CrossTable(data[,independent], data[,dependent], expected=TRUE, chisq=TRUE,
- prop.chisq=TRUE,resid=TRUE,asresid=TRUE))
- #df <- as.data.frame(unclass(ct$t))
- df <- ct$t
- dff <- as.data.frame(cbind(apply(df,1,sum)))
- dff <- as.data.frame(cbind(apply(ct$t,1,sum)))
- names(dff) <- c('segpct')
- dff$segpct <- dff$segpct/sum(dff$segpct)
- dff$xmax <- cumsum(dff$segpct)*100
- dff$xmin <- dff$xmax - dff$segpct*100
- dff$segpct <- NULL
- dfm <- suppressWarnings(cbind(df,dff))
- names(dfm) <- c('segment','variable','value','xmax','xmin')
- dfm$segment <- as.character(dfm$segment)
- dfm$variable <- as.character(dfm$variable)
- dfm$value <- as.numeric(dfm$value)
- dfm1 <- ddply(dfm, .(segment), transform, ym1 = cumsum(value))
- dfm1 <- ddply(dfm1, .(segment), transform, ym2 = max(ym1))
- dfm1 <- ddply(dfm1, .(segment), transform, ymax = (cumsum(value)/ym2)*100)
- dfm1 <- ddply(dfm1, .(segment), transform, ymin = ymax - (value/ym2)*100)
- dfm1$xtext <- with(dfm1, xmin + (xmax - xmin)/2)
- dfm1$ytext <- with(dfm1, ymin + (ymax - ymin)/2)
- dfm1$sig <- as.numeric(ct$chisq$residuals)
- dfm1$rp <- as.numeric(t(ct$prop.row)[,order(dimnames(ct$prop.row)[[1]])])
- dfm1$rc <- as.numeric(t(ct$prop.col)[,order(dimnames(ct$prop.col)[[1]])])
- t <- odd(order(intersect(levels(data[,independent]),unique(dfm1$segment))))
- temp <- NULL
- if (length(t) > 0) {
- for (i in 1:length(t)) {
- if (t[i] == TRUE) {
- temp <- c(temp,rep(0,length(unique(dfm1$variable))))
- } else {
- temp <- c(temp,rep(1,length(unique(dfm1$variable))))
- }
- }}
- dfm1$odd <- temp
- dfm1$segment <- splitString(dfm1$segment, 30)
- sigg <- seq(min(dfm1$sig),max(dfm1$sig),(((min(dfm1$sig)-max(dfm1$sig))*(-1))/ceiling(abs(min(dfm1$sig)-max(dfm1$sig)))))
- sigg[which(sigg < 0)] <- ceiling(sigg[which(sigg < 0)])
- sigg[which(sigg > 0)] <- floor(sigg[which(sigg > 0)])
- sigg <- unique(round(c((min(dfm1$sig)+0.01),sigg,(max(dfm1$sig))-0.01),2))
- pval <- round(as.numeric(ct$chisq$p.value),3)
- if (pval==0) {
- pval <- '0.000'
- }
- p <- ggplot(dfm1, aes(ymin = ymin, ymax = ymax,xmin = xmin, xmax = xmax,order = -as.numeric(segment))) +
- geom_rect(colour = I("black"), aes(fill=sig), alpha=0.5) +
- scale_fill_gradient2(midpoint=0,mid="white",low="#339900",high="#660099",breaks=sigg,name="Pearson\nresiudals") +
- theme_bw(base_size=16) +
- opts(title = substitute(" "*c*" = "*a*", p = "*e*" (n: observations, r: row percent, c: column percent)",list(a=as.numeric(ct$chisq$statistic),c=quote(chi^{2}),d=quote(alpha),e=pval))) +
- geom_text(aes(x = xtext, y = ytext, label = ifelse((((xmax-xmin)<=15) & ((xmax-xmin) > 5) & ((ymax-ymin) > 10)), paste(variable,"\nn: ",value, "\nc: ",round(rp*100,2),"%\n r: ",round(rc*100,2),"%", sep = ""), paste('',sep=''))), size = 4) +
- geom_text(aes(x = xtext, y = ytext, label = ifelse((((xmax-xmin)>15) & ((ymax-ymin) > 15)), paste(variable,"\nn: ", value, "\nc: ",round(rp*100,2),"%\nr: ",round(rc*100,2),"%", sep = ""), paste('', sep = ""))), size = 4) +
- geom_text(aes(x = xtext, y = ytext, label = ifelse((((xmax-xmin)>15) & ((ymax-ymin) <= 15) & ((ymax-ymin) > 5)), paste(variable,"\n",value, " / ",round(rp*100,2),"% / ",round(rc*100,2),"%", sep = ""), paste('', sep = ""))), size = 4) +
- geom_text(aes(x = xtext, y = ytext, label = ifelse((((xmax-xmin)<=15) & ((xmax-xmin) > 10) & ((ymax-ymin) > 5) & ((ymax-ymin) <= 10)), paste(value, " / ",round(rp*100,2),"% / ",round(rc*100,2),"%", sep = ""), paste('',sep=''))), size = 3) +
- geom_text(aes(x = xtext, y = 105, label = ifelse((odd == 0), paste(segment,sep=""),paste('',sep=''))), size = 5) +
- geom_text(aes(x = xtext, y = -5, label = ifelse((odd == 1), paste(segment,sep=""),paste('',sep=''))), size = 5) +
- ylab(splitString(as.character(attr(data,"variable.labels",exact=FALSE)[dependent]),70)) +
- xlab(splitString(as.character(attr(data,"variable.labels",exact=FALSE)[independent]),70))
- p <- p + scale_x_continuous(limits=c(-9,110))
- ggsave(paste(fileprefix,independent,'-',dependent,'.png',sep=''),p,width=13,height=12)
- }
- # run demo in working directory
- setwd('')
- # make an example data set: (but is is easier to import a labeled SPSS sav file)
- df <- mtcars
- df$gear <- factor(df$gear, levels=3:5, labels=c('three', 'four', 'five'))
- df$cyl <- factor(df$cyl, levels=c(4,6,8), labels=c('four', 'six', 'eight'))
- attr(df, 'variable.labels') <- names(df)
- names(attr(df, 'variable.labels')) <- names(df)
- # make plot to a local png
- ggMosaicChart(df, 'gear', 'cyl')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement