Advertisement
mgordon

plotHR with multiple models

Dec 6th, 2011
666
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 11.16 KB | None | 0 0
  1. plotHR <- function (models, terms = 1 , se = TRUE , polygon_ci = TRUE, rug = "density" ,
  2.         dens.pos = 0 , xlab = "" , ylab = "Hazard Ratio" ,
  3.         main = NULL , xlim = NULL , ylim = NULL,
  4.         col.term = "#08519C", col.se = "#DEEBF7", col.dens=grey(.9),
  5.         lwd.term = 3, lty.term=1, lwd.se=2,
  6.         x.ticks = NULL, y.ticks = NULL, ylog=TRUE,
  7.         cex = 1, bty = "n", axes = TRUE, ...){
  8.    
  9.     # If the user wants to compare different models the same graph
  10.     # the first dataset is then choosen as the default dataset
  11.     # for getting the rug data.  
  12.     if (length(class(models)) != 1 || class(models) != "list"){
  13.         models <- list(models)
  14.     }
  15.    
  16.     # Create vectors of the colors, line types etc to
  17.     # allow for specific settings for each model
  18.     col.se <- rep(col.se, length.out=length(models))
  19.     col.term <- rep(col.term, length.out=length(models))
  20.     polygon_ci <- rep(polygon_ci, length.out=length(models))
  21.     lty.term <- rep(lty.term, length.out=length(models))
  22.     lwd.term <- rep(lwd.term, length.out=length(models))
  23.    
  24.     # set plotting parameters
  25.     par(las = 1 , cex = cex)
  26.    
  27.     # The y-limit needs to be log transformed to work as expected
  28.     if(length(ylim) && ylog==TRUE){
  29.         ylim <- log(ylim)
  30.     }
  31.    
  32.     getCleanLabels <- function(){
  33.         ## extract the names of all model covariates
  34.         all.labels <- attr(models[[1]]$terms , "term.labels")
  35.        
  36.         # remove 'strata()' / 'factor()' / 'pspline()'
  37.         all.labels <- sub ( "pspline[(]([a-zA-Z._0-9]*)[, .a-zA-Z_0-9=]*[)]" , "\\1" , all.labels)
  38.         all.labels <- sub ( "factor[(]([a-zA-Z._0-9]*)[, .a-zA-Z_0-9=]*[)]" , "\\1" , all.labels)
  39.         all.labels <- sub ( "strata[(]([a-zA-Z._0-9]*)[, .a-zA-Z_0-9=]*[)]" , "\\1" , all.labels)
  40.        
  41.         # The cph in the Design package uses rcs instead of pspline
  42.         all.labels <- sub ("rcs[(]([a-zA-Z._0-9]*)[, .a-zA-Z_0-9=()]*[)]", "\\1", all.labels)
  43.         all.labels <- sub ("strat[(]([a-zA-Z._0-9]*)[, .a-zA-Z_0-9=]*[)]" , "\\1" , all.labels)
  44.    
  45.         # Remove as.integer
  46.         all.labels <- sub ( "as.integer[(]([a-zA-Z._0-9]*)[, .a-zA-Z_0-9=]*[)]" , "\\1" , all.labels)
  47.  
  48.         return(all.labels)    
  49.     }
  50.     all.labels <- getCleanLabels()
  51.    
  52.     # Allow the term searched for be a string
  53.     if (is.character(terms)){
  54.         terms <- grep(terms, all.labels)
  55.         if(length(terms) == 0){
  56.             stop(paste("Could not find term:", terms))
  57.         }
  58.     }
  59.    
  60.     # pick the name of the main term which is goint to be plotted
  61.     term.label <- all.labels[terms]
  62.     if (is.na(term.label)){
  63.         stop(paste("Term", terms, "not found"))
  64.     }
  65.    
  66.     # Remove interaction terms since the data can't be found, ex. male_gender:prosthesis
  67.     terms_with_interaction <- grep("[_.a-zA-Z0-9]+:[_.a-zA-Z0-9]+", all.labels)
  68.     if(length(terms_with_interaction)>0){
  69.         all.labels <- all.labels[-terms_with_interaction]
  70.     }
  71.    
  72.     ## extract data from model;
  73.     # only the covariates really used in the model
  74.     # only complete covariate records (only used in the model anyway)
  75.     # 'as.data.frame()' and 'names()' have to be explicitly specified in case of a univariate model
  76.     getDataFromModel <- function(m){
  77.         data <- eval(m$call$data)
  78.         data <- as.data.frame(na.exclude(data[ , all.labels]))
  79.         names(data) <- all.labels
  80.         return(data)
  81.     }
  82.    
  83.     ### _______________ the smooth term prediction ____________
  84.     ## calculate the HR for all the covariate values found in the dataset
  85.     getFitAndConfidenceInterval <- function(model){
  86.         if(length(grep("cph", class(model))) > 0){
  87.                 # If this is a cph model then don't exclude the na values
  88.                 term <- predict (model , type="terms" , se.fit = TRUE , expand.na=FALSE, na.action=na.pass)
  89.                
  90.                 # The cph model fails to pick the terms of interest
  91.                 if (NCOL(term$fit) > 1){
  92.                     col_2_pick = which(colnames(term$fit) == term.label)
  93.                     term$fit <- term$fit[,col_2_pick]
  94.                     term$se.fit <- term$se.fit[,col_2_pick]
  95.                 }
  96.         }else{
  97.                 term <- predict (model , type="terms" , se.fit = TRUE , terms = terms)
  98.         }
  99.        
  100.         # attach the smooth fit for the HR ('fit') and the CI's to the dataset
  101.         # The as.double is a fix since the data.frame otherwise changes name if pspline in coxph
  102.         df <- data.frame(xvalues= getDataFromModel(model)[,term.label],
  103.                 fit = as.double(term$fit),
  104.                 ucl = as.double(term$fit + 1.96 * term$se.fit),
  105.                 lcl = as.double(term$fit - 1.96 * term$se.fit))
  106.        
  107.         # Change to exponential form
  108.         if (ylog == FALSE){
  109.             df$fit <- exp(df$fit)
  110.             df$ucl <- exp(df$ucl)
  111.             df$lcl <- exp(df$lcl)
  112.         }
  113.        
  114.         # The line doesn't get any better if the value is a duplicate
  115.         # but the PDF gets very large if the dataset is large. By removing
  116.         # duplicates this is avoided
  117.         dups <- duplicated(df$xvalues)
  118.         df <- df[dups == FALSE, ]
  119.        
  120.         return(df)    
  121.     }
  122.    
  123.     if (length(ylim) > 0){
  124.         plot_boundaries.y <- ylim
  125.     }else{
  126.         plot_boundaries.y <- NULL
  127.     }
  128.  
  129.     # Just add the boundary values
  130.     getYBoundaries <- function(ylim, variable){
  131.         variable <- variable[is.infinite(variable) == FALSE]
  132.         return (c(min(ylim, variable), max(ylim, variable)))
  133.     }
  134.    
  135.     xvalues <- NULL
  136.     multi_data <- list()
  137.     for (m in models){
  138.         line_data <- getFitAndConfidenceInterval(m)
  139.         multi_data <- append(multi_data, list(line_data))
  140.         plot_boundaries.y <- getYBoundaries(plot_boundaries.y, line_data$fit)
  141.         if (NROW(line_data) > length(xvalues)){
  142.             xvalues <- line_data$xvalues
  143.         }
  144.     }
  145.    
  146.    
  147.     ### _______________ this is the main extraction __________
  148.    
  149.    
  150.    
  151.     ### _______________ what now follows is the graphical manipulation of the plots ______
  152.    
  153.     # plot empty plot with coordinatesystem and labels
  154.     plot_boundaries.x <- range(xvalues)
  155.     plot(y=plot_boundaries.y, x=plot_boundaries.x, xlim = xlim , ylim = ylim , xlab = xlab,
  156.              ylab = ylab , main = main, type = "n" , axes = FALSE, ...)
  157.    
  158.     # plot CI as polygon shade - if 'se = TRUE' (default)
  159.     if (se) {
  160.         plot_conf_interval <- function(model_data, line_or_polygon_color, polygon_ci){
  161.             current_i.backw <- order(model_data$xvalues , decreasing = TRUE)
  162.             current_i.forw <- order(model_data$xvalues)
  163.            
  164.             if (polygon_ci == TRUE){
  165.                 # The x-axel is always the same
  166.                 x.poly <- c(model_data$xvalues[current_i.forw] , model_data$xvalues[current_i.backw])
  167.                 # The y axel is based upin the current model
  168.                 y.poly <- c(model_data$ucl[current_i.forw] , model_data$lcl[current_i.backw])
  169.                 polygon(x.poly , y.poly , col = line_or_polygon_color, border = NA)
  170.             }else{
  171.                 lines(model_data$xvalues[current_i.forw] , model_data$ucl[current_i.forw], col = line_or_polygon_color, lwd = lwd.se, lty=2)
  172.                 lines(model_data$xvalues[current_i.forw] , model_data$lcl[current_i.forw], col = line_or_polygon_color, lwd = lwd.se, lty=2)
  173.             }
  174.         }
  175.        
  176.         # Plot the last on top
  177.         for (i in rev(1:length(models))) {
  178.             plot_conf_interval(multi_data[[i]], col.se[[i]], polygon_ci[[i]])
  179.         }
  180.     }
  181.    
  182.    
  183.     # Use first model for density data
  184.     base_data <- getDataFromModel(models[[1]])
  185.     xvalues_4_density <- base_data[,term.label]
  186.    
  187.     # Choose within limits
  188.     if (length(xlim) == 2){
  189.         xvalues_4_density <- xvalues_4_density[xvalues_4_density >= min(xlim) & xvalues_4_density <= max(xlim)]
  190.     }
  191.    
  192.     ### _______________ rug = "density" ____________________________________________________
  193.     ### density plot at bottom of panel if rug = "density" in function call
  194.     if (rug == "density") {
  195.         # calculate the coordinates of the density function
  196.         density <- density( xvalues_4_density )
  197.         # the height of the densityity curve
  198.         max.density <- max(density$y)
  199.  
  200.         # Get the boundaries of the plot to
  201.         # put the density polygon at the x-line
  202.         plot_coordinates <- par("usr")
  203.        
  204.         # get the "length" and range of the y-axis
  205.         y.scale <- plot_coordinates[4] - plot_coordinates[3]
  206.        
  207.         # transform the y-coordinates of the density
  208.         # to the lower 10% of the plotting panel
  209.         density$y <- (0.1 * y.scale / max.density) * density$y + plot_coordinates[3]
  210.        
  211.         ## plot the polygon
  212.         polygon( density$x , density$y , border = F , col = col.dens)
  213.     }
  214.    
  215.     ## get the quartiles of the main term
  216.     quantiles <- quantile(xvalues , probs = c(0.025,0.25,0.50,0.75,0.975))
  217.    
  218.     # plot white lines (background color) for 2.5%tile, 1Q, median, 3Q and 97.5%tile through confidence shade and density plot
  219.     axis( side = 1 , at = quantiles , labels = FALSE , lwd = 0 , col.ticks = "white"  , lwd.ticks = 1 , tck = 1 )
  220.    
  221.    
  222.     ### _______________ rug = "ticks" ____________________________________________________
  223.     ### rug plot if "ticks" is specified in function call
  224.     if (rug == "ticks") {
  225.        
  226.         # rugs at datapoints
  227.         axis(side = 1 , line = -1.2 , at = jitter(xvalues_4_density) , labels = F , tick = T , tcl = 0.8 , lwd.ticks = 0.1 , lwd = 0)
  228.         # rugs and labels at 1Q, median and 3Q
  229.         axis(side = 1 , line = -1.0 , at = fivenum(xvalues_4_density)[2:4], lwd = 0 , tick = T, tcl = 1.2 , lwd.ticks = 1 , col.ticks = "black" , labels = c("Quartile 1","Median","Quartile 3"), cex.axis = 0.7, col.axis = "black" , padj = -2.8)
  230.         axis(side = 1 , line = 0.0 , at = fivenum(xvalues_4_density)[2:4], lwd = 0 , tick = T, tcl = 0.2 , lwd.ticks = 1 , col.ticks = "black", labels = FALSE)
  231.     }
  232.    
  233.    
  234.     # Plot the last fit on top, therefore use the reverse
  235.     for (i in rev(1:length(models))) {
  236.         current_i.forw <- order(multi_data[[i]]$xvalues)
  237.    
  238.         # Plots the actual regression line
  239.         lines(multi_data[[i]]$xvalues[current_i.forw],
  240.                 multi_data[[i]]$fit[current_i.forw],
  241.                 col = col.term[[i]],
  242.                 lwd = lwd.term[[i]],
  243.                 lty = lty.term[[i]])
  244.     }
  245.    
  246.     # ___________ main plot _____________
  247.    
  248.    
  249.     # plot the axes
  250.     if (axes){
  251.         axis(side = 1, at = x.ticks)
  252.         if (is.null(y.ticks)){
  253.             y.ticks <- axTicks(2)
  254.         }else if (ylog == TRUE){
  255.             # This is an assumption that the ticks
  256.             # aren't provided in logar
  257.             y.ticks <- log(y.ticks)
  258.         }
  259.        
  260.         if (ylog == TRUE){
  261.             # Get familiar y-axis instead of the log
  262.             axis(side = 2 , at = y.ticks , label = round( exp(y.ticks) , digits = 1))
  263.         }else{
  264.             axis(side = 2 , at = y.ticks)
  265.         }
  266.     }
  267.        
  268.     # plot a box around plotting panel if specified - not plotted by default
  269.     box(bty = bty)
  270. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement