Guest User

Untitled

a guest
May 24th, 2018
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.53 KB | None | 0 0
  1. ###################################################################################################
  2. ##################################### Plot disease enrichment #####################################
  3. ###################################################################################################
  4.  
  5. library(ggplot2)
  6. library(plotrix)
  7. library(viridis)
  8. library(reshape2)
  9.  
  10. setwd('~/Google Drive/MGI/VICC/Manuscript/Figures/misc_figures/')
  11.  
  12. ###################################################################################################
  13. #### Read in files
  14. ###################################################################################################
  15. ## Get disease count lists
  16. data <- read.csv(file="~/Google Drive/MGI/VICC/Manuscript/Figures/Data/disease_counts.csv",header=T, stringsAsFactors = F)
  17.  
  18. ###################################################################################################
  19. #### summarize top level data for plotting
  20. ###################################################################################################
  21. top_data <- aggregate(. ~ TopNode_disease + TopNode_doid, data=data[,3:ncol(data)], FUN=sum)
  22.  
  23. ## Get a list of all of the database names
  24. group_cols <- colnames(top_data)[!(colnames(top_data) %in% c("TopNode_disease","TopNode_doid"))]
  25.  
  26. ## Get total counts for each cancer across databases
  27. top_data$total <- apply(top_data[,colnames(top_data) %in% group_cols], 1, FUN = sum)
  28. top_data$total_perc <- top_data$total/sum(top_data$total)*100
  29.  
  30. # ## Get the top 10 cancers (includes top hit for all 6)
  31. # top <- head(top_data[order(-top_data$total),], n = 5)
  32. ## Get diseases for plotting that make up more than 5% of the total
  33. top <- top_data[which(top_data$total_perc > 5),]
  34.  
  35. ## Create a dataframe of only the top 10 + a binned "other cancer" category for all other cancers combined
  36. top_data_scaled <- as.data.frame(top_data[order(-top_data$total),])
  37. top_data_scaled[!(top_data_scaled$TopNode_disease %in% c(top$TopNode_disease, "benign neoplasm", "other")),c("TopNode_disease","TopNode_doid")] <- c("other cancers","other cancers")
  38. top_data_scaled[(top_data_scaled$TopNode_disease %in% c("cancer")),c("TopNode_disease","TopNode_doid")] <- c("other cancers","other cancers")
  39. top_data_scaled[(top_data_scaled$TopNode_disease %in% c("other")),c("TopNode_disease","TopNode_doid")] <- c("other disease","other disease")
  40. top_data_scaled <- aggregate(. ~ TopNode_disease + TopNode_doid, data=top_data_scaled, FUN=sum)
  41. top_data_scaled$TopNode_disease <- gsub(" ","\n",top_data_scaled$TopNode_disease)
  42.  
  43. ###################################################################################################
  44. #### Create pie charts of data
  45. ###################################################################################################
  46. # pie(top_data_scaled[which(top_data_scaled$cgi > 0),"cgi"], labels=top_data_scaled[which(top_data_scaled$cgi > 0),"TopNode_disease"], main="CGI", col=viridis(length(top_data_scaled[,"cgi"]), option="B"))
  47. # pie3D(top_data_scaled[which(top_data_scaled$cgi > 0),"cgi"], labels=top_data_scaled[which(top_data_scaled$cgi > 0),"TopNode_disease"], explode=0.1, main="CGI")
  48.  
  49. ## Create pie charts for each database
  50. create_a_pie <- function(x,y){
  51. p <- pie(x[which(x[,y] > 0),y], labels=x[which(x[,y] > 0),"TopNode_disease"], main=y, col=viridis(length(x[,y]), option="B"))
  52. return(p)
  53. }
  54. # png(file = paste(getwd(),"disease_by_database__piechart_cgi.png", sep = "/"), height=1200, width=1150, res=150)
  55. # print(create_a_pie(top_data_scaled,"cgi"))
  56. # dev.off()
  57. create_a_pie(top_data_scaled,"civic")
  58.  
  59. ###################################################################################################
  60. #### Create bar charts of data
  61. ###################################################################################################
  62. ## Melt the df
  63. top_data_scaled_long <- melt(top_data_scaled, id.vars = c("TopNode_disease", "TopNode_doid"))
  64.  
  65. ## Refactor to desired order
  66. top_data_scaled_long$TopNode_disease <- factor(top_data_scaled_long$TopNode_disease, levels = c((unique(top_data_scaled[order(top_data_scaled$total,decreasing=F),"TopNode_disease"]))), exclude = NULL)
  67.  
  68. ## Plot the disease by database
  69. png(file = paste(getwd(),"disease_by_database.png", sep = "/"), height=1200, width=1150, res=150)
  70. ggplot(top_data_scaled_long[!(top_data_scaled_long$variable %in% c("total","total_perc")),], aes(x=TopNode_disease, y=value, fill = variable)) + geom_col(position = 'stack') + xlab("Disease") + ylab("Evidence") + guides(fill=guide_legend(title="Database")) + scale_fill_viridis(discrete = T, direction = -1)
  71. dev.off()
  72.  
  73.  
  74. ## Spaces are more desirable for the following plots
  75. top_data_scaled_long$TopNode_disease <- gsub("\n"," ",top_data_scaled_long$TopNode_disease)
  76. ## Change the disease order
  77. top_data_scaled_long$TopNode_disease <- factor(top_data_scaled_long$TopNode_disease, levels = as.vector(c("other disease", "benign neoplasm",as.character(unique(top_data_scaled_long[!(top_data_scaled_long$TopNode_disease %in% c("benign neoplasm","other disease")),"TopNode_disease"])))), exclude = NULL)
  78. ## Create a custom color palette
  79. plma <- rev(viridis::plasma(n=7))
  80. new_palette = c("grey87","grey66", plma)
  81.  
  82. ## Plot the databases by disease proportions
  83. png(file = paste(getwd(),"disease_by_database_proportion.png", sep = "/"), height=1200, width=1150, res=150)
  84. ggplot(top_data_scaled_long[!(top_data_scaled_long$variable %in% c("total","total_perc")),], aes(x=variable, y=value, fill = TopNode_disease)) + geom_col(position = 'fill') + xlab("Database") + ylab("Proportion of Evidence") + guides(fill=guide_legend(title="Disease")) + scale_fill_manual(values=new_palette) + theme_bw()
  85. dev.off()
Add Comment
Please, Sign In to add comment