Advertisement
Guest User

Untitled

a guest
Nov 30th, 2015
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.25 KB | None | 0 0
  1. # setup R error handling to go to stderr
  2. options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)})
  3.  
  4. # we need that to not crash galaxy with an UTF8 error on German LC settings.
  5. loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
  6.  
  7. library("getopt")
  8. options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
  9. args <- commandArgs(trailingOnly = TRUE)
  10.  
  11. # get options, using the spec as defined by the enclosed list.
  12. # we read the options from the default: commandArgs(TRUE).
  13. spec <- matrix(c(
  14. "quiet", "q", 0, "logicall",
  15. "help", "h", 0, "logical",
  16. "preprocess","p",2,"character",
  17. "cores","c",1,"integer")
  18. ,byrow=TRUE, ncol=4)
  19. opt <- getopt(spec)
  20.  
  21. # if help was asked for print a friendly message
  22. # and exit with a non-zero error code
  23. if (!is.null(opt$help)) {
  24. cat(getopt(spec, usage=TRUE))
  25. q(status=1)
  26. }
  27.  
  28. # enforce the following required arguments
  29.  
  30. if (is.null(opt$preprocess)) {
  31. cat("'preprocess' is required\n")
  32. q(status=1)
  33. }
  34.  
  35. verbose <- if (is.null(opt$quiet)) {
  36. TRUE
  37. } else {
  38. FALSE
  39. }
  40.  
  41. # Load required libraries
  42.  
  43. suppressPackageStartupMessages({
  44. library("minfi")
  45. library("FlowSorted.Blood.450k")
  46. library("TxDb.Hsapiens.UCSC.hg19.knownGene")
  47. library("doParallel")
  48. })
  49.  
  50.  
  51. ## Parse cheetah code and make dataframe for creating tmp dir
  52. minfi_config_file = paste0("minfi_temp","/minfi_config.txt")
  53. minfi_config = read.table(minfi_config_file)
  54. colnames(minfi_config) = c("status","green","red","name")
  55.  
  56. #minfi_config
  57.  
  58. ## Make the tmpdir for symlinking data
  59. base_dir = paste0("minfi_temp","/base")
  60. system(paste0("mkdir ",base_dir))
  61.  
  62.  
  63. ## Make symlinks of files
  64. for (i in 1:nrow(minfi_config)){
  65. stopifnot(nrow(minfi_config) == nrow(minfi_config["name"]))
  66.  
  67. ## Make green idat file symlinks
  68. file_green = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Grn.idat")
  69. cmd_green = paste("ln -s",as.character(minfi_config[i,"green"]),file_green,sep=" ")
  70. cat("Reading file ",i,"GREEN Channel ", file_green)
  71. system(cmd_green)
  72.  
  73. ## Make red idat file symlinks
  74. file_red = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Red.idat")
  75. cmd_red = paste("ln -s",as.character(minfi_config[i,"red"]),file_red,sep=" ")
  76. cat("Reading file ",i,"RED Channel ", file_red)
  77. system(cmd_red)
  78. }
  79.  
  80. ## Make dataframe with Basenames
  81. Basename = paste0(base_dir,"/",unique(substr(list.files(base_dir),1,17)))
  82. status = minfi_config[match(gsub(".+/","",Basename), minfi_config$name),"status"]
  83. targets = data.frame(Basename=Basename,status=status)
  84.  
  85. #targets
  86.  
  87. ## Read 450k files
  88. RGset = read.450k.exp(targets=targets,verbose=T)
  89. #RGset
  90.  
  91.  
  92. ## Preprocess data with the normalization method chosen
  93. if(opt$preprocess == "quantile"){
  94. normalized_RGset = preprocessQuantile(RGset)
  95. #print("Data has been normalized using Quantile Normalization")
  96. } else if (opt$preprocess == "noob"){
  97. normalized_RGset = preprocessNoob(RGset)
  98. #print("Data has been normalized using Noob Normalization")
  99. } else if (opt$preprocess == "raw"){
  100. normalized_RGset = preprocessRaw(RGset)
  101. #print("Data has been normalized using Raw Normalization")
  102. } else if (opt$preprocess == "illumina"){
  103. normalized_RGset = preprocessIllumina(RGset,bg.correct = TRUE, normalize = c("controls", "no"),reference = 1)
  104. #print("Data has been normalized using Illumina Normalization")
  105. } else if (opt$preprocess == "preprocessFunnorm"){
  106. normalized_RGset = preprocessFunnorm(RGset)
  107. #print("Data has been normalized using Functional Normalization")
  108. } else {
  109. normalized_RGset = RGset
  110. #print("No Normalization applied")
  111. }
  112.  
  113.  
  114. ## Get beta values from Proprocessed data
  115. beta = getBeta(normalized_RGset)
  116.  
  117. ## Set phenotype data
  118. pd = pData(RGset)
  119.  
  120.  
  121. ## DENSITY PLOT
  122. ## Produce PDF file
  123. if (!is.null(RGset)){
  124. pdf("density_plot.pdf")
  125. minfi::densityPlot(dat=RGset,sampGroups=pd$status,main='Density Plot',xlab="Beta")
  126. dev.off()
  127. }
  128.  
  129.  
  130. ## QC REPORT
  131. files = gsub(".+/","",pd$filenames)
  132. ## Produce PDF file
  133. if (!is.null(RGset)) {
  134. # Make PDF of QC report
  135. minfi::qcReport(rgSet=RGset,sampNames=files,sampGroups=pd$status,pdf="qc_report.pdf")
  136. }
  137.  
  138.  
  139. ## MDS Plot
  140. ## Set phenotype data
  141. files = gsub(".+/","",pd$filenames)
  142. #numPositions=as.integer("${numPositions}")
  143.  
  144. ## Produce PDF file
  145. if (!is.null(RGset)) {
  146. ## Make PDF of density plot
  147. pdf("mds_plot.pdf")
  148. minfi::mdsPlot(dat=RGset,sampNames=files,sampGroups=pd$status,main="Beta MDS",numPositions=1000,pch=19)
  149. dev.off()
  150. }
  151.  
  152.  
  153. # Estimate Cell counts
  154. #result = minfi::estimateCellCounts(dat=RGset,meanPlot="${meanplot}")
  155. #write.table(result,file="estimated_cell_counts.txt",quote=FALSE,row.names=FALSE)
  156.  
  157. #print("Plot making finished")
  158.  
  159. ## DMP finder
  160. dmp = dmpFinder(dat=beta,pheno=pd$status,type="categorical")
  161. #,shrinkVar="${shrinkVar}")
  162. write.table(dmp,file="dmps.csv",quote=FALSE,row.names=TRUE)
  163. #print("DMP Finder successful")
  164.  
  165.  
  166. #M = getM(normalized_RGset)
  167. #Beta = getBeta(normalized_RGset)
  168. #CN = getCN(normalized_RGset)
  169. #chr = seqnames(normalized_RGset)
  170. #pos = start(normalized_RGset)
  171.  
  172. #print("retrieved Beta and meth values")
  173.  
  174. # Model Matrix to pass into the bumphunter function
  175. pd=pData(normalized_RGset)
  176. T1= levels(pd$status)[2]
  177. T2= levels(pd$status)[1]
  178.  
  179. # Introduce error if levels are different
  180. stopifnot(T1!=T2)
  181.  
  182. keep=pd$status%in%c(T1,T2)
  183. tt=factor(pd$status[keep],c(T1,T2))
  184. design=model.matrix(~tt)
  185.  
  186. #design
  187.  
  188. # Start bumphunter in a parallel environment
  189. # Parallelize over cores on machine
  190. registerDoParallel(cores = opt$cores)
  191.  
  192. ## Bumphunter Run with normalized_RGset processed with Quantile Normalization
  193. res=bumphunter(normalized_RGset[,keep],design,B=25,smooth=FALSE,cutoff= 0.3,type="Beta")
  194. bumps= res$tab
  195.  
  196. #print("DMRs found with bumphunter")
  197. #head(bumps)
  198.  
  199. ## Choose DMR's of a certain length threshold.
  200. ## This helps reduce the size of DMRs early, and match
  201. ## with genes closest to region
  202. #print("Excluding DMR's with length smaller than 4")
  203. bumps = bumps[bumps$L>4,]
  204.  
  205. #print("Annotating DMR's")
  206. #genes <- annotateTranscripts(TxDb.Hsapiens.UCSC.hg19.knownGene)
  207. #tab=matchGenes(bumps,genes)
  208. #result=cbind(bumps,tab)
  209.  
  210. # Save result, which contains DMR's and closest genes
  211. write.table(bumps,file = "dmrs.csv",quote=FALSE)
  212.  
  213. # Garbage collect
  214. gc()
  215.  
  216. # Block finder
  217. #library(sva)
  218. #pheno <- pData(GRset)
  219. #mod <- model.matrix(~as.factor(status), data=pheno)
  220. #mod0 <- model.matrix(~1, data=pheno)
  221. #sva.results <- sva(mval, mod, mod0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement