misingnoglic

bioScript.R

Dec 11th, 2016
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.63 KB | None | 0 0
  1. source("https://bioconductor.org/biocLite.R")
  2. biocLite("genefilter")
  3.  
  4. source("https://bioconductor.org/biocLite.R")
  5. biocLite("EBImage")
  6.  
  7. source("https://bioconductor.org/biocLite.R")
  8. biocLite("rhdf5")
  9.  
  10. source("https://bioconductor.org/biocLite.R")
  11. biocLite("DESeq")
  12.  
  13. source("https://bioconductor.org/biocLite.R")
  14. biocLite("hom.Hs.inp.db")
  15.  
  16. source("https://bioconductor.org/biocLite.R")
  17. biocLite("AnnotationDbi")
  18.  
  19. source("https://bioconductor.org/biocLite.R")
  20. biocLite("org.Mm.eg.db")
  21. biocLite("rhdf5")
  22.  
  23. install.packages("rhdf5")
  24.  
  25. library(rhdf5)
  26. library(statmod)
  27. library(gdata)
  28. library(genefilter)
  29. library(EBImage)
  30. library(rhdf5)
  31. library(DESeq)
  32. library(statmod)
  33. library(hom.Hs.inp.db)
  34. library(org.Hs.eg.db)
  35. library(AnnotationDbi)
  36. library(org.Mm.eg.db)
  37.  
  38. setwd("~/Desktop")
  39. countsQuartz = read.csv("readCounts.csv", header=TRUE)
  40. dataQuartz = dataCB
  41.  
  42.  
  43. #log-transformed counts
  44. LCountsQuartz <- log10(countsQuartz+1)
  45. LmeansQuartz <- rowMeans( LCountsQuartz )
  46. LvarsQuartz <- rowVars( LCountsQuartz )
  47. Lcv2Quartz <- LvarsQuartz / LmeansQuartz^2
  48.  
  49.  
  50. LogNcountsList = list()
  51. useForFitL = LmeansQuartz>0.3
  52. LogNcountsList$mean = LmeansQuartz[useForFitL]
  53. LogNcountsList$cv2 = Lcv2Quartz[useForFitL]
  54. fit_loglin = nls(cv2 ~ a* 10^(-k*mean), LogNcountsList,start=c(a=10,k=2))
  55.  
  56. #variable genes
  57. is_het = (coefficients(fit_loglin)["a"] *10^(-coefficients(fit_loglin)["k"]*LmeansQuartz) < Lcv2Quartz) &  LmeansQuartz>0.3
  58.  
  59. LogVar_techQuartz_logfit <- coefficients(fit_loglin)["a"] *10^(-coefficients(fit_loglin)["k"]*LmeansQuartz)*LmeansQuartz^2
  60.  
  61. #plot mean/cv2 relationship and variable genes
  62. plot( LmeansQuartz, Lcv2Quartz, log="y", col=1+is_het,xlab='meansLogQuartz',ylab='cv2LogQuartz',ylim=c(1e-3,1e2))  
  63. xg <- seq( 0, 4.5, length.out=100 )
  64. lines( xg, coefficients(fit_loglin)["a"] *10^(-coefficients(fit_loglin)["k"]*xg ),lwd=2,col='blue' )
  65. legend('topright',c('Variable genes'),pch=c(1),col=c('red'),cex=0.8)
  66.  
  67. #gene names in the Quartz-Seq mESC data.
  68. gene_names = rownames(dataQuartz)
  69. gene_names_het = gene_names[is_het]
  70.  
  71. #all Cycle base genes homologs (top 600 genes)
  72. hu2musAll = inpIDMapper(dataCB[1:600,3],'HOMSA','MUSMU',srcIDType='ENSEMBL',destIDType='ENSEMBL')
  73. cellcyclegenes_filterCB = dataQuartz$gene_systematic_name
  74.  
  75. #get cell cycle genes from GO
  76. xxGO <- as.list(org.Hs.egGO2EG)
  77. print(xxGO)
  78. cell_cycleEG <-unlist(xxGO['GO:0007049'])
  79. #get ENSEMBLE ids
  80. x <- org.Hs.egENSEMBL
  81. mapped_genes <- mappedkeys(x)
  82. xxE <- as.list(x[mapped_genes])
  83. ens_ids_cc<-unlist(xxE[cell_cycleEG])
  84. cc_gene_indices <- na.omit(match(ens_ids_cc, rownames(dataQuartz)))
  85.  
  86. #ensemble IDs to gene symbols
  87. x <- org.Hs.egSYMBOL
  88. # Get the gene symbol that are mapped to an entrez gene identifiers
  89. mapped_genes <- mappedkeys(x)
  90. # Convert to a list
  91. xx <- as.list(x[mapped_genes])
  92. xxenseg <- as.list(org.Hs.egENSEMBL2EG)
  93. gene_syms=unlist(xx[unlist(xxenseg[gene_names])])
  94. gene_names_list<-(lapply(xxenseg[gene_names],function(x){if(is.null(x)){x=NA}else{x=x[1]}}))
  95. sym_names=unlist(lapply(xx[unlist(gene_names_list)],function(x){if(is.null(x)){x=NA}else{x=x[1]}}))
  96. sym_names[is.na(sym_names)]=gene_names[is.na(sym_names)]
  97.  
  98. #rename a few variables...
  99. tech_noise = LogVar_techQuartz_logfit
  100. cellcyclegenes_filter <- cc_gene_indices
  101. cell_names <- colnames(countsQuartz)
  102. Y <- LCountsQuartz
  103. genes_het_bool <- (is_het)*1
  104.  
  105. h5createFile("test.h5")
  106. h5write(cellcyclegenes_filterCB,gene_names,sym_names,cellcyclegenes_filter,cell_names,Y,genes_het_bool,tech_noise,file='test.h5', name='dataset_1')
  107.  
  108. h5save(cellcyclegenes_filterCB,gene_names,sym_names,cellcyclegenes_filter,cell_names,Y,genes_het_bool,tech_noise,file='test.h5f', name=NULL, createnewfile=TRUE)
Add Comment
Please, Sign In to add comment