Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- source("https://bioconductor.org/biocLite.R")
- biocLite("genefilter")
- source("https://bioconductor.org/biocLite.R")
- biocLite("EBImage")
- source("https://bioconductor.org/biocLite.R")
- biocLite("rhdf5")
- source("https://bioconductor.org/biocLite.R")
- biocLite("DESeq")
- source("https://bioconductor.org/biocLite.R")
- biocLite("hom.Hs.inp.db")
- source("https://bioconductor.org/biocLite.R")
- biocLite("AnnotationDbi")
- source("https://bioconductor.org/biocLite.R")
- biocLite("org.Mm.eg.db")
- biocLite("rhdf5")
- install.packages("rhdf5")
- library(rhdf5)
- library(statmod)
- library(gdata)
- library(genefilter)
- library(EBImage)
- library(rhdf5)
- library(DESeq)
- library(statmod)
- library(hom.Hs.inp.db)
- library(org.Hs.eg.db)
- library(AnnotationDbi)
- library(org.Mm.eg.db)
- setwd("~/Desktop")
- countsQuartz = read.csv("readCounts.csv", header=TRUE)
- dataQuartz = dataCB
- #log-transformed counts
- LCountsQuartz <- log10(countsQuartz+1)
- LmeansQuartz <- rowMeans( LCountsQuartz )
- LvarsQuartz <- rowVars( LCountsQuartz )
- Lcv2Quartz <- LvarsQuartz / LmeansQuartz^2
- LogNcountsList = list()
- useForFitL = LmeansQuartz>0.3
- LogNcountsList$mean = LmeansQuartz[useForFitL]
- LogNcountsList$cv2 = Lcv2Quartz[useForFitL]
- fit_loglin = nls(cv2 ~ a* 10^(-k*mean), LogNcountsList,start=c(a=10,k=2))
- #variable genes
- is_het = (coefficients(fit_loglin)["a"] *10^(-coefficients(fit_loglin)["k"]*LmeansQuartz) < Lcv2Quartz) & LmeansQuartz>0.3
- LogVar_techQuartz_logfit <- coefficients(fit_loglin)["a"] *10^(-coefficients(fit_loglin)["k"]*LmeansQuartz)*LmeansQuartz^2
- #plot mean/cv2 relationship and variable genes
- plot( LmeansQuartz, Lcv2Quartz, log="y", col=1+is_het,xlab='meansLogQuartz',ylab='cv2LogQuartz',ylim=c(1e-3,1e2))
- xg <- seq( 0, 4.5, length.out=100 )
- lines( xg, coefficients(fit_loglin)["a"] *10^(-coefficients(fit_loglin)["k"]*xg ),lwd=2,col='blue' )
- legend('topright',c('Variable genes'),pch=c(1),col=c('red'),cex=0.8)
- #gene names in the Quartz-Seq mESC data.
- gene_names = rownames(dataQuartz)
- gene_names_het = gene_names[is_het]
- #all Cycle base genes homologs (top 600 genes)
- hu2musAll = inpIDMapper(dataCB[1:600,3],'HOMSA','MUSMU',srcIDType='ENSEMBL',destIDType='ENSEMBL')
- cellcyclegenes_filterCB = dataQuartz$gene_systematic_name
- #get cell cycle genes from GO
- xxGO <- as.list(org.Hs.egGO2EG)
- print(xxGO)
- cell_cycleEG <-unlist(xxGO['GO:0007049'])
- #get ENSEMBLE ids
- x <- org.Hs.egENSEMBL
- mapped_genes <- mappedkeys(x)
- xxE <- as.list(x[mapped_genes])
- ens_ids_cc<-unlist(xxE[cell_cycleEG])
- cc_gene_indices <- na.omit(match(ens_ids_cc, rownames(dataQuartz)))
- #ensemble IDs to gene symbols
- x <- org.Hs.egSYMBOL
- # Get the gene symbol that are mapped to an entrez gene identifiers
- mapped_genes <- mappedkeys(x)
- # Convert to a list
- xx <- as.list(x[mapped_genes])
- xxenseg <- as.list(org.Hs.egENSEMBL2EG)
- gene_syms=unlist(xx[unlist(xxenseg[gene_names])])
- gene_names_list<-(lapply(xxenseg[gene_names],function(x){if(is.null(x)){x=NA}else{x=x[1]}}))
- sym_names=unlist(lapply(xx[unlist(gene_names_list)],function(x){if(is.null(x)){x=NA}else{x=x[1]}}))
- sym_names[is.na(sym_names)]=gene_names[is.na(sym_names)]
- #rename a few variables...
- tech_noise = LogVar_techQuartz_logfit
- cellcyclegenes_filter <- cc_gene_indices
- cell_names <- colnames(countsQuartz)
- Y <- LCountsQuartz
- genes_het_bool <- (is_het)*1
- h5createFile("test.h5")
- h5write(cellcyclegenes_filterCB,gene_names,sym_names,cellcyclegenes_filter,cell_names,Y,genes_het_bool,tech_noise,file='test.h5', name='dataset_1')
- h5save(cellcyclegenes_filterCB,gene_names,sym_names,cellcyclegenes_filter,cell_names,Y,genes_het_bool,tech_noise,file='test.h5f', name=NULL, createnewfile=TRUE)
Add Comment
Please, Sign In to add comment