Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #Pipeline analisi RNA Targeted - Prima analisi
- #Librerie da importare
- library(Rsubread)
- library(edgeR)
- ##Indicizzazione del genoma di riferimento (Leccino)
- buildindex(basename = "index", reference="/home/genomica/DATA/OLGENOME/genoma_Leccino/assemblingV1/Leccino.contigs_min10kb_V4.fasta")
- ##Individuazione file da utilizzare per l'allineamento a Leccino
- fastqPath_R1 <- list.files("/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/R1", pattern="\\R1_001.fastq.gz$", full=TRUE)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("fastqPath_R1.txt")
- fastqPath_R1
- sink()
- options(max.print=opt)
- fastqPath_R2 <- list.files("/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/R2", pattern="\\R2_001.fastq.gz$", full=TRUE)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("fastqPath_R2.txt")
- fastqPath_R2
- sink()
- options(max.print=opt)
- ##Creazione dei .BAM di allineamento
- align(index="index", maxMismatches=7, readfile1=fastqPath_R1, readfile2=fastqPath_R2, input_format="FASTQ", output_format="BAM")
- #Creare un oggetto con l'indirizzo dell'unico file .bam presente e chiamarlo all.bam
- all.bam <- list.files("/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/R1", pattern="\\.subread.BAM$", full=TRUE)
- options(max.print=1000000000)
- sink("all_bam.txt")
- all.bam
- sink()
- options(max.print=opt)
- #Conta delle abbondanze geniche fornendo file di annotazioni esterni (solo di tipo 'gene')
- #Creare un file 'metadata' esterno e chiamarlo target
- fc <- featureCounts(all.bam, annot.ext="/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/GFF3_aug_leccino_styleAUG_mp25_21_no_Sfclp_k4_con_NR_anno_6lug21", isGTFAnnotationFile=TRUE, nthreads=16, GTF.featureType="gene", GTF.attrType="ID", allowMultiOverlap=TRUE, fraction=TRUE, isPairedEnd=TRUE)
- targets <- read.delim("/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/target", stringsAsFactors=FALSE)
- colnames(fc$counts) <- rownames(targets)
- head(fc$counts)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("featureCounts.txt")
- fc$counts
- sink()
- options(max.print=opt)
- #Formattazione dei dati e organizzazione per gli script successivi
- group <- paste(targets$Cultivar, targets$Condizione, sep=".")
- group <- factor(group)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("table_group.txt")
- table(group)
- sink()
- options(max.print=opt)
- y <- DGEList(fc$counts, group=group)
- y$genes <- fc$annotation[, "Length", drop=FALSE]
- y$genes$Symbol <- fc$annotation$GeneID[as.numeric(rownames(y$genes))]
- design <- model.matrix(~0+group)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("design_matrix.txt")
- design
- sink()
- options(max.print=opt)
- colnames(design) <- levels(group)
- #Filtraggio per livello di espressione
- keep <- filterByExpr(y, design)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("table_keep.txt")
- table(keep)
- sink()
- options(max.print=opt)
- y <- y[keep, , keep.lib.sizes=FALSE]
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("featureCounts_filtered.txt")
- y$counts
- sink()
- options(max.print=opt)
- #Calcolo dei fattori di normalizzazione
- y <- calcNormFactors(y)
- y$samples
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("calcNormFactors.txt")
- y$samples
- sink()
- options(max.print=opt)
- #Analisi di dispersione
- y <- estimateDisp(y, design, robust=TRUE)
- plotBCV(y)
- fit <- glmQLFit(y, design, robust=TRUE)
- head(fit$coefficients)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("fit.txt")
- fit$coefficients
- sink()
- options(max.print=opt)
- plotQLDisp(fit)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("summary_fit.txt")
- summary(fit$df.prior)
- sink()
- options(max.print=opt)
- #Esportazione dei dati normalizzati (secondo TMM)
- y4 = y
- y4 <- calcNormFactors(y4, method = "TMM")
- tmm <- cpm(y4)
- write.table(tmm, file="./normalizedCounts_TMM.txt", sep="\t", quote=F)
- #Analisi PCA
- pch <- c(0,1,2,15,16,17)
- colors <- rep(c("green", "red", "blue"), 3)
- plotMDS(y, col=colors[group], pch=pch[group])
- legend("topleft", legend=levels(group), pch=pch, col=colors, ncol=3)
- #Analisi di espressione genica differenziale - Cellina.Turn VS Cellina.Ripe
- B.LvsP <- makeContrasts(Cellina.Turn-Cellina.Ripe, levels=design)
- res <- glmQLFTest(fit, contrast=B.LvsP)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("topTags_total_Cellina.Turn-Cellina.Ripe.txt")
- topTags(res, n=10000000)
- sink()
- options(max.print=opt)
- is.de <- decideTestsDGE(res, adjust.method="BH")
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("summary_Cellina.Turn-Cellina.Ripe_BH.txt")
- summary(is.de)
- sink()
- options(max.print=opt)
- plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
- #Analisi di espressione genica differenziale - Ruveia.Turn VS Ruveia.Ripe
- B.LvsP <- makeContrasts(Ruveia.Turn-Ruveia.Ripe, levels=design)
- res <- glmQLFTest(fit, contrast=B.LvsP)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("topTags_total_Ruveia.Turn-Ruveia.Ripe.txt")
- topTags(res, n=10000000)
- sink()
- options(max.print=opt)
- is.de <- decideTestsDGE(res, adjust.method="BH")
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("summary_Ruveia.Turn-Ruveia.Ripe_BH.txt")
- summary(is.de)
- sink()
- options(max.print=opt)
- plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
- #Analisi di espressione genica differenziale - Salella.Turn VS Salella.Ripe
- B.LvsP <- makeContrasts(Salella.Turn-Salella.Ripe, levels=design)
- res <- glmQLFTest(fit, contrast=B.LvsP)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("topTags_total_Salella.Turn-Salella.Ripe.txt")
- topTags(res, n=10000000)
- sink()
- options(max.print=opt)
- is.de <- decideTestsDGE(res, adjust.method="BH")
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("summary_Salella.Turn-Salella.Ripe_BH.txt")
- summary(is.de)
- sink()
- options(max.print=opt)
- plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
- #Analisi di espressione genica differenziale - Cellina.Turn-Salella.Turn
- B.LvsP <- makeContrasts(Cellina.Turn-Salella.Turn, levels=design)
- res <- glmQLFTest(fit, contrast=B.LvsP)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("topTags_total_Cellina.Turn-Salella.Turn.txt")
- topTags(res, n=10000000)
- sink()
- options(max.print=opt)
- is.de <- decideTestsDGE(res, adjust.method="BH")
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("summary_Cellina.Turn-Salella.Turn.txt")
- summary(is.de)
- sink()
- options(max.print=opt)
- plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
- #Analisi di espressione genica differenziale - Cellina.Ripe-Salella.Ripe
- B.LvsP <- makeContrasts(Cellina.Ripe-Salella.Ripe, levels=design)
- res <- glmQLFTest(fit, contrast=B.LvsP)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("topTags_total_Cellina.Ripe-Salella.Ripe.txt")
- topTags(res, n=10000000)
- sink()
- options(max.print=opt)
- is.de <- decideTestsDGE(res, adjust.method="BH")
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("summary_Cellina.Ripe-Salella.Ripe.txt")
- summary(is.de)
- sink()
- options(max.print=opt)
- plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
- #Analisi di espressione genica differenziale - Ruveia.Turn-Salella.Turn
- B.LvsP <- makeContrasts(Ruveia.Turn-Salella.Turn, levels=design)
- res <- glmQLFTest(fit, contrast=B.LvsP)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("topTags_total_Ruveia.Turn-Salella.Turn.txt")
- topTags(res, n=10000000)
- sink()
- options(max.print=opt)
- is.de <- decideTestsDGE(res, adjust.method="BH")
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("summary_Ruveia.Turn-Salella.Turn.txt")
- summary(is.de)
- sink()
- options(max.print=opt)
- plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
- #Analisi di espressione genica differenziale - Ruveia.Ripe-Salella.Ripe
- B.LvsP <- makeContrasts(Ruveia.Ripe-Salella.Ripe, levels=design)
- res <- glmQLFTest(fit, contrast=B.LvsP)
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("topTags_total_Ruveia.Ripe-Salella.Ripe.txt")
- topTags(res, n=10000000)
- sink()
- options(max.print=opt)
- is.de <- decideTestsDGE(res, adjust.method="BH")
- opt <- getOption("max.print")
- options(max.print=1000000000)
- sink("summary_Ruveia.Ripe-Salella.Ripe.txt")
- summary(is.de)
- sink()
- options(max.print=opt)
- plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement