Advertisement
Light1992

Pipeline corretta Acidi Grassi con Nuova Annotazione 2

Jul 23rd, 2021
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.63 KB | None | 0 0
  1. #Pipeline analisi RNA Targeted - Prima analisi
  2.  
  3. #Librerie da importare
  4. library(Rsubread)
  5. library(edgeR)
  6.  
  7. ##Indicizzazione del genoma di riferimento (Leccino)
  8. buildindex(basename = "index", reference="/home/genomica/DATA/OLGENOME/genoma_Leccino/assemblingV1/Leccino.contigs_min10kb_V4.fasta")
  9.  
  10. ##Individuazione file da utilizzare per l'allineamento a Leccino
  11. fastqPath_R1 <- list.files("/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/R1", pattern="\\R1_001.fastq.gz$", full=TRUE)
  12. opt <- getOption("max.print")
  13. options(max.print=1000000000)
  14. sink("fastqPath_R1.txt")
  15. fastqPath_R1
  16. sink()
  17. options(max.print=opt)
  18.  
  19. fastqPath_R2 <- list.files("/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/R2", pattern="\\R2_001.fastq.gz$", full=TRUE)
  20. opt <- getOption("max.print")
  21. options(max.print=1000000000)
  22. sink("fastqPath_R2.txt")
  23. fastqPath_R2
  24. sink()
  25. options(max.print=opt)
  26.  
  27. ##Creazione dei .BAM di allineamento
  28. align(index="index", maxMismatches=7, readfile1=fastqPath_R1, readfile2=fastqPath_R2, input_format="FASTQ", output_format="BAM")
  29.  
  30. #Creare un oggetto con l'indirizzo dell'unico file .bam presente e chiamarlo all.bam
  31. all.bam <- list.files("/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/R1", pattern="\\.subread.BAM$", full=TRUE)
  32. options(max.print=1000000000)
  33. sink("all_bam.txt")
  34. all.bam
  35. sink()
  36. options(max.print=opt)
  37.  
  38. #Conta delle abbondanze geniche fornendo file di annotazioni esterni (solo di tipo 'gene')
  39. #Creare un file 'metadata' esterno e chiamarlo target
  40. fc <- featureCounts(all.bam, annot.ext="/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/GFF3_aug_leccino_styleAUG_mp25_21_no_Sfclp_k4_con_NR_anno_6lug21", isGTFAnnotationFile=TRUE, nthreads=16, GTF.featureType="gene", GTF.attrType="ID", allowMultiOverlap=TRUE, fraction=TRUE, isPairedEnd=TRUE)
  41. targets <- read.delim("/home/genomica/DATA/GENOLICS/RUN/GENOLICS_R_NuovaAnnotazione/target", stringsAsFactors=FALSE)
  42. colnames(fc$counts) <- rownames(targets)
  43. head(fc$counts)
  44. opt <- getOption("max.print")
  45. options(max.print=1000000000)
  46. sink("featureCounts.txt")
  47. fc$counts
  48. sink()
  49. options(max.print=opt)
  50.  
  51. #Formattazione dei dati e organizzazione per gli script successivi
  52. group <- paste(targets$Cultivar, targets$Condizione, sep=".")
  53. group <- factor(group)
  54. opt <- getOption("max.print")
  55. options(max.print=1000000000)
  56. sink("table_group.txt")
  57. table(group)
  58. sink()
  59. options(max.print=opt)
  60. y <- DGEList(fc$counts, group=group)
  61. y$genes <- fc$annotation[, "Length", drop=FALSE]
  62. y$genes$Symbol <- fc$annotation$GeneID[as.numeric(rownames(y$genes))]
  63. design <- model.matrix(~0+group)
  64. opt <- getOption("max.print")
  65. options(max.print=1000000000)
  66. sink("design_matrix.txt")
  67. design
  68. sink()
  69. options(max.print=opt)
  70. colnames(design) <- levels(group)
  71.  
  72. #Filtraggio per livello di espressione
  73. keep <- filterByExpr(y, design)
  74. opt <- getOption("max.print")
  75. options(max.print=1000000000)
  76. sink("table_keep.txt")
  77. table(keep)
  78. sink()
  79. options(max.print=opt)
  80. y <- y[keep, , keep.lib.sizes=FALSE]
  81. opt <- getOption("max.print")
  82. options(max.print=1000000000)
  83. sink("featureCounts_filtered.txt")
  84. y$counts
  85. sink()
  86. options(max.print=opt)
  87.  
  88. #Calcolo dei fattori di normalizzazione
  89. y <- calcNormFactors(y)
  90. y$samples
  91. opt <- getOption("max.print")
  92. options(max.print=1000000000)
  93. sink("calcNormFactors.txt")
  94. y$samples
  95. sink()
  96. options(max.print=opt)
  97.  
  98. #Analisi di dispersione
  99. y <- estimateDisp(y, design, robust=TRUE)
  100. plotBCV(y)
  101. fit <- glmQLFit(y, design, robust=TRUE)
  102. head(fit$coefficients)
  103. opt <- getOption("max.print")
  104. options(max.print=1000000000)
  105. sink("fit.txt")
  106. fit$coefficients
  107. sink()
  108. options(max.print=opt)
  109. plotQLDisp(fit)
  110. opt <- getOption("max.print")
  111. options(max.print=1000000000)
  112. sink("summary_fit.txt")
  113. summary(fit$df.prior)
  114. sink()
  115. options(max.print=opt)
  116.  
  117. #Esportazione dei dati normalizzati (secondo TMM)
  118. y4 = y
  119. y4 <- calcNormFactors(y4, method = "TMM")
  120. tmm <- cpm(y4)
  121. write.table(tmm, file="./normalizedCounts_TMM.txt", sep="\t", quote=F)
  122.  
  123. #Analisi PCA
  124. pch <- c(0,1,2,15,16,17)
  125. colors <- rep(c("green", "red", "blue"), 3)
  126. plotMDS(y, col=colors[group], pch=pch[group])
  127. legend("topleft", legend=levels(group), pch=pch, col=colors, ncol=3)
  128.  
  129. #Analisi di espressione genica differenziale - Cellina.Turn VS Cellina.Ripe
  130. B.LvsP <- makeContrasts(Cellina.Turn-Cellina.Ripe, levels=design)
  131. res <- glmQLFTest(fit, contrast=B.LvsP)
  132. opt <- getOption("max.print")
  133. options(max.print=1000000000)
  134. sink("topTags_total_Cellina.Turn-Cellina.Ripe.txt")
  135. topTags(res, n=10000000)
  136. sink()
  137. options(max.print=opt)
  138. is.de <- decideTestsDGE(res, adjust.method="BH")
  139. opt <- getOption("max.print")
  140. options(max.print=1000000000)
  141. sink("summary_Cellina.Turn-Cellina.Ripe_BH.txt")
  142. summary(is.de)
  143. sink()
  144. options(max.print=opt)
  145. plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
  146.  
  147.  
  148. #Analisi di espressione genica differenziale - Ruveia.Turn VS Ruveia.Ripe
  149. B.LvsP <- makeContrasts(Ruveia.Turn-Ruveia.Ripe, levels=design)
  150. res <- glmQLFTest(fit, contrast=B.LvsP)
  151. opt <- getOption("max.print")
  152. options(max.print=1000000000)
  153. sink("topTags_total_Ruveia.Turn-Ruveia.Ripe.txt")
  154. topTags(res, n=10000000)
  155. sink()
  156. options(max.print=opt)
  157. is.de <- decideTestsDGE(res, adjust.method="BH")
  158. opt <- getOption("max.print")
  159. options(max.print=1000000000)
  160. sink("summary_Ruveia.Turn-Ruveia.Ripe_BH.txt")
  161. summary(is.de)
  162. sink()
  163. options(max.print=opt)
  164. plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
  165.  
  166. #Analisi di espressione genica differenziale - Salella.Turn VS Salella.Ripe
  167. B.LvsP <- makeContrasts(Salella.Turn-Salella.Ripe, levels=design)
  168. res <- glmQLFTest(fit, contrast=B.LvsP)
  169. opt <- getOption("max.print")
  170. options(max.print=1000000000)
  171. sink("topTags_total_Salella.Turn-Salella.Ripe.txt")
  172. topTags(res, n=10000000)
  173. sink()
  174. options(max.print=opt)
  175. is.de <- decideTestsDGE(res, adjust.method="BH")
  176. opt <- getOption("max.print")
  177. options(max.print=1000000000)
  178. sink("summary_Salella.Turn-Salella.Ripe_BH.txt")
  179. summary(is.de)
  180. sink()
  181. options(max.print=opt)
  182. plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
  183.  
  184. #Analisi di espressione genica differenziale - Cellina.Turn-Salella.Turn
  185. B.LvsP <- makeContrasts(Cellina.Turn-Salella.Turn, levels=design)
  186. res <- glmQLFTest(fit, contrast=B.LvsP)
  187. opt <- getOption("max.print")
  188. options(max.print=1000000000)
  189. sink("topTags_total_Cellina.Turn-Salella.Turn.txt")
  190. topTags(res, n=10000000)
  191. sink()
  192. options(max.print=opt)
  193. is.de <- decideTestsDGE(res, adjust.method="BH")
  194. opt <- getOption("max.print")
  195. options(max.print=1000000000)
  196. sink("summary_Cellina.Turn-Salella.Turn.txt")
  197. summary(is.de)
  198. sink()
  199. options(max.print=opt)
  200. plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
  201.  
  202. #Analisi di espressione genica differenziale - Cellina.Ripe-Salella.Ripe
  203. B.LvsP <- makeContrasts(Cellina.Ripe-Salella.Ripe, levels=design)
  204. res <- glmQLFTest(fit, contrast=B.LvsP)
  205. opt <- getOption("max.print")
  206. options(max.print=1000000000)
  207. sink("topTags_total_Cellina.Ripe-Salella.Ripe.txt")
  208. topTags(res, n=10000000)
  209. sink()
  210. options(max.print=opt)
  211. is.de <- decideTestsDGE(res, adjust.method="BH")
  212. opt <- getOption("max.print")
  213. options(max.print=1000000000)
  214. sink("summary_Cellina.Ripe-Salella.Ripe.txt")
  215. summary(is.de)
  216. sink()
  217. options(max.print=opt)
  218. plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
  219.  
  220.  
  221. #Analisi di espressione genica differenziale - Ruveia.Turn-Salella.Turn
  222. B.LvsP <- makeContrasts(Ruveia.Turn-Salella.Turn, levels=design)
  223. res <- glmQLFTest(fit, contrast=B.LvsP)
  224. opt <- getOption("max.print")
  225. options(max.print=1000000000)
  226. sink("topTags_total_Ruveia.Turn-Salella.Turn.txt")
  227. topTags(res, n=10000000)
  228. sink()
  229. options(max.print=opt)
  230. is.de <- decideTestsDGE(res, adjust.method="BH")
  231. opt <- getOption("max.print")
  232. options(max.print=1000000000)
  233. sink("summary_Ruveia.Turn-Salella.Turn.txt")
  234. summary(is.de)
  235. sink()
  236. options(max.print=opt)
  237. plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
  238.  
  239.  
  240. #Analisi di espressione genica differenziale - Ruveia.Ripe-Salella.Ripe
  241. B.LvsP <- makeContrasts(Ruveia.Ripe-Salella.Ripe, levels=design)
  242. res <- glmQLFTest(fit, contrast=B.LvsP)
  243. opt <- getOption("max.print")
  244. options(max.print=1000000000)
  245. sink("topTags_total_Ruveia.Ripe-Salella.Ripe.txt")
  246. topTags(res, n=10000000)
  247. sink()
  248. options(max.print=opt)
  249. is.de <- decideTestsDGE(res, adjust.method="BH")
  250. opt <- getOption("max.print")
  251. options(max.print=1000000000)
  252. sink("summary_Ruveia.Ripe-Salella.Ripe.txt")
  253. summary(is.de)
  254. sink()
  255. options(max.print=opt)
  256. plotMD(res, status=is.de, values=c(1,-1), col=c("cyan","green"), legend="topright")
  257.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement