Advertisement
Guest User

edgeR_DEseq2

a guest
Mar 19th, 2019
203
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 4.88 KB | None | 0 0
  1. # create FAKA fastq files
  2. touch a.fastq b.fastq c.fastq d.fastq e.fastq f.fastq
  3.  
  4. ## edgeR COMMANDS
  5. #Open the R terminal (from the folder with .count and samples1 files) and do the following commands.
  6. #R
  7.  
  8. #Load the edgeR package, already installed:
  9. library(edgeR)
  10.  
  11. #Create a variable with samples file information:
  12. samples = read.table("samples_edgeR", sep="\t", header=TRUE)
  13.  
  14. #Read count files created by htseq-count with readDGE function
  15. counts = readDGE(samples$countf)$counts
  16.  
  17. #Filter weakly expressed and noninformative (e.g., non-aligned) features using a command like
  18. noint = rownames(counts) %in% c("__no_feature","__ambiguous","__too_low_aQual","__not_aligned","__alignment_not_unique")
  19. cpms = cpm(counts)
  20. keep = rowSums(cpms >1) >=3 & !noint
  21. counts = counts[keep,]
  22. colnames(counts) = samples$shortname
  23. head(counts[,order(samples$condition)], 5)
  24.  
  25.  
  26. # Create a DGEList object (edgeR’s container for RNA-seq count data):
  27. d = DGEList(counts=counts, group = samples$condition)
  28. d$samples
  29.  
  30. # Estimate normalization factors by effective library size (TMM -trimmed mean of M-values)
  31. #Normalizes for RNA composition by finding a set of scalingfactors for the library sizes that minimize the log-fold changes between the samples for most genes
  32. d = calcNormFactors(d)
  33. d$samples
  34.  
  35.  
  36.  
  37. # Inspect the relationships between samples using a multidimensional scaling (MDS) plot
  38. plotMDS(d, labels=samples$shortname, col=c("darkgreen","blue")[factor(samples$condition)])
  39.  
  40. # Export the figure in PDF format:
  41. dev.copy2pdf(file="plotMDS.pdf")
  42.  
  43. # Estimate dispersions with quantile-adjusted conditional maximum likelihood (qCML) method (experiments with single factor)
  44. d = estimateCommonDisp(d)
  45. d = estimateTagwiseDisp(d)
  46.  
  47. #Plot the mean-variance relationship:
  48. plotMeanVar(d, show.tagwise.vars=TRUE, NBline=TRUE)
  49. dev.copy2pdf(file="plotMeanVar.pdf")
  50.  
  51. #plot the relationship of biological coefficient of variation (sqrt(common.dispersion)) versus mean log CPM
  52. d$common.dispersion
  53. plotBCV(d)
  54. dev.copy2pdf(file="plotBCV.pdf")
  55.  
  56. d$common.dispersion
  57. sqrt(d$common.dispersion)
  58.  
  59.  
  60.  
  61. # Test for differential expression (classic' edgeR for single factor experiments) - exact test:
  62. de = exactTest(d, pair=c("cntr","trat"))
  63.  
  64. # Use the topTags function to present a tabular summary of the differential expression statistics (exact Test):
  65. tt = topTags(de, n=nrow(d))
  66. # Visualize it:
  67. head(tt$table)
  68.  
  69. #Inspect the depth-adjusted reads per million for some of the top differentially expressed genes:
  70. nc = cpm(d, normalized.lib.sizes=TRUE)
  71. rn = rownames(tt$table)
  72. head(nc[rn,order(samples$condition)],5)
  73.  
  74. #Create a graphical summary, M (log-fold change) versus A (log-average expression) plot, showing the genes selected as differentially expressed (with # a 5% false discovery rate)
  75. deg = rn[tt$table$FDR < .05]
  76.  
  77. #Plot log-fold change against log-counts per million, with DE genes highlighted
  78. plotMD(de)
  79. abline(h=c(-1, 1), col="blue")
  80. dev.copy2pdf(file="plotMD.pdf")
  81.  
  82. #Total number of genes significantly up-regulated or down-regulated at 5% FDR
  83. summary(decideTests(de))
  84.  
  85.  
  86. # Finally, save the result table as a CSV file
  87. write.csv(tt$table, file="tableDEG_edgeR.csv")
  88.  
  89.  
  90. #Exit R terminal and check the final output:
  91. q()
  92.  
  93.  
  94.  
  95. #### // \\ ####
  96.  
  97. ## DESeq2 COMMANDS
  98.  
  99. #Open R
  100. #R
  101.  
  102. #Load the DESeq2 package, already installed:
  103. library("DESeq2")
  104.  
  105. #Go to the correct folder
  106. getwd()
  107.  
  108.  
  109. directory <- "/home/rabico/Documentos/transcriptoma"
  110.  
  111. #Create a variable with sampleTable file information:
  112. sampleTable <- read.table('samples_DESeq2',sep='\t', header=T)
  113.  
  114. #Treatments - levels
  115. treatments = c("cntr","trat")
  116.  
  117. #Use the function DESeqDataSetFromHTSeqCount if you have used htseq-count from the HTSeq
  118. ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,directory = directory,design = ~ condition)
  119.  
  120.  
  121. #Create factor according to condition
  122. colData(ddsHTSeq)$condition <- factor(colData(ddsHTSeq)$condition,levels = treatments)
  123.  
  124. #Count number of lines
  125. nrow(ddsHTSeq)
  126.  
  127. #Pre-filtering low count genes
  128. keep <- rowSums(counts(ddsHTSeq)) >= 10
  129. dds <- ddsHTSeq[keep,]
  130. nrow(dds)
  131.  
  132.  
  133. #Differential expression analysis
  134. dds <- DESeq(dds)
  135.  
  136. #inspect sample relationships, invoke a variance-stabilizaing transformation
  137. library("vsn")
  138. vsd <- vst(dds, blind = FALSE)
  139. plotPCA(vsd)
  140. dev.copy2pdf(file="plotPCAvsd")
  141.  
  142.  
  143. #Inspect the estimated dispersions
  144. plotDispEsts(dds)
  145. dev.copy2pdf(file="plotDispEsts")
  146.  
  147.  
  148. #Log fold change shrinkage for visualization and ranking
  149. resultsNames(dds)
  150. resLFC <- lfcShrink(dds, coef="condition_trat_vs_cntr", type="apeglm")
  151. resLFC
  152. plotMA(resLFC, ylim=c(-8,8), main= "trat vs cntr", alpha=0.05)
  153. abline(h=c(-1,1),col='green')
  154. dev.copy2pdf(file='plotMA.pdf')
  155.  
  156.  
  157.  
  158. #Ordering by p-value adjusted
  159. resOrdered <- resLFC[order(resLFC$padj),]
  160.  
  161. write.csv(as.data.frame(resOrdered), file="tableDEG_DESeq2.csv")
  162.  
  163. resSig <- subset(resOrdered, padj < 0.05)
  164. resSig
  165.  
  166.  
  167. q()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement