Advertisement
Guest User

Untitled

a guest
Mar 25th, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.99 KB | None | 0 0
  1. # load data into R
  2. library(tidyverse)
  3. library(edgeR)
  4. files <- dir(path = "data", pattern = "*.tab", full.names = T)
  5.  
  6. counttablefull <- files %>%
  7. map(read_tsv, skip = 4, col_names = FALSE ) %>%
  8. reduce(cbind)
  9.  
  10. counttablefull %>% head()
  11.  
  12. datasets <- files %>%
  13. stringr::str_replace("data/", "") %>%
  14. stringr::str_replace("ReadsPerGene.out.tab", "")
  15.  
  16. colnames <- c()
  17. for (i in datasets) {
  18. colnames <- c(colnames,
  19. paste0("gene_i", i),
  20. paste0(i, "unstranded"),
  21. paste0(i, "_forwardstrand"),
  22. paste0(i, "_reversestrand")
  23. )
  24. }
  25.  
  26. names(counttablefull) <- colnames
  27.  
  28. # make a column for gene id, and remove duplicates
  29. counttablefull <-
  30. counttablefull %>%
  31. mutate(ensembl_gene = gene_iinput29b_2) %>%
  32. select(-starts_with("gene"))
  33.  
  34.  
  35. counttablefull %>%
  36. select(-ensembl_gene) %>%
  37. summarise_each(funs(sum)) %>%
  38. gather(library, counts) %>%
  39. separate(library, into = c("treatment", "replicate", "stranding"),
  40. sep="_") %>%
  41. spread(stranding, counts) %>%
  42. mutate(propF = forwardstrand/unstranded,
  43. propR = reversestrand/unstranded)
  44.  
  45. # visualise library depth
  46. counttablefull %>%
  47. select(ends_with("reversestrand")) %>%
  48. summarise_each(funs(sum)) %>%
  49. gather(library, counts) %>%
  50. separate(library, into = c("treatment", "replicate", "stranding"),
  51. sep="_")%>%
  52. ggplot(aes(y=counts, x=treatment, fill=replicate)) + geom_bar(
  53. stat="identity", position="dodge") + theme_minimal()
  54.  
  55.  
  56. counttablefull <- counttablefull %>% select(ensembl_gene, ends_with("reversestrand"))
  57.  
  58. names(counttablefull) <- str_replace(names(counttablefull), "_reversestrand", "")
  59.  
  60. # EDA of count table ----
  61.  
  62. counttablefull %>%
  63. select(-ensembl_gene) %>%
  64. gather(library, value) %>%
  65. filter(value > 0) %>%
  66. ggplot(aes(x=value)) + geom_density() + facet_wrap(~library) +
  67. scale_x_log10(labels = scales::comma)
  68.  
  69. # Filter out lowly expressed genes
  70.  
  71. counttablematrix <- counttablefull %>%
  72. select(-ensembl_gene) %>%
  73. as.matrix()
  74. row.names(counttablematrix) <- counttablefull$ensembl_gene
  75.  
  76. counttable_cpm <- cpm(counttablematrix)
  77.  
  78. counttablefull %>%
  79. select(-ensembl_gene) %>%
  80. purrr::modify(~sum(.)) %>%
  81. distinct() %>%
  82. gather(library, counts) %>%
  83. mutate(millionreads = counts/(10^6)) %>%
  84. mutate(cpmThreshold = 15/millionreads) %>%
  85. #filter(str_detect(library, "input")) %>%
  86. mutate(mycutoff = 0.6 * millionreads)
  87.  
  88. # Filter ---
  89. subsetting_matrix <- counttable_cpm > 0.6
  90. subsetting_vector <- rowSums(subsetting_matrix) >= 3
  91. counttablematrix_filt <- counttablematrix[subsetting_vector,]
  92.  
  93. mydgelist <- DGEList(counttablematrix_filt)
  94.  
  95. ## MDS plot ---
  96. plotMDS(mydgelist)
  97.  
  98. ## DGEA
  99. ## normalise
  100. mydgelist <- calcNormFactors(mydgelist)
  101. mydgelist$samples
  102. groups <- as.factor(
  103. str_split(rownames(mydgelist$samples), "_", simplify=T)[,1])
  104.  
  105. design <- model.matrix(~0 + groups)
  106. colnames(design) <- levels(groups)
  107.  
  108. # limma
  109. mydgelist_voomed <- voom(mydgelist, design, plot = T)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement