Advertisement
Guest User

Untitled

a guest
Mar 29th, 2017
50
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.73 KB | None | 0 0
  # Filter the Spellman expression data down to genes with few missing values,
  # then report the table dimensions before and after filtering.
  library(tidyr)
  library(dplyr)
  library(magrittr)
  library(ggplot2)

  # Maximum number of missing (NA) measurements a gene may have and be kept.
  max.na <- 5

  # load data in "wide" format (genes in columns)
  spellman <- read.csv("spellman-reformated.csv")

  # restructure in "long" format: every column except expt and time is
  # stacked into (gene, expression) pairs
  # NOTE: gather() is superseded by pivot_longer() in tidyr >= 1.0; it is
  # kept here so the script still runs with older tidyr versions.
  spellman.long <- gather(spellman, gene, expression, -expt, -time)

  # group by gene and calculate the number of missing (NA) values per gene
  spellman.na <-
    spellman.long %>%
    group_by(gene) %>%
    summarize(na.count = sum(is.na(expression)))

  # get genes where no more than max.na values are missing
  # (inclusive bound: a gene with exactly max.na missing values is kept)
  good.genes <-
    spellman.na %>%
    filter(na.count <= max.na) %$%
    gene

  # select corresponding columns
  # NOTE(review): only gene columns are selected, so expt and time are not
  # carried over into spellman.filtered -- confirm this is intended.
  spellman.filtered <-
    spellman %>%
    select(one_of(good.genes))

  dim(spellman) # dimensions of original data
  dim(spellman.filtered) # dimensions of filtered data
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement