Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(tidyr)
- library(dplyr)
- library(magrittr)
- library(ggplot2)
- # Load the expression data in "wide" format: one column per gene, plus the
- # `expt` and `time` columns that are excluded from the reshape below.
- spellman <- read.csv("spellman-reformated.csv")
- # Restructure into "long" format: one row per (expt, time, gene) combination,
- # with the measured value in `expression`.
- spellman.long <-
-   spellman %>%
-   pivot_longer(
-     cols = -c(expt, time),
-     names_to = "gene",
-     values_to = "expression"
-   )
- # Group by gene and count the missing (NA) expression values per gene.
- spellman.na <-
-   spellman.long %>%
-   group_by(gene) %>%
-   summarize(na.count = sum(is.na(expression)))
- # Names of genes with fewer than 5 missing values.
- # NOTE(review): the cutoff is strict (< 5); switch to <= 5 if genes with
- # exactly 5 missing values should also be kept.
- good.genes <-
-   spellman.na %>%
-   filter(na.count < 5) %>%
-   pull(gene)
- # Keep only the columns for the retained genes. all_of() fails loudly if a
- # name in good.genes is not a column of spellman.
- # NOTE: `expt` and `time` are not gene columns, so they are dropped here too.
- spellman.filtered <-
-   spellman %>%
-   select(all_of(good.genes))
- dim(spellman) # dimensions of original data
- dim(spellman.filtered) # dimensions of filtered data
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement