Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library("dplyr")
- library("tidyr")
#' Read a tab-separated gene set file into a long pathway/gene table
#'
#' Each input line is treated as one gene set: field 1 is the set name,
#' field 2 is discarded, and every remaining non-empty field is a gene
#' identifier. (Presumably this is the MSigDB GMT layout, where field 2 is a
#' description/URL -- confirm against the files actually used.)
#'
#' @param file Path to (or connection for) a tab-separated file with a
#'   variable number of fields per line and no header row.
#' @return A tibble with one row per (set, gene) pair and two character
#'   columns, `pathway` and `gene_entrez`, sorted by `pathway`. A file with no
#'   lines, or with no gene fields at all, yields a zero-row tibble.
read_msigdb <- function(file) {
  # Lines are ragged, so find the widest one first. `quote = ""` must match
  # the read.table() call below: with count.fields()'s default quote set, an
  # apostrophe or double quote inside a field makes the count too small (or
  # NA), and read.table(fill = TRUE) then wraps long lines onto extra rows.
  field_counts <- count.fields(file, sep = "\t", quote = "")
  field_counts <- field_counts[!is.na(field_counts)]

  # Fewer than three fields means there is no gene column to reshape.
  if (length(field_counts) == 0L || max(field_counts) < 3L) {
    return(tibble(pathway = character(0), gene_entrez = character(0)))
  }

  # Read every field as text so identifiers are never passed through numeric
  # type inference; short lines are padded with "" by `fill = TRUE`.
  wide <- read.table(
    file,
    header = FALSE, sep = "\t", quote = "", fill = TRUE,
    col.names = paste0("V", seq_len(max(field_counts))),
    colClasses = "character", stringsAsFactors = FALSE
  )

  as_tibble(wide) %>%
    select(-V2) %>%
    rename(pathway = V1) %>%
    pivot_longer(
      cols = -pathway,
      names_to = "column",
      values_to = "gene_entrez"
    ) %>%
    select(-column) %>%
    # Drop the padding cells produced for lines shorter than the widest one.
    filter(!is.na(gene_entrez), gene_entrez != "") %>%
    arrange(pathway)
}
# Load the gene sets into a long pathway/gene table.
# NOTE(review): `msigdb_file` is not defined in the visible part of this
# script; it must hold the path to the input file before this line runs.
msigdb_data <- read_msigdb(msigdb_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement