Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Count in how many FASTA files each sequence ID occurs and list those files.
#
# Path noted by the author (presumably the directory holding the *.fasta
# files, from which the shell command below is run -- confirm):
#   /data4/bio/runs-danio/PLANT/Compare/Brass/BN
#
# In the shell, first build the "<file>\t<id>" table:
# grep '>' *.fasta | sed 's/:/\t/' | sed 's/>//' > ../ids_BN.fasta
#
# Input : ../ids_BN.fasta       two tab-separated columns, no header row
# Output: ../ids_BN_counts.txt  tab-separated columns: id, count, files

# quote = "" keeps quote characters inside FASTA headers literal; with the
# read.delim() default (quote = "\"") an unbalanced double quote in a header
# would silently swallow the following lines.
ids <- read.delim("../ids_BN.fasta", header = FALSE, quote = "",
                  stringsAsFactors = FALSE)
colnames(ids) <- c("file", "id")

# Keep each (file, id) pair once so that an ID repeated inside a single file
# is not counted twice.
ids <- unique(ids)

# BN_all.fasta is excluded from the comparison (presumably it is the
# concatenation of the other files -- confirm).
ids <- ids[ids$file != "BN_all.fasta", ]

# Strip only a literal trailing ".fasta". The previous unanchored pattern
# ".fasta" treated "." as "any character" and removed every match, which
# mangled names such as "A_fasta_v2.fasta".
ids$file <- sub("\\.fasta$", "", ids$file)

# Number of distinct files in which each ID occurs.
id.counts <- as.data.frame(table(ids$id))
colnames(id.counts) <- c("id", "count")

# Comma-separated list of the files that contain each ID.
agg <- aggregate(ids$file, by = list(ids$id), paste, collapse = ",")
colnames(agg) <- c("id", "files")

# Join counts with file lists on the shared "id" column; most widely shared
# IDs first.
id.counts <- merge(id.counts, agg)
id.counts <- id.counts[order(id.counts$count, decreasing = TRUE), ]

# How many IDs occur in more than one file (auto-printed at top level).
sum(id.counts$count > 1)

write.table(id.counts, "../ids_BN_counts.txt", row.names = FALSE,
            sep = "\t", quote = FALSE)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement