Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library("seqinr")
- library("Biostrings")
- library("MASS")
- library("GenomicRanges")
- library("dndscv")
- ## Helper methods copied from dndscv
- nt = c("A","C","G","T")
- compnt = setNames(rev(nt), nt)
- chr2cds = function(pos,cds_int,strand) {
- if (strand==1) {
- return(which(pos==unlist(apply(cds_int, 1, function(x) x[1]:x[2]))))
- } else if (strand==-1) {
- return(which(pos==rev(unlist(apply(cds_int, 1, function(x) x[1]:x[2])))))
- }
- }
- rest_nt <- function(n) {
- rest_nts <- nt[-which(nt == n)]
- sample(rest_nts, 1)
- }
- data("refcds_hg19", package="dndscv")
- TP53 <- RefCDS[which(sapply(RefCDS, function(x) x$gene_name) == "TP53")]
- cds_int <- TP53[[1]]$intervals_cds
- cds_pos <- unlist(apply(cds_int, 1, function(x) x[1]:x[2]))
- splice_pos <- TP53[[1]]$intervals_splice
- # Use mutations from example dataset but remove all mapping to TP53
- data("dataset_simbreast", package="dndscv")
- my_mutations <- subset(mutations, !(chr == "17" & pos %in% cds_pos) & nchar(ref) == 1 & !(pos %in% splice_pos))
- # Sample 50 random positions in the TP53 CDS
- my_mut_pos <- sample(cds_pos, 50)
- # Make a data frame with random mutations at sampled positions
- my_mut_cds_pos <- sapply(my_mut_pos, function(x) (chr2cds(x, TP53[[1]]$intervals_cds, TP53[[1]]$strand)))
- my_mut_ref <- sapply(my_mut_cds_pos,
- function(x) as.character(TP53[[1]]$seq_cds[x]))
- my_muts <- data.frame(sampleID=paste0("S_", 1:length(my_mut_pos)),
- chr=TP53[[1]]$chr,pos=my_mut_pos,
- ref=compnt[my_mut_ref],
- mut=sapply(my_mut_ref, rest_nt),
- stringsAsFactors=F)
- ## Apply dndscv on the combined mutations
- dndsout = dndscv(rbind(my_mutations[, 1:5], my_muts))
- sel_cv = dndsout$sel_cv
- signif_genes = sel_cv[sel_cv$qglobal_cv<0.1, c("gene_name","qglobal_cv")]
- rownames(signif_genes) = NULL
- print(signif_genes)
Add Comment
Please, Sign In to add comment