Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## libraries
- library(genomeIntervals)
- library(Biostrings)
- ## read the reference file
- ref <- readDNAStringSet("Ptrichocarpa_156.fa")
- ## read the gff3
- annot <- readGff3("Ptrichocarpa_156_gene.gff3")
- ## keep only the genes
- annot <- annot[annot$type=="gene",]
- ## process by scaffolds
- fasta <- Reduce(append,lapply(levels(seq_name(annot)),function(scf,annot,ref){
- ## sel the annotations
- sel <- seq_name(annot)==scf
- ## get the starts and the ends
- seqs <- as(Views(ref[[scf]],annot[sel,1],annot[sel,2]),"DNAStringSet")
- ## get the gene ID
- names(seqs) <- getGffAttribute(annot[sel],"ID")
- ## check the strand
- n.sel <- strand(annot[sel]) == "-"
- append(seqs[!n.sel],reverseComplement(seqs[n.sel]))
- },annot,ref))
- ## write it as fasta
- writeXStringSet(fasta,file="Ptrichocarpa_156_gene_sequence.fa")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement