Advertisement
Guest User

Untitled

a guest
Dec 22nd, 2014
167
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.79 KB | None | 0 0
  1. ## libraries
  2. library(genomeIntervals)
  3. library(Biostrings)
  4.  
  5. ## read the reference file
  6. ref <- readDNAStringSet("Ptrichocarpa_156.fa")
  7.  
  8. ## read the gff3
  9. annot <- readGff3("Ptrichocarpa_156_gene.gff3")
  10.  
  11. ## keep only the genes
  12. annot <- annot[annot$type=="gene",]
  13.  
  14. ## process by scaffolds
  15. fasta <- Reduce(append,lapply(levels(seq_name(annot)),function(scf,annot,ref){
  16.  
  17. ## sel the annotations
  18. sel <- seq_name(annot)==scf
  19.  
  20. ## get the starts and the ends
  21. seqs <- as(Views(ref[[scf]],annot[sel,1],annot[sel,2]),"DNAStringSet")
  22.  
  23. ## get the gene ID
  24. names(seqs) <- getGffAttribute(annot[sel],"ID")
  25.  
  26. ## check the strand
  27. n.sel <- strand(annot[sel]) == "-"
  28. append(seqs[!n.sel],reverseComplement(seqs[n.sel]))
  29.  
  30. },annot,ref))
  31.  
  32. ## write it as fasta
  33. writeXStringSet(fasta,file="Ptrichocarpa_156_gene_sequence.fa")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement