Guest User

Untitled

a guest
Feb 22nd, 2014
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.79 KB | None | 0 0
  1. # make sure there are no empty lines in annotations file.
  2. # save this script on your disk and run it like this:
  3. #
  4. #    python this_script.py FASTA.fa ASSIGNMENTS.txt TAXON
  5. #
  6.  
  7. import sys
  8. import Oligotyping.lib.fastalib as u
  9.  
  10. fasta = u.SequenceSource(sys.argv[1])
  11. annotations = sys.argv[2]
  12. taxon_of_interest = sys.argv[3]
  13.  
  14. output = u.FastaOutput(sys.argv[1] + "-" + taxon_of_interest)
  15.  
  16. annotations_dict = dict([(line.split()[0].split(';')[0], line.split()[1]) for line in open(annotations).readlines()\
  17.                                 if taxon_of_interest in line.split()[1]])
  18.  
  19.  
  20. while fasta.next():
  21.     sample = fasta.id.split(';')[0]
  22.     freq = int(fasta.id.split('size=')[1][:-1])
  23.     if annotations_dict.has_key(sample):
  24.         output.store(fasta, split = False)
  25.  
  26. output.close()
Advertisement
Add Comment
Please, Sign In to add comment