Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Extract the reads assigned to one taxon, expanding dereplicated counts.

Save this script on your disk and run it like this:

    python this_script.py FASTA.fa ASSIGNMENTS.txt TAXON

ASSIGNMENTS.txt holds one whitespace-separated record per line: the read id
first, the taxonomy string second.  Blank lines and lines with fewer than two
fields are skipped.

Every FASTA entry whose id (the part before the first ';') has a taxonomy
string containing TAXON is written ``size`` times to ``FASTA.fa-TAXON``, with
ids ``<id>-0``, ``<id>-1``, ...
"""
import re
import sys

USAGE = "usage: python this_script.py FASTA.fa ASSIGNMENTS.txt TAXON"

# Abundance annotation carried in the FASTA id, e.g. "read1;size=12;".
SIZE_PATTERN = re.compile(r'size=(\d+)')


def load_annotations(path, taxon):
    """Return ``{read_id: taxonomy}`` for the records whose taxonomy has *taxon*.

    *path* is the assignments file.  The read id is the first field of a line
    cut at its first ';'; the taxonomy is the second field.  A record is kept
    when *taxon* occurs anywhere in its taxonomy string (substring match).
    If a read id appears more than once, the last matching record wins.
    """
    assignments = {}
    with open(path) as handle:
        for line in handle:
            fields = line.split()
            if len(fields) < 2:
                # Blank line or a read without a taxonomy column: nothing
                # to match against, so skip it instead of crashing.
                continue
            taxonomy = fields[1]
            if taxon in taxonomy:
                assignments[fields[0].split(';')[0]] = taxonomy
    return assignments


def parse_size(fasta_id):
    """Return the integer ``size=N`` abundance found in *fasta_id*.

    Works whether or not the annotation is followed by ';' or by further
    annotations.  Raises ``ValueError`` when the id has no ``size=N`` part.
    """
    match = SIZE_PATTERN.search(fasta_id)
    if match is None:
        raise ValueError("no 'size=N' annotation in FASTA id: %r" % (fasta_id,))
    return int(match.group(1))


def main(argv):
    """Run the extraction for ``argv = [prog, FASTA, ASSIGNMENTS, TAXON]``.

    Returns 0 on success, or the usage string when arguments are missing (so
    that ``sys.exit`` prints it to stderr and exits with a non-zero status).
    """
    if len(argv) < 4:
        return USAGE

    # Imported here rather than at module level so that the parsing helpers
    # above stay importable on machines without the Oligotyping package.
    import Oligotyping.lib.fastalib as u

    fasta_path, annotations_path, taxon_of_interest = argv[1], argv[2], argv[3]

    # Read the assignments before creating the output file, so a bad
    # annotations path does not leave an empty output file behind.
    annotations_dict = load_annotations(annotations_path, taxon_of_interest)

    fasta = u.SequenceSource(fasta_path)
    output = u.FastaOutput(fasta_path + "-" + taxon_of_interest)
    try:
        while fasta.next():
            sample = fasta.id.split(';')[0]
            if sample not in annotations_dict:
                continue
            # One output entry per original read represented by this entry.
            for i in range(parse_size(fasta.id)):
                output.write_id('%s-%d' % (sample, i))
                output.write_seq(fasta.seq, split=False)
    finally:
        output.close()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Advertisement
Add Comment
Please, Sign In to add comment