Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- import collections
- import itertools
- import sys
def count(counter, dna_seq, length):
    """Tally every overlapping substring of ``length`` characters.

    Args:
        counter: A ``collections.Counter`` (or any object whose ``update``
            accepts an iterable of items to count); it is updated in place.
        dna_seq: The sequence to slide the window over.
        length: Window width in characters.

    Returns:
        None. The result is accumulated into ``counter``.
    """
    # len - length + 1 start positions; when the window is longer than the
    # sequence the range is empty and nothing is counted.
    counter.update(
        dna_seq[i:i + length] for i in range(len(dna_seq) - length + 1))
def percentage(counter, keys):
    """Print each key's share of the combined count of ``keys``.

    One line per key is written as ``"<key> <percent>"`` with three decimal
    places, ordered by descending count and then by key, followed by one
    blank line.

    Args:
        counter: Mapping from key to occurrence count (missing keys must
            read as 0, as ``collections.Counter`` does).
        keys: Iterable of the keys to report.
    """
    # Materialize once: keys is walked for the total and again for sorting.
    keys = tuple(keys)
    total = sum(counter[key] for key in keys)
    for key in sorted(keys, key=lambda k: (-counter[k], k)):
        # With nothing counted there is no meaningful ratio; report 0 rather
        # than dividing by zero.
        share = counter[key] * 100.0 / total if total else 0.0
        print("%s %.3f" % (key, share))
    print()
# Skip everything on stdin up to the ">THREE" header line, then consume the
# header itself so only that record's sequence lines remain.
it = itertools.dropwhile(
    lambda line: not line.startswith(">THREE"), sys.stdin)
if next(it, None) is None:
    # Without this guard a missing record surfaces as a bare StopIteration.
    sys.exit("error: no '>THREE' record found on standard input")

# The record ends at the next ">" header line or at end of input.
it = itertools.takewhile(lambda line: not line.startswith(">"), it)
dna_seq = ''.join(line.strip().upper() for line in it)

# A single counter holds the tallies for every window length; keys of
# different lengths are distinct strings, so they cannot collide.
counter = collections.Counter()
for length in (1, 2, 3, 4, 6, 12, 18):
    count(counter, dna_seq, length)

bases = "ATCG"
# Frequency tables for single bases and for every two-base combination.
percentage(counter, bases)
percentage(counter, tuple(i + j for i in bases for j in bases))

# Raw occurrence counts for the specific sequences of interest.
for seq in ("GGT", "GGTA", "GGTATT", "GGTATTTTAATT", "GGTATTTTAATTTATAGT"):
    print("%-7d %s" % (counter[seq], seq))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement