Advertisement
rstuart

k-nucleotide

Oct 6th, 2015
175
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.96 KB | None | 0 0
  1. #!/usr/bin/python3
  2. import collections
  3. import itertools
  4. import sys
  5.  
  6. def count(counter, dna_seq, length):
  7.     return counter.update(
  8.         dna_seq[i:i + length] for i in range(len(dna_seq) - length + 1))
  9.  
  10. def percentage(counter, keys):
  11.     total = sum(counter[key] for key in keys)
  12.     for occurences, key in sorted((-counter[key], key) for key in keys):
  13.         print("%s %.3f" % (key, -occurences * 100.0 / total))
  14.     print()
  15.  
  16. it = itertools.dropwhile(lambda l: not l.startswith(">THREE"), sys.stdin)
  17. next(it)
  18. it = itertools.takewhile(lambda l: not l.startswith(">"), it)
  19. dna_seq = ''.join(l.strip().upper() for l in it)
  20. counter = collections.Counter()
  21. for length in (1, 2, 3, 4, 6, 12, 18):
  22.     count(counter, dna_seq, length)
  23. bases = "ATCG"
  24. percentage(counter, bases)
  25. percentage(counter, tuple(i + j for i in bases for j in bases))
  26. for seq in ("GGT", "GGTA", "GGTATT", "GGTATTTTAATT", "GGTATTTTAATTTATAGT"):
  27.     print("%-7d %s" % (counter[seq], seq))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement