Advertisement
Guest User

Untitled

a guest
Feb 1st, 2017
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.72 KB | None | 0 0
  1. from collections import defaultdict
  2.  
  3.  
  4. def count_kmers(sequence, k):
  5.     kmers_count = defaultdict(int)
  6.     top_rng = len(sequence) - k + 1
  7.  
  8.     for i in range(0, top_rng):
  9.         kmer = sequence[i:i + k]
  10.         assert len(kmer) == 14, 'FAIL'
  11.         kmers_count[kmer] += 1
  12.     return kmers_count
  13.  
  14.  
  15. def get_most_frequent_kmers(kmers):
  16.     max_freq = max(kmers.itervalues())
  17.     most_freq = (kmer for kmer, freq in kmers.iteritems() if freq == max_freq)
  18.     return ' '.join(most_freq)
  19.  
  20.  
  21. if __name__ == '__main__':
  22.     with open('dataset_2_10.txt', 'r') as data:
  23.         sequence = data.readline()
  24.         k = int(data.readline())
  25.     kmers = count_kmers(sequence, k)
  26.     print get_most_frequent_kmers(kmers)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement