Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from collections import defaultdict
def count_kmers(sequence, k):
    """Count every overlapping window of length ``k`` in ``sequence``.

    Args:
        sequence: The string to scan.
        k: Window length; must be a positive integer.

    Returns:
        A ``defaultdict(int)`` mapping each length-``k`` substring to the
        number of times it occurs. It is empty when ``sequence`` is shorter
        than ``k``.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError('k must be a positive integer, got %r' % (k,))

    kmers_count = defaultdict(int)
    # The last full window starts at len(sequence) - k; range() yields nothing
    # when that is negative, so short inputs simply produce an empty result.
    for start in range(len(sequence) - k + 1):
        kmers_count[sequence[start:start + k]] += 1
    return kmers_count
def get_most_frequent_kmers(kmers):
    """Return the k-mers with the highest count, joined by single spaces.

    Args:
        kmers: Mapping from k-mer string to its occurrence count.

    Returns:
        A string holding every key whose count equals the maximum count,
        separated by spaces, in the mapping's iteration order. Returns an
        empty string when ``kmers`` is empty.
    """
    # max() raises ValueError on an empty sequence, so handle that case first.
    if not kmers:
        return ''

    # values()/items() exist on both Python 2 and Python 3 dicts, unlike the
    # Python 2-only itervalues()/iteritems().
    max_freq = max(kmers.values())
    return ' '.join(kmer for kmer, freq in kmers.items() if freq == max_freq)
if __name__ == '__main__':
    # Input file layout: the sequence on the first line, k on the second.
    with open('dataset_2_10.txt', 'r') as data:
        # readline() keeps the trailing newline; strip it so the final window
        # is not counted as a k-mer ending in '\n'.
        sequence = data.readline().strip()
        # int() tolerates surrounding whitespace, so no strip is needed here.
        k = int(data.readline())

    kmers = count_kmers(sequence, k)
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(get_most_frequent_kmers(kmers))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement