Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- from Bio.SeqRecord import SeqRecord
def convert(dna):
    """Normalise *dna* to the form the k-mer helpers operate on.

    Args:
        dna: Either a ``SeqRecord`` or any other sequence-like value.

    Returns:
        ``str(dna.seq)`` when *dna* is a ``SeqRecord``; otherwise *dna*
        itself, unchanged.
    """
    return str(dna.seq) if isinstance(dna, SeqRecord) else dna
def immediateNeighbors(dna, alphabet="ACGT"):
    """Return every sequence that differs from *dna* at exactly one position.

    Args:
        dna: Sequence to mutate. It is only iterated, sliced and
            concatenated with single symbols taken from *alphabet*.
        alphabet: Symbols that may be substituted at each position.
            Defaults to the four DNA bases ``"ACGT"``.

    Returns:
        The set of sequences obtained by replacing one position of *dna*
        with a symbol from *alphabet* other than the one already there.
        An empty *dna* yields an empty set.
    """
    return {
        dna[:pos] + base + dna[pos + 1:]
        for pos, current in enumerate(dna)
        for base in alphabet
        if base != current
    }
def neighbors(dna, dist):
    """Return all sequences reachable from *dna* in at most *dist* substitutions.

    Args:
        dna: Starting sequence; passed through ``convert`` first.
        dist: Maximum number of single-position substitution rounds.
            Zero or a negative value yields just the starting sequence.

    Returns:
        A set containing the converted *dna* and every sequence produced by
        applying ``immediateNeighbors`` up to *dist* times in succession.
    """
    dna = convert(dna)
    seen = {dna}
    # Only sequences discovered in the previous round need expanding;
    # re-expanding older ones can only reproduce entries already in `seen`.
    frontier = {dna}
    for _ in range(dist):
        discovered = set()
        for seq in frontier:
            discovered.update(immediateNeighbors(seq))
        frontier = discovered - seen
        if not frontier:
            # Nothing new can appear in later rounds either.
            break
        seen |= frontier
    return seen
def mostFrequentKmers(dna, merSize, dist):
    """Return the k-mers that occur most often in *dna*, allowing mismatches.

    Every window of length *merSize* in *dna* contributes one count to each
    member of ``neighbors(window, dist)``. The k-mers with the highest total
    count are returned.

    Args:
        dna: Sequence to scan; passed through ``convert`` first.
        merSize: Length of the sliding window.
        dist: Mismatch budget forwarded to ``neighbors``.

    Returns:
        The set of k-mers sharing the maximum count, or an empty set when
        *dna* has no window of length *merSize*.
    """
    dna = convert(dna)

    # Tally identical windows first so each distinct window's neighbourhood
    # is generated once and weighted by how often the window occurs.
    window_counts = {}
    for start in range(len(dna) - merSize + 1):
        window = dna[start:start + merSize]
        window_counts[window] = window_counts.get(window, 0) + 1

    counts = {}
    for window, occurrences in window_counts.items():
        for candidate in neighbors(window, dist):
            counts[candidate] = counts.get(candidate, 0) + occurrences

    if not counts:
        return set()
    best = max(counts.values())
    return {kmer for kmer, freq in counts.items() if freq == best}
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement