Advertisement
Guest User

Untitled

a guest
Feb 17th, 2018
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.14 KB | None | 0 0
  1. from Bio.SeqRecord import SeqRecord
  2.  
  3.  
  4. def convert(dna):
  5. if isinstance(dna, SeqRecord):
  6. return str(dna.seq)
  7. return dna
  8.  
  9.  
  10. def immediateNeighbors(dna):
  11. hood = set()
  12. add = hood.add
  13. mer = ['A', 'C', 'G', 'T']
  14. for i in range(0, len(dna)):
  15. for j in range(0, len(mer)):
  16. if dna[i] != mer[j]:
  17. add(dna[:i] + mer[j] + dna[i + 1:])
  18. return hood
  19.  
  20.  
  21. def neighbors(dna, dist):
  22. dna = convert(dna)
  23. hood = [dna]
  24. extend = hood.extend
  25. for _ in range(0, dist):
  26. for j in range(0, len(hood)):
  27. extend(immediateNeighbors(hood[j]))
  28. return set(hood)
  29.  
  30.  
  31. def mostFrequentKmers(dna, merSize, dist):
  32. dna = convert(dna)
  33. res = set()
  34. dict = {}
  35. get = dict.get
  36. freqMax = 0
  37.  
  38. for i in range(0, len(dna) - merSize + 1):
  39. nbs = neighbors(dna[i:i + merSize], dist)
  40. for neighbor in nbs:
  41. f = get(neighbor, 0) + 1
  42. dict[neighbor] = f
  43. freqMax = max(f, freqMax)
  44.  
  45. add = res.add
  46. for mer, freq in dict.items():
  47. if freq == freqMax:
  48. add(mer)
  49. return res
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement