Advertisement
Guest User

Untitled

a guest
Feb 13th, 2018
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.91 KB | None | 0 0
  1. #Enter your program here
  2. import itertools
  3.  
  4. def ScanAndScoreMotif(DNA, motif, minTotalHammingDist):
  5. totalDist = 0
  6. bestAlignment = []
  7. k = len(motif)
  8. for seq in DNA:
  9. minHammingDist = k+1
  10. for s in xrange(len(seq)-k+1):
  11. HammingDist = sum([1 for i in xrange(k) if motif[i] != seq[s+i]])
  12. if (HammingDist < minHammingDist):
  13. bestS = s
  14. minHammingDist = HammingDist
  15. bestAlignment.append(bestS)
  16. totalDist += minHammingDist
  17.  
  18. if totalDist > minTotalHammingDist:
  19. return [], k*len(DNA)
  20.  
  21. return bestAlignment, totalDist
  22.  
  23. def BranchAndBoundMedianStringMotifSearch(DNA,k):
  24. """ Consider all possible 4**k motifs"""
  25. bestAlignment = []
  26. minHammingDist = k*len(DNA)
  27. kmer = ''
  28. bestPrefixes = []
  29. if(k == 1):
  30. for pattern in itertools.product('acgt', repeat= (1)):
  31. motif = ''.join(pattern)
  32. align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  33. if(dist < minHammingDist + 12):
  34. bestPrefixes.append(motif)
  35. if (dist < minHammingDist):
  36. bestAlignment = [s for s in align]
  37. minHammingDist = dist
  38. kmer = motif
  39. return bestAlignment, minHammingDist, kmer
  40. if(k == 2):
  41. for pattern in itertools.product('acgt', repeat= (2)):
  42. motif = ''.join(pattern)
  43. align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  44. if(dist < minHammingDist + 12):
  45. bestPrefixes.append(motif)
  46. if (dist < minHammingDist):
  47. bestAlignment = [s for s in align]
  48. minHammingDist = dist
  49. kmer = motif
  50. return bestAlignment, minHammingDist, kmer
  51. if(k == 3):
  52. for pattern in itertools.product('acgt', repeat= (3)):
  53. motif = ''.join(pattern)
  54. align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  55. if(dist < minHammingDist + 12):
  56. bestPrefixes.append(motif)
  57. if (dist < minHammingDist):
  58. bestAlignment = [s for s in align]
  59. minHammingDist = dist
  60. kmer = motif
  61. return bestAlignment, minHammingDist, kmer
  62. for pattern in itertools.product('acgt', repeat= (4)):
  63. motif = ''.join(pattern)
  64. align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  65. if(dist < minHammingDist + 12):
  66. bestPrefixes.append(motif)
  67. if (dist < minHammingDist):
  68. bestAlignment = [s for s in align]
  69. minHammingDist = dist
  70. kmer = motif
  71. bestPrefixesNext = []
  72. for n in range (5, k+1):
  73. minHammingDist = k*len(DNA)
  74. bestAlignment = []
  75. kmer = ''
  76. for pattern in bestPrefixes:
  77. motif = ''.join(pattern)
  78. for base in ["a", "c", "t", "g"]:
  79. motif = ''.join(pattern)
  80. motif = motif + base
  81. align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  82. if(dist < minHammingDist + 4*n):
  83. bestPrefixesNext.append(motif)
  84. if (dist < minHammingDist):
  85. bestAlignment = [s for s in align]
  86. minHammingDist = dist
  87. kmer = motif
  88. motif = ''.join(pattern)
  89. motif = base + motif
  90. align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  91. if(dist < minHammingDist + 4*n):
  92. bestPrefixesNext.append(motif)
  93. if (dist < minHammingDist):
  94. bestAlignment = [s for s in align]
  95. minHammingDist = dist
  96. kmer = motif
  97. bestPrefixes = bestPrefixesNext
  98. bestPrefixesNext = []
  99. return bestAlignment, minHammingDist, kmer
  100.  
  101. seqApprox = [
  102. 'tagtggtcttttgagtgtagatctgaagggaaagtatttccaccagttcggggtcacccagcagggcagggtgacttaat',
  103. 'cgcgactcggcgctcacagttatcgcacgtttagaccaaaacggagttggatccgaaactggagtttaatcggagtcctt',
  104. 'gttacttgtgagcctggttagacccgaaatataattgttggctgcatagcggagctgacatacgagtaggggaaatgcgt',
  105. 'aacatcaggctttgattaaacaatttaagcacgtaaatccgaattgacctgatgacaatacggaacatgccggctccggg',
  106. 'accaccggataggctgcttattaggtccaaaaggtagtatcgtaataatggctcagccatgtcaatgtgcggcattccac',
  107. 'tagattcgaatcgatcgtgtttctccctctgtgggttaacgaggggtccgaccttgctcgcatgtgccgaacttgtaccc',
  108. 'gaaatggttcggtgcgatatcaggccgttctcttaacttggcggtgcagatccgaacgtctctggaggggtcgtgcgcta',
  109. 'atgtatactagacattctaacgctcgcttattggcggagaccatttgctccactacaagaggctactgtgtagatccgta',
  110. 'ttcttacacccttctttagatccaaacctgttggcgccatcttcttttcgagtccttgtacctccatttgctctgatgac',
  111. 'ctacctatgtaaaacaacatctactaacgtagtccggtctttcctgatctgccctaacctacaggtcgatccgaaattcg']
  112.  
  113. %time BranchAndBoundMedianStringMotifSearch(seqApprox,10)
  114. #([17, 47, 18, 33, 21, 0, 46, 70, 16, 65], 11, 'tagatccgaa')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement