Anophoo

align 2

Jan 4th, 2017
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.43 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import random # for seed, random
  4. import sys # for stdout
  5. import copy
  6.  
  7.  
  8.  
  9. ################################### TEST PART ##################################
  10. ################################################################################
  11.  
  12. # Tests align strands and scores
  13. # Parameters types:
  14. # score = int example: -6
  15. # plusScores = string example: " 1 1 1"
  16. # minusScores = string example: "22 111 11 "
  17. # strandAligned1 = string example: " CAAGTCGC"
  18. # strandAligned2 = string example: "ATCCCATTAC"
  19. #
  20. # Note: all strings must have same length
  21. def test(score, plusScores, minusScores, strandAligned1, strandAligned2):
  22. print("\n>>>>>>START TEST<<<<<<")
  23.  
  24. if testStrands(score, plusScores, minusScores, strandAligned1, strandAligned2):
  25. sys.stdout.write(">>>>>>>Test SUCCESS:")
  26. sys.stdout.write("\n\t\t" + "Score: "+str(score))
  27. sys.stdout.write("\n\t\t+ " + plusScores)
  28. sys.stdout.write("\n\t\t " + strandAligned1)
  29. sys.stdout.write("\n\t\t " + strandAligned2)
  30. sys.stdout.write("\n\t\t- " + minusScores)
  31. sys.stdout.write("\n\n")
  32. else:
  33. sys.stdout.write("\t>>>>!!!Test FAILED\n\n")
  34.  
  35.  
  36. # converts character score to int
  37. def testScoreToInt(score):
  38. if score == ' ':
  39. return 0
  40. return int(score)
  41.  
  42.  
  43. # computes sum of scores
  44. def testSumScore(scores):
  45. result = 0
  46. for ch in scores:
  47. result += testScoreToInt(ch)
  48. return result
  49.  
  50.  
  51. # test each characters and scores
  52. def testValidateEach(ch1, ch2, plusScore, minusScore):
  53. if ch1 == ' ' or ch2 == ' ':
  54. return plusScore == 0 and minusScore == 2
  55. if ch1 == ch2:
  56. return plusScore == 1 and minusScore == 0
  57. return plusScore == 0 and minusScore == 1
  58.  
  59.  
  60. # test and validates strands
  61. def testStrands(score, plusScores, minusScores, strandAligned1, strandAligned2):
  62. if len(plusScores) != len(minusScores) or len(minusScores) != len(strandAligned1) or len(strandAligned1) != len(
  63. strandAligned2):
  64. sys.stdout.write("Length mismatch! \n")
  65. return False
  66.  
  67. if len(plusScores) == 0:
  68. sys.stdout.write("Length is Zero! \n")
  69. return False
  70.  
  71. if testSumScore(plusScores) - testSumScore(minusScores) != score:
  72. sys.stdout.write("Score mismatch to score strings! TEST FAILED!\n")
  73. return False
  74. for i in range(len(plusScores)):
  75. if not testValidateEach(strandAligned1[i], strandAligned2[i], testScoreToInt(plusScores[i]),
  76. testScoreToInt(minusScores[i])):
  77. sys.stdout.write("Invalid scores for position " + str(i) + ":\n")
  78. sys.stdout.write("\t char1: " + strandAligned1[i] + " char2: " +
  79. strandAligned2[i] + " +" + str(testScoreToInt(plusScores[i])) + " -" +
  80. str(testScoreToInt(minusScores[i])) + "\n")
  81. return False
  82.  
  83. return True
  84.  
  85. ######################## END OF TEST PART ######################################
  86. ################################################################################
  87.  
  88.  
  89. # Computes the score of the optimal alignment of two DNA strands.
  90. def findOptimalAlignment(strand1, strand2, cache, plusScores, minusScores):
  91.  
  92. key = strand1 + '-' + strand2
  93. if key in cache:
  94. return cache[key]
  95.  
  96. # if one of the two strands is empty, then there is only
  97. # one possible alignment, and of course it's optimal
  98. if len(strand1) == 0:
  99. aligned = {'strand1': ' ' * len(strand2),
  100. 'strand2': strand2,
  101. 'score': len(strand2) * -2,
  102. 'plusScores': " " * len(strand2),
  103. 'minusScores': "2" * len(strand2)}
  104. newCacheValue2 = copy.deepcopy(aligned)
  105. cache[key] = newCacheValue2
  106. return aligned
  107. if len(strand2) == 0:
  108. aligned = {'strand1': strand1,
  109. 'strand2': ' ' * len(strand1),
  110. 'score': len(strand1) * -2,
  111. 'plusScores': " " * len(strand1),
  112. 'minusScores': "2" * len(strand1)}
  113. newCacheValue3 = copy.deepcopy(aligned)
  114. cache[key] = newCacheValue3
  115. return aligned
  116.  
  117. # There's the scenario where the two leading bases of
  118. # each strand are forced to align, regardless of whether or not
  119. # they actually match.
  120. bestWith = findOptimalAlignment(strand1[1:],
  121. strand2[1:],
  122. cache,
  123. plusScores,
  124. minusScores)
  125. if strand1[0] == strand2[0]:
  126. # no benefit from making other recursive calls
  127. aligned = {'strand1': strand1[0] + bestWith['strand1'],
  128. 'strand2': strand2[0] + bestWith['strand2'],
  129. 'score': bestWith['score'] + 1,
  130. 'plusScores': 'plusScores' + "1",
  131. 'minusScores': 'minusScores' + " "}
  132. newCacheValue4 = copy.deepcopy(aligned)
  133. cache[key] = newCacheValue4
  134. return aligned
  135.  
  136. best = {'strand1': strand1[0] + bestWith['strand1'],
  137. 'strand2': strand2[0] + bestWith['strand2'],
  138. 'score': bestWith['score'] - 1,
  139. 'plusScores': 'plusScores' + " ",
  140. 'minusScores': 'minusScores' + "1"}
  141.  
  142. # It's possible that the leading base of strand1 best
  143. # matches not the leading base of strand2, but the one after it.
  144. bestWithout = findOptimalAlignment(strand1, strand2[1:], cache, plusScores, minusScores)
  145. bestWithout['score'] -= 2 # penalize for insertion of space
  146. if bestWithout['score'] > best['score']:
  147. bestWithout['strand1'] = ' ' + bestWithout['strand1']
  148. bestWithout['strand2'] = strand2[0] + bestWithout['strand2']
  149. best = copy.deepcopy(bestWithout)
  150.  
  151. # opposite scenario
  152. bestWithout = findOptimalAlignment(strand1[1:], strand2, cache, plusScores, minusScores)
  153. bestWithout['score'] -= 2 # penalize for insertion of space
  154. if bestWithout['score'] > best['score']:
  155. bestWithout['strand2'] = ' ' + bestWithout['strand2']
  156. bestWithout['strand1'] = strand1[0] + bestWithout['strand1']
  157. best = copy.deepcopy(bestWithout)
  158.  
  159. newCacheValue5 = copy.deepcopy(best)
  160. cache[key] = newCacheValue5
  161. return best
  162.  
  163.  
  164. # Utility function that generates a random DNA string of
  165. # a random length drawn from the range [minlength, maxlength]
  166. def generateRandomDNAStrand(minlength, maxlength):
  167. assert minlength > 0, \
  168. "Minimum length passed to generateRandomDNAStrand" \
  169. "must be a positive number" # these \'s allow mult-line statements
  170. assert maxlength >= minlength, \
  171. "Maximum length passed to generateRandomDNAStrand must be at " \
  172. "as large as the specified minimum length"
  173. strand = ""
  174. length = random.choice(xrange(minlength, maxlength + 1))
  175. bases = ['A', 'T', 'G', 'C']
  176. for i in xrange(0, length):
  177. strand += random.choice(bases)
  178. return strand
  179.  
  180. # Method that just prints out the supplied alignment score.
  181. # This is more of a placeholder for what will ultimately
  182. # print out not only the score but the alignment as well.
  183.  
  184. def printAlignment(alignment, out = sys.stdout):
  185. out.write("Optimal alignment score is " + str(alignment['score']) + "\n")
  186.  
  187. # Unit test main in place to do little more than
  188. # exercise the above algorithm. As written, it
  189. # generates two fairly short DNA strands and
  190. # determines the optimal alignment score.
  191. #
  192. # As you change the implementation of findOptimalAlignment
  193. # to use memoization, you should change the 8s to 40s and
  194. # the 10s to 60s and still see everything execute very
  195. # quickly.
  196.  
  197. def main():
  198. #(score, plusScores, minusScores, strandAligned1, strandAligned2):
  199. data = {} #
  200. plusScores = ""
  201. minusScores = ""
  202. while (True):
  203. sys.stdout.write("Generate random DNA strands? ")
  204. answer = sys.stdin.readline()
  205. if answer == "no\n": break
  206. strand1 = generateRandomDNAStrand(8, 10)
  207. strand2 = generateRandomDNAStrand(8, 10)
  208. sys.stdout.write("Aligning these two strands: " + strand1 + "\n")
  209. sys.stdout.write(" " + strand2 + "\n")
  210. alignment = findOptimalAlignment(strand1, strand2, data, plusScores, minusScores)
  211. printAlignment(alignment)
  212. test(alignment['score'], alignment['plusScores'], alignment['minusScores'], alignment['strand1'], alignment['strand2'])
  213.  
  214. if __name__ == "__main__":
  215. main()
Advertisement
Add Comment
Please, Sign In to add comment