Anophoo

align

Jan 4th, 2017
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.54 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import random # for seed, random
  4. import sys # for stdout
  5. import copy
  6.  
  7.  
  8.  
  9. ################################### TEST PART ##################################
  10. ################################################################################
  11.  
  12. # Tests align strands and scores
  13. # Parameters types:
  14. # score = int example: -6
  15. # plusScores = string example: " 1 1 1"
  16. # minusScores = string example: "22 111 11 "
  17. # strandAligned1 = string example: " CAAGTCGC"
  18. # strandAligned2 = string example: "ATCCCATTAC"
  19. #
  20. # Note: all strings must have same length
  21. def test(score, plusScores, minusScores, strandAligned1, strandAligned2):
  22. print("\n>>>>>>START TEST<<<<<<")
  23.  
  24. if testStrands(score, plusScores, minusScores, strandAligned1, strandAligned2):
  25. sys.stdout.write(">>>>>>>Test SUCCESS:")
  26. sys.stdout.write("\n\t\t" + "Score: "+str(score))
  27. sys.stdout.write("\n\t\t+ " + plusScores)
  28. sys.stdout.write("\n\t\t " + strandAligned1)
  29. sys.stdout.write("\n\t\t " + strandAligned2)
  30. sys.stdout.write("\n\t\t- " + minusScores)
  31. sys.stdout.write("\n\n")
  32. else:
  33. sys.stdout.write("\t>>>>!!!Test FAILED\n\n")
  34.  
  35.  
  36. # converts character score to int
  37. def testScoreToInt(score):
  38. if score == ' ':
  39. return 0
  40. return int(score)
  41.  
  42.  
  43. # computes sum of scores
  44. def testSumScore(scores):
  45. result = 0
  46. for ch in scores:
  47. result += testScoreToInt(ch)
  48. return result
  49.  
  50.  
  51. # test each characters and scores
  52. def testValidateEach(ch1, ch2, plusScore, minusScore):
  53. if ch1 == ' ' or ch2 == ' ':
  54. return plusScore == 0 and minusScore == 2
  55. if ch1 == ch2:
  56. return plusScore == 1 and minusScore == 0
  57. return plusScore == 0 and minusScore == 1
  58.  
  59.  
  60. # test and validates strands
  61. def testStrands(score, plusScores, minusScores, strandAligned1, strandAligned2):
  62. if len(plusScores) != len(minusScores) or len(minusScores) != len(strandAligned1) or len(strandAligned1) != len(
  63. strandAligned2):
  64. sys.stdout.write("Length mismatch! \n")
  65. return False
  66.  
  67. if len(plusScores) == 0:
  68. sys.stdout.write("Length is Zero! \n")
  69. return False
  70.  
  71. if testSumScore(plusScores) - testSumScore(minusScores) != score:
  72. sys.stdout.write("Score mismatch to score strings! TEST FAILED!\n")
  73. return False
  74. for i in range(len(plusScores)):
  75. if not testValidateEach(strandAligned1[i], strandAligned2[i], testScoreToInt(plusScores[i]),
  76. testScoreToInt(minusScores[i])):
  77. sys.stdout.write("Invalid scores for position " + str(i) + ":\n")
  78. sys.stdout.write("\t char1: " + strandAligned1[i] + " char2: " +
  79. strandAligned2[i] + " +" + str(testScoreToInt(plusScores[i])) + " -" +
  80. str(testScoreToInt(minusScores[i])) + "\n")
  81. return False
  82.  
  83. return True
  84.  
  85. ######################## END OF TEST PART ######################################
  86. ################################################################################
  87.  
  88.  
  89. # Computes the score of the optimal alignment of two DNA strands.
  90. def findOptimalAlignment(strand1, strand2, cache, plusScores, minusScores):
  91.  
  92. #newCacheValue = copy.deepcopy(aligned)
  93. #cache[key] = newCacheValue
  94.  
  95. key = strand1 + '-' + strand2
  96. if key in cache:
  97. return cache[key]
  98.  
  99. # if one of the two strands is empty, then there is only
  100. # one possible alignment, and of course it's optimal
  101. if len(strand1) == 0:
  102. aligned = {'strand1': ' ' * len(strand2),
  103. 'strand2': strand2,
  104. 'score': len(strand2) * -2,
  105. 'plusScores': " " * len(strand2),
  106. 'minusScores': "2" * len(strand2)}
  107. newCacheValue2 = copy.deepcopy(aligned)
  108. cache[key] = newCacheValue2
  109. #addToCache(aligned)
  110. return aligned
  111. if len(strand2) == 0:
  112. aligned = {'strand1': strand1,
  113. 'strand2': ' ' * len(strand1),
  114. 'score': len(strand1) * -2,
  115. 'plusScores': " " * len(strand1),
  116. 'minusScores': "2" * len(strand1)}
  117. newCacheValue3 = copy.deepcopy(aligned)
  118. cache[key] = newCacheValue3
  119. #addToCache(aligned)
  120. return aligned
  121.  
  122. # There's the scenario where the two leading bases of
  123. # each strand are forced to align, regardless of whether or not
  124. # they actually match.
  125. bestWith = findOptimalAlignment(strand1[1:],
  126. strand2[1:],
  127. cache,
  128. plusScores,
  129. minusScores)
  130. if strand1[0] == strand2[0]:
  131. # no benefit from making other recursive calls
  132. aligned = {'strand1': strand1[0] + bestWith['strand1'],
  133. 'strand2': strand2[0] + bestWith['strand2'],
  134. 'score': bestWith['score'] + 1,
  135. 'plusScores': 'plusScores' + "1",
  136. 'minusScores': 'minusScores' + " "}
  137. newCacheValue4 = copy.deepcopy(aligned)
  138. cache[key] = newCacheValue4
  139. #addToCache(aligned)
  140. return aligned
  141.  
  142. best = {'strand1': strand1[0] + bestWith['strand1'],
  143. 'strand2': strand2[0] + bestWith['strand2'],
  144. 'score': bestWith['score'] - 1}
  145.  
  146. # It's possible that the leading base of strand1 best
  147. # matches not the leading base of strand2, but the one after it.
  148. bestWithout = findOptimalAlignment(strand1, strand2[1:], cache, plusScores, minusScores)
  149. bestWithout['score'] -= 2 # penalize for insertion of space
  150. if bestWithout['score'] > best['score']:
  151. bestWithout['strand1'] = ' ' + bestWithout['strand1']
  152. bestWithout['strand2'] = strand2[0] + bestWithout['strand2']
  153. best = copy.deepcopy(bestWithout)
  154.  
  155. # opposite scenario
  156. bestWithout = findOptimalAlignment(strand1[1:], strand2, cache, plusScores, minusScores)
  157. bestWithout['score'] -= 2 # penalize for insertion of space
  158. if bestWithout['score'] > best['score']:
  159. bestWithout['strand2'] = ' ' + bestWithout['strand2']
  160. bestWithout['strand1'] = strand1[0] + bestWithout['strand1']
  161. best = copy.deepcopy(bestWithout)
  162.  
  163. newCacheValue5 = copy.deepcopy(best)
  164. cache[key] = newCacheValue5
  165. #addToCache(best)
  166. return best
  167.  
  168.  
  169. # Utility function that generates a random DNA string of
  170. # a random length drawn from the range [minlength, maxlength]
  171. def generateRandomDNAStrand(minlength, maxlength):
  172. assert minlength > 0, \
  173. "Minimum length passed to generateRandomDNAStrand" \
  174. "must be a positive number" # these \'s allow mult-line statements
  175. assert maxlength >= minlength, \
  176. "Maximum length passed to generateRandomDNAStrand must be at " \
  177. "as large as the specified minimum length"
  178. strand = ""
  179. length = random.choice(xrange(minlength, maxlength + 1))
  180. bases = ['A', 'T', 'G', 'C']
  181. for i in xrange(0, length):
  182. strand += random.choice(bases)
  183. return strand
  184.  
  185. # Method that just prints out the supplied alignment score.
  186. # This is more of a placeholder for what will ultimately
  187. # print out not only the score but the alignment as well.
  188.  
  189. def printAlignment(alignment, out = sys.stdout):
  190. out.write("Optimal alignment score is " + str(alignment['score']) + "\n")
  191.  
  192. # Unit test main in place to do little more than
  193. # exercise the above algorithm. As written, it
  194. # generates two fairly short DNA strands and
  195. # determines the optimal alignment score.
  196. #
  197. # As you change the implementation of findOptimalAlignment
  198. # to use memoization, you should change the 8s to 40s and
  199. # the 10s to 60s and still see everything execute very
  200. # quickly.
  201.  
  202. def main():
  203. #(score, plusScores, minusScores, strandAligned1, strandAligned2):
  204. data = {} #
  205. plusScores = ""
  206. minusScores = ""
  207. while (True):
  208. sys.stdout.write("Generate random DNA strands? ")
  209. answer = sys.stdin.readline()
  210. if answer == "no\n": break
  211. strand1 = generateRandomDNAStrand(8, 10)
  212. strand2 = generateRandomDNAStrand(8, 10)
  213. sys.stdout.write("Aligning these two strands: " + strand1 + "\n")
  214. sys.stdout.write(" " + strand2 + "\n")
  215. alignment = findOptimalAlignment(strand1, strand2, data, plusScores, minusScores)
  216. printAlignment(alignment)
  217. #test([alignment['score']], [alignment['plusScores']], [alignment['minusScores']],
  218. #[alignment['strand1']], [alignment['strand2']])
  219.  
  220. if __name__ == "__main__":
  221. main()
Advertisement
Add Comment
Please, Sign In to add comment