Advertisement
Pug_coder

Untitled

Dec 24th, 2023
12
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.34 KB | None | 0 0
  1. from typing import Callable, Tuple
  2.  
  3. DEBUG = False
  4.  
  5.  
  6. def score_fun(
  7. a: str, b: str, match_score: int = 5, mismatch_score: int = -4
  8. ) -> int:
  9. return match_score if a == b else mismatch_score
  10.  
  11.  
  12. class NeedlemanWunschAffineMatrixHandler:
  13. def __init__(self, seq1: str, seq2: str, gap_open: int, gap_extend: int):
  14. self.seq1 = seq1
  15. self.seq2 = seq2
  16. self.gap_open = gap_open
  17. self.gap_extend = gap_extend
  18.  
  19. def matrices_init(self):
  20. infinity = float('-inf')
  21. n, m = len(self.seq1) + 1, len(self.seq2) + 1
  22.  
  23. matching_matrix = [[0] * m for _ in range(n)]
  24.  
  25. # D(0, 0) = infinity
  26. deletion_matrix = [
  27. [infinity if i == 0 and j == 0 else 0 for i in range(m)]
  28. for j in range(n)
  29. ]
  30.  
  31. # I(0, 0) = infinity
  32. insertion_matrix = [
  33. [infinity if i == 0 and j == 0 else 0 for i in range(m)]
  34. for j in range(n)
  35. ]
  36.  
  37. # I(0, j) = infinity
  38. insertion_matrix[0] = [
  39. infinity if i in range(len(insertion_matrix[0])) else elem for i, elem in enumerate(insertion_matrix[0])
  40. ]
  41. """
  42. M(i, 0) = infinity
  43. D(0, j) = open + (j - 1) * extend
  44. """
  45. for i in range(m):
  46. matching_matrix[0][i] = infinity
  47. deletion_matrix[0][i] = self.gap_open + (i - 1) * self.gap_extend
  48.  
  49. """
  50. M(i, 0) = infinity
  51. M(0, j) = infinity
  52. I(i, 0) = open + (i - 1) * extend
  53. """
  54. for i in range(n):
  55. matching_matrix[i][0] = infinity
  56. insertion_matrix[i][0] = self.gap_open + (i - 1) * self.gap_extend
  57. deletion_matrix[i][0] = infinity
  58.  
  59. matching_matrix[0][0] = 0
  60.  
  61. for i in range(1, n):
  62. for j in range(1, m):
  63. matching_matrix[i][j] = max(matching_matrix[i - 1][j - 1],
  64. insertion_matrix[i - 1][j - 1],
  65. deletion_matrix[i - 1][j - 1]) + score_fun(self.seq1[i - 1], self.seq2[j - 1])
  66. insertion_matrix[i][j] = max(insertion_matrix[i][j - 1] + self.gap_extend,
  67. matching_matrix[i][j - 1] + self.gap_open)
  68. deletion_matrix[i][j] = max(deletion_matrix[i - 1][j] + self.gap_extend,
  69. matching_matrix[i - 1][j] + self.gap_open)
  70. return matching_matrix, insertion_matrix, deletion_matrix
  71.  
  72. def traceback(self, matching_matrix, insertion_matrix, deletion_matrix):
  73. score = max(
  74. matching_matrix[-1][-1], insertion_matrix[-1][-1], deletion_matrix[-1][-1]
  75. )
  76. i, j = len(self.seq1), len(self.seq2)
  77. matrix_names = {matching_matrix[-1][-1]: "match", insertion_matrix[-1][-1]: "insertion",
  78. deletion_matrix[-1][-1]: "deletion"}
  79. aln1, aln2, current_matrix = '', '', matrix_names[score]
  80. while i > 0 or j > 0:
  81. match_case = match current_matrix:
  82. case "match" if matching_matrix[i][j] == matching_matrix[i - 1][j - 1] + score_fun(self.seq1[i - 1], self.seq2[j - 1]):
  83. push_seq1, push_seq2 = self.seq1[i - 1], self.seq2[j - 1]
  84. i_step, j_step = -1, -1
  85. case "insertion" if j <= 0 or insertion_matrix[i][j] == insertion_matrix[i][j - 1] + self.gap_extend:
  86. push_seq1, push_seq2 = ('-', self.seq2[j - 1]) if j > 0 else (self.seq1[i - 1], '-')
  87. i_step, j_step = (0, -1) if j > 0 else (-1, 0)
  88. case "deletion" if i <= 0 or deletion_matrix[i][j] == deletion_matrix[i - 1][j] + self.gap_extend:
  89. push_seq1, push_seq2 = (self.seq1[i - 1], '-') if i > 0 else ('-', self.seq2[j - 1])
  90. i_step, j_step = (-1, 0) if i > 0 else (0, -1)
  91.  
  92. i, j = i + i_step, j + j_step
  93. aln1 += push_seq1
  94. aln2 += push_seq2
  95.  
  96. return aln1[::-1], aln2[::-1], score
  97.  
  98.  
  99.  
  100. def needleman_wunsch_affine(
  101. seq1: str,
  102. seq2: str,
  103. score_fun: Callable = score_fun,
  104. gap_open: int = -10,
  105. gap_extend: int = -1,
  106. ) -> Tuple[str, str, int]:
  107. '''
  108. Inputs:
  109. seq1 - first sequence
  110. seq2 - second sequence
  111. score_fun - function that takes two characters and returns score
  112. gap_open - gap open penalty
  113. gap_extend - gap extend penalty
  114. Outputs:
  115. aln1 - first aligned sequence
  116. aln2 - second aligned sequence
  117. score - score of the alignment
  118. '''
  119. n, m = len(seq1) + 1, len(seq2) + 1
  120. # infinity = 2 * gap_open + (n + m - 2) * gap_extend + 1
  121. #infinity = float('-inf')
  122. nwh = NeedlemanWunschAffineMatrixHandler(seq1, seq2, gap_open, gap_extend)
  123. matching_matrix, insertion_matrix, deletion_matrix = nwh.matrices_init()
  124. aln1, aln2, score = nwh.traceback(matching_matrix, insertion_matrix, deletion_matrix)
  125.  
  126. return aln1, aln2, score
  127.  
  128. def print_array(matrix: list):
  129. for row in matrix:
  130. for element in row:
  131. print(f"{element:6}", end="")
  132. print()
  133.  
  134.  
  135. def main():
  136. aln1, aln2, score = needleman_wunsch_affine(
  137. "ACGT", "TAGT", gap_open=-10, gap_extend=-1
  138. )
  139. print(f'str 1: {aln1}')
  140. print(f'str 2: {aln2}')
  141. print(f'score: {score}')
  142.  
  143.  
  144. if __name__ == "__main__":
  145. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement