Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from typing import Callable, Tuple
- DEBUG = False
- def score_fun(
- a: str, b: str, match_score: int = 5, mismatch_score: int = -4
- ) -> int:
- return match_score if a == b else mismatch_score
- class NeedlemanWunschAffineMatrixHandler:
- def __init__(self, seq1: str, seq2: str, gap_open: int, gap_extend: int):
- self.seq1 = seq1
- self.seq2 = seq2
- self.gap_open = gap_open
- self.gap_extend = gap_extend
- def matrices_init(self):
- infinity = float('-inf')
- n, m = len(self.seq1) + 1, len(self.seq2) + 1
- matching_matrix = [[0] * m for _ in range(n)]
- # D(0, 0) = infinity
- deletion_matrix = [
- [infinity if i == 0 and j == 0 else 0 for i in range(m)]
- for j in range(n)
- ]
- # I(0, 0) = infinity
- insertion_matrix = [
- [infinity if i == 0 and j == 0 else 0 for i in range(m)]
- for j in range(n)
- ]
- # I(0, j) = infinity
- insertion_matrix[0] = [
- infinity if i in range(len(insertion_matrix[0])) else elem for i, elem in enumerate(insertion_matrix[0])
- ]
- """
- M(i, 0) = infinity
- D(0, j) = open + (j - 1) * extend
- """
- for i in range(m):
- matching_matrix[0][i] = infinity
- deletion_matrix[0][i] = self.gap_open + (i - 1) * self.gap_extend
- """
- M(i, 0) = infinity
- M(0, j) = infinity
- I(i, 0) = open + (i - 1) * extend
- """
- for i in range(n):
- matching_matrix[i][0] = infinity
- insertion_matrix[i][0] = self.gap_open + (i - 1) * self.gap_extend
- deletion_matrix[i][0] = infinity
- matching_matrix[0][0] = 0
- for i in range(1, n):
- for j in range(1, m):
- matching_matrix[i][j] = max(matching_matrix[i - 1][j - 1],
- insertion_matrix[i - 1][j - 1],
- deletion_matrix[i - 1][j - 1]) + score_fun(self.seq1[i - 1], self.seq2[j - 1])
- insertion_matrix[i][j] = max(insertion_matrix[i][j - 1] + self.gap_extend,
- matching_matrix[i][j - 1] + self.gap_open)
- deletion_matrix[i][j] = max(deletion_matrix[i - 1][j] + self.gap_extend,
- matching_matrix[i - 1][j] + self.gap_open)
- return matching_matrix, insertion_matrix, deletion_matrix
- def traceback(self, matching_matrix, insertion_matrix, deletion_matrix):
- score = max(
- matching_matrix[-1][-1], insertion_matrix[-1][-1], deletion_matrix[-1][-1]
- )
- i, j = len(self.seq1), len(self.seq2)
- matrix_names = {matching_matrix[-1][-1]: "match", insertion_matrix[-1][-1]: "insertion",
- deletion_matrix[-1][-1]: "deletion"}
- aln1, aln2, current_matrix = '', '', matrix_names[score]
- while i > 0 or j > 0:
- match_case = match current_matrix:
- case "match" if matching_matrix[i][j] == matching_matrix[i - 1][j - 1] + score_fun(self.seq1[i - 1], self.seq2[j - 1]):
- push_seq1, push_seq2 = self.seq1[i - 1], self.seq2[j - 1]
- i_step, j_step = -1, -1
- case "insertion" if j <= 0 or insertion_matrix[i][j] == insertion_matrix[i][j - 1] + self.gap_extend:
- push_seq1, push_seq2 = ('-', self.seq2[j - 1]) if j > 0 else (self.seq1[i - 1], '-')
- i_step, j_step = (0, -1) if j > 0 else (-1, 0)
- case "deletion" if i <= 0 or deletion_matrix[i][j] == deletion_matrix[i - 1][j] + self.gap_extend:
- push_seq1, push_seq2 = (self.seq1[i - 1], '-') if i > 0 else ('-', self.seq2[j - 1])
- i_step, j_step = (-1, 0) if i > 0 else (0, -1)
- i, j = i + i_step, j + j_step
- aln1 += push_seq1
- aln2 += push_seq2
- return aln1[::-1], aln2[::-1], score
- def needleman_wunsch_affine(
- seq1: str,
- seq2: str,
- score_fun: Callable = score_fun,
- gap_open: int = -10,
- gap_extend: int = -1,
- ) -> Tuple[str, str, int]:
- '''
- Inputs:
- seq1 - first sequence
- seq2 - second sequence
- score_fun - function that takes two characters and returns score
- gap_open - gap open penalty
- gap_extend - gap extend penalty
- Outputs:
- aln1 - first aligned sequence
- aln2 - second aligned sequence
- score - score of the alignment
- '''
- n, m = len(seq1) + 1, len(seq2) + 1
- # infinity = 2 * gap_open + (n + m - 2) * gap_extend + 1
- #infinity = float('-inf')
- nwh = NeedlemanWunschAffineMatrixHandler(seq1, seq2, gap_open, gap_extend)
- matching_matrix, insertion_matrix, deletion_matrix = nwh.matrices_init()
- aln1, aln2, score = nwh.traceback(matching_matrix, insertion_matrix, deletion_matrix)
- return aln1, aln2, score
- def print_array(matrix: list):
- for row in matrix:
- for element in row:
- print(f"{element:6}", end="")
- print()
- def main():
- aln1, aln2, score = needleman_wunsch_affine(
- "ACGT", "TAGT", gap_open=-10, gap_extend=-1
- )
- print(f'str 1: {aln1}')
- print(f'str 2: {aln2}')
- print(f'score: {score}')
- if __name__ == "__main__":
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement