Posted by Anonymous on Sun 24 Feb 17:43
report abuse | View followups from Anonymous | download | new post
- #!/usr/bin/perl
- ##
- # Alignment computes a global alignment with 2 sequences.
- # Copyright (C) 2007 M Fourment
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- ##
- ##
- # File: alignment.py
- # Author: mathieu
- ##
- def score(a, b):
- if a == b:
- return 5
- return - 4
- def max(a,b,c):
- if a > b:
- if a > c:
- return a
- return c
- if b > c:
- return b
- return c
- def align(seq1,seq2):
- n = len(seq1) #1st dimension
- m = len(seq2) #2nd dimension
- e=-10 # gap cost
- i=0 # increment 1st dimension
- j=0 # increment 2nd dimension
- #F = zeros((n+1,m+1))
- F = [[0 for x in range(m+1)] for y in range(n+1)]
- # init uper row
- for i in range(1,n+1):
- F[i][0] = e * i
- # init left column
- for j in range(1,m+1):
- F[0][j] = e * j
- #compute f matrix
- for i in range(1,n+1):
- # PULLED THIS OUT FROM THE INNER LOOP.
- seq1m = seq1[i - 1]
- for j in range(1,m+1):
- F[i][j] = max(F[i-1][j-1]+score(seq1m, seq2[j-1]), F[i][j-1]+e, F[i-1][j]+e)
- # backtrack from bottom right
- i=n
- j=m
- align1=""
- align2=""
- while i > 0 and j > 0:
- Score = F[i][j]
- ScoreDiag = F[i-1][j-1]
- ScoreUp = F[i][j-1]
- ScoreLeft = F[i-1][j]
- # Score diagonal
- if F[i][j] == ScoreDiag + score(seq1[i-1], seq2[j-1]):
- align1=seq1[i-1]+align1
- align2=seq2[j-1]+align2
- i=i-1
- j=j-1
- # Score left
- elif F[i][j] == ScoreLeft + e:
- align1=seq1[i-1]+align1
- align2="-"+align2
- i=i-1
- # Score up
- elif F[i][j] == ScoreUp + e:
- align1="-"+align1
- align2=seq2[j-1]+align2
- j=j-1
- # seq1 longer
- while i > 0:
- align1=seq1[i-1]+align1
- align2="-"+align2
- i=i-1
- # seq2 (genome) longer
- while j > 0:
- align1="-"+align1
- align2=seq2[j-1]+align2
- j=j-1
- return align1, align2
- def imain():
- return align("tagtagtagactccgagatagagaagtctactaaaaatggagaaatacacagagatccacaatagaatgagagaatgtgtgccaggggaggtatctgcagtagagtgcctggatctattagatcggttttacgctgtgagacatgatgttgtggatcagatgataaagcatgattggtctgacaacaaagacaaagaacagcctataggccatgtcttgctgatggcaggagtaccgaatgaagtcatacaaggtatggaaaagaagatcatacctggaagtcctagtgggcagattctgagatcattttttaagatgacaccagataactactacagggagtttaattgagtttattgaggtgacagttacagctgatgtggcaagaggaacacgtgagaaaatcttgaagtatcaagcaggtctagagtacatagaacagttgctgcatcaggagtcagaaagaggcaatctgccaggtggttatagaataaagtttgatgtagtggcagtgaggactgatggctcaaatatatcaacacaatggccaagtcaacgaaatgagggtgtcgttcaaacaatgaggttaattcaggcagatatcaattacgtgagagaacatttaataaaaaatgatgagagatcagcactagaagccatgttcaatttaaaattccatgtcagtggacctaaagccaggacctttgacattcctgattatcggccccaacaactgtgtaacccaaacatagataatctcttgaactactgtaaaaattggttgacacgagagcatgaatttgcctttgatgaggttaaagggcaaagagttttcaacatatttgaagcagaagaaattaagcataaagaaagatacaatccatcacgtaaaccaaggaattttttattgatacaaggaaccgttcaaggtccatacttgcccaactatagcatccgaccagtacgatacaaaagttggctgtctggagatattaaaaaaccatcctgaaactccgatacaaatattagctagagatatggcattgaaatatataatgttggataaagatgaccttataaactactataaccctagagcatatttcaagcaaactgctaatataaaggaacctggcacttttaagctgaatttatcatcaatggatcctaaggccaaggcactgttagatgttatctcaaagaactcaaaaaaaggtgtgtttggtgaggtaattgatagtatagagataagtagtttaatacagcaaaatgagtgttctaaagtcatagaaaagatcctatctgatttggagatcaatgttggtgaaacaagccaaggcctagataatcccaagaggacaacaggtgttgatgatatcttaaagaagttttatgacaatgagttggtcaagtatatgttacatatagtaagaaagacaacagcatggcatatggggcatttgttaagagatataacagaatgtctaattgcacatgctggcttgaaaagatcaaagtactggtcaatacatgggttctctcatggtggaatattattgatgatcttgccatcaaaatcactagaagttgcaggttcttatattcggttttttactgtttttaaggatggattaggtttaattgactatgaaaacttagattcaactgtggtaattgatggtgtaagttggtgtttcagcaaggttatgagtttagacctaaatcgactattggctctgaatatatcttttgagaagacactattagcaacagctacatggtttcaatactatacagaggaccaaggtcactttcccttacaacatgcattgaggtctgtttttgcattccactttttactgactgacacaaaagatgaaattatgtgctatctttgataatttaagatatctaatccctgcagtgacctcgctttattcaggatataaaccgttgattgtaaagttttttgaaaggccatttaagagtgcacttgatgtatacctgtatactattataaaaacactacttgtaagccttgcacaaaataataaaatccgcttctattcaaaagttcggctattggggcttacagtagaccaatcaaccattggtgcaagtggagtttacccttctttaatgtcaagagtggtttataaacattataaaagtttaatctcagaagcgactacctgttttttcttatttgagaagggtcttcatggcaatcttactgaggaagcaaaaatacatttagagacagtcgaatgggctaggaaatttagtgacaaagagaaggcttatggtgcatatattatggaggaaggttacacaataaaggatgtagtagatggcaatattccagtagaacaacaattattttgtcaagaagtagtagagttatctgcaatggaattaaatacatatttggaagccaagtcacaagtgatggcagctaacataatgaacaaacactgggatcgtccatatttcagccaaacaaggaatattagcttaaaaggtatgtcaggcgcattacaagaagatggtcatttatcagctagtgttacattaattgaagcaatacgttttttgaatcaatctcaacaaaacccctcagtgttggagatgtatgaacaaactaagaggcagaaggctatggcacggattgttagaaaataccagagaacagaagctgatcgtggattttttataactactttaccaaccagagtgagattagagataatagaggattattttgatgcaatagctaaagtagtccctgaagagtatatatcatatggaggagagcggaagatattaaatattcaacaggcacttgaaaaggcattaagatgggcttcaggtgaaagtgagattcaaattagtatgggacaggtcattaaactaaaacggaagttaatgtatgttagtgcagatgcaacaaaatggtccccaggagataactctgcgaaattccgaagattcacccaggcattacacgatggtctacgagatgataaattaaagaggtgtgtagtggatgccttacggaatatatatgaaactgattttttcatgtctaggaagttacatcgatatatcgatggtatggatgatttatctgaatttgtagaagattttttatcattctttccaaataaagtatctgctgctatcaaaggaaattggctgcaaggaaatctaaacaag", "tagtagtagactccgagatagagaagtctactaaaaatggagaaatacacagagatcatagaatgagagaatgtgtgccaggggaggtatctgcagtagagtgcctggatctattagatcggttttacgctgtgagacatgatgttgtggatcagatgataaagcatgattggtctgacaacaaagacaaagaacagcctataggccatgtcttgctgatggcaggagtaccgaatgaagtcatacaaggtatggaaaagaagatcatacctggaagtcctagtgggcagattctgagatcattttttaagatgacaccagataactacaagattacagggagtttaattgagtttattgaggtgacagttacagctgatgtggcaagaggaacacgtgagaaaatcttgaagtatcaagcaggtctagagtacatagaacagttgctgcatcaggagtcagaaagaggcaatctgccaggtggttatagaataaagtttgatgtagtggcagtgaggactgatggctcaaatatatcaacacaatggccaagtcaacgaaatgagggtgtcgttcaaacaatgaggttaattcaggcagatcaattacgtgagagaacatttaataaaaaatgatgagagatcagcactagaagccatgttcaatttaaaattccatgtcagtggacctaaagccaggacctttgacattcctgattatcggccccaacaactgtgtaacccaaacatagataatctcttgaactactgtaaaaattggttgacacgagagcatgaatttgcctttgatgaggttaaagggcaaagagttttcaacatatttgaagcagaagaaattaagcataaagaaagatacaatccatcacgtaaaccaaggaattttttattgatacaaggaaccgttcaaggtccatacttgccctcaactatagcatccgaccagtacgatacaaaagttggctgtctggagatattaaaaaaccatcctgaaactccgatacaaatattagctagagatatggcattgaaatatataatgttggataaagatgaccttataaactactataaccctagagcatatttcaagcaaactgctaatataaaggaacctggcacttttaagctgaatttatcatcaatggatcctaaggccaaggcactgttagatgttatctcaaagaactcaaaaaaaggtgtgtttggtgaggtaattgatagtatagagataagtagtttaatacagcaaaatgagtgttctaaagtcatagaaaagatcctatctgatttggagatcaatgttggtgaaacaagccaaggcctagataatcccaagaggacaacaggtgttgatgatatcttaaagaagttttatgacaatgagttggtcaagtatatgttacatatagtaagaaagacaacagcatggcatatggggcatttgttaagagatataacagaatgtctaattgcacatgctggcttgaaaagatcaaagtactggtcaatacatgggttctctcatggtggaatattattgatgatcttgccatcaaaatcactagaagttgcaggttcttatattcggttttttactgtttttaaggatggattaggtttaattgactatgaaaacttagattcaactgtggtaattgatggtgtaagttggtgtttcagcaaggttatgagtttagacctaaatcgactattggctctgaatatatcttttgagaagacactattagcaacagctacatggtttcaatactatacagaggaccaaggtcactttcccttacaacatgcattgaggtctgtttttgcattccactttttactgactgacacaaaagatgaaattatgtgctatctttgataatttaagatatctaatccctgcagtgacctcgctttattcaggatataaaccgttgattgtaaagttttttgaaaggccatttaagagtgcacttgatgtatacctgtatactattataaaaacactacttgtaagccttgcacaaaataataaaatccgcttctattcaaaagttcggctattggggcttacagtagaccaatcaaccattggtgcaagtggagtttacccttctttaatgtcaagagtggtttataaacattataaaagtttaatctcagaagcgactacctgttttttcttatttgagaagggtcttcatggcaatcttactgaggaagcaaaaatacatttagagacagtcgaatgggctaggaaatttagtgacaaagagaaggcttatggtgcatatattatggaggaaggttacacaataaaggatgtagtagatggcaatattccagtagaacaacaattattttgtcaagaagtagtagagttatctgcaatggaattaaatacatatttggaagccaagtcacaagtgatggcagctaacataatgaacaaacactgggatcgtccatatttcagccaaacaaggaatattagcttaaaaggtatgtcaggcgcattacaagaagatggtcatttatcagctagtgttacattaattgaagcaatacgttttttgaatcaatctcaacaaaacccctcagtgttggagatgtatgaacaaactaagaggcagaaggctatggcacggattgttagaaaataccagagaacagaagctgatcgtggattttttataactactttaccaaccagagtgagattagagataatagaggattattttgatgcaatagctaaagtagtccctgaagagtatatatcatatggaggagagcggaagatattaaatattcaacaggcacttgaaaaggcattaagatgggcttcaggtgaaagtgagattcaaattagtatgggacaggtcattaaactaaaacggaagttaatgtatgttagtgcagatgcaacaaaatggtccccaggagataactctgcgaaattccgaagattcacccaggcattacacgatggtctacgagatgataaattaaagaggtgtgtagtggatgccttacggaatatatatgaaactgattttttcatgtctaggaagttacatcgatatatcgatggtatggatgatttatctgaatttgtagaagattttttatcattctttccaaataaagtatctgctgctatcaaaggaaattggctgcaaggaaatctaaacaag")
- if __name__ == "__main__":
- print ",".join(imain())
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.