Untitled
By: a guest | Feb 24th, 2008 | Syntax:
Python | Size: 9.07 KB | Hits: 147 | Expires: Never
#!/usr/bin/perl
##
# Alignment computes a global alignment with 2 sequences.
# Copyright (C) 2007 M Fourment
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##
##
# File: alignment.py
# Author: mathieu
##
def score(a, b):
if a == b:
return 5
return - 4
def max(a,b,c):
if a > b:
if a > c:
return a
return c
if b > c:
return b
return c
def align(seq1,seq2):
n = len(seq1) #1st dimension
m = len(seq2) #2nd dimension
e=-10 # gap cost
i=0 # increment 1st dimension
j=0 # increment 2nd dimension
#F = zeros((n+1,m+1))
F = [[0 for x in range(m+1)] for y in range(n+1)]
# init uper row
for i in range(1,n+1):
F[i][0] = e * i
# init left column
for j in range(1,m+1):
F[0][j] = e * j
#compute f matrix
for i in range(1,n+1):
# PULLED THIS OUT FROM THE INNER LOOP.
seq1m = seq1[i - 1]
for j in range(1,m+1):
F[i][j] = max(F[i-1][j-1]+score(seq1m, seq2[j-1]), F[i][j-1]+e, F[i-1][j]+e)
# backtrack from bottom right
i=n
j=m
align1=""
align2=""
while i > 0 and j > 0:
Score = F[i][j]
ScoreDiag = F[i-1][j-1]
ScoreUp = F[i][j-1]
ScoreLeft = F[i-1][j]
# Score diagonal
if F[i][j] == ScoreDiag + score(seq1[i-1], seq2[j-1]):
align1=seq1[i-1]+align1
align2=seq2[j-1]+align2
i=i-1
j=j-1
# Score left
elif F[i][j] == ScoreLeft + e:
align1=seq1[i-1]+align1
align2="-"+align2
i=i-1
# Score up
elif F[i][j] == ScoreUp + e:
align1="-"+align1
align2=seq2[j-1]+align2
j=j-1
# seq1 longer
while i > 0:
align1=seq1[i-1]+align1
align2="-"+align2
i=i-1
# seq2 (genome) longer
while j > 0:
align1="-"+align1
align2=seq2[j-1]+align2
j=j-1
return align1, align2
def imain():
return align("tagtagtagactccgagatagagaagtctactaaaaatggagaaatacacagagatccacaatagaatgagagaatgtgtgccaggggaggtatctgcagtagagtgcctggatctattagatcggttttacgctgtgagacatgatgttgtggatcagatgataaagcatgattggtctgacaacaaagacaaagaacagcctataggccatgtcttgctgatggcaggagtaccgaatgaagtcatacaaggtatggaaaagaagatcatacctggaagtcctagtgggcagattctgagatcattttttaagatgacaccagataactactacagggagtttaattgagtttattgaggtgacagttacagctgatgtggcaagaggaacacgtgagaaaatcttgaagtatcaagcaggtctagagtacatagaacagttgctgcatcaggagtcagaaagaggcaatctgccaggtggttatagaataaagtttgatgtagtggcagtgaggactgatggctcaaatatatcaacacaatggccaagtcaacgaaatgagggtgtcgttcaaacaatgaggttaattcaggcagatatcaattacgtgagagaacatttaataaaaaatgatgagagatcagcactagaagccatgttcaatttaaaattccatgtcagtggacctaaagccaggacctttgacattcctgattatcggccccaacaactgtgtaacccaaacatagataatctcttgaactactgtaaaaattggttgacacgagagcatgaatttgcctttgatgaggttaaagggcaaagagttttcaacatatttgaagcagaagaaattaagcataaagaaagatacaatccatcacgtaaaccaaggaattttttattgatacaaggaaccgttcaaggtccatacttgcccaactatagcatccgaccagtacgatacaaaagttggctgtctggagatattaaaaaaccatcctgaaactccgatacaaatattagctagagatatggcattgaaatatataatgttggataaagatgaccttataaactactataaccctagagcatatttcaagcaaactgctaatataaaggaacctggcacttttaagctgaatttatcatcaatggatcctaaggccaaggcactgttagatgttatctcaaagaactcaaaaaaaggtgtgtttggtgaggtaattgatagtatagagataagtagtttaatacagcaaaatgagtgttctaaagtcatagaaaagatcctatctgatttggagatcaatgttggtgaaacaagccaaggcctagataatcccaagaggacaacaggtgttgatgatatcttaaagaagttttatgacaatgagttggtcaagtatatgttacatatagtaagaaagacaacagcatggcatatggggcatttgttaagagatataacagaatgtctaattgcacatgctggcttgaaaagatcaaagtactggtcaatacatgggttctctcatggtggaatattattgatgatcttgccatcaaaatcactagaagttgcaggttcttatattcggttttttactgtttttaaggatggattaggtttaattgactatgaaaacttagattcaactgtggtaattgatggtgtaagttggtgtttcagcaaggttatgagtttagacctaaatcgactattggctctgaatatatcttttgagaagacactattagcaacagctacatggtttcaatactatacagaggaccaaggtcactttcccttacaacatgcattgaggtctgtttttgcattccactttttactgactgacacaaaagatgaaattatgtgctatctttgataatttaagatatctaatccctgcagtgacctcgctttattcaggatataaaccgttgattgtaaagttttttgaaaggccatttaagagtgcacttgatgtatacctgtatactattataaaaacactacttgtaagccttgcacaaaataataaaatccgcttctattcaaaagttcggctattggggcttacagtagaccaatcaaccattggtgcaagtggagtttacccttctttaatgtcaagagtggtttataaacattataaaagtttaatctcagaagcgactacctgttttttcttatttgagaagggtcttcatggcaatcttactgaggaagcaaaaatacatttagagacagtcgaatgggctaggaaatttagtgacaaagagaaggcttatggtgcatatattatggaggaaggttacacaataaaggatgtagtagatggcaatattccagtagaacaacaattattttgtcaagaagtagtagagttatctgcaatggaattaaatacatatttggaagccaagtcacaagtgatggcagctaacataatgaacaaacactgggatcgtccatatttcagccaaacaaggaatattagcttaaaaggtatgtcaggcgcattacaagaagatggtcatttatcagctagtgttacattaattgaagcaatacgttttttgaatcaatctcaacaaaacccctcagtgttggagatgtatgaacaaactaagaggcagaaggctatggcacggattgttagaaaataccagagaacagaagctgatcgtggattttttataactactttaccaaccagagtgagattagagataatagaggattattttgatgcaatagctaaagtagtccctgaagagtatatatcatatggaggagagcggaagatattaaatattcaacaggcacttgaaaaggcattaagatgggcttcaggtgaaagtgagattcaaattagtatgggacaggtcattaaactaaaacggaagttaatgtatgttagtgcagatgcaacaaaatggtccccaggagataactctgcgaaattccgaagattcacccaggcattacacgatggtctacgagatgataaattaaagaggtgtgtagtggatgccttacggaatatatatgaaactgattttttcatgtctaggaagttacatcgatatatcgatggtatggatgatttatctgaatttgtagaagattttttatcattctttccaaataaagtatctgctgctatcaaaggaaattggctgcaaggaaatctaaacaag", "tagtagtagactccgagatagagaagtctactaaaaatggagaaatacacagagatcatagaatgagagaatgtgtgccaggggaggtatctgcagtagagtgcctggatctattagatcggttttacgctgtgagacatgatgttgtggatcagatgataaagcatgattggtctgacaacaaagacaaagaacagcctataggccatgtcttgctgatggcaggagtaccgaatgaagtcatacaaggtatggaaaagaagatcatacctggaagtcctagtgggcagattctgagatcattttttaagatgacaccagataactacaagattacagggagtttaattgagtttattgaggtgacagttacagctgatgtggcaagaggaacacgtgagaaaatcttgaagtatcaagcaggtctagagtacatagaacagttgctgcatcaggagtcagaaagaggcaatctgccaggtggttatagaataaagtttgatgtagtggcagtgaggactgatggctcaaatatatcaacacaatggccaagtcaacgaaatgagggtgtcgttcaaacaatgaggttaattcaggcagatcaattacgtgagagaacatttaataaaaaatgatgagagatcagcactagaagccatgttcaatttaaaattccatgtcagtggacctaaagccaggacctttgacattcctgattatcggccccaacaactgtgtaacccaaacatagataatctcttgaactactgtaaaaattggttgacacgagagcatgaatttgcctttgatgaggttaaagggcaaagagttttcaacatatttgaagcagaagaaattaagcataaagaaagatacaatccatcacgtaaaccaaggaattttttattgatacaaggaaccgttcaaggtccatacttgccctcaactatagcatccgaccagtacgatacaaaagttggctgtctggagatattaaaaaaccatcctgaaactccgatacaaatattagctagagatatggcattgaaatatataatgttggataaagatgaccttataaactactataaccctagagcatatttcaagcaaactgctaatataaaggaacctggcacttttaagctgaatttatcatcaatggatcctaaggccaaggcactgttagatgttatctcaaagaactcaaaaaaaggtgtgtttggtgaggtaattgatagtatagagataagtagtttaatacagcaaaatgagtgttctaaagtcatagaaaagatcctatctgatttggagatcaatgttggtgaaacaagccaaggcctagataatcccaagaggacaacaggtgttgatgatatcttaaagaagttttatgacaatgagttggtcaagtatatgttacatatagtaagaaagacaacagcatggcatatggggcatttgttaagagatataacagaatgtctaattgcacatgctggcttgaaaagatcaaagtactggtcaatacatgggttctctcatggtggaatattattgatgatcttgccatcaaaatcactagaagttgcaggttcttatattcggttttttactgtttttaaggatggattaggtttaattgactatgaaaacttagattcaactgtggtaattgatggtgtaagttggtgtttcagcaaggttatgagtttagacctaaatcgactattggctctgaatatatcttttgagaagacactattagcaacagctacatggtttcaatactatacagaggaccaaggtcactttcccttacaacatgcattgaggtctgtttttgcattccactttttactgactgacacaaaagatgaaattatgtgctatctttgataatttaagatatctaatccctgcagtgacctcgctttattcaggatataaaccgttgattgtaaagttttttgaaaggccatttaagagtgcacttgatgtatacctgtatactattataaaaacactacttgtaagccttgcacaaaataataaaatccgcttctattcaaaagttcggctattggggcttacagtagaccaatcaaccattggtgcaagtggagtttacccttctttaatgtcaagagtggtttataaacattataaaagtttaatctcagaagcgactacctgttttttcttatttgagaagggtcttcatggcaatcttactgaggaagcaaaaatacatttagagacagtcgaatgggctaggaaatttagtgacaaagagaaggcttatggtgcatatattatggaggaaggttacacaataaaggatgtagtagatggcaatattccagtagaacaacaattattttgtcaagaagtagtagagttatctgcaatggaattaaatacatatttggaagccaagtcacaagtgatggcagctaacataatgaacaaacactgggatcgtccatatttcagccaaacaaggaatattagcttaaaaggtatgtcaggcgcattacaagaagatggtcatttatcagctagtgttacattaattgaagcaatacgttttttgaatcaatctcaacaaaacccctcagtgttggagatgtatgaacaaactaagaggcagaaggctatggcacggattgttagaaaataccagagaacagaagctgatcgtggattttttataactactttaccaaccagagtgagattagagataatagaggattattttgatgcaatagctaaagtagtccctgaagagtatatatcatatggaggagagcggaagatattaaatattcaacaggcacttgaaaaggcattaagatgggcttcaggtgaaagtgagattcaaattagtatgggacaggtcattaaactaaaacggaagttaatgtatgttagtgcagatgcaacaaaatggtccccaggagataactctgcgaaattccgaagattcacccaggcattacacgatggtctacgagatgataaattaaagaggtgtgtagtggatgccttacggaatatatatgaaactgattttttcatgtctaggaagttacatcgatatatcgatggtatggatgatttatctgaatttgtagaagattttttatcattctttccaaataaagtatctgctgctatcaaaggaaattggctgcaaggaaatctaaacaag")
if __name__ == "__main__":
print ",".join(imain())