Share Pastebin
Guest
Public paste!

Untitled

By: a guest | Feb 24th, 2008 | Syntax: Python | Size: 9.07 KB | Hits: 147 | Expires: Never
Copy text to clipboard
  1. #!/usr/bin/perl
  2.  
  3.  ##
  4.  #   Alignment computes a global alignment with 2 sequences.
  5.  #  Copyright (C) 2007 M Fourment
  6.  #
  7.  #   This program is free software: you can redistribute it and/or modify
  8.  #   it under the terms of the GNU General Public License as published by
  9.  #   the Free Software Foundation, either version 3 of the License, or
  10.  #   (at your option) any later version.
  11.  #
  12.  #   This program is distributed in the hope that it will be useful,
  13.  #   but WITHOUT ANY WARRANTY; without even the implied warranty of
  14.  #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15.  #   GNU General Public License for more details.
  16.  #
  17.  #   You should have received a copy of the GNU General Public License
  18.  #   along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19.  ##
  20.  ##
  21.  # File:   alignment.py
  22.  # Author: mathieu
  23.  ##
  24.  
  25. def score(a, b):
  26.     if a == b:
  27.         return 5
  28.     return - 4
  29.  
  30. def max(a,b,c):
  31.     if a > b:
  32.         if a > c:
  33.             return a
  34.         return c
  35.     if b > c:
  36.         return b
  37.     return c
  38.  
  39.  
  40. def align(seq1,seq2):
  41.     n = len(seq1) #1st dimension
  42.     m = len(seq2) #2nd dimension
  43.     e=-10 # gap cost
  44.     i=0 # increment 1st dimension
  45.     j=0 # increment 2nd dimension
  46.     #F = zeros((n+1,m+1))
  47.     F = [[0 for x in range(m+1)] for y in range(n+1)]
  48.  
  49.  
  50.   # init uper row
  51.     for i in range(1,n+1):
  52.         F[i][0] = e * i
  53.    
  54.   # init left column
  55.     for j in range(1,m+1):
  56.         F[0][j] = e * j
  57.    
  58.   #compute f matrix
  59.     for i in range(1,n+1):
  60.         # PULLED THIS OUT FROM THE INNER LOOP.
  61.         seq1m = seq1[i - 1]
  62.         for j in range(1,m+1):
  63.             F[i][j] = max(F[i-1][j-1]+score(seq1m, seq2[j-1]), F[i][j-1]+e, F[i-1][j]+e)
  64.  
  65.   # backtrack from bottom right
  66.     i=n
  67.     j=m
  68.     align1=""
  69.     align2=""
  70.  
  71.     while i > 0 and j > 0:
  72.         Score = F[i][j]
  73.         ScoreDiag = F[i-1][j-1]
  74.         ScoreUp = F[i][j-1]
  75.         ScoreLeft = F[i-1][j]
  76.  
  77.     # Score diagonal
  78.         if F[i][j] == ScoreDiag + score(seq1[i-1], seq2[j-1]):
  79.             align1=seq1[i-1]+align1
  80.             align2=seq2[j-1]+align2
  81.             i=i-1
  82.             j=j-1
  83.  
  84.     # Score left
  85.         elif F[i][j] == ScoreLeft + e:
  86.             align1=seq1[i-1]+align1
  87.             align2="-"+align2
  88.             i=i-1
  89.       # Score up
  90.         elif F[i][j] == ScoreUp + e:
  91.             align1="-"+align1
  92.             align2=seq2[j-1]+align2
  93.             j=j-1
  94.    
  95.   # seq1 longer
  96.     while i > 0:
  97.         align1=seq1[i-1]+align1
  98.         align2="-"+align2
  99.         i=i-1
  100.  
  101.   # seq2 (genome) longer
  102.     while j > 0:
  103.         align1="-"+align1
  104.         align2=seq2[j-1]+align2
  105.         j=j-1
  106.  
  107.     return align1, align2
  108.    
  109.  
  110. def imain():
  111.     return align("tagtagtagactccgagatagagaagtctactaaaaatggagaaatacacagagatccacaatagaatgagagaatgtgtgccaggggaggtatctgcagtagagtgcctggatctattagatcggttttacgctgtgagacatgatgttgtggatcagatgataaagcatgattggtctgacaacaaagacaaagaacagcctataggccatgtcttgctgatggcaggagtaccgaatgaagtcatacaaggtatggaaaagaagatcatacctggaagtcctagtgggcagattctgagatcattttttaagatgacaccagataactactacagggagtttaattgagtttattgaggtgacagttacagctgatgtggcaagaggaacacgtgagaaaatcttgaagtatcaagcaggtctagagtacatagaacagttgctgcatcaggagtcagaaagaggcaatctgccaggtggttatagaataaagtttgatgtagtggcagtgaggactgatggctcaaatatatcaacacaatggccaagtcaacgaaatgagggtgtcgttcaaacaatgaggttaattcaggcagatatcaattacgtgagagaacatttaataaaaaatgatgagagatcagcactagaagccatgttcaatttaaaattccatgtcagtggacctaaagccaggacctttgacattcctgattatcggccccaacaactgtgtaacccaaacatagataatctcttgaactactgtaaaaattggttgacacgagagcatgaatttgcctttgatgaggttaaagggcaaagagttttcaacatatttgaagcagaagaaattaagcataaagaaagatacaatccatcacgtaaaccaaggaattttttattgatacaaggaaccgttcaaggtccatacttgcccaactatagcatccgaccagtacgatacaaaagttggctgtctggagatattaaaaaaccatcctgaaactccgatacaaatattagctagagatatggcattgaaatatataatgttggataaagatgaccttataaactactataaccctagagcatatttcaagcaaactgctaatataaaggaacctggcacttttaagctgaatttatcatcaatggatcctaaggccaaggcactgttagatgttatctcaaagaactcaaaaaaaggtgtgtttggtgaggtaattgatagtatagagataagtagtttaatacagcaaaatgagtgttctaaagtcatagaaaagatcctatctgatttggagatcaatgttggtgaaacaagccaaggcctagataatcccaagaggacaacaggtgttgatgatatcttaaagaagttttatgacaatgagttggtcaagtatatgttacatatagtaagaaagacaacagcatggcatatggggcatttgttaagagatataacagaatgtctaattgcacatgctggcttgaaaagatcaaagtactggtcaatacatgggttctctcatggtggaatattattgatgatcttgccatcaaaatcactagaagttgcaggttcttatattcggttttttactgtttttaaggatggattaggtttaattgactatgaaaacttagattcaactgtggtaattgatggtgtaagttggtgtttcagcaaggttatgagtttagacctaaatcgactattggctctgaatatatcttttgagaagacactattagcaacagctacatggtttcaatactatacagaggaccaaggtcactttcccttacaacatgcattgaggtctgtttttgcattccactttttactgactgacacaaaagatgaaattatgtgctatctttgataatttaagatatctaatccctgcagtgacctcgctttattcaggatataaaccgttgattgtaaagttttttgaaaggccatttaagagtgcacttgatgtatacctgtatactattataaaaacactacttgtaagccttgcacaaaataataaaatccgcttctattcaaaagttcggctattggggcttacagtagaccaatcaaccattggtgcaagtggagtttacccttctttaatgtcaagagtggtttataaacattataaaagtttaatctcagaagcgactacctgttttttcttatttgagaagggtcttcatggcaatcttactgaggaagcaaaaatacatttagagacagtcgaatgggctaggaaatttagtgacaaagagaaggcttatggtgcatatattatggaggaaggttacacaataaaggatgtagtagatggcaatattccagtagaacaacaattattttgtcaagaagtagtagagttatctgcaatggaattaaatacatatttggaagccaagtcacaagtgatggcagctaacataatgaacaaacactgggatcgtccatatttcagccaaacaaggaatattagcttaaaaggtatgtcaggcgcattacaagaagatggtcatttatcagctagtgttacattaattgaagcaatacgttttttgaatcaatctcaacaaaacccctcagtgttggagatgtatgaacaaactaagaggcagaaggctatggcacggattgttagaaaataccagagaacagaagctgatcgtggattttttataactactttaccaaccagagtgagattagagataatagaggattattttgatgcaatagctaaagtagtccctgaagagtatatatcatatggaggagagcggaagatattaaatattcaacaggcacttgaaaaggcattaagatgggcttcaggtgaaagtgagattcaaattagtatgggacaggtcattaaactaaaacggaagttaatgtatgttagtgcagatgcaacaaaatggtccccaggagataactctgcgaaattccgaagattcacccaggcattacacgatggtctacgagatgataaattaaagaggtgtgtagtggatgccttacggaatatatatgaaactgattttttcatgtctaggaagttacatcgatatatcgatggtatggatgatttatctgaatttgtagaagattttttatcattctttccaaataaagtatctgctgctatcaaaggaaattggctgcaaggaaatctaaacaag", "tagtagtagactccgagatagagaagtctactaaaaatggagaaatacacagagatcatagaatgagagaatgtgtgccaggggaggtatctgcagtagagtgcctggatctattagatcggttttacgctgtgagacatgatgttgtggatcagatgataaagcatgattggtctgacaacaaagacaaagaacagcctataggccatgtcttgctgatggcaggagtaccgaatgaagtcatacaaggtatggaaaagaagatcatacctggaagtcctagtgggcagattctgagatcattttttaagatgacaccagataactacaagattacagggagtttaattgagtttattgaggtgacagttacagctgatgtggcaagaggaacacgtgagaaaatcttgaagtatcaagcaggtctagagtacatagaacagttgctgcatcaggagtcagaaagaggcaatctgccaggtggttatagaataaagtttgatgtagtggcagtgaggactgatggctcaaatatatcaacacaatggccaagtcaacgaaatgagggtgtcgttcaaacaatgaggttaattcaggcagatcaattacgtgagagaacatttaataaaaaatgatgagagatcagcactagaagccatgttcaatttaaaattccatgtcagtggacctaaagccaggacctttgacattcctgattatcggccccaacaactgtgtaacccaaacatagataatctcttgaactactgtaaaaattggttgacacgagagcatgaatttgcctttgatgaggttaaagggcaaagagttttcaacatatttgaagcagaagaaattaagcataaagaaagatacaatccatcacgtaaaccaaggaattttttattgatacaaggaaccgttcaaggtccatacttgccctcaactatagcatccgaccagtacgatacaaaagttggctgtctggagatattaaaaaaccatcctgaaactccgatacaaatattagctagagatatggcattgaaatatataatgttggataaagatgaccttataaactactataaccctagagcatatttcaagcaaactgctaatataaaggaacctggcacttttaagctgaatttatcatcaatggatcctaaggccaaggcactgttagatgttatctcaaagaactcaaaaaaaggtgtgtttggtgaggtaattgatagtatagagataagtagtttaatacagcaaaatgagtgttctaaagtcatagaaaagatcctatctgatttggagatcaatgttggtgaaacaagccaaggcctagataatcccaagaggacaacaggtgttgatgatatcttaaagaagttttatgacaatgagttggtcaagtatatgttacatatagtaagaaagacaacagcatggcatatggggcatttgttaagagatataacagaatgtctaattgcacatgctggcttgaaaagatcaaagtactggtcaatacatgggttctctcatggtggaatattattgatgatcttgccatcaaaatcactagaagttgcaggttcttatattcggttttttactgtttttaaggatggattaggtttaattgactatgaaaacttagattcaactgtggtaattgatggtgtaagttggtgtttcagcaaggttatgagtttagacctaaatcgactattggctctgaatatatcttttgagaagacactattagcaacagctacatggtttcaatactatacagaggaccaaggtcactttcccttacaacatgcattgaggtctgtttttgcattccactttttactgactgacacaaaagatgaaattatgtgctatctttgataatttaagatatctaatccctgcagtgacctcgctttattcaggatataaaccgttgattgtaaagttttttgaaaggccatttaagagtgcacttgatgtatacctgtatactattataaaaacactacttgtaagccttgcacaaaataataaaatccgcttctattcaaaagttcggctattggggcttacagtagaccaatcaaccattggtgcaagtggagtttacccttctttaatgtcaagagtggtttataaacattataaaagtttaatctcagaagcgactacctgttttttcttatttgagaagggtcttcatggcaatcttactgaggaagcaaaaatacatttagagacagtcgaatgggctaggaaatttagtgacaaagagaaggcttatggtgcatatattatggaggaaggttacacaataaaggatgtagtagatggcaatattccagtagaacaacaattattttgtcaagaagtagtagagttatctgcaatggaattaaatacatatttggaagccaagtcacaagtgatggcagctaacataatgaacaaacactgggatcgtccatatttcagccaaacaaggaatattagcttaaaaggtatgtcaggcgcattacaagaagatggtcatttatcagctagtgttacattaattgaagcaatacgttttttgaatcaatctcaacaaaacccctcagtgttggagatgtatgaacaaactaagaggcagaaggctatggcacggattgttagaaaataccagagaacagaagctgatcgtggattttttataactactttaccaaccagagtgagattagagataatagaggattattttgatgcaatagctaaagtagtccctgaagagtatatatcatatggaggagagcggaagatattaaatattcaacaggcacttgaaaaggcattaagatgggcttcaggtgaaagtgagattcaaattagtatgggacaggtcattaaactaaaacggaagttaatgtatgttagtgcagatgcaacaaaatggtccccaggagataactctgcgaaattccgaagattcacccaggcattacacgatggtctacgagatgataaattaaagaggtgtgtagtggatgccttacggaatatatatgaaactgattttttcatgtctaggaagttacatcgatatatcgatggtatggatgatttatctgaatttgtagaagattttttatcattctttccaaataaagtatctgctgctatcaaaggaaattggctgcaaggaaatctaaacaag")
  112.  
  113. if __name__ == "__main__":
  114.     print ",".join(imain())