daily pastebin goal
89%
SHARE
TWEET

Untitled

a guest Feb 13th, 2018 78 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #Enter your program here
  2. import itertools
  3.  
  4. def ScanAndScoreMotif(DNA, motif, minTotalHammingDist):
  5.     totalDist = 0
  6.     bestAlignment = []
  7.     k = len(motif)
  8.     for seq in DNA:
  9.         minHammingDist = k+1
  10.         for s in xrange(len(seq)-k+1):
  11.             HammingDist = sum([1 for i in xrange(k) if motif[i] != seq[s+i]])
  12.             if (HammingDist < minHammingDist):
  13.                 bestS = s
  14.                 minHammingDist = HammingDist
  15.         bestAlignment.append(bestS)
  16.         totalDist += minHammingDist
  17.        
  18.         if totalDist > minTotalHammingDist:
  19.             return [], k*len(DNA)
  20.        
  21.     return bestAlignment, totalDist
  22.  
  23. def BranchAndBoundMedianStringMotifSearch(DNA,k):
  24.     """ Consider all possible 4**k motifs"""
  25.     bestAlignment = []
  26.     minHammingDist = k*len(DNA)
  27.     kmer = ''
  28.     bestPrefixes = []
  29.     if(k == 1):
  30.         for pattern in itertools.product('acgt', repeat= (1)):
  31.             motif = ''.join(pattern)
  32.             align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  33.             if(dist < minHammingDist + 12):
  34.                 bestPrefixes.append(motif)
  35.             if (dist < minHammingDist):
  36.                 bestAlignment = [s for s in align]
  37.                 minHammingDist = dist
  38.                 kmer = motif
  39.         return bestAlignment, minHammingDist, kmer
  40.     if(k == 2):
  41.         for pattern in itertools.product('acgt', repeat= (2)):
  42.             motif = ''.join(pattern)
  43.             align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  44.             if(dist < minHammingDist + 12):
  45.                 bestPrefixes.append(motif)
  46.             if (dist < minHammingDist):
  47.                 bestAlignment = [s for s in align]
  48.                 minHammingDist = dist
  49.                 kmer = motif
  50.         return bestAlignment, minHammingDist, kmer
  51.     if(k == 3):
  52.         for pattern in itertools.product('acgt', repeat= (3)):
  53.             motif = ''.join(pattern)
  54.             align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  55.             if(dist < minHammingDist + 12):
  56.                 bestPrefixes.append(motif)
  57.             if (dist < minHammingDist):
  58.                 bestAlignment = [s for s in align]
  59.                 minHammingDist = dist
  60.                 kmer = motif
  61.         return bestAlignment, minHammingDist, kmer
  62.     for pattern in itertools.product('acgt', repeat= (4)):
  63.         motif = ''.join(pattern)
  64.         align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  65.         if(dist < minHammingDist + 12):
  66.             bestPrefixes.append(motif)
  67.         if (dist < minHammingDist):
  68.             bestAlignment = [s for s in align]
  69.             minHammingDist = dist
  70.             kmer = motif
  71.     bestPrefixesNext = []
  72.     for n in range (5, k+1):
  73.         minHammingDist = k*len(DNA)
  74.         bestAlignment = []
  75.         kmer = ''
  76.         for pattern in bestPrefixes:
  77.             motif = ''.join(pattern)
  78.             for base in ["a", "c", "t", "g"]:
  79.                 motif = ''.join(pattern)
  80.                 motif = motif + base
  81.                 align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  82.                 if(dist < minHammingDist + 4*n):
  83.                     bestPrefixesNext.append(motif)
  84.                 if (dist < minHammingDist):
  85.                     bestAlignment = [s for s in align]
  86.                     minHammingDist = dist
  87.                     kmer = motif
  88.                 motif = ''.join(pattern)
  89.                 motif = base + motif
  90.                 align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
  91.                 if(dist < minHammingDist + 4*n):
  92.                     bestPrefixesNext.append(motif)
  93.                 if (dist < minHammingDist):
  94.                     bestAlignment = [s for s in align]
  95.                     minHammingDist = dist
  96.                     kmer = motif
  97.         bestPrefixes = bestPrefixesNext
  98.         bestPrefixesNext = []
  99.     return bestAlignment, minHammingDist, kmer
  100.  
  101. seqApprox = [
  102.     'tagtggtcttttgagtgtagatctgaagggaaagtatttccaccagttcggggtcacccagcagggcagggtgacttaat',
  103.     'cgcgactcggcgctcacagttatcgcacgtttagaccaaaacggagttggatccgaaactggagtttaatcggagtcctt',
  104.     'gttacttgtgagcctggttagacccgaaatataattgttggctgcatagcggagctgacatacgagtaggggaaatgcgt',
  105.     'aacatcaggctttgattaaacaatttaagcacgtaaatccgaattgacctgatgacaatacggaacatgccggctccggg',
  106.     'accaccggataggctgcttattaggtccaaaaggtagtatcgtaataatggctcagccatgtcaatgtgcggcattccac',
  107.     'tagattcgaatcgatcgtgtttctccctctgtgggttaacgaggggtccgaccttgctcgcatgtgccgaacttgtaccc',
  108.     'gaaatggttcggtgcgatatcaggccgttctcttaacttggcggtgcagatccgaacgtctctggaggggtcgtgcgcta',
  109.     'atgtatactagacattctaacgctcgcttattggcggagaccatttgctccactacaagaggctactgtgtagatccgta',
  110.     'ttcttacacccttctttagatccaaacctgttggcgccatcttcttttcgagtccttgtacctccatttgctctgatgac',
  111.     'ctacctatgtaaaacaacatctactaacgtagtccggtctttcctgatctgccctaacctacaggtcgatccgaaattcg']
  112.  
  113. %time BranchAndBoundMedianStringMotifSearch(seqApprox,10)
  114. #([17, 47, 18, 33, 21, 0, 46, 70, 16, 65], 11, 'tagatccgaa')
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top