Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #Enter your program here
- import itertools
- def ScanAndScoreMotif(DNA, motif, minTotalHammingDist):
- totalDist = 0
- bestAlignment = []
- k = len(motif)
- for seq in DNA:
- minHammingDist = k+1
- for s in xrange(len(seq)-k+1):
- HammingDist = sum([1 for i in xrange(k) if motif[i] != seq[s+i]])
- if (HammingDist < minHammingDist):
- bestS = s
- minHammingDist = HammingDist
- bestAlignment.append(bestS)
- totalDist += minHammingDist
- if totalDist > minTotalHammingDist:
- return [], k*len(DNA)
- return bestAlignment, totalDist
- def BranchAndBoundMedianStringMotifSearch(DNA,k):
- """ Consider all possible 4**k motifs"""
- bestAlignment = []
- minHammingDist = k*len(DNA)
- kmer = ''
- bestPrefixes = []
- if(k == 1):
- for pattern in itertools.product('acgt', repeat= (1)):
- motif = ''.join(pattern)
- align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
- if(dist < minHammingDist + 12):
- bestPrefixes.append(motif)
- if (dist < minHammingDist):
- bestAlignment = [s for s in align]
- minHammingDist = dist
- kmer = motif
- return bestAlignment, minHammingDist, kmer
- if(k == 2):
- for pattern in itertools.product('acgt', repeat= (2)):
- motif = ''.join(pattern)
- align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
- if(dist < minHammingDist + 12):
- bestPrefixes.append(motif)
- if (dist < minHammingDist):
- bestAlignment = [s for s in align]
- minHammingDist = dist
- kmer = motif
- return bestAlignment, minHammingDist, kmer
- if(k == 3):
- for pattern in itertools.product('acgt', repeat= (3)):
- motif = ''.join(pattern)
- align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
- if(dist < minHammingDist + 12):
- bestPrefixes.append(motif)
- if (dist < minHammingDist):
- bestAlignment = [s for s in align]
- minHammingDist = dist
- kmer = motif
- return bestAlignment, minHammingDist, kmer
- for pattern in itertools.product('acgt', repeat= (4)):
- motif = ''.join(pattern)
- align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
- if(dist < minHammingDist + 12):
- bestPrefixes.append(motif)
- if (dist < minHammingDist):
- bestAlignment = [s for s in align]
- minHammingDist = dist
- kmer = motif
- bestPrefixesNext = []
- for n in range (5, k+1):
- minHammingDist = k*len(DNA)
- bestAlignment = []
- kmer = ''
- for pattern in bestPrefixes:
- motif = ''.join(pattern)
- for base in ["a", "c", "t", "g"]:
- motif = ''.join(pattern)
- motif = motif + base
- align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
- if(dist < minHammingDist + 4*n):
- bestPrefixesNext.append(motif)
- if (dist < minHammingDist):
- bestAlignment = [s for s in align]
- minHammingDist = dist
- kmer = motif
- motif = ''.join(pattern)
- motif = base + motif
- align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
- if(dist < minHammingDist + 4*n):
- bestPrefixesNext.append(motif)
- if (dist < minHammingDist):
- bestAlignment = [s for s in align]
- minHammingDist = dist
- kmer = motif
- bestPrefixes = bestPrefixesNext
- bestPrefixesNext = []
- return bestAlignment, minHammingDist, kmer
- seqApprox = [
- 'tagtggtcttttgagtgtagatctgaagggaaagtatttccaccagttcggggtcacccagcagggcagggtgacttaat',
- 'cgcgactcggcgctcacagttatcgcacgtttagaccaaaacggagttggatccgaaactggagtttaatcggagtcctt',
- 'gttacttgtgagcctggttagacccgaaatataattgttggctgcatagcggagctgacatacgagtaggggaaatgcgt',
- 'aacatcaggctttgattaaacaatttaagcacgtaaatccgaattgacctgatgacaatacggaacatgccggctccggg',
- 'accaccggataggctgcttattaggtccaaaaggtagtatcgtaataatggctcagccatgtcaatgtgcggcattccac',
- 'tagattcgaatcgatcgtgtttctccctctgtgggttaacgaggggtccgaccttgctcgcatgtgccgaacttgtaccc',
- 'gaaatggttcggtgcgatatcaggccgttctcttaacttggcggtgcagatccgaacgtctctggaggggtcgtgcgcta',
- 'atgtatactagacattctaacgctcgcttattggcggagaccatttgctccactacaagaggctactgtgtagatccgta',
- 'ttcttacacccttctttagatccaaacctgttggcgccatcttcttttcgagtccttgtacctccatttgctctgatgac',
- 'ctacctatgtaaaacaacatctactaacgtagtccggtctttcctgatctgccctaacctacaggtcgatccgaaattcg']
- %time BranchAndBoundMedianStringMotifSearch(seqApprox,10)
- #([17, 47, 18, 33, 21, 0, 46, 70, 16, 65], 11, 'tagatccgaa')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement