• API
• FAQ
• Tools
• Archive
daily pastebin goal
45%
SHARE
TWEET

# Untitled

a guest Feb 13th, 2018 78 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
2. import itertools
3.
4. def ScanAndScoreMotif(DNA, motif, minTotalHammingDist):
5.     totalDist = 0
6.     bestAlignment = []
7.     k = len(motif)
8.     for seq in DNA:
9.         minHammingDist = k+1
10.         for s in xrange(len(seq)-k+1):
11.             HammingDist = sum([1 for i in xrange(k) if motif[i] != seq[s+i]])
12.             if (HammingDist < minHammingDist):
13.                 bestS = s
14.                 minHammingDist = HammingDist
15.         bestAlignment.append(bestS)
16.         totalDist += minHammingDist
17.
18.         if totalDist > minTotalHammingDist:
19.             return [], k*len(DNA)
20.
21.     return bestAlignment, totalDist
22.
23. def BranchAndBoundMedianStringMotifSearch(DNA,k):
24.     """ Consider all possible 4**k motifs"""
25.     bestAlignment = []
26.     minHammingDist = k*len(DNA)
27.     kmer = ''
28.     bestPrefixes = []
29.     if(k == 1):
30.         for pattern in itertools.product('acgt', repeat= (1)):
31.             motif = ''.join(pattern)
32.             align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
33.             if(dist < minHammingDist + 12):
34.                 bestPrefixes.append(motif)
35.             if (dist < minHammingDist):
36.                 bestAlignment = [s for s in align]
37.                 minHammingDist = dist
38.                 kmer = motif
39.         return bestAlignment, minHammingDist, kmer
40.     if(k == 2):
41.         for pattern in itertools.product('acgt', repeat= (2)):
42.             motif = ''.join(pattern)
43.             align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
44.             if(dist < minHammingDist + 12):
45.                 bestPrefixes.append(motif)
46.             if (dist < minHammingDist):
47.                 bestAlignment = [s for s in align]
48.                 minHammingDist = dist
49.                 kmer = motif
50.         return bestAlignment, minHammingDist, kmer
51.     if(k == 3):
52.         for pattern in itertools.product('acgt', repeat= (3)):
53.             motif = ''.join(pattern)
54.             align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
55.             if(dist < minHammingDist + 12):
56.                 bestPrefixes.append(motif)
57.             if (dist < minHammingDist):
58.                 bestAlignment = [s for s in align]
59.                 minHammingDist = dist
60.                 kmer = motif
61.         return bestAlignment, minHammingDist, kmer
62.     for pattern in itertools.product('acgt', repeat= (4)):
63.         motif = ''.join(pattern)
64.         align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
65.         if(dist < minHammingDist + 12):
66.             bestPrefixes.append(motif)
67.         if (dist < minHammingDist):
68.             bestAlignment = [s for s in align]
69.             minHammingDist = dist
70.             kmer = motif
71.     bestPrefixesNext = []
72.     for n in range (5, k+1):
73.         minHammingDist = k*len(DNA)
74.         bestAlignment = []
75.         kmer = ''
76.         for pattern in bestPrefixes:
77.             motif = ''.join(pattern)
78.             for base in ["a", "c", "t", "g"]:
79.                 motif = ''.join(pattern)
80.                 motif = motif + base
81.                 align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
82.                 if(dist < minHammingDist + 4*n):
83.                     bestPrefixesNext.append(motif)
84.                 if (dist < minHammingDist):
85.                     bestAlignment = [s for s in align]
86.                     minHammingDist = dist
87.                     kmer = motif
88.                 motif = ''.join(pattern)
89.                 motif = base + motif
90.                 align, dist = ScanAndScoreMotif(DNA, motif, minHammingDist)
91.                 if(dist < minHammingDist + 4*n):
92.                     bestPrefixesNext.append(motif)
93.                 if (dist < minHammingDist):
94.                     bestAlignment = [s for s in align]
95.                     minHammingDist = dist
96.                     kmer = motif
97.         bestPrefixes = bestPrefixesNext
98.         bestPrefixesNext = []
99.     return bestAlignment, minHammingDist, kmer
100.
101. seqApprox = [
102.     'tagtggtcttttgagtgtagatctgaagggaaagtatttccaccagttcggggtcacccagcagggcagggtgacttaat',
103.     'cgcgactcggcgctcacagttatcgcacgtttagaccaaaacggagttggatccgaaactggagtttaatcggagtcctt',
104.     'gttacttgtgagcctggttagacccgaaatataattgttggctgcatagcggagctgacatacgagtaggggaaatgcgt',
105.     'aacatcaggctttgattaaacaatttaagcacgtaaatccgaattgacctgatgacaatacggaacatgccggctccggg',
106.     'accaccggataggctgcttattaggtccaaaaggtagtatcgtaataatggctcagccatgtcaatgtgcggcattccac',
107.     'tagattcgaatcgatcgtgtttctccctctgtgggttaacgaggggtccgaccttgctcgcatgtgccgaacttgtaccc',
108.     'gaaatggttcggtgcgatatcaggccgttctcttaacttggcggtgcagatccgaacgtctctggaggggtcgtgcgcta',
109.     'atgtatactagacattctaacgctcgcttattggcggagaccatttgctccactacaagaggctactgtgtagatccgta',
110.     'ttcttacacccttctttagatccaaacctgttggcgccatcttcttttcgagtccttgtacctccatttgctctgatgac',
111.     'ctacctatgtaaaacaacatctactaacgtagtccggtctttcctgatctgccctaacctacaggtcgatccgaaattcg']
112.
113. %time BranchAndBoundMedianStringMotifSearch(seqApprox,10)
114. #([17, 47, 18, 33, 21, 0, 46, 70, 16, 65], 11, 'tagatccgaa')
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top