Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
def is_valid(dna):
    """Return True when *dna* is an acceptable DNA strand.

    A strand is accepted when it is between 10 and 100 characters long
    (inclusive) and consists solely of the characters A, T, G and C.
    """
    if not 10 <= len(dna) <= 100:
        return False
    # fullmatch rather than match('^...$'): `$` also matches just before a
    # trailing newline, which would let "ATGC...\n" slip through.
    return re.fullmatch(r'[ATGC]+', dna) is not None
def analyze_dna(strands, codon_mapping):
    """Merge the valid strands and count what their codons map to.

    Args:
        strands: iterable of candidate strands; entries rejected by
            ``is_valid`` are ignored.
        codon_mapping: dict keyed by three-character codon. The mapped
            values are the labels that get counted and must be strings.

    Returns:
        One ``"label: count"`` line per distinct label, sorted by label and
        joined with newlines (no trailing newline). The empty string is
        returned when fewer than three strands are valid, when the merged
        strand's length is not a multiple of three, when any codon is
        missing from ``codon_mapping``, or when the merged strand is empty.
    """
    valid_list = [dna for dna in strands if is_valid(dna)]
    if len(valid_list) < 3:
        return ''

    long_strand = build_long_strand(valid_list)
    if len(long_strand) % 3 != 0:
        return ''

    counts = {}
    for start in range(0, len(long_strand), 3):
        codon = long_strand[start:start + 3]
        if codon not in codon_mapping:
            return ''
        label = codon_mapping[codon]
        counts[label] = counts.get(label, 0) + 1

    # Iterate items() (not the dict itself) so each label is paired with its
    # count; join() also yields '' naturally when nothing was counted.
    return '\n'.join(
        label + ': ' + str(count) for label, count in sorted(counts.items())
    )
def build_long_strand(valid_list):
    """Chain the strands into one long strand via three-character overlaps.

    Uses ``dfs_helper`` to look for an ordering that uses every strand once
    and in which each strand's first three characters equal the previous
    strand's last three. The merged strand is the first strand of that
    ordering followed by every later strand with its overlapping
    three-character prefix removed.

    Returns:
        The merged strand, or ``''`` when ``valid_list`` is empty or no such
        ordering exists.
    """
    if not valid_list:
        # dfs_helper reports success for zero strands, which would leave
        # nothing to index below.
        return ''

    ordered = []
    visited = [False] * len(valid_list)
    if not dfs_helper(len(valid_list), ordered, visited, valid_list):
        return ''
    return ordered[0] + ''.join(strand[3:] for strand in ordered[1:])
def dfs_helper(n, result, visited, valid_list):
    """Backtracking search for a chain of overlapping strands.

    Args:
        n: how many more strands still have to be placed.
        result: strands placed so far, in chain order; extended in place.
        visited: per-index flags marking which strands are already placed;
            updated in place.
        valid_list: the candidate strands.

    Returns:
        True once ``n`` further strands have been appended to ``result`` such
        that each one starts with the last three characters of its
        predecessor; False if no such extension exists, in which case
        ``result`` and ``visited`` are left as they were on entry.
    """
    if n == 0:
        return True

    for idx, taken in enumerate(visited):
        if taken:
            continue
        # Any strand may open the chain; later ones must overlap the tail.
        if result and result[-1][-3:] != valid_list[idx][:3]:
            continue

        result.append(valid_list[idx])
        visited[idx] = True
        if dfs_helper(n - 1, result, visited, valid_list):
            return True
        # Dead end: undo this placement and try the next candidate.
        result.pop()
        visited[idx] = False

    return False
# Demo: four strands whose three-character ends overlap; print the result
# of merging them.
a = [
    'AGTGGGGGGGGG',
    'AAACCCAATTT',
    'TTTACACAGCT',
    'GCTGGGCCCAGT',
]
print(build_long_strand(a))
Add Comment
Please, Sign In to add comment