Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Base-4 digit <-> nucleotide lookup tables (A=0, C=1, G=2, T=3).
number_to_dna = dict(enumerate("ACGT"))
dna_to_number = {base: digit for digit, base in number_to_dna.items()}

# Module-level memo tables filled in by PatternToNumber / NumberToPattern:
# cashe_number maps a pattern to its number, cashe_pattern maps back.
cashe_number = {}
cashe_pattern = {}
def FindFreqWordsBySort(text, k):
    """Return the most frequent k-mers of ``text`` using the sort-based method.

    Each length-``k`` window of ``text`` is encoded as a base-4 integer with
    ``PatternToNumber``. Sorting the codes places equal k-mers next to each
    other, so the longest runs of equal codes are the most frequent k-mers.

    Args:
        text: DNA string over the alphabet A, C, G, T.
        k: Length of the words to count.

    Returns:
        A set with every k-mer that occurs the maximal number of times.
        The set is empty when ``k`` is not positive or ``text`` is shorter
        than ``k`` (there is no window to count).

    Raises:
        KeyError: Propagated from ``PatternToNumber`` when ``text`` contains
            a character other than A, C, G or T.
    """
    window_count = len(text) - k + 1
    if k <= 0 or window_count <= 0:
        return set()

    # The last valid window starts at len(text) - k, so all window_count
    # start positions must be visited (stopping one short drops the final
    # k-mer and leaves a bogus zero code in the list).
    sorted_codes = sorted(
        PatternToNumber(text[start:start + k]) for start in range(window_count)
    )

    # run_lengths[i] = number of consecutive equal codes ending at position i.
    run_lengths = [1] * window_count
    for i in range(1, window_count):
        if sorted_codes[i] == sorted_codes[i - 1]:
            run_lengths[i] = run_lengths[i - 1] + 1

    max_count = max(run_lengths)
    return {
        NumberToPattern(code, k)
        for code, run in zip(sorted_codes, run_lengths)
        if run == max_count
    }
- import time
- #-------------------------------------------
def NumberToPattern(index, k):
    """Decode the base-4 number ``index`` into a DNA string of length ``k``.

    Digits map to nucleotides through ``number_to_dna`` (0=A, 1=C, 2=G, 3=T),
    most significant digit first. The result is left-padded with "A" (digit 0)
    up to ``k`` characters; if ``index`` needs more than ``k`` digits the
    string is returned unpadded and is longer than ``k``.

    Args:
        index: Non-negative integer encoding of the pattern.
        k: Desired pattern length.

    Returns:
        The decoded pattern string.

    Raises:
        KeyError: If ``index`` is negative (no nucleotide for that digit).
    """
    # The same number decodes to different strings for different k
    # (0 -> "A", "AA", ...), so the memo key must include the length.
    cache_key = (index, k)
    if cache_key in cashe_pattern:
        return cashe_pattern[cache_key]

    # Work on a copy so the caches below are keyed by the caller's number,
    # not by the leading digit left over after the division loop.
    remaining = index
    letters = []  # least significant nucleotide first
    while remaining >= 4:
        remaining, digit = divmod(remaining, 4)
        letters.append(number_to_dna[digit])
    letters.append(number_to_dna[remaining])

    # Pad the high-order end; a negative pad width yields no padding.
    letters.extend("A" * (k - len(letters)))
    pattern = "".join(reversed(letters))

    cashe_pattern[cache_key] = pattern
    cashe_number[pattern] = index
    return pattern
def PatternToNumber(pattern):
    """Encode a DNA string as a base-4 integer (A=0, C=1, G=2, T=3).

    The first character is the most significant digit. Results are memoised
    in ``cashe_number``; the reverse mapping is recorded in ``cashe_pattern``.

    Args:
        pattern: String over the alphabet A, C, G, T.

    Returns:
        The integer encoding of ``pattern`` (0 for the empty string).

    Raises:
        KeyError: If ``pattern`` contains a character missing from
            ``dna_to_number``.
    """
    known = cashe_number.get(pattern)
    if known is not None:
        return known

    # Walk from the last character so position n carries weight 4**n.
    code = sum(
        dna_to_number[base] * 4 ** power
        for power, base in enumerate(reversed(pattern))
    )

    cashe_number[pattern] = code
    cashe_pattern[code] = pattern
    return code
- #-------------------------------------------
# Demo run: print the most frequent 4-mers of a sample DNA string.
print(
    FindFreqWordsBySort(
        text="ACGTTGCATGTCGCATGATGCATGAGAGCT",
        k=4,
    )
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement