Advertisement
Guest User

Untitled

a guest
Mar 27th, 2017
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.57 KB | None | 0 0
  1. number_to_dna = {0: "A", 1: "C", 2: "G", 3: "T"}
  2.  
  3. dna_to_number = {"A": 0, "C": 1, "G": 2, "T": 3}
  4. cashe_number = dict()
  5. cashe_pattern = dict()
  6.  
  7. def FindFreqWordsBySort(text, k):
  8. freq_pattern = set()
  9.  
  10. l = len(text)
  11.  
  12. index = [0] * (l - k + 1)
  13. count = [0] * (l - k + 1)
  14.  
  15. for i in range(l - k):
  16. pattern = text[i : i + k]
  17.  
  18. index[i] = PatternToNumber(pattern)
  19.  
  20. count[i] = 1
  21.  
  22. SortedIndex = sorted(index)
  23.  
  24. for i in range(1, l - k):
  25. if SortedIndex[i] == SortedIndex[i-1]:
  26. count[i] = count[i-1] + 1
  27. maxCount = max(count)
  28.  
  29. for i in range(l - k):
  30. if count[i] == maxCount:
  31. freq_pattern.add(NumberToPattern(SortedIndex[i], k))
  32.  
  33. return freq_pattern
  34.  
  35.  
  36. import time
  37.  
  38. #-------------------------------------------
  39.  
  40. def NumberToPattern(index,k):
  41. if index in cashe_pattern:
  42. return cashe_pattern[index]
  43. result = ""
  44.  
  45. while index >= 4:
  46. result += number_to_dna[index % 4]
  47.  
  48. index //= 4
  49.  
  50. result += number_to_dna[index]
  51.  
  52. l = len(result)
  53.  
  54. result += "A"*(k-l)
  55.  
  56. cashe_pattern[index] = result[::-1]
  57. cashe_number[result[::-1]] = index
  58. return result[::-1]
  59.  
  60. def PatternToNumber(pattern):
  61.  
  62. if pattern in cashe_number:
  63. return cashe_number[pattern]
  64.  
  65. result = 0
  66.  
  67. k = -1
  68.  
  69. for char in pattern[::-1]:
  70. k += 1
  71.  
  72. result += dna_to_number[char] * (4**k)
  73.  
  74. cashe_number[pattern] = result
  75. cashe_pattern[result] = pattern
  76. return result
  77.  
  78. #-------------------------------------------
  79.  
  80. print(FindFreqWordsBySort("ACGTTGCATGTCGCATGATGCATGAGAGCT", 4))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement