Advertisement
furas

Python - find matching - (Stackoverflow)

Feb 18th, 2022 (edited)
752
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # matching - Finding identical copies in Python - Stackoverflow
  2. # https://stackoverflow.com/questions/71177696/finding-identical-copies-in-python
  3.  
  4. # find matching non-overlapping substrings
  5.  
  6. # Still don't know what to do with
  7. # - `ababa aba` --> it should be `"aba":[0,2]` or `"aba":[0,6]` or pairs `"aba":[(0,2), (0,6)]`
  8. # - `aba ababa` --> it should be `"aba":[0,3]` or `"aba":[4,6]` or pairs `"aba":[(0,3), (4,6)]`
  9.  
  10. import pprint
  11.  
  12. def find_matches(text):
  13.     results = dict()
  14.    
  15.     full_len = len(text)
  16.     max_len = full_len//2
  17.     #print('max_len:', max_len)
  18.    
  19.     # get different lengths
  20.     for length in range(1, max_len+1):
  21.         #print('--- length:', length, '---')
  22.        
  23.         # get substrings starting in different places
  24.         for start in range(0, len(text)-length):
  25.  
  26.             pattern = text[start:start+length]
  27.             #print('pattern:', pattern)
  28.  
  29.             # search only if pattern wasn't search before
  30.             if pattern not in results:
  31.                 results[pattern] = [start]
  32.                                
  33.                 # search only after pattern
  34.                 index = start+length
  35.                 while True:
  36.                     index = text.find(pattern, index)
  37.                     if index < 0:
  38.                         break
  39.                     results[pattern].add(index)
  40.                     #index += 1
  41.                     index += length
  42.    
  43.     # remove single results
  44.     results = {key:val for key,val in results.items() if len(val) > 1}
  45.    
  46.     return results
  47.    
  48. # ----
  49.  
  50. text = "lowlow key keykey y k"
  51.  
  52. results = find_matches(text)
  53.  
  54. pprint.pprint(results)
  55.              
  56. for number, char in enumerate(text):
  57.     print(f"{number:2}|{char}")
  58.  
  59. # --- Result ---
  60.  
  61. """
  62. {' ': [6, 10, 17, 19],
  63. ' k': [6, 10, 19],
  64. ' ke': [6, 10],
  65. ' key': [6, 10],
  66. 'e': [8, 12, 15],
  67. 'ey': [8, 12, 15],
  68. 'ey ': [8, 15],
  69. 'k': [7, 11, 14, 20],
  70. 'ke': [7, 11, 14],
  71. 'key': [7, 11, 14],
  72. 'key ': [7, 14],
  73. 'l': [0, 3],
  74. 'lo': [0, 3],
  75. 'low': [0, 3],
  76. 'o': [1, 4],
  77. 'ow': [1, 4],
  78. 'w': [2, 5],
  79. 'y': [9, 13, 16, 18],
  80. 'y ': [9, 16, 18],
  81. 'y k': [9, 18]}
  82.  
  83. 0|l
  84. 1|o
  85. 2|w
  86. 3|l
  87. 4|o
  88. 5|w
  89. 6|
  90. 7|k
  91. 8|e
  92. 9|y
  93. 10|
  94. 11|k
  95. 12|e
  96. 13|y
  97. 14|k
  98. 15|e
  99. 16|y
  100. 17|
  101. 18|y
  102. 19|
  103. 20|k
  104. """
Advertisement
Advertisement
Advertisement
RAW Paste Data Copied
Advertisement