Advertisement
dan-masek

Simple implementation of Knuth–Morris–Pratt algorithm

Apr 8th, 2021
627
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.82 KB | None | 0 0
  1. # ----------------------------------------------------------------------------
  2.  
  3. def build_kmp_table(input_buffer, pos, look_ahead_end):
  4.     table = [-1]
  5.     candidate = 0
  6.     for i in range(1, look_ahead_end - pos):
  7.         if input_buffer[pos + i] == input_buffer[pos + candidate]:
  8.             table.append(table[candidate])
  9.         else:
  10.             table.append(candidate)
  11.             while candidate >= 0 and (input_buffer[pos + i] != input_buffer[pos + candidate]):
  12.                 candidate = table[candidate]
  13.         candidate += 1
  14.     return table
  15.  
  16. # ----------------------------------------------------------------------------
  17.  
  18. def find_longest_match_kmp(input_buffer, sliding_window_start, pos, look_ahead_end):
  19.     look_ahead_size = look_ahead_end - pos # Actual
  20.  
  21.     max_match_length = 0
  22.     max_match_pos = 0
  23.  
  24.     table = build_kmp_table(input_buffer, pos, look_ahead_end)
  25.  
  26.     la_idx = 0 # k
  27.     sw_idx = 0 # j
  28.  
  29.     while (sliding_window_start + sw_idx - la_idx) < pos:
  30.         if input_buffer[pos + la_idx] == input_buffer[sliding_window_start + sw_idx]:
  31.             la_idx += 1
  32.             sw_idx += 1
  33.             if la_idx == look_ahead_size:
  34.                 # Full-length match found, we're done
  35.                 max_match_pos = sliding_window_start + sw_idx - la_idx
  36.                 max_match_length = la_idx
  37.                 break
  38.         else:
  39.             if la_idx > max_match_length:
  40.                 max_match_pos = sliding_window_start + sw_idx - la_idx
  41.                 max_match_length = la_idx
  42.             la_idx = table[la_idx]
  43.             if la_idx < 0:
  44.                 la_idx += 1
  45.                 sw_idx += 1
  46.  
  47.     if max_match_length == 0:
  48.         return 0, 0
  49.     return (pos - max_match_pos, max_match_length)
  50.  
  51. # ----------------------------------------------------------------------------
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement