dan-masek

Simple implementation of Knuth–Morris–Pratt algorithm

Apr 8th, 2021
351
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # ----------------------------------------------------------------------------
  2.  
  3. def build_kmp_table(input_buffer, pos, look_ahead_end):
  4.     table = [-1]
  5.     candidate = 0
  6.     for i in range(1, look_ahead_end - pos):
  7.         if input_buffer[pos + i] == input_buffer[pos + candidate]:
  8.             table.append(table[candidate])
  9.         else:
  10.             table.append(candidate)
  11.             while candidate >= 0 and (input_buffer[pos + i] != input_buffer[pos + candidate]):
  12.                 candidate = table[candidate]
  13.         candidate += 1
  14.     return table
  15.  
  16. # ----------------------------------------------------------------------------
  17.  
  18. def find_longest_match_kmp(input_buffer, sliding_window_start, pos, look_ahead_end):
  19.     look_ahead_size = look_ahead_end - pos # Actual
  20.  
  21.     max_match_length = 0
  22.     max_match_pos = 0
  23.  
  24.     table = build_kmp_table(input_buffer, pos, look_ahead_end)
  25.  
  26.     la_idx = 0 # k
  27.     sw_idx = 0 # j
  28.  
  29.     while (sliding_window_start + sw_idx - la_idx) < pos:
  30.         if input_buffer[pos + la_idx] == input_buffer[sliding_window_start + sw_idx]:
  31.             la_idx += 1
  32.             sw_idx += 1
  33.             if la_idx == look_ahead_size:
  34.                 # Full-length match found, we're done
  35.                 max_match_pos = sliding_window_start + sw_idx - la_idx
  36.                 max_match_length = la_idx
  37.                 break
  38.         else:
  39.             if la_idx > max_match_length:
  40.                 max_match_pos = sliding_window_start + sw_idx - la_idx
  41.                 max_match_length = la_idx
  42.             la_idx = table[la_idx]
  43.             if la_idx < 0:
  44.                 la_idx += 1
  45.                 sw_idx += 1
  46.  
  47.     if max_match_length == 0:
  48.         return 0, 0
  49.     return (pos - max_match_pos, max_match_length)
  50.  
  51. # ----------------------------------------------------------------------------
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×