Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import matplotlib.pyplot as plt
- import random
- import string
- CMP_COUNT = 0
- def matches_at(t: str, pos: int, w: str) -> bool:
- # 0 <= pos <= len(t) - len(w)
- global CMP_COUNT
- for i in range(len(w)):
- CMP_COUNT += 1
- if t[pos+i] != w[i]:
- return False
- return True
- # return all(t[pos+i]==w[i] for i in range(len(w)))
- def naive_search(t: str, w: str) -> list:
- result = []
- for pos in range(len(t) - len(w) + 1):
- if matches_at(t, pos, w):
- result.append(pos)
- return result
- # return [pos for pos in range(len(t) + len(w) + 1) if matches_at(t, pos, w)]
- def sunday_search(t: str, w: str) -> list:
- result = []
- p = 0
- lastp = create_pattern_cache(w)
- while p <= (len(t) - len(w)):
- if matches_at(t, p, w):
- result.append(p)
- p += len(w)
- if p < len(t): p -= lastp.get(t[p], -1)
- return result
- def create_pattern_cache(pattern):
- return { sign: index for index, sign in enumerate(pattern) }
- def benchmark(alg, t, w):
- global CMP_COUNT
- CMP_COUNT = 0
- alg(t, w)
- return CMP_COUNT
- def rand_str(str_size, alphabet_size):
- alphabet = string.ascii_letters[:alphabet_size]
- result = ''
- for _ in range(str_size):
- result += random.choice(alphabet)
- return result
- def create_text_len_simulation(plt, t_max_len, p_len, a_len, methods):
- w = rand_str(p_len, a_len)
- for method in methods:
- result = []
- for t_len in range(t_max_len):
- t = rand_str(t_len, a_len)
- result.append(benchmark(method[0], t, w))
- plt.plot(range(t_max_len), result, label=method[1])
- plt.set_xlabel('Text length')
- plt.set_ylabel('Number of count operation')
- plt.legend()
- def create_pattern_len_simulation(plt, t_len, p_max_len, a_len, methods):
- t = rand_str(t_len, a_len)
- for method in methods:
- result = []
- for p_len in range(1, p_max_len):
- w = rand_str(p_len, a_len)
- result.append(benchmark(method[0], t, w))
- plt.plot(range(1, p_max_len), result, label=method[1])
- plt.set_xlabel('Pattern length')
- plt.set_ylabel('Number of count operation')
- plt.legend()
- def create_alphabet_len_simulation(plt, t_len, p_len, a_max_len, methods):
- for method in methods:
- result = []
- for a_len in range(1, a_max_len):
- t = rand_str(t_len, a_len)
- w = rand_str(p_len, a_len)
- result.append(benchmark(method[0], t, w))
- plt.plot(range(1, a_max_len), result, label=method[1])
- plt.set_xlabel('Pattern length')
- plt.set_ylabel('Number of count operation')
- plt.legend()
- if __name__ == "__main__":
- methods = [(naive_search, 'Naive'), (sunday_search, 'Sunday')]
- _, axs = plt.subplots(3, 1)
- text_len_plt, pattern_len_plt, alphabet_len_plt = axs.flat
- create_text_len_simulation(text_len_plt, 1000, 6, 4, methods)
- create_pattern_len_simulation(pattern_len_plt, 1000, 1000, 4, methods)
- create_alphabet_len_simulation(alphabet_len_plt, 100, 6, 200, methods)
- plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement