Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Fri Mar 31 01:06:37 2023
- @author: ahsan
- """
- NO_OF_CHARS = 256
- def badCharHeuristic(string, size):
- '''
- The preprocessing function for
- Boyer Moore's bad character heuristic
- '''
- # Initialize all occurrence as -1
- badChar = [-1]*NO_OF_CHARS
- # Fill the actual value of last occurrence
- for i in range(size):
- badChar[ord(string[i])] = i;
- if string[i] == '.':
- for j in range(NO_OF_CHARS):
- badChar[j] = i;
- # return initialized list
- return badChar
- def preprocess_strong_suffix(shift, bpos, pat, m):
- # m is the length of pattern
- i = m
- j = m + 1
- bpos[i] = j
- while i > 0:
- '''if character at position i-1 is
- not equivalent to character at j-1,
- then continue searching to right
- of the pattern for border '''
- while j <= m and (pat[i - 1] != pat[j - 1] or pat[i-1] == '.' or pat[j-1] == '.'):
- ''' the character preceding the occurrence
- of t in pattern P is different than the
- mismatching character in P, we stop skipping
- the occurrences and shift the pattern
- from i to j '''
- if shift[j] == 0:
- shift[j] = j - i
- # Update the position of next border
- j = bpos[j]
- ''' p[i-1] matched with p[j-1], border is found.
- store the beginning position of border '''
- i -= 1
- j -= 1
- bpos[i] = j
- # Preprocessing for case 2
- def preprocess_case2(shift, bpos, pat, m):
- j = bpos[0]
- for i in range(m + 1):
- ''' set the border position of the first character
- of the pattern to all indices in array shift
- having shift[i] = 0 '''
- if shift[i] == 0:
- shift[i] = j
- ''' suffix becomes shorter than bpos[0],
- use the position of next widest border
- as value of j '''
- if i == j:
- j = bpos[j]
- def search(txt, pat):
- '''
- A pattern searching function that uses Bad Character
- Heuristic of Boyer Moore Algorithm
- '''
- m = len(pat)
- n = len(txt)
- # create the bad character list by calling
- # the preprocessing function badCharHeuristic()
- # for given pattern
- badChar = badCharHeuristic(pat, m)
- bpos = [0] * (m + 1)
- # initialize all occurrence of shift to 0
- shift = [0] * (m + 1)
- # do preprocessing
- preprocess_strong_suffix(shift, bpos, pat, m)
- preprocess_case2(shift, bpos, pat, m)
- print(badChar)
- print(bpos)
- print(shift)
- # s is shift of the pattern with respect to text
- s = 0
- while(s <= n-m):
- j = m-1
- # Keep reducing index j of pattern while
- # characters of pattern and text are matching
- # at this shift s
- while j>=0 and (pat[j] == txt[s+j] or pat[j] == '.'):
- j -= 1
- # If the pattern is present at current shift,
- # then index j will become -1 after the above loop
- if j<0:
- print("Pattern occur at shift = {}".format(s))
- '''
- Shift the pattern so that the next character in text
- aligns with the last occurrence of it in pattern.
- The condition s+m < n is necessary for the case when
- pattern occurs at the end of text
- '''
- s += max((m-badChar[ord(txt[s+m])] if s+m<n else 1), shift[0])
- else:
- '''
- Shift the pattern so that the bad character in text
- aligns with the last occurrence of it in pattern. The
- max function is used to make sure that we get a positive
- shift. We may get a negative shift if the last occurrence
- of bad character in pattern is on the right side of the
- current character.
- '''
- s += max(shift[j + 1], j-badChar[ord(txt[s+j])])
- #text = "ABAAAABAACD"
- #pat = "ABA"
- #text = "ABAAABCD"
- #pat = "ABC"
- #text = "babbababaabbaba"
- #pat = "abba"
- text = "bbbbbababbbbbabb"
- pat = "bb.bb"
- search(text, pat)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement