Advertisement
Epillion

Assignment Task 2

Mar 31st, 2023
843
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.36 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Fri Mar 31 01:06:37 2023
  4.  
  5. @author: ahsan
  6. """
  7.  
  8. NO_OF_CHARS = 256
  9.  
  10. def badCharHeuristic(string, size):
  11.     '''
  12.    The preprocessing function for
  13.    Boyer Moore's bad character heuristic
  14.    '''
  15.  
  16.     # Initialize all occurrence as -1
  17.     badChar = [-1]*NO_OF_CHARS
  18.  
  19.     # Fill the actual value of last occurrence
  20.     for i in range(size):
  21.         badChar[ord(string[i])] = i;
  22.         if string[i] == '.':
  23.             for j in range(NO_OF_CHARS):
  24.                 badChar[j] = i;        
  25.  
  26.     # return initialized list
  27.     return badChar
  28.  
  29. def preprocess_strong_suffix(shift, bpos, pat, m):
  30.  
  31.     # m is the length of pattern
  32.     i = m
  33.     j = m + 1
  34.     bpos[i] = j
  35.  
  36.     while i > 0:
  37.          
  38.         '''if character at position i-1 is
  39.        not equivalent to character at j-1,
  40.        then continue searching to right
  41.        of the pattern for border '''
  42.         while j <= m and (pat[i - 1] != pat[j - 1] or pat[i-1] == '.' or pat[j-1] == '.'):
  43.              
  44.             ''' the character preceding the occurrence
  45.            of t in pattern P is different than the
  46.            mismatching character in P, we stop skipping
  47.            the occurrences and shift the pattern
  48.            from i to j '''
  49.             if shift[j] == 0:
  50.                 shift[j] = j - i
  51.  
  52.             # Update the position of next border
  53.             j = bpos[j]
  54.              
  55.         ''' p[i-1] matched with p[j-1], border is found.
  56.        store the beginning position of border '''
  57.         i -= 1
  58.         j -= 1
  59.         bpos[i] = j
  60.  
  61. # Preprocessing for case 2
  62. def preprocess_case2(shift, bpos, pat, m):
  63.     j = bpos[0]
  64.     for i in range(m + 1):
  65.          
  66.         ''' set the border position of the first character
  67.        of the pattern to all indices in array shift
  68.        having shift[i] = 0 '''
  69.         if shift[i] == 0:
  70.             shift[i] = j
  71.              
  72.         ''' suffix becomes shorter than bpos[0],
  73.        use the position of next widest border
  74.        as value of j '''
  75.         if i == j:
  76.             j = bpos[j]
  77.  
  78. def search(txt, pat):
  79.     '''
  80.    A pattern searching function that uses Bad Character
  81.    Heuristic of Boyer Moore Algorithm
  82.    '''
  83.     m = len(pat)
  84.     n = len(txt)
  85.  
  86.     # create the bad character list by calling
  87.     # the preprocessing function badCharHeuristic()
  88.     # for given pattern
  89.     badChar = badCharHeuristic(pat, m)
  90.    
  91.    
  92.     bpos = [0] * (m + 1)
  93.  
  94.     # initialize all occurrence of shift to 0
  95.     shift = [0] * (m + 1)
  96.  
  97.     # do preprocessing
  98.     preprocess_strong_suffix(shift, bpos, pat, m)
  99.     preprocess_case2(shift, bpos, pat, m)
  100.    
  101.     print(badChar)
  102.     print(bpos)
  103.     print(shift)
  104.  
  105.     # s is shift of the pattern with respect to text
  106.     s = 0
  107.     while(s <= n-m):
  108.         j = m-1
  109.  
  110.         # Keep reducing index j of pattern while
  111.         # characters of pattern and text are matching
  112.         # at this shift s
  113.         while j>=0 and (pat[j] == txt[s+j] or pat[j] == '.'):
  114.             j -= 1
  115.  
  116.         # If the pattern is present at current shift,
  117.         # then index j will become -1 after the above loop
  118.         if j<0:
  119.             print("Pattern occur at shift = {}".format(s))
  120.  
  121.             '''  
  122.                Shift the pattern so that the next character in text
  123.                      aligns with the last occurrence of it in pattern.
  124.                The condition s+m < n is necessary for the case when
  125.                   pattern occurs at the end of text
  126.               '''
  127.             s += max((m-badChar[ord(txt[s+m])] if s+m<n else 1), shift[0])
  128.         else:
  129.             '''
  130.               Shift the pattern so that the bad character in text
  131.               aligns with the last occurrence of it in pattern. The
  132.               max function is used to make sure that we get a positive
  133.               shift. We may get a negative shift if the last occurrence
  134.               of bad character in pattern is on the right side of the
  135.               current character.
  136.            '''
  137.             s += max(shift[j + 1], j-badChar[ord(txt[s+j])])
  138.            
  139.            
  140.            
  141.            
  142. #text = "ABAAAABAACD"
  143. #pat = "ABA"
  144. #text = "ABAAABCD"
  145. #pat = "ABC"
  146. #text = "babbababaabbaba"
  147. #pat = "abba"
  148. text = "bbbbbababbbbbabb"
  149. pat = "bb.bb"
  150. search(text, pat)
  151.  
  152.  
  153.  
  154.  
  155.  
  156.  
  157.  
  158.  
  159.  
  160.  
  161.  
  162.  
  163.  
  164.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement