cyga

bold words with kmp too

Sep 19th, 2021
722
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. class PolyHash():
  2.     def __init__(self, seq: List[int], prime=37, rem=10**9+7):
  3.         n = len(seq)
  4.         self.rem = rem
  5.         self.hashes = [0]*(n+1)
  6.         self.pows = [1]*(n+1)
  7.         for i in range(1, n+1):
  8.             self.pows[i] = self.pows[i-1]*prime % rem
  9.             self.hashes[i] = (self.hashes[i-1]*prime + seq[i-1]) % rem
  10.  
  11.     def hash(self, l: int, r: int):
  12.         ''' [l, r) '''
  13.         return (self.hashes[r] - (self.hashes[l]*self.pows[r-l]) % self.rem) % self.rem
  14.    
  15. def string_hasher(s: string) -> PolyHash:
  16.     return PolyHash([ord(ch)-ord('a') for ch in s])
  17.  
  18. def calc_word_hashes(words: List[str]) -> List[int]:
  19.     word_hashes = [None]*len(words)
  20.     for i, word in enumerate(words):
  21.         word_hasher = string_hasher(word)
  22.         word_hashes[i] = word_hasher.hash(0, len(word))
  23.     return word_hashes
  24.    
  25. def scanline(bolds: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
  26.     START, END = 0, 1
  27.     pois = [(start, START) for start, _ in bolds] + [(end, END) for _, end in bolds]
  28.     pois.sort()
  29.    
  30.     n_opened = 0
  31.     b_start = None
  32.     res = []
  33.     for x, tip in pois:
  34.         if tip == START:
  35.             if n_opened == 0:
  36.                 b_start = x
  37.             n_opened += 1
  38.         else:
  39.             n_opened -= 1
  40.             if n_opened == 0:
  41.                 res.append((b_start, x))
  42.                
  43.     return res
  44.  
  45. def find_all_bolds(words: List[str], s: str) -> List[Tuple[int, int]]:
  46.     word_hashes = calc_word_hashes(words)
  47.     s_hasher = string_hasher(s)
  48.     bolds = []
  49.     for i in range(1, len(s)+1):
  50.         for j, h in enumerate(word_hashes):
  51.             if i >= len(words[j]):
  52.                 if s_hasher.hash(i-len(words[j]), i) == h:
  53.                     bolds.append((i-len(words[j]), i))
  54.     return bolds
  55.    
  56. def prefix_sum(s: str):
  57.     n = len(s)
  58.     pi = [0]*n
  59.     for i in range(1, n):
  60.         j = pi[i-1]
  61.         while j > 0 and s[i] != s[j]:
  62.             j = pi[j-1]
  63.         if s[i] == s[j]:
  64.             j += 1
  65.         pi[i] = j
  66.     return pi
  67.    
  68. def find_all_bolds_kmp(words: List[str], s: str) -> List[Tuple[int, int]]:
  69.     pis = [prefix_sum(word) for word in words]
  70.     js = [0]*len(words)
  71.     bolds = []
  72.     for i, ch in enumerate(s):
  73.         for k, j in enumerate(js):
  74.             while j >= len(words[k]) or (j > 0 and ch != words[k][j]):
  75.                 j = pis[k][j-1]
  76.             if ch == words[k][j]:
  77.                 j += 1
  78.             js[k] = j
  79.            
  80.             if j == len(words[k]):
  81.                 bolds.append((i+1-len(words[k]), i+1))
  82.     return bolds
  83.    
  84. class Solution:
  85.     def boldWords(self, words: List[str], s: str) -> str:
  86.         #bolds = find_all_bolds(words, s)
  87.         bolds = find_all_bolds_kmp(words, s)
  88.                        
  89.         bolds = scanline(bolds)
  90.         res = []
  91.         j, k = 0, 0
  92.         for i in range(len(s)+1):
  93.             if j < len(bolds) and bolds[j][k] == i:
  94.                 if k == 0:
  95.                     res.append("<b>")
  96.                     k += 1
  97.                 else:
  98.                     res.append("</b>")
  99.                     k = 0
  100.                     j += 1
  101.                
  102.             if i < len(s):
  103.                 res.append(s[i])
  104.                        
  105.         return ''.join(res)
RAW Paste Data