Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- class PolyHash():
- def __init__(self, seq: List[int], prime=37, rem=10**9+7):
- n = len(seq)
- self.rem = rem
- self.hashes = [0]*(n+1)
- self.pows = [1]*(n+1)
- for i in range(1, n+1):
- self.pows[i] = self.pows[i-1]*prime % rem
- self.hashes[i] = (self.hashes[i-1]*prime + seq[i-1]) % rem
- def hash(self, l: int, r: int):
- ''' [l, r) '''
- return (self.hashes[r] - (self.hashes[l]*self.pows[r-l]) % self.rem) % self.rem
- def string_hasher(s: string) -> PolyHash:
- return PolyHash([ord(ch)-ord('a') for ch in s])
- def calc_word_hashes(words: List[str]) -> List[int]:
- word_hashes = [None]*len(words)
- for i, word in enumerate(words):
- word_hasher = string_hasher(word)
- word_hashes[i] = word_hasher.hash(0, len(word))
- return word_hashes
- def scanline(bolds: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
- START, END = 0, 1
- pois = [(start, START) for start, _ in bolds] + [(end, END) for _, end in bolds]
- pois.sort()
- n_opened = 0
- b_start = None
- res = []
- for x, tip in pois:
- if tip == START:
- if n_opened == 0:
- b_start = x
- n_opened += 1
- else:
- n_opened -= 1
- if n_opened == 0:
- res.append((b_start, x))
- return res
- def find_all_bolds(words: List[str], s: str) -> List[Tuple[int, int]]:
- word_hashes = calc_word_hashes(words)
- s_hasher = string_hasher(s)
- bolds = []
- for i in range(1, len(s)+1):
- for j, h in enumerate(word_hashes):
- if i >= len(words[j]):
- if s_hasher.hash(i-len(words[j]), i) == h:
- bolds.append((i-len(words[j]), i))
- return bolds
- def prefix_sum(s: str):
- n = len(s)
- pi = [0]*n
- for i in range(1, n):
- j = pi[i-1]
- while j > 0 and s[i] != s[j]:
- j = pi[j-1]
- if s[i] == s[j]:
- j += 1
- pi[i] = j
- return pi
- def find_all_bolds_kmp(words: List[str], s: str) -> List[Tuple[int, int]]:
- pis = [prefix_sum(word) for word in words]
- js = [0]*len(words)
- bolds = []
- for i, ch in enumerate(s):
- for k, j in enumerate(js):
- while j >= len(words[k]) or (j > 0 and ch != words[k][j]):
- j = pis[k][j-1]
- if ch == words[k][j]:
- j += 1
- js[k] = j
- if j == len(words[k]):
- bolds.append((i+1-len(words[k]), i+1))
- return bolds
- class TrieNode:
- def __init__(self):
- self.children = {}
- self.ends = set()
- class Trie:
- def __init__(self):
- self.root = TrieNode()
- def insert(self, word: str) -> None:
- """ Inserts a word into the trie. """
- p = self.root
- for ch in word:
- if ch not in p.children:
- p.children[ch] = TrieNode()
- p = p.children[ch]
- p.ends.add(len(word))
- def find_all_bolds_trie(words, s) -> List[Tuple[int, int]]:
- ''' O(sum of words lengths + len(s)*?) '''
- trie = Trie()
- for word in words:
- trie.insert(word)
- trie_nodes = []
- bolds = []
- for i, ch in enumerate(s):
- trie_nodes2 = []
- trie_nodes.append(trie.root)
- for trie_node in trie_nodes:
- if ch not in trie_node.children:
- continue
- trie_node = trie_node.children[ch]
- for length in trie_node.ends:
- bolds.append((i+1-length, i+1))
- trie_nodes2.append(trie_node)
- trie_nodes = trie_nodes2
- return bolds
- class Solution:
- def boldWords(self, words: List[str], s: str) -> str:
- #bolds = find_all_bolds(words, s)
- #bolds = find_all_bolds_kmp(words, s)
- bolds = find_all_bolds_trie(words, s)
- bolds = scanline(bolds)
- res = []
- j, k = 0, 0
- for i in range(len(s)+1):
- if j < len(bolds) and bolds[j][k] == i:
- if k == 0:
- res.append("<b>")
- k += 1
- else:
- res.append("</b>")
- k = 0
- j += 1
- if i < len(s):
- res.append(s[i])
- return ''.join(res)
Advertisement
Add Comment
Please, Sign In to add comment