Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def build_trie(strs, prefix=''):
    """Build a nested-dict prefix tree over a collection of strings.

    Every node is a dict whose keys are prefixes of the indexed strings,
    always measured from the start of the string (not relative to the
    parent node).  Each node also carries a ``'_width'`` entry holding the
    length of the prefixes used as keys at that level.  A child value of
    ``None`` marks a leaf: the key itself is a complete string and nothing
    longer starts with it.

    Note that ``'_width'`` is stored in the same dict as the prefixes, so
    an indexed prefix that is literally ``'_width'`` is overwritten by the
    width entry.

    Args:
        strs: Iterable of strings to index.  It is consumed once.
        prefix: The key under which ``strs`` were grouped by the parent
            call.  Used by the recursion; top-level callers leave the
            default.

    Returns:
        The trie node as a dict, or ``None`` when ``strs`` is empty or
        holds nothing but copies of ``prefix``.
    """
    strs = list(strs)

    # Only strings that extend beyond the current prefix decide the width
    # of the next level; if there are none, this node is a leaf.
    lengths = [len(s) for s in strs if s != prefix]
    if not lengths:
        return None
    width = min(lengths)

    # Bucket every string by its first ``width`` characters.  A string
    # equal to ``prefix`` is shorter than ``width`` and so lands in a
    # bucket keyed by itself.
    groups = {}
    for s in strs:
        groups.setdefault(s[:width], []).append(s)

    trie = {}
    for key, members in groups.items():
        trie[key] = build_trie(members, key)
    trie['_width'] = width
    return trie
def match_trie(trie, text):
    """Return the longest indexed string that ``text`` starts with.

    ``trie`` is a node in the nested-dict layout where ``trie['_width']``
    gives the length of the prefix keys at that level, each key maps to a
    child node, and a falsy child (``None``) marks a complete string.

    Args:
        trie: Trie node (dict), or ``None`` / empty for "nothing indexed".
        text: Text whose beginning is matched against the trie.  The
            comparison is exact; any case folding is up to the caller.

    Returns:
        The matched string, or ``None`` when no indexed string is a prefix
        of ``text``.
    """
    # An empty term list produces no trie at all; treat that as "no match"
    # rather than failing on the '_width' lookup.
    if not trie:
        return None

    key = text[:trie['_width']]
    # '_width' is node metadata stored alongside the prefixes; never follow
    # it as if it were a child node.
    if key == '_width' or key not in trie:
        return None

    child = trie[key]
    if not child:
        # Leaf: ``key`` is a complete string and nothing longer extends it.
        return key

    # Prefer a longer match further down the trie.
    longer = match_trie(child, text)
    if longer:
        return longer
    # No longer match.  ``key`` counts only if it is itself an indexed
    # string, in which case it appears as a key of its own child node.
    if key in child:
        return key
    return None
- ---SNIP---
def render(self, output):
    """Replace whole-word occurrences of known terms in ``output``.

    The text is scanned word by word.  At each word start, every entry of
    ``self.def_lists`` is tried in order; an entry is a mapping with:

    * ``'trie'``    -- trie passed to ``match_trie`` against the lowercased
      remainder of the text;
    * ``'items'``   -- mapping from a matched term to a dict of template
      values;
    * ``'replace'`` -- ``%``-style template formatted with those values
      plus ``original`` (the matched text exactly as it appears in
      ``output``).

    A match is used only when it is not immediately followed by a letter.
    Text between ``<`` and ``>`` is skipped while looking for the next
    word.  When ``self.first_only`` is true, each term is replaced only
    the first time it is seen.

    Args:
        output: The text to process.

    Returns:
        The text with replacements applied, or ``output`` itself when
        nothing matched.
    """
    # ``string.letters`` exists only on Python 2; fall back to the ASCII
    # letters where it is unavailable.
    letters = getattr(string, 'letters', string.ascii_letters)
    length = len(output)

    new_output = []
    exclude = set()
    idx = 0
    last_match = 0
    while idx < length:
        # Lowercase the remainder once per position, not once per def list.
        tail = output[idx:].lower()
        for def_list in self.def_lists:
            match = match_trie(def_list['trie'], tail)
            if not match or match in exclude:
                continue
            end = idx + len(match)
            # Exclude partial-word matches.  A term that runs to the very
            # end of the text has nothing after it and is a whole word.
            if end < length and output[end] in letters:
                continue
            new_output.append(output[last_match:idx])
            # vvv This part does the "highlighting" (actually linkifying in this case)
            replace = dict(original=output[idx:end])
            replace.update(def_list['items'][match])
            new_output.append(def_list['replace'] % replace)
            idx = end
            last_match = idx
            if self.first_only:
                exclude.add(match)
            break

        # Skip the rest of the word
        while idx < length and output[idx] in letters:
            idx += 1
        # Skip to the next word (also skip tags)
        tag = False
        while idx < length and (tag or output[idx] not in letters):
            if output[idx] == '<':
                tag = True
            elif output[idx] == '>':
                tag = False
            idx += 1

    if new_output:
        new_output.append(output[last_match:])
        return ''.join(new_output)
    return output
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement