Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import math
class LM(dict):
    """Dictionary-backed language model with a fallback score.

    Indexing with ``lm[key]`` never raises ``KeyError``: a key that is
    absent, or whose stored value is ``None``, yields the default score
    supplied at construction time.  Only ``[]`` access is affected;
    ``get`` and ``in`` keep their plain ``dict`` behaviour.
    """

    def __init__(self, data, default_value):
        """Populate the model from *data* and remember the fallback score.

        Args:
            data: Mapping (or iterable of key/value pairs) of initial scores.
            default_value: Score returned for unknown keys.
        """
        self._default = default_value
        # Pass the mapping positionally: ``**data`` would reject any key
        # that is not a string with a TypeError.
        super().__init__(data)

    def __getitem__(self, key):
        """Return the score stored for *key*, or the default score."""
        value = super().get(key)
        # A stored ``None`` is treated the same as a missing key.
        return self._default if value is None else value
def split_words(language_model, sent):
    """Segment *sent* into words and return them joined by single spaces.

    Args:
        language_model: Mapping indexed with candidate words; passed
            through to ``split_words_aux`` for scoring.
        sent: The unsegmented string.

    Returns:
        The string produced by ``restore_from_splits`` for the split
        points that ``split_words_aux`` recorded.
    """
    best_cuts = {}
    # The empty string is the base case with score 0; keys that have not
    # been stored yet read back as -inf.
    score_cache = LM({'': 0}, -math.inf)
    split_words_aux(language_model, score_cache, best_cuts, sent)
    return restore_from_splits(best_cuts, sent)
def split_words_aux(language_model, memo, splits, sent):
    """Compute the best segmentation score of *sent* and record the cuts.

    The score of a segmentation is the sum of ``language_model[word]``
    over its words.  For *sent* and each of its suffixes that is not
    already cached, the best score is stored in *memo* and the length of
    the best first word is stored in *splits*.

    Args:
        language_model: Mapping indexed with every candidate word; it must
            return a score for any key.
        memo: Mapping from string to best score, updated in place.  It is
            expected to hold the base case for the empty string already.
        splits: Mapping from string to the length of its best first word,
            updated in place.
        sent: The string to segment.

    Returns:
        The best score for *sent*.
    """
    # Membership, not the sign of the score, decides whether a result is
    # cached, so negative and -inf scores are memoised as well.
    if sent in memo:
        return memo[sent]

    # Solve suffixes from shortest to longest: every sub-problem a longer
    # suffix needs is then already cached, and no recursion is required.
    for start in range(len(sent), -1, -1):
        tail = sent[start:]
        if tail in memo:
            continue
        best_score = -math.inf
        best_cut = None
        for cut in range(1, len(tail) + 1):
            score = language_model[tail[:cut]] + memo[tail[cut:]]
            # Strict comparison keeps the shortest first word on ties.
            if score > best_score:
                best_score = score
                best_cut = cut
        memo[tail] = best_score
        splits[tail] = best_cut
    return memo[sent]
def restore_from_splits(splits, sent):
    """Rebuild the segmented sentence from recorded split points.

    Args:
        splits: Mapping from a string to the length of its first word.
        sent: The unsegmented string to walk through.

    Returns:
        The words of *sent* joined by single spaces; an empty string when
        *sent* is empty.

    Raises:
        KeyError: If *splits* has no entry for a remainder that is reached.
        ValueError: If a recorded split point does not shorten the
            remainder (for example ``None`` or ``0``).
    """
    words = []
    while sent:
        cut = splits[sent]
        head, rest = sent[:cut], sent[cut:]
        # A cut that leaves the remainder unchanged would loop forever.
        if len(rest) >= len(sent):
            raise ValueError(
                'no usable split point recorded for {!r}'.format(sent)
            )
        words.append(head)
        sent = rest
    return ' '.join(words)
if __name__ == '__main__':
    # Demo: three known words scored 0.3 each; anything else scores 0.01.
    vocabulary = {'мама': 0.3, 'мыла': 0.3, 'раму': 0.3}
    model = LM(vocabulary, 0.01)
    print(split_words(model, 'мамамылараму'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement