Advertisement
Guest User

Untitled

a guest
Aug 19th, 2018
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.42 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. import math
  3.  
  4.  
  5. class LM(dict):
  6.     def __init__(self, data, default_value):
  7.         self._default = default_value
  8.         super().__init__(**data)
  9.  
  10.     def __getitem__(self, key):
  11.         item = dict.get(self, key)
  12.  
  13.         if item is None:
  14.             return self._default
  15.  
  16.         return item
  17.  
  18.  
  19. def split_words(language_model, sent):
  20.     memo = LM({'': 0}, -math.inf)
  21.     solution = {}
  22.     _ = split_words_aux(language_model, memo, solution, sent)
  23.     return restore_from_splits(solution, sent)
  24.  
  25.  
  26. def split_words_aux(language_model, memo, splits, sent):
  27.     if memo[sent] >= 0:
  28.         return memo[sent]
  29.  
  30.     q = -math.inf
  31.     best_hypo = None
  32.  
  33.     for i in range(1, len(sent) + 1):
  34.         prefix = sent[:i]
  35.         suffix = sent[i:]
  36.  
  37.         new_q = language_model[prefix] + split_words_aux(language_model, memo, splits, suffix)
  38.         if new_q > q:
  39.             q = new_q
  40.             best_hypo = i
  41.  
  42.     memo[sent] = q
  43.     splits[sent] = best_hypo
  44.     return q
  45.  
  46.  
  47. def restore_from_splits(splits, sent):
  48.     result = []
  49.     while len(sent) > 0:
  50.         prefix = sent[:splits[sent]]
  51.         suffix = sent[splits[sent]:]
  52.  
  53.         result.append(prefix)
  54.         sent = suffix
  55.     return ' '.join(result)
  56.  
  57.  
  58. if __name__ == '__main__':
  59.     lm = LM({'мама': 0.3, 'мыла': 0.3, 'раму': 0.3}, 0.01)
  60.     sent = 'мамамылараму'
  61.     print(split_words(lm, sent))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement