Advertisement
kpfp_linux

PJN 6 03:25

Jun 4th, 2013
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.68 KB | None | 0 0
  1. import bisect
  2. import random
  3. import itertools
  4. import re
  5.  
  6.  
  7. class Markov:
  8.     def __init__(self, n, data, tokenize=False):
  9.         self._tree = {}
  10.         self._n = n
  11.         if tokenize: data = re.sub("([.,?0:;()!-]+\s*)|([.,?!]*\s+)", " ", data).split(" ")  # tokenize data if necessary
  12.         prev_cnt, prev_d = self._tree.setdefault(data[0], ([1, 0], {}))  # build tree
  13.         for letter in data[1:]:
  14.             if letter == '':
  15.                 continue
  16.             prev_d[letter] = prev_d.setdefault(letter, 0) + 1
  17.             prev_cnt[1] += 1
  18.             prev_cnt, prev_d = self._tree.setdefault(letter, ([1, 0], {}))
  19.         for i in range(1, n):  # build sums of higher order
  20.             for k, (v_c, v_d) in self._tree.items():
  21.                 v_c.append(sum((sv * self._tree[sk][0][i] for sk, sv in v_d.items())))
  22.  
  23.     def gen(self):
  24.         print("GEN")
  25.         next_key = random.choice(list(self._tree.keys()))
  26.         while True:
  27.             print("  yield: {0}".format(next_key))
  28.             yield next_key
  29.             _count, succs = self._tree[next_key]
  30.             keys = list(succs.keys())
  31.             weights = [succs[key] * self._tree[key][0][self._n - 2] for key in keys]
  32.             cumul_dist = list(itertools.accumulate(weights))
  33.             try:
  34.                 next_key = keys[bisect.bisect(cumul_dist, random.random() * cumul_dist[-1])]
  35.             except IndexError:
  36.                 break
  37.  
  38.     def __iter__(self):
  39.         self.__it__ = self.gen()
  40.         return self
  41.  
  42.     def __next__(self):
  43.         while True:
  44.             try:
  45.                 return next(self.__it__)
  46.             except StopIteration:
  47.                 self.__it__ = self.gen()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement