Advertisement
kpfp_linux

PJN 6

Jun 4th, 2013
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.62 KB | None | 0 0
  1. import bisect
  2. import random
  3. import itertools
  4. import re
  5.  
  6.  
  7. class Markov:
  8.     def __init__(self, n, data, tokenize=False):
  9.         self._tree = {}
  10.         self._n = n
  11.         if tokenize: data = re.sub("([.,?!]+\s*)|([.,?!]*\s+)", " ", data).split(" ")  # tokenize data if necessary
  12.         prev_cnt, prev_d = self._tree.setdefault(data[0], ([1, 0], {}))  # build tree
  13.         for letter in data[1:]:
  14.             prev_d[letter] = prev_d.setdefault(letter, 0) + 1
  15.             prev_cnt[1] += 1
  16.             prev_cnt, prev_d = self._tree.setdefault(letter, ([1, 0], {}))
  17.         for i in range(1, n):  # build sums of higher order
  18.             for k, (v_c, v_d) in self._tree.items():
  19.                 v_c.append(sum((sv * self._tree[sk][0][i] for sk, sv in v_d.items())))
  20.  
  21.     def gen(self):
  22.         print("GEN")
  23.         next_key = random.choice(list(self._tree.keys()))
  24.         while True:
  25.             print("  yield: {0}".format(next_key))
  26.             yield next_key
  27.             _count, succs = self._tree[next_key]
  28.             keys = list(succs.keys())
  29.             weights = [succs[key] * self._tree[key][0][self._n - 2] for key in keys]
  30.             cumul_dist = list(itertools.accumulate(weights))
  31.             try:
  32.                 next_key = keys[bisect.bisect(cumul_dist, random.random() * cumul_dist[-1])]
  33.             except IndexError:
  34.                 break
  35.  
  36.     def __iter__(self):
  37.         self.__it__ = self.gen()
  38.         return self
  39.  
  40.     def __next__(self):
  41.         while True:
  42.             try:
  43.                 return next(self.__it__)
  44.             except StopIteration:
  45.                 self.__it__ = self.gen()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement