Advertisement
Guest User

Generate gtypist drill

a guest
Apr 8th, 2013
220
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.33 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import string
  3. import random
  4. import numpy as np
  5. import numpy.random
  6.  
  7. filename="1_1_all_fullalpha.txt"
  8. allowed_chars="adefhilnoprstu"
  9. required_chars="fu"
  10. min_len=3
  11. max_len=6
  12.  
  13. def choice(a, size=1, replace=True, p=None):
  14.     # Format and Verify input
  15.     if isinstance(a, int):
  16.         if a > 0:
  17.             pop_size = a #population size
  18.         else:
  19.             raise ValueError("a must be greater than 0")
  20.     else:
  21.         a = np.array(a, ndmin=1, copy=0)
  22.         if a.ndim != 1:
  23.             raise ValueError("a must be 1-dimensional")
  24.         pop_size = a.size
  25.         if pop_size is 0:
  26.             raise ValueError("a must be non-empty")
  27.    
  28.     if None != p:
  29.         p = np.array(p, dtype=np.double, ndmin=1, copy=0)
  30.         if p.ndim != 1:
  31.             raise ValueError("p must be 1-dimensional")
  32.         if p.size != pop_size:
  33.             raise ValueError("a and p must have same size")
  34.         if np.any(p < 0):
  35.             raise ValueError("probabilities are not non-negative")
  36.         if not np.allclose(p.sum(), 1):
  37.             raise ValueError("probabilities do not sum to 1")
  38.    
  39.     # Actual sampling
  40.     if replace:
  41.         if None != p:
  42.             cdf = p.cumsum()
  43.             cdf /= cdf[-1]
  44.             uniform_samples = np.random.random(size)
  45.             idx = cdf.searchsorted(uniform_samples, side='right')
  46.         else:
  47.             idx = numpy.random.randint(0, pop_size, size=size)
  48.     else:
  49.         if size > pop_size:
  50.             raise ValueError(''.join(["Cannot take a larger sample than ",
  51.                                       "population when 'replace=False'"]))
  52.        
  53.         if None != p:
  54.             if np.sum(p > 0) < size:
  55.                 raise ValueError("Fewer non-zero entries in p than size")
  56.             n_uniq = 0
  57.             p = p.copy()
  58.             found = np.zeros(size, dtype=np.int)
  59.             while n_uniq < size:
  60.                 x = numpy.random.rand(size - n_uniq)
  61.                 if n_uniq > 0:
  62.                     p[found[0:n_uniq]] = 0
  63.                 cdf = np.cumsum(p)
  64.                 cdf /= cdf[-1]
  65.                 new = cdf.searchsorted(x, side='right')
  66.                 new = np.unique(new)
  67.                 found[n_uniq:n_uniq + new.size] = new
  68.                 n_uniq += new.size
  69.             idx = found
  70.         else:
  71.             idx = numpy.random.permutation(pop_size)[:size]
  72.    
  73.     #Use samples as indices for a if a is array-like
  74.     if isinstance(a, int):
  75.         return idx
  76.     else:
  77.         return a.take(idx)
  78.  
  79. def check_word(word, count_s, prob_s, allowed_chars, required_chars, min_len, max_len):
  80.     if len(word) < min_len or len(word) > max_len:
  81.         return 0
  82.     if word.translate(None, allowed_chars):
  83.         return 0
  84.     if word.translate(None, required_chars) == word:
  85.     return 0
  86.     if random.random() < float(prob_s):
  87.         return int(count_s) + 1
  88.     else:
  89.         return 0
  90.  
  91. def build_list(filename, allowed_chars, max_len):
  92.     words = {}
  93.     with open(filename, "r") as f:
  94.         for line in f:
  95.             c = string.split(line.lstrip(), "\t")
  96.             if c[2] == '%':
  97.                 continue
  98.             word = "";
  99.             if c[0] == '@':
  100.                 word = c[2]
  101.             else:
  102.                 word = c[0]
  103.             count = check_word(word, c[3], c[5], allowed_chars, required_chars, min_len, max_len)
  104.             if count > 0:
  105.                 if word in words:
  106.                     words[word] += count
  107.                 else:
  108.                     words[word] = count
  109.     return ([key for key in sorted(words)], [words[key] for key in sorted(words)])
  110.  
  111. if __name__=="__main__":
  112.     (keys, weights) = build_list(filename, allowed_chars, max_len)
  113.     p = np.array(weights, dtype=float) / sum(weights)
  114.     o = open("drill.typ","w")
  115.     o.write("B:"+allowed_chars+" drill")
  116.     max_len=76
  117.     for pos in xrange(1,11):
  118.         o.write("\n\nI:"+allowed_chars+" drill ("+str(pos)+")")
  119.         o.write("\n*:_C_D_"+str(pos))
  120.         o.write("\nD:")
  121.         cur_len=0
  122.         words = choice(keys, size=20, replace=False, p=p)
  123.         for word in words:
  124.             if cur_len + len(word) + 1 > max_len:
  125.                 break
  126.             elif cur_len > 0:
  127.                 o.write(' ')
  128.             o.write(word)
  129.             cur_len += len(word) + 1
  130.     o.write("\n\nX:\n")
  131.     o.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement