Generate gtypist drill

#!/usr/bin/env python
import string
import random
import numpy as np
import numpy.random

# Drill configuration, read by the functions and the script body below.
filename = "1_1_all_fullalpha.txt"  # tab-separated word list parsed by build_list()
allowed_chars = "adefhilnoprstu"    # a drill word may contain only these characters
required_chars = "fu"               # a drill word must contain at least one of these
min_len, max_len = 3, 6             # inclusive bounds on the length of a drill word

def choice(a, size=1, replace=True, p=None):
    """Draw a random sample from a 1-D population.

    Parameters:
        a: Either a positive int, in which case the population is the
            indices ``0 .. a-1``, or a 1-D array-like of items to draw from.
        size: Number of items to draw.
        replace: If true, sample with replacement; otherwise every drawn
            item is distinct.
        p: Optional 1-D sequence of probabilities, one per population
            member. Must be non-negative and sum to 1. When omitted the
            draw is uniform.

    Returns:
        A NumPy array of drawn indices when ``a`` is an int, otherwise the
        corresponding elements of ``a``.

    Raises:
        ValueError: if ``a`` is a non-positive int, is not 1-dimensional or
            is empty; if ``p`` is malformed; or if ``replace`` is false and
            ``size`` exceeds the population size or the number of non-zero
            probabilities.
    """
    # Format and verify input
    a_is_int = isinstance(a, int)
    if a_is_int:
        if a <= 0:
            raise ValueError("a must be greater than 0")
        pop_size = a  # population size
    else:
        a = np.atleast_1d(np.asarray(a))
        if a.ndim != 1:
            raise ValueError("a must be 1-dimensional")
        pop_size = a.size
        if pop_size == 0:
            raise ValueError("a must be non-empty")

    # `p is not None` rather than `None != p`: the latter compares
    # element-wise once p is an ndarray and cannot be used as a condition.
    if p is not None:
        p = np.atleast_1d(np.asarray(p, dtype=np.double))
        if p.ndim != 1:
            raise ValueError("p must be 1-dimensional")
        if p.size != pop_size:
            raise ValueError("a and p must have same size")
        if np.any(p < 0):
            raise ValueError("probabilities are not non-negative")
        if not np.allclose(p.sum(), 1):
            raise ValueError("probabilities do not sum to 1")

    # Actual sampling
    if replace:
        if p is not None:
            # Inverse-CDF sampling: locate each uniform draw in the
            # cumulative distribution.
            cdf = p.cumsum()
            cdf /= cdf[-1]
            uniform_samples = np.random.random(size)
            idx = cdf.searchsorted(uniform_samples, side='right')
        else:
            idx = np.random.randint(0, pop_size, size=size)
    else:
        if size > pop_size:
            raise ValueError(''.join(["Cannot take a larger sample than ",
                                      "population when 'replace=False'"]))

        if p is not None:
            if np.sum(p > 0) < size:
                raise ValueError("Fewer non-zero entries in p than size")
            n_uniq = 0
            p = p.copy()  # entries are zeroed below; keep the caller's array intact
            found = np.zeros(size, dtype=int)
            while n_uniq < size:
                x = np.random.rand(size - n_uniq)
                if n_uniq > 0:
                    # Already-drawn indices must not be drawn again.
                    p[found[0:n_uniq]] = 0
                cdf = np.cumsum(p)
                cdf /= cdf[-1]
                new = cdf.searchsorted(x, side='right')
                # Drop duplicates within this batch but keep the draw
                # order; a plain np.unique() would sort the indices and
                # bias the order of the returned sample.
                _, first_seen = np.unique(new, return_index=True)
                first_seen.sort()
                new = new.take(first_seen)
                found[n_uniq:n_uniq + new.size] = new
                n_uniq += new.size
            idx = found
        else:
            idx = np.random.permutation(pop_size)[:size]

    # Use samples as indices for a if a is array-like
    if a_is_int:
        return idx
    return a.take(idx)

def check_word(word, count_s, prob_s, allowed_chars, required_chars, min_len, max_len):
    """Decide whether a word goes into the drill and with what weight.

    Parameters:
        word: Candidate word.
        count_s: Count for the word, as a string parseable by ``int``.
        prob_s: Acceptance probability, as a string parseable by ``float``.
        allowed_chars: String of characters the word may consist of.
        required_chars: String of characters of which the word must contain
            at least one. An empty string therefore rejects every word.
        min_len: Minimum accepted word length (inclusive).
        max_len: Maximum accepted word length (inclusive).

    Returns:
        ``int(count_s) + 1`` if the word passes the length and character
        filters and a uniform random draw falls below ``float(prob_s)``;
        otherwise 0.
    """
    if not min_len <= len(word) <= max_len:
        return 0
    # Reject words containing any character outside the allowed set.
    if any(ch not in allowed_chars for ch in word):
        return 0
    # Reject words containing none of the required characters.
    if not any(ch in required_chars for ch in word):
        return 0
    # count_s/prob_s are only parsed for words that passed the filters.
    if random.random() < float(prob_s):
        return int(count_s) + 1
    return 0

def build_list(filename, allowed_chars, max_len):
    """Collect the drill words and their weights from a tab-separated file.

    Each line is stripped of leading whitespace and split on tabs. Lines
    whose third field is ``%`` are skipped. The word is the third field
    when the first field is ``@``, otherwise the first field. The fourth
    and sixth fields are handed to ``check_word`` as the count and the
    acceptance probability; positive results are summed per word.

    ``required_chars`` and ``min_len`` are not parameters: they are read
    from the module-level settings.

    Parameters:
        filename: Path of the word list to read.
        allowed_chars: Characters a word may consist of.
        max_len: Maximum accepted word length.

    Returns:
        A tuple ``(keys, weights)``: the accepted words in sorted order and
        the list of their accumulated weights in the same order.
    """
    words = {}
    with open(filename, "r") as f:
        for line in f:
            c = line.lstrip().split("\t")
            # Blank or truncated lines lack the fields read below; skip
            # them instead of failing with an IndexError.
            if len(c) < 6:
                continue
            if c[2] == '%':
                continue
            word = c[2] if c[0] == '@' else c[0]
            count = check_word(word, c[3], c[5], allowed_chars,
                               required_chars, min_len, max_len)
            if count > 0:
                words[word] = words.get(word, 0) + count
    keys = sorted(words)
    return (keys, [words[key] for key in keys])

if __name__ == "__main__":
    # Build the weighted word list, then write ten drill sections of randomly
    # chosen words to drill.typ.
    (keys, weights) = build_list(filename, allowed_chars, max_len)
    if not keys:
        raise SystemExit("no words in " + filename + " match the drill settings")
    p = np.array(weights, dtype=float) / sum(weights)
    # Width limit for one drill line; kept separate from the word-length
    # setting max_len so that setting is not overwritten.
    line_width = 76
    # Sampling without replacement cannot draw more words than exist.
    sample_size = min(20, len(keys))
    # `with` closes the output file even if sampling or writing fails.
    with open("drill.typ", "w") as o:
        o.write("B:" + allowed_chars + " drill")
        for pos in range(1, 11):
            o.write("\n\nI:" + allowed_chars + " drill (" + str(pos) + ")")
            o.write("\n*:_C_D_" + str(pos))
            o.write("\nD:")
            cur_len = 0
            words = choice(keys, size=sample_size, replace=False, p=p)
            for word in words:
                # Stop at the first word that would overflow the line.
                if cur_len + len(word) + 1 > line_width:
                    break
                elif cur_len > 0:
                    o.write(' ')
                o.write(word)
                cur_len += len(word) + 1
        o.write("\n\nX:\n")