Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
import random
import string

import numpy as np
import numpy.random
# Input word list read by build_list(): tab-separated, one entry per line.
filename = "1_1_all_fullalpha.txt"
# A word is kept only if every one of its characters appears in this string.
allowed_chars = "adefhilnoprstu"
# A word is kept only if it contains at least one character from this string.
required_chars = "fu"
# Inclusive bounds on the length of the words that are kept.
min_len = 3
max_len = 6
def choice(a, size=1, replace=True, p=None):
    """Draw a random sample from a one-dimensional population.

    Parameters:
        a: either a positive int (the population is then the indices
            ``0 .. a-1``) or a 1-D array-like of items to sample from.
        size: number of items to draw.
        replace: when true, the same item may be drawn more than once.
        p: optional 1-D sequence of probabilities, one per population
            member; must be non-negative and sum to 1.  When omitted the
            draw is uniform.

    Returns:
        An array of drawn indices when ``a`` is an int, otherwise an array
        of the corresponding items of ``a``.

    Raises:
        ValueError: if ``a`` or ``p`` is malformed, or if more items are
            requested without replacement than can be supplied.
    """
    # Format and verify input
    if isinstance(a, int):
        if a <= 0:
            raise ValueError("a must be greater than 0")
        pop_size = a  # population size
    else:
        a = np.atleast_1d(np.asarray(a))
        if a.ndim != 1:
            raise ValueError("a must be 1-dimensional")
        pop_size = a.size
        if pop_size == 0:
            raise ValueError("a must be non-empty")

    # `p is not None` rather than `None != p`: the latter compares
    # element-wise when p is an ndarray and cannot be used as a condition.
    if p is not None:
        p = np.atleast_1d(np.asarray(p, dtype=np.double))
        if p.ndim != 1:
            raise ValueError("p must be 1-dimensional")
        if p.size != pop_size:
            raise ValueError("a and p must have same size")
        if np.any(p < 0):
            raise ValueError("probabilities are not non-negative")
        if not np.allclose(p.sum(), 1):
            raise ValueError("probabilities do not sum to 1")

    # Actual sampling
    if replace:
        if p is not None:
            # Inverse-CDF sampling: locate each uniform draw in the
            # cumulative distribution.
            cdf = p.cumsum()
            cdf /= cdf[-1]
            uniform_samples = np.random.random(size)
            idx = cdf.searchsorted(uniform_samples, side='right')
        else:
            idx = np.random.randint(0, pop_size, size=size)
    else:
        if size > pop_size:
            raise ValueError("Cannot take a larger sample than "
                             "population when 'replace=False'")
        if p is not None:
            if np.sum(p > 0) < size:
                raise ValueError("Fewer non-zero entries in p than size")
            n_uniq = 0
            p = p.copy()  # zeroed in place below; keep the caller's array intact
            found = np.zeros(size, dtype=int)
            while n_uniq < size:
                x = np.random.rand(size - n_uniq)
                if n_uniq > 0:
                    # Already-drawn members can no longer be selected.
                    p[found[0:n_uniq]] = 0
                cdf = np.cumsum(p)
                cdf /= cdf[-1]
                new = cdf.searchsorted(x, side='right')
                # Drop duplicates but keep the order in which indices were
                # drawn; a plain np.unique() would sort them and bias the
                # order of the returned sample.
                _, first_seen = np.unique(new, return_index=True)
                first_seen.sort()
                new = new.take(first_seen)
                found[n_uniq:n_uniq + new.size] = new
                n_uniq += new.size
            idx = found
        else:
            idx = np.random.permutation(pop_size)[:size]

    # Use samples as indices for a if a is array-like
    if isinstance(a, int):
        return idx
    return a.take(idx)
def check_word(word, count_s, prob_s, allowed_chars, required_chars, min_len, max_len):
    """Decide whether ``word`` is usable and, if so, return its weight.

    Parameters:
        word: candidate word.
        count_s: text that ``int()`` can parse; basis of the returned weight.
        prob_s: text that ``float()`` can parse; the word is accepted with
            this probability once it has passed the filters.
        allowed_chars: the word may contain only characters from this string.
        required_chars: the word must contain at least one of these characters.
        min_len, max_len: inclusive bounds on ``len(word)``.

    Returns:
        ``int(count_s) + 1`` when the word passes every filter and the random
        acceptance test, otherwise 0.
    """
    if not min_len <= len(word) <= max_len:
        return 0
    letters = set(word)
    # Set arithmetic instead of the Python-2-only str.translate(None, chars).
    if letters - set(allowed_chars):
        return 0  # contains a character outside allowed_chars
    if not letters & set(required_chars):
        return 0  # contains none of required_chars
    if random.random() < float(prob_s):
        return int(count_s) + 1
    return 0
def build_list(filename, allowed_chars, max_len):
    """Read the word list and collect the weights of the usable words.

    Each line is stripped of leading whitespace and split on tabs.  Lines
    whose third field is ``%`` are skipped.  The word is taken from the third
    field when the first field is ``@`` and from the first field otherwise;
    the fourth and sixth fields are passed to ``check_word`` as the count and
    the acceptance probability.

    NOTE: ``required_chars`` and ``min_len`` are read from the module-level
    settings rather than from parameters.

    Parameters:
        filename: path of the tab-separated word list.
        allowed_chars: forwarded to ``check_word``.
        max_len: forwarded to ``check_word``.

    Returns:
        A tuple ``(keys, weights)``: the accepted words in sorted order and,
        in the same order, their accumulated weights.
    """
    words = {}
    with open(filename, "r") as f:
        for line in f:
            c = line.lstrip().split("\t")
            if len(c) < 6:
                # Blank or truncated line: the fields used below are missing.
                continue
            if c[2] == '%':
                continue
            word = c[2] if c[0] == '@' else c[0]
            count = check_word(word, c[3], c[5], allowed_chars, required_chars, min_len, max_len)
            if count > 0:
                # The same word can occur on several lines; add the weights up.
                words[word] = words.get(word, 0) + count
    keys = sorted(words)
    return (keys, [words[key] for key in keys])
if __name__ == "__main__":
    # Build the weighted word list and write ten drills to "drill.typ".
    # NOTE(review): the B:/I:/*:/D:/X: line prefixes look like a GNU Typist
    # lesson script -- confirm against the consumer of this file.
    (keys, weights) = build_list(filename, allowed_chars, max_len)
    if not keys:
        raise SystemExit("no words in " + filename + " match the drill settings")
    p = np.array(weights, dtype=float) / sum(weights)

    # Maximum number of characters on a drill line.  Kept separate from the
    # max_len setting, which bounds the length of individual words.
    line_width = 76
    # Sampling without replacement cannot return more words than exist.
    sample_size = min(20, len(keys))

    with open("drill.typ", "w") as o:
        o.write("B:" + allowed_chars + " drill")
        for pos in range(1, 11):
            o.write("\n\nI:" + allowed_chars + " drill (" + str(pos) + ")")
            o.write("\n*:_C_D_" + str(pos))
            o.write("\nD:")
            cur_len = 0
            words = choice(keys, size=sample_size, replace=False, p=p)
            for word in words:
                # Stop at the first word that would overflow the line.
                if cur_len + len(word) + 1 > line_width:
                    break
                elif cur_len > 0:
                    o.write(' ')
                o.write(word)
                cur_len += len(word) + 1
        o.write("\n\nX:\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement