Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import random, re
# Number of random characters drawn for each attempt at producing a word. A
# word can never be longer than this, although in practice words come out much
# shorter because syllable_structure is far more restrictive.
NOISE_LENGTH = 20

# All of the IPA characters used by English.
alphabet = 'aæɑbdðeɛəfghiɪjklmnŋopɹsʃtθuʌʊvwzʒ'

# A basic CV syllable structure: one non-vowel character followed by one vowel,
# with that pair repeated between 1 and 6 times.
syllable_structure = re.compile('(?:[^aæɑeɛəiɪouʌʊ][aæɑeɛəiɪouʌʊ]){1,6}')

# How many words main() prints.
num_words = 10
def main():
    """Print ``num_words`` randomly generated words, one per line."""
    for _ in range(num_words):
        word = generate(syllable_structure, alphabet)
        print(word)
def generate(pattern, alphabet):
    """
    Generates a word using a particular alphabet that adheres to some regular expression

    Random noise is drawn from the alphabet until some substring of it matches the
    pattern; the leftmost such substring is returned. There is no retry limit, so
    this loops forever if the alphabet can never produce a match.

    :param SRE_Pattern pattern: The pattern (probably syllable structure) that the word must adhere to
    :param str alphabet: All and only the acceptable symbols in the language
    :return: A word that adheres to the pattern and alphabet
    """
    match = None
    while match is None:
        # Keep generating noise until some substring of that noise matches the pattern.
        # search() is required here: match() only succeeds at position 0, which would
        # throw away noise that contains a perfectly good word further along.
        noise = generate_noise(alphabet)
        match = pattern.search(noise)
    return match.group(0)  # We only care about the first match
def generate_noise(alphabet, length=None):
    """
    Generates a random string that uses only the characters in some alphabet

    :param str alphabet: A string of all and only the characters that can be included in the noise
    :param int length: How many characters to generate; defaults to NOISE_LENGTH when omitted
    :return: A random string of ``length`` characters drawn from the alphabet
    """
    if length is None:
        # Looked up at call time (not as a default argument) so that later changes
        # to the module-level NOISE_LENGTH are still honoured.
        length = NOISE_LENGTH
    return ''.join(random.choice(alphabet) for _ in range(length))
# Kick off word generation as soon as this file is run.
main()
Add Comment
Please, Sign In to add comment