Guest User

Untitled

a guest
Nov 17th, 2017
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.66 KB | None | 0 0
  1. import random, re
  2.  
  # Number of random symbols in each noise string. This is the upper bound on
  # how long a word could possibly be; in practice words come out much shorter,
  # depending on how restrictive syllable_structure is.
  NOISE_LENGTH = 20

  # How many words main() prints.
  num_words = 10

  # Symbol inventory the noise is drawn from: the IPA characters used by English.
  alphabet = u'aæɑbdðeɛəfghiɪjklmnŋopɹsʃtθuʌʊvwzʒ'

  # A basic CV syllable (one symbol outside the vowel set followed by one symbol
  # from the vowel set), repeated between 1 and 6 times.
  syllable_structure = re.compile(
      u'(?:[^aæɑeɛəiɪouʌʊ][aæɑeɛəiɪouʌʊ]){1,6}'
  )
  12.  
  def main():
      """Print num_words generated words, one per line."""
      for _ in range(num_words):
          word = generate(syllable_structure, alphabet)
          print(word)
  16.  
  17.  
  def generate(pattern, alphabet):
      """
      Generates a word using a particular alphabet that adheres to some regular expression

      Noise strings are drawn from the alphabet until the pattern matches at the
      very start of one of them; the matched prefix is returned as the word.

      :param SRE_Pattern pattern: The compiled pattern (probably syllable structure) that the word must adhere to
      :param str alphabet: All and only the acceptable symbols in the language
      :return: A word that adheres to the pattern and alphabet
      """
      match = None

      while match is None:
          # pattern.match() is anchored at position 0, so a noise string only
          # counts when one of its prefixes fits the pattern; otherwise retry.
          # NOTE: this loop never ends if the pattern cannot match any noise
          # built from the alphabet.
          noise = generate_noise(alphabet)
          match = pattern.match(noise)

      # group(0) is the entire matched text. Unlike match[0], it does not depend
      # on Match.__getitem__, which only exists from Python 3.6 onwards.
      return match.group(0)
  34.  
  35.  
  def generate_noise(alphabet, length=None):
      """
      Generates a random string that uses only the characters in some alphabet

      :param str alphabet: A string of all and only the characters that can be included in the noise
      :param int length: How many characters to generate; NOISE_LENGTH is used when omitted
      :return: A random string containing characters from the alphabet
      """
      if length is None:
          # Looked up at call time so one-argument callers keep following the
          # module-wide NOISE_LENGTH setting.
          length = NOISE_LENGTH

      return u''.join(random.choice(alphabet) for _ in range(length))
  44.  
  45.  
  if __name__ == '__main__':
      # Generate and print words only when executed as a script, so importing
      # this module does not produce output.
      main()
Add Comment
Please, Sign In to add comment