Advertisement
Isoraqathedh

word-generator wip 1

Apr 20th, 2014
148
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lisp 9.15 KB | None | 0 0
  1. ;;;; Syllable generator
  2.  
  3. (ql:quickload :cl-ppcre)
  4.  
  5. (defpackage :syllable-generator
  6.   (:use :common-lisp
  7.     :cl-ppcre))
  8.  
  9. (in-package :syllable-generator)
  10.  
  11. (defparameter *spelling-schemes* '(:common :decorated :pointed)
  12.   "The order for the orthogrpahic convention list.")
  13.  
  14. (defvar *default-orthographic-style* :common
  15.   "Special variable that holds the default orthographic style.")
  16.  
  17. (defvar *delimiter-limit* 6
  18.   "Determines how long a syllable can be before it gets squared off by hyphens. Use NIL for no hyphens.")
  19.  
  20. (defclass phoneme ()
  21.   ((ipa :initarg :ipa
  22.     :accessor pronounce)
  23.    (orth :initarg :orthography)
  24.    (name :accessor name)))
  25.  
  26. (defmethod initialize-instance :after ((instance phoneme) &key)
  27.   (with-slots (name orth) instance
  28.     (setf name (cdr (first orth)))))
  29.  
  30. (defgeneric write-phoneme (phoneme)
  31.   (:documentation "Writes the phoneme with an optional variant spelling scheme"))
  32. (defmethod write-phoneme ((phoneme phoneme))
  33.   (cdr (assoc *default-orthographic-style* (slot-value phoneme 'orth))))
  34.  
  35. (defmethod print-object ((object phoneme) stream)
  36.   (with-accessors ((ipa pronounce) (orthography write-phoneme)) object
  37.     (print-unreadable-object (object stream :type t) object
  38.       (format stream "/~a/ ‹~a›" ipa orthography))))
  39.  
  40. (defun make-phoneme-list (phoneme-list)
  41.   (destructuring-bind (probability orthographies ipa) phoneme-list
  42.     (list probability
  43.       (make-instance 'phoneme
  44.              :orthography (loop for spelling-scheme in *spelling-schemes*
  45.                         for orthography     in orthographies
  46.                         collect (cons spelling-scheme orthography))
  47.              :ipa ipa))))
  48.  
  49. (defvar *phonemes* (make-hash-table)
  50.   "Holds phoneme collections with a probability in each one.")
  51.  
  52. (progn ; The progn is here for ease of C-c C-c compilation.
  53.   (defparameter *phoneme-data*
  54.     '((:vowels
  55.        (160 ("a"  "a" "a")  "ɑ" )  (90  ("e"  "e" "e" ) "ɛ" ) (160 ("i"  "i" "i" ) "i" ) (150 ("o"  "o" "o" ) "ɔ" ) (140 ("u"  "u" "u")  "ʊ" )
  56.        (50  ("ae" "ā" "-a") "ɑː")  (70  ("ee" "ē" "-e") "ɛː") (50  ("ij" "ī" "-i") "iː") (100 ("ou" "ō" "-o") "ɔː") (30  ("vu" "ō" "-u") "ʊː"))
  57.       (:vowels-for-y
  58.        (160 ("a"  "a" "a")  "ɑ" )  (90  ("e"  "e" "e" ) "ɛ" ) (100 ("o"  "o" "o" ) "ɔ" ) (140 ("u"  "u" "u" ) "ʊ" )
  59.        (150 ("ae" "ā" "-a") "ɑː")  (150 ("ee" "ē" "-e") "ɛː") (100 ("ou" "ō" "-o") "ɔː") (110 ("vu" "ō" "-u") "ʊː"))
  60.       (:vowels-for-w
  61.        (160 ("a"  "a" "a")  "ɑ" )  (90  ("e"  "e" "e" ) "ɛ" ) (160 ("i"  "i" "i" ) "i" ) (150 ("o"  "o" "o" ) "ɔ" )
  62.        (120 ("ae" "ā" "-a") "ɑː")  (95  ("ee" "ē" "-e") "ɛː") (125 ("ij" "ī" "-i") "iː") (100 ("ou" "ō" "-o") "ɔː"))
  63.       (:approximants
  64.        (550 ("y"  "y" "y") "j") (450 ("w"  "w" "w") "w"))
  65.       (:plosives
  66.        (122 ("p"  "p" "p") "p") (116 ("t"  "t" "t") "t") (122 ("k"  "k" "k") "k")
  67.        (111 ("b"  "b" "b") "b") (108 ("d"  "d" "d") "d") (111 ("g"  "g" "g") "g")
  68.        (70  ("h"  "h" "h") "h") (120 ("m"  "m" "m") "m") (120 ("n"  "n" "n") "n"))
  69.       (:plosives-fricatives
  70.        (122 ("p"  "p" "p") "p") (116 ("t"  "t" "t") "t") (127 ("k"  "k" "k") "k")
  71.        (111 ("b"  "b" "b") "b") (108 ("d"  "d" "d") "d") (116 ("g"  "g" "g") "g")
  72.        (150 ("m"  "m" "m") "m") (150 ("n"  "n" "n") "n"))
  73.       (:modified-plosives
  74.        (185 ("pp" "ṕ" "-p") "pʰ") (170 ("tt" "ẗ" "-t") "tʰ") (185 ("kk" "ḱ" "-k") "kʰ")
  75.        (185 ("bb" "ḇ" "-b") "bʷ") (145 ("dd" "ḏ" "-d") "dʷ") (165 ("gg" "ḡ" "-g") "gʷ"))
  76.       (:fricatives
  77.        (180 ("th" "þ" "t,") "θ") (240 ("f"  "f"  "f") "f") (140 ("s"  "s" "s" ) "s")
  78.        (140 ("x"  "x" "x") "x")  (180 ("ss" "š" "s.") "ʃ"))
  79.       (:rhotics
  80.        (180 ("rl" "ľ" "l.") "ɾ") (320 ("l"  "l" "l" ) "l") (180 ("r"  "r" "r" ) "ɹ") (320 ("rr" "ř" "r.") "r"))
  81.       (:rhotics-restricted
  82.        (650 ("l"  "l" "l" ) "l") (350 ("r"  "r" "r" ) "ɹ"))
  83.       (:front-glide
  84.        (1000 ("i"  "i" "i" ) "i"))
  85.       (:back-glide
  86.        (1000 ("w"  "w" "w" ) "w")))
  87.   "The groups of phonemes that make up a syllable, and how often each phoneme represents the group it's in.")
  88.  
  89.   (loop for (key . args) in *phoneme-data* do (setf (gethash key *phonemes*) (mapcar #'make-phoneme-list args))))
  90.                     ; Filling in the phoneme hashtable using *phoneme-data*
  91.  
  92. (defparameter *slot-probability-data*
  93.   '((:onset-unrestricted
  94.      (300)
  95.      (150 :plosives)          (25  :plosives-fricatives :fricatives)  (25 :plosives :rhotics)
  96.      (90  :modified-plosives) (60  :modified-plosives :rhotics)
  97.      (90  :fricatives)        (60  :fricatives :rhotics)
  98.      (100 :approximants)      (100 :approximants :rhotics-restricted))
  99.     (:onset-limited
  100.      (300)
  101.      (200 :plosives)
  102.      (150 :modified-plosives)
  103.      (150 :fricatives)
  104.      (200 :approximants))
  105.     (:onset-none (1000))
  106.     (:nucleus
  107.      (200 :front-glide :vowels-for-y)
  108.      (600 :vowels)
  109.      (200 :vowels-for-w :back-glide))
  110.     (:coda
  111.      (500)
  112.      (200 :plosives-fricatives) (200 :plosives-fricatives :fricatives)
  113.      (100 :modified-plosives)
  114.      (100 :fricatives)))
  115.   "The probability of phoneme combination occupying each slot in the syllable.")
  116.  
  117. (defparameter *respelling-regexes-data*
  118.   ((lambda (regex-strings)
  119.      (loop for (regex . args) in regex-strings
  120.        collect (cons (create-scanner regex :case-insensitive-mode t) args)))
  121.    '(("^kk" "kh") ("^gg" "qu") ("^pp" "ph") ("^bb" "bw") ("^tt" "tj") ("^dd" "dw")
  122.      ("kk$" "c") ("x$" "ch")
  123.      ("kk([tdkgpbrles])\\1" "kh\\1\\1") ("gg([tdkgpbrles])\\1" "gu\\1\\1")
  124.      ("tt([tdkgpbrles])\\1" "tj\\1\\1") ("dd([tdkgpbrles])\\1" "du\\1\\1")
  125.      ("pp([tdkgpbrles])\\1" "ph\\1\\1") ("bb([tdkgpbrles])\\1" "bu\\1\\1")
  126.      ("rr([tdkgpbrles])\\1" "rh\\1\\1") ("ll([tdkgpbrles])\\1" "lj\\1\\1")
  127.      ("ss([tdkgpbrles])\\1" "sz\\1\\1")
  128.      ("eekk" "eekh") ("eegg" "eegu") ("eett" "eetj") ("eedd" "eedu")
  129.      ("eepp" "eeph") ("eebb" "eebu") ("eerr" "eerh") ("eell" "eelj")
  130.      ("tsss" "cchs") ("dsss" "djs") ("tss([^s])" "cch\\1") ("dss([^s])" "dj\\1")
  131.      ("tss$" "cch") ("dss$" "dj")
  132.      ("^(p|t|k|ph|tj|kh|b|d|g|bw|dw|qu|h|m|n)ss" "\\1sc")
  133.      ("([tdkgpblreus])\\1\\1\\1?" "\\1\\1")
  134.      ("h(rr?|ll?)" "\\1" 3/4)))
  135.   "Regex respelling. Uses the form (from-string to-string [probability of action]).")
  136.  
  137. ;;; -----
  138.  
  139. (defun select-from (candidate-list &optional initial-die-value)
  140.     (loop for (weight . candidate) in candidate-list
  141.       for die = (- (or initial-die-value
  142.                (random (reduce #'+ candidate-list :key #'first)))
  143.                weight)
  144.         then (- die weight)
  145.       ; do (format t "~&~a" die)
  146.       when (minusp die)
  147.         return candidate
  148.       finally (return-from select-from candidate)))
  149.  
  150. (defun make-syllable (&rest syllable-slots)
  151.   (loop for syllable-slot in syllable-slots
  152.     append (loop for phoneme-set in (select-from (cdr (assoc syllable-slot *slot-probability-data*)))
  153.          append (select-from (gethash phoneme-set *phonemes*)))))
  154.  
  155. (defun spell-syllable (syllable-list)
  156.   (reduce #'(lambda (p q) (concatenate 'string p q))
  157.       (mapcar #'write-phoneme syllable-list)))
  158.  
  159. (defun make-word ()
  160.   (loop for counter from 1 to 20
  161.     for probability-for-next-syllable = 16/25
  162.       then (* probability-for-next-syllable (expt 16/25 counter))
  163.                     ; formula: If a syllable is the nth in a word,
  164.                     ; the probability that another syllable follows it is 0.64^n.
  165.     collect (make-syllable :onset-unrestricted :nucleus :coda) into result
  166.     ; do (format t "~&~a, ~a" (length (first result)) (length result))
  167.     until (and (> (random 1.0) probability-for-next-syllable)
  168.            (or (< 1 (length result))
  169.                (< 1 (length (first result)))))
  170.     finally (return-from make-word result)))
  171.  
  172.  
  173. (defun spell-word (word &optional (delimiter "-"))
  174.   (let ((initial-word (mapcar #'(lambda (p) (apply #'concatenate 'string (mapcar #'write-phoneme p))) word))
  175.     (delimiter-regex (create-scanner (format nil "^~a|~:*~a$" delimiter)))
  176.     (double-delimiter-regex (create-scanner (format nil "~a+" delimiter))))
  177.     (if (eql *default-orthographic-style* :common)
  178.     ;;; The common style requires processing through a list of regexen.
  179.     (loop with hyphenated-word = (if *delimiter-limit*
  180.                      (regex-replace-all
  181.                       double-delimiter-regex
  182.                       (regex-replace-all
  183.                        delimiter-regex
  184.                        (apply #'concatenate 'string
  185.                           (mapcar #'(lambda (p)
  186.                                   (if (< *delimiter-limit* (length p))
  187.                                   (concatenate 'string delimiter p delimiter)
  188.                                   p)) initial-word))
  189.                        "")
  190.                       delimiter)
  191.                      (apply #'concatenate 'string initial-word))
  192.           for (from to probability) in *respelling-regexes-data*
  193.           for word = (if (< (random 1.0) (or probability 1))
  194.                 (regex-replace-all from hyphenated-word to)
  195.                 word)
  196.         then (if (< (random 1.0) (or probability 1))
  197.              (regex-replace-all from word to)
  198.              word)
  199.           finally (return-from spell-word word)))
  200.     ;;; Other styles don't need to be respelled, so they just need to be concatenated together
  201.     (if *delimiter-limit*
  202.         (regex-replace-all
  203.          delimiter-regex
  204.          (apply #'concatenate 'string
  205.             (mapcar #'(lambda (p)
  206.                 (if (< *delimiter-limit* (length p))
  207.                     (concatenate 'string delimiter p delimiter)
  208.                     p)) initial-word))
  209.          "")
  210.         initial-word))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement