Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ;;;; Syllable generator
- (ql:quickload :cl-ppcre)
- (defpackage :syllable-generator
- (:use :common-lisp
- :cl-ppcre))
- (in-package :syllable-generator)
- (defparameter *spelling-schemes* '(:common :decorated :pointed)
- "The order for the orthogrpahic convention list.")
- (defvar *default-orthographic-style* :common
- "Special variable that holds the default orthographic style.")
- (defvar *delimiter-limit* 6
- "Determines how long a syllable can be before it gets squared off by hyphens. Use NIL for no hyphens.")
- (defclass phoneme ()
- ((ipa :initarg :ipa
- :accessor pronounce)
- (orth :initarg :orthography)
- (name :accessor name)))
- (defmethod initialize-instance :after ((instance phoneme) &key)
- (with-slots (name orth) instance
- (setf name (cdr (first orth)))))
- (defgeneric write-phoneme (phoneme)
- (:documentation "Writes the phoneme with an optional variant spelling scheme"))
- (defmethod write-phoneme ((phoneme phoneme))
- (cdr (assoc *default-orthographic-style* (slot-value phoneme 'orth))))
- (defmethod print-object ((object phoneme) stream)
- (with-accessors ((ipa pronounce) (orthography write-phoneme)) object
- (print-unreadable-object (object stream :type t) object
- (format stream "/~a/ ‹~a›" ipa orthography))))
- (defun make-phoneme-list (phoneme-list)
- (destructuring-bind (probability orthographies ipa) phoneme-list
- (list probability
- (make-instance 'phoneme
- :orthography (loop for spelling-scheme in *spelling-schemes*
- for orthography in orthographies
- collect (cons spelling-scheme orthography))
- :ipa ipa))))
- (defvar *phonemes* (make-hash-table)
- "Holds phoneme collections with a probability in each one.")
- (progn ; The progn is here for ease of C-c C-c compilation.
- (defparameter *phoneme-data*
- '((:vowels
- (160 ("a" "a" "a") "ɑ" ) (90 ("e" "e" "e" ) "ɛ" ) (160 ("i" "i" "i" ) "i" ) (150 ("o" "o" "o" ) "ɔ" ) (140 ("u" "u" "u") "ʊ" )
- (50 ("ae" "ā" "-a") "ɑː") (70 ("ee" "ē" "-e") "ɛː") (50 ("ij" "ī" "-i") "iː") (100 ("ou" "ō" "-o") "ɔː") (30 ("vu" "ō" "-u") "ʊː"))
- (:vowels-for-y
- (160 ("a" "a" "a") "ɑ" ) (90 ("e" "e" "e" ) "ɛ" ) (100 ("o" "o" "o" ) "ɔ" ) (140 ("u" "u" "u" ) "ʊ" )
- (150 ("ae" "ā" "-a") "ɑː") (150 ("ee" "ē" "-e") "ɛː") (100 ("ou" "ō" "-o") "ɔː") (110 ("vu" "ō" "-u") "ʊː"))
- (:vowels-for-w
- (160 ("a" "a" "a") "ɑ" ) (90 ("e" "e" "e" ) "ɛ" ) (160 ("i" "i" "i" ) "i" ) (150 ("o" "o" "o" ) "ɔ" )
- (120 ("ae" "ā" "-a") "ɑː") (95 ("ee" "ē" "-e") "ɛː") (125 ("ij" "ī" "-i") "iː") (100 ("ou" "ō" "-o") "ɔː"))
- (:approximants
- (550 ("y" "y" "y") "j") (450 ("w" "w" "w") "w"))
- (:plosives
- (122 ("p" "p" "p") "p") (116 ("t" "t" "t") "t") (122 ("k" "k" "k") "k")
- (111 ("b" "b" "b") "b") (108 ("d" "d" "d") "d") (111 ("g" "g" "g") "g")
- (70 ("h" "h" "h") "h") (120 ("m" "m" "m") "m") (120 ("n" "n" "n") "n"))
- (:plosives-fricatives
- (122 ("p" "p" "p") "p") (116 ("t" "t" "t") "t") (127 ("k" "k" "k") "k")
- (111 ("b" "b" "b") "b") (108 ("d" "d" "d") "d") (116 ("g" "g" "g") "g")
- (150 ("m" "m" "m") "m") (150 ("n" "n" "n") "n"))
- (:modified-plosives
- (185 ("pp" "ṕ" "-p") "pʰ") (170 ("tt" "ẗ" "-t") "tʰ") (185 ("kk" "ḱ" "-k") "kʰ")
- (185 ("bb" "ḇ" "-b") "bʷ") (145 ("dd" "ḏ" "-d") "dʷ") (165 ("gg" "ḡ" "-g") "gʷ"))
- (:fricatives
- (180 ("th" "þ" "t,") "θ") (240 ("f" "f" "f") "f") (140 ("s" "s" "s" ) "s")
- (140 ("x" "x" "x") "x") (180 ("ss" "š" "s.") "ʃ"))
- (:rhotics
- (180 ("rl" "ľ" "l.") "ɾ") (320 ("l" "l" "l" ) "l") (180 ("r" "r" "r" ) "ɹ") (320 ("rr" "ř" "r.") "r"))
- (:rhotics-restricted
- (650 ("l" "l" "l" ) "l") (350 ("r" "r" "r" ) "ɹ"))
- (:front-glide
- (1000 ("i" "i" "i" ) "i"))
- (:back-glide
- (1000 ("w" "w" "w" ) "w")))
- "The groups of phonemes that make up a syllable, and how often each phoneme represents the group it's in.")
- (loop for (key . args) in *phoneme-data* do (setf (gethash key *phonemes*) (mapcar #'make-phoneme-list args))))
- ; Filling in the phoneme hashtable using *phoneme-data*
- (defparameter *slot-probability-data*
- '((:onset-unrestricted
- (300)
- (150 :plosives) (25 :plosives-fricatives :fricatives) (25 :plosives :rhotics)
- (90 :modified-plosives) (60 :modified-plosives :rhotics)
- (90 :fricatives) (60 :fricatives :rhotics)
- (100 :approximants) (100 :approximants :rhotics-restricted))
- (:onset-limited
- (300)
- (200 :plosives)
- (150 :modified-plosives)
- (150 :fricatives)
- (200 :approximants))
- (:onset-none (1000))
- (:nucleus
- (200 :front-glide :vowels-for-y)
- (600 :vowels)
- (200 :vowels-for-w :back-glide))
- (:coda
- (500)
- (200 :plosives-fricatives) (200 :plosives-fricatives :fricatives)
- (100 :modified-plosives)
- (100 :fricatives)))
- "The probability of phoneme combination occupying each slot in the syllable.")
- (defparameter *respelling-regexes-data*
- ((lambda (regex-strings)
- (loop for (regex . args) in regex-strings
- collect (cons (create-scanner regex :case-insensitive-mode t) args)))
- '(("^kk" "kh") ("^gg" "qu") ("^pp" "ph") ("^bb" "bw") ("^tt" "tj") ("^dd" "dw")
- ("kk$" "c") ("x$" "ch")
- ("kk([tdkgpbrles])\\1" "kh\\1\\1") ("gg([tdkgpbrles])\\1" "gu\\1\\1")
- ("tt([tdkgpbrles])\\1" "tj\\1\\1") ("dd([tdkgpbrles])\\1" "du\\1\\1")
- ("pp([tdkgpbrles])\\1" "ph\\1\\1") ("bb([tdkgpbrles])\\1" "bu\\1\\1")
- ("rr([tdkgpbrles])\\1" "rh\\1\\1") ("ll([tdkgpbrles])\\1" "lj\\1\\1")
- ("ss([tdkgpbrles])\\1" "sz\\1\\1")
- ("eekk" "eekh") ("eegg" "eegu") ("eett" "eetj") ("eedd" "eedu")
- ("eepp" "eeph") ("eebb" "eebu") ("eerr" "eerh") ("eell" "eelj")
- ("tsss" "cchs") ("dsss" "djs") ("tss([^s])" "cch\\1") ("dss([^s])" "dj\\1")
- ("tss$" "cch") ("dss$" "dj")
- ("^(p|t|k|ph|tj|kh|b|d|g|bw|dw|qu|h|m|n)ss" "\\1sc")
- ("([tdkgpblreus])\\1\\1\\1?" "\\1\\1")
- ("h(rr?|ll?)" "\\1" 3/4)))
- "Regex respelling. Uses the form (from-string to-string [probability of action]).")
- ;;; -----
- (defun select-from (candidate-list &optional initial-die-value)
- (loop for (weight . candidate) in candidate-list
- for die = (- (or initial-die-value
- (random (reduce #'+ candidate-list :key #'first)))
- weight)
- then (- die weight)
- ; do (format t "~&~a" die)
- when (minusp die)
- return candidate
- finally (return-from select-from candidate)))
- (defun make-syllable (&rest syllable-slots)
- (loop for syllable-slot in syllable-slots
- append (loop for phoneme-set in (select-from (cdr (assoc syllable-slot *slot-probability-data*)))
- append (select-from (gethash phoneme-set *phonemes*)))))
- (defun spell-syllable (syllable-list)
- (reduce #'(lambda (p q) (concatenate 'string p q))
- (mapcar #'write-phoneme syllable-list)))
- (defun make-word ()
- (loop for counter from 1 to 20
- for probability-for-next-syllable = 16/25
- then (* probability-for-next-syllable (expt 16/25 counter))
- ; formula: If a syllable is the nth in a word,
- ; the probability that another syllable follows it is 0.64^n.
- collect (make-syllable :onset-unrestricted :nucleus :coda) into result
- ; do (format t "~&~a, ~a" (length (first result)) (length result))
- until (and (> (random 1.0) probability-for-next-syllable)
- (or (< 1 (length result))
- (< 1 (length (first result)))))
- finally (return-from make-word result)))
- (defun spell-word (word &optional (delimiter "-"))
- (let ((initial-word (mapcar #'(lambda (p) (apply #'concatenate 'string (mapcar #'write-phoneme p))) word))
- (delimiter-regex (create-scanner (format nil "^~a|~:*~a$" delimiter)))
- (double-delimiter-regex (create-scanner (format nil "~a+" delimiter))))
- (if (eql *default-orthographic-style* :common)
- ;;; The common style requires processing through a list of regexen.
- (loop with hyphenated-word = (if *delimiter-limit*
- (regex-replace-all
- double-delimiter-regex
- (regex-replace-all
- delimiter-regex
- (apply #'concatenate 'string
- (mapcar #'(lambda (p)
- (if (< *delimiter-limit* (length p))
- (concatenate 'string delimiter p delimiter)
- p)) initial-word))
- "")
- delimiter)
- (apply #'concatenate 'string initial-word))
- for (from to probability) in *respelling-regexes-data*
- for word = (if (< (random 1.0) (or probability 1))
- (regex-replace-all from hyphenated-word to)
- word)
- then (if (< (random 1.0) (or probability 1))
- (regex-replace-all from word to)
- word)
- finally (return-from spell-word word)))
- ;;; Other styles don't need to be respelled, so they just need to be concatenated together
- (if *delimiter-limit*
- (regex-replace-all
- delimiter-regex
- (apply #'concatenate 'string
- (mapcar #'(lambda (p)
- (if (< *delimiter-limit* (length p))
- (concatenate 'string delimiter p delimiter)
- p)) initial-word))
- "")
- initial-word))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement