Guest User

hyphens.lisp

a guest
Jun 14th, 2018
215
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lisp 6.17 KB | None | 0 0
  1. ;;; [2018-06-13] Challenge #363 [Intermediate] Word Hy-phen-a-tion By Com-put-er
  2. ;;; /r/dailyprogrammer https://redd.it/8qxpqd
  3. ;;;
  4. ;;; hyphenate.lisp - hyphenate words using Liang's Algorithm
  5. ;;;
  6. ;;; uses TeX patterns but not trie data structure to search for them
  7. ;;;
  8. ;;; DOES NOT validate that patterns are valid (i.e. number before starting ".")
  9. ;;; fails this test from challenge page: ceremony => cer-e-mo-ny
  10. ;;;
  11. ;;; more failure: $ cat our-hyphens.txt | tr -d [:alpha:] | sort | uniq -c
  12. ;;;
  13. ;;; no. of - | should be | but we get
  14. ;;; -------- * --------- * ----------
  15. ;;; 0        | 21830     | 22464
  16. ;;; 1        | 56852     | 56554
  17. ;;; 2........+ 50452.....| 50078.....
  18. ;;; 3        | 26630     | 26650
  19. ;;; 4        | 11751     | 11757
  20. ;;; 5........+  4044.....|  4055.....
  21. ;;; 6        |  1038     |  1037
  22. ;;; 7        |   195     |   194
  23. ;;; 8........+    30.....|    30.....
  24. ;;; 9        |     1     |     1
  25. ;;; ==================================
  26.  
  27. (defpackage :Liang
  28.   (:use :common-lisp)
  29.   (:export :load-hyphen-patterns :hyphenate :join))
  30.  
  31. (in-package :Liang)
  32.  
  33. ;;a day or so were wasted not understanding Common Lisp #'subseq
  34. ;;the &optional argument is 'END not 'len as in every other scripting language... :/
  35. (defun my-subseq (seq start len)
  36.   (break)
  37.   (if (>= start (length seq))
  38.       (format t "XXX! seq=~a, start=~a, len=~a~%" seq start len)
  39.     (subseq seq start len)))
  40.  
  41.  
  42. (defparameter *substr-lengths* nil) ;facilitates (dolist (x *substr-lengths*) ...) iteration
  43.  
  44. (defparameter *patterns* (make-hash-table :test #'equal)) ;Liang patterns indexed by prefix substrings
  45.  
  46. (defun strip-digits (str)
  47.   (remove-if #'digit-char-p str))
  48.  
  49. ;https://lispnews.wordpress.com/2018/01/15/reading-a-file-line-by-line-revisited/
  50. ;http://sodaware.sdf.org/notes/cl-read-file-into-string/
  51.  
  52. (defun load-hyphen-patterns (file-path)
  53.   ;;reset globals in case opening another pattern file
  54.   (let ((-min-substr-length- 99)
  55.     (-max-substr-length- -99))
  56.     (setq *substr-lengths* nil)
  57.     (setq *patterns* (make-hash-table :test #'equal))
  58.     (with-open-file (fin file-path)
  59.       (loop for pat = (read-line fin nil)
  60.      while pat do
  61.        (let* ((key (strip-digits pat))
  62.           (len (length key)))
  63.          (setf (gethash key *patterns*) pat)
  64.          (when (< len -min-substr-length-) (setq -min-substr-length- len))
  65.          (when (> len -max-substr-length-) (setq -max-substr-length- len)))))
  66.     (dotimes (x (1+ -max-substr-length-) t)
  67.       (when (>= x -min-substr-length-) (push x *substr-lengths*)))))
  68.  
  69. (defun generate-prefixes (str start)
  70.   (let ((len (- (length str) start))
  71.     (prefixes nil))
  72.     (dolist (x *substr-lengths* prefixes)
  73.       (when (< x len) (push (subseq str start (+ start x)) prefixes)))))
  74.  
  75. ;;returns an updated marks array, eventually the slots with odd numbers will be hyphen breaks
  76. ;;marks is updated from starting offset as the string is iterated through
  77.  
  78. (defun scan-patterns (str marks start)
  79.   (let ((prefixes (generate-prefixes str start)))
  80.     (dolist (key prefixes marks)
  81.       (let ((pattern (gethash key *patterns*))
  82.         (pos start)
  83.         (n nil))
  84.     (when pattern
  85.       (dotimes (x (length pattern) marks)
  86.         (setq n (digit-char-p (char pattern x)))
  87.         (if n
  88.         (when (> n (aref marks pos)) (setf (aref marks pos) n))
  89.         (incf pos))))))))
  90.  
  91. (defun analyze-word (word)
  92.   (let* ((len (length word))
  93.      (-word- (concatenate 'string "." word "."))
  94.      (-marks- (make-array (length -word-) :initial-element 0)))
  95.     (dotimes (i (length -word-) (subseq -marks- 1 (+ 1 len)))
  96.       (setq -marks- (scan-patterns -word- -marks- i)))))
  97.  
  98. ;;--- a few helper functions and then hyphenate ---
  99.  
  100. ;;this function is broken, but the magic snippet works as an inline statement...
  101. ;;let's make it a macro in case that helps then
  102. (defmacro join (&rest strings)
  103.   `(format nil "~{~a~^-~}" ,@strings))
  104.  
  105. ;;there should be a way to iterate a filtered marks array but we lisp newbies...
  106. (defun split-by-pos-list (str start pos-list)
  107.   (let ((end (car pos-list)))
  108.     (when start
  109.       (cons (subseq str start end)
  110.         (split-by-pos-list str end (cdr pos-list))))))
  111.  
  112. (defun generate-pos-list (marks-array)
  113.   (let ((ls nil)
  114.     (len (length marks-array)))
  115.     (dotimes (x len)
  116.       (let ((pos (- len x 1)))
  117.     (when (oddp (aref marks-array pos))
  118.       (push pos ls))))
  119.     ls))
  120.  
  121.  
  122. (defun hyphenate (word)
  123.   (let* ((marks (analyze-word word))
  124.      (pos-list (generate-pos-list marks))
  125.      (syllables (split-by-pos-list word 0 pos-list)))
  126.     ;;without this words like "programmer" are okay but "capable" isn't...
  127.     ;;probably a bug elsewhere!
  128.     (remove-if #'(lambda (x) (equal x "")) syllables)))
  129.  
  130.  
  131. ;;;------------ MAIN ------------
  132.  
  133. (in-package :user)
  134.  
  135. (defconstant +how-show+ 'stdout)
  136. (defconstant +start-time+ (get-internal-run-time))
  137.  
  138. ;;takes between 0.05 to 0.09 seconds
  139. ;;clisp runs much faster than ecl but the file load is comparable
  140.  
  141. (Liang:load-hyphen-patterns #P"/sdcard/tex-hyphenation-patterns.txt")
  142.  
  143. ;;takes about ??? seconds
  144. ;;clisp: approximately 115 seconds 'stdout
  145. ;;       approximately 103 seconds 'dev/null
  146. ;;ecl:   approximately  40 seconds 'stdout <-- wall time over 4 minutes, the internal clock gives "runtime" not io bound :/
  147. ;;ecl:   approximately  38 seconds 'dev/null
  148. ;;
  149. ;;ecl mobile (on local device, no network larency):
  150. ;;  with 'stdout
  151. ;;    All that in 166.44 seconds!
  152. ;;    real    5m45.929s
  153. ;;    user    2m47.250s
  154. ;;    sys     2m33.160s
  155. ;;  with 'dev/null
  156. ;;    All that in 132.17 seconds!
  157. ;;    real    4m11.379s
  158. ;;    user    2m13.020s
  159. ;;    sys     1m58.120s
  160.  
  161. (unless (eq +how-show+ 'repl)
  162.  
  163. (with-open-file (word-list #P"/sdcard/enable1.txt")
  164.   (loop for word = (read-line word-list nil)
  165.      while word do
  166.        (case +how-show+
  167.      ;#'Liang:join isn't working correctly... we just print the syllable list
  168.      ('stdout (write-line (Liang:join (Liang:hyphenate word))))
  169.      ('dev/null (Liang:hyphenate word))
  170.      (otherwise (error "set +how-out+ to either 'stdout or 'dev/null")))))
  171.  
  172. ) ;end non-repl
  173.  
  174. (defconstant +finish-time+ (get-internal-run-time))
  175.  
  176. (defconstant +diff-time+ (- +finish-time+ +start-time+))
  177. (defconstant +seconds+ (/ +diff-time+ internal-time-units-per-second))
  178. (format t "All that in ~f seconds!~%" +seconds+)
Add Comment
Please, Sign In to add comment