Advertisement
Guest User

68000 regex compiler v2

a guest
May 4th, 2016
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ;; the main advantage of bypassing a general-purpose compiler is in the ability to change an algorithm to a very low-level representation
  2. ;; and writing a custom code-generator can allow you to automate the generation of high-performance code designed for a specific task that a more general purpose compiler could not match
  3.  
  4. ;; this program is a code generator for matching regular languages (simple regex), outputting self-modifying 68000 code
  5. ;; A equivalent program written in C and compiled in gcc or any 68k C compiler would create much more bloat and run far slower.
  6. ;; This is probably not fair, as an x86 compiler would be much more modern, and likely fare far better, however it would never have known to use the self-modifying code trick I used.
  7. ;; If I were to use something similar in a professional product, I would probably use an equivalent written in C unless I knew heavy optimization was necessary.  Not many 68000 products are around nowadays though.
  8.  
  9.  
  10.  
  11. ;; Basically, these functions generate a list of states for all of the currently evaluating states in a NFA, and at run-time we step through them in lock-step
  12. ;; if a state succeeds (matching a character in this case), it writes the address of the next state to the next-state list
  13. ;; if it fails, it jumps to the next state in the current-state list.
  14.  
  15. ;; at the end of the current state list, we swap buffers and begin evaluating the next states, overwriting the old states to generate the next-next states.
  16.  
  17. ;; this process ends once one of the states jumps to SUCCESS (marking a full successful match), or TRUE_FAIL (meaning all states have failed to match)
  18.  
  19. ;; the code driver required to run the generated code is here: http://pastebin.com/bJNZKxW5
  20. ;; and examples are here: http://pastebin.com/DDY3PmMd
  21. ;; all generated code is for the Easy68k IDE, as it's the easiest to quickly test and debug with.
  22.  
  23. ;; this compiler is modeled after the one here: http://www.fing.edu.uy/inco/cursos/intropln/material/p419-thompson.pdf ,
  24. ;; but uses a lispy trick with branch continuations
  25.  
  26. (defun compile-regex (regex &optional
  27.                               (success-cont-label 'success)
  28.                               (failure-cont-label 'failure))
  29.  
  30.   (let* ((label (gensym "label"))
  31.          (code (etypecase regex
  32.                  (character
  33.                   (compile-character regex
  34.                                      label
  35.                                      success-cont-label
  36.                                      failure-cont-label))
  37.                  (list
  38.                   (destructuring-bind (operator &rest operands) regex
  39.                     (ecase operator
  40.                       ((or :or union :union)
  41.                        (compile-or operands success-cont-label failure-cont-label))
  42.                       ((and :and and :and)
  43.                        (compile-and (first operands)
  44.                                     (second operands)
  45.                                     success-cont-label
  46.                                     failure-cont-label))
  47.                       ((kleene :kleene
  48.                         closure :closure
  49.                         * :*)
  50.                        (compile-kleene operands label success-cont-label failure-cont-label))
  51.  
  52.                       ((plus :plus
  53.                         :+ +)
  54.                        (compile-regex
  55.                         `(:and ,(first operands)
  56.                                (:kleene ,(first operands)))
  57.                         success-cont-label
  58.                         'fail))))))))
  59.    
  60.     (values (cons (format nil "~a:" label) code) label)))
  61.  
  62. (defun compile-and (a b success-cont-label failure-cont-label)
  63.     (multiple-value-bind (b-code b-label)
  64.         (compile-regex b success-cont-label
  65.                        failure-cont-label)
  66.       (multiple-value-bind (a-code a-label)
  67.           (compile-regex a b-label
  68.                          failure-cont-label)
  69.         (append a-code b-code))))
  70.  
  71. (defun compile-character (regex cur-label success-cont-label failure-cont-label)
  72.   (append
  73.    (list (format nil "    cmp #'~a', d0" regex))
  74.    (case failure-cont-label
  75.      (success (list (format nil "    bne FAIL_SUCCESS")))
  76.      ;(failure (list (format nil "    bne FAILURE")))
  77.      (t (list (format nil "    bne FAIL"))))
  78.    (case success-cont-label
  79.      (success
  80.       (list (format nil "    bra SUCCESS")))
  81.  
  82.      (recur
  83.       (list (format nil "    move.w #~a, (a1)+" cur-label)
  84.             (format nil "    move.w #swap, (a1)+")
  85.             (format nil "    cmp d4, a1")
  86.             (format nil "    bge EXPENDED_MEMORY")
  87.             (format nil "    NEXT")))
  88.      (otherwise
  89.       (list (format nil "    move.w #~a, (a1)+" success-cont-label)
  90.             (format nil "    move.w #swap, (a1)+")
  91.             (format nil "    cmp d4, a1")
  92.             (format nil "    bge EXPENDED_MEMORY")
  93.             (format nil "    NEXT"))))))
  94.  
  95.  
  96. (defun compile-or (operands success-cont-label failure-cont-label)
  97.     (multiple-value-bind (a-code a-label)
  98.         (compile-regex (first operands)
  99.                        success-cont-label
  100.                        'fail)
  101.       (multiple-value-bind (b-code b-label)
  102.           (compile-regex (second operands)
  103.                          success-cont-label
  104.                          'fail)
  105.         (append
  106.          (list
  107.           (format nil "    move.w #~a, -(a0)" b-label)
  108.           (format nil "    bra ~a" a-label))
  109.          a-code
  110.          b-code))))
  111.  
  112. (defun compile-kleene (operands current-label
  113.                        success-cont-label
  114.                        failure-cont-label)
  115.     (multiple-value-bind (a-code a-label)
  116.         (compile-regex (first operands)
  117.                        'recur
  118.                        success-cont-label)
  119.       (if (eq 'success success-cont-label)
  120.           (cons
  121.            (format nil "    moveq #1, d7")
  122.            a-code)
  123.           (append
  124.            (list
  125.             (format nil "    subq.l #2, a0")
  126.             (format nil "    move.w #~a, (a0)"
  127.                     (case success-cont-label
  128.                       (recur current-label)
  129.                       (otherwise success-cont-label)))
  130.              
  131.             (format nil "    bra ~a" a-label))
  132.            a-code))))
  133.  
  134.  
  135.  
  136. (defun print-program (prog)
  137.   (loop for ex in prog do
  138.        (princ ex)
  139.        (terpri)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement