Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ;; the main advantage of bypassing a general-purpose compiler is in the ability to change an algorithm to a very low-level representation
- ;; and writing a custom code-generator can allow you to automate the generation of high-performance code designed for a specific task that a more general purpose compiler could not match
- ;; this program is a code generator for matching regular languages (simple regex), outputting self-modifying 68000 code
- ;; A equivalent program written in C and compiled in gcc or any 68k C compiler would create much more bloat and run far slower.
- ;; This is probably not fair, as an x86 compiler would be much more modern, and likely fare far better, however it would never have known to use the self-modifying code trick I used.
- ;; If I were to use something similar in a professional product, I would probably use an equivalent written in C unless I knew heavy optimization was necessary. Not many 68000 products are around nowadays though.
- ;; Basically, these functions generate a list of states for all of the currently evaluating states in a NFA, and at run-time we step through them in lock-step
- ;; if a state succeeds (matching a character in this case), it writes the address of the next state to the next-state list
- ;; if it fails, it jumps to the next state in the current-state list.
- ;; at the end of the current state list, we swap buffers and begin evaluating the next states, overwriting the old states to generate the next-next states.
- ;; this process ends once one of the states jumps to SUCCESS (marking a full successful match), or TRUE_FAIL (meaning all states have failed to match)
- ;; the code driver required to run the generated code is here: http://pastebin.com/bJNZKxW5
- ;; and examples are here: http://pastebin.com/DDY3PmMd
- ;; all generated code is for the Easy68k IDE, as it's the easiest to quickly test and debug with.
- ;; this compiler is modeled after the one here: http://www.fing.edu.uy/inco/cursos/intropln/material/p419-thompson.pdf ,
- ;; but uses a lispy trick with branch continuations
- (defun compile-regex (regex &optional
- (success-cont-label 'success)
- (failure-cont-label 'failure))
- (let* ((label (gensym "label"))
- (code (etypecase regex
- (character
- (compile-character regex
- label
- success-cont-label
- failure-cont-label))
- (list
- (destructuring-bind (operator &rest operands) regex
- (ecase operator
- ((or :or union :union)
- (compile-or operands success-cont-label failure-cont-label))
- ((and :and and :and)
- (compile-and (first operands)
- (second operands)
- success-cont-label
- failure-cont-label))
- ((kleene :kleene
- closure :closure
- * :*)
- (compile-kleene operands label success-cont-label failure-cont-label))
- ((plus :plus
- :+ +)
- (compile-regex
- `(:and ,(first operands)
- (:kleene ,(first operands)))
- success-cont-label
- 'fail))))))))
- (values (cons (format nil "~a:" label) code) label)))
- (defun compile-and (a b success-cont-label failure-cont-label)
- (multiple-value-bind (b-code b-label)
- (compile-regex b success-cont-label
- failure-cont-label)
- (multiple-value-bind (a-code a-label)
- (compile-regex a b-label
- failure-cont-label)
- (append a-code b-code))))
- (defun compile-character (regex cur-label success-cont-label failure-cont-label)
- (append
- (list (format nil " cmp #'~a', d0" regex))
- (case failure-cont-label
- (success (list (format nil " bne FAIL_SUCCESS")))
- ;(failure (list (format nil " bne FAILURE")))
- (t (list (format nil " bne FAIL"))))
- (case success-cont-label
- (success
- (list (format nil " bra SUCCESS")))
- (recur
- (list (format nil " move.w #~a, (a1)+" cur-label)
- (format nil " move.w #swap, (a1)+")
- (format nil " cmp d4, a1")
- (format nil " bge EXPENDED_MEMORY")
- (format nil " NEXT")))
- (otherwise
- (list (format nil " move.w #~a, (a1)+" success-cont-label)
- (format nil " move.w #swap, (a1)+")
- (format nil " cmp d4, a1")
- (format nil " bge EXPENDED_MEMORY")
- (format nil " NEXT"))))))
- (defun compile-or (operands success-cont-label failure-cont-label)
- (multiple-value-bind (a-code a-label)
- (compile-regex (first operands)
- success-cont-label
- 'fail)
- (multiple-value-bind (b-code b-label)
- (compile-regex (second operands)
- success-cont-label
- 'fail)
- (append
- (list
- (format nil " move.w #~a, -(a0)" b-label)
- (format nil " bra ~a" a-label))
- a-code
- b-code))))
- (defun compile-kleene (operands current-label
- success-cont-label
- failure-cont-label)
- (multiple-value-bind (a-code a-label)
- (compile-regex (first operands)
- 'recur
- success-cont-label)
- (if (eq 'success success-cont-label)
- (cons
- (format nil " moveq #1, d7")
- a-code)
- (append
- (list
- (format nil " subq.l #2, a0")
- (format nil " move.w #~a, (a0)"
- (case success-cont-label
- (recur current-label)
- (otherwise success-cont-label)))
- (format nil " bra ~a" a-label))
- a-code))))
- (defun print-program (prog)
- (loop for ex in prog do
- (princ ex)
- (terpri)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement