;;; 13 support 28 lexer 38 tokenizer 78 total ;;; Globals (defconstant +eof-character+ (code-char 26)) ;;; Utilities. (defmacro aif (cond then &optional else) `(let ((-it- ,cond)) (if -it- ,then ,else))) ;;; Utilities (defun space-char? (c) (or (eql c #\space) (eql c #\tab) (eql c #\newline))) (defun ! (s) (read-char s nil +eof-character+)) (defun un! (c s) (unread-char c s)) (defun d+ (d N) (+ (* N 10) d)) (defun bclear (b) (setf (fill-pointer b) 0) b) (defun bpush (c b) (vector-push (char-upcase c) b) b) ;;; ;;; RCASE and READING: lexical analizer using tail recursive ;;; functions as states. ;;; #|| (rcase c ( ...) ...) A version of case specialize for a character, c. The of the first that matches c is taken. MATCHES - that character. - if character is the same as variable . ( . ) - if ( the-char . ) is not NIL. (or ...) - first that matches. (not ) - if doesn't match. :EOF - if stream is at end of file. (code ) - if arbitrary expression, is true. T - anthing. Must be last! ||# (defun compile-arms (arms) (cond ((null arms) 'NIL) (t `(aif ,(compile-test (caar arms)) (progn ,(cdar arms)) ,(compile-arms (cdr arms)))))) (defun compile-test (pattern) (cond ((characterp pattern) `(eql -c- ,pattern)) ((eq pattern :EOF) `(and (eql -c- +eof-character+) (eq (peek-char nil s nil :EOF) :EOF))) ((symbolp pattern) pattern) ((and (consp pattern) (eq (car pattern) 'or)) `(or ,@(mapcar #'compile-test (cdr pattern)))) ((and (consp pattern) (eq (car pattern) 'not) (null (cddr pattern))) `(not ,(compile-test (cadr pattern)))) ((and (consp pattern) (symbolp (car pattern))) `(,(car pattern) -c- ,@(cdr pattern))))) (defmacro rcase (c &body arms) `(let ((-c- ,c)) ,(compile-arms arms))) (defmacro reading ((var stream) &body states) `(let ((,var ,stream)) (labels ,(mapcar #'(lambda (state) (destructuring-bind (name args . body) state `(,name ,args (rcase ,(car args) ,@body)))) states) (,(caar states) (! ,var))))) ;;; Example tokenizer for identifiers and number separated by spaces. (let ((b (make-array 300 :element-type 'character :fill-pointer t))) (defun read-token-2 (stream) (reading (s stream) (token (-c-) ((space-char?) (token (! s))) (#\+ (int? (! s) NIL)) (#\- (int? (! s) T )) (#\. (frac (! s) NIL 0 1)) ((digit-char-p) (int (! s) NIL -it-)) ((alpha-char-p) (identifier (! s) (bpush -c- (bclear b)))) (:eof :eof)) (int? (-c- neg?) ((digit-char-p) (int (! s) neg? -it-)) (#\. (frac (! s) neg? 0 1))) (int (-c- neg? N) ((digit-char-p) (int (! s) neg? (d+ -it- N))) (#\. (frac (! s) neg? N 0)) ((or #\E #\e) (expo (! s) neg? N 0)) (t (un! -c- s) (if neg? (- N) N))) (frac (-c- neg? N scale) ((digit-char-p) (frac (! s) neg? (d+ -it- N) (- scale 1))) ((or #\E #\e) (expo (! s) neg? N scale)) (t (un! -c- s) (if neg? (- (* N (expt 10.0 scale))) (* N (expt 10.0 scale))))) (expo (-c- neg? N scale) (#\+ (expod (! s) neg? N NIL scale 0)) (#\- (expod (! s) neg? N T scale 0)) (t (expod -c- neg? N NIL scale 0))) (expod (-c- neg? N eneg? scale e) ((digit-char-p) (expod (! s) neg? N eneg? scale (d+ -it- e))) (t (un! -c- s) (let ((N (* N (expt 10.0 (+ (if eneg? (- e) e) scale))))) (if neg? (- N) N)))) (identifier (-c- buffer) ((space-char?) (intern buffer)) (t (identifier (! s) (bpush -c- buffer))))))) #|| This version uses nested labels to better model flow of control and limits of variable use. However, some tests are duplicated and it isn't as readable, though jumps have fewer arguments. ||# #+not-yet (let ((b (make-array 300 :element-type 'character :fill-pointer t))) (defun read-token-2 (stream) (reading (s stream) (token (-c-) ((int? (-c- neg?) ((int (-c- N) ((frac (-c- N scale) ((expo (-c-) ((expod (-c- eneg? e) () ((digit-char-p) (expod (! s) eneg? (d+ -it- e))) (t (un! -c- s) (let ((N (* N (expt 10.0 (+ (if eneg? (- e) e) scale))))) (if neg? (- N) N))))) (#\+ (expod (! s) NIL 0)) (#\- (expod (! s) T 0)) (t (expod -c- NIL 0))) ) ((digit-char-p) (frac (! s) (d+ -it- N) (- scale 1))) ((or #\E #\e) (expo (! s))) (t (un! -c- s) (if neg? (- (* N (expt 10.0 scale))) (* N (expt 10.0 scale))))) ) ((digit-char-p) (int (! s) (d+ -it- N))) (#\. (frac (! s) N 0)) ((or #\E #\e) (frac -c- N 0)) (t (un! -c- s) (if neg? (- N) N)))) ((digit-char-p) (int (! s) -it-)) (#\. (frac (! s) 0 1))) (identifier (-c- buffer) () ((space-char?) (intern buffer)) (t (identifier (! s) (bpush -c- buffer)))) ) ((space-char?) (token (! s))) (#\+ (int? (! s) NIL)) (#\- (int? (! s) T )) (#\. (int? -c- NIL)) ((digit-char-p) (int? -c- NIL)) ((alpha-char-p) (identifier (! s) (bpush -c- (bclear b)))) (:eof :eof)) )))