;;;; sp.scm (module sp (parse-syntax evaluate-runtime-initialization process-files +default-rule-cache-size+) (import scheme chicken) (use big-chicken honu matchable miscmacros sp-runtime) (define +default-rule-cache-size+ 10) (define (parse-syntax input #!key (source "") trace default-action debug cache-size) (let ((tokens '()) (rules '()) (*vars* '()) (*here* #f) (code '()) (refd '())) (define (top in) (match in (() (done)) (((? symbol? id) ':= (? string? rx) . more) (when (assq id tokens) (warning (conc "redefinition of token `" id "'"))) (push! (cons id (string-append "^" rx)) tokens) (top more)) (((? symbol? id) '-> . more) (top (def id more))) (((? string? fn) . more) (push! `(include ,fn) code) (top more)) (((? pair? exp) . more) (push! exp code) (top more)) (_ (error "unexpected token" source in)))) (define (sequence in) (let loop ((in in) (seq '())) (match in (() (values (reverse seq) in)) (('|:| exp . more) (loop more (cons `( ,exp) seq))) (('|\|| . more) (loop more (cons ' seq))) ((('#%parens stuff ...) . more) (loop more (cons (alternatives (sequence stuff)) seq))) ((('#%brackets stuff ...) . more) (loop more (cons `( ,(alternatives (sequence stuff))) seq))) ((('#%braces stuff ...) '+ . more) (let ((s (alternatives (sequence stuff)))) (loop more (cons `( ,s) seq)))) ((('#%braces stuff ...) . more) (let ((s (alternatives (sequence stuff)))) (loop more (cons `( ,s) seq)))) (((? (cut eq? '... <>)) . more) (loop more (cons `( ,(car seq)) (cdr seq)))) (('_ (? string? msg) . more) (loop more (cons `( ,msg) seq))) (('_ . more) (loop more (cons '( #f) seq))) (('! . more) (loop more (cons ' seq))) (('~ . more) (loop more (cons ' seq))) (((? string? s) ('#%parens '~ strs ...) . more) (loop more (cons `( ,s ,strs) seq))) (((? string? s) . more) (loop more (cons `( ,s) seq))) (((? char? c) . more) (loop more (cons `( ,(string c)) seq))) (((? symbol? var) '= . more) (loop more (cons `( ,var) seq))) (((? symbol?) (or ':= '->) . more) (values (reverse seq) in)) (((? symbol? tid) . more) (push! (cons tid *here*) refd) (loop more (cons tid seq))) (((? pair? t) . more) (loop more (cons `( ,t) seq))) (_ (error "unexpected token" source (fragment in)))))) (define (fragment in) (if (> (length in) 10) (append (take in 10) '(...)) in)) (define (def id in) (fluid-let ((*vars* '()) (*here* id)) (let-values (((seq in) (sequence in))) (let ((seq (alternatives seq #t)) (vars (delete-duplicates *vars* eq?))) (when (lr? seq id) (error (conc "left-recursion detected in rule `" id "'"))) (defrule id vars seq) in)))) (define (defrule id vars rule) (when debug (pp (list id vars rule) (current-error-port))) (when (assq id rules) (warning (conc "redefinition of rule `" id "'"))) (push! (list id vars rule) rules)) (define (lr? rule id) (let rec ((rule rule)) (match rule ((' x1 . _) (rec x1)) ((' _ x) (rec x)) ((' x) (rec x)) ((' x) (rec x)) ((' _ x) (rec x)) ((' xs ...) (any rec xs)) ((' x) (rec x)) ((? symbol? x1) (eq? x1 id)) (_ #f)))) (define (bind lst) (let loop ((lst lst)) (match lst (() '()) (((' var)) (push! var *vars*) `(( ,var ))) (((' var) exp . more) (push! var *vars*) (cons `( ,var ,exp) (loop more))) ((x . more) (cons x (loop more)))))) (define (arm lst act) ; assumes lst is reversed (match lst (((' exp) . lst) `(( ,exp ( ,@(bind (reverse lst)))))) (_ ;; if arm has only a single match that is not a token, we don't generate def. action (cond ((and (= 1 (length lst)) (or (not (symbol? (car lst))) (not (assq (car lst) tokens)))) (bind lst)) ((and act default-action) `(( (default-action) ( ,@(bind (reverse lst)))))) (else (bind (reverse lst))))))) (define (alternatives rule #!optional def) (let loop ((rule rule) (as '()) (part '())) (match rule (() (let ((p (if (null? part) ' `( ,@(arm part def))))) (if (null? as) p `( ,@(reverse (cons p as)))))) ((' . more) (loop more (cons `( ,@(arm part def)) as) '())) ((x . more) (loop more as (cons x part)))))) (define (done) (for-each (match-lambda ((id . loc) (unless (or (assq id rules) (assq id tokens)) (warning (conc loc ": reference to undefined identifier `" id "'"))))) refd) (unless (assq 'start rules) (warning "missing `start' rule")) `(begin ,@(reverse code) ,@(map (match-lambda (('whitespace . rx) `(whitespace (irregex ,rx 's 'fast))) ((id . rx) `(define ,(symbol-append id '$) (irregex ,rx 's 'fast)))) (reverse tokens)) ,@(map (match-lambda ((id vars rule) (translate id vars rule rules trace default-action))) (reverse rules)) ,(if (or (not cache-size) (positive? cache-size)) `(set! start& (let ((old start&)) (lambda (state) (fluid-let ,(map (lambda (rule) (list (symbol-append (car rule) '&&) `(make-vector ,(or cache-size +default-rule-cache-size+) #f))) rules) ,(if debug `(let ((r (old state))) ,@(map (lambda (rule) `(pp (cons ',(car rule) (vector->list ,(symbol-append (car rule) '&&))))) rules) r) '(old state)))))) '(void)))) (top input))) (define (translate name vars rule rules trace defaction) (let ((%input (gensym 'input)) (%abort (gensym 'abort)) (name& (symbol-append name '&)) (name&& (symbol-append name '&&))) `(begin (: ,name&& (vector-of (or boolean (pair fixnum (or boolean (struct result)))))) (define ,name&&) (: ,name& ((struct state) -> (or boolean (struct result)))) (define (,name& ,%input) (call/cc (lambda (,%abort) (,(if trace 'with-cached-result/trace 'with-cached-result) ,name&& ',name ,%input ',trace (lambda () (assume ((,%input (struct state))) (let ,(map (cut list <> ''()) vars) ,(let walk ((rule rule) (in %input)) (match rule ((' msg) `(fail ,in ,(or msg (sprintf "syntax error (~a)" ',name)))) ((or ' '()) `(success ,in)) (' `(make-result ,in ,in)) (' `(,%abort #f)) ((' alts ...) `(or ,@(map (cut walk <> in) alts))) ((' exp x) (let ((%p1 (gensym 'p))) `(and-let* ((,%p1 ,(walk x in))) (assume ((,%p1 (struct result))) ,(cond ((string? exp) `(constant-result-action ,exp ,%p1)) ((equal? exp "*") '(default-action)) ((integer? exp) `(result-value-ref-action ,(sub1 exp) ,%p1)) (else `(apply-action ',name (lambda () ,exp) ,%p1))))))) ((' exp) `(apply-test ,in (lambda () ,exp))) ((' var x) (let ((%p1 (gensym 'p))) `(and-let* ((,%p1 ,(walk x in))) (assume ((,%p1 (struct result))) (set! ,var (result-value ,%p1)) ,%p1)))) ((' x) (walk x in)) ((' x1 xs ...) (let ((%p1 (gensym 'p)) (%in1 (gensym 'in1))) `(and-let* ((,%p1 ,(walk x1 in))) (assume ((,%p1 (struct result))) (let ((,%in1 (result-state ,%p1))) (combine ,%p1 ,(walk `( ,@xs) %in1))))))) ((' str notlst) `(ltoken ,str ',notlst ,in)) ((' str) `(ltoken ,str '() ,in)) ((' x) (let ((%p1 (gensym 'p)) (%in1 (gensym 'in))) `(let* ((,%in1 ,in) (,%p1 ,(walk x %in1))) (if ,%p1 ,(if defaction `(make-result (list (result-value ,%p1)) (result-state ,%p1)) %p1) (make-result ',(if defaction '(#f) '()) ,%in1))))) ((' x) (let ((%loop (gensym 'loop)) (%i1 (gensym 'in)) (%p1 (gensym 'p)) (%rs (gensym 'rs))) `(let ,%loop ((,%i1 ,in) (,%rs '())) (let ((,%p1 ,(walk x %i1))) (if ,%p1 (,%loop (result-state ,%p1) (cons (result-value ,%p1) ,%rs)) (make-result ,(if defaction `(list (concatenate (reverse ,%rs))) `(concatenate (reverse ,%rs))) ,%i1)))))) ((' x) (let ((%loop (gensym 'loop)) (%i1 (gensym 'in)) (%p1 (gensym 'p)) (%rs (gensym 'rs)) (%p0 (gensym 'p))) `(and-let* ((,%p0 ,(walk x in))) (let ,%loop ((,%i1 (result-state ,%p0)) (,%rs (list (result-value ,%p0)))) (let ((,%p1 ,(walk x %i1))) (if ,%p1 (,%loop (result-state ,%p1) (cons (result-value ,%p1) ,%rs)) (make-result ,(if defaction `(list (concatenate (reverse ,%rs))) `(concatenate (reverse ,%rs))) ,%i1))))))) (t (if (assq t rules) `(,(symbol-append t '&) ,in) `(mtoken ,(symbol-append t '$) ,in))))))))))))))) (define (read-grammar fn) (read-file fn read-honu)) (define +runtime-initialization+ '(begin (require-extension irregex srfi-1 srfi-13 extras utils) (import sp-runtime))) (define (evaluate-runtime-initialization) (eval '(require 'chicken-syntax)) ; for "and-let*" (eval +runtime-initialization+)) (define (process-files grs getsrc #!key compile (processor void) (output #f) repeat trace (counters 'both) (skip-whitespace #t) line-input default-action debug cache-size) (let ((outp (if output (open-output-file output) (current-output-port)))) (when compile (fprintf outp ";;; GENERATED BY \"sp\"~%~%") (pp +runtime-initialization+ outp)) (for-each (lambda (gr) (let ((prg (parse-syntax (read-grammar gr) source: gr trace: trace debug: debug cache-size: cache-size default-action: default-action))) (cond (compile (fprintf outp "~%;; ~a~%" gr) (pp prg outp) (newline outp)) (else (eval prg))))) grs) (cond (compile (fprintf outp "~%;;; END OF FILE~%")) (else (process-input (eval 'start&) getsrc counters: counters processor: processor repeat: repeat trace: trace skipws: skip-whitespace line-input: line-input))))) )