;;;; sp.scm
(module sp
(parse-syntax
evaluate-runtime-initialization
process-files
+default-rule-cache-size+)
(import scheme chicken)
(use big-chicken honu matchable miscmacros sp-runtime)
(define +default-rule-cache-size+ 10)
(define (parse-syntax input #!key (source "") trace default-action debug cache-size)
(let ((tokens '())
(rules '())
(*vars* '())
(*here* #f)
(code '())
(refd '()))
(define (top in)
(match in
(() (done))
(((? symbol? id) ':= (? string? rx) . more)
(when (assq id tokens)
(warning (conc "redefinition of token `" id "'")))
(push! (cons id (string-append "^" rx)) tokens)
(top more))
(((? symbol? id) '-> . more)
(top (def id more)))
(((? string? fn) . more)
(push! `(include ,fn) code)
(top more))
(((? pair? exp) . more)
(push! exp code)
(top more))
(_ (error "unexpected token" source in))))
(define (sequence in)
(let loop ((in in) (seq '()))
(match in
(() (values (reverse seq) in))
(('|:| exp . more)
(loop more (cons `( ,exp) seq)))
(('|\|| . more)
(loop more (cons ' seq)))
((('#%parens stuff ...) . more)
(loop more (cons (alternatives (sequence stuff)) seq)))
((('#%brackets stuff ...) . more)
(loop more (cons `( ,(alternatives (sequence stuff))) seq)))
((('#%braces stuff ...) '+ . more)
(let ((s (alternatives (sequence stuff))))
(loop more (cons `( ,s) seq))))
((('#%braces stuff ...) . more)
(let ((s (alternatives (sequence stuff))))
(loop more (cons `( ,s) seq))))
(((? (cut eq? '... <>)) . more)
(loop more (cons `( ,(car seq)) (cdr seq))))
(('_ (? string? msg) . more)
(loop more (cons `( ,msg) seq)))
(('_ . more)
(loop more (cons '( #f) seq)))
(('! . more)
(loop more (cons ' seq)))
(('~ . more)
(loop more (cons ' seq)))
(((? string? s) ('#%parens '~ strs ...) . more)
(loop more (cons `( ,s ,strs) seq)))
(((? string? s) . more)
(loop more (cons `( ,s) seq)))
(((? char? c) . more)
(loop more (cons `( ,(string c)) seq)))
(((? symbol? var) '= . more)
(loop more (cons `( ,var) seq)))
(((? symbol?) (or ':= '->) . more)
(values (reverse seq) in))
(((? symbol? tid) . more)
(push! (cons tid *here*) refd)
(loop more (cons tid seq)))
(((? pair? t) . more)
(loop more (cons `( ,t) seq)))
(_ (error "unexpected token" source (fragment in))))))
(define (fragment in)
(if (> (length in) 10)
(append (take in 10) '(...))
in))
(define (def id in)
(fluid-let ((*vars* '())
(*here* id))
(let-values (((seq in) (sequence in)))
(let ((seq (alternatives seq #t))
(vars (delete-duplicates *vars* eq?)))
(when (lr? seq id)
(error (conc "left-recursion detected in rule `" id "'")))
(defrule id vars seq)
in))))
(define (defrule id vars rule)
(when debug (pp (list id vars rule) (current-error-port)))
(when (assq id rules)
(warning (conc "redefinition of rule `" id "'")))
(push! (list id vars rule) rules))
(define (lr? rule id)
(let rec ((rule rule))
(match rule
((' x1 . _) (rec x1))
((' _ x) (rec x))
((' x) (rec x))
((' x) (rec x))
((' _ x) (rec x))
((' xs ...) (any rec xs))
((' x) (rec x))
((? symbol? x1) (eq? x1 id))
(_ #f))))
(define (bind lst)
(let loop ((lst lst))
(match lst
(() '())
(((' var))
(push! var *vars*)
`(( ,var )))
(((' var) exp . more)
(push! var *vars*)
(cons `( ,var ,exp) (loop more)))
((x . more) (cons x (loop more))))))
(define (arm lst act) ; assumes lst is reversed
(match lst
(((' exp) . lst)
`(( ,exp ( ,@(bind (reverse lst))))))
(_
;; if arm has only a single match that is not a token, we don't generate def. action
(cond ((and (= 1 (length lst))
(or (not (symbol? (car lst)))
(not (assq (car lst) tokens))))
(bind lst))
((and act default-action)
`(( (default-action) ( ,@(bind (reverse lst))))))
(else (bind (reverse lst)))))))
(define (alternatives rule #!optional def)
(let loop ((rule rule) (as '()) (part '()))
(match rule
(()
(let ((p (if (null? part) ' `( ,@(arm part def)))))
(if (null? as)
p
`( ,@(reverse (cons p as))))))
((' . more)
(loop more (cons `( ,@(arm part def)) as) '()))
((x . more)
(loop more as (cons x part))))))
(define (done)
(for-each
(match-lambda
((id . loc)
(unless (or (assq id rules) (assq id tokens))
(warning (conc loc ": reference to undefined identifier `" id "'")))))
refd)
(unless (assq 'start rules)
(warning "missing `start' rule"))
`(begin
,@(reverse code)
,@(map (match-lambda
(('whitespace . rx)
`(whitespace (irregex ,rx 's 'fast)))
((id . rx)
`(define ,(symbol-append id '$) (irregex ,rx 's 'fast))))
(reverse tokens))
,@(map (match-lambda
((id vars rule)
(translate id vars rule rules trace default-action)))
(reverse rules))
,(if (or (not cache-size) (positive? cache-size))
`(set! start&
(let ((old start&))
(lambda (state)
(fluid-let ,(map (lambda (rule)
(list (symbol-append (car rule) '&&)
`(make-vector
,(or cache-size +default-rule-cache-size+)
#f)))
rules)
,(if debug
`(let ((r (old state)))
,@(map (lambda (rule)
`(pp (cons ',(car rule)
(vector->list ,(symbol-append (car rule) '&&)))))
rules)
r)
'(old state))))))
'(void))))
(top input)))
(define (translate name vars rule rules trace defaction)
(let ((%input (gensym 'input))
(%abort (gensym 'abort))
(name& (symbol-append name '&))
(name&& (symbol-append name '&&)))
`(begin
(: ,name&& (vector-of (or boolean (pair fixnum (or boolean (struct result))))))
(define ,name&&)
(: ,name& ((struct state) -> (or boolean (struct result))))
(define (,name& ,%input)
(call/cc
(lambda (,%abort)
(,(if trace 'with-cached-result/trace 'with-cached-result)
,name&& ',name ,%input ',trace
(lambda ()
(assume
((,%input (struct state)))
(let ,(map (cut list <> ''()) vars)
,(let walk ((rule rule) (in %input))
(match rule
((' msg)
`(fail ,in ,(or msg (sprintf "syntax error (~a)" ',name))))
((or ' '())
`(success ,in))
('
`(make-result ,in ,in))
(' `(,%abort #f))
((' alts ...)
`(or ,@(map (cut walk <> in) alts)))
((' exp x)
(let ((%p1 (gensym 'p)))
`(and-let* ((,%p1 ,(walk x in)))
(assume ((,%p1 (struct result)))
,(cond ((string? exp)
`(constant-result-action ,exp ,%p1))
((equal? exp "*")
'(default-action))
((integer? exp)
`(result-value-ref-action ,(sub1 exp) ,%p1))
(else `(apply-action ',name (lambda () ,exp) ,%p1)))))))
((' exp)
`(apply-test ,in (lambda () ,exp)))
((' var x)
(let ((%p1 (gensym 'p)))
`(and-let* ((,%p1 ,(walk x in)))
(assume ((,%p1 (struct result)))
(set! ,var (result-value ,%p1))
,%p1))))
((' x)
(walk x in))
((' x1 xs ...)
(let ((%p1 (gensym 'p))
(%in1 (gensym 'in1)))
`(and-let* ((,%p1 ,(walk x1 in)))
(assume ((,%p1 (struct result)))
(let ((,%in1 (result-state ,%p1)))
(combine ,%p1 ,(walk `( ,@xs) %in1)))))))
((' str notlst)
`(ltoken ,str ',notlst ,in))
((' str)
`(ltoken ,str '() ,in))
((' x)
(let ((%p1 (gensym 'p))
(%in1 (gensym 'in)))
`(let* ((,%in1 ,in)
(,%p1 ,(walk x %in1)))
(if ,%p1
,(if defaction
`(make-result
(list (result-value ,%p1))
(result-state ,%p1))
%p1)
(make-result
',(if defaction '(#f) '())
,%in1)))))
((' x)
(let ((%loop (gensym 'loop))
(%i1 (gensym 'in))
(%p1 (gensym 'p))
(%rs (gensym 'rs)))
`(let ,%loop ((,%i1 ,in) (,%rs '()))
(let ((,%p1 ,(walk x %i1)))
(if ,%p1
(,%loop (result-state ,%p1) (cons (result-value ,%p1) ,%rs))
(make-result
,(if defaction
`(list (concatenate (reverse ,%rs)))
`(concatenate (reverse ,%rs)))
,%i1))))))
((' x)
(let ((%loop (gensym 'loop))
(%i1 (gensym 'in))
(%p1 (gensym 'p))
(%rs (gensym 'rs))
(%p0 (gensym 'p)))
`(and-let* ((,%p0 ,(walk x in)))
(let ,%loop ((,%i1 (result-state ,%p0))
(,%rs (list (result-value ,%p0))))
(let ((,%p1 ,(walk x %i1)))
(if ,%p1
(,%loop (result-state ,%p1) (cons (result-value ,%p1) ,%rs))
(make-result
,(if defaction
`(list (concatenate (reverse ,%rs)))
`(concatenate (reverse ,%rs)))
,%i1)))))))
(t (if (assq t rules)
`(,(symbol-append t '&) ,in)
`(mtoken ,(symbol-append t '$) ,in)))))))))))))))
(define (read-grammar fn)
(read-file fn read-honu))
(define +runtime-initialization+
'(begin
(require-extension irregex srfi-1 srfi-13 extras utils)
(import sp-runtime)))
(define (evaluate-runtime-initialization)
(eval '(require 'chicken-syntax)) ; for "and-let*"
(eval +runtime-initialization+))
(define (process-files grs getsrc
#!key compile (processor void) (output #f) repeat trace (counters 'both)
(skip-whitespace #t) line-input default-action debug cache-size)
(let ((outp (if output
(open-output-file output)
(current-output-port))))
(when compile
(fprintf outp ";;; GENERATED BY \"sp\"~%~%")
(pp +runtime-initialization+ outp))
(for-each
(lambda (gr)
(let ((prg (parse-syntax
(read-grammar gr) source: gr trace: trace debug: debug
cache-size: cache-size
default-action: default-action)))
(cond (compile
(fprintf outp "~%;; ~a~%" gr)
(pp prg outp)
(newline outp))
(else
(eval prg)))))
grs)
(cond (compile (fprintf outp "~%;;; END OF FILE~%"))
(else (process-input
(eval 'start&)
getsrc
counters: counters
processor: processor
repeat: repeat trace: trace
skipws: skip-whitespace line-input: line-input)))))
)