;; Based on http://common-lisp.net/~dcrampsie/smug.html ;; Inspired by https://github.com/joshua-choi/fnparse/ (module comparse (parse fail result item bind satisfies in is char-seq maybe sequence sequence* repeated zero-or-more one-or-more any-of all-of none-of none-of* preceded-by followed-by enclosed-by end-of-input as-string recursive-parser memoize memo-table ->parser-input parser-input? parser-input->lazy-seq parser-input->list parser-input->string parser-input-end?) (import chicken scheme latch) (use data-structures lazy-seq srfi-1 srfi-14 srfi-69 extras trie ports) (define ((result value) input) (cons value input)) (define fail (constantly #f)) (define (item input) (and (not (lazy-null? input)) (cons (lazy-head input) (lazy-tail input)))) (define ((bind parser proc) input) (and-let* ((value (parser input))) ((proc (car value)) (cdr value)))) (define (satisfies condition . args) (bind item (lambda (x) (if (apply condition x args) (result x) fail)))) (define (args-list parser more-parsers) (if (and (list? parser) (null? more-parsers)) parser (cons parser more-parsers))) (define (in collection . items) (if (and (null? items) (char-set? collection)) (satisfies (lambda (c) (and (char? c) (char-set-contains? collection c)))) (satisfies memq (args-list collection items)))) (define (is x) (satisfies eq? x)) (define-syntax sequence* (syntax-rules () ((_ () body ...) (begin body ...)) ((_ ((binding parser) more-bindings ...) body ...) (bind parser (lambda (binding) (sequence* (more-bindings ...) body ...)))))) (define (sequence parser . parsers) (let ((parsers (args-list parser parsers))) (lambda (input) (let loop ((parsers parsers) (parts '()) (input input)) (if (null? parsers) (cons (reverse parts) input) (and-let* ((value ((car parsers) input))) (loop (cdr parsers) (cons (car value) parts) (cdr value)))))))) (define ((char-seq str) input) (let ((len (string-length str))) (let loop ((pos 0) (input input)) (if (= len pos) (cons str input) (and (< pos len) (not (lazy-null? input)) (eq? (lazy-head input) (string-ref str pos)) (loop (+ pos 1) (lazy-tail input))))))) (define ((any-of parser . parsers) input) (let loop ((parsers (args-list parser parsers))) (and (not (null? parsers)) (or ((car parsers) input) (loop (cdr parsers)))))) (define ((all-of parser . parsers) input) (let loop ((parsers (args-list parser parsers))) (and-let* ((value ((car parsers) input))) (if (null? (cdr parsers)) value (and value (loop (cdr parsers))))))) (define ((none-of parser . parsers) input) (let loop ((parsers (args-list parser parsers))) (if (null? parsers) (cons #t input) (and (not ((car parsers) input)) (loop (cdr parsers)))))) (define (preceded-by parser . parsers) (let loop ((parsers (args-list parser parsers))) (bind (car parsers) (lambda (value) (if (null? (cdr parsers)) (result value) (loop (cdr parsers))))))) (define (none-of* parser but . parsers) (receive (but parsers) (car+cdr (reverse (cons* parser but parsers))) (preceded-by (none-of parsers) but))) (define ((followed-by parser following . more-following) input) (and-let* ((value (parser input))) (let loop ((following (args-list following more-following)) (input (cdr value))) (if (null? following) value (and-let* ((value ((car following) input))) (loop (cdr following) (cdr value))))))) (define (enclosed-by open content close) (sequence* ((_ open) (value content) (_ close)) (result value))) (define (->parser object) (cond ((procedure? object) object) ((char-set? object) (satisfies object)) ((char? object) (is object)) ((string? object) (char-seq object)) (else (error "Don't know how to turn object into parser" object)))) (define (zero-or-more parser) (any-of (sequence* ((x parser) (xs (zero-or-more parser))) (result (cons x xs))) (result '()))) (define (one-or-more parser) (sequence* ((x parser) (y (zero-or-more parser))) (result (cons x y)))) (define (repeated-until parser end) (any-of (all-of end (result '())) (sequence* ((x parser) (y (repeated-until parser end))) (result (cons x y))))) (define (repeated/end min) (if (<= min 0) (result '()) fail)) (define (repeated/min/max parser min max) (if (zero? max) (repeated/end min) (any-of (sequence* ((x parser) (y (repeated/min/max parser (- min 1) (- max 1)))) (result (cons x y))) (repeated/end min)))) (define (repeated/min parser min) (any-of (sequence* ((x parser) (y (repeated/min parser (- min 1)))) (result (cons x y))) (repeated/end min))) (define (repeated parser #!rest args #!key (min 0) max until) (cond (until (cond (max (followed-by (repeated/min/max parser min max) until)) ((zero? min) (repeated-until parser until)) (else (sequence* ((x (repeated/min/max parser min min)) (y (repeated-until parser until))) (result (append x y)))))) (max (repeated/min/max parser min max)) ((and (pair? args) (null? (cdr args))) (repeated/min/max parser (car args) (car args))) (else (repeated/min parser min)))) (define (maybe parser #!optional default) (any-of parser (result default))) (define end-of-input (none-of item)) (define (as-string parser) (sequence* ((parts parser)) (result (call-with-output-string (lambda (out) (let print-loop ((parts parts)) (cond ((pair? parts) (let pair-loop ((parts parts)) (cond ((pair? parts) (print-loop (car parts)) (pair-loop (cdr parts))) ((not (null? parts)) ; improper list (print-loop parts))))) ((and parts (not (null? parts))) (write-string (->string parts) #f out))))))))) (define memo-table (make-parameter #f)) (define (lazy-seq-prefix from to) (let loop ((from from)) (if (or (eq? from to) (lazy-null? from)) '() (cons (lazy-head from) (loop (lazy-tail from)))))) (define (parser-memo-ref memo input) (let loop ((memo memo) (input input) (length 1)) (and (not (lazy-null? input)) (and-let* ((memo (trie-ref* memo (lazy-head input))) (value (trie-value memo))) (if (null? value) (loop memo (lazy-tail input) (+ 1 length)) (cons (car value) length)))))) (define (memo-ref parser input) (and-let* ((parser-memo (hash-table-ref/default (memo-table) parser #f)) (result (parser-memo-ref parser-memo input))) (cons (car result) (let loop ((n (cdr result)) (input input)) (if (zero? n) input (loop (- n 1) (lazy-tail input))))))) (define (memo-set! parser input) (and-let* ((result (parser input))) (hash-table-update! (memo-table) parser (lambda (memo) (trie-insert! memo (lazy-seq-prefix input (cdr result)) (car result)) memo) make-trie) result)) (define ((memoize parser) input) (if (memo-table) (or (memo-ref parser input) (memo-set! parser input)) (parser input))) (define-syntax recursive-parser (syntax-rules () ((_ body ...) (lambda () (lambda (input) (let-once ((parser (begin body ...))) (parser input))))))) (define-record-type parser-input (make-parser-input lazy-seq) parser-input? (lazy-seq parser-input->lazy-seq)) (define-record-printer (parser-input input out) (display "#lazy-seq input))) (if (lazy-null? seq) (display "-end>" out) (let loop ((n 10) (seq seq)) (if (lazy-seq-realized? seq) (if (or (zero? n) (lazy-null? seq)) (display ">" out) (begin (display " " out) (write (lazy-head seq) out) (loop (- n 1) (lazy-tail seq)))) (display " ...>" out)))))) (define (parser-input-end? input) (lazy-null? (parser-input->lazy-seq input))) (define (parser-input->list input) (lazy-seq->list (parser-input->lazy-seq input))) (define (parser-input->string input) (list->string (parser-input->list input))) (define (->lazy-seq location x) (cond ((lazy-seq? x) x) ((parser-input? x) (parser-input->lazy-seq x)) ((string? x) (list->lazy-seq (string->list x))) ((list? x) (list->lazy-seq x)) ((input-port? x) (input-port->lazy-seq x read-char)) (else (error location "Unable to convert object to parser input" x)))) (define (->parser-input x) (make-parser-input (->lazy-seq '->parser-input x))) (define (parse parser input #!key memoize) (parameterize ((memo-table (if memoize (make-hash-table) (memo-table)))) (let* ((result (parser (->lazy-seq 'parse input)))) (if result (values (car result) (make-parser-input (cdr result))) (values result (make-parser-input (->lazy-seq 'parse input))))))) )