;; Based on http://common-lisp.net/~dcrampsie/smug.html ;; Inspired by https://github.com/joshua-choi/fnparse/ (module comparse (parse fail result item bind satisfies in is char-seq maybe sequence sequence* repeated zero-or-more one-or-more any-of all-of none-of none-of* preceded-by followed-by enclosed-by as-string recursive-parser memoize memo-table) (import chicken scheme) (use data-structures lazy-seq srfi-1 srfi-14 srfi-69 extras latch trie) (define ((result value) input) (cons value input)) (define fail (constantly #f)) (define (item input) (and (not (lazy-null? input)) (cons (lazy-head input) (lazy-tail input)))) (define ((bind parser proc) input) (and-let* ((value (parser input))) ((proc (car value)) (cdr value)))) (define (satisfies condition . args) (bind item (lambda (x) (if (apply condition x args) (result x) fail)))) (define (args-list parser more-parsers) (if (and (list? parser) (null? more-parsers)) parser (cons parser more-parsers))) (define (in collection . items) (if (and (null? items) (char-set? collection)) (satisfies (lambda (c) (and (char? c) (char-set-contains? collection c)))) (satisfies memq (args-list collection items)))) (define (is x) (satisfies eq? x)) (define-syntax sequence* (syntax-rules () ((_ () body ...) (begin body ...)) ((_ ((binding parser) more-bindings ...) body ...) (bind parser (lambda (binding) (sequence* (more-bindings ...) body ...)))))) (define (sequence parser . parsers) (let ((parsers (args-list parser parsers))) (lambda (input) (let loop ((parsers parsers) (parts '()) (input input)) (if (null? parsers) (cons (reverse parts) input) (and-let* ((value ((car parsers) input))) (loop (cdr parsers) (cons (car value) parts) (cdr value)))))))) (define ((char-seq str) input) (let ((len (string-length str))) (let loop ((pos 0) (input input)) (if (= len pos) (cons str input) (and (< pos len) (not (lazy-null? input)) (eq? (lazy-head input) (string-ref str pos)) (loop (+ pos 1) (lazy-tail input))))))) ;; Allow (any-of (list parser ...)) alternatively? (define ((any-of parser . parsers) input) (let loop ((parsers (args-list parser parsers))) (and (not (null? parsers)) (or ((car parsers) input) (loop (cdr parsers)))))) (define ((all-of parser . parsers) input) (let loop ((parsers (args-list parser parsers))) (and-let* ((value ((car parsers) input))) (if (null? (cdr parsers)) value (and value (loop (cdr parsers))))))) (define ((none-of parser . parsers) input) (let loop ((parsers (args-list parser parsers))) (if (null? parsers) (cons #t input) (and (not ((car parsers) input)) (loop (cdr parsers)))))) (define (preceded-by parser . parsers) (let loop ((parsers (args-list parser parsers))) (bind (car parsers) (lambda (value) (if (null? (cdr parsers)) (result value) (loop (cdr parsers))))))) (define (none-of* parser but . parsers) (receive (but parsers) (car+cdr (reverse (cons* parser but parsers))) (preceded-by (none-of parsers) but))) (define ((followed-by parser following . more-following) input) (and-let* ((value (parser input))) (let loop ((following (args-list following more-following)) (input (cdr value))) (if (null? following) value (and-let* ((value ((car following) input))) (loop (cdr following) (cdr value))))))) (define (enclosed-by open content close) (sequence* ((_ open) (value content) (_ close)) (result value))) (define (->parser object) (cond ((procedure? object) object) ((char-set? object) (satisfies object)) ((char? object) (is object)) ((string? object) (char-seq object)) (else (error "Don't know how to turn object into parser" object)))) (define (zero-or-more parser) (any-of (sequence* ((x parser) (xs (zero-or-more parser))) (result (cons x xs))) (result '()))) (define (one-or-more parser) (sequence* ((x parser) (y (zero-or-more parser))) (result (cons x y)))) (define (repeated-until parser end) (any-of (all-of end (result '())) (sequence* ((x parser) (y (repeated-until parser end))) (result (cons x y))))) (define (repeated* parser min max) (let ((min (or min 0))) (any-of (sequence* ((x parser) (y (repeated* parser (- min 1) (and max (- max 1))))) (result (cons x y))) (if (and (<= min 0) (or (not max) (>= max 0))) (result '()) fail)))) (define (repeated parser #!rest args #!key min max until) (cond (until (cond (max (followed-by (repeated* parser min max) until)) ((or (not min) (zero? min)) (repeated-until parser until)) (else (sequence* ((x (repeated* parser min min)) (y (repeated-until parser until))) (result (append x y)))))) ((or min max (null? args)) (repeated* parser min max)) (else (repeated* parser (car args) (car args))))) (define (maybe parser) (any-of parser (result #f))) (define (xconc x y) (conc y x)) (define (as-string parser) (sequence* ((parts parser)) (result (fold xconc "" (remove boolean? (flatten parts)))))) (define memo-table (make-parameter #f)) (define (lazy-seq-prefix from to) (let loop ((from from)) (if (or (eq? from to) (lazy-null? from)) '() (cons (lazy-head from) (loop (lazy-tail from)))))) (define (parser-memo-ref memo input) (let loop ((memo memo) (input input) (length 1)) (and (not (lazy-null? input)) (and-let* ((memo (trie-ref* memo (lazy-head input))) (value (trie-value memo))) (if (null? value) (loop memo (lazy-tail input) (+ 1 length)) (cons (car value) length)))))) (define (memo-ref parser input) (and-let* ((parser-memo (hash-table-ref/default (memo-table) parser #f)) (result (parser-memo-ref parser-memo input))) (cons (car result) (let loop ((n (cdr result)) (input input)) (if (zero? n) input (loop (- n 1) (lazy-tail input))))))) (define (memo-set! parser input) (and-let* ((result (parser input))) (hash-table-update! (memo-table) parser (lambda (memo) (trie-insert! memo (lazy-seq-prefix input (cdr result)) (car result)) memo) make-trie) result)) (define ((memoize parser) input) (if (memo-table) (or (memo-ref parser input) (memo-set! parser input)) (parser input))) (define-syntax recursive-parser (syntax-rules () ((_ body ...) (lambda () (lambda (input) (let-once ((parser (begin body ...))) (parser input))))))) (define (->lazy-seq x) (cond ((lazy-seq? x) x) ((string? x) (list->lazy-seq (string->list x))) ((list? x) (list->lazy-seq x)) ((input-port? x) (input-port->lazy-seq x read-char)) (else (error "Don't know how to turn object into lazy-seq" x)))) (define (parse parser input #!key memoize) (parameterize ((memo-table (if memoize (make-hash-table) (memo-table)))) (let* ((input (->lazy-seq input)) (result (parser (->lazy-seq input)))) (if result (values (car result) (cdr result)) (values result input))))) )