#|-------------------- 3.3 |# "./abnf-charlist.scm" 1791 ;; ;; ABNF parser combinators specialized for character lists. ;; ;; Copyright 2009-2010 Ivan Raikov and the Okinawa Institute of Science ;; and Technology. ;; ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of ;; the License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . (module abnf-charlist ( (concatenation lex:seq) (alternatives lex:bar) variable-repetition repetition repetition1 repetition-n optional-sequence range set set-from-string char lit alpha binary decimal hexadecimal ascii-char cr lf crlf ctl dquote htab lwsp octet sp vchar wsp quoted-pair quoted-string pass bind drop-consumed longest (:: lex:seq) :? :! :s :c :* :+ ) (import scheme chicken ) (require-extension typeclass input-classes abnf) (require-library lexgen) (import (prefix lexgen lex:)) (import (only lexgen Input->Token Token->CharLex )) (define char-list- (make- null? car cdr)) (define char-list- (Input->Token char-list-)) (define char-list- (Token->CharLex char-list-)) (define char-list- (Token.CharLex->CoreABNF char-list- char-list-)) (import-instance ( char-list-) ) ) #|-------------------- 3.3 |# "./abnf-consumers.scm" 3943 ;; ;; Convenience procedures and macros for manipulating items in the ;; stream of consumed tokens returned by an abnf-based parser. ;; ;; Copyright 2009 Ivan Raikov and the Okinawa Institute of Science ;; and Technology. ;; ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of ;; the License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . (module abnf-consumers * (import scheme chicken data-structures extras ) (require-extension abnf srfi-1 srfi-14) ;; collects all consumed objects of type obj? (define (consumed-objects obj?) (lambda (cs) (and (or (pair? cs) (null? cs)) (let loop ((cs cs) (ax (list))) (cond ((null? cs) (list ax )) ((obj? (car cs)) (loop (cdr cs) (cons (car cs) ax))) (else (cons ax cs))))))) ;; construct composite objects from consumed objects (define (consumed-objects-lift get-consumed) (lambda rest (let-optionals rest ((kons identity)) (let ((make (if (procedure? kons) kons (lambda (x) (and (pair? x) `(,kons . ,x)))))) (lambda (x) (let ((x1 (get-consumed x))) (and x1 (pair? x1) (list? (car x1)) (let ((item (make (car x1)))) (if item (cons item (cdr x1)) (cdr x1)))))))))) (define consumed-chars (consumed-objects char?)) (define consumed-chars->list (consumed-objects-lift consumed-chars)) (define (trim-ws-char-list cs) (let* ((cs1 (let loop ((cs cs)) (cond ((null? cs) (reverse cs)) ((char-set-contains? char-set:whitespace (car cs)) (loop (cdr cs))) (else (reverse cs))))) (cs2 (let loop ((cs cs1)) (cond ((null? cs) (reverse cs)) ((char-set-contains? char-set:whitespace (car cs)) (loop (cdr cs))) (else (reverse cs)))))) cs2)) ;; construct strings from consumed chars (define consumed-chars->string (consumed-chars->list list->string)) ;; construct symbols from consumed chars; trailing and preceding white ;; space is stripped (define consumed-chars->symbol (consumed-chars->list (compose string->symbol list->string trim-ws-char-list))) (define consumed-strings (consumed-objects string?)) (define consumed-strings->list (consumed-objects-lift consumed-strings)) (define consumed-pairs (consumed-objects pair?)) (define consumed-pairs->list (consumed-objects-lift consumed-pairs)) ;; shortcut for (bind (consumed-chars->list) (longest ... )) (define-syntax bind-consumed->list (syntax-rules () ((_ p) (bind (consumed-chars->list) (longest p))) ((_ l p) (bind (consumed-chars->list l) (longest p))) )) ;; shortcut for (bind consumed-chars->string (longest ... )) (define-syntax bind-consumed->string (syntax-rules () ((_ p) (bind consumed-chars->string (longest p))) )) ;; shortcut for (bind consumed-chars->symbol (longest ... )) (define-syntax bind-consumed->symbol (syntax-rules () ((_ p) (bind consumed-chars->symbol (longest p))) )) ;; shortcut for (bind (consumed-strings->list ...) (longest ... )) (define-syntax bind-consumed-strings->list (syntax-rules () ((_ l p) (bind (consumed-strings->list l) (longest p))) ((_ p) (bind (consumed-strings->list) (longest p))) )) ;; shortcut for (bind (consumed-pairs->list ...) (longest ... )) (define-syntax bind-consumed-pairs->list (syntax-rules () ((_ l p) (bind (consumed-pairs->list l) (longest p))) ((_ p) (bind (consumed-pairs->list) (longest p))) )) ) #|-------------------- 3.3 |# "./abnf.meta" 594 ;; -*- Hen -*- ((egg "abnf.egg") ; This should never change ; List here all the files that should be bundled as part of your egg. (files "abnf.setup" "abnf-consumers.scm" "abnf.meta" "abnf-charlist.scm" "abnf.scm") ; Your egg's license: (license "GPL-3") ; Pick one from the list of categories (see below) for your egg and ; enter it here. (category parsing) ; A list of eggs abnf depends on. (needs typeclass input-classes lexgen ) ;; (test-depends test) (doc-from-wiki) (author "Ivan Raikov") (synopsis "Parser combinators for Augmented BNF grammars (RFC 4234).")) #|-------------------- 3.3 |# "./abnf.scm" 7529 ;; ;; Parser for the grammar defined in RFC4234, "Augmented BNF for ;; Syntax Specifications: ABNF". ;; ;; ;; Copyright 2009-2010 Ivan Raikov and the Okinawa Institute of Science ;; and Technology. ;; ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of ;; the License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . (module abnf ( (concatenation lex:seq) (alternatives lex:bar) variable-repetition repetition repetition1 repetition-n optional-sequence pass bind drop-consumed longest ( :: lex:seq) :? :! :* :+ make- Input->Token Token->CharLex Token.CharLex->CoreABNF ) (import scheme chicken data-structures extras ) (require-extension srfi-1 srfi-14 srfi-69 typeclass input-classes) (require-library lexgen) (import (prefix lexgen lex:)) ;; the following is necessary because type classes are currently not ;; aware of module system prefixes (import (only lexgen Input->Token Token->CharLex )) (define-class ( T) ( L) range set set-from-string char lit alpha binary decimal hexadecimal ascii-char cr lf crlf ctl dquote htab lwsp octet sp vchar wsp quoted-pair quoted-string :s :c ) (define pass lex:pass) (define bind lex:bind) (define drop-consumed lex:drop) (define longest lex:longest) ;;;; ABNF operators ;; Concatenation (RFC 4234, Section 3.1) (define-syntax concatenation (syntax-rules () ((_) lex:pass) ((_ a) a) ((_ a b) (lex:seq a b)) ((concatenation a b ...) (lex:seq a (concatenation b ...))) )) ;; Alternatives (RFC 4234, Section 3.2) (define-syntax alternatives (syntax-rules () ((_) lex:pass) ((_ a) a) ((_ a b) (lex:bar a b)) ((alternatives a b ...) (lex:bar a (alternatives b ...))) )) ;; Value range alternatives (RFC 4234, Section 3.4) ;; (part of the CoreABNF typeclass) ;;(define range lex:range) ;; Specific repetition (RFC 4234, Section 3.7) (define (repetition-n n p) (let ((ps (list-tabulate n (lambda (i) p)))) (lex:lst ps))) ;; Variable repetition (RFC 4234, Section 3.6) ;; * repetition (define (repetition p) (lex:star* p)) ;; 1* repetition (define repetition1 lex:pos) (define (variable-repetition min max p) (if (< max min) (variable-repetition max min p) (let loop ((i (- max 1)) (k (+ min 1)) (r (if (positive? min) (repetition-n min p) lex:pass))) (cond ((>= i min) (loop (- i 1) (+ k 1) (lex:bar (repetition-n k p) r))) (else r))))) (define optional-sequence lex:opt) ;;;; Terminal values (RFC 4234, Section 2.3) ;; Matches a literal string (case-insensitive) (define=> (lit lex:) (lambda (s) (let* ((f (lambda (t) (tok t (lex:try char-ci=?)))) (ps (map f (if (string? s) (string->list s) s)))) (lex:lst ps)))) ;;;; Core Rules (RFC 4234, Appendix B) ;; Match any character of the alphabet. (define=> (alpha ) (set char-set:letter)) ;; Match [0..1] (define=> (binary ) (range #\0 #\1)) ;; Match [0..9] (define=> (decimal ) (range #\0 #\9)) ;; Match [0..9] and [A..F,a..f] (define=> (hexadecimal ) (set char-set:hex-digit)) ;; Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is). (define=> (ascii-char ) (set (ucs-range->char-set 1 127))) ;; Match the carriage return character \r. (define=> (cr ) (char (integer->char 13))) ;; Match the linefeed character \n. (define=> (lf ) (char (integer->char 10))) ;; Match the Internet newline \r\n. ;; cr lf ;; Match any US-ASCII control character. That is any character with a ;; decimal value in the range of [0..31,127]. (define=> (ctl ) (set char-set:iso-control)) ;; Match the double quote character " (define=> (dquote ) (char #\")) ;; Match the tab \t character (define=> (htab ) (char (integer->char 9))) ;; Match either 'sp' or 'htab'. (define=> (wsp ) (set (char-set #\space #\tab))) ;; Match linear white space: *(WSP / CRLF WSP) ;; (define=> (lwsp ) (lex:star* (lex:bar wsp (lex:seq (lex:drop crlf) wsp)))) ;; Match /any/ character. (define=> (octet ) (set char-set:full)) ;; Match the space character (define=> (sp ) (char #\space)) ;; Match any printable ASCII character. (The "v" stands for ;; "visible".) That is any character in the decimal range of ;; [33..126]. (define=> (vchar ) (set char-set:graphic)) ;; Match a "quoted pair". Any characters (excluding CR and LF) may be ;; quoted. ;; (define quoted-pair (lex:seq (char #\\) (lex:bar vchar wsp))) ;; Match a quoted string. The specials \ and " must be escaped inside ;; a quoted string; CR and LF are not allowed at all. (define char-set:quoted (char-set-complement (string->char-set "\\\"\r\n"))) (define=> (qtext ) (set char-set:quoted)) ;; (define qcont (lex:bar (lex:pos qtext) quoted-pair)) ;; (define quoted-string (lex:seq dquote (lex:seq (lex:star* qcont) dquote))) ;;;; Additional convenience procedures and parser combinators ;; match any character from a set defined as a string (define=> (set-from-string ) (lambda (s) (set (string->char-set s)))) (define=> (quoted-pair ) (lambda (vchar wsp) (let* ((quoted-pair (lex:seq (char #\\) (lex:bar vchar wsp)))) quoted-pair))) (define=> (quoted-string ) (lambda (dquote qtext quoted-pair) (let* ((qcont (lex:bar (lex:pos qtext) quoted-pair)) (quoted-string (lex:seq dquote (lex:seq (lex:star* qcont) dquote)))) quoted-string))) (define (Token.CharLex->CoreABNF T L) (let* ((lit (lit T)) (char ((lambda=> () char) L)) (range ((lambda=> () range) L)) (set ((lambda=> () set) L)) (alpha (alpha L)) (binary (binary L)) (decimal (decimal L)) (hexadecimal (hexadecimal L)) (ascii-char (ascii-char L)) (cr (cr L)) (lf (lf L)) (crlf (lex:seq cr lf)) (ctl (ctl L)) (dquote (dquote L)) (htab (htab L)) (wsp (wsp L)) (lwsp (lex:star* (lex:bar wsp (lex:seq (lex:drop crlf) wsp)))) (octet (octet L)) (sp (sp L)) (vchar (vchar L)) (qtext (qtext L)) (q-p ((quoted-pair L) vchar wsp)) (q-s ((quoted-string L) dquote qtext q-p)) (set-from-string (set-from-string L)) (:c char) (:s lit) ) (make- T L range set set-from-string char lit alpha binary decimal hexadecimal ascii-char cr lf crlf ctl dquote htab lwsp octet sp vchar wsp q-p q-s :s :c ) )) ;;;; Syntactic abbreviations ;;;; Based on a proposal by Moritz Heidkamp (define :? optional-sequence) (define :! drop-consumed) (define :* repetition) (define :+ repetition1) (define-syntax :: (syntax-rules () ((_ e1 e2 ...) (concatenation e1 e2 ...)))) ) #|-------------------- 3.3 |# "./abnf.setup" 822 ;; -*- Hen -*- (define (dynld-name fn) (make-pathname #f fn ##sys#load-dynamic-extension)) (required-extension-version 'lexgen 4.0) (compile -O -d2 -S -s abnf.scm -j abnf) (compile -s abnf.import.scm) (compile -O -d2 -S -s abnf-charlist.scm -j abnf-charlist) (compile -s abnf-charlist.import.scm) (compile -O -d2 -S -s abnf-consumers.scm -j abnf-consumers) (compile -s abnf-consumers.import.scm) (install-extension ;; Name of your extension: 'abnf ;; Files to install for your extension: `(,(dynld-name "abnf") ,(dynld-name "abnf.import") ,(dynld-name "abnf-charlist") ,(dynld-name "abnf-charlist.import") ,(dynld-name "abnf-consumers") ,(dynld-name "abnf-consumers.import") ) ;; Assoc list with properties for your extension: '((version 3.3) (documentation "abnf.html") ))