;; ;; Parser for the grammar defined in RFC4234, "Augmented BNF for ;; Syntax Specifications: ABNF". ;; ;; ;; Copyright 2009-2010 Ivan Raikov and the Okinawa Institute of Science ;; and Technology. ;; ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of ;; the License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . (module abnf ( (concatenation lex:seq) (alternatives lex:bar) variable-repetition repetition repetition1 repetition-n optional-sequence range set set-from-string char lit alpha binary decimal hexadecimal ascii-char cr lf crlf ctl dquote htab lwsp octet sp vchar wsp quoted-pair quoted-string pass bind drop-consumed longest (:: lex:seq) ( :| lex:bar) :? :! :s :c :* :+ ) (import scheme chicken data-structures extras ) (require-extension srfi-1 srfi-14 srfi-69) (require-library lexgen) (import (prefix lexgen lex:)) (define pass lex:pass) (define bind lex:bind) (define drop-consumed lex:drop) (define longest lex:longest) ;;;; Terminal values (RFC 4234, Section 2.3) ;; Matches a single character (define char lex:char) ;; Concatenation (RFC 4234, Section 3.1) (define-syntax concatenation (syntax-rules () ((_) lex:pass) ((_ a) a) ((_ a b) (lex:seq a b)) ((concatenation a b ...) (lex:seq a (concatenation b ...))) )) ;; Alternatives (RFC 4234, Section 3.2) (define-syntax alternatives (syntax-rules () ((_) lex:pass) ((_ a) a) ((_ a b) (lex:bar a b)) ((alternatives a b ...) (lex:bar a (alternatives b ...))) )) ;; Value range alternatives (RFC 4234, Section 3.4) (define range lex:range) ;; Specific repetition (RFC 4234, Section 3.7) (define (repetition-n n p) (let ((ps (list-tabulate n (lambda (i) p)))) (lex:lst ps))) ;; Variable repetition (RFC 4234, Section 3.6) ;; * repetition (define (repetition p) (lex:star* p)) ;; 1* repetition (define repetition1 lex:pos) (define (variable-repetition min max p) (if (< max min) (variable-repetition max min p) (let loop ((i (- max 1)) (k (+ min 1)) (r (if (positive? min) (repetition-n min p) lex:pass))) (cond ((>= i min) (loop (- i 1) (+ k 1) (lex:bar (repetition-n k p) r))) (else r))))) (define optional-sequence lex:opt) ;; Matches a literal string (case-insensitive) (define (lit s) (let* ((f (lambda (t) (lex:tok t (lex:try char-ci=?)))) (ps (map f (if (string? s) (string->list s) s)))) (lex:lst ps))) ;;;; Core Rules (RFC 4234, Section 6.1) ;; Match any character of the alphabet. (define alpha (lex:set char-set:letter)) ;; Match [0..1] (define binary (lex:range #\0 #\1)) ;; Match [0..9] (define decimal (lex:range #\0 #\9)) ;; Match [0..9] and [A..F,a..f] (define hexadecimal (lex:set char-set:hex-digit)) ;; Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is). (define ascii-char (lex:set (ucs-range->char-set 1 127))) ;; Match the carriage return character \r. (define cr (char (integer->char 13))) ;; Match the linefeed character \n. (define lf (char (integer->char 10))) ;; Match the Internet newline \r\n. (define crlf (lex:seq cr lf)) ;; Match any US-ASCII control character. That is any character with a ;; decimal value in the range of [0..31,127]. (define ctl (lex:set char-set:iso-control)) ;; Match the double quote character " (define dquote (char #\")) ;; Match the tab \t character (define htab (char (integer->char 9))) ;; Match either 'sp' or 'htab'. (define wsp (lex:set (char-set #\space #\tab))) ;; Match linear white space: *(WSP / CRLF WSP) (define lwsp (lex:star* (lex:bar wsp (lex:seq (lex:drop crlf) wsp)))) ;; Match /any/ character. (define octet (lex:set char-set:full)) ;; Match the space character (define sp (char #\space)) ;; Match any printable ASCII character. (The "v" stands for ;; "visible".) That is any character in the decimal range of ;; [33..126]. (define vchar (lex:set char-set:graphic)) ;; Match a "quoted pair". Any characters (excluding CR and LF) may be ;; quoted. (define quoted-pair (lex:seq (char #\\) (lex:bar vchar wsp))) ;; Match a quoted string. The specials \ and " must be escaped inside ;; a quoted string; CR and LF are not allowed at all. (define char-set:quoted (char-set-complement (string->char-set "\\\"\r\n"))) (define qtext (lex:set char-set:quoted)) (define qcont (lex:bar (lex:pos qtext) quoted-pair)) (define quoted-string (lex:seq dquote (lex:seq (lex:star* qcont) dquote))) ;;;; Additional convenience procedures and parser combinators ;; match any character from an SRFI-14 character set (define set lex:set) ;; match any character from a set defined as a string (define (set-from-string s) (lex:set (string->char-set s))) ;;;; Syntactic abbreviations ;;;; Based on a proposal by Moritz Heidkamp (define :? optional-sequence) (define :! drop-consumed) (define :s lit) (define :c char) (define :* repetition) (define :+ repetition1) (define-syntax :: (syntax-rules () ((_ e1 e2 ...) (concatenation e1 e2 ...)))) (define-syntax :| (syntax-rules () ((_ e1 e2 ...) (alternatives e1 e2 ...)))) )