;;
;; Parser for the grammar defined in RFC4234, "Augmented BNF for
;; Syntax Specifications: ABNF".
;;
;;
;; Copyright 2009-2011 Ivan Raikov and the Okinawa Institute of Science
;; and Technology.
;;
;;
;; This program is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of
;; the License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; A full copy of the GPL license can be found at
;; .
(module abnf
(
(concatenation lex:seq) (alternatives lex:bar)
variable-repetition repetition repetition1 repetition-n
optional-sequence
pass bind drop-consumed longest
( :: lex:seq) :? :! :* :+
make-
Input->Token Token->CharLex
Token.CharLex->CoreABNF
)
(import scheme chicken data-structures extras )
(require-extension srfi-1 srfi-14 srfi-69 typeclass input-classes)
(require-library lexgen)
(import (prefix lexgen lex:))
;; the following is necessary because type classes are currently not
;; aware of module system prefixes
(import (only lexgen Input->Token Token->CharLex ))
(define-class ( T) ( L)
range set set-from-string char lit alpha
binary decimal hexadecimal ascii-char cr lf crlf ctl
dquote htab lwsp octet sp vchar wsp
:s :c )
(define pass lex:pass)
(define bind lex:bind)
(define drop-consumed lex:drop)
(define longest lex:longest)
;;;; ABNF operators
;; Concatenation (RFC 4234, Section 3.1)
(define-syntax concatenation
(syntax-rules ()
((_) lex:pass)
((_ a) a)
((_ a b) (lex:seq a b))
((concatenation a b ...)
(lex:seq a (concatenation b ...)))
))
;; Alternatives (RFC 4234, Section 3.2)
(define-syntax alternatives
(syntax-rules ()
((_) lex:pass)
((_ a) a)
((_ a b) (lex:bar a b))
((alternatives a b ...)
(lex:bar a (alternatives b ...)))
))
;; Value range alternatives (RFC 4234, Section 3.4)
;; (part of the CoreABNF typeclass)
;;(define range lex:range)
;; Specific repetition (RFC 4234, Section 3.7)
(define (repetition-n n p)
(let ((ps (list-tabulate n (lambda (i) p))))
(lex:lst ps)))
;; Variable repetition (RFC 4234, Section 3.6)
;; * repetition
(define (repetition p) (lex:star* p))
;; 1* repetition
(define repetition1 lex:pos)
(define (variable-repetition min max p)
(if (< max min) (variable-repetition max min p)
(let loop ((i (- max 1)) (k (+ min 1)) (r (if (positive? min) (repetition-n min p) lex:pass)))
(cond ((>= i min) (loop (- i 1) (+ k 1) (lex:bar (repetition-n k p) r)))
(else r)))))
(define optional-sequence lex:opt)
;;;; Terminal values (RFC 4234, Section 2.3)
;; Matches a literal string (case-insensitive)
(define=> (lit lex:)
(lambda (s)
(let* ((f (lambda (t) (tok t (lex:try char-ci=?))))
(ps (map f (if (string? s) (string->list s) s))))
(lex:lst ps))))
;;;; Core Rules (RFC 4234, Appendix B)
;; Match any character of the alphabet.
(define=> (alpha )
(set char-set:letter))
;; Match [0..1]
(define=> (binary )
(range #\0 #\1))
;; Match [0..9]
(define=> (decimal )
(range #\0 #\9))
;; Match [0..9] and [A..F,a..f]
(define=> (hexadecimal )
(set char-set:hex-digit))
;; Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is).
(define=> (ascii-char )
(set (ucs-range->char-set 1 127)))
;; Match the carriage return character \r.
(define=> (cr ) (char (integer->char 13)))
;; Match the linefeed character \n.
(define=> (lf ) (char (integer->char 10)))
;; Match the Internet newline \r\n.
;; cr lf
;; Match any US-ASCII control character. That is any character with a
;; decimal value in the range of [0..31,127].
(define=> (ctl ) (set char-set:iso-control))
;; Match the double quote character "
(define=> (dquote ) (char #\"))
;; Match the tab \t character
(define=> (htab ) (char (integer->char 9)))
;; Match either 'sp' or 'htab'.
(define=> (wsp ) (set (char-set #\space #\tab)))
;; Match linear white space: *(WSP / CRLF WSP)
;; (define=> (lwsp ) (lex:star* (lex:bar wsp (lex:seq (lex:drop crlf) wsp))))
;; Match /any/ character.
(define=> (octet ) (set char-set:full))
;; Match the space character
(define=> (sp ) (char #\space))
;; Match any printable ASCII character. (The "v" stands for
;; "visible".) That is any character in the decimal range of
;; [33..126].
(define=> (vchar ) (set char-set:graphic))
;;;; Additional convenience procedures and parser combinators
;; match any character from a set defined as a string
(define=> (set-from-string )
(lambda (s)
(set (string->char-set s))))
(define (Token.CharLex->CoreABNF T L)
(let* ((lit (lit T))
(char ((lambda=> () char) L))
(range ((lambda=> () range) L))
(set ((lambda=> () set) L))
(alpha (alpha L))
(binary (binary L))
(decimal (decimal L))
(hexadecimal (hexadecimal L))
(ascii-char (ascii-char L))
(cr (cr L))
(lf (lf L))
(crlf (lex:seq cr lf))
(ctl (ctl L))
(dquote (dquote L))
(htab (htab L))
(wsp (wsp L))
(lwsp (lex:star* (lex:bar wsp (lex:seq (lex:drop crlf) wsp))))
(octet (octet L))
(sp (sp L))
(vchar (vchar L))
(set-from-string (set-from-string L))
(:c char)
(:s lit)
)
(make- T L
range set set-from-string char lit alpha
binary decimal hexadecimal ascii-char cr lf crlf ctl
dquote htab lwsp octet sp vchar wsp
:s :c )
))
;;;; Syntactic abbreviations
;;;; Based on a proposal by Moritz Heidkamp
(define :? optional-sequence)
(define :! drop-consumed)
(define :* repetition)
(define :+ repetition1)
(define-syntax ::
(syntax-rules () ((_ e1 e2 ...) (concatenation e1 e2 ...))))
)