;; ;; Parser combinators for CGI variables and MIME headers, ;; as described in RFC 3875 "The Common Gateway Interface (CGI) 1.1". ;; ;; Copyright 2013 Ivan Raikov. ;; ;; Based in part on the Haskell Network.CGI library by Peter Thiemann ;; and Bjorn Bringert. ;; (module cgi-grammar ( CoreABNF->CGI ) (import scheme chicken) (require-extension data-structures srfi-1 utf8 utf8-srfi-13 utf8-srfi-14) (require-extension typeclass input-classes) (require-library lexgen) (import (prefix lexgen lex:)) (import (only lexgen Input->Token Token->CharLex )) (require-library abnf abnf-consumers) (import (prefix abnf abnf:) (prefix abnf-consumers abnf:)) (import (only abnf )) ;; helper macro for mutually-recursive parser definitions (define-syntax vac (syntax-rules () ((_ fn) (lambda args (apply fn args))))) (define-class ( A) auth-type content-length content-type gateway-interface path-info query-string remote-addr remote-host request-method server-name server-port server-protocol server-software part-headers ) ;; Match any US-ASCII character except for control characters and ;; separators. (define=> (token ( abnf.)) (abnf:repetition1 (abnf.set (char-set-difference char-set:ascii (char-set-union char-set:iso-control (char-set #\space #\tab) (string->char-set "()<>@,;:\\/[]?={}")))))) ;; within a quoted string, any ASCII graphic or space is permitted ;; without blackslash-quoting except double-quote and the backslash ;; itself. (define=> (qdtext ( abnf.)) (abnf.set (char-set-difference char-set:printing (char-set #\" #\\)))) (define=> (quoted-string ( abnf.)) (lambda (qdtext) (abnf:concatenation (abnf:drop-consumed abnf.dquote) (abnf:repetition qdtext) (abnf:drop-consumed abnf.dquote)))) (define=> (auth-type ( abnf.)) (lambda (token) (abnf:bind-consumed->string (abnf:alternatives (abnf.lit "Basic") (abnf.lit "Digest") token)) )) (define=> (content-length ( abnf.)) (abnf:bind-consumed->string (abnf:repetition1 abnf.decimal))) ;; CONTENT_TYPE = "" | media-type ;; media-type = type "/" subtype *( ";" parameter ) ;; type = token ;; subtype = token ;; parameter = attribute "=" value ;; attribute = token ;; value = token | quoted-string (define=> (content-type ( abnf.)) (lambda (token quoted-string) (let* ((type (abnf:bind-consumed->string token)) (subtype (abnf:bind-consumed->string token)) (attribute (abnf:bind-consumed->string token)) (value (abnf:bind-consumed->string (abnf:alternatives token quoted-string))) (parameter (abnf:bind-consumed-strings->list (abnf:concatenation attribute (abnf:drop-consumed (abnf.char #\=)) value))) (media-type (abnf:concatenation (abnf:bind-consumed-strings->list 'type type) (abnf:drop-consumed (abnf.char #\/)) (abnf:bind-consumed-strings->list 'subtype subtype) (abnf:repetition (abnf:concatenation (abnf:drop-consumed (abnf:concatenation (abnf.char #\;) (abnf:repetition (abnf.set (char-set #\space #\tab))) )) parameter)))) ) (abnf:alternatives media-type abnf:pass) )) ) (define=> (gateway-interface ( abnf.)) (abnf:concatenation (abnf:bind-consumed->symbol (abnf.lit "CGI")) (abnf:drop-consumed (abnf.char #\/)) (abnf:bind-consumed->string (abnf:repetition1 abnf.decimal)) (abnf:drop-consumed (abnf.char #\.) ) (abnf:bind-consumed->string (abnf:repetition1 abnf.decimal)) )) ;; PATH_INFO = "" | ( "/" path ) ;; path = lsegment *( "/" lsegment ) ;; lsegment = *lchar ;; lchar = (define=> (lchar ( abnf.)) (abnf.set (char-set-difference (char-set-union char-set:printing char-set:iso-control) (char-set #\\)))) (define (lsegment lchar) (abnf:bind-consumed->string (abnf:repetition lchar))) (define=> (path ( abnf.)) (lambda (lsegment) (abnf:concatenation lsegment (abnf:repetition (abnf:concatenation (abnf:drop-consumed (abnf.char #\/)) lsegment))))) (define=> (path-info ( abnf.)) (lambda (path) (abnf:alternatives (abnf:concatenation (abnf:drop-consumed (abnf.char #\/)) (abnf:consumed-pairs->list path)) abnf:pass) )) ;; QUERY_STRING = query-string ;; query-string = *uric ;; uric = reserved | unreserved | escaped (define=> (escaped ( abnf.)) (abnf:concatenation (abnf.char #\%) abnf.hexadecimal abnf.hexadecimal)) (define=> (reserved ( abnf.)) (abnf.set-from-string ";/?:@&=+$,[]")) (define=> (mark ( abnf.)) (abnf.set-from-string "-_.!~*'()")) (define=> (unreserved ( abnf.)) (lambda (mark) (abnf:alternatives abnf.alpha abnf.decimal mark))) (define (uric reserved unreserved escaped) (abnf:alternatives reserved unreserved escaped)) (define (query-string uric) (abnf:repetition uric)) ;; REMOTE_ADDR = hostnumber ;; hostnumber = ipv4-address | ipv6-address ;; ipv4-address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit ;; ipv6-address = hexpart [ ":" ipv4-address ] ;; hexpart = hexseq | ( [ hexseq ] "::" [ hexseq ] ) ;; hexseq = 1*4hex *( ":" 1*4hex ) (define=> (ddot ( abnf.)) (abnf:drop-consumed (abnf.char #\.))) (define=> (dcolon ( abnf.)) (abnf:drop-consumed (abnf.char #\:))) (define=> (ipv4d ( abnf.)) (abnf:bind-consumed->string (abnf:variable-repetition 1 3 abnf.decimal))) (define (ipv4-address ipv4d ddot) (abnf:concatenation ipv4d ddot ipv4d ddot ipv4d ddot ipv4d)) (define=> (ipv6h ( abnf.)) (abnf:bind-consumed->string (abnf:variable-repetition 1 4 abnf.hexadecimal))) (define (hexseq ipv6h dcolon) (abnf:concatenation ipv6h (abnf:repetition (abnf:concatenation dcolon ipv6h)))) (define=> (hexpart ( abnf.)) (lambda (hexseq) (abnf:alternatives hexseq (abnf:concatenation (abnf:optional-sequence hexseq) (abnf.lit "::") (abnf:optional-sequence hexseq) )) )) (define (ipv6-address ipv4-address hexpart dcolon) (abnf:concatenation hexpart (abnf:optional-sequence (abnf:concatenation dcolon ipv4-address)))) (define (hostnumber ipv4-address ipv6-address) (abnf:alternatives ipv4-address ipv6-address)) (define remote-addr hostnumber) ;; REMOTE_HOST = "" | hostname | hostnumber ;; hostname = *( domainlabel "." ) toplabel [ "." ] ;; domainlabel = alphanum [ *alphahypdigit alphanum ] ;; toplabel = alpha [ *alphahypdigit alphanum ] ;; alphahypdigit = alphanum | "-" (define=> (alphanum ( abnf.)) (abnf:alternatives abnf.alpha abnf.decimal)) (define=> (alphahypdigit ( abnf.)) (abnf:alternatives alphanum abnf.char #\-)) (define=> (toplabel ( abnf.)) (lambda (alphahypdigit alphanum) (abnf:bind-consumed->string (abnf:concatenation abnf.alpha (abnf:optional-sequence (abnf:concatenation (abnf:repetition alphahypdigit) alphanum)))) )) (define (domainlabel alphahypdigit alphanum) (abnf:bind-consumed->string (abnf:concatenation alphanum (abnf:optional-sequence (abnf:concatenation (abnf:repetition alphahypdigit) alphanum))) )) (define (hostname domainlabel toplabel ddot) (abnf:concatenation (abnf:repetition (abnf:concatenation domainlabel ddot)) toplabel (abnf:optional-sequence ddot))) (define (remote-host hostname hostnumber) (abnf:alternatives hostname hostnumber abnf:pass)) ;; REQUEST_METHOD = method ;; method = "GET" | "POST" | "HEAD" | extension-method ;; extension-method = "PUT" | "DELETE" | token (define=> (extension-method ( abnf.)) (lambda (token) (abnf:alternatives (abnf.lit "PUT") (abnf.lit "DELETE") token))) (define=> (method ( abnf.)) (lambda (extension-method) (abnf:alternatives (abnf.lit "GET") (abnf.lit "POST") (abnf.lit "HEAD") extension-method))) (define (request-method method) (abnf:bind-consumed->string method)) ;; SERVER_NAME = server-name ;; server-name = hostname | ipv4-address | ( "[" ipv6-address "]" ) (define=> (server-name ( abnf.)) (lambda (ipv4-address ipv6-address) (abnf:alternatives hostname ipv4-address (abnf:drop-consumed (abnf.char #\[)) ipv6-address (abnf:drop-consumed (abnf.char #\]))) )) ;; SERVER_PORT = server-port ;; server-port = 1*digit (define=> (server-port ( abnf.)) (abnf:bind-consumed->string (abnf:repetition abnf.decimal))) ;; SERVER_PROTOCOL = HTTP-Version | "INCLUDED" | extension-version ;; HTTP-Version = "HTTP" "/" 1*digit "." 1*digit ;; extension-version = protocol [ "/" 1*digit "." 1*digit ] ;; protocol = token (define protocol token) (define=> (extension-version ( abnf.)) (lambda (protocol ddot) (abnf:concatenation (abnf:bind-consumed->string protocol ) (abnf:optional-sequence (abnf:concatenation (abnf:drop-consumed (abnf.char #\/)) (abnf:bind-consumed->string (abnf:repetition1 abnf.decimal)) ddot (abnf:bind-consumed->string (abnf:repetition1 abnf.decimal))))))) (define=> (HTTP-Version ( abnf.)) (abnf:concatenation (abnf:bind-consumed->string (abnf.lit "HTTP")) (abnf:optional-sequence (abnf:concatenation (abnf:drop-consumed (abnf.char #\/)) (abnf:bind-consumed->string (abnf:repetition1 abnf.decimal)) ddot (abnf:bind-consumed->string (abnf:repetition1 abnf.decimal)) )) )) (define=> (server-protocol ( abnf.)) (lambda (HTTP-Version extension-version) (abnf:bind-consumed-strings->list (abnf:alternatives HTTP-Version (abnf.lit "INCLUDED") extension-version)) )) (define=> (product ( abnf.)) (lambda (token) (abnf:concatenation token (abnf:optional-sequence (abnf:concatenation (abnf.char #\/) token)) ))) (define=> (ctext ( abnf.)) (abnf.set (char-set-difference char-set:graphic (char-set #\( #\) #\\)))) (define=> (comment ( abnf.)) (vac (abnf:concatenation (abnf.char #\() (abnf:repetition (abnf:alternatives ctext comment)) (abnf.char #\))) )) (define consumed-objects-lift-any (abnf:consumed-objects-lift (abnf:consumed-objects identity))) (define=> (header ( abnf.)) (lambda (ss p) (abnf:bind (consumed-objects-lift-any) (abnf:concatenation (abnf:bind-consumed->symbol (abnf.lit ss)) (abnf:drop-consumed (abnf.char #\:)) (abnf:drop-consumed (abnf:repetition (abnf.char #\space))) p (abnf:drop-consumed abnf.crlf)) ))) (define (server-software product comment) (abnf:bind-consumed->string (abnf:repetition1 (abnf:alternatives product comment)) )) ;; disposition := "Content-Disposition" ":" ;; disposition-type ;; *(";" disposition-parm) ;; disposition-type := "inline" ;; / "attachment" ;; / extension-token ;; ; values are not case-sensitive ;; disposition-parm := filename-parm ;; / creation-date-parm ;; / modification-date-parm ;; / read-date-parm ;; / size-parm ;; / parameter ;; filename-parm := "filename" "=" value ;; creation-date-parm := "creation-date" "=" quoted-date-time ;; modification-date-parm := "modification-date" "=" quoted-date-time ;; read-date-parm := "read-date" "=" quoted-date-time ;; size-parm := "size" "=" 1*DIGIT ;; quoted-date-time := quoted-string ;; ; contents MUST be an RFC 822 `date-time' ;; ; numeric timezones (+HHMM or -HHMM) MUST be (define=> (disposition-type ( abnf.)) (lambda (token) (abnf:bind-consumed-strings->list 'type (abnf:alternatives (abnf.lit "inline") (abnf.lit "attachment") (abnf:bind-consumed->string token))) )) (define=> (content-disposition ( abnf.)) (lambda (token quoted-string disposition-type) (let* ( (attribute (abnf:bind-consumed->symbol token)) (value (abnf:bind-consumed->string (abnf:alternatives quoted-string token))) (parameter (abnf:bind (consumed-objects-lift-any) (abnf:concatenation attribute (abnf:drop-consumed (abnf.char #\=)) value))) ) (abnf:concatenation disposition-type (abnf:repetition (abnf:concatenation (abnf:drop-consumed (abnf:concatenation (abnf.char #\;) (abnf:repetition (abnf.set (char-set #\space #\tab))) )) parameter)) )) )) (define (CoreABNF->CGI A) (let* ((token* (token A)) (qdtext* (qdtext A)) (quoted-string* ((quoted-string A) qdtext*)) (lchar* (lchar A)) (lsegment* (lsegment lchar*)) (path* ((path A) lsegment*)) (escaped* (escaped A)) (reserved* (reserved A)) (mark* (mark A)) (unreserved* ((unreserved A) mark*)) (uric* (uric reserved* unreserved* escaped*)) (query-string* (query-string uric)) (ddot* (ddot A)) (dcolon* (dcolon A)) (ipv4d* (ipv4d A)) (ipv4-address* (ipv4-address ipv4d* ddot*)) (ipv6h* (ipv6h A)) (hexseq* (hexseq ipv6h* dcolon*)) (hexpart* ((hexpart A) hexseq*)) (ipv6-address* (ipv6-address ipv4-address* hexpart* dcolon*)) (hostnumber* (hostnumber ipv4-address* ipv6-address*)) (remote-addr* hostnumber*) (alphanum* (alphanum A)) (alphahypdig* (alphahypdigit A)) (server-name* ((server-name A) ipv4-address* ipv6-address*)) (toplabel* ((toplabel A) alphahypdig* alphanum)) (domainlabel* (domainlabel alphahypdigit alphanum)) (hostname* (hostname domainlabel toplabel ddot)) (extension-method* (extension-method A)) (method* ((method A) extension-method*)) (request-method* (request-method method*)) (content-length* (content-length A)) (content-type* ((content-type A) token* quoted-string*)) (disposition-type* ((disposition-type A) token*)) (content-disposition* ((content-disposition A) token* quoted-string* disposition-type*)) (comment* (comment A)) (product* (product A)) (header* (header A)) ) (make- A ((auth-type A) token*) content-length* content-type* (gateway-interface A) ((path-info A) path*) query-string* remote-addr* (remote-host hostname* hostnumber*) request-method* server-name* (server-port A) (server-protocol A) (server-software product* comment*) (abnf:repetition (abnf:alternatives (header* "Content-Type" content-type*) (header* "Content-Length" content-length*) (header* "Content-Disposition" content-disposition*) )) ) )) )