;; ;; Intarweb is an improved HTTP library for Chicken ;; ; Copyright (c) 2008-2010, Peter Bex ; All rights reserved. ; ; Redistribution and use in source and binary forms, with or without ; modification, are permitted provided that the following conditions ; are met: ; ; 1. Redistributions of source code must retain the above copyright ; notice, this list of conditions and the following disclaimer. ; 2. Redistributions in binary form must reproduce the above copyright ; notice, this list of conditions and the following disclaimer in the ; documentation and/or other materials provided with the distribution. ; 3. Neither the name of the author nor the names of its ; contributors may be used to endorse or promote products derived ; from this software without specific prior written permission. ; ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ; OF THE POSSIBILITY OF SUCH DAMAGE. (provide 'intarweb) (module intarweb (read-line-limit replace-header-contents replace-header-contents! remove-header remove-header! update-header-contents update-header-contents! headers single-headers headers? headers->list http-name->symbol symbol->http-name header-parsers header-unparsers unparse-header unparse-headers safe-methods safe? idempotent-methods idempotent? keep-alive? response-class etag=? etag=-weakly? etag-matches? etag-matches-weakly? make-request request? request-major request-major-set! request-minor request-minor-set! request-method request-method-set! request-uri request-uri-set! request-headers request-headers-set! request-port request-port-set! update-request set-request! request-parsers read-request request-unparsers write-request read-headers http-0.9-request-parser http-1.x-request-parser http-0.9-request-unparser http-1.x-request-unparser header-parse-error-handler make-response response? response-major response-major-set! response-minor response-minor-set! response-code response-code-set! response-reason response-reason-set! response-headers response-headers-set! response-port response-port-set! update-response set-response! write-response response-parsers response-unparsers read-response http-0.9-response-parser http-1.x-response-parser http-0.9-response-unparser http-1.x-response-unparser ;; http-header-parsers header-contents header-values header-value header-params header-param get-value get-params get-param split-multi-header parse-token parse-comment parse-parameters parse-value+parameters multiple single make-key/value-subparser rfc1123-string->time rfc850-string->time asctime-string->time http-date-string->time rfc1123-subparser rfc850-subparser asctime-subparser http-date-subparser quality-subparser unknown-header-parser symbol-subparser symbol-subparser-ci natnum-subparser host/port-subparser base64-subparser range-subparser etag-parser product-parser mailbox-subparser if-range-parser retry-after-subparser via-parser warning-parser key/value-subparser set-cookie-parser cache-control-parser pragma-parser te-parser cookie-parser unparse-params must-be-quoted-chars quote-string unparse-token default-header-unparser etag-unparser host/port-unparser product-unparser rfc1123-unparser cookie-unparser ) (import scheme chicken foreign) (use extras ports data-structures srfi-1 srfi-13 srfi-14 regex posix base64 defstruct uri-common) (define read-line-limit (make-parameter 1024)) ; #f if you want no limit ;; Make headers a new type, to force the use of the HEADERS procedure ;; and ensure only proper header values are passed to all procedures ;; that deal with headers. (define-record headers v) (define-record-printer (headers h out) (fprintf out "#(headers: ~S)" (headers-v h))) (define headers->list headers-v) (define (remove-header! name headers) (let loop ((h (headers-v headers))) (cond ((null? h) headers) ((eq? name (caar h)) (set-cdr! h (cdr h)) headers) (else (loop (cdr h)))))) (define (remove-header name headers) (make-headers (let loop ((h (headers-v headers))) (cond ((null? h) h) ((eq? name (caar h)) (loop (cdr h))) (else (cons (car h) (loop (cdr h)))))))) ;; XXX: Do we need these replace procedures in the exports list? It ;; looks like we can use update everywhere. (define (replace-header-contents! name contents headers) (let loop ((h (headers-v headers))) (cond ((null? h) (headers-v-set! headers (cons (cons name contents) (headers-v headers))) headers) ((eq? name (caar h)) (set-cdr! (car h) contents) headers) (else (loop (cdr h)))))) (define (replace-header-contents name contents headers) (make-headers (let loop ((h (headers-v headers))) (cond ((null? h) (cons (cons name contents) h)) ((eq? name (caar h)) (cons (cons (caar h) contents) (cdr h))) (else (cons (car h) (loop (cdr h)))))))) (define (make-updater replacer) (lambda (name contents headers) (let ((old (header-contents name headers '()))) (replacer name (if (member name (single-headers)) (list (last contents)) (append old contents)) headers)))) (define update-header-contents (make-updater replace-header-contents)) (define update-header-contents! (make-updater replace-header-contents!)) (define http-name->symbol (compose string->symbol string-downcase!)) (define symbol->http-name (compose string-titlecase symbol->string)) ;; Make a header set from a literal expression by folding in the headers ;; with any previous ones (define (headers headers-to-be #!optional (old-headers (make-headers '()))) (fold (lambda (h new-headers) (update-header-contents (car h) (map (lambda (v) (if (vector? v) v (vector v '()))) ; normalize to vector (cdr h)) new-headers)) old-headers headers-to-be)) (define normalized-uri (compose uri-normalize-path-segments uri-reference)) (include "header-parsers") ; Also includes header unparsers ;; Any unknown headers are considered to be multi-headers, always (define single-headers (make-parameter '(accept-ranges age authorization content-length content-location content-md5 content-type date etag expect expires host if-modified-since if-unmodified-since last-modified location max-forwards proxy-authorization range referer retry-after server transfer-encoding user-agent www-authenticate))) (define string->http-method string->symbol) (define http-method->string symbol->string) ;; Hack to insert trailer in chunked ports (define *end-of-transfer-object* (list 'eot)) ;; Make an output port automatically "chunked" (define (chunked-output-port port) (make-output-port (lambda (s) ; write (if (eq? *end-of-transfer-object* s) (fprintf port "0\r\n\r\n") ; trailer? (fprintf port "~X\r\n~A\r\n" (string-length s) s))) (lambda () ; close (close-output-port port)) (lambda () ; flush (flush-output port)))) ;; Make an input port automatically "chunked" (define (chunked-input-port port) (let* ((chunk-length 0) (position 0) (check-position (lambda () (when (and position (>= position chunk-length)) (unless (zero? chunk-length) (read-line port)) ; read \r\n data trailer (let* ((line (read-line port))) (if (eof-object? line) (set! position #f) (begin (set! chunk-length (string->number line 16)) (if chunk-length (set! position 0) (set! position #f))))))))) (make-input-port (lambda () ; read (check-position) (if position (let ((char (read-char port))) (if (not (eof-object? char)) (set! position (add1 position))) char) #!eof)) (lambda () ; ready? (check-position) (and position (char-ready? port))) (lambda () ; close (close-input-port port)) (lambda () ; peek (check-position) (if position (peek-char port) #!eof))))) ;; OPTIONS and TRACE are not explicitly mentioned in in section ;; 9.1.1, but section 9.1.2 says they SHOULD NOT have side-effects ;; by definition, which means they are safe, as well. (define safe-methods (make-parameter '(GET HEAD OPTIONS TRACE))) ;; RFC2616, Section 9.1.1 (define (safe? obj) (let ((method (if (request? obj) (request-method obj) obj))) (not (not (member method (safe-methods)))))) (define idempotent-methods (make-parameter '(GET HEAD PUT DELETE OPTIONS TRACE))) ;; RFC2616, Section 9.1.2 (define (idempotent? obj) (let ((method (if (request? obj) (request-method obj) obj))) (not (not (member method (idempotent-methods)))))) (define (keep-alive? obj) (let ((major (if (request? obj) (request-major obj) (response-major obj))) (minor (if (request? obj) (request-minor obj) (response-minor obj))) (con (header-value 'connection (if (request? obj) (request-headers obj) (response-headers obj))))) (if (and (= major 1) (> minor 0)) (not (eq? con 'close)) ;; RFC 2068, section 19.7.1 (see also RFC 2616, section 19.6.2) (eq? con 'keep-alive)))) (define (response-class obj) (let ((code (if (response? obj) (response-code obj) obj))) (- code (modulo code 100)))) (define (etag=? a b) (and (not (eq? 'weak (car a))) (eq? (car a) (car b)) (string=? (cdr a) (cdr b)))) (define (etag=-weakly? a b) (and (eq? (car a) (car b)) (string=? (cdr a) (cdr b)))) (define (etag-matches? etag matchlist) (any (lambda (m) (or (eq? m '*) (etag=? etag m))) matchlist)) (define (etag-matches-weakly? etag matchlist) (any (lambda (m) (or (eq? m '*) (etag=-weakly? etag m))) matchlist)) ;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Request parsing ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;; ;; This includes parsers for all RFC-defined headers (define header-parsers (make-parameter `((accept . ,(multiple symbol-subparser-ci `((q . ,quality-subparser)))) (accept-charset . ,(multiple symbol-subparser-ci `((q . ,quality-subparser)))) (accept-encoding . ,(multiple symbol-subparser-ci `((q . ,quality-subparser)))) (accept-language . ,(multiple symbol-subparser-ci `((q . ,quality-subparser)))) (accept-ranges . ,(single symbol-subparser-ci)) (age . ,(single natnum-subparser)) (allow . ,(multiple symbol-subparser)) (authorization . ,authorization-parser) (cache-control . ,cache-control-parser) (connection . ,(multiple symbol-subparser-ci)) (content-encoding . ,(multiple symbol-subparser-ci)) (content-language . ,(multiple symbol-subparser-ci)) (content-length . ,(single natnum-subparser)) (content-location . ,(single normalized-uri)) (content-md5 . ,(single base64-subparser)) (content-range . ,(single range-subparser)) (content-type . ,(single symbol-subparser-ci)) (date . ,(single http-date-subparser)) (etag . ,etag-parser) (expect . ,(single (make-key/value-subparser '()))) (expires . ,(single http-date-subparser)) (from . ,(multiple mailbox-subparser)) (host . ,(single host/port-subparser)) (if-match . ,if-match-parser) (if-modified-since . ,(single http-date-subparser)) (if-none-match . ,if-match-parser) (if-range . ,if-range-parser) (if-unmodified-since . ,(single http-date-subparser)) (last-modified . ,(single http-date-subparser)) (location . ,(single normalized-uri)) (max-forwards . ,(single natnum-subparser)) (pragma . ,pragma-parser) (proxy-authenticate . ,authenticate-parser) (proxy-authorization . ,authorization-parser) (range . ,(multiple range-subparser)) (referer . ,(single normalized-uri)) (retry-after . ,(single retry-after-subparser)) (server . ,product-parser) (te . ,te-parser) (trailer . ,(multiple symbol-subparser-ci)) (transfer-encoding . ,(single symbol-subparser-ci)) (upgrade . ,(multiple update-header-contents!)) (user-agent . ,product-parser) (vary . ,(multiple symbol-subparser-ci)) (via . ,via-parser) (warning . ,warning-parser) (www-authenticate . ,authenticate-parser) ;; RFC 2109 (set-cookie . ,set-cookie-parser) (cookie . ,cookie-parser) ;; RFC 2965? ))) (define header-parse-error-handler ;; ignore errors (make-parameter (lambda (header-name contents headers exn) headers))) ;; The parser is supposed to return a list of header values for its header (define (parse-header name contents) (let* ((default unknown-header-parser) (parser (alist-ref name (header-parsers) eq? default))) (parser contents))) (define (parse-header-line line headers) (or (and-let* ((colon-idx (string-index line #\:)) (header-name (http-name->symbol (string-take line colon-idx))) (contents (string-trim-both (string-drop line (add1 colon-idx))))) (handle-exceptions exn ((header-parse-error-handler) header-name contents headers exn) (update-header-contents! header-name (parse-header header-name contents) headers))) (signal-http-condition "Bad header line" 'header-error 'contents line))) (define (read-headers port) (let ((first-line (read-line port))) (if (or (eof-object? first-line) (string-null? first-line)) (make-headers '()) (let loop ((prev-line first-line) (line (read-line port)) (headers (make-headers '()))) (if (or (eof-object? line) (string-null? line)) (if (string-null? prev-line) headers (parse-header-line prev-line headers)) (if (char-whitespace? (string-ref line 0)) ; Continuation char? (loop (string-append prev-line line) (read-line port) headers) (if (string=? (string-take-right prev-line 1) "\\") ; escaped? ;; XXX Test if this works with all combinations of \r\n ;; with prepended backslashes. We don't care about ;; malformed stuff like "foo\\\\\n" or \ with missing " (loop (string-append prev-line "\n" line) (read-line port) headers) (loop line (read-line port) (parse-header-line prev-line headers))))))))) (define (signal-http-condition msg type . more-info) (signal (make-composite-condition (make-property-condition 'http) (apply make-property-condition type more-info) (make-property-condition 'exn 'message msg)))) (defstruct request (method 'GET) uri (major 1) (minor 1) (headers (make-headers '())) port) ;; This removes the dependency on regex-case and is simpler (define-syntax regex-let (syntax-rules () ((regex-let str regex (name ...) body ...) (let ((values (string-match regex str))) (and values (apply (lambda (name ...) body ...) values)))))) ;; Perhaps we should have header parsers indexed by version or ;; something like that, so you can define the maximum version. Useful ;; for when expecting a response. Then we group request/response parsers ;; together, as with request/response unparsers. (define (http-0.9-request-parser line in) (regex-let line "[Gg][Ee][Tt] +([^ \t]+)" (_ uri) (make-request method: 'GET uri: (normalized-uri uri) major: 0 minor: 9 port: in))) ;; XXX This actually parses anything >= HTTP/1.0 (define (http-1.x-request-parser line in) (regex-let line "([a-zA-Z]+) +([^ \t]+) +[Hh][Tt][Tt][Pp]/([0-9]+)\\.([0-9]+)" (_ method uri major minor) (make-request method: (string->http-method method) uri: (normalized-uri uri) major: (string->number major) minor: (string->number minor) headers: (read-headers in) port: in))) (define request-parsers ; order matters here (make-parameter (list http-1.x-request-parser http-0.9-request-parser))) (define (read-request inport) (let* ((line (read-line inport (read-line-limit))) ;; A bit ugly, but simpler than the alternatives (line (if (eof-object? line) "" line))) ;; Try each parser in turn to process the request-line. ;; A parser returns either #f or a request object (let loop ((parsers (request-parsers))) (if (null? parsers) (signal-http-condition "Unknown protocol" 'unknown-protocol-line 'line line) (or ((car parsers) line inport) (loop (cdr parsers))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Request unparsing ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;; (define header-unparsers (make-parameter `((date . ,rfc1123-unparser) (etag . ,etag-unparser) (expires . ,rfc1123-unparser) (host . ,host/port-unparser) (if-match . ,if-match-unparser) (if-modified-since . ,rfc1123-unparser) (if-none-match . ,if-match-unparser) (if-unmodified-since . ,rfc1123-unparser) (last-modified . ,rfc1123-unparser) (user-agent . ,product-unparser) (server . ,product-unparser) (cookie . ,cookie-unparser) (set-cookie . ,set-cookie-unparser) (authorization . ,authorization-unparser) (www-authenticate . ,authenticate-unparser) (proxy-authorization . ,authorization-unparser) (proxy-authenticate . ,authenticate-unparser)))) (define (unparse-header header-name header-value) (let* ((def default-header-unparser) (unparser (alist-ref header-name (header-unparsers) eq? def))) (unparser header-value))) (define (unparse-headers headers out) (for-each (lambda (h) (let* ((name (car h)) (name-s (symbol->http-name name)) (contents (cdr h))) (for-each (lambda (value) ;; Verify there's no \r\n or \r or \n in value? (fprintf out "~A: ~A\r\n" name-s value)) (unparse-header name contents)))) (headers-v headers))) (define (write-request-line request) (fprintf (request-port request) "~A ~A HTTP/~A.~A\r\n" (http-method->string (request-method request)) (uri->string (request-uri request)) (request-major request) (request-minor request))) (define (http-0.9-request-unparser request) (fprintf (request-port request) "GET ~A\r\n" (uri->string (request-uri request))) request) (define (http-1.0-request-unparser request) (and-let* (((= (request-major request) 1)) ((= (request-minor request) 0)) (o (request-port request))) (write-request-line request) (unparse-headers (request-headers request) o) (display "\r\n" o) request)) ;; XXX This actually unparses anything >= HTTP/1.1 (define (http-1.x-request-unparser request) (and-let* (((or (> (request-major request) 1) (and (= (request-major request) 1) (> (request-minor request) 0)))) (o (request-port request))) (write-request-line request) (unparse-headers (request-headers request) o) (display "\r\n" o) (if (memq 'chunked (header-values 'transfer-encoding (request-headers request))) (update-request request port: (chunked-output-port (request-port request))) request))) (define request-unparsers ; order matters here (make-parameter (list http-1.x-request-unparser http-1.0-request-unparser http-0.9-request-unparser))) (define (write-request request) ;; Try each unparser in turn to write the request-line. ;; An unparser returns either #f or a new request object. (let loop ((unparsers (request-unparsers))) (if (null? unparsers) (signal-http-condition "Unknown protocol" 'unknown-protocol 'major (request-major request) 'minor (request-minor request)) (or ((car unparsers) request) (loop (cdr unparsers)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Response unparsing ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defstruct response (code 200) (reason "OK") (major 1) (minor 1) (headers (make-headers '())) port) (define (http-0.9-response-unparser response) response) ;; The response-body will just follow (define (write-response-line response) (fprintf (response-port response) "HTTP/~A.~A ~A ~A\r\n" (response-major response) (response-minor response) (response-code response) (response-reason response))) (define (http-1.0-response-unparser response) (and-let* (((= (response-major response) 1)) ((= (response-minor response) 0)) (o (response-port response))) (write-response-line response) (unparse-headers (response-headers response) o) (display "\r\n" o) response)) ;; XXX This actually unparses anything >= HTTP/1.1 (define (http-1.x-response-unparser response) (and-let* (((or (> (response-major response) 1) (and (= (response-major response) 1) (> (response-minor response) 0)))) (o (response-port response))) (write-response-line response) (unparse-headers (response-headers response) o) (display "\r\n" o) (if (memq 'chunked (header-values 'transfer-encoding (response-headers response))) (update-response response port: (chunked-output-port (response-port response))) response))) (define response-unparsers (make-parameter (list http-1.x-response-unparser http-1.0-response-unparser http-0.9-response-unparser))) (define (write-response response) ;; Try each unparser in turn to write the response-line. ;; An unparser returns either #f or a new response object. (let loop ((unparsers (response-unparsers))) (if (null? unparsers) (signal-http-condition "Unknown protocol" 'unknown-protocol 'major (response-major response) 'minor (response-minor response)) (or ((car unparsers) response) (loop (cdr unparsers)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Response parsing ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;; (define (http-1.x-response-parser line in) (regex-let line "[Hh][Tt][Tt][Pp]/([0-9]+)\\.([0-9]+) +([0-9]+) +(.*)" (_ major minor code reason) (and-let* ((major (string->number major)) (minor (string->number minor)) ((or (> major 1) (and (= major 1) (> minor 0)))) (h (read-headers in)) (port (if (memq 'chunked (header-values 'transfer-encoding h)) (chunked-input-port in) in))) (make-response code: (string->number code) reason: reason major: major minor: minor headers: h port: port)))) (define (http-1.0-response-parser line in) (regex-let line "[Hh][Tt][Tt][Pp]/1\\.0 +([0-9]+) +(.*)" (_ code reason) (make-response code: (string->number code) reason: reason major: 1 minor: 0 headers: (read-headers in) port: in))) ;; You can't "detect" a 0.9 response, because there is no response line. ;; It will simply output the body directly, so we will just assume that ;; if we can't recognise the output string, we just got a 0.9 response. ;; If this is not desired, just change response-parsers to exclude this one. (define (http-0.9-response-parser line in) (make-response code: 200 reason: "OK" major: 0 minor: 9 ;; XXX This is wrong, it re-inserts \r\n, while it may have ;; been a \n only. To work around this, we'd have to write ;; a custom read-line procedure. ;; However, it does not matter much because HTTP 0.9 is only ;; defined to ever return text/html, no binary or any other ;; content type. port: (call-with-input-string (string-append line "\r\n") (lambda (str) (make-concatenated-port str in))))) (define response-parsers ;; order matters here (make-parameter (list http-1.x-response-parser http-1.0-response-parser http-0.9-response-parser))) (define (read-response inport) (let* ((line (read-line inport (read-line-limit))) (line (if (eof-object? line) "" line))) (let loop ((parsers (response-parsers))) (if (null? parsers) (signal-http-condition "Unknown protocol" 'unknown-protocol-line 'line line) (or ((car parsers) line inport) (loop (cdr parsers))))))) )