;; ;; URI-common provides URI handling procedures for common URI schemes ;; that are based on the generic syntax such as http, https, file, ftp. ;; It also provides automatic form-urlencoded query argument ;; encoding/decoding ;; ; Copyright (c) 2008-2009, Peter Bex ; All rights reserved. ; ; Redistribution and use in source and binary forms, with or without ; modification, are permitted provided that the following conditions ; are met: ; ; 1. Redistributions of source code must retain the above copyright ; notice, this list of conditions and the following disclaimer. ; 2. Redistributions in binary form must reproduce the above copyright ; notice, this list of conditions and the following disclaimer in the ; documentation and/or other materials provided with the distribution. ; 3. Neither the name of the author nor the names of its ; contributors may be used to endorse or promote products derived ; from this software without specific prior written permission. ; ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ; OF THE POSSIBILITY OF SUCH DAMAGE. ; ; Please report bugs, suggestions and ideas to the Chicken Trac ; ticket tracking system (assign tickets to user 'sjamaan'): ; http://trac.callcc.org (provide 'uri-common) (module uri-common (uri-reference absolute-uri uri->uri-generic uri-generic->uri update-uri uri? uri-scheme uri-username uri-password uri-host uri-port uri-path uri-query uri-fragment uri->string form-urlencode form-urldecode uri-relative-to uri-relative-from uri-normalize-path-segments uri-normalize-case char-set:query/fragment) (import chicken scheme) (require-extension srfi-1 srfi-13 srfi-14 extras data-structures defstruct matchable) (require-library uri-generic) (import (prefix uri-generic generic:)) ;; We could use the hostinfo egg for this, but that would be yet another ;; dependency. Besides, not all service names have a matching URI scheme ;; nor do all URI schemes have a matching service name. (define default-ports '((http . 80) ; RFC 2616 (https . 443) ; RFC 2818 (shttp . 80) ; RFC 2660 (ftp . 21) ; RFC 959; no official URI scheme defined ;; nonstandard, but could be useful (svn+ssh . 22) (svn . 3690) )) ;; A common URI is a generic URI plus stored decoded versions of most components (defstruct URI-common generic username password host path query fragment) (define-record-printer (URI-common x out) (fprintf out "#(URI-common scheme=~S port=~S host=~S path=~S query=~S fragment=~S" (generic:uri-scheme (URI-common-generic x)) (generic:uri-port (URI-common-generic x)) (URI-common-host x) (URI-common-path x) (URI-common-query x) (URI-common-fragment x))) (define (decode-string* s) (and s (generic:uri-decode-string s))) (define (uri-reference u) (uri-generic->uri (generic:uri-reference u))) (define (absolute-uri u) (uri-generic->uri (generic:absolute-uri u))) (define (uri-generic->uri uri) (make-URI-common generic: uri username: (decode-string* (generic:uri-username uri)) password: (decode-string* (generic:uri-password uri)) host: (decode-string* (generic:uri-host uri)) path: (decode-path (generic:uri-path uri)) query: (form-urldecode (generic:uri-query uri)) fragment: (decode-string* (generic:uri-fragment uri)))) (define (uri->uri-generic uri) (URI-common-generic uri)) ;; Accessors (define uri? URI-common?) (define uri-scheme (compose generic:uri-scheme URI-common-generic)) (define uri-username URI-common-username) (define uri-password URI-common-password) (define uri-host URI-common-host) (define uri-path URI-common-path) (define uri-query URI-common-query) (define uri-fragment URI-common-fragment) (define (uri-port uc) (let ((u (URI-common-generic uc))) (or (generic:uri-port u) (alist-ref (generic:uri-scheme u) default-ports)))) ;; Normalize an URI, but only if there's a scheme present ;; - Remove port if it's the default port for this scheme ;; - Make path empty path if not specified (define (normalize-uri u) (let ((port (generic:uri-port u))) (when (generic:uri-scheme u) (when (eqv? port (alist-ref (generic:uri-scheme u) default-ports)) (set! u (generic:update-uri u port: #f)))) (when (or (not (generic:uri-path u)) (null? (generic:uri-path u))) (set! u (generic:update-uri u uri-path: '(/ "")))) u)) (define (encode-string* s . rest) (and s (apply generic:uri-encode-string s rest))) (define update-uri (let ((unset (list 'unset))) (lambda (uc #!key (scheme unset) (username unset) (password unset) (host unset) (port unset) (path unset) (query unset) (fragment unset)) (let ((uc (update-URI-common uc))) ;; new copy (unless (eq? scheme unset) (URI-common-generic-set! uc (generic:update-uri (URI-common-generic uc) scheme: scheme))) (unless (eq? username unset) (URI-common-generic-set! uc (generic:update-uri (URI-common-generic uc) username: (encode-string* username))) (URI-common-username-set! uc username)) (unless (eq? password unset) (URI-common-generic-set! uc (generic:update-uri (URI-common-generic uc) password: (encode-string* password))) (URI-common-password-set! uc password)) (unless (eq? host unset) (URI-common-generic-set! uc (generic:update-uri (URI-common-generic uc) host: (encode-string* host))) (URI-common-host-set! uc host)) (unless (eq? port unset) (URI-common-generic-set! uc (generic:update-uri (URI-common-generic uc) port: port))) (unless (eq? path unset) (URI-common-generic-set! uc (generic:update-uri (URI-common-generic uc) path: (encode-path path))) (URI-common-path-set! uc path)) (unless (eq? query unset) (URI-common-generic-set! uc (generic:update-uri (URI-common-generic uc) query: (form-urlencode query))) (URI-common-query-set! uc query)) (unless (eq? fragment unset) (URI-common-generic-set! uc (generic:update-uri (URI-common-generic uc) fragment: (encode-string* fragment char-set:query/fragment))) (URI-common-fragment-set! uc fragment)) uc)))) (define (encode-path p) (and p (match p (('/ . rst) (cons '/ (map generic:uri-encode-string rst))) (else (map generic:uri-encode-string p))))) ;; Characters allowed in queries and fragments (define char-set:query/fragment (char-set-difference (char-set-complement generic:char-set:uri-unreserved) (string->char-set ":@?/") generic:char-set:sub-delims)) ;; Handling of application/x-www-form-urlencoded data ;; ;; This implements both HTML 4's specification ;; (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1) ;; and XHTML XForms' specification ;; (http://www.w3.org/TR/xforms/#structure-model-submission) ;; ;; The latter is a more generalised form of the former, as it allows ;; the user to specify a custom separator character. The HTML 4 ;; spec also contains a recommendation ;; (http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2) ;; that semicolons should be used instead of ampersands as a separator. ;; However, it provides no mechanism to select the separator to use ;; when submitting a form, which makes it a pretty useless recommendation. ;; This recommendation also complicates matters on the server because one ;; would need to handle both form-generated GET query parameters and ;; hardcoded GET query parameters as specified in anchors. ;; ;; There's also a 2006 Internet-Draft by Bjoern Hoehrmann that was ;; intended to standardize this, but it was allowed to expire in 2007: ;; http://ietfreport.isoc.org/idref/draft-hoehrmann-urlencoded (define form-urlencoded-separator (make-parameter ";&")) (define (form-urlencode alist #!key (separator (form-urlencoded-separator))) (and alist (not (null? alist)) (let* ((separator-chars (->char-set separator)) (join-string (string-take (if (string? separator-chars) separator-chars (char-set->string separator-chars)) 1)) (enc (lambda (s) (string-translate* (generic:uri-encode-string s (char-set-union separator-chars (char-set #\= #\+) (char-set-delete char-set:query/fragment #\space))) '((" " . "+")))))) (string-join (reverse (fold (lambda (arg query) (match arg ((a . #f) query) ((a . #t) (cons (enc (->string a)) query)) ((a . b) (cons (sprintf "~A=~A" (enc (->string a)) (enc b)) query)))) '() alist)) join-string)))) (define (form-urldecode query #!key (separator (form-urlencoded-separator))) (if query (map (lambda (part) (let ((idx (string-index part #\=)) (decode (lambda (s) (generic:uri-decode-string (string-translate* s '(("+" . "%20"))))))) (if idx (cons (string->symbol (decode (string-take part idx))) (decode (string-drop part (add1 idx)))) (cons (string->symbol (decode part)) #t)))) (string-split query (char-set->string (->char-set separator)) #t)) '())) ; _always_ provide a list interface for the query, even if not there (define (decode-path p) (and p (match p (('/ . rst) (cons '/ (map generic:uri-decode-string rst))) (else (map generic:uri-decode-string p))))) ;; Simple convenience procedures (define (uri->string uri . args) (apply generic:uri->string (URI-common-generic uri) args)) (define (wrap proc) (lambda args (uri-generic->uri (apply proc (map URI-common-generic args))))) (define uri-relative-to (wrap generic:uri-relative-to)) (define uri-relative-from (wrap generic:uri-relative-from)) (define uri-normalize-case (wrap generic:uri-normalize-case)) (define uri-normalize-path-segments (wrap generic:uri-normalize-path-segments)) )