;; SXML serializer into XML and HTML ; ; Partial conformance with ; [1] XSLT 2.0 and XQuery 1.0 Serialization ; W3C Candidate Recommendation 3 November 2005 ; http://www.w3.org/TR/2005/CR-xslt-xquery-serialization-20051103/ ; ; This software is in Public Domain. ; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND. ; ; Please send bug reports and comments to: ; lizorkin@ispras.ru Dmitry Lizorkin ; Prefix for global identifiers in this module is `srl:' ; short for "serialization" ; Requires: function `filter' from SRFI-1 ; syntax `cond-expand' from SRFI-0 ; In particular, for PLT, `filter' can be acquired as follows: ;(require (lib "filter.ss" "srfi/1")) ;========================================================================== ; Basic ; `map' and `append' in a single pass: ; (srl:map-append func lst) = (apply append (map func lst)) ; A simplified analogue of `map-union' from "sxpathlib.scm" (define (srl:map-append func lst) (if (null? lst) lst (append (func (car lst)) (srl:map-append func (cdr lst))))) ; procedure srl:apply-string-append :: STR-LST -> STRING ; str-lst ::= (listof string) ; Concatenates `str-lst' members into a single string ; (srl:apply-string-append str-lst) = (apply string-append str-lst) (cond-expand (chicken ; In Chicken, procedures are generally limited to 126 arguments ; http://www.call-with-current-continuation.org/ ; Due to this Chicken limitation, we cannot apply `string-append' directly ; for a potentially long `str-lst' ; Similar to R5RS 'list-tail' but returns the new list consisting of the ; first 'k' members of 'lst' (define (srl:list-head lst k) (if (or (null? lst) (zero? k)) '() (cons (car lst) (srl:list-head (cdr lst) (- k 1))))) ; Because of Chicken 126-argument limitation, I do not care of intermediate ; garbage produced in the following solution: (define (srl:apply-string-append str-lst) (cond ((null? str-lst) "") ((null? (cdr str-lst)) (car str-lst)) (else ; at least two members (let ((middle (inexact->exact (round (/ (length str-lst) 2))))) (string-append (srl:apply-string-append (srl:list-head str-lst middle)) (srl:apply-string-append (list-tail str-lst middle))))))) ) (else (define (srl:apply-string-append str-lst) (apply string-append str-lst)) )) ; Analogue of `assoc' ; However, search is performed by `cdr' of each alist member and `string=?' is ; used for comparison (define (srl:assoc-cdr-string= item alist) (cond ((null? alist) #f) ((string=? (cdar alist) item) (car alist)) (else (srl:assoc-cdr-string= item (cdr alist))))) ; Analogue of `member' for strings that uses case insensitive comparison (define (srl:member-ci str lst) (cond ((null? lst) #f) ((string-ci=? str (car lst)) lst) (else (srl:member-ci str (cdr lst))))) ; Analogue of `member' ; The end of the `lst' is returned, from the first member that satisfies ; the `pred?' (define (srl:mem-pred pred? lst) (cond ((null? lst) #f) ((pred? (car lst)) lst) (else (srl:mem-pred pred? (cdr lst))))) ;------------------------------------------------- ; Borrowed from "char-encoding.scm" ; The newline character (cond-expand ((or scheme48 scsh) (define srl:char-nl (ascii->char 10))) (else (define srl:char-nl (integer->char 10)))) ; A string consisting of a single newline character (define srl:newline (string srl:char-nl)) ;------------------------------------------------- ; Borrowed from "sxpathlib.scm" ; A simplified implementation of `select-kids' is sufficienf for the serializer (define (srl:select-kids test-pred?) (lambda (node) ; node or node-set (cond ((null? node) node) ((not (pair? node)) '()) ; No children ((symbol? (car node)) (filter test-pred? (cdr node))) (else (srl:map-append (srl:select-kids test-pred?) node))))) ;------------------------------------------------- ; Borrowed from "modif.scm" ; Separates the list into two lists with respect to the predicate ; Returns: (values res-lst1 res-lst2) ; res-lst1 - contains all members from the input lst that satisfy the pred? ; res-lst2 - contains the remaining members of the input lst (define (srl:separate-list pred? lst) (let loop ((lst lst) (satisfy '()) (rest '())) (cond ((null? lst) (values (reverse satisfy) (reverse rest))) ((pred? (car lst)) ; the first member satisfies the predicate (loop (cdr lst) (cons (car lst) satisfy) rest)) (else (loop (cdr lst) satisfy (cons (car lst) rest)))))) ;------------------------------------------------- ; Borrowed from "fragments.scm" ; A simplified implementation of `sxml:clean-fragments' (define (srl:clean-fragments fragments) (reverse (let loop ((fragments fragments) (result '())) (cond ((null? fragments) result) ((null? (car fragments)) (loop (cdr fragments) result)) ((pair? (car fragments)) (loop (cdr fragments) (loop (car fragments) result))) (else (loop (cdr fragments) (cons (car fragments) result))))))) ; A very much simplified analogue of `sxml:display-fragments' for fragments ; that have no more than two levels of nesting ; fragments-level2 ::= (listof fragments-level1) ; fragments-level1 ::= string | (listof string) (define (srl:display-fragments-2nesting fragments-level2 port) (for-each (lambda (level1) (if (pair? level1) (for-each (lambda (x) (display x port)) level1) (display level1 port))) fragments-level2)) ;========================================================================== ; Helper SXML utilities ; Splits an SXML `name' into namespace id/uri and local part ; Returns: (cons namespace-id local-part) ; local-part - string ; namespace-id - string or #f if the `name' does not have a prefix (define (srl:split-name name) (let* ((name-str (symbol->string name)) (lng (string-length name-str))) (let iter ((i (- lng 1))) (cond ((< i 0) ; name scanned, #\: not found (cons #f name-str)) ((char=? (string-ref name-str i) #\:) (cons (substring name-str 0 i) (substring name-str (+ i 1) lng))) (else (iter (- i 1))))))) ; Converts SXML atomic object to a string. Keeps non-atomic object unchanged. ; A simplified analogue of applying the XPath `string(.)' function to atomic ; object. (define (srl:atomic->string obj) (cond ((or (pair? obj) ; non-atomic type (string? obj)) obj) ((number? obj) (number->string obj)) ((boolean? obj) (if obj "true" "false")) (else ; unexpected type ; ATTENTION: should probably raise an error here obj))) ; Whether an SXML element is empty (define (srl:empty-elem? elem) (or (null? (cdr elem)) ; just the name (and (null? (cddr elem)) ; just the name and attributes (pair? (cadr elem)) (eq? (caadr elem) '@)) (and (not (null? (cddr elem))) ; name, attributes, and SXML 2.X aux-list (null? (cdddr elem)) (pair? (caddr elem)) (eq? (caaddr elem) '@@)))) ;------------------------------------------------- ; Handling SXML namespaces ; is defined in the SXML specification as ; ::= ( "URI" original-prefix? ) ; Conventional namespace prefix referred to in XML-related specifications ; These prefixes are used for serializing the corresponding namespace URIs by ; default, unless a different prefix is supplied (define srl:conventional-ns-prefixes '((dc . "http://purl.org/dc/elements/1.1/") (fo . "http://www.w3.org/1999/XSL/Format") (rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#") (rng . "http://relaxng.org/ns/structure/1.0") (xlink . "http://www.w3.org/1999/xlink") (xqx . "http://www.w3.org/2005/XQueryX") (xsd . "http://www.w3.org/2001/XMLSchema") (xsi . "http://www.w3.org/2001/XMLSchema-instance") (xsl . "http://www.w3.org/1999/XSL/Transform"))) ; Returns (listof ) for the given SXML element (define (srl:namespace-assoc-for-elem elem) ((srl:select-kids (lambda (node) (pair? node))) ((srl:select-kids (lambda (node) (and (pair? node) (eq? (car node) '*NAMESPACES*)))) (append ((srl:select-kids ; compatibility with SXML 3.0 (lambda (node) (and (pair? node) (eq? (car node) '@)))) ((srl:select-kids (lambda (node) (and (pair? node) (eq? (car node) '@)))) elem)) ((srl:select-kids ; compatibility with SXML 2.X (lambda (node) (and (pair? node) (eq? (car node) '@@)))) elem))))) ; Returns (listof ) for the SXML document node (define (srl:ns-assoc-for-top doc) ((srl:select-kids (lambda (node) (pair? node))) ((srl:select-kids (lambda (node) (and (pair? node) (eq? (car node) '*NAMESPACES*)))) ((srl:select-kids (lambda (node) ; After sequence normalization [1], the SXML 3.0 aux-list is used ; at the top-level (and (pair? node) (eq? (car node) '@)))) doc)))) ; Extract original prefix-binding from `namespace-assoc-lst' ; namespace-assoc-lst ::= (listof ) ; ::= ( "URI" original-prefix? ) ; Returns: (listof (cons original-prefix "URI")) (define (srl:extract-original-prefix-binding namespace-assoc-lst) (map (lambda (triple) (cons (caddr triple) (cadr triple))) (filter ; specifies original prefix (lambda (memb) (= (length memb) 3)) namespace-assoc-lst))) ;------------------------------------------------- ; Handling xml:space attribute ; Returns the new value of `space-preserve?' in accordance with the value of ; xml:space attribute probably presented for the given SXML element `elem' ; space-preserve? ::= #t | #f - whether the SXML subtree inherits the ; xml:space attribute with the value "preserve" (define (srl:update-space-specifier elem space-preserve?) (let ((xml-space-val ((srl:select-kids string?) ((srl:select-kids (lambda (node) (and (pair? node) (eq? (car node) 'xml:space)))) ((srl:select-kids (lambda (node) (and (pair? node) (eq? (car node) '@)))) elem))))) (cond ((null? xml-space-val) ; no xml:space attribute space-preserve?) ((string=? (car xml-space-val) "preserve") #t) ((string=? (car xml-space-val) "default") #f) (else space-preserve?)))) ;========================================================================== ; Sequence normalization ; Sect. 2 in [1] ; Performs sequence normalization in accordance with [1] ; Returns the SXML document node (define (srl:normalize-sequence node-or-sequence) (letrec ((normaliz-step-1 ; "If the sequence that is input to serialization is empty, create a ; sequence S1 that consists of a zero-length string. Otherwise, copy ; each item in the sequence that is input to serialization to create ; the new sequence S1." [1] (lambda (node-or-seq) (cond ((null? node-or-seq) ; empty sequence '("")) ; Effect of `as-nodeset' from "sxpathlib.scm" ((or (not (pair? node-or-seq)) ; single item (symbol? (car node-or-seq))) ; single node (list node-or-seq)) (else node-or-seq)))) (normaliz-step-2 ; "For each item in S1, if the item is atomic, obtain the lexical ; representation of the item by casting it to an xs:string and copy ; the string representation to the new sequence; otherwise, copy the ; item, which will be a node, to the new sequence. The new sequence is ; S2." [1] (lambda (seq) (map (lambda (item) (srl:atomic->string item)) seq))) (normaliz-step-3 ; "For each subsequence of adjacent strings in S2, copy a single ; string to the new sequence equal to the values of the strings in the ; subsequence concatenated in order, each separated by a single space. ; Copy all other items to the new sequence. The new sequence is S3." (lambda (seq) (let loop ((src (reverse seq)) (res '())) (cond ((null? src) res) ((string? (car src)) (let adjacent ((src (cdr src)) (adj-strs (list (car src)))) (cond ((null? src) ; source sequence is over (cons (srl:apply-string-append adj-strs) res)) ((string? (car src)) (adjacent (cdr src) (cons (car src) (cons " " adj-strs)))) (else (loop (cdr src) (cons (car src) (cons (srl:apply-string-append adj-strs) res))))))) (else (loop (cdr src) (cons (car src) res))))))) ; Step 4 from [1] is redundant for SXML, since SXML text nodes are not ; distinquished from strings (normaliz-step-5 ; "For each item in S4, if the item is a document node, copy its ; children to the new sequence; otherwise, copy the item to the new ; sequence. The new sequence is S5." [1] (lambda (seq) (cond ((null? seq) seq) ((and (pair? (car seq)) (eq? (caar seq) '*TOP*)) ; Document node (append (cdar seq) (normaliz-step-5 (cdr seq)))) (else (cons (car seq) (normaliz-step-5 (cdr seq))))))) (normaliz-step-6 ; "For each subsequence of adjacent text nodes in S5, copy a single ; text node to the new sequence equal to the values of the text nodes ; in the subsequence concatenated in order. Any text nodes with values ; of zero length are dropped. Copy all other items to the new sequence. ; The new sequence is S6." [1] ; Much like Step 3; however, a space between adjacent strings is not ; inserted and the zero-length strings are removed (lambda (seq) (let loop ((src (reverse seq)) (res '())) (cond ((null? src) res) ((string? (car src)) (if (string=? (car src) "") ; empty string (loop (cdr src) res) (let adjacent ((src (cdr src)) (adj-strs (list (car src)))) (cond ((null? src) ; source sequence is over (cons (srl:apply-string-append adj-strs) res)) ((string? (car src)) ; If it is an empty string, the effect of its presense ; will be removed by string concatenation (adjacent (cdr src) (cons (car src) adj-strs))) (else (loop (cdr src) (cons (car src) (cons (srl:apply-string-append adj-strs) res)))))))) (else (loop (cdr src) (cons (car src) res))))))) (normaliz-step-7 ; "It is a serialization error [err:SENR0001] if an item in S6 is an ; attribute node or a namespace node. Otherwise, construct a new ; sequence, S7, that consists of a single document node and copy all ; the items in the sequence, which are all nodes, as children of that ; document node." [1] ; On this step, we should take care of SXML aux-lists ; ATTENTION: should generally raise an error in the presense of ; attribute nodes in a sequence. By nature of SXML 3.0, however, ; attribute nodes on the top level are treated as aux-nodes (lambda (seq) (call-with-values (lambda () (srl:separate-list (lambda (item) (and (pair? item) (or (eq? (car item) '@@) ; aux-list in SXML 2.X (eq? (car item) '@) ; aux-list in SXML 3.0 ))) seq)) (lambda (aux-lists body) (if (null? aux-lists) `(*TOP* ,@body) `(*TOP* (@ ,@(srl:map-append cdr aux-lists)) ,@body))))))) ; TODO: According to [1], if the normalized sequence does not have exactly ; one element node node child or has text node children, then the ; serialized output should be an XML external general parsed entity. ; However, external parsed entities are not currently handled by SSAX ; parser. Should think of a compromise between conformance and practical ; usability. (normaliz-step-7 (normaliz-step-6 (normaliz-step-5 (normaliz-step-3 (normaliz-step-2 (normaliz-step-1 node-or-sequence)))))))) ;========================================================================== ; Character escaping during string serialization ; Escaping in accordance with [1] and [2]: ; ; [2] Extensible Markup Language (XML) 1.0 (Third Edition) ; W3C Recommendation 04 February 2004 ; http://www.w3.org/TR/2004/REC-xml-20040204 ;------------------------------------------------- ; CDATA sections ; Returns #f if a given character `ch' is in XML character range [2] ; Otherwise, returns a string representing the character reference for that ; character (define (srl:xml-char-escaped ch) (let ((code (char->integer ch))) (if (or (= code 9) (= code 10) (= code 13) (and (>= code 32) (<= code 55295)) (and (>= code 57344) (<= code 65533)) (>= code 65536)) #f (string-append "&#" (number->string code) ";" )))) ; Represents a given string `str' as a CDATA section (define (srl:string->cdata-section str) (let ((flush-buffer ; If a `buffer' is non-empty, converts it to a CDATA string and ; cons'es this string to `res'. Returns a new res (lambda (buffer res) (if (null? buffer) res (cons (string-append "string (reverse buffer)) "]]>") res))))) (let loop ((src (string->list str)) (buffer '()) (res '(""))) (cond ((null? src) (srl:apply-string-append (reverse (flush-buffer buffer res)))) ((srl:xml-char-escaped (car src)) => (lambda (charref) (loop (cdr src) '() (cons charref (flush-buffer buffer res))))) ((and (char=? (car src) #\]) (not (null? buffer)) (char=? (car buffer) #\])) (loop (cdr src) '() (cons (string (car buffer) (car src)) ;= "]]" (flush-buffer (cdr buffer) res)))) (else ; any other character (loop (cdr src) (cons (car src) buffer) res)))))) ;------------------------------------------------- ; Character data and attribute values ; Associative lists of characters to be escaped in XML character data and ; attribute values respectively [2] (define srl:escape-alist-char-data '((#\& . "&") (#\< . "<") (#\> . ">"))) (define srl:escape-alist-att-value (append `((#\' . "'") (#\" . """) ; Escaping the newline character in attribute value (,srl:char-nl . " ")) srl:escape-alist-char-data)) (define srl:escape-alist-html-att '((#\& . "&") (#\> . ">") (#\' . "'") (#\" . """))) ; Escape a string with the `srl:xml-char-escaped' and with the `escape-alist' ; supplied ; escape-alist ::= (listof (cons char string)) ; html-method? ::= #t | #f ; Returns the escaped string (define (srl:string->escaped str escape-alist html-method?) (let loop ((src (string->list str)) (adj-chars '()) (res '())) (cond ((null? src) (srl:apply-string-append (reverse (cons (list->string (reverse adj-chars)) res)))) ((assv (car src) escape-alist) ; current character matches the alist => (lambda (pair) (if ; Subsect. 7.2 in [1]: ; "The HTML output method MUST NOT escape a & character occurring ; in an attribute value immediately followed by a { character" (and (char=? (car src) #\&) html-method? (not (null? (cdr src))) (char=? (cadr src) #\{)) (loop (cdr src) (cons (car src) adj-chars) res) (loop (cdr src) '() (cons (cdr pair) (cons (list->string (reverse adj-chars)) res)))))) ((srl:xml-char-escaped (car src)) => (lambda (esc) (loop (cdr src) '() (cons esc (cons (list->string (reverse adj-chars)) res))))) (else (loop (cdr src) (cons (car src) adj-chars) res))))) (define (srl:string->char-data str) (srl:string->escaped str srl:escape-alist-char-data #f)) (define (srl:string->att-value str) (srl:string->escaped str srl:escape-alist-att-value #f)) (define (srl:string->html-att str) (srl:string->escaped str srl:escape-alist-html-att #t)) ;------------------------------------------------- ; Serializing entities produced by HtmlPrag ; ; [3] Neil W. Van Dyke. ; HtmlPrag: Pragmatic Parsing and Emitting of HTML using SXML and SHTML ; Version 0.16, 2005-12-18, http://www.neilvandyke.org/htmlprag/ ; "..SHTML adds a special & syntax for non-ASCII (or non-Extended-ASCII) ; characters. The syntax is (& val), where val is a symbol or string naming ; with the symbolic name of the character, or an integer with the numeric ; value of the character." [3] ; entity ::= `(& ,val) ; val ::= symbol | string | number ; Returns the string representation for the entity (define (srl:shtml-entity->char-data entity) ; TODO: think of an appropriate error message for an ill-formed entity (if (= (length entity) 2) (let ((val (cadr entity))) (cond ((symbol? val) (string-append "&" (symbol->string val) ";") ) ((string? val) (string-append "&" val ";") ) ((and (number? val) (integer? val) (> val 0)) ; to guarantee well-formedness of the result produced (string-append "&#" (number->string val) ";") ) (else ; should signal of an error ""))) "")) ;========================================================================== ; Serialization for markup ; declared-ns-prefixes ::= (listof (cons prefix-string namespace-uri)) ; prefix-string, namespace-uri - strings ; Returns the string representation for a QName ; prefix-string ::= string or #f if the name contains no prefix ; TODO: should check names for proper characters (define (srl:qname->string prefix-string local-part) (if prefix-string (string-append prefix-string ":" local-part) local-part)) ;------------------------------------------------- ; Different types of nodes ; Returns the list of strings that constitute the serialized representation ; for the attribute. Inserts a whitespace symbol in the beginning ; method ::= 'xml | 'html (define (srl:attribute->str-lst prefix-string local-part att-value method) (let ((attval (srl:atomic->string att-value))) (cond (prefix-string (list " " prefix-string ":" local-part "=\"" ((if (eq? method 'html) srl:string->html-att srl:string->att-value) attval) "\"")) ((eq? method 'html) (if (string=? local-part attval) ; boolean attribute (list " " local-part) (list " " local-part "=\"" (srl:string->html-att attval) "\""))) (else ; unprefixed attribute, XML output method (list " " local-part "=\"" (srl:string->att-value attval) "\""))))) ; Returns the list of strings that constitute the serialized representation ; for the namespace declaration. Inserts a whitespace symbol in the beginning ; ATTENTION: character escaping for namespace URI may be improper, study this ; issue (define (srl:namespace-decl->str-lst prefix-string namespace-uri) (list " xmlns:" prefix-string "=\"" (srl:string->att-value namespace-uri) "\"")) ; According to SXML specification, ; ::= ( *COMMENT* "comment string" ) ; ATTENTION: in the case of ill-formed comment, should probably report an error ; instead of recovering (define (srl:comment->str-lst comment-node) (let ((proper-string-in-comment? ; Whether a proper string occurs in the comment node. Thus, ; "For compatibility, the string '--' (double-hyphen) MUST NOT occur ; within comments. ... Note that the grammar does not allow a comment ; ending in --->." [2] (lambda (str) (let ((lng (string-length str))) (or (zero? lng) ; empty string allowed in comment [2] (and (not (char=? (string-ref str 0) #\-)) (let iter ((i 1) (prev-hyphen? #f)) (cond ((>= i lng) (not prev-hyphen?) ; string must not end with hyphen ) ((char=? (string-ref str i) #\-) (if prev-hyphen? #f (iter (+ i 1) #t))) (else (iter (+ i 1) #f)))))))))) (if (and (= (length comment-node) 2) (string? (cadr comment-node)) (proper-string-in-comment? (cadr comment-node))) (list "") (list "") ; should probably report of an error ))) ; According to SXML specification, ; ::= ( *PI* pi-target ; ? "processing instruction content string" ) ; method ::= 'xml | 'html ; Subsect 7.3 in [1]: "The HTML output method MUST terminate processing ; instructions with > rather than ?>." ; ATTENTION: in the case of ill-formed PI content string, should probably ; report an error instead of recovering (define (srl:processing-instruction->str-lst pi-node method) (let ((string-not-contain-charlist? ; Whether `str' does not contain a sequence of characters from ; `char-lst' as its substring (lambda (str char-lst) (let ((lng (string-length str))) (or (zero? lng) ; empty string doesn't contain (let iter ((i 0) (pattern char-lst)) (cond ((>= i lng) #t) ((char=? (string-ref str i) (car pattern)) (if (null? (cdr pattern)) ; it is the last member #f ; contains (iter (+ i 1) (cdr pattern)))) (else (iter (+ i 1) char-lst))))))))) (if (or (null? (cdr pi-node)) (not (symbol? (cadr pi-node)))) ; no target => ill-formed PI '() ; should probably raise an error (let ((content (filter string? (cddr pi-node)))) (cond ((null? content) ; PI with no content - correct situation (list "string (cadr pi-node)) (if (eq? method 'html) ">" "?>"))) ; Subsect. 7.3 in [1]: "It is a serialization error to use the HTML ; output method when > appears within a processing instruction in ; the data model instance being serialized." ((and (null? (cdr content)) ; only a single member (string-not-contain-charlist? (car content) (if (eq? method 'html) '(#\>) '(#\? #\>)))) (list "string (cadr pi-node)) " " (car content) (if (eq? method 'html) ">" "?>"))) (else ; should probably signal of an error '())))))) ;------------------------------------------------- ; SXML element ; Returns: (values ; prefix-string namespace-uri local-part declaration-required?) ; prefix-string - namespace prefix to be given to the serialized name: a string ; or #f if no prefix is required ; namespace-uri - the namespace URI for the given `name', #f if the name has no ; namespace URI ; local-part - local part of the name ; declaration-required ::= #t | #f - whether `prefix' has to be declared (define (srl:name->qname-components name ns-prefix-assig namespace-assoc declared-ns-prefixes) (let ((use-ns-id-or-generate-prefix (lambda (ns-id) (if (and ns-id ; try to use namespace-id as a prefix (not (assq (string->symbol ns-id) ns-prefix-assig)) (not (assoc ns-id declared-ns-prefixes))) ns-id ; Otherwise - generate unique prefix ; Returns a prefix-string not presented in ns-prefix-assig and ; declared-ns-prefixes (let loop ((i 1)) (let ((candidate (string-append "prfx" (number->string i)))) (if (or (assoc candidate declared-ns-prefixes) (assq (string->symbol candidate) ns-prefix-assig)) (loop (+ i 1)) candidate)))))) (n-parts (srl:split-name name))) (cond ((not (car n-parts)) ; no namespace-id => no namespace (values #f #f (cdr n-parts) ; name as a string #f)) ((string-ci=? (car n-parts) "xml") ; reserved XML namespace (values (car n-parts) "http://www.w3.org/XML/1998/namespace" (cdr n-parts) #f)) (else (call-with-values (lambda () (cond ((assq (string->symbol (car n-parts)) ; suppose a namespace-id namespace-assoc) => (lambda (lst) (values (cadr lst) (car n-parts)))) (else ; first part of a name is a namespace URI (values (car n-parts) #f)))) (lambda (namespace-uri ns-id) (cond ((srl:assoc-cdr-string= namespace-uri declared-ns-prefixes) => (lambda (pair) ; Prefix for that namespace URI already declared (values (car pair) namespace-uri (cdr n-parts) #f))) (else ; namespace undeclared (values (cond ((srl:assoc-cdr-string= namespace-uri ns-prefix-assig) => (lambda (pair) ; A candidate namespace prefix is supplied from the user (let ((candidate (symbol->string (car pair)))) (if (assoc candidate declared-ns-prefixes) ; The prefix already bound to a different namespace ; Avoid XML prefix re-declaration (use-ns-id-or-generate-prefix ns-id) candidate)))) (else (use-ns-id-or-generate-prefix ns-id))) namespace-uri (cdr n-parts) #t ; in any case, prefix declaration is required ))))))))) ; Constructs start and end tags for an SXML element `elem' ; method ::= 'xml | 'html ; Returns: (values start-tag end-tag ; ns-prefix-assig namespace-assoc declared-ns-prefixes) ; start-tag ::= (listof string) ; end-tag ::= (listof string) or #f for empty element ; TODO: escape URI attributes for HTML ; TODO: indentation probably should be made between attribute declarations (define (srl:construct-start-end-tags elem method ns-prefix-assig namespace-assoc declared-ns-prefixes) (let ((ns-assoc-here (srl:namespace-assoc-for-elem elem)) (empty? (srl:empty-elem? elem))) (let ((ns-prefix-assig (append (srl:extract-original-prefix-binding ns-assoc-here) ns-prefix-assig)) (namespace-assoc (append ns-assoc-here namespace-assoc))) (call-with-values (lambda () (srl:name->qname-components ; element name (car elem) ns-prefix-assig namespace-assoc declared-ns-prefixes)) (lambda (elem-prefix elem-uri elem-local elem-decl-required?) (let loop ((attrs (reverse ((srl:select-kids (lambda (node) ; not SXML 3.0 aux-list (and (pair? node) (not (eq? (car node) '@))))) ((srl:select-kids (lambda (node) (and (pair? node) (eq? (car node) '@)))) elem)))) (start-tag (if (or (not empty?) (and (eq? method 'html) (not elem-prefix) (srl:member-ci elem-local ; ATTENTION: should probably move this list ; to a global const '("area" "base" "basefont" "br" "col" "frame" "hr" "img" "input" "isindex" "link" "meta" "param")))) '(">") '(" />"))) (ns-prefix-assig ns-prefix-assig) (namespace-assoc namespace-assoc) (declared-ns-prefixes ; As if element namespace already declared (if elem-decl-required? (cons (cons elem-prefix elem-uri) declared-ns-prefixes) declared-ns-prefixes))) (if (null? attrs) ; attributes scanned (let ((elem-name (srl:qname->string elem-prefix elem-local))) (values (cons "<" (cons elem-name (if elem-decl-required? (cons (srl:namespace-decl->str-lst elem-prefix elem-uri) start-tag) start-tag))) (if empty? #f (list "")) ns-prefix-assig namespace-assoc declared-ns-prefixes)) (call-with-values (lambda () (srl:name->qname-components (caar attrs) ; attribute name ns-prefix-assig namespace-assoc declared-ns-prefixes)) (lambda (attr-prefix attr-uri attr-local attr-decl-required?) (let ((start-tag (cons (srl:attribute->str-lst attr-prefix attr-local ; TODO: optimize for HTML output method (if (null? (cdar attrs)) ; no attribute value attr-local (cadar attrs)) method) start-tag))) (loop (cdr attrs) (if attr-decl-required? (cons (srl:namespace-decl->str-lst attr-prefix attr-uri) start-tag) start-tag) ns-prefix-assig namespace-assoc (if attr-decl-required? (cons (cons attr-prefix attr-uri) declared-ns-prefixes) declared-ns-prefixes)))))))))))) ;========================================================================== ; Recursively walking the tree of SXML elements ; indentation ::= (listof string) or #f - a list of whitespace strings ; depending on the node nesting or #f if no indent is required ; space-preserve? ::= #t | #f - whether the subtree inherits the xml:space ; attribute with the value "preserve" ; cdata-section-elements ::= (listof symbol) - list of element names whose ; child nodes are to be output with CDATA section ; text-node-handler :: string -> string - a function that performs a proper ; character escaping for the given node if it is a text node ; TODO: do not insert whitespaces adjacent to HTML %inline elements in HTML ; output method (define (srl:node->nested-str-lst-recursive node method ns-prefix-assig namespace-assoc declared-ns-prefixes indentation space-preserve? cdata-section-elements text-node-handler) (if (not (pair? node)) ; text node (text-node-handler (srl:atomic->string node)) (case (car node) ; node name ((*COMMENT*) (srl:comment->str-lst node)) ((*PI*) (srl:processing-instruction->str-lst node method)) ((&) (srl:shtml-entity->char-data node)) ((*DECL*) ; recovering for non-SXML nodes '()) (else ; otherwise - an element node (call-with-values (lambda () (srl:construct-start-end-tags node method ns-prefix-assig namespace-assoc declared-ns-prefixes)) (lambda (start-tag end-tag ns-prefix-assig namespace-assoc declared-ns-prefixes) (if (not end-tag) ; empty element => recursion stops start-tag (let ((space-preserve? (srl:update-space-specifier node space-preserve?)) (text-node-handler (cond ((memq (car node) cdata-section-elements) srl:string->cdata-section) ((and (eq? method 'html) (srl:member-ci (symbol->string (car node)) '("script" "style"))) ; No escaping for strings inside these HTML elements (lambda (str) str)) (else srl:string->char-data))) (content ((srl:select-kids (lambda (node) ; TODO: support SXML entities (not (and (pair? node) (memq (car node) '(@ @@ *ENTITY*)))))) node))) (call-with-values (lambda () (cond ((or (not indentation) (and (eq? method 'html) (srl:member-ci (symbol->string (car node)) '("pre" "script" "style" "textarea")))) ; No indent - on this level and subsequent levels (values #f #f)) ((or space-preserve? (srl:mem-pred ; at least a single text node (lambda (node) (not (pair? node))) content)) ; No indent on this level, possible indent on nested levels (values #f indentation)) (else (values (cons srl:newline indentation) (cons (car indentation) indentation))))) (lambda (indent-here indent4recursive) (if indent-here (append start-tag (map (lambda (kid) (list indent-here (srl:node->nested-str-lst-recursive kid method ns-prefix-assig namespace-assoc declared-ns-prefixes indent4recursive space-preserve? cdata-section-elements text-node-handler))) content) (cons srl:newline (cons (cdr indentation) end-tag))) (append start-tag (map (lambda (kid) (srl:node->nested-str-lst-recursive kid method ns-prefix-assig namespace-assoc declared-ns-prefixes indent4recursive space-preserve? cdata-section-elements text-node-handler)) content) end-tag)))))))))))) (define (srl:display-node-out-recursive node port method ns-prefix-assig namespace-assoc declared-ns-prefixes indentation space-preserve? cdata-section-elements text-node-handler) (if (not (pair? node)) ; text node (display (text-node-handler (srl:atomic->string node)) port) (case (car node) ; node name ((*COMMENT*) (for-each (lambda (x) (display x port)) (srl:comment->str-lst node))) ((*PI*) (for-each (lambda (x) (display x port)) (srl:processing-instruction->str-lst node method))) ((&) (display (srl:shtml-entity->char-data node) port)) ((*DECL*) ; recovering for non-SXML nodes #f) (else ; otherwise - an element node (call-with-values (lambda () (srl:construct-start-end-tags node method ns-prefix-assig namespace-assoc declared-ns-prefixes)) (lambda (start-tag end-tag ns-prefix-assig namespace-assoc declared-ns-prefixes) (begin (srl:display-fragments-2nesting start-tag port) (if end-tag ; there exists content (let ((space-preserve? (srl:update-space-specifier node space-preserve?)) (text-node-handler (cond ((memq (car node) cdata-section-elements) srl:string->cdata-section) ((and (eq? method 'html) (srl:member-ci (symbol->string (car node)) '("script" "style"))) ; No escaping for strings inside these HTML elements (lambda (str) str)) (else srl:string->char-data))) (content ((srl:select-kids (lambda (node) ; TODO: support SXML entities (not (and (pair? node) (memq (car node) '(@ @@ *ENTITY*)))))) node))) (call-with-values (lambda () (cond ((or (not indentation) (and (eq? method 'html) (srl:member-ci (symbol->string (car node)) '("pre" "script" "style" "textarea")))) ; No indent - on this level and subsequent levels (values #f #f)) ((or space-preserve? (srl:mem-pred ; at least a single text node (lambda (node) (not (pair? node))) content)) ; No indent on this level, possible indent on nested levels (values #f indentation)) (else (values (cons srl:newline indentation) (cons (car indentation) indentation))))) (lambda (indent-here indent4recursive) (begin (for-each ; display content (if indent-here (lambda (kid) (begin (for-each (lambda (x) (display x port)) indent-here) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc declared-ns-prefixes indent4recursive space-preserve? cdata-section-elements text-node-handler))) (lambda (kid) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc declared-ns-prefixes indent4recursive space-preserve? cdata-section-elements text-node-handler))) content) (if indent-here (begin (display srl:newline port) (for-each (lambda (x) (display x port)) (cdr indentation)))) (for-each (lambda (x) (display x port)) end-tag))))))))))))) ;------------------------------------------------- ; Serializing the document node - start of recursion ; Creates the serialized representation for the XML declaration ; Returns: (listof string) ; version ::= string | number ; standalone ::= 'yes | 'no | 'omit (define (srl:make-xml-decl version standalone) (let ((version (if (number? version) (number->string version) version))) (if (eq? standalone 'omit) (list "") (list "string standalone) "'?>")))) ; omit-xml-declaration? ::= #t | #f ; standalone ::= 'yes | 'no | 'omit ; version ::= string | number (define (srl:top->nested-str-lst doc cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version) (let* ((namespace-assoc (srl:ns-assoc-for-top doc)) (ns-prefix-assig (append (srl:extract-original-prefix-binding namespace-assoc) ns-prefix-assig)) (serialized-content (map (if indent ; => output each member from the newline (let ((indentation (list indent))) ; for nested elements (lambda (kid) (list srl:newline (srl:node->nested-str-lst-recursive kid method ns-prefix-assig namespace-assoc '() indentation #f cdata-section-elements srl:string->char-data)))) (lambda (kid) (srl:node->nested-str-lst-recursive kid method ns-prefix-assig namespace-assoc '() indent #f cdata-section-elements srl:string->char-data))) ((srl:select-kids ; document node content (lambda (node) ; TODO: support SXML entities (not (and (pair? node) (memq (car node) '(@ @@ *ENTITY*)))))) doc)))) (if (or (eq? method 'html) omit-xml-declaration?) (if (and indent (not (null? serialized-content))) ; Remove the starting newline ; ATTENTION: beware of `Gambit cadar bug': ; http://mailman.iro.umontreal.ca/pipermail/gambit-list/ ; 2005-July/000315.html (cons (cadar serialized-content) (cdr serialized-content)) serialized-content) (list (srl:make-xml-decl version standalone) serialized-content)))) (define (srl:display-top-out doc port cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version) (let ((no-xml-decl? ; no XML declaration was displayed? (if (not (or (eq? method 'html) omit-xml-declaration?)) (begin (for-each ; display xml declaration (lambda (x) (display x port)) (srl:make-xml-decl version standalone)) #f) #t)) (content ; document node content ((srl:select-kids (lambda (node) ; TODO: support SXML entities (not (and (pair? node) (memq (car node) '(@ @@ *ENTITY*)))))) doc)) (namespace-assoc (srl:ns-assoc-for-top doc))) (let ((ns-prefix-assig (append (srl:extract-original-prefix-binding namespace-assoc) ns-prefix-assig))) (cond ((null? content) ; generally a rare practical situation #t) ; nothing more to do ((and indent no-xml-decl?) ; We'll not display newline before (car content) (let ((indentation (list indent))) ; for nested elements (for-each (lambda (kid put-newline?) (begin (if put-newline? (display srl:newline port)) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc '() indentation #f cdata-section-elements srl:string->char-data))) content ; After sequence normalization, content does not contain #f (cons #f (cdr content))))) (else (for-each (if indent ; => output each member from the newline (let ((indentation (list indent))) ; for nested elements (lambda (kid) (begin (display srl:newline port) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc '() indentation #f cdata-section-elements srl:string->char-data)))) (lambda (kid) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc '() indent #f cdata-section-elements srl:string->char-data))) content)))))) ;========================================================================== ; Interface ;------------------------------------------------- ; Calling the serializer with all the serialization parameters supported ; and with no overhead of parameters parsing. ; ATTENTION: As future versions of this library may provide support for ; additional serialization parameters, the functions `srl:sxml->string' and ; `srl:display-sxml' specified in this subsections may have a different number ; of their arguments in the future versions of the library. ; Returns a string that contains the serialized representation for `sxml-obj'. ; cdata-section-elements ::= (listof sxml-name) ; indent ::= #t | #f | whitespace-string ; method = 'xml | 'html ; ns-prefix-assign ::= (listof (cons prefix-symbol namespace-uri-string)) ; omit-xml-declaration? ::= #t | #f ; standalone ::= 'yes | 'no | 'omit ; version ::= number | string (define (srl:sxml->string sxml-obj cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version) (srl:apply-string-append (srl:clean-fragments (srl:top->nested-str-lst (srl:normalize-sequence sxml-obj) cdata-section-elements (if (and indent (not (string? indent))) " " indent) method ns-prefix-assig omit-xml-declaration? standalone version)))) ; Writes the serialized representation of the `sxml-obj' to an output port ; `port'. The result returned by the function is unspecified. (define (srl:display-sxml sxml-obj port-or-filename cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version) (if (string? port-or-filename) ; a filename? (let ((out (open-output-file port-or-filename))) (begin (srl:display-top-out (srl:normalize-sequence sxml-obj) out cdata-section-elements (if (and indent (not (string? indent))) " " indent) method ns-prefix-assig omit-xml-declaration? standalone version) (display srl:newline out) ; newline at the end of file (close-output-port out))) (srl:display-top-out (srl:normalize-sequence sxml-obj) port-or-filename cdata-section-elements (if (and indent (not (string? indent))) " " indent) method ns-prefix-assig omit-xml-declaration? standalone version))) ;------------------------------------------------- ; Generalized serialization procedure, parameterizable with all the ; serialization params supported by this implementation ; procedure srl:parameterizable :: SXML-OBJ [PORT] {PARAM}* -> ; -> STRING|unspecified ; sxml-obj - an SXML object to serialize ; param ::= (cons param-name param-value) ; param-name ::= symbol ; ; 1. cdata-section-elements ; value ::= (listof sxml-elem-name) ; sxml-elem-name ::= symbol ; ; 2. indent ; value ::= 'yes | #t | 'no | #f | whitespace-string ; ; 3. method ; value ::= 'xml | 'html ; ; 4. ns-prefix-assig ; value ::= (listof (cons prefix namespace-uri)) ; prefix ::= symbol ; namespace-uri ::= string ; ; 5. omit-xml-declaration? ; value ::= 'yes | #t | 'no | #f ; ; 6. standalone ; value ::= 'yes | #t | 'no | #f | 'omit ; ; 7. version ; value ::= string | number ; ; ATTENTION: If a parameter name is unexpected or a parameter value is ; ill-formed, the parameter is silently ignored. Probably, a warning message ; in such a case would be more appropriate. ; ; Example: ; (srl:parameterizable ; '(tag (@ (attr "value")) (nested "text node") (empty)) ; (current-output-port) ; '(method . xml) ; XML output method is used by default ; '(indent . "\t") ; use a single tabulation to indent nested elements ; '(omit-xml-declaration . #f) ; add XML declaration ; '(standalone . yes) ; denote a standalone XML document ; '(version . "1.0")) ; XML version (define (srl:parameterizable sxml-obj . port-or-filename+params) (call-with-values (lambda () (if (and (not (null? port-or-filename+params)) (or (output-port? (car port-or-filename+params)) (string? (car port-or-filename+params)))) (values (car port-or-filename+params) (cdr port-or-filename+params)) (values #f port-or-filename+params))) (lambda (port-or-filename params) (let loop ((params params) (cdata-section-elements '()) (indent " ") (method 'xml) (ns-prefix-assig srl:conventional-ns-prefixes) (omit-xml-declaration? #t) (standalone 'omit) (version "1.0")) (cond ((null? params) ; all parameters parsed (if port-or-filename (srl:display-sxml sxml-obj port-or-filename cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version) (srl:sxml->string sxml-obj cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version))) ((or (not (pair? (car params))) ; not a pair or has no param value (null? (cdar params))) (loop (cdr params) cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version)) (else (let ((prm-value (cdar params))) (case (caar params) ((cdata-section-elements) (loop (cdr params) (if (list? prm-value) prm-value cdata-section-elements) indent method ns-prefix-assig omit-xml-declaration? standalone version)) ((indent) (loop (cdr params) cdata-section-elements (cond ((boolean? prm-value) (if prm-value " " prm-value)) ((string? prm-value) prm-value) ((eq? prm-value 'yes) " ") ((eq? prm-value 'no) #f) (else indent)) method ns-prefix-assig omit-xml-declaration? standalone version)) ((method) (loop (cdr params) cdata-section-elements indent (if (or (eq? prm-value 'xml) (eq? prm-value 'html)) prm-value method) ns-prefix-assig omit-xml-declaration? standalone version)) ((ns-prefix-assig) (loop (cdr params) cdata-section-elements indent method (if (and (list? prm-value) (not (srl:mem-pred ; no non-pair members (lambda (x) (not (pair? x))) prm-value))) (append prm-value ns-prefix-assig) ns-prefix-assig) omit-xml-declaration? standalone version)) ((omit-xml-declaration) (loop (cdr params) cdata-section-elements indent method ns-prefix-assig (cond ((boolean? prm-value) prm-value) ((eq? prm-value 'yes) #t) ((eq? prm-value 'no) #f) (else indent)) standalone version)) ((standalone) (loop (cdr params) cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? (cond ((memv prm-value '(yes no omit)) prm-value) ((boolean? prm-value) (if prm-value 'yes 'no)) (else standalone)) version)) ((version) (loop (cdr params) cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone (if (or (string? prm-value) (number? prm-value)) prm-value version))) (else (loop (cdr params) cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version)))))))))) ;------------------------------------------------- ; High-level functions for popular serialization use-cases ; These functions use only a subset of serializer functionality, however, this ; subset seems sufficient for most practical purposes. ; procedure srl:sxml->xml :: SXML-OBJ [PORT-OR-FILENAME] -> STRING|unspecified ; ; Serializes the `sxml-obj' into XML, with indentation to facilitate ; readability by a human. ; ; sxml-obj - an SXML object (a node or a nodeset) to be serialized ; port-or-filename - an output port or an output file name, an optional ; argument ; If `port-or-filename' is not supplied, the functions return a string that ; contains the serialized representation of the `sxml-obj'. ; If `port-or-filename' is supplied and is a port, the functions write the ; serialized representation of `sxml-obj' to this port and return an ; unspecified result. ; If `port-or-filename' is supplied and is a string, this string is treated as ; an output filename, the serialized representation of `sxml-obj' is written to ; that filename and an unspecified result is returned. If a file with the given ; name already exists, the effect is unspecified. (define (srl:sxml->xml sxml-obj . port-or-filename) (if (null? port-or-filename) (srl:sxml->string sxml-obj '() #t 'xml srl:conventional-ns-prefixes #t 'omit "1.0") (srl:display-sxml sxml-obj (car port-or-filename) '() #t 'xml srl:conventional-ns-prefixes #t 'omit "1.0"))) ; procedure srl:sxml->xml-noindent :: SXML-OBJ [PORT-OR-FILENAME] -> ; -> STRING|unspecified ; ; Serializes the `sxml-obj' into XML, without indentation. (define (srl:sxml->xml-noindent sxml-obj . port-or-filename) (if (null? port-or-filename) (srl:sxml->string sxml-obj '() #f 'xml srl:conventional-ns-prefixes #t 'omit "1.0") (srl:display-sxml sxml-obj (car port-or-filename) '() #f 'xml srl:conventional-ns-prefixes #t 'omit "1.0"))) ; procedure srl:sxml->html :: SXML-OBJ [PORT-OR-FILENAME] -> STRING|unspecified ; ; Serializes the `sxml-obj' into HTML, with indentation to facilitate ; readability by a human. ; ; sxml-obj - an SXML object (a node or a nodeset) to be serialized ; port-or-filename - an output port or an output file name, an optional ; argument ; If `port-or-filename' is not supplied, the functions return a string that ; contains the serialized representation of the `sxml-obj'. ; If `port-or-filename' is supplied and is a port, the functions write the ; serialized representation of `sxml-obj' to this port and return an ; unspecified result. ; If `port-or-filename' is supplied and is a string, this string is treated as ; an output filename, the serialized representation of `sxml-obj' is written to ; that filename and an unspecified result is returned. If a file with the given ; name already exists, the effect is unspecified. (define (srl:sxml->html sxml-obj . port-or-filename) (if (null? port-or-filename) (srl:sxml->string sxml-obj '() #t 'html '() #t 'omit "4.0") (srl:display-sxml sxml-obj (car port-or-filename) '() #t 'html '() #t 'omit "4.0"))) ; procedure srl:sxml->html-noindent :: SXML-OBJ [PORT-OR-FILENAME] -> ; -> STRING|unspecified ; ; Serializes the `sxml-obj' into HTML, without indentation. (define (srl:sxml->html-noindent sxml-obj . port-or-filename) (if (null? port-or-filename) (srl:sxml->string sxml-obj '() #f 'html '() #t 'omit "4.0") (srl:display-sxml sxml-obj (car port-or-filename) '() #f 'html '() #t 'omit "4.0")))