;; sxml-serializer -- Serialize SXML to XML and HTML4 ;; Uses public domain code from sxml-tools by Dmitry Lizorkin ;; Chicken port Copyright (C) 2010 Jim Ursetto. All Rights Reserved. ;; License: BSD. ;; Changes over stock: ;; Add allow-prefix-redeclarations? option. Allows user to provide multiple namespace ;; URIs that map to the same prefix in ns-prefixes. Has no effect in attributes. ;; Currently, does not permit redeclarations in *NAMESPACES* except via original-prefix ;; (which is the same as using ns-prefixes). ;; Add fake namespace prefix *default*. Namespace URIs associated with this prefix ;; are assigned the default namespace xmlns="..." and elements with ;; no namespace reset the default to xmlns="". (module sxml-serializer (serialize-sxml conventional-ns-prefixes allow-prefix-redeclarations? ;; These currently offer little benefit over plain serialize-sxml. ;; sxml->xml ;; sxml->xml/noindent ;; sxml->html ;; sxml->html/noindent ) (import scheme (chicken base)) (import (only srfi-1 filter) (only srfi-13 string-concatenate)) (include "serializer.scm") (define sxml->xml srl:sxml->xml) (define sxml->xml/noindent srl:sxml->xml-noindent) (define sxml->html srl:sxml->html) (define sxml->html/noindent srl:sxml->html-noindent) (define display-sxml srl:display-sxml) (define sxml->string srl:sxml->string) (define srl:apply-string-append string-concatenate) ;; override srl:conventional-ns-prefixes so that sxml->xml etc. use the extended list (define srl:conventional-ns-prefixes '((admin . "http://webns.net/mvcb/") (atom . "http://www.w3.org/2005/Atom") (cc . "http://web.resource.org/cc/") (content . "http://purl.org/rss/1.0/modules/content/") (dc . "http://purl.org/dc/elements/1.1/") (feedburner . "http://rssnamespace.org/feedburner/ext/1.0") (fo . "http://www.w3.org/1999/XSL/Format") (geo . "http://www.w3.org/2003/01/geo/wgs84_pos#") (georss . "http://www.georss.org/georss") (itunes . "http://www.itunes.com/dtds/podcast-1.0.dtd") (media . "http://search.yahoo.com/mrss/") (rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#") (rng . "http://relaxng.org/ns/structure/1.0") (rss . "http://purl.org/rss/1.0/") (slash . "http://purl.org/rss/1.0/modules/slash/") (sy . "http://purl.org/rss/1.0/modules/syndication/") (taxo . "http://purl.org/rss/1.0/modules/taxonomy/") (thr . "http://purl.org/syndication/thread/1.0") (wiki . "http://purl.org/rss/1.0/modules/wiki/") (wfw . "http://wellformedweb.org/CommentAPI/") (xhtml . "http://www.w3.org/1999/xhtml") (xlink . "http://www.w3.org/1999/xlink") (xqx . "http://www.w3.org/2005/XQueryX") (xsd . "http://www.w3.org/2001/XMLSchema") (xsi . "http://www.w3.org/2001/XMLSchema-instance") (xsl . "http://www.w3.org/1999/XSL/Transform"))) (define conventional-ns-prefixes srl:conventional-ns-prefixes) (define allow-prefix-redeclarations? (make-parameter #t)) ; use #f for stock compatibility ;; serialize-sxml: replacement for srl:parameterizable using keyword args ;; instead of (k . v) pairs. ;; Currently disallows xml-declaration emission because the interface is silly and ;; it doesn't provide an "encoding" option, and because if there is a (*PI* xml ...) ;; in the document it will either emit two, or omit only one. (define (serialize-sxml sxml-obj #!key (output #f) (cdata-section-elements '()) (indent " ") (method 'xml) (ns-prefixes conventional-ns-prefixes) (allow-prefix-redeclarations (allow-prefix-redeclarations?))) (let ((omit-xml-declaration #t) ;; Force omission of xml-declaration (standalone 'omit) (version "1.0") ;; Don't prefix "" URIs. Could go in conventional-ns-prefixes instead, ;; but overriding ns-prefixes would kill that behavior. (ns-prefixes (append ns-prefixes '((*default* . "")))) ) (parameterize ((allow-prefix-redeclarations? allow-prefix-redeclarations)) ; redundant? (if output (srl:display-sxml sxml-obj output cdata-section-elements indent method ns-prefixes omit-xml-declaration standalone version) (srl:sxml->string sxml-obj cdata-section-elements indent method ns-prefixes omit-xml-declaration standalone version) )))) ;;; changes (define (srl:qname->string prefix-string local-part) (if (and prefix-string (not (string=? prefix-string "*default*"))) (string-append prefix-string ":" local-part) local-part)) (define (srl:namespace-decl->str-lst prefix-string namespace-uri) (if (string=? prefix-string "*default*") (list " xmlns" "=\"" (srl:string->att-value namespace-uri) "\"") (list " xmlns:" prefix-string "=\"" (srl:string->att-value namespace-uri) "\""))) ;; Changes: When declaring a namespace prefix, remove any existing matching prefixes ;; from the declaration list, so new URIs shadow old ones with the same prefix. ;; Changes are marked with [+]. (define (srl:construct-start-end-tags elem method ns-prefix-assig namespace-assoc declared-ns-prefixes) (let ((ns-assoc-here (srl:namespace-assoc-for-elem elem)) (empty? (srl:empty-elem? elem))) (let ((ns-prefix-assig (append (srl:extract-original-prefix-binding ns-assoc-here) ns-prefix-assig)) (namespace-assoc (append ns-assoc-here namespace-assoc))) (call-with-values (lambda () (srl:name->qname-components ; element name (car elem) ns-prefix-assig namespace-assoc declared-ns-prefixes #f)) (lambda (elem-prefix elem-uri elem-local elem-decl-required?) (let loop ((attrs (reverse ((srl:select-kids (lambda (node) ; not SXML 3.0 aux-list (and (pair? node) (not (eq? (car node) '@))))) ((srl:select-kids (lambda (node) (and (pair? node) (eq? (car node) '@)))) elem)))) (start-tag (if (or (not empty?) (and (eq? method 'html) (not elem-prefix) (srl:member-ci elem-local ; ATTENTION: should probably move this list ; to a global const '("area" "base" "basefont" "br" "col" "frame" "hr" "img" "input" "isindex" "link" "meta" "param")))) '(">") '(" />"))) (ns-prefix-assig ns-prefix-assig) (namespace-assoc namespace-assoc) (declared-ns-prefixes ; As if element namespace already declared (if elem-decl-required? (alist-update elem-prefix elem-uri ;; [+] declared-ns-prefixes string=?) declared-ns-prefixes))) (if (null? attrs) ; attributes scanned (let ((elem-name (srl:qname->string elem-prefix elem-local))) (values (cons "<" (cons elem-name (if elem-decl-required? (cons (srl:namespace-decl->str-lst elem-prefix elem-uri) start-tag) start-tag))) (if empty? #f (list "")) ns-prefix-assig namespace-assoc declared-ns-prefixes)) (call-with-values (lambda () (srl:name->qname-components (caar attrs) ; attribute name ns-prefix-assig namespace-assoc declared-ns-prefixes #t)) (lambda (attr-prefix attr-uri attr-local attr-decl-required?) (let ((start-tag (cons (srl:attribute->str-lst attr-prefix attr-local ; TODO: optimize for HTML output method (if (null? (cdar attrs)) ; no attribute value attr-local (cadar attrs)) method) start-tag))) (loop (cdr attrs) (if attr-decl-required? (cons (srl:namespace-decl->str-lst attr-prefix attr-uri) start-tag) start-tag) ns-prefix-assig namespace-assoc (if attr-decl-required? ;; alist-update not required if attributes cannot redeclare any prefixes, ;; but to be on the safe side... (alist-update attr-prefix attr-uri declared-ns-prefixes string=?) declared-ns-prefixes)))))))))))) ;; Changes: check (allow-prefix-redeclarations) parameter before denying XML prefix ;; redeclarations. Requires declared-ns-prefixes to contain unique keys (prefixes). ;; - Also have empty namespace signal a declaration of "" is required if a non-empty ;; *default* namespace is defined. Empty namespace declaration is considered ;; to be ("*default*" . "") so it overwrites any previous default declaration. ;; - Also disallow prefix redeclarations in attributes, avoiding multiple declarations ;; of the same namespace prefix in one tag. An unintended consequence is that you ;; can't then redeclare a prefix declared in ANY parent; to fix this we'd have ;; to track namespaces assigned per-attribute. (define (srl:name->qname-components name ns-prefix-assig namespace-assoc declared-ns-prefixes attribute?) (let ((use-ns-id-or-generate-prefix (lambda (ns-id) (if (and ns-id ; try to use namespace-id as a prefix (not (assq (string->symbol ns-id) ns-prefix-assig)) (not (assoc ns-id declared-ns-prefixes)) ;; FIXME: maybe remove ) ns-id ; Otherwise - generate unique prefix ; Returns a prefix-string not presented in ns-prefix-assig and ; declared-ns-prefixes (let loop ((i 1)) (let ((candidate (string-append "prfx" (number->string i)))) (if (or (assoc candidate declared-ns-prefixes) (assq (string->symbol candidate) ns-prefix-assig)) (loop (+ i 1)) candidate)))))) (n-parts (srl:split-name name))) (cond ((and attribute? (not (car n-parts))) ; no namespace-id => no namespace (values #f #f (cdr n-parts) ; name as a string #f)) ((and (car n-parts) (string-ci=? (car n-parts) "xml")) ; reserved XML namespace (values (car n-parts) "http://www.w3.org/XML/1998/namespace" (cdr n-parts) #f)) (else (call-with-values (lambda () (let ((nid (or (car n-parts) "*default*"))) (cond ((assq (string->symbol nid) ; suppose a namespace-id namespace-assoc) => (lambda (lst) (values (cadr lst) nid))) (else ; first part of a name is a namespace URI (values (or (car n-parts) "") #f))))) (lambda (namespace-uri ns-id) (cond ((srl:assoc-cdr-string= namespace-uri declared-ns-prefixes) => (lambda (pair) ; Prefix for that namespace URI already declared (values (car pair) namespace-uri (cdr n-parts) #f))) (else ; namespace undeclared (values (cond ((srl:assoc-cdr-string= namespace-uri ns-prefix-assig) => (lambda (pair) ; A candidate namespace prefix is supplied from the user (let ((candidate (symbol->string (car pair)))) (if (and (or attribute? (not (allow-prefix-redeclarations?))) (assoc candidate declared-ns-prefixes)) ;; The prefix already bound to a different namespace ;; Avoid XML prefix re-declaration (use-ns-id-or-generate-prefix ns-id) candidate)))) (else (if (and ns-id (allow-prefix-redeclarations?)) ns-id (use-ns-id-or-generate-prefix ns-id)))) namespace-uri (cdr n-parts) #t ; in any case, prefix declaration is required ))))))))) ;; Changes: (car node) is only treated as a tag when it is a symbol. ;; For indentation purposes we also treat a list starting with a ;; text node as a text node (to avoid ("foo") having extraneous ;; whitespace added around it). (define (srl:display-node-out-recursive node port method ns-prefix-assig namespace-assoc declared-ns-prefixes indentation space-preserve? cdata-section-elements text-node-handler) (cond ((not (pair? node)) ; text node (display (text-node-handler (srl:atomic->string node)) port)) ((not (symbol? (car node))) ;; ** change (for-each (lambda (kid) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc declared-ns-prefixes indentation space-preserve? cdata-section-elements text-node-handler)) node)) (else (case (car node) ; node name ((*COMMENT*) (for-each (lambda (x) (display x port)) (srl:comment->str-lst node))) ((*PI*) (for-each (lambda (x) (display x port)) (srl:processing-instruction->str-lst node method))) ((&) (display (srl:shtml-entity->char-data node) port)) ((*DECL*) ; recovering for non-SXML nodes #f) (else ; otherwise - an element node (call-with-values (lambda () (srl:construct-start-end-tags node method ns-prefix-assig namespace-assoc declared-ns-prefixes)) (lambda (start-tag end-tag ns-prefix-assig namespace-assoc declared-ns-prefixes) (begin (srl:display-fragments-2nesting start-tag port) (if end-tag ; there exists content (let ((space-preserve? (srl:update-space-specifier node space-preserve?)) (text-node-handler (cond ((memq (car node) cdata-section-elements) srl:string->cdata-section) ((and (eq? method 'html) (srl:member-ci (symbol->string (car node)) '("script" "style"))) ; No escaping for strings inside these HTML elements (lambda (str) str)) (else srl:string->char-data))) (content ((srl:select-kids (lambda (node) ; TODO: support SXML entities (not (and (pair? node) (memq (car node) '(@ @@ *ENTITY*)))))) node))) (call-with-values (lambda () (cond ((or (not indentation) (and (eq? method 'html) (srl:member-ci (symbol->string (car node)) '("pre" "script" "style" "textarea")))) ; No indent - on this level and subsequent levels (values #f #f)) ((or space-preserve? (srl:mem-pred ; at least a single text node (lambda (node) (or (not (pair? node)) (not (symbol? (car node))))) ;; ** change content)) ; No indent on this level, possible indent on nested levels (values #f indentation)) (else (values (cons srl:newline indentation) (cons (car indentation) indentation))))) (lambda (indent-here indent4recursive) (begin (for-each ; display content (if indent-here (lambda (kid) (begin (for-each (lambda (x) (display x port)) indent-here) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc declared-ns-prefixes indent4recursive space-preserve? cdata-section-elements text-node-handler))) (lambda (kid) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc declared-ns-prefixes indent4recursive space-preserve? cdata-section-elements text-node-handler))) content) (if indent-here (begin (display srl:newline port) (for-each (lambda (x) (display x port)) (cdr indentation)))) (for-each (lambda (x) (display x port)) end-tag)))))))))))))) ;; Changes: (car node) is only treated as a tag when it is a symbol. ;; For indentation purposes we also treat a list starting with a ;; text node as a text node (to avoid ("foo") having extraneous ;; whitespace added around it). ;; This is almost a cut and paste of srl:display-node-out-recursive. (define (srl:node->nested-str-lst-recursive node method ns-prefix-assig namespace-assoc declared-ns-prefixes indentation space-preserve? cdata-section-elements text-node-handler) (cond ((not (pair? node)) ; text node (text-node-handler (srl:atomic->string node))) ((not (symbol? (car node))) ;; ** change (map (lambda (kid) (srl:node->nested-str-lst-recursive kid method ns-prefix-assig namespace-assoc declared-ns-prefixes indentation space-preserve? cdata-section-elements text-node-handler)) node)) (else (case (car node) ; node name ((*COMMENT*) (srl:comment->str-lst node)) ((*PI*) (srl:processing-instruction->str-lst node method)) ((&) (srl:shtml-entity->char-data node)) ((*DECL*) ; recovering for non-SXML nodes '()) (else ; otherwise - an element node (call-with-values (lambda () (srl:construct-start-end-tags node method ns-prefix-assig namespace-assoc declared-ns-prefixes)) (lambda (start-tag end-tag ns-prefix-assig namespace-assoc declared-ns-prefixes) (if (not end-tag) ; empty element => recursion stops start-tag (let ((space-preserve? (srl:update-space-specifier node space-preserve?)) (text-node-handler (cond ((memq (car node) cdata-section-elements) srl:string->cdata-section) ((and (eq? method 'html) (srl:member-ci (symbol->string (car node)) '("script" "style"))) ; No escaping for strings inside these HTML elements (lambda (str) str)) (else srl:string->char-data))) (content ((srl:select-kids (lambda (node) ; TODO: support SXML entities (not (and (pair? node) (memq (car node) '(@ @@ *ENTITY*)))))) node))) (call-with-values (lambda () (cond ((or (not indentation) (and (eq? method 'html) (srl:member-ci (symbol->string (car node)) '("pre" "script" "style" "textarea")))) ; No indent - on this level and subsequent levels (values #f #f)) ((or space-preserve? (srl:mem-pred ; at least a single text node (lambda (node) (or (not (pair? node)) (not (symbol? (car node))))) ;; ** change content)) ; No indent on this level, possible indent on nested levels (values #f indentation)) (else (values (cons srl:newline indentation) (cons (car indentation) indentation))))) (lambda (indent-here indent4recursive) (if indent-here (append start-tag (map (lambda (kid) (list indent-here (srl:node->nested-str-lst-recursive kid method ns-prefix-assig namespace-assoc declared-ns-prefixes indent4recursive space-preserve? cdata-section-elements text-node-handler))) content) (cons srl:newline (cons (cdr indentation) end-tag))) (append start-tag (map (lambda (kid) (srl:node->nested-str-lst-recursive kid method ns-prefix-assig namespace-assoc declared-ns-prefixes indent4recursive space-preserve? cdata-section-elements text-node-handler)) content) end-tag))))))))))))) ;; Changes: Declare the empty ("") namespace URI upfront so we do ;; not get a spurious xmlns="" on the first unprefixed elt. ;; WARNING: *default* must be added to ns-prefixes, but this is done ;; only in serialize-sxml and theoretically there are other ;; entry points to this procedure (though not in practice, ;; as the module hides them). ;; Therefore, srl:top->nested-str-lst and srl:display-top-out ;; should probably take a declared-ns-prefixes argument so that ;; *default* is only automatically declared when it's ;; already in ns-prefixes. (define (srl:top->nested-str-lst doc cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version) (let* ((namespace-assoc (srl:ns-assoc-for-top doc)) (declared-ns-prefixes '(("*default*" . ""))) ;; [+] (ns-prefix-assig (append (srl:extract-original-prefix-binding namespace-assoc) ns-prefix-assig)) (serialized-content (map (if indent ; => output each member from the newline (let ((indentation (list indent))) ; for nested elements (lambda (kid) (list srl:newline (srl:node->nested-str-lst-recursive kid method ns-prefix-assig namespace-assoc declared-ns-prefixes indentation #f cdata-section-elements srl:string->char-data)))) (lambda (kid) (srl:node->nested-str-lst-recursive kid method ns-prefix-assig namespace-assoc declared-ns-prefixes indent #f cdata-section-elements srl:string->char-data))) ((srl:select-kids ; document node content (lambda (node) ; TODO: support SXML entities (not (and (pair? node) (memq (car node) '(@ @@ *ENTITY*)))))) doc)))) (if (or (eq? method 'html) omit-xml-declaration?) (if (and indent (not (null? serialized-content))) ; Remove the starting newline ; ATTENTION: beware of `Gambit cadar bug': ; http://mailman.iro.umontreal.ca/pipermail/gambit-list/ ; 2005-July/000315.html (cons (cadar serialized-content) (cdr serialized-content)) serialized-content) (list (srl:make-xml-decl version standalone) serialized-content)))) (define (srl:display-top-out doc port cdata-section-elements indent method ns-prefix-assig omit-xml-declaration? standalone version) (let ((no-xml-decl? ; no XML declaration was displayed? (if (not (or (eq? method 'html) omit-xml-declaration?)) (begin (for-each ; display xml declaration (lambda (x) (display x port)) (srl:make-xml-decl version standalone)) #f) #t)) (content ; document node content ((srl:select-kids (lambda (node) ; TODO: support SXML entities (not (and (pair? node) (memq (car node) '(@ @@ *ENTITY*)))))) doc)) (namespace-assoc (srl:ns-assoc-for-top doc)) (declared-ns-prefixes '(("*default*" . "")))) ; [+] (let ((ns-prefix-assig (append (srl:extract-original-prefix-binding namespace-assoc) ns-prefix-assig))) (cond ((null? content) ; generally a rare practical situation #t) ; nothing more to do ((and indent no-xml-decl?) ; We'll not display newline before (car content) (let ((indentation (list indent))) ; for nested elements (for-each (lambda (kid put-newline?) (begin (if put-newline? (display srl:newline port)) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc declared-ns-prefixes indentation #f cdata-section-elements srl:string->char-data))) content ; After sequence normalization, content does not contain #f (cons #f (cdr content))))) (else (for-each (if indent ; => output each member from the newline (let ((indentation (list indent))) ; for nested elements (lambda (kid) (begin (display srl:newline port) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc declared-ns-prefixes indentation #f cdata-section-elements srl:string->char-data)))) (lambda (kid) (srl:display-node-out-recursive kid port method ns-prefix-assig namespace-assoc declared-ns-prefixes indent #f cdata-section-elements srl:string->char-data))) content)))))) ;; Changes: accept nulls, chars, and symbols as SXML nodes. For some ;; reason numbers and bools were already accepted, which makes me think ;; this was an oversight. (define (srl:atomic->string obj) (cond ((or (pair? obj) ; non-atomic type (string? obj)) obj) ((null? obj) "") ((char? obj) (##sys#char->utf8-string obj)) ((symbol? obj) (symbol->string obj)) ((number? obj) (number->string obj)) ((boolean? obj) (if obj "true" "false")) (else ; unexpected type ; ATTENTION: should probably raise an error here obj))) )