; An example of an interesting (S)XML transformation ; ; This example emphasizes how higher-order transformers in SXSLT ; elegantly solve a class of problems that often occurs in practice: ; transforming an XML document so that different occurrences of an ; element yield different results depending on that element's ; ancestors, siblings, or descendants. We also highlight the ; treatment of XML Namespaces. Our example is an extended version of ; generating rudimentary English phrases from a database. The original ; problem was used in ; Shriram Krishnamurthi, Gray, K.E. and Graunke, P.T.: ; Transformation-by-Example for XML. ; Practical Aspects of Declarative Languages, 2000. ; ; to demonstrate XT3D: Given a database of purchase records, the goal ; is to create a purchase summary such that multiple purchases are ; separated by commas, except for the last two, which are separated by ; the word 'and'. The transformation involves the restructuring of the ; original markup and the insertion of item delimiters: commas and ; 'and'. We also add to the top-level 'text' element an attribute ; 'count' with the count of the descendant text elements. See 'doc1' ; and 'doc2' as two sample documents. Run this code to see the answer ; and the intermediate results. ; ; The desired transformation is done by a pre-post-order iterator ; taking instructions from a stylesheet. The stylesheets instructs ; pre-post-order to visit the nodes of the tree in post-order, the ; order of call-by-value applications. This depth-first traversal mode ; fits the problem. ; ; IMPORT ; The following is a Bigloo-specific module declaration. ; Other Scheme systems have something similar. ; ; See the Makefile for the rules to run this example on Bigloo, SCM ; and other systems ; (module sxml-db-conversion ; (include "myenv-bigloo.scm") ; (include "srfi-13-local.scm") ; or import from SRFI-13 if available ; (include "char-encoding.scm") ; (include "util.scm") ; (include "look-for-str.scm") ; (include "input-parse.scm") ; (include "SSAX-code.scm") ; (include "SXML-tree-trans.scm") ; ) ; ; $Id: sxml-db-conv.scm,v 1.6 2004/07/07 16:02:30 sperber Exp $ ; Sample documents: (define doc1 " 4 tinkers " ) ; The expected output is ; 4 tinkers (define doc2 " 4 tinkers 5 tailors 2 soldiers 1 spy ") ; The tranlator, from a source XML document to the transformed document ; in the SXML form. (define (convert-db doc-xml) (let ((doc ; The first step of the transformation is parsing the source ; document into an abstract syntax tree (SXML) form. Using a SSAX XML ; parser [SSAX], this step can be accomplished as follows. ; NB: we instruct the parser to represent http://www.w3c.org/HTML/ ; by a ns-user-shortcut 'h'. See the SXML Specification ; for a more detailed discussion of the namespaces in SXML. (call-with-input-string doc-xml (lambda (port) (ssax:xml->sxml port '((h . "http://www.w3c.org/HTML/"))))))) (cout nl ">>>The following is the sample document to transform: " nl doc-xml nl nl ">> and its SXML form:" nl) (pp doc) (pre-post-order doc ; the conversion stylesheet `((h:p . ,(lambda (tag str) (lambda (count . args) ; count can be #f: don't generate the attr (let ((att-list (if count `((@ (acc:count ,count))) '()))) (if (null? args) `(out:text ,@att-list ,str) (let ((arg (car args)) (rest (cdr args))) (if (null? rest) `(out:text ,@att-list ,(string-append str " and") ,(arg #f)) `(out:text ,@att-list ,(string-append str ",") ,(apply arg (cons #f rest)))))))))) (http://internal.com/db:purchase . ,(lambda (tag . procs) (if (null? procs) '() (apply (car procs) (cons (length (cdr procs)) (cdr procs)))))) (*text* . ,(lambda (trigger str) str)) (*TOP* . ,(lambda x x)) (@ . ,(lambda x x)) (*NAMESPACES* *preorder* ; Replace namespace declarations of the source document ; with the namespace declarations of the target document. ; The target document uses two namespace-ids: ; 'out:' and 'acc:' . ,(lambda (trigger ns) `(,trigger (out "http://internal.com/out") (acc "http://internal.com/accounting")))) ))) ) ; pretty-print the resulting SXML into XML, emitting the appropriate ; xmlns:xxx attributes (define (SXML->XML sxml) (define this-ss `((@ ((*default* ; local override for attributes . ,(lambda (attr-key . value) (enattr attr-key value)))) . ,(lambda (trigger . value) (cons '@ value))) (*default* . ,(lambda (tag . elems) (entag tag elems))) (*text* . ,(lambda (trigger str) (if (string? str) (string->goodXML str) str))) (*TOP* ; check for the namespaces and add xmlns:xxx attributes *macro* ; to the root element . ,(lambda (tag . elems) (define (make-xmlns ns-assoc) (list (string->symbol (string-append "xmlns:" (symbol->string (car ns-assoc)))) (cadr ns-assoc))) (let* ((namespaces ; extract from the annotations of *TOP* (apply append (pre-post-order elems `((@ ((*NAMESPACES* *preorder* . ,(lambda (tag . ns) ns)) (*default* *preorder* . ,(lambda x '()))) . ,(lambda (tag . elems) (apply append elems))) (*default* *preorder* . ,(lambda x '())))))) ;(_ (cerr "namespaces: " namespaces nl)) (xmlns-attrs (map make-xmlns namespaces))) (pre-post-order elems `((@ *preorder* . ,(lambda x '())) ; ignore *TOP* annotations (*default* ; would handle the root elem ((@ *preorder* ; attributes of the root element . ,(lambda (tag . attrs) ; add xmlnsn attrs (cons tag (append xmlns-attrs attrs)))) (*default* *preorder* . ,(lambda x x)) ; don't descend and don't transform ) . ,(lambda (root-tag . children) (if (and (pair? children) (pair? (car children)) (eq? '@ (caar children))) (cons root-tag children) ; root element had no attributes. Add xmlns ones (cons* root-tag `(@ ,xmlns-attrs) children)))))) ))) )) (SRV:send-reply (pre-post-order sxml this-ss) )) (define (entag tag elems) (cond ((and (pair? elems) (pair? (car elems)) (eq? '@ (caar elems))) (let ((attrs (cdar elems)) (children (cdr elems))) (list #\< tag attrs (if (null? children) "/>" (list #\> children "))))) ((null? elems) (list #\< tag "/>")) (else (list #\< tag #\> elems ")))) (define (enattr attr-key value) (if (null? value) (list #\space attr-key "='" attr-key "'") (list #\space attr-key "='" value #\'))) ; Given a string, check to make sure it does not contain characters ; such as '<' or '&' that require encoding. Return either the original ; string, or a list of string fragments with special characters ; replaced by appropriate character entities. (define string->goodXML (make-char-quotator '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """) (#\' . "'")))) (define (do-convert doc) (let ((transf-sxml (convert-db doc))) (cout nl ">>>The transformed SXML tree is as follows" nl) (pp transf-sxml) (cout nl ">>>Here is the result of the pretty printing of the transformed " "SXML tree into XML" nl nl) (SXML->XML transf-sxml) (cout nl nl "====================================" nl) )) (do-convert doc1) (newline) (do-convert doc2) (cout nl nl "Done" nl)