; XML/HTML processing in Scheme
; SXML expression tree transformers
;
; IMPORT
; A prelude appropriate for your Scheme system
; (myenv-bigloo.scm, myenv-mit.scm, etc.)
;
; EXPORT
; (provide SRV:send-reply
; post-order pre-post-order replace-range)
;
; See vSXML-tree-trans.scm for the validation code, which also
; serves as usage examples.
;
; $Id: SXML-tree-trans.scm,v 1.8 2009/03/16 03:08:59 oleg Exp $
; procedure: SRV:send-reply FRAGMENT ...
;
; Output the 'fragments'
; The fragments are a list of strings, characters,
; numbers, thunks, #f, #t -- and other fragments.
; The function traverses the tree depth-first, writes out
; strings and characters, executes thunks, and ignores
; #f and '().
; The function returns #t if anything was written at all;
; otherwise the result is #f
; If #t occurs among the fragments, it is not written out
; but causes the result of SRV:send-reply to be #t
(define (SRV:send-reply . fragments)
(let loop ((fragments fragments) (result #f))
(cond
((null? fragments) result)
((not (car fragments)) (loop (cdr fragments) result))
((null? (car fragments)) (loop (cdr fragments) result))
((eq? #t (car fragments)) (loop (cdr fragments) #t))
((pair? (car fragments))
(loop (cdr fragments) (loop (car fragments) result)))
((procedure? (car fragments))
((car fragments))
(loop (cdr fragments) #t))
(else
(display (car fragments))
(loop (cdr fragments) #t)))))
; procedure: pre-post-order TREE BINDINGS
;
; Traversal of an SXML tree or a grove:
; a or a
;
; A and a are mutually-recursive datatypes that
; underlie the SXML tree:
; ::= (name . ) | "text string"
; An (ordered) set of nodes is just a list of the constituent nodes:
; ::= ( ...)
; Nodelists, and Nodes other than text strings are both lists. A
; however is either an empty list, or a list whose head is
; not a symbol (an atom in general). A symbol at the head of a node is
; either an XML name (in which case it's a tag of an XML element), or
; an administrative name such as '@'.
; See SXPath.scm and SSAX.scm for more information on SXML.
; Pre-Post-order traversal of a tree and creation of a new tree:
; pre-post-order:: x ->
; where
; ::= ( ...)
; ::= ( *preorder* . ) |
; ( *macro* . ) |
; ( . ) |
; ( . )
; ::= XMLname | *text* | *default*
; :: x [] ->
;
; The pre-post-order function visits the nodes and nodelists
; pre-post-order (depth-first). For each of the form (name
; ...) it looks up an association with the given 'name' among
; its . If failed, pre-post-order tries to locate a
; *default* binding. It's an error if the latter attempt fails as
; well. Having found a binding, the pre-post-order function first
; checks to see if the binding is of the form
; ( *preorder* . )
; If it is, the handler is 'applied' to the current node. Otherwise,
; the pre-post-order function first calls itself recursively for each
; child of the current node, with prepended to the
; in effect. The result of these calls is passed to the
; (along with the head of the current ). To be more
; precise, the handler is _applied_ to the head of the current node
; and its processed children. The result of the handler, which should
; also be a , replaces the current . If the current
; is a text string or other atom, a special binding with a symbol
; *text* is looked up.
;
; A binding can also be of a form
; ( *macro* . )
; This is equivalent to *preorder* described above. However, the result
; is re-processed again, with the current stylesheet.
(define (pre-post-order tree bindings)
(let* ((default-binding (assq '*default* bindings))
(text-binding (or (assq '*text* bindings) default-binding))
(text-handler ; Cache default and text bindings
(and text-binding
(if (procedure? (cdr text-binding))
(cdr text-binding) (cddr text-binding)))))
(let loop ((tree tree))
(cond
((null? tree) '())
((not (pair? tree))
(let ((trigger '*text*))
(if text-handler (text-handler trigger tree)
(error "Unknown binding for " trigger " and no default"))))
((not (symbol? (car tree))) (map loop tree)) ; tree is a nodelist
(else ; tree is an SXML node
(let* ((trigger (car tree))
(binding (or (assq trigger bindings) default-binding)))
(cond
((not binding)
(error "Unknown binding for " trigger " and no default"))
((not (pair? (cdr binding))) ; must be a procedure: handler
(apply (cdr binding) trigger (map loop (cdr tree))))
((eq? '*preorder* (cadr binding))
(apply (cddr binding) tree))
((eq? '*macro* (cadr binding))
(loop (apply (cddr binding) tree)))
(else ; (cadr binding) is a local binding
(apply (cddr binding) trigger
(pre-post-order (cdr tree) (append (cadr binding) bindings)))
))))))))
; procedure: post-order TREE BINDINGS
; post-order is a strict subset of pre-post-order without *preorder*
; (let alone *macro*) traversals.
; Now pre-post-order is actually faster than the old post-order.
; The function post-order is deprecated and is aliased below for
; backward compatibility.
(define post-order pre-post-order)
; A version of pre-post-order that transforms an SXML document into a
; _strictly conformant_ SXML document. That is, the result of a
; pre-post-order transformation can be queried with SXPath or
; transformed again with SXSLT.
; Joerg-Cyril Hoehle wrote on the SSAX-SXML mailing list about
; chaining of transformations on a SXML source:
; SXML --transform--> SXML1 --transform--> SXML2 ... --> XML
; It's only the last transformation step that would produce XML.
; We can use a pre-post-order traversal combinator with an appropriate
; stylesheet to run each 'transform' step above. SRV:send-reply at the
; end will write out the resulting XML document.
; (see Joerg-Cyril Hoehle's messages on the SSAX-SXML list on Oct
; 21 and 22, 2003).
;
; Composing SXML transformations by feeding the result of one
; pre-post-order traversal into another works. Still, the result of
; pre-post-order is merely a tree of fragments, which is generally not
; a strictly valid SXML. Joerg-Cyril Hoehle pointed out that, for
; example, given an SXML document
; '(Data (repeat 3 (random-Header 3))))
; a sample transformation
; (pre-post-order sxml
; `((repeat *macro*
; . ,(lambda (tag count . elems)
; (apply make-list count elems)))
; (random-Header *preorder*
; . ,(lambda (tag elems)
; `(Header ,(gensym))))
; (*text* . ,(lambda (trigger x) x))
; (*default* . ,(lambda x x))))
;
; yields the following.
; (Data
; ((Header VOTj)
; (Header 0qel)
; (Header bA97)))
;
; All (Header ...) elements are enclosed in an extra pair of
; parentheses. In general, pre-post-order may add extra nesting levels
; and insert empty lists. Both these features break the strict SXML
; specification compliance of the transformation result. Still,
; pre-post-order itself can process such a tree correctly. Therefore,
; if we use only pre-post-order for our multi-stage SXML
; transformations, no problems occur. However, if we wish to employ
; SXPath to select parts from a pre-post-order-transformed SXML
; document, we get a problem. SXPath, unlike pre-post-order, insists
; on its source document being fully SXML compliant.
;
; The problem can be rectified, by changing pre-post-order as shown in
; the code below. The only change is replacing the two occurrences of
; 'map' (there are only two such occurrences) with
; map-node-concat. Justification for the change: a pre-post-order
; handler can yield either a node, or a nodelist. Now, if the handler
; returns a nodelist, we _splice_ it in in the result tree. This
; operation seems to make sure that each node of a tree is a valid
; SXML node.
;
; For a pure SXML-to-XML conversion, the splicing-in seems to be an
; overkill. Therefore, it may make sense to keep both versions of
; pre-post-order. Personally I have no problem with proliferation of
; pre-post-order-like functions. I believe that it is the data
; structure/protocols that should be standardized and
; parsimonious. Each user may write processing code in his own way. Of
; course some of the processing code turns out more general than the
; other, and can be shared. Nevertheless, it's the common data
; structure, the common format that guarantees interoperability --
; rather than the common library. Code should be tailored (or even
; automatically generated) to suit circumstances.
;
; map-node-concat FN NODELIST -> NODELIST
; Map FN to each element of NODELIST where FN is a function
; NODE -> NODE or NODELIST
; If an application of FN yields a NODELIST (including the empty list),
; we _splice_ it in into the result. Essentially,
; (map-node-concat fn nodelist)
; is equivalent to
; (apply append
; (map (lambda (node)
; (let ((result (fn node)))
; (if (nodelist? result) result (list result))))
; nodelist))
(define (map-node-concat fn lst)
(if (null? lst) '()
(let ((result (fn (car lst))))
(cond
((null? result) ; It's a null node-list, splice it in
(map-node-concat fn (cdr lst)))
((and (pair? result) (not (symbol? (car result))))
; it's a non-null node-list
(append result (map-node-concat fn (cdr lst))))
(else
(cons result (map-node-concat fn (cdr lst))))))))
; The following is almost identical to pre-post-order
; except that the two occurrences of 'map' in that pre-post-order
; (there are only two such occurrences) are replaced with map-node-concat
; in the code below.
(define (pre-post-order-splice tree bindings)
(let* ((default-binding (assq '*default* bindings))
(text-binding (or (assq '*text* bindings) default-binding))
(text-handler ; Cache default and text bindings
(and text-binding
(if (procedure? (cdr text-binding))
(cdr text-binding) (cddr text-binding)))))
(let loop ((tree tree))
(cond
((null? tree) '())
((not (pair? tree))
(let ((trigger '*text*))
(if text-handler (text-handler trigger tree)
(error "Unknown binding for " trigger " and no default"))))
; tree is a nodelist
((not (symbol? (car tree))) (map-node-concat loop tree))
(else ; tree is an SXML node
(let* ((trigger (car tree))
(binding (or (assq trigger bindings) default-binding)))
(cond
((not binding)
(error "Unknown binding for " trigger " and no default"))
((not (pair? (cdr binding))) ; must be a procedure: handler
(apply (cdr binding) trigger
(map-node-concat loop (cdr tree))))
((eq? '*preorder* (cadr binding))
(apply (cddr binding) tree))
((eq? '*macro* (cadr binding))
(loop (apply (cddr binding) tree)))
(else ; (cadr binding) is a local binding
(apply (cddr binding) trigger
(pre-post-order (cdr tree) (append (cadr binding) bindings)))
))))))))
;------------------------------------------------------------------------
; Extended tree fold
; tree = atom | (node-name tree ...)
;
; foldts fdown fup fhere seed (Leaf str) = fhere seed str
; foldts fdown fup fhere seed (Nd kids) =
; fup seed $ foldl (foldts fdown fup fhere) (fdown seed) kids
; procedure fhere: seed -> atom -> seed
; procedure fdown: seed -> node -> seed
; procedure fup: parent-seed -> last-kid-seed -> node -> seed
; foldts returns the final seed
(define (foldts fdown fup fhere seed tree)
(cond
((null? tree) seed)
((not (pair? tree)) ; An atom
(fhere seed tree))
(else
(let loop ((kid-seed (fdown seed tree)) (kids (cdr tree)))
(if (null? kids)
(fup seed kid-seed tree)
(loop (foldts fdown fup fhere kid-seed (car kids))
(cdr kids)))))))
; procedure: replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
; Traverse a forest depth-first and cut/replace ranges of nodes.
;
; The nodes that define a range don't have to have the same immediate
; parent, don't have to be on the same level, and the end node of a
; range doesn't even have to exist. A replace-range procedure removes
; nodes from the beginning node of the range up to (but not including)
; the end node of the range. In addition, the beginning node of the
; range can be replaced by a node or a list of nodes. The range of
; nodes is cut while depth-first traversing the forest. If all
; branches of the node are cut a node is cut as well. The procedure
; can cut several non-overlapping ranges from a forest.
; replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
; where
; type FOREST = (NODE ...)
; type NODE = Atom | (Name . FOREST) | FOREST
;
; The range of nodes is specified by two predicates, beg-pred and end-pred.
; beg-pred:: NODE -> #f | FOREST
; end-pred:: NODE -> #f | FOREST
; The beg-pred predicate decides on the beginning of the range. The node
; for which the predicate yields non-#f marks the beginning of the range
; The non-#f value of the predicate replaces the node. The value can be a
; list of nodes. The replace-range procedure then traverses the tree and skips
; all the nodes, until the end-pred yields non-#f. The value of the end-pred
; replaces the end-range node. The new end node and its brothers will be
; re-scanned.
; The predicates are evaluated pre-order. We do not descend into a node that
; is marked as the beginning of the range.
(define (replace-range beg-pred end-pred forest)
; loop forest keep? new-forest
; forest is the forest to traverse
; new-forest accumulates the nodes we will keep, in the reverse
; order
; If keep? is #t, keep the curr node if atomic. If the node is not atomic,
; traverse its children and keep those that are not in the skip range.
; If keep? is #f, skip the current node if atomic. Otherwise,
; traverse its children. If all children are skipped, skip the node
; as well.
(define (loop forest keep? new-forest)
(if (null? forest) (values (reverse new-forest) keep?)
(let ((node (car forest)))
(if keep?
(cond ; accumulate mode
((beg-pred node) => ; see if the node starts the skip range
(lambda (repl-branches) ; if so, skip/replace the node
(loop (cdr forest) #f
(append (reverse repl-branches) new-forest))))
((not (pair? node)) ; it's an atom, keep it
(loop (cdr forest) keep? (cons node new-forest)))
(else
(let*-values
(((node?) (symbol? (car node))) ; or is it a nodelist?
((new-kids keep?) ; traverse its children
(loop (if node? (cdr node) node) #t '())))
(loop (cdr forest) keep?
(cons
(if node? (cons (car node) new-kids) new-kids)
new-forest)))))
; skip mode
(cond
((end-pred node) => ; end the skip range
(lambda (repl-branches) ; repl-branches will be re-scanned
(loop (append repl-branches (cdr forest)) #t
new-forest)))
((not (pair? node)) ; it's an atom, skip it
(loop (cdr forest) keep? new-forest))
(else
(let*-values
(((node?) (symbol? (car node))) ; or is it a nodelist?
((new-kids keep?) ; traverse its children
(loop (if node? (cdr node) node) #f '())))
(loop (cdr forest) keep?
(if (or keep? (pair? new-kids))
(cons
(if node? (cons (car node) new-kids) new-kids)
new-forest)
new-forest) ; if all kids are skipped
)))))))) ; skip the node too
(let*-values (((new-forest keep?) (loop forest #t '())))
new-forest))