;; XPath/XPointer -> Abstract Syntax Tree parser ; ; This software is in Public Domain. ; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND. ; ; Please send bug reports and comments to: ; lisovsky@acm.org Kirill Lisovsky ; lizorkin@hotbox.ru Dmitry Lizorkin ;========================================================================== ; W3C textual XPath/XPointer -> AST ; Writing operations as an S-expression in an infix notation (define (txp:ast-operation-helper expr-lst op-lst add-on) (let ((rev-expr-lst (reverse expr-lst))) (let loop ((exprs (cdr rev-expr-lst)) (ops (reverse op-lst)) (res (car rev-expr-lst))) (if (null? ops) res (loop (cdr exprs) (cdr ops) (list (car ops) (car exprs) res)))))) ;------------------------------------------------- ; Parameters for TXPath -> AST implementation (define txp:ast-params `( ; Axes (axis ((ancestor ,(lambda (add-on) 'ancestor)) (ancestor-or-self ,(lambda (add-on) 'ancestor-or-self)) (attribute ,(lambda (add-on) 'attribute)) (child ,(lambda (add-on) 'child)) (descendant ,(lambda (add-on) 'descendant)) (descendant-or-self ,(lambda (add-on) 'descendant-or-self)) (following ,(lambda (add-on) 'following)) (following-sibling ,(lambda (add-on) 'following-sibling)) (namespace ,(lambda (add-on) 'namespace)) (parent ,(lambda (add-on) 'parent)) (preceding ,(lambda (add-on) 'preceding)) (preceding-sibling ,(lambda (add-on) 'preceding-sibling)) (self ,(lambda (add-on) 'self)) ; Addition by XLink (arc ,(lambda (add-on) 'arc)) (traverse ,(lambda (add-on) 'traverse)) (traverse-arc ,(lambda (add-on) 'traverse-arc)))) ; Node test (node-test ((star ,(lambda (add-on) '((*)))) (uri+star ,(lambda (uri add-on) `((namespace-uri ,uri)))) (qname ,(lambda (uri local-name add-on) (if (not uri) `((local-name ,local-name)) `((namespace-uri ,uri) (local-name ,local-name))))) (comment ,(lambda (add-on) '((comment)))) (text ,(lambda (add-on) '((text)))) (processing-instruction ,(lambda (literal-string add-on) (if (not literal-string) ; no literal provided '((pi)) `((pi ,literal-string))))) (node ,(lambda (add-on) '((node)))) (point ,(lambda (add-on) '((point)))) (range ,(lambda (add-on) '((range)))))) ; Location step (step ((common ,(lambda (axis-res node-test-res predicate-res-lst add-on) `(step (axis-specifier (,axis-res)) (node-test ,@node-test-res) ,@predicate-res-lst))) (range-to ,(lambda (expr-res predicate-res-lst add-on) `(range-to (expr ,expr-res) ,@predicate-res-lst))))) ; Relative location path (relative-lpath ,(lambda (step-res-lst add-on) (cons 'relative-location-path step-res-lst))) ; Location path (location-path ((bare-slash ,(lambda (add-on) '(absolute-location-path))) (slash ,(lambda (relative-lpath-res add-on) (cons 'absolute-location-path (cdr relative-lpath-res)))) (double-slash ,(lambda (relative-lpath-res add-on) `(absolute-location-path (step (axis-specifier (descendant-or-self)) (node-test (node))) ,@(cdr relative-lpath-res)))))) ; Predicate (predicate ,(lambda (expr-res add-on) (list 'predicate expr-res))) ; Variable reference (variable-ref ,(lambda (var-name-string add-on) `(variable-reference ,var-name-string))) ; Function call (function-call ,(lambda (fun-name-string arg-res-lst add-on) `(function-call (function-name ,fun-name-string) ,@(map (lambda (arg-res) `(argument ,arg-res)) arg-res-lst)))) ; Primary expression (primary-expr ((literal ,(lambda (literal add-on) `(literal ,literal))) (number ,(lambda (number add-on) `(number ,number))))) ; Filter expression (filter-expr ,(lambda (primary-expr-res predicate-res-lst add-on) `(filter-expr (primary-expr ,primary-expr-res) ,@predicate-res-lst))) ; Path expression (path-expr ((slash ,(lambda (filter-expr-res relative-lpath-res add-on) `(path-expr ,(if (eq? (car filter-expr-res) 'filter-expr) filter-expr-res `(filter-expr (primary-expr ,filter-expr-res))) ,@(cdr relative-lpath-res)))) (double-slash ,(lambda (filter-expr-res relative-lpath-res add-on) `(path-expr ,(if (eq? (car filter-expr-res) 'filter-expr) filter-expr-res `(filter-expr (primary-expr ,filter-expr-res))) (step (axis-specifier (descendant-or-self)) (node-test (node))) ,@(cdr relative-lpath-res)))))) ; Union expression (union-expr ,(lambda (path-expr-res-lst add-on) (cons 'union-expr path-expr-res-lst))) ; Unary expression (unary-expr ,(lambda (union-expr-res num-minuses add-on) (let loop ((n num-minuses) (res union-expr-res)) (if (= n 0) res (loop (- n 1) (list '- res)))))) ; Different operations (operations ((* ,(lambda (add-on) '*)) (div ,(lambda (add-on) 'div)) (mod ,(lambda (add-on) 'mod)) (+ ,(lambda (add-on) '+)) (- ,(lambda (add-on) '-)) (< ,(lambda (add-on) '<)) (> ,(lambda (add-on) '>)) (<= ,(lambda (add-on) '<=)) (>= ,(lambda (add-on) '>=)) (= ,(lambda (add-on) '=)) (!= ,(lambda (add-on) '!=)))) ; Additive and multiplicative expressions (mul-expr ,txp:ast-operation-helper) (add-expr ,txp:ast-operation-helper) ; Relational expression (relational-expr ,txp:ast-operation-helper) ; Equality expression (equality-expr ,txp:ast-operation-helper) ; And-expression (and-expr ,(lambda (equality-expr-res-lst add-on) (cons 'and equality-expr-res-lst))) ; Or-expression (or-expr ,(lambda (and-expr-res-lst add-on) (cons 'or and-expr-res-lst))) ; Full XPointer (full-xptr ,(lambda (expr-res-lst add-on) (cons 'full-xptr expr-res-lst))) ; XPointer child sequence (child-seq ((with-name ,(lambda (name-string number-lst add-on) `(child-seq (name ,name-string) ,@(map (lambda (num) (list 'number num)) number-lst)))) (without-name ,(lambda (number-lst add-on) (cons 'child-seq (map (lambda (num) (list 'number num)) number-lst)))))) )) (define txp:ast-res (txp:parameterize-parser txp:ast-params)) ;------------------------------------------------- ; Highest level API functions ; ; xpath-string - an XPath location path (a string) ; ns-binding - declared namespace prefixes (an optional argument) ; ns-binding = (list (prefix . uri) ; (prefix . uri) ; ...) ; prefix - a symbol ; uri - a string ; ; The returned result: abstract-syntax-tree or #f ; abstract-syntax-tree - an S-expression ; #f - signals of a parse error (an error message is printed as a side effect ; during parsing) (define (txp:ast-api-helper parse-proc) (lambda (xpath-string . ns-binding) (let ((res (parse-proc xpath-string (if (null? ns-binding) ns-binding (car ns-binding)) '()))) (if (txp:error? res) ; error detected #f res)))) (define txp:xpath->ast (txp:ast-api-helper (cadr (assq 'xpath txp:ast-res)))) (define txp:xpointer->ast (txp:ast-api-helper (cadr (assq 'xpointer txp:ast-res)))) (define txp:expr->ast (txp:ast-api-helper (cadr (assq 'expr txp:ast-res)))) ;========================================================================== ; SXPath native syntax -> AST ; Additional features added to AST by native SXPath ; Operator += below denotes additional alternatives to AST grammar rules ; {7} += (node-test (equal? )) ; | (node-test (eq? )) ; | (node-test (names + )) ; | (node-test (not-names + )) ; {4} += (lambda-step ) ; | (define (txp:sxpath->ast path . ns-binding) (let ((ns-binding (if (null? ns-binding) ns-binding (car ns-binding)))) (if (string? path) ; Just a textual XPath (txp:expr->ast path ns-binding) (let loop ((ast-steps '()) (path path)) (cond ((null? path) ; parsing is finished (if (null? ast-steps) ; empty path '(absolute-location-path) (let ((forward-steps (reverse ast-steps))) (cons (if (eq? (caar forward-steps) 'filter-expr) 'path-expr 'relative-location-path) forward-steps)))) ((procedure? (car path)) (loop (cons (list 'lambda-step (car path)) ast-steps) (cdr path))) ((assq (car path) '((// . descendant-or-self) (.. . parent))) => (lambda (pair) (loop (cons `(step (axis-specifier (,(cdr pair))) (node-test (node))) ast-steps) (cdr path)))) ((symbol? (car path)) (loop (cons `(step (axis-specifier (child)) (node-test ,(cond ((assq (car path) '((* . (*)) (*text* . (text)))) => cdr) (else `(local-name ,(symbol->string (car path))))))) ast-steps) (cdr path))) ((string? (car path)) (and-let* ; only for the location path for the moment ((txt-ast (txp:expr->ast (car path) ns-binding))) (loop (if (eq? (car txt-ast) 'relative-location-path) (append (reverse (cdr txt-ast)) ast-steps) (cons `(filter-expr (primary-expr ,txt-ast)) ast-steps)) (cdr path)))) ((and (pair? (car path)) (not (null? (car path)))) (cond ((assq (caar path) '((*or* . names) (*not* . not-names))) => (lambda (pair) (loop (cons `(step (axis-specifier (child)) (node-test ,(cons (cdr pair) (map symbol->string (cdar path))))) ast-steps) (cdr path)))) ((assq (caar path) '((equal? . equal?) (eq? . eq?) (ns-id:* . namespace-uri))) => (lambda (pair) (loop (cons `(step (axis-specifier (child)) (node-test ,(list (cdr pair) (cadar path)))) ast-steps) (cdr path)))) (else (let reducer ((reducing-path (cdar path)) (filters '())) (cond ((null? reducing-path) (if (symbol? (caar path)) ; just a child axis (loop (cons `(step (axis-specifier (child)) (node-test (local-name ,(symbol->string (caar path)))) ,@(reverse filters)) ast-steps) (cdr path)) (and-let* ((select (txp:sxpath->ast (caar path) ns-binding))) (loop (cons `(filter-expr (primary-expr ,select) ,@(reverse filters)) ast-steps) (cdr path))))) ((number? (car reducing-path)) (reducer (cdr reducing-path) (cons `(predicate ,(if (negative? (car reducing-path)) ; from end of nodeset `(- (function-call (function-name "last")) (number ,(- -1 (car reducing-path)))) `(number ,(car reducing-path)))) filters))) (else (and-let* ((pred-ast (txp:sxpath->ast (car reducing-path) ns-binding))) (reducer (cdr reducing-path) (cons `(predicate ,pred-ast) filters))))))))) (else (cerr "Invalid path step: " (car path)) #f)))))) ;========================================================================== ; Several popular accessors and constructors for AST steps ; Whether a representation for location step (define (txp:step? op) (and (pair? op) (eq? (car op) 'step))) ; Returns the axis specifier of the location step ; Argument: the AST representation of a location step ; Result: either '(child) and the like, or #f if the AST contains syntactic ; error (define (txp:step-axis op) (and (txp:step? op) (not (null? (cdr op))) (pair? (cadr op)) (eq? (caadr op) 'axis-specifier) (cadadr op))) ; Returns the node test of the location step ; Argument: the AST representation of a location step ; Result: either '(*) and the like, or #f if the AST contains syntactic ; error (define (txp:step-node-test op) (and (txp:step? op) (not (null? (cdr op))) (not (null? (cddr op))) (pair? (caddr op)) (eq? (caaddr op) 'node-test) (cadr (caddr op)))) ; Returns predicate expressions of the location step ; Argument: the AST representation of a location step ; Result: either (listof ast-expr) ; or #f if syntactic error detected in a location step AST (define (txp:step-preds op) (and (txp:step? op) (not (null? (cdr op))) (not (null? (cddr op))) (null? (filter (lambda (sub) ; not a predicate representation (not (and (pair? sub) (eq? (car sub) 'predicate)))) (cdddr op))) (map cadr (cdddr op)))) ; Constructs the AST representation for a given axis, node-test and ; a list of predicate expressions ; axis ::= '(child) and the like ; node-test ::= '(*) and the like ; pred-expr-list ::= (listof ast-expr) (define (txp:construct-step axis node-test . pred-expr-list) `(step (axis-specifier ,axis) (node-test ,node-test) ,@(map (lambda (pred-expr) `(predicate ,pred-expr)) pred-expr-list)))