(module gumbo
;; exports
(html->sxml)
(import chicken scheme foreign)
(use srfi-1 lolevel)
(foreign-declare "#include ")
(define gumbo_parse
(foreign-lambda (c-pointer (struct GumboOutput))
"gumbo_parse"
c-string))
(define DOCUMENT (foreign-value "GUMBO_NODE_DOCUMENT" int))
(define ELEMENT (foreign-value "GUMBO_NODE_ELEMENT" int))
(define TEXT (foreign-value "GUMBO_NODE_TEXT" int))
(define CDATA (foreign-value "GUMBO_NODE_CDATA" int))
(define COMMENT (foreign-value "GUMBO_NODE_COMMENT" int))
(define WHITESPACE (foreign-value "GUMBO_NODE_WHITESPACE" int))
(define TEMPLATE (foreign-value "GUMBO_NODE_TEMPLATE" int))
(define node-type
(foreign-lambda* int (((c-pointer (struct GumboNode)) node))
"C_return(((GumboNode*)node)->type);"))
(define (node->sxml node)
(let ((type (node-type node)))
((cond
((= type DOCUMENT) document->sxml)
((= type ELEMENT) element->sxml)
((= type TEXT) node-text)
((= type CDATA) node-text)
((= type COMMENT) comment->sxml)
((= type WHITESPACE) ignore)
((= type TEMPLATE) element->sxml)
(else
(abort (make-property-condition
'exn
'message (sprintf "Unknown node type: ~S~n"
(node-type node))))))
node)))
(define (ignore node) #f)
(define (document->sxml node)
`(*TOP* ,@(filter-map node->sxml (document-children node))))
(define (document-children node)
(gumbo-vector->list
((foreign-lambda* (c-pointer (struct GumboVector))
(((c-pointer (struct GumboNode)) node))
"C_return(&(((GumboNode*)node)->v.document.children));")
node)))
(define (comment->sxml node)
`(*COMMENT* ,(node-text node)))
(define (element-children node)
(gumbo-vector->list
((foreign-lambda* (c-pointer (struct GumboVector))
(((c-pointer (struct GumboNode)) node))
"C_return(&(((GumboNode*)node)->v.element.children));")
node)))
(define (element-attributes node)
(gumbo-vector->list
((foreign-lambda* (c-pointer (struct GumboVector))
(((c-pointer (struct GumboNode)) node))
"C_return(&(((GumboNode*)node)->v.element.attributes));")
node)))
(define gumbo-vector-length
(foreign-lambda* unsigned-int (((c-pointer (struct GumboVector)) v))
"unsigned int length = ((GumboVector*)v)->length;
C_return(length);"))
(define gumbo-vector-ref
(foreign-lambda* c-pointer
(((c-pointer (struct GumboVector)) v)
(unsigned-int i))
"C_return(((GumboVector*)v)->data[i]);"))
(define gumbo-attribute-name
(foreign-lambda* c-string
(((c-pointer (struct GumboAttribute)) attribute))
"C_return(((GumboAttribute*)attribute)->name);"))
(define gumbo-attribute-value
(foreign-lambda* c-string
(((c-pointer (struct GumboAttribute)) attribute))
"C_return(((GumboAttribute*)attribute)->value);"))
(define (attribute->sxml a)
(list (string->symbol (gumbo-attribute-name a))
(gumbo-attribute-value a)))
(define (gumbo-vector->list v)
(list-tabulate (gumbo-vector-length v)
(cut gumbo-vector-ref v <>)))
(define element-normalized-tagname
(foreign-lambda* c-string (((c-pointer (struct GumboNode)) node))
"C_return(gumbo_normalized_tagname(((GumboNode*)node)->v.element.tag));"))
(define node-text
(foreign-lambda* c-string (((c-pointer (struct GumboNode)) node))
"C_return(((GumboNode*)node)->v.text.text);"))
(define (element->sxml node)
`(,(string->symbol (element-normalized-tagname node))
,@(let ((attrs (element-attributes node)))
(if (null? attrs)
'()
(list (cons '@ (map attribute->sxml attrs)))))
,@(filter-map node->sxml (element-children node))))
(define output-document
(foreign-lambda* (c-pointer (struct GumboNode))
(((c-pointer (struct GumboOutput)) output))
"C_return(((GumboOutput *)output)->document);"))
(define destroy-output!
(foreign-lambda* void
(((c-pointer (struct GumboOutput)) output))
"gumbo_destroy_output(&kGumboDefaultOptions, (GumboOutput*)output);"))
(define (html->sxml src)
(let* ((output (gumbo_parse src))
(result (handle-exceptions
exn (begin (destroy-output! output)
(abort exn))
(node->sxml (output-document output)))))
(destroy-output! output)
result))
)