(module gumbo
;; exports
(html->sxml)
(import chicken scheme foreign)
(use srfi-1 lolevel)
(foreign-declare "#include ")
(define gumbo_parse
(foreign-lambda (c-pointer (struct GumboOutput))
"gumbo_parse"
c-string))
(define TAG_UNKNOWN
(foreign-value "GUMBO_TAG_UNKNOWN" int))
(define DOCUMENT (foreign-value "GUMBO_NODE_DOCUMENT" int))
(define ELEMENT (foreign-value "GUMBO_NODE_ELEMENT" int))
(define TEXT (foreign-value "GUMBO_NODE_TEXT" int))
(define CDATA (foreign-value "GUMBO_NODE_CDATA" int))
(define COMMENT (foreign-value "GUMBO_NODE_COMMENT" int))
(define WHITESPACE (foreign-value "GUMBO_NODE_WHITESPACE" int))
(define TEMPLATE (foreign-value "GUMBO_NODE_TEMPLATE" int))
(define node-type
(foreign-lambda* int (((c-pointer (struct GumboNode)) node))
"C_return(((GumboNode*)node)->type);"))
(define (node->sxml node)
(let ((type (node-type node)))
((cond
((= type DOCUMENT) document->sxml)
((= type ELEMENT) element->sxml)
((= type TEXT) node-text)
((= type CDATA) node-text)
((= type COMMENT) comment->sxml)
((= type WHITESPACE) node-text)
((= type TEMPLATE) element->sxml)
(else
(abort (make-property-condition
'exn
'message (sprintf "Unknown node type: ~S~n"
(node-type node))))))
node)))
(define (document->sxml node)
`(*TOP* ,@(filter-map node->sxml (document-children node))))
(define (document-children node)
(gumbo-vector->list
((foreign-lambda* (c-pointer (struct GumboVector))
(((c-pointer (struct GumboNode)) node))
"C_return(&(((GumboNode*)node)->v.document.children));")
node)))
(define (comment->sxml node)
`(*COMMENT* ,(node-text node)))
(define (element-children node)
(gumbo-vector->list
((foreign-lambda* (c-pointer (struct GumboVector))
(((c-pointer (struct GumboNode)) node))
"C_return(&(((GumboNode*)node)->v.element.children));")
node)))
(define (element-attributes node)
(gumbo-vector->list
((foreign-lambda* (c-pointer (struct GumboVector))
(((c-pointer (struct GumboNode)) node))
"C_return(&(((GumboNode*)node)->v.element.attributes));")
node)))
(define gumbo-vector-length
(foreign-lambda* unsigned-int (((c-pointer (struct GumboVector)) v))
"unsigned int length = ((GumboVector*)v)->length;
C_return(length);"))
(define gumbo-vector-ref
(foreign-lambda* c-pointer
(((c-pointer (struct GumboVector)) v)
(unsigned-int i))
"C_return(((GumboVector*)v)->data[i]);"))
(define gumbo-attribute-name
(foreign-lambda* c-string
(((c-pointer (struct GumboAttribute)) attribute))
"C_return(((GumboAttribute*)attribute)->name);"))
(define gumbo-attribute-value
(foreign-lambda* c-string
(((c-pointer (struct GumboAttribute)) attribute))
"C_return(((GumboAttribute*)attribute)->value);"))
(define (attribute->sxml a)
(list (string->symbol (gumbo-attribute-name a))
(gumbo-attribute-value a)))
(define (gumbo-vector->list v)
(list-tabulate (gumbo-vector-length v)
(cut gumbo-vector-ref v <>)))
(define gumbo-string-piece-length
(foreign-lambda* size_t
(((c-pointer (struct GumboStringPiece)) p))
"C_return(((GumboStringPiece*)p)->length);"))
(define gumbo-string-piece-data
(foreign-lambda* c-pointer
(((c-pointer (struct GumboStringPiece)) p))
"C_return(((GumboStringPiece*)p)->data);"))
(define (gumbo-string-piece->string p)
(let* ((len (gumbo-string-piece-length p))
(str (make-string len)))
(move-memory! (gumbo-string-piece-data p)
(location str)
len)
str))
(define element-tag
(foreign-lambda* int (((c-pointer (struct GumboNode)) node))
"C_return(((GumboNode*)node)->v.element.tag);"))
(define normalized-tagname
(foreign-lambda c-string "gumbo_normalized_tagname" int))
(define tag-from-original-text
(foreign-lambda void "gumbo_tag_from_original_text"
c-pointer))
(define element-original-tag-string-piece
(foreign-lambda* (c-pointer (struct GumboStringPiece))
(((c-pointer (struct GumboNode)) node))
"C_return(&(((GumboNode*)node)->v.element.original_tag));"))
(define (element-original-tagname node)
(let ((original (element-original-tag-string-piece node)))
(tag-from-original-text original)
(gumbo-string-piece->string original)))
(define (tagname node)
(let ((tag (element-tag node)))
(string->symbol
(if (= TAG_UNKNOWN tag)
(element-original-tagname node)
(normalized-tagname tag)))))
(define node-text
(foreign-lambda* c-string (((c-pointer (struct GumboNode)) node))
"C_return(((GumboNode*)node)->v.text.text);"))
(define (element->sxml node)
`(,(tagname node)
,@(let ((attrs (element-attributes node)))
(if (null? attrs)
'()
(list (cons '@ (map attribute->sxml attrs)))))
,@(filter-map node->sxml (element-children node))))
(define output-document
(foreign-lambda* (c-pointer (struct GumboNode))
(((c-pointer (struct GumboOutput)) output))
"C_return(((GumboOutput *)output)->document);"))
(define destroy-output!
(foreign-lambda* void
(((c-pointer (struct GumboOutput)) output))
"gumbo_destroy_output(&kGumboDefaultOptions, (GumboOutput*)output);"))
(define (html->sxml src)
(let* ((output (gumbo_parse src))
(result (handle-exceptions
exn (begin (destroy-output! output)
(abort exn))
(node->sxml (output-document output)))))
(destroy-output! output)
result))
)