;;; Test script for the html-parser module.
;;;
;;; Loads "html-parser.scm", imports html-parser, and runs two test groups
;;; between (test-begin "html-parser") and (test-end):
;;;
;;;   "utilities" - exercises the local helper `string-scan`.
;;;                 (string-scan str pat) builds a reader with
;;;                 (make-string-reader/ci pat), applies it to a string input
;;;                 port over STR, and returns the string-length of the result.
;;;                 The expected values (3 for "def" in "abcdefghi", 0 for an
;;;                 empty pattern) are consistent with the result being the
;;;                 text that precedes the first case-insensitive match of PAT
;;;                 -- presumably; confirm against html-parser.scm.
;;;
;;;   "parsing"   - compares the value of (html->sxml <string>) with an
;;;                 expected SXML tree rooted at *TOP*.
;;;
;;; NOTE(review): several html->sxml input literals in the "parsing" group
;;; contain no markup at all even though the expected SXML contains elements,
;;; attributes, comments, declarations or processing instructions (e.g. the
;;; input "foo" is expected to produce an `a` element with an `href`
;;; attribute, and the input "" is expected to produce a *PI* node).  It looks
;;; like the tags were stripped from these literals (and possibly from some
;;; expected strings) by an extraction step -- TODO confirm against the
;;; upstream copy of this file before trusting any failure or pass here.
;;;
;;; NOTE(review): some of those literals span several physical lines, so the
;;; line breaks below are part of the string data; do not reflow this file
;;; without first restoring the original literals.
(use test) (load "html-parser.scm") (import html-parser) (test-begin "html-parser") (define (string-scan str pat) (string-length ((make-string-reader/ci pat) (open-input-string str)))) (test-group "utilities" (test "string-scan (basic)" 3 (string-scan "abcdefghi" "def")) (test "string-scan (case-insensitive)" 3 (string-scan "abcdEfghi" "deF")) (test "string-scan (single char)" 4 (string-scan "abcdefghi" "e")) (test "string-scan (overlap)" 12 (string-scan "salkabcdabghabcdabdef" "abcdabd")) (test "string-scan (boyer-moore)" 216 (string-scan "abracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabrabracadababrabrabr" "abracadabra")) (test "string-scan (empty pattern)" 0 (string-scan "abakjrgaker" "")) ) (test-group "parsing" (test '(*TOP* (a (@ (href "http://foo.scm/")) "foo")) (html->sxml "foo")) (test '(*TOP* (a "a" (b "ab") "a" (c "ac")) "x") (html->sxml "aabaacx")) (test '(*TOP* (p "p1") (p "p2") (p "p3")) (html->sxml "

p1

p2

p3")) (test '(*TOP* (i "italic" (b "bold italic")) (b "bold")) (html->sxml "italicbold italicbold")) (test '(*TOP* (*PI* xml "blah blah")) (html->sxml "")) (test '(*TOP* (*DECL* DOCTYPE HTML PUBLIC "-//W3C//DTD HTML&4.0//EN")) (html->sxml "")) (test '(*TOP* "abc" (*COMMENT* "def") "ghi") (html->sxml "abcghi")) (test '(*TOP* (pre "&")) (html->sxml "

&]]>
")) (test '(*TOP* (xmp "&")) (html->sxml "<a>&amp;<!--foo--><![CDATA[...]]></a>")) (test '(*TOP* (b (@ (id "&")) "&")) (html->sxml "&")) (test '(*TOP* (foo (@ (bar "&x")))) (html->sxml "")) (test '(*TOP* (foo (@ (bar)))) (html->sxml "")) (test '(*TOP* (div (@ (data "")) "empty")) (html->sxml "
empty
")) (test '(*TOP* (br) "\r\n" (br) "\r\n" (div (@ (data "(sxml (@ (attr \"12345\")) body)")) "div body")) (html->sxml "
\r\n
\r\n
div body
")) ) (test-end)