(module chicken-doc-html (chicken-doc-sxml->html tree->string quote-html text-content quote-identifier unquote-identifier definition->identifier signature->identifier syntax-highlighter colorize prettify-js) (import scheme chicken) (use (only sxml-transforms string->goodHTML SRV:send-reply pre-post-order* universal-conversion-rules*)) ;; temp, for toc (use matchable) (use (only data-structures conc ->string string-intersperse string-translate alist-ref)) (use (only ports with-output-to-string)) (use regex) (import irregex) (use (only extras sprintf)) (use (only srfi-13 string-downcase string-index)) (use (only srfi-1 remove)) (define (sxml-walk doc ss) (let ((default-handler (cond ((assq '*default* ss) => cdr) (else (lambda (t b s) (error 'sxml-walk "No default binding for" t))))) (text-handler (cond ((assq '*text* ss) => cdr) (else #f)))) (let loop ((doc doc)) (cond ((null? doc) '()) ((pair? doc) (let ((tag (car doc)) (body (cdr doc))) (if (symbol? tag) (let ((handler-cell (assq tag ss))) (if handler-cell ((cdr handler-cell) tag body ss) (default-handler tag body ss))) (map loop doc)))) (else (if text-handler (text-handler '*text* doc ss) doc)))))) (define (tree->string doc) (with-output-to-string (lambda () (SRV:send-reply doc)))) (define (quote-html s) (string->goodHTML s)) ;; Like sxpath // *text*. Beware, if your tags have arguments that ;; shouldn't be considered text, they will still be extracted. (define (text-content doc) (tree->string (sxml-walk doc `((*default* . ,(lambda (t b s) (sxml-walk b s))) (@ . ,(lambda (t b s) '())) (*text* . ,(lambda (t b s) b)))))) ;;; URI fragment (id=) handling for sections and definitions ;; Permitted characters in ID attributes in HTML < 5 are only A-Z a-z 0-9 : - _ ;; even though URI fragments are much more liberal. For compatibility, we ;; "period-encode" all other chars. (define +rx:%idfragment-escape+ (irregex "[^-_:A-Za-z0-9]")) (define +rx:%idfragment-unescape+ (irregex "\\.([0-9a-fA-F][0-9a-fA-F])")) ;; Encode raw identifier text string so it is usable as an HTML 4 ID attribute ;; (and consequently, as a URI fragment). (define (quote-identifier x) ; Not a good name; should prob. be encode-identifier (irregex-replace/all +rx:%idfragment-escape+ x (lambda (m) (sprintf ".~x" (char->integer (string-ref (irregex-match-substring m 0) 0)))))) ;; Decode period-encoded URI fragment (or ID attribute value). ;; Note that spaces were period-encoded, not converted to underscore, ;; so the transformation is reversible. (define (unquote-identifier x) (irregex-replace/all +rx:%idfragment-unescape+ x (lambda (m) (string (integer->char (string->number (irregex-match-substring m 1) 16)))))) ;; WARNING: Currently being used to both generate new ids for headers and ;; to figure out the id for an internal-link target. However the former may ;; distinuish duplicate IDs while the latter should ignore duplicates. ;; FIXME: Duplicate IDs will be generated for duplicate section or ;; definition names. A unique suffix is needed. (define (section->identifier x) (string-append "sec:" (string-translate x #\space #\_))) (define (definition->identifier x) (string-append "def:" x)) (define (section->href x) ;; Convert section name to internal fragment href. (if (string=? x "") "" (string-append "#" (quote-identifier (section->identifier x))))) (define (split-fragment link) ;; Split at first # (cond ((string-index link #\#) => (lambda (i) (cons (substring link 0 i) (substring link (+ i 1))))) ; don't include # (else (cons link "")))) (define (join-fragment href fragment) ;; Join with # (if (string=? fragment "") href (string-append href "#" fragment))) (use (only svnwiki-sxml svnwiki-signature->identifier)) (define signature->identifier svnwiki-signature->identifier) ;;; Syntax highlighting ;; Highlight SHTML body with LANG syntax and return SHTML or #f. TAG ;; is either PRE or CODE (currently only PRE) and indicates our ;; context; it's probably wise to ignore anything other than PRE ;; unless the highlighter is super-fast. Return SHTML *must* be ;; surrounded with TAG and *should* set 'highlight' class, along with ;; a class for the particular highlighter used, such as 'colorize' or ;; 'prettyprint' (prettify.js). #f return is considered "can't ;; highlight" and is currently reported as a warning; this is clunky and ;; we will probably downgrade it to ignore. ;; Highlighters must be prepared to accept SHTML, using text-content ;; if they require string input (like the colorize egg). (As we only highlight ;; plain strings without markup currently, this is for future compatibility.) ;; LANG will generally be the languages supported by the colorize egg ;; although you may accept additions. LANG #f is not currently ;; possible (the parser rewrites it to 'scheme) but should be handled ;; as meaning "figure it out if you can". (use colorize) ;; TODO: colorize should be autoloaded. (define colorize (lambda (lang tag body) (if (eq? tag 'pre) (and lang (coloring-type-exists? lang) `(,tag (@ (class "highlight colorize")) (lit ,(html-colorize lang (text-content body))))) `(,tag (@ (class "highlight")) ,body)))) (define prettify-js (lambda (lang tag body) (define (lang->ext L) (alist-ref L '(;; Support out of the box (c . c) (c++ . cpp) (java . java) (python . py) (ruby . rb) (javascript . js) (shell . sh) (html . html) (xhtml . xhtml) (xml . xml) ;; Supported via extensions (lisp . lisp) (elisp . el) (common-lisp . cl) (css . css) (sql . sql) (haskell . hs) (scheme . scm) ;; Unsupported, but supported by colorize (objective-c . #f) (erlang . #f) (diff . #f) ))) (if (not (eq? tag 'pre)) `(,tag (@ (class "highlight")) ,body) (cond ((lang->ext lang) => (lambda (ext) `(,tag (@ (class "highlight prettyprint lang-" ,ext)) ,body))) (else #f))))) (define syntax-highlighter (make-parameter colorize)) ;; Can be #f, which is equivalent to (constantly #f) ;;; HTML renderer (define +rx:wiki-man-page+ (irregex '(: (? "http://wiki.call-cc.org") (or "/man/4/" "/manual/") (submatch (+ any))))) (define +rx:wiki-egg-page+ (irregex '(: (? "http://wiki.call-cc.org") (or "/eggref/4/" "/egg/") (submatch (+ any))))) (define (chicken-doc-sxml->html doc path->href ; for internal links; make parameter? def->href ; link to definition node, or #f for no link man-filename->path ) (define (path+section->href p s) (string-append (path->href p) (section->href s))) (tree->string (let ((walk sxml-walk) (drop-tag (lambda (t b s) '())) (drop-tag-noisily (lambda (t b s) ;; Warning disabled as it just spams the logs; instead the ;; offender could be included in an HTML comment. ; (warning "dropped" (cons t b)) '())) (quote-text `(*text* . ,(lambda (t b s) (quote-html b))))) (letrec ((block (lambda (tag) (let ((open (conc "<" tag ">")) (close (conc ""))) (lambda (t b s) (list open (walk b s) close))))) (inline (lambda (tag) (let ((open (conc "<" tag ">")) (close (conc ""))) (lambda (t b s) (list open (walk b inline-ss) close))))) (inline-ss `( ,quote-text (*default* . ,drop-tag-noisily) ;; 500 error is annoying (b . ,(inline "b")) (i . ,(inline "i")) (tt . ,(inline "tt")) (sup . ,(inline "sup")) (sub . ,(inline "sub")) (small . ,(inline "small")) ;; questionable (big . ,(inline "big")) ;; questionable (img . ,drop-tag) (link . ,(lambda (t b s) ;; svnwiki-sxml does not return int-link for ;; call-cc.org links, so we must check that here. (define (process-resource R F) (cond ;; Wiki man page, link to corresponding man page ((string-match +rx:wiki-man-page+ R) => (lambda (m) (cond ((man-filename->path (cadr m)) => (lambda (p) (path+section->href p F))) (else "")))) ;; Wiki egg page, link to node ((string-match +rx:wiki-egg-page+ R) => (lambda (m) (path+section->href (list (cadr m)) F))) (else (join-fragment R F)))) (let ((do-link (lambda (link desc) ;; Caller must quote DESC. (let* ((S (split-fragment link)) (href (process-resource (car S) (cdr S)))) `("" ,desc ""))))) (match b ((link desc) (do-link link (walk desc inline-ss))) ((link) (do-link link (quote-html link))))))) (int-link . ,(lambda (t b s) (define (process-resource R F) ;; Returns: href ;; Usage of man-filename->path is barely tolerable. ;; Perhaps we should use the id cache. (cond ((string=? R "") ;; #fragments target section names in this doc. (section->href F)) ;; Wiki man page, link to corresponding man page, ;; or to a dummy URL if man page lookup fails. ((string-match +rx:wiki-man-page+ R) => (lambda (m) (cond ((man-filename->path (cadr m)) => (lambda (p) (path+section->href p F))) (else "")))) ;; Wiki egg page, link to node ((string-match +rx:wiki-egg-page+ R) => (lambda (m) (path+section->href (list (cadr m)) F))) ;; Unknown absolute path, link to wiki ((char=? (string-ref R 0) #\/) (join-fragment (string-append "http://wiki.call-cc.org" R) F)) ;; Relative path, try man page. Wiki links to ;; current directory (/man) but we can't. ((man-filename->path R) => (lambda (p) (path+section->href p F))) ;; Relative path, assume egg node. (else (path+section->href (list R) F)))) (let ((ilink (lambda (link desc) ;; Caller must quote DESC. (let* ((S (split-fragment link)) (href (process-resource (car S) (cdr S)))) `("" ,desc ""))))) (match b ((link desc) (ilink link (walk desc inline-ss))) ((link) (ilink link (quote-html link))))))) ))) (walk doc `( (p . ,(inline "p")) (def . ,(lambda (t b def-ss) `("
" ,(match b ((('sig . sigs) . body) `(,(map (lambda (s) (match s ((type sig . alist) (let* ((defid (cond ((assq 'id alist) => cadr) (else (signature->identifier sig type)))) (defid (and defid (->string defid)))) `("
identifier defid)) #\") '()) #\> ;; Link to underlying node. ,(let ((def-href (and defid (def->href defid)))) `(,(if def-href `("") '()) "" ,(quote-html sig) "" ,(if def-href "" '()))) " " "" ,(quote-html (->string type)) "" "
\n"))))) sigs) "
" ,(walk body def-ss) "
\n"))) "
\n"))) (pre . ,(block "pre")) ; may need to quote contents (ul . ,(lambda (t b ul-ss) `("\n"))) (ol . ,(lambda (t b ol-ss) `("
    " ,(walk b `((li . ,(lambda (t b s) `("
  1. " ,(walk b ol-ss) "
  2. \n"))))) "
\n"))) (dl . ,(lambda (t b dl-ss) `("
" ,(walk b `((dt . ,(lambda (t b s) `("
" ,(walk b inline-ss) ;? "
\n"))) (dd . ,(lambda (t b s) `("
" ,(walk b dl-ss) "
"))))) "
\n"))) (tags . ,drop-tag) (toc . ,(lambda (t b s) (sxml->html (toc doc)))) (section . ,(lambda (t b s) (match b ((level title . body) (let ((H (list "h" (number->string level))) (id (cond ((section->identifier (text-content title)) => quote-identifier) (else #f)))) (list "<" H (if id `(" id=\"" ,id "\"") '()) ">" "" (walk title inline-ss) "" "" (walk body s))))))) (table . ,(lambda (t b table-ss) ;; Table may be malformed as svnwiki-sxml just passes us the ;; raw HTML, so we drop bad tags. `("\n" ,(walk b `((tr . ,(lambda (t b s) `("" ,(walk b (let ((table-ss `((@ . ,drop-tag) . ,table-ss))) `((th . ,(lambda (t b s) `(""))) (td . ,(lambda (t b s) `(""))) (@ . ,drop-tag) (*default* . ,drop-tag-noisily)))) "\n"))) (@ . ,drop-tag) (*default* . ,drop-tag-noisily))) "
" ,(walk b table-ss) "" ,(walk b table-ss) "
\n"))) ;; colorize supports: ;; (lisp scheme elisp common-lisp c c++ java objective-c erlang python ruby haskell diff) ;; other suggested syntax names to support: ;; (javascript shell css html) (highlight . ,(lambda (t b s) ;; Note: currently in svnwiki-sxml, highlight only has 2 args and body cannot ;; be a tree, only a string. However in the future, highlighted code could contain ;; markup such as links. We would first convert the body to SHTML; colorizers ;; must therefore be prepared to accept SHTML, using text-content if they need ;; string input (like the colorize egg), or passing it through for prettify.js. ;; (Currently, we cannot transform to SHTML.) ;; lang #f not currently possible, as parser rewrites it to 'scheme; but it ;; should be handled here as meaning "figure it out" ;; syntax-highlighter returns: highlighted SHTML tree, or #f if highlighting failed ;; Highlighter should set 'highlight' class in tag, along with a class for ;; the particular highlighter used, such as colorize or prettyprint (prettify.js). (match b ((lang . body) (let ((lang (and lang (string->symbol (string-downcase (->string lang))))) (type 'pre)) (sxml->html (let ((H (syntax-highlighter))) (cond ((and H (H lang type body))) (else `(,(if lang `((lit "")) '()) (,type (@ (class "highlight")) ,body))))))))))) ;; convert example contents to `(pre ...) and re-walk it ;; FIXME: The html-parser will erroneously parse html tags ;; inside tags. Right now we drop them, but we ;; should either not parse them in the first place or ;; convert them back here (less nice). Furthermore the parser ;; may unrecoverably screw up the structure of examples, for ;; example if it contains an

tag; therefore we drop unknown ;; tags to prevent a complete rendering error. (examples . ,(lambda (t b ex-ss) (walk b `((*default* . ,drop-tag-noisily) (example . ,(lambda (t b s) (walk `(pre ,(walk b `((init . ,(lambda (t b s) (list b "\n"))) (expr . ,(lambda (t b s) (walk b `((*default* . ,drop-tag-noisily))))) (result . ,(lambda (t b s) `("\n; Result: " ,b))) (*default* . ,drop-tag-noisily)))) ex-ss))))))) (blockquote . ,(block "blockquote")) (hr . ,(lambda (t b s) "
")) ,@inline-ss )))))) (define (sxml->html doc) (with-output-to-string (lambda () (SRV:send-reply (pre-post-order* doc `((*text* . ,(lambda (t b) ;; Default *text* does not quote symbols, chars, #s. (string->goodHTML (->string b)))) (lit *preorder* . ,(lambda (t b) b)) ;; should this tree->string? . ,universal-conversion-rules*)))))) ;; FIXME: Be sure to bench the performance with TOC on. (define (toc doc) `(div (@ (id "toc")) (h2 (@ class "toc") "TOC" " " (& "raquo")) (ul (@ class "toc") ;; set class for compatibility with browsers lacking CSS2 selectors . ,(sxml-walk doc `((*default* . ,(lambda (t b s) '())) (section . ,(lambda (t b s) (match b ((level title . body) (let ((child (remove null? (sxml-walk body s))) (id (cond ((section->identifier (text-content title)) => quote-identifier) (else #f)))) `(li ,(if id `(a (@ (href "#" ,id)) ,title) title) ,(if (null? child) '() `(ul . ,child))))))))))))) )