;; FIXME: Some of the match constructs (section, link) can error out if SMXL is malformed ;; -- they should maybe warn on stderr and dump an error into the document. (module chicken-doc-html (chicken-doc-sxml->html tree->string quote-html text-content quote-identifier unquote-identifier definition->identifier signature->identifier syntax-highlighter colorize prettify-js) (import scheme (chicken base)) (import (only sxml-transforms string->goodHTML SRV:send-reply pre-post-order* universal-conversion-rules*)) ;; temp, for toc (import matchable) (import (only (chicken string) conc ->string string-intersperse string-translate string-split)) (import (only (chicken port) with-output-to-string with-input-from-string)) (import regex) (import (chicken irregex)) (import (only (chicken format) sprintf)) (import (only srfi-13 string-downcase string-index)) (import (only srfi-1 remove append-map)) (import (chicken condition) (chicken keyword)) (define (sxml-walk doc ss) (let ((default-handler (cond ((assq '*default* ss) => cdr) (else (lambda (t b s) (error 'sxml-walk "No default binding for" t))))) (text-handler (cond ((assq '*text* ss) => cdr) (else #f)))) (let loop ((doc doc)) (cond ((null? doc) '()) ((pair? doc) (let ((tag (car doc)) (body (cdr doc))) (if (symbol? tag) (let ((handler-cell (assq tag ss))) (if handler-cell ((cdr handler-cell) tag body ss) (default-handler tag body ss))) (map loop doc)))) (else (if text-handler (text-handler '*text* doc ss) doc)))))) (define (tree->string doc) (with-output-to-string (lambda () (SRV:send-reply doc)))) (define (quote-html s) (string->goodHTML s)) ;; Like sxpath // *text*. Beware, if your tags have arguments that ;; shouldn't be considered text, they will still be extracted. (define (text-content doc) (tree->string (sxml-walk doc `((*default* . ,(lambda (t b s) (sxml-walk b s))) (@ . ,(lambda (t b s) '())) (*text* . ,(lambda (t b s) b)))))) ;;; URI fragment (id=) handling for sections and definitions ;; Permitted characters in ID attributes in HTML < 5 are only A-Z a-z 0-9 : - _ ;; even though URI fragments are much more liberal. For compatibility, we ;; "period-encode" all other chars. (define +rx:%idfragment-escape+ (irregex "[^-_:A-Za-z0-9]")) (define +rx:%idfragment-unescape+ (irregex "\\.([0-9a-fA-F][0-9a-fA-F])")) ;; Encode raw identifier text string so it is usable as an HTML 4 ID attribute ;; (and consequently, as a URI fragment). (define (quote-identifier x) ; Not a good name; should prob. be encode-identifier (irregex-replace/all +rx:%idfragment-escape+ x (lambda (m) (sprintf ".~x" (char->integer (string-ref (irregex-match-substring m 0) 0)))))) ;; Decode period-encoded URI fragment (or ID attribute value). ;; Note that spaces were period-encoded, not converted to underscore, ;; so the transformation is reversible. (define (unquote-identifier x) (irregex-replace/all +rx:%idfragment-unescape+ x (lambda (m) (string (integer->char (string->number (irregex-match-substring m 1) 16)))))) ;; WARNING: Currently being used to both generate new ids for headers and ;; to figure out the id for an internal-link target. However the former may ;; distinguish duplicate IDs while the latter should ignore duplicates. ;; FIXME: Duplicate IDs will be generated for duplicate section or ;; definition names. A unique suffix is needed. (define (section->identifier x) (string-append "sec:" (string-translate x #\space #\_))) (define (definition->identifier x) (string-append "def:" x)) (define (section->href x) ;; Convert section name to internal fragment href. (if (string=? x "") "" (string-append "#" (quote-identifier (section->identifier x))))) (define (split-fragment link) ;; Split at first # (cond ((string-index link #\#) => (lambda (i) (cons (substring link 0 i) (substring link (+ i 1))))) ; don't include # (else (cons link "")))) (define (join-fragment href fragment) ;; Join with # (if (string=? fragment "") href (string-append href "#" fragment))) (import (only svnwiki-sxml svnwiki-signature->identifier)) (define signature->identifier svnwiki-signature->identifier) ;;; Syntax highlighting ;; Highlight SHTML body with LANG syntax and return SHTML or #f. TAG ;; is either PRE or CODE (currently only PRE) and indicates our ;; context; it's probably wise to ignore anything other than PRE ;; unless the highlighter is super-fast. Return SHTML *must* be ;; surrounded with TAG and *should* set 'highlight' class, along with ;; a class for the particular highlighter used, such as 'colorize' or ;; 'prettyprint' (prettify.js). #f return is considered "can't ;; highlight" and is currently reported as a warning; this is clunky and ;; we will probably downgrade it to ignore. ;; Highlighters must be prepared to accept SHTML, using text-content ;; if they require string input (like the colorize egg). (As we only highlight ;; plain strings without markup currently, this is for future compatibility.) ;; LANG will generally be the languages supported by the colorize egg ;; although you may accept additions. LANG #f is not currently ;; possible (the parser rewrites it to 'scheme) but should be handled ;; as meaning "figure it out if you can". (import colorize) ;; TODO: colorize should be autoloaded. (define colorize (lambda (lang tag body) (if (eq? tag 'pre) (and lang (coloring-type-exists? lang) `(,tag (@ (class "highlight colorize")) (lit ,(html-colorize lang (text-content body))))) `(,tag (@ (class "highlight")) ,body)))) (define prettify-js (lambda (lang tag body) (define (lang->ext L) (alist-ref L '(;; Support out of the box (c . c) (c++ . cpp) (java . java) (python . py) (ruby . rb) (javascript . js) (shell . sh) (json . json) (html . html) (xhtml . xhtml) (xml . xml) (coffeescript . coffee) (objective-c . m) ;; Supported via extensions (lisp . lisp) (elisp . el) (common-lisp . cl) (css . css) (sql . sql) (haskell . hs) (scheme . scm) ;; Unsupported, but supported by colorize (erlang . #f) (diff . #f) ))) (if (not (eq? tag 'pre)) `(,tag (@ (class "highlight")) ,body) (cond ((lang->ext lang) => (lambda (ext) `(,tag (@ (class "highlight prettyprint lang-" ,ext)) ,body))) (else #f))))) (define syntax-highlighter (make-parameter colorize)) ;; Can be #f, which is equivalent to (constantly #f) ;;; HTML renderer (define +rx:wiki-man-page+ (irregex '(: (? "http://wiki.call-cc.org") (or "/man/4/" "/manual/") (submatch (+ any))))) (define +rx:wiki-egg-page+ (irregex '(: (? "http://wiki.call-cc.org") (or "/eggref/4/" "/egg/") (submatch (+ any))))) (define (chicken-doc-sxml->html doc path->href ; for internal links; make parameter? def->href ; link to definition node, or #f for no link man-filename->path ) (define (path+section->href p s) (string-append (path->href p) (section->href s))) (tree->string (let ((walk sxml-walk) (drop-tag (lambda (t b s) '())) (drop-tag-noisily (lambda (t b s) ;; Warning disabled as it just spams the logs; instead the ;; offender could be included in an HTML comment. ; (warning "dropped" (cons t b)) '())) (quote-text `(*text* . ,(lambda (t b s) (quote-html b)))) (sig-args '())) ;; FIXME temp TESTING (letrec ((block (lambda (tag) ;; could be moved out of letrec, but eh (let ((open (conc "<" tag ">")) (close (conc ""))) (lambda (t b s) (list open (walk b s) close))))) (inline (lambda (tag) (let ((open (conc "<" tag ">")) (close (conc ""))) (lambda (t b s) (list open (walk b inline-ss) close))))) (inline-ss #f)) ;; because we can't rely on letrec* behavior (set! inline-ss `( ,quote-text (*default* . ,drop-tag-noisily) ;; 500 error is annoying (b . ,(inline "b")) (i . ,(inline "i")) ;; Conversion of to is done here, via a fluid-let (tt . ,(lambda (t b s) (cond ((not (pair? b)) "") ((memq (string->symbol (car b)) sig-args) (sxml->html `(var (@ (class "arg")) ,b))) ((def->href (car b)) => (lambda (href) ;; def->href generates a direct node ;; link, where we might prefer a # link. ;; Also, A embedded in VAR is odd, but ;; it's easier to style. (sxml->html `(var (@ (class "id")) (a (@ (href ,href)) ,b))))) (else ((inline "tt") t b s))))) (sup . ,(inline "sup")) (sub . ,(inline "sub")) (small . ,(inline "small")) ;; questionable (big . ,(inline "big")) ;; questionable (img . ,drop-tag) (code . ,(inline "code")) (var . ,(inline "var")) (em . ,(inline "em")) (strong . ,(inline "strong")) (& . ,(lambda (t b s) ;; Assume whitelisted at parse time (map (lambda (e) (string-append "&" e ";")) b))) (link . ,(lambda (t b s) ;; svnwiki-sxml does not return int-link for ;; call-cc.org links, so we must check that here. (define (process-resource R F) (cond ;; Wiki man page, link to corresponding man page ((string-match +rx:wiki-man-page+ R) => (lambda (m) (cond ((man-filename->path (cadr m)) => (lambda (p) (path+section->href p F))) (else "")))) ;; Wiki egg page, link to node ((string-match +rx:wiki-egg-page+ R) => (lambda (m) ;; Split on / for eggs to allow subpage links. ;; (Thus we can't link to pages containing a slash; we ;; should permit percent encoding in the link.) (path+section->href (string-split (cadr m) "/") F))) (else (join-fragment R F)))) (let ((do-link (lambda (link desc) ;; Caller must quote DESC. (let* ((S (split-fragment link)) (href (process-resource (car S) (cdr S)))) `("" ,desc ""))))) (match b ((link desc) (do-link link (walk desc inline-ss))) ((link) (do-link link (quote-html link))) (else (error "malformed link" b)))))) (int-link . ,(lambda (t b s) (define (process-resource R F) ;; Returns: href ;; Usage of man-filename->path is barely tolerable. ;; Perhaps we should use the id cache. (cond ((string=? R "") ;; #fragments target section names in this doc. (section->href F)) ;; Wiki man page, link to corresponding man page, ;; or to a dummy URL if man page lookup fails. ((string-match +rx:wiki-man-page+ R) => (lambda (m) (cond ((man-filename->path (cadr m)) => (lambda (p) (path+section->href p F))) (else "")))) ;; Wiki egg page, link to node ((string-match +rx:wiki-egg-page+ R) => (lambda (m) (path+section->href (string-split (cadr m) "/") F))) ;; Unknown absolute path, link to wiki ((char=? (string-ref R 0) #\/) (join-fragment (string-append "http://wiki.call-cc.org" R) F)) ;; Relative path, try man page. Wiki links to ;; current directory (/man) but we can't. ((man-filename->path R) => (lambda (p) (path+section->href p F))) ;; Relative path, assume egg node. (else (path+section->href (string-split R "/") F)))) (let ((ilink (lambda (link desc) ;; Caller must quote DESC. (let* ((S (split-fragment link)) (href (process-resource (car S) (cdr S)))) `("" ,desc ""))))) (match b ((link desc) (ilink link (walk desc inline-ss))) ((link) (ilink link (quote-html link))) (else (error "malformed int-link" b)))))) )) (walk doc `( (p . ,(inline "p")) (def . ,(lambda (t b def-ss) ;; FIXME: Setter signatures not handled ;; FIXME handle car=quote ;; FIXME: Handle (?) result shown as -> or => after read object ;; --HANDLED-- ;; Optionals after #!optional are handled. They must look like foo or (foo bar). ;; Keywords after #!key are handled. They must look like foo or (foo bar). ;; Rest args after #!rest are handled. ;; Rest args as in (foo . bar) are handled and converted to (foo #!rest bar). ;; Optionals like [foo [bar [baz]] in last position are handled and converted to #!optionals foo bar baz. ;; If a default value for optionals is desired, use #!optionals (foo val). ;; --NOT HANDLED-- ;; Optionals like [foo bar baz] (srfi-13) and [foo] [bar] [baz] (sundials) are not allowed and ;; the signature is rendered unchanged. ;; Keyword optionals like [foo: foo-procedure] (spiffy start-server) or [#:foo 1.0] (srfi-27) ;; or [#:foo FOO] (setup-helper) or [#:foo FOO #t] or [foo: FOO] (smsmatrix) ;; or [name [source #f [tag 'locale]]] (locale make-locale-components) are not handled. ;; Arguments can be lowercased, but this is done via CSS. (define (parse-signature sig type) ;; Testing read/write invariance as strings is problematic because ;; - 'foo is written as (quote foo) ;; - #!optional is written as |#!optional| ;; but we need to render each arg as an HTML string anyway, so it might work (and (memq type '(procedure parameter constant record setter string)) (let ((L (handle-exceptions exn #f (with-input-from-string sig read)))) L))) (define (parse-argument arg dsssl) (cond ((keyword? arg) #f) ((symbol? arg) (case arg ((#!optional #!key #!rest) `(var (@ (class "dsssl")) ,arg)) ;; Perhaps anything starting with # should be marked as ;; a keyword or such (else `(var (@ (class arg)) ,arg)))) ((or (string? arg) (number? arg)) `(var (@ (class value)) ,arg)) ((pair? arg) (cond ((not (pair? (cdr arg))) #f) ;; never permit (foo . bar) ((null? (cdr arg)) #f) ;; Optionals like [foo] were rewritten to #!optionals foo ((null? (cddr arg)) ;; optional value as (foo 3) -- in an #!optional or #!key clause (if (eq? (car arg) 'quote) (let ((val (cadr arg))) (if (or (symbol? val) (string? val) (number? val)) ;; Render simple values as . We could even do a def->href test ;; and render as , but that's unlikely to ever be useful. `(var (@ (class value)) #\' ,val) `(tt ,(conc #\' val)))) (and (memq dsssl '(#!optional #!key)) (and-let* ((key (parse-argument (car arg) '())) (val (parse-argument (cadr arg) '()))) ;; This will erroneously render val as class arg when val is a plain ;; symbol, when it should be class value or, fancily, class id. ;; Could do this by changing the dsssl arg to 'mode' and parsing IDs here ;; instead of upstream in compute-sig-shtml. `(#\( ,key " " ,val #\)))))) (else #f))) (else `(tt ,(->string arg))))) (define (parse-optional-arg arg dsssl) ;; Parse (foo), (foo (bar)), (foo (bar (baz))), ... and return a list of optional args, ;; or #f if parsing failed. Note: Unlike parse-argument, does not return shtml. (define (loop acc arg) (if (and (pair? arg) (not (keyword? (car arg))) (symbol? (car arg))) (cond ((null? (cdr arg)) (reverse (cons (car arg) acc))) ((and (null? (cddr arg))) (loop (cons (car arg) acc) (cadr arg))) (else #f)) #f)) (and (not dsssl) (loop '() arg))) (define (extract-var-args-from-shtml shtml) ;; The SHTML is not a proper sexpr markup of the signature. We walk it because ;; it may not be flat. (append-map (lambda (b) (match b (('var ('@ ('class 'arg)) x) (list x)) ((_ . _) ; recurse into pair (extract-var-args-from-shtml b)) (else '()))) shtml)) (define (compute-sig-shtml sig type) `(span (@ (class sig)) . ,(cond ((parse-signature sig type) => (lambda (siglist) (cond ((not (pair? siglist)) `((var (@ (class id)) ,siglist))) ; might need to check type ((match siglist ;; Handle setters. Kinda gross! (('set! (id arg) val) `((var (@ (class dsssl)) set!) ; meh " (" (var (@ (class id)) ,id) " " ,(parse-argument arg '()) ") " ,(parse-argument val '()))) (else #f))) ((call/cc (lambda (k) ; rewrite in iterative style pls (let ((shtml `((var (@ (class "id")) ,(car siglist)) ; might need to verify is symbol . ,(let loop ((siglist (cdr siglist)) (dsssl #f)) (cond ((null? siglist) '()) ((pair? siglist) (let ((dsssl (if (memq (car siglist) '(#!optional #!key #!rest)) (car siglist) dsssl))) ; hmm (let ((opt-args (and (null? (cdr siglist)) (parse-optional-arg (car siglist) dsssl)))) (if opt-args (loop (cons '#!optional opt-args) dsssl) (cons (or (parse-argument (car siglist) dsssl) (k #f)) (loop (cdr siglist) dsssl)))))) (else ;; Convert improper list (foo bar . baz) to (foo bar #!rest baz) (loop `(#!rest ,siglist) dsssl))))))) (intersperse shtml " "))))) (else `((tt ,sig)))))) (else `((tt ,sig)))))) (sxml->html `(dl (@ (class "defsig")) ,(match b ((('sig . sigs) . body) (let ((args '())) `(,(map (lambda (s) (match s ((type sig . alist) (let* ((defid (cond ((assq 'id alist) => cadr) (else (signature->identifier sig type)))) (defid (and defid (->string defid)))) `(dt (@ (class "defsig") ,(if defid `(id (lit ,(quote-identifier (definition->identifier defid)))) '())) ,(let ((def-href (and defid (def->href defid)))) (let ((sig-span (compute-sig-shtml sig type))) (set! args (append (extract-var-args-from-shtml sig-span) args)) ;; horrible! (if def-href ;; Link to underlying node, when present. `(a (@ href ,def-href) ,sig-span) sig-span))) (span (@ (class type)) ,(->string type))))) (else (error "malformed defsig sig" s)))) sigs) (dd (@ (class "defsig")) (lit ,(fluid-let ((sig-args (append args sig-args))) ;; FIXME (walk body def-ss))))))) (else (error "malformed defsig" b))))))) (pre . ,(block "pre")) ; may need to quote contents (ul . ,(lambda (t b ul-ss) `("
    " ,(walk b `((li . ,(lambda (t b s) `("
  • " ,(walk b ul-ss) "
  • \n"))))) "
\n"))) (ol . ,(lambda (t b ol-ss) `("
    " ,(walk b `((li . ,(lambda (t b s) `("
  1. " ,(walk b ol-ss) "
  2. \n"))))) "
\n"))) (dl . ,(lambda (t b dl-ss) `("
" ,(walk b `((dt . ,(lambda (t b s) `("
" ,(walk b inline-ss) ;? "
\n"))) (dd . ,(lambda (t b s) `("
" ,(walk b dl-ss) "
"))))) "
\n"))) (tags . ,drop-tag) (toc . ,(lambda (t b s) (sxml->html (toc doc)))) (section . ,(lambda (t b s) (match b ((level title . body) (let ((H (list "h" (number->string level))) (id (cond ((section->identifier (text-content title)) => quote-identifier) (else #f)))) (list "<" H (if id `(" id=\"" ,id "\"") '()) ">" "" (walk title inline-ss) "" "" (walk body s)))) (else (error "malformed section" b))))) (table . ,(lambda (t b table-ss) ;; Table may be malformed as svnwiki-sxml just passes us the ;; raw HTML, so we drop bad tags. `("\n" ,(walk b `((tr . ,(lambda (t b s) `("" ,(walk b (let ((table-ss `((@ . ,drop-tag) . ,table-ss))) `((th . ,(lambda (t b s) `(""))) (td . ,(lambda (t b s) `(""))) (@ . ,drop-tag) (*default* . ,drop-tag-noisily)))) "\n"))) (@ . ,drop-tag) (*default* . ,drop-tag-noisily))) "
" ,(walk b table-ss) "" ,(walk b table-ss) "
\n"))) ;; colorize supports: ;; (lisp scheme elisp common-lisp c c++ java objective-c erlang python ruby haskell diff) ;; other suggested syntax names to support: ;; (javascript shell css html) (highlight . ,(lambda (t b s) ;; Note: currently in svnwiki-sxml, highlight only has 2 args and body cannot ;; be a tree, only a string. However in the future, highlighted code could contain ;; markup such as links. We would first convert the body to SHTML; colorizers ;; must therefore be prepared to accept SHTML, using text-content if they need ;; string input (like the colorize egg), or passing it through for prettify.js. ;; (Currently, we cannot transform to SHTML.) ;; lang #f not currently possible, as parser rewrites it to 'scheme; but it ;; should be handled here as meaning "figure it out" ;; syntax-highlighter returns: highlighted SHTML tree, or #f if highlighting failed ;; Highlighter should set 'highlight' class in tag, along with a class for ;; the particular highlighter used, such as colorize or prettyprint (prettify.js). (match b ((lang . body) (let ((lang (and lang (string->symbol (string-downcase (->string lang))))) (type 'pre)) (sxml->html (let ((H (syntax-highlighter))) (cond ((and H (H lang type body))) (else `(,(if lang `((lit "")) '()) (,type (@ (class "highlight")) ,body)))))))) (else (error "malformed highlight" b))))) ;; convert example contents to `(pre ...) and re-walk it ;; FIXME: The html-parser will erroneously parse html tags ;; inside tags. Right now we drop them, but we ;; should either not parse them in the first place or ;; convert them back here (less nice). Furthermore the parser ;; may unrecoverably screw up the structure of examples, for ;; example if it contains an

tag; therefore we drop unknown ;; tags to prevent a complete rendering error. (examples . ,(lambda (t b ex-ss) (walk b `((*default* . ,drop-tag-noisily) (example . ,(lambda (t b s) (walk `(pre ,(walk b `((init . ,(lambda (t b s) (list b "\n"))) (expr . ,(lambda (t b s) (walk b `((*default* . ,drop-tag-noisily))))) (result . ,(lambda (t b s) `("\n; Result: " ,b))) (*default* . ,drop-tag-noisily)))) ex-ss))))))) (blockquote . ,(block "blockquote")) (hr . ,(lambda (t b s) "
")) ,@inline-ss )))))) (define (sxml->html doc) (with-output-to-string (lambda () (SRV:send-reply (pre-post-order* doc `((*text* . ,(lambda (t b) ;; Default *text* does not quote symbols, chars, #s. (string->goodHTML (->string b)))) (lit *preorder* . ,(lambda (t b) b)) ;; should this tree->string? . ,universal-conversion-rules*)))))) ;; FIXME: Be sure to bench the performance with TOC on. (define (toc doc) `(div (@ (id "toc")) (h2 (@ class "toc") "TOC" " " (& "raquo")) (ul (@ class "toc") ;; set class for compatibility with browsers lacking CSS2 selectors . ,(sxml-walk doc `((*default* . ,(lambda (t b s) '())) (section . ,(lambda (t b s) (match b ((level title . body) (let ((child (remove null? (sxml-walk body s))) (id (cond ((section->identifier (text-content title)) => quote-identifier) (else #f)))) `(li ,(if id `(a (@ (href "#" ,id)) ,title) title) ,(if (null? child) '() `(ul . ,child))))))))))))) )