(module chicken-doc-html (chicken-doc-sxml->html tree->string quote-html text-content quote-identifier unquote-identifier definition->identifier signature->identifier syntax-highlighter colorize prettify-js) (import scheme chicken) (use (only sxml-transforms string->goodHTML SRV:send-reply pre-post-order* universal-conversion-rules*)) ;; temp, for toc (use matchable) (use (only data-structures conc ->string string-intersperse string-translate alist-ref)) (use (only ports with-output-to-string)) (use regex) (import irregex) (use (only extras sprintf)) (use (only srfi-13 string-downcase string-index)) (use (only srfi-1 remove)) (define (sxml-walk doc ss) (let ((default-handler (cond ((assq '*default* ss) => cdr) (else (lambda (t b s) (error 'sxml-walk "No default binding for" t))))) (text-handler (cond ((assq '*text* ss) => cdr) (else #f)))) (let loop ((doc doc)) (cond ((null? doc) '()) ((pair? doc) (let ((tag (car doc)) (body (cdr doc))) (if (symbol? tag) (let ((handler-cell (assq tag ss))) (if handler-cell ((cdr handler-cell) tag body ss) (default-handler tag body ss))) (map loop doc)))) (else (if text-handler (text-handler '*text* doc ss) doc)))))) (define (tree->string doc) (with-output-to-string (lambda () (SRV:send-reply doc)))) (define (quote-html s) (string->goodHTML s)) ;; Like sxpath // *text*. Beware, if your tags have arguments that ;; shouldn't be considered text, they will still be extracted. (define (text-content doc) (tree->string (sxml-walk doc `((*default* . ,(lambda (t b s) (sxml-walk b s))) (@ . ,(lambda (t b s) '())) (*text* . ,(lambda (t b s) b)))))) ;;; URI fragment (id=) handling for sections and definitions ;; Permitted characters in ID attributes in HTML < 5 are only A-Z a-z 0-9 : - _ ;; even though URI fragments are much more liberal. For compatibility, we ;; "period-encode" all other chars. (define +rx:%idfragment-escape+ (irregex "[^-_:A-Za-z0-9]")) (define +rx:%idfragment-unescape+ (irregex "\\.([0-9a-fA-F][0-9a-fA-F])")) ;; Encode raw identifier text string so it is usable as an HTML 4 ID attribute ;; (and consequently, as a URI fragment). (define (quote-identifier x) ; Not a good name; should prob. be encode-identifier (irregex-replace/all +rx:%idfragment-escape+ x (lambda (m) (sprintf ".~x" (char->integer (string-ref (irregex-match-substring m 0) 0)))))) ;; Decode period-encoded URI fragment (or ID attribute value). ;; Note that spaces were period-encoded, not converted to underscore, ;; so the transformation is reversible. (define (unquote-identifier x) (irregex-replace/all +rx:%idfragment-unescape+ x (lambda (m) (string (integer->char (string->number (irregex-match-substring m 1) 16)))))) ;; WARNING: Currently being used to both generate new ids for headers and ;; to figure out the id for an internal-link target. However the former may ;; distinuish duplicate IDs while the latter should ignore duplicates. ;; FIXME: Duplicate IDs will be generated for duplicate section or ;; definition names. A unique suffix is needed. (define (section->identifier x) (string-append "sec:" (string-translate x #\space #\_))) (define (definition->identifier x) (string-append "def:" x)) (define (section->href x) ;; Convert section name to internal fragment href. (if (string=? x "") "" (string-append "#" (quote-identifier (section->identifier x))))) (define (split-fragment link) ;; Split at first # (cond ((string-index link #\#) => (lambda (i) (cons (substring link 0 i) (substring link (+ i 1))))) ; don't include # (else (cons link "")))) (define (join-fragment href fragment) ;; Join with # (if (string=? fragment "") href (string-append href "#" fragment))) (use (only svnwiki-sxml svnwiki-signature->identifier)) (define signature->identifier svnwiki-signature->identifier) ;;; Syntax highlighting ;; Highlight SHTML body with LANG syntax and return SHTML or #f. TAG ;; is either PRE or CODE (currently only PRE) and indicates our ;; context; it's probably wise to ignore anything other than PRE ;; unless the highlighter is super-fast. Return SHTML *must* be ;; surrounded with TAG and *should* set 'highlight' class, along with ;; a class for the particular highlighter used, such as 'colorize' or ;; 'prettyprint' (prettify.js). #f return is considered "can't ;; highlight" and is currently reported as a warning; this is clunky and ;; we will probably downgrade it to ignore. ;; Highlighters must be prepared to accept SHTML, using text-content ;; if they require string input (like the colorize egg). (As we only highlight ;; plain strings without markup currently, this is for future compatibility.) ;; LANG will generally be the languages supported by the colorize egg ;; although you may accept additions. LANG #f is not currently ;; possible (the parser rewrites it to 'scheme) but should be handled ;; as meaning "figure it out if you can". (use colorize) ;; TODO: colorize should be autoloaded. (define colorize (lambda (lang tag body) (if (eq? tag 'pre) (and lang (coloring-type-exists? lang) `(,tag (@ (class "highlight colorize")) (lit ,(html-colorize lang (text-content body))))) `(,tag (@ (class "highlight")) ,body)))) (define prettify-js (lambda (lang tag body) (define (lang->ext L) (alist-ref L '(;; Support out of the box (c . c) (c++ . cpp) (java . java) (python . py) (ruby . rb) (javascript . js) (shell . sh) (html . html) (xhtml . xhtml) (xml . xml) ;; Supported via extensions (lisp . lisp) (elisp . el) (common-lisp . cl) (css . css) (sql . sql) (haskell . hs) (scheme . scm) ;; Unsupported, but supported by colorize (objective-c . #f) (erlang . #f) (diff . #f) ))) (if (not (eq? tag 'pre)) `(,tag (@ (class "highlight")) ,body) (cond ((lang->ext lang) => (lambda (ext) `(,tag (@ (class "highlight prettyprint lang-" ,ext)) ,body))) (else #f))))) (define syntax-highlighter (make-parameter colorize)) ;; Can be #f, which is equivalent to (constantly #f) ;;; HTML renderer (define +rx:wiki-man-page+ (irregex '(: (? "http://wiki.call-cc.org") (or "/man/4/" "/manual/") (submatch (+ any))))) (define +rx:wiki-egg-page+ (irregex '(: (? "http://wiki.call-cc.org") (or "/eggref/4/" "/egg/") (submatch (+ any))))) (define (chicken-doc-sxml->html doc path->href ; for internal links; make parameter? def->href ; link to definition node, or #f for no link man-filename->path ) (define (path+section->href p s) (string-append (path->href p) (section->href s))) (tree->string (let ((walk sxml-walk) (drop-tag (lambda (t b s) '())) (drop-tag-noisily (lambda (t b s) ;; Warning disabled as it just spams the logs; instead the ;; offender could be included in an HTML comment. ; (warning "dropped" (cons t b)) '())) (quote-text `(*text* . ,(lambda (t b s) (quote-html b))))) (letrec ((block (lambda (tag) (let ((open (conc "<" tag ">")) (close (conc "" tag ">"))) (lambda (t b s) (list open (walk b s) close))))) (inline (lambda (tag) (let ((open (conc "<" tag ">")) (close (conc "" tag ">"))) (lambda (t b s) (list open (walk b inline-ss) close))))) (inline-ss `( ,quote-text (*default* . ,drop-tag-noisily) ;; 500 error is annoying (b . ,(inline "b")) (i . ,(inline "i")) (tt . ,(inline "tt")) (sup . ,(inline "sup")) (sub . ,(inline "sub")) (small . ,(inline "small")) ;; questionable (big . ,(inline "big")) ;; questionable (img . ,drop-tag) (link . ,(lambda (t b s) ;; svnwiki-sxml does not return int-link for ;; call-cc.org links, so we must check that here. (define (process-resource R F) (cond ;; Wiki man page, link to corresponding man page ((string-match +rx:wiki-man-page+ R) => (lambda (m) (cond ((man-filename->path (cadr m)) => (lambda (p) (path+section->href p F))) (else "")))) ;; Wiki egg page, link to node ((string-match +rx:wiki-egg-page+ R) => (lambda (m) (path+section->href (list (cadr m)) F))) (else (join-fragment R F)))) (let ((do-link (lambda (link desc) ;; Caller must quote DESC. (let* ((S (split-fragment link)) (href (process-resource (car S) (cdr S)))) `("" ,desc ""))))) (match b ((link desc) (do-link link (walk desc inline-ss))) ((link) (do-link link (quote-html link))))))) (int-link . ,(lambda (t b s) (define (process-resource R F) ;; Returns: href ;; Usage of man-filename->path is barely tolerable. ;; Perhaps we should use the id cache. (cond ((string=? R "") ;; #fragments target section names in this doc. (section->href F)) ;; Wiki man page, link to corresponding man page, ;; or to a dummy URL if man page lookup fails. ((string-match +rx:wiki-man-page+ R) => (lambda (m) (cond ((man-filename->path (cadr m)) => (lambda (p) (path+section->href p F))) (else "")))) ;; Wiki egg page, link to node ((string-match +rx:wiki-egg-page+ R) => (lambda (m) (path+section->href (list (cadr m)) F))) ;; Unknown absolute path, link to wiki ((char=? (string-ref R 0) #\/) (join-fragment (string-append "http://wiki.call-cc.org" R) F)) ;; Relative path, try man page. Wiki links to ;; current directory (/man) but we can't. ((man-filename->path R) => (lambda (p) (path+section->href p F))) ;; Relative path, assume egg node. (else (path+section->href (list R) F)))) (let ((ilink (lambda (link desc) ;; Caller must quote DESC. (let* ((S (split-fragment link)) (href (process-resource (car S) (cdr S)))) `("" ,desc ""))))) (match b ((link desc) (ilink link (walk desc inline-ss))) ((link) (ilink link (quote-html link))))))) ))) (walk doc `( (p . ,(inline "p")) (def . ,(lambda (t b def-ss) `("
" ,(walk b table-ss) " | "))) (td . ,(lambda (t b s) `("" ,(walk b table-ss) " | "))) (@ . ,drop-tag) (*default* . ,drop-tag-noisily)))) "
---|