(module chicken-doc-parser (parse-and-write-tags/svnwiki) (import scheme chicken) (use regex) (import irregex) (use matchable) (use extras) (use ports) (define +identifier-tags+ (list "procedure" "macro" "read" "parameter" "record" "string" "class" "method" "constant")) ;; Match tag lines of the form "(abc def)" (define +rx:tag+ (irregex `(: bol "<" (submatch (or ,@+identifier-tags+)) ">" (submatch (?? (+ any))) "" (? "
") eol))) (define +rx:section+ (irregex '(: bol (submatch (** 2 5 "=")) (* space) (submatch (+ any)) eol))) (define +rx:code+ (irregex '(: "{{" (submatch (+ (~ "}"))) "}}"))) (define +rx:link+ (irregex '(: "[[" (submatch (+ (~ "]|"))) "|" (submatch (+ (~ "]|"))) "]]"))) (define +rx:nondescript-link+ (irregex '(: "[[" (submatch (+ (~ "]"))) "]]"))) (define +rx:wiki-command+ (irregex '(: bos "[[" (submatch (+ (~ "]:|"))) ":" (submatch (* (~ "]:|"))) "]]"))) (define +rx:enscript-either+ (irregex "]*>")) (define +rx:td-end+ (irregex '(or "" ""))) (define +rx:tr-begin+ (irregex "")) (define +rx:deltable+ (irregex '(or "" "" "" (: "<" (? "/") "table" (* (~ ">")) ">")))) (define +rx:ivanism+ (irregex '(: ":" eos))) ;; Convert signature (usually a list or bare identifier) into an identifier ;; At the moment, this just means taking the car of a list if it's a list, ;; or otherwise returning the read item. If it cannot be read as a ;; scheme expression, fail. (define (signature->identifier sig type) (condition-case (let ((L (with-input-from-string sig read))) (cond ((pair? L) (car L)) ((symbol? L) ;; SPECIAL HANDLING: handle e.g. MPI:init:: -> MPI:init. ;; Remove this once these signatures are normalized. ;; (Warning: usually read as keywords, if so symbol->string ;; will strip one : itself) (let ((str (irregex-replace +rx:ivanism+ (symbol->string L) ""))) (if str (string->symbol str) L))) (else sig))) ((exn) (warning "Could not parse signature" sig) #f))) (define (tag-line line) (match (string-search +rx:tag+ line) ((_ type signature) (list type signature (signature->identifier signature type))) (#f #f))) (define (section-line line) (match (string-search +rx:section+ line) ((_ num title) (list num title)) (#f #f))) ;; Read and parse svnwiki format text file at pathname (or port) FN and ;; write all tag groups using (WRITE-TAGS tags tag-body), where TAGS ;; is a list of (type signature identifier) records, and TAG-BODY ;; is a string containing the tag body for this tag group. Additionally, ;; a transformed wiki document will be written to port PARSED-OUT. ;; (This API must change if the parsing gets any more complex. We ;; would probably just return an SXML document.) (define (parse-and-write-tags/svnwiki fn-or-port write-tags parsed-out) ((if (port? fn-or-port) with-input-from-port with-input-from-file) fn-or-port (lambda () (let loop ((line (read-line)) (section 1) (tags '()) (tag-body '()) (where 'section)) (define (tag?) (pair? tags)) (cond ((eof-object? line) (when (tag?) (write-tags tags tag-body)) (void)) ((tag-line line) => (match-lambda ((type sig id) ;; NB Tag signatures are formatted and saved directly in ;; the tag body. This preserves context (all grouped signatures ;; will appear when one is referenced), especially important ;; for identifiers with multiple valid signatures. (let ((pretty-sig (sprintf "~a: ~a" type sig))) (display pretty-sig parsed-out) (newline parsed-out) (cond ((eq? where 'tag-header) (loop (read-line) section (cons (list type sig id) tags) (cons pretty-sig tag-body) where)) (else (when (tag?) (write-tags tags tag-body)) (loop (read-line) section (cons (list type sig id) '()) (cons pretty-sig '()) 'tag-header))))))) ((section-line line) => (match-lambda ((num title) (display line parsed-out) (newline parsed-out) (cond ((tag?) (write-tags tags tag-body) (loop (read-line) section '() '() 'section)) (else (loop (read-line) section tags tag-body 'section)))))) (else ;; Q&D replacements; won't work across line break, but that is rare (let* ((line (irregex-replace/all +rx:code+ line "`" 1 "`")) (line (irregex-replace/all +rx:link+ line 2 " (" 1 ")")) (line (irregex-replace/all +rx:wiki-command+ line "")) (line (irregex-replace/all +rx:nondescript-link+ line 1)) (line (irregex-replace/all +rx:enscript-either+ line "")) (line (irregex-replace/all +rx:td-end+ line "\t| ")) (line (irregex-replace/all +rx:tr-begin+ line "| ")) (line (irregex-replace/all +rx:deltable+ line ""))) (display line parsed-out) (newline parsed-out) (if (tag?) (loop (read-line) section tags (cons line tag-body) 'line) (loop (read-line) section tags tag-body 'line)))) ))))) ;; (check-all "~/scheme/chicken-wiki/eggref/4/sql-de-lite" (list "sql-de-lite")) ;; (check-all "~/scheme/cdoc/sql-de-lite.wiki") ;; (check-all "~/scheme/chicken-wiki/man/4/Unit posix" (list "posix")) ;; (signature->identifier "(prepared-cache-size n" 'procedure) ;; (parse-unit "Non-standard macros and special forms" "chicken") ;; (parse-unit "Locations" "chicken") )