;; Copyright 2021 Idiomdrottning. AGPL—See COPYING for details. (module 7off (7off) (import scheme anaphora (chicken base) (chicken io) (chicken port) (chicken pathname) (chicken process-context) (chicken string) define-options lowdown matchable srfi-1 sxml-transforms sxpath utf8 utf8-srfi-13 srfi-42 srfi-69 strse uri-common) (define ((string-with-first-letter? letter) str) (and (string? str) (not (string-null? str)) (eq? letter (string-ref str 0)))) (define ((string-that-starts-with? prefix) str) (aand (string? str) (substring-index prefix str) (zero? it))) (define-options (7off 7off.scm) '((allow-wack-headers "Allow skipping header levels instead of enforcing tree layout." (single-char #\w)) (disable-warnings "Suppress warnings and lintings." (single-char #\q)) (help "Print this help text." (single-char #\h)) (input-file "Input file name." (single-char #\i) (value #t)) (output-file "Output file name." (single-char #\o) (value #t)) (default-alt "Alt text for verbatim blocks." (single-char #\a) (value #t) (default "Code")) (polish "Polish quotes and dashes." (single-char #\p)) (swap-urls "URL-swapping alist file" (value #t) (single-char #\s) (default #f)))) (enable-warnings (not disable-warnings)) (define awh (make-parameter allow-wack-headers)) (define swappable (make-parameter '())) (define (swap-url url) (alist-ref url (swappable) string=? url)) ;; These can be multi-char strings such as "« " and " »". (define open-quote "“") (define close-quote "”") (define warn-once (let ((warnings '())) (lambda (trigger str) (when (and trigger (not (member str warnings))) (warning str) (set! warnings (cons str warnings)))))) (define (bork . args) (with-output-to-port (current-error-port) (cut print (apply conc "Error: " args))) (exit 1)) (define (punctuated? str) (memv (string-ref str (sub1 (string-length str))) (string->list ".?¿!¡‽⸘:;…"))) (define wrap-nls (let ((last-seen "")) (lambda (lis) (let ((pref (cond ((and ((string-with-first-letter? #\>) last-seen) ((string-with-first-letter? #\>) (car lis))) (list (string-drop-right (car lis) 1) "\n")) ((and ((string-with-first-letter? #\=) last-seen) ((string-with-first-letter? #\>) (car lis))) '("")) ((and ((string-with-first-letter? #\#) (car lis)) (xlinks)) => (compose flatten reverse (cut cons "\n" <>))) ((and ((string-with-first-letter? #\=) (car lis)) (xlinks)) => (compose flatten reverse)) ((and ((string-with-first-letter? #\=) last-seen) ((string-with-first-letter? #\=) (car lis))) '("")) (else '("\n"))))) (set! last-seen (car lis)) (append pref lis '("\n")))))) (define (add-nl lis) (append lis '("\n"))) (define prep-nl (cut cons "\n" <>)) (define (heading tag body) (let ((level (car body))) (heading-linter level) (warn-once (< 3 level) "Documents with header levels 4 and above might be better to split up into sub pages.") (append (list (list-ref '("# " "## " "### " "" "" "") (sub1 level))) (cdr body) (list (if (or (> 4 level) (punctuated? (last body))) "" "."))))) (define (strip tag body) body) (define (softwrap lis) (map (lambda (e) (if (equal? e "\n") " " e)) lis)) (define (softwrap-und lis) (fold-right (lambda (e n) (let ((t (equal? e "\n"))) (if (and t (not (null? n)) (string? (car n)) (string-whitespace? (car n))) (cons " " (cdr n)) (cons (if t " " e) n)))) '() lis)) (define ((verbatim alt) tag body) `("```" ,alt "\n" ,@body "```")) (define (tree-map proc tree) (cond ((null? tree) '()) ((atom? tree) (proc tree)) (else (map (cut tree-map proc <>) tree)))) (define (tree-remove pred tree) (cond ((null? tree) '()) ((atom? tree) tree) (else (map (cut tree-remove pred <>) (remove pred tree))))) (define (string-whitespace? str) (every char-whitespace? (string->list str))) (define (xdr body) (drop-while (disjoin (compose not string?) string-whitespace?) body)) (define (strip-attributes body) (if (and (pair? body) (pair? (car body)) (eq? '@ (caar body))) (cdr body) body)) (define (html tag body) ;; FIXME ;; implement a href, img, dl/dt/dd, hr, br (case (car body) ((h1 h2 h3 h4 h5 h6) (wrap-nls (heading 'heading (cons (string->number (list->string (cdr (string->list (symbol->string (car body)))))) (cdr body))))) ((del) (append (cdr body) (make-list (add1 (string-count (apply conc (cdr body)) #\space)) "^W"))) ((p) (wrap-nls (softwrap-und (strip-attributes (cdr body))))) ((aside) (wrap-nls (softwrap-und (cdr body)))) ((td th) (cons "\t" (xdr body))) ((tr) (cons "\n" (xdr body))) ((table) (wrap-nls ((verbatim "Tab-separated values") #t (append (xdr body) '("\n"))))) (else (strip-attributes (cdr body))))) (define (insert-gt tag body) (if (and (not (null? body)) ((string-with-first-letter? #\>) (car body))) (cons* tag (conc ">" (car body)) (cdr body)) (cons* tag "> " body))) (define (blockquote tag body) (pre-post-order* body `((br . ,insert-gt) (paragraph . ,insert-gt) (*text* . ,strip) (*default* . ,cons)))) (define xlinks (let ((lis '("\n"))) (lambda args (if (null? args) (let ((ret lis)) (set! lis '("\n")) (if (null? (cdr ret)) #f ret)) (set! lis (cons (car args) lis)))))) (define rlinks (let ((ht (make-hash-table))) (match-lambda ((? string? arg) (if (hash-table-exists? ht arg) (hash-table-ref ht arg) (begin (warning (conc "Missing reference: " arg)) #f))) (al (hash-table-set! ht (tassv al 'ref) (tassv al 'label)))))) (define refs (let ((ht (make-hash-table))) (match-lambda ((? string? arg) (hash-table-exists? ht arg)) (al (hash-table-set! ht (tassv al 'label) #t))))) (define (maybe-backwrap body) (if (not (refs (tassv body 'ref))) (conc "[" (tassv body 'label) "]") (tassv body 'label))) (define (tassv alist . keys) (cond ((null? keys) #f) ((aand (assv (car keys) alist) (cadr it)) (apply conc (cdr (assv (car keys) alist)))) (else (apply tassv alist (cdr keys))))) (define ctx (let ((num 0)) (lambda args (if (null? args) (begin (set! num (add1 num)) (conc num ". ")) (set! num 0))))) (define (list-bindings prefix) `((paragraph *macro* . ,(compose softwrap-und strip)) (reference-location . ,(compose prep-nl ref-loc)) (item *macro* . ,(compose prep-nl softwrap-und (cut cons (prefix) <>) strip)))) (define (strip-all body) (pre-post-order-splice* body `((href . ,(constantly '())) (title . ,(constantly '())) (*text* . ,strip) (*default* . ,(lambda (tag body) (if body (map strip-all body) '())))))) (define (body-length stripped-body) (sum-ec (: str stripped-body) (if (string? str)) (string-length str))) (define (link-lift tag body) (let* ((tag-body (cons tag body)) (xl ((sxpath '(// explicit-link label)) tag-body)) (sb (strip-all tag-body))) (if (and (= 1 (length xl)) (< (body-length sb) (max 60 (* 2 (body-length (strip-all (cdar xl))))))) `(reference-location (label ,sb) ,@((sxpath '(// explicit-link href)) tag-body) (title ,sb)) (cons tag body)))) (define (stripw tag body) (warn-once #t "Nested lists flattened.") body) (define (strip-nested-lists tag body) (cons tag (pre-post-order-splice* body `((ordered-list . ,stripw) (bullet-list . ,stripw) (*text* . ,strip) (*default* . ,cons))))) (define (ref-loc tag body) `("=> " ,(swap-url (tassv body 'href)) " " ,(cond ((tassv body 'title) => identity) ((rlinks (tassv body 'label)) => identity) (else (tassv body 'label))))) (define (strip-yaml str) (if (and ((string-that-starts-with? "---\n") str) (substring-index "\n---\n" str)) (substring/shared str (+ 5 (substring-index "\n---\n" str))) str)) (define rel (let ((canon (string-append (current-directory) "/"))) (lambda (path . upd) (if (null? upd) (uri->string (uri-relative-to (uri-reference path) (uri-reference canon))) (set! canon path))))) (define (opposite quote) (if (string= quote open-quote) close-quote open-quote)) (define nonl-string? (conjoin string? (complement (cut string=? <> "\n")))) (define (textmerge tree) (cond ((null? tree) '()) ((atom? tree) tree) ((and (not (null? (cdr tree))) (nonl-string? (car tree)) (nonl-string? (cadr tree))) (textmerge (cons (string-append (car tree) (cadr tree)) (cddr tree)))) (else (cons (textmerge (car tree)) (textmerge (cdr tree)))))) (define (quote-polish-strings doc) (define qt (let ((qs 0)) (lambda (next) (set! qs (if (strse? next (: alpha)) 1 (add1 qs))) (string-append (if (odd? qs) open-quote close-quote) next)))) (define (sq before after) (string-append before (cond ((strse? after (or num "n " "til " "tis " "twas " "N " "Til " "Tis " "Twas ")) "’") ;;; I know 'til with apostrophe is ;;; incorrect but two wrongs don't make ;;; a right. The correct apostrophe for ;;; all elisions is right quote. ((strse? before (: alpha)) "’") ((strse? after (: alpha)) "‘") (else "’")) after)) (pre-post-order* (textmerge doc) `((verbatim *preorder* . ,cons) (code *preorder* . ,cons) (html-element *preorder* . ,cons) (*text* . ,(lambda (tag body) (if (string? body) (strse body (: "\"" (=> char (? any))) (qt char) (: (=> before (? any)) "'" (=> after (? (or (: word " ") any)))) (sq before after) " -- " " – " ;; unicode thinspace " --- " " — " (= 3 "-") "—" (= 2 "-") "–") body))) (*default* . ,cons)))) (define heading-linter (let ((hs 0)) (lambda (level) (unless (awh) (if (< (add1 hs) level) (if (= 0 hs) (bork "Starting at level " level " instead of 1 can be fine if are going to include at level " (sub1 level) ". Use --allow-wack-headers if that's what you're doing.") (bork "Skipped a header level from " hs " to " level "! This is bad. Use --allow-wack-headers to override.")) (set! hs level)))))) ; not (max hs level); should be last seen to work in all contexts (define (7off #!key (allow-wack-headers (awh)) (default-alt default-alt) (polish polish) (swap-urls swap-urls)) (awh allow-wack-headers) (swappable (aif swap-urls (with-input-from-file it read) '())) (for-each (o display ->string) (cdr (pre-post-order-splice* (pre-post-order* (pre-post-order-splice* (pre-post-order-splice* ((if polish quote-polish-strings identity) (markdown->sxml* (strse (strip-yaml (read-string)) (: "{% include_absolute '" ($ (*? nonl)) "' %}") (with-input-from-file (rel (m 1)) read-string)))) `((image . ,(lambda (tag body) (cons 'explicit-link body))) (reference-image . ,(lambda (tag body) (cons 'reference-link body))) (*text* . ,strip) (*default* . ,cons))) `((reference . ,(lambda (tag body) (refs body) (cons 'reference-location body))) (paragraph *preorder* . ,link-lift) (item *preorder* . ,link-lift) (bullet-list . ,strip-nested-lists) (ordered-list . ,strip-nested-lists) (*text* . ,strip) (*default* . ,cons))) `((reference-link . ,(lambda (tag body) (let ((body (map softwrap body))) (rlinks body) (maybe-backwrap body)))) (*text* . ,strip) (*default* . ,cons))) `((auto-link . ,(lambda (tag body) (cadar body))) (br . ,(lambda (tag body) (cons "\n" body))) (blockquote *macro* . ,blockquote) (bullet-list ,(list-bindings (constantly "* ")) . ,(compose flatten add-nl strip)) (ordered-list ,(list-bindings ctx) . ,(lambda (tag body) (ctx 'reset) (flatten (add-nl body)))) (code . ,strip) (emphasis . ,strip) (explicit-link . ,(lambda (tag body) (xlinks (conc "=> " (swap-url (tassv body 'href)) " " (tassv body 'title 'label) "\n")) (tassv body 'label))) (heading . ,(compose wrap-nls heading)) (hr . ,(constantly "\n")) (html-element *macro* . ,(compose (cut tree-map (lambda (t) (if ((conjoin symbol? (cut eq? 'html-element <>)) t) 'html-element-p t)) <>) (cut tree-remove (conjoin (string-with-first-letter? #\newline) string-whitespace?) <>) cons)) (html-element-p . ,html) (paragraph *macro* . ,(compose wrap-nls softwrap strip)) (reference-location . ,(compose wrap-nls ref-loc)) (strong . ,strip) (comment . ,(constantly '())) (verbatim . ,(compose wrap-nls (verbatim default-alt))) (*text* . ,strip) (*default* . ,cons))))) (awhen (xlinks) (for-each display (reverse it)))) (when (member (pathname-file (program-name)) '("7off" "7off.scm")) (awhen input-file (rel (conc (current-directory) "/") #t) (current-input-port (open-input-file (rel it)))) (awhen output-file (current-output-port (open-output-file (rel it)))) (7off)) )