;; ;; ;; A program to produce wiki pages from email form submissions ;; generated by the FormMail.pl script. ;; ;; Copyright 2008-2013 Ivan Raikov and the Okinawa Institute of ;; Science and Technology. ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . ;; (import scheme chicken data-structures srfi-1) (require-extension posix utils srfi-13 srfi-14) (require-extension typeclass input-classes rb-tree formular byte-blob byte-blob-stream) (require-extension fmt ssax getopt-long ) (require-library abnf internet-message mbox) (import (only abnf CharLex->CoreABNF Input->Token Token->CharLex ) (only internet-message CoreABNF->InetMessage/UTF8) (only mbox Input+.CoreABNF->Mbox/UTF8) ) (foreign-declare "#include ") (define log10 (foreign-lambda double "log10" double)) (define lookup-def (lambda (k lst . rest) (let-optionals rest ((default #f)) (let ((v (alist-ref k lst eq? default))) (and v (if (or (atom? v) (and (pair? v) (pair? (cdr v)))) v (car v))))))) (define s+ string-append) (define s$ string->symbol) (define parser-error error) (define (xml:str-handler fragment foll-fragment seed) (if (string-null? foll-fragment) (cons fragment seed) (cons* foll-fragment fragment seed))) (define (entity-ref->string entity) (case entity ((quot) "\"") ((apos) "'") ((amp) "&") ((lt) "<") ((gt) ">") (else (error 'entity-ref->string "unknown entity " entity)))) (define (xml:read-string s) (let loop ((lst (list)) (iport (open-input-string s))) (if (eof-object? (peek-char iport)) (string-concatenate (reverse lst)) (let-values (((lst token) (ssax:read-char-data iport #t xml:str-handler lst))) (let ((v (and (not (eof-object? token )) (list (xml-token-kind token) (xml-token-head token))))) (cond ((and (pair? v) (eq? (car v) 'ENTITY-REF)) (let ((entity (cadr v))) (loop (cons (entity-ref->string entity) lst) iport))) (else (loop lst iport)))))))) (define (string-take-alphanumeric str n) (let loop ((i n) (lst (string->list str)) (res (list))) (if (or (null? lst) (zero? i)) (list->string (reverse res)) (if (positive? i) (loop (if (or (char-alphabetic? (car lst)) (char-numeric? (car lst))) (- i 1) i) (cdr lst) (cons (car lst) res)))))) (define (string-drop-alphanumeric str n) (let loop ((i n) (lst (string->list str))) (cond ((null? lst) #f) ((zero? i) (list->string lst)) ((positive? i) (loop (if (or (char-alphabetic? (car lst)) (char-numeric? (car lst))) (- i 1) i) (cdr lst)))))) (define (pp-submission/wiki id from-address time-seconds fields . rest) (let-optionals rest ((include-fields #f) (field-limits #f) ) (let ((wiki-fields (filter-map identity (if include-fields (map (lambda (x) (assoc x fields)) include-fields) fields)))) (print (s+ "== " id " ==\n")) (for-each (lambda (field) (if (>= (length field) 2) (let ((section-limit (lookup-def (first field) field-limits))) (let* ((section-title (->string (first field))) (section-content (second field)) (section-content (string-trim-both (xml:read-string (if (pair? section-content) (list->string section-content) section-content)) char-set:whitespace)) (section-overlimit ((lambda (x) (and (string? x) (string-split x "\n" #t))) ((lambda (x) (and section-limit (string-drop-alphanumeric x section-limit))) section-content)))) (print (s+ "=== " section-title " ===")) (print section-content) (if section-overlimit (begin (print (s+ "==== " section-title " (over character limit) ====")) (print section-overlimit))) (print))))) wiki-fields)))) (define (make-id i width id-prefix idnum ) (s+ id-prefix " " (fmt #f (pad-char #\0 (fit/left width idnum))) "-" (->string i))) (define (pp-formular-tree/wiki tree . rest) (let-optionals rest ((id-prefix "Form Submission") (id-order #f) (include-fields #f) (exclude #f) (include #f) (field-limits #f) (after-date #f)) (let ((m (rb-tree-map s<=))) (with-instance (( m)) (let* ((keys (list-keys tree)) (order (or id-order (and (positive? (length keys)) (inexact->exact (ceiling (log10 (length keys))))) 3))) (if (null? keys) (error 'pp-formular-tree/text "empty list of forms")) ((foldi tree) (lambda (from-address lst i) (cond ((and exclude (member (s$ from-address) exclude)) i) ((or (not include) (member (s$ from-address) include)) (let loop ((lst lst)) (if (null? lst) i (let* ((submission (cdr (last lst))) (time-seconds (lookup-def 'time-seconds submission)) (fields (let recur ((fields (lookup-def 'fields submission))) (cond ((and (pair? fields) (pair? (car fields)) (symbol? (car (car fields)))) fields) ((pair? fields) (recur (car fields))) (else fields)))) ) (if (or (not after-date) (> time-seconds after-date )) (let* ((width (if (positive? order) order 1)) (idnum (inexact->exact (- time-seconds 10e8))) (id (let loop ((i 1) (id (make-id 1 width id-prefix idnum))) (if (file-exists? id) (loop (+ 1 i) (make-id (+ 1 i) width id-prefix idnum)) id)))) (if (> (length lst) 1) (print "Multiple submissions by " from-address ": using submission from " (seconds->string time-seconds))) (with-output-to-port (open-output-file id) (lambda () (pp-submission/wiki id from-address time-seconds fields include-fields field-limits))))) (+ i 1))) )) (else i))) 1) )) )) ) (define opt-defaults `( (order . 3) (mbox-path . "mbox") (title-prefix . "Form Submission") (field-limits . "") )) (define (defopt x) (lookup-def x opt-defaults)) (define opt-grammar `( (exclude "specify a colon-separated list of entries to exclude (default is none)" (value (required "FROM1:FROM2") (transformer ,(lambda (x) (list (map string->symbol (string-split x ":"))))))) (include "specify a colon-separated list of entries to include (default is all)" (value (required "FROM1:FROM2") (transformer ,(lambda (x) (list (map string->symbol (string-split x ":"))))))) (after-date "omit entries before given date (in seconds)" (value (required "N") (predicate ,string->number) (transformer ,string->number))) (fields "specify a colon-separated list of fields to process (default is all fields)" (value (required "FIELD1:FIELD2:...") (transformer ,(lambda (x) (list (map string->symbol (string-split x ":"))))))) (flimits "specify a comma-separated list of fields and character limits (default is none )" (value (required "FIELD1:LIMIT1,...") (default ,(defopt 'field-limits)) (transformer ,(lambda (x) (let loop ((lst (string-split (->string x) ",")) (field-limits (list))) (if (null? lst) field-limits (let ((v (string-split (car lst) ":"))) (cond ((and (pair? v) (pair? (cdr v))) (let ((field (car v)) (limit (cadr v))) (let ((field-sym (s$ field)) (limit-num (string->number limit))) (if (and (symbol? field-sym) (number? limit-num)) (loop (cdr lst) (cons (list field-sym limit-num) field-limits)) (error "invalid field:limit pair: " (list field limit)))))) (else (error "invalid field:limit list " lst)))))))))) (mbox-path ,(string-append "specify path to input mbox (default: " (defopt 'mbox-path) ")") (value (required "PATH") (default ,(defopt 'mbox-path)) )) (order ,(string-append "specify order of form id (default: " (->string (defopt 'order)) ")") (value (required "N") (default ,(defopt 'order)) (transformer ,(lambda (x) (inexact->exact (string->number x)))))) (title-prefix ,(string-append "specify prefix for text page title (default: " (defopt 'title-prefix) ")") (value (required "STRING") (default ,(defopt 'title-prefix)))) (write-alist "write an alist representation to FILE" (value (required "FILE"))) (read-alist "read an alist representation from FILE" (value (required "FILE"))) (stream "use I/O interface based on streams") (help (single-char #\h)) )) ;; Use args:usage to generate a formatted list of options (from OPTS), ;; suitable for embedding into help text. (define (form2wiki:usage) (print "Usage: form2wiki [options...] operands ") (newline) (print "The following options are recognized: ") (newline) (print (parameterize ((indent 2) (width 32)) (usage opt-grammar))) (exit 1)) ;; Process arguments and collate options and arguments into OPTIONS ;; alist, and operands (filenames) into OPERANDS. (define opts (getopt-long (command-line-arguments) opt-grammar)) (define opt (make-option-dispatch opts opt-grammar)) ;; from SRFI-33, useful in splitting up the bit patterns used to ;; represent unicode values in utf8 (define-inline (extract-bit-field size position n) (bitwise-and (bitwise-not (arithmetic-shift -1 size)) (arithmetic-shift n (- position)))) ;; The following is borrowed from the utf8 library by Alex Shinn: ;; number of total bytes in a utf8 char given the 1st byte (define utf8-start-byte-length '#( 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 0x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 1x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 2x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 3x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 4x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 5x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 6x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 7x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 8x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 9x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; ax 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; bx 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; cx 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; dx 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ; ex 4 4 4 4 4 4 4 4 5 5 5 5 6 6 0 0 ; fx )) ;; The following two routines are based on the read-utf8-char routine ;; from the utf8 library by Alex Shinn: (define (byte-blob-char-car b) (let ((b1 (fxand 255 (byte-blob-car b) ))) (let ((len (vector-ref utf8-start-byte-length b1))) (if (<= len 1) (integer->char b1) (let loop ((res (extract-bit-field (- 7 len) 0 b1)) (b (byte-blob-cdr b)) (i (- len 1))) (if (zero? i) (integer->char res) (let ((b2 (byte-blob-car b))) (cond ((not (= #b10 (extract-bit-field 2 6 b2))) (error 'byte-blob-char-car "invalid utf8 sequence")) (else (loop (bitwise-ior (arithmetic-shift res 6) (bitwise-and #b00111111 b2)) (byte-blob-cdr b) (- i 1))) )) )) ) ))) (define (byte-blob-char-cdr x) (let ((b (fxand 255 (byte-blob-car x)))) (let ((n (vector-ref utf8-start-byte-length b))) (if (fx= n 1) (byte-blob-cdr x) (byte-blob-drop x n)) ))) (define (byte-blob-stream-char-car b) (let ((b1 (fxand 255 (byte-blob-stream-car b) ))) (let ((len (vector-ref utf8-start-byte-length b1))) (if (<= len 1) (integer->char b1) (let loop ((res (extract-bit-field (- 7 len) 0 b1)) (i (- len 1))) (if (zero? i) (integer->char res) (let ((b2 (fxand 255 (byte-blob-stream-car b)))) (cond ((not (= #b10 (extract-bit-field 2 6 b2))) (error 'byte-blob-stream-char-car "invalid utf8 sequence")) (else (loop (bitwise-ior (arithmetic-shift res 6) (bitwise-and #b00111111 b2)) (- i 1))) )) )) ) ))) (define (byte-blob-stream-char-cdr x) (let ((b (fxand 255 (byte-blob-stream-car x)))) (let ((n (vector-ref utf8-start-byte-length b))) (if (fx= n 1) (byte-blob-stream-cdr x) (byte-blob-stream-drop x n)) ))) (define byte-blob-stream- (make- byte-blob-stream-empty? byte-blob-stream-char-car byte-blob-stream-char-cdr)) (define byte-blob-stream- (Input->Token byte-blob-stream-)) (define byte-blob-stream- (Token->CharLex byte-blob-stream-)) (define byte-blob-stream- (CharLex->CoreABNF byte-blob-stream-)) (define byte-blob-stream- (CoreABNF->InetMessage/UTF8 byte-blob-stream- )) (define byte-blob-stream- (make- byte-blob-stream- byte-blob-stream-find (compose blob->byte-blob string->blob) file->byte-blob-stream )) (define byte-blob-stream- (Input+.CoreABNF->Mbox/UTF8 byte-blob-stream- byte-blob-stream- )) (define byte-blob- (make- byte-blob-empty? byte-blob-char-car byte-blob-char-cdr)) (define byte-blob- (Input->Token byte-blob-)) (define byte-blob- (Token->CharLex byte-blob-)) (define byte-blob- (CharLex->CoreABNF byte-blob-)) (define byte-blob- (CoreABNF->InetMessage/UTF8 byte-blob- )) (define byte-blob- (make- byte-blob- byte-blob-find (compose blob->byte-blob string->blob) file->byte-blob )) (define byte-blob- (Input+.CoreABNF->Mbox/UTF8 byte-blob- byte-blob- )) (define (main options operands) (if (opt 'help) (form2wiki:usage)) (let ((mbox-instance (if (opt 'stream) byte-blob-stream- byte-blob-)) (inet-message-instance (if (opt 'stream) byte-blob-stream- byte-blob-))) (with-instance (( mbox-instance)) (let* ((mbox-messages->form-tree (mbox-messages->form-tree inet-message-instance)) (tree (if (opt 'read-alist) (alist->rb-tree (read (open-input-file (opt 'read-alist)))) (mbox-messages->form-tree (mbox-file->messages (opt 'mbox-path)))))) (pp-formular-tree/wiki tree (or (opt 'title-prefix) (defopt 'title-prefix)) (opt 'order) (opt 'fields) (opt 'exclude) (opt 'include) (opt 'flimits) (opt 'after-date) )))) ) (main opts (opt '@))