;; ;; ;; A program to produce plain text output from email form submissions ;; generated by the FormMail.pl script. ;; ;; Copyright 2008-2013 Ivan Raikov and the Okinawa Institute of ;; Science and Technology. ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . ;; (import scheme chicken data-structures srfi-1 ) (require-extension posix utils utf8 utf8-srfi-13 utf8-srfi-14) (require-extension typeclass input-classes rb-tree formular byte-blob byte-blob-stream) (require-extension fmt ssax getopt-long ) (require-library abnf internet-message mbox) (import (only abnf CharLex->CoreABNF Input->Token Token->CharLex ) (only internet-message CoreABNF->InetMessage/UTF8) (only mbox Input+.CoreABNF->Mbox/UTF8) ) (foreign-declare "#include ") (define log10 (foreign-lambda double "log10" double)) (define lookup-def (lambda (k lst . rest) (let-optionals rest ((default #f)) (let ((v (alist-ref k lst eq? default))) (and v (if (or (atom? v) (and (pair? v) (pair? (cdr v)))) v (car v))))))) (define s+ string-append) (define parser-error error) (define (xml:str-handler fragment foll-fragment seed) (if (string-null? foll-fragment) (cons fragment seed) (cons* foll-fragment fragment seed))) (define (entity-ref->string entity) (case entity ((quot) "\"") ((apos) "'") ((amp) "&") ((lt) "<") ((gt) ">") (else (error 'entity-ref->string "unknown entity " entity)))) (define (xml:read-string s) (let loop ((lst (list)) (iport (open-input-string s))) (if (eof-object? (peek-char iport)) (string-concatenate (reverse lst)) (let-values (((lst token) (ssax:read-char-data iport #t xml:str-handler lst))) (let ((v (and (not (eof-object? token )) (list (xml-token-kind token) (xml-token-head token))))) (cond ((and (pair? v) (eq? (car v) 'ENTITY-REF)) (let ((entity (cadr v))) (loop (cons (entity-ref->string entity) lst) iport))) (else (loop lst iport)))))))) (define (pp-submission/text id from-address time-seconds fields . rest) (let-optionals rest ((include-fields #f)) (let ((text-fields (filter-map identity (if include-fields (map (lambda (x) (assoc x fields)) include-fields) fields)))) (print id) (for-each (lambda (field) (if (>= (length field) 2) (let* ((section-title (->string (first field))) (section-content (second field)) (section-content (string-trim-both (xml:read-string (if (pair? section-content) (list->string section-content) section-content)) char-set:whitespace))) (display (s+ section-title ": " section-content)) (print)))) text-fields)))) (define (make-id i width id-prefix idnum ) (s+ id-prefix " " (fmt #f (pad-char #\0 (fit/left width idnum))) "-" (->string i))) (define (pp-formular-tree/text tree . rest) (let-optionals rest ((id-prefix "Form Submission") (id-order #f) (include-fields #f) (exclude #f) (after-time #f)) (let ((m (rb-tree-map s<=))) (with-instance (( m)) (let* ((id-list (list)) (keys (list-keys tree)) (order (or id-order (and (positive? (length keys)) (inexact->exact (ceiling (log10 (length keys))))) 3)) ) (if (null? keys) (error 'pp-formular-tree/text "empty list of forms")) ((foldi tree) (lambda (from-address lst i) (cond ((and exclude (member (string->symbol from-address) exclude)) i) (else (let loop ((lst lst)) (if (null? lst) i (let* ((submission (cdr (last lst))) (time-seconds (lookup-def 'time-seconds submission)) (fields (let recur ((fields (lookup-def 'fields submission))) (cond ((and (pair? fields) (pair? (car fields)) (symbol? (car (car fields)))) fields) ((pair? fields) (recur (car fields))) (else fields)))) ) (if (or (not after-time) (> time-seconds after-time)) (let* ((width (if (positive? order) order 1)) (idnum (inexact->exact (- time-seconds 10e8))) (id (let loop ((v 0) (id (make-id 1 width id-prefix idnum))) (let ((hv (member id id-list))) (if hv (loop (+ v 1) (make-id (+ 1 v) width id-prefix idnum)) (begin (set! id-list (cons id id-list)) id)))))) (if (> (length lst) 1) (print "Multiple submissions by " from-address ": using submission from " (seconds->string time-seconds))) (pp-submission/text id from-address time-seconds fields include-fields))) (+ i 1) )) )) )) 1)) )) )) (define opt-defaults `( (order . 3) (mbox-path . "mbox") (prefix . "Form Submission") )) (define (defopt x) (lookup-def x opt-defaults)) (define opt-grammar `( (exclude "specify a colon-separated list of entries to exclude (default is none)" (value (required "FROM1:FROM2") (transformer ,(lambda (x) (list (map string->symbol (string-split x ":"))))))) (after-date "omit entries before given date (in seconds)" (value (required "N") (predicate ,string->number) (transformer ,string->number))) (fields "specify a colon-separated list of fields to process (default is all fields)" (value (required "FIELD1:FIELD2:...") (transformer ,(lambda (x) (list (map string->symbol (string-split x ":"))))))) (mbox-path ,(string-append "specify path to input mbox (default: " (defopt 'mbox-path) ")") (value (required "PATH") (default ,(defopt 'mbox-path)) )) (order ,(string-append "specify order of form id (default: " (->string (defopt 'order)) ")") (value (required "N") (default ,(defopt 'order)) (transformer ,(lambda (x) (inexact->exact (string->number x)))))) (prefix ,(string-append "specify prefix for text page title (default: " (defopt 'prefix) ")") (value (required "STRING") (default ,(defopt 'prefix)))) (write-alist "write an alist representation to FILE" (value (required "FILE"))) (read-alist "read an alist representation from FILE" (value (required "FILE"))) (stream "use I/O interface based on streams") (help (single-char #\h)) )) ;; Use args:usage to generate a formatted list of options (from OPTS), ;; suitable for embedding into help text. (define (form2txt:usage) (print "Usage: form2txt [options...] operands ") (newline) (print "The following options are recognized: ") (newline) (print (parameterize ((indent 5)) (usage opt-grammar))) (exit 1)) ;; Process arguments and collate options and arguments into OPTIONS ;; alist, and operands (filenames) into OPERANDS. (define opts (getopt-long (command-line-arguments) opt-grammar)) (define opt (make-option-dispatch opts opt-grammar)) ;; from SRFI-33, useful in splitting up the bit patterns used to ;; represent unicode values in utf8 (define-inline (extract-bit-field size position n) (bitwise-and (bitwise-not (arithmetic-shift -1 size)) (arithmetic-shift n (- position)))) ;; The following is borrowed from the utf8 library by Alex Shinn: ;; number of total bytes in a utf8 char given the 1st byte (define utf8-start-byte-length '#( 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 0x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 1x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 2x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 3x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 4x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 5x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 6x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 7x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 8x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 9x 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; ax 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; bx 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; cx 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; dx 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ; ex 4 4 4 4 4 4 4 4 5 5 5 5 6 6 0 0 ; fx )) ;; The following two routines are based on the read-utf8-char routine ;; from the utf8 library by Alex Shinn: (define (byte-blob-char-car b) (let ((b1 (fxand 255 (byte-blob-car b) ))) (let ((len (vector-ref utf8-start-byte-length b1))) (if (<= len 1) (integer->char b1) (let loop ((res (extract-bit-field (- 7 len) 0 b1)) (b (byte-blob-cdr b)) (i (- len 1))) (if (zero? i) (integer->char res) (let ((b2 (byte-blob-car b))) (cond ((not (= #b10 (extract-bit-field 2 6 b2))) (error 'byte-blob-char-car "invalid utf8 sequence")) (else (loop (bitwise-ior (arithmetic-shift res 6) (bitwise-and #b00111111 b2)) (byte-blob-cdr b) (- i 1))) )) )) ) ))) (define (byte-blob-char-cdr x) (let ((b (fxand 255 (byte-blob-car x)))) (let ((n (vector-ref utf8-start-byte-length b))) (if (fx= n 1) (byte-blob-cdr x) (byte-blob-drop x n)) ))) (define (byte-blob-stream-char-car b) (let ((b1 (fxand 255 (byte-blob-stream-car b) ))) (let ((len (vector-ref utf8-start-byte-length b1))) (if (<= len 1) (integer->char b1) (let loop ((res (extract-bit-field (- 7 len) 0 b1)) (i (- len 1))) (if (zero? i) (integer->char res) (let ((b2 (fxand 255 (byte-blob-stream-car b)))) (cond ((not (= #b10 (extract-bit-field 2 6 b2))) (error 'byte-blob-stream-char-car "invalid utf8 sequence")) (else (loop (bitwise-ior (arithmetic-shift res 6) (bitwise-and #b00111111 b2)) (- i 1))) )) )) ) ))) (define (byte-blob-stream-char-cdr x) (let ((b (fxand 255 (byte-blob-stream-car x)))) (let ((n (vector-ref utf8-start-byte-length b))) (if (fx= n 1) (byte-blob-stream-cdr x) (byte-blob-stream-drop x n)) ))) (define byte-blob-stream- (make- byte-blob-stream-empty? byte-blob-stream-char-car byte-blob-stream-char-cdr)) (define byte-blob-stream- (Input->Token byte-blob-stream-)) (define byte-blob-stream- (Token->CharLex byte-blob-stream-)) (define byte-blob-stream- (CharLex->CoreABNF byte-blob-stream-)) (define byte-blob-stream- (CoreABNF->InetMessage/UTF8 byte-blob-stream- )) (define byte-blob-stream- (make- byte-blob-stream- byte-blob-stream-find (compose blob->byte-blob string->blob) file->byte-blob-stream )) (define byte-blob-stream- (Input+.CoreABNF->Mbox/UTF8 byte-blob-stream- byte-blob-stream- )) (define byte-blob- (make- byte-blob-empty? byte-blob-char-car byte-blob-char-cdr)) (define byte-blob- (Input->Token byte-blob-)) (define byte-blob- (Token->CharLex byte-blob-)) (define byte-blob- (CharLex->CoreABNF byte-blob-)) (define byte-blob- (CoreABNF->InetMessage/UTF8 byte-blob- )) (define byte-blob- (make- byte-blob- byte-blob-find (compose blob->byte-blob string->blob) file->byte-blob )) (define byte-blob- (Input+.CoreABNF->Mbox/UTF8 byte-blob- byte-blob- )) (define (main options operands) (if (opt 'help) (form2txt:usage)) (let ((mbox-instance (if (opt 'stream) byte-blob-stream- byte-blob-)) (inet-message-instance (if (opt 'stream) byte-blob-stream- byte-blob-)) (m (rb-tree-map s<=))) (with-instance (( mbox-instance) ( m m.)) (let* ((mbox-messages->form-tree (mbox-messages->form-tree inet-message-instance)) (tree (if (opt 'read-alist) (alist->rb-tree (read (open-input-file (opt 'read-alist)))) (let ((messages (mbox-file->messages (opt 'mbox-path)))) (mbox-messages->form-tree messages) ))) ) (if (opt 'write-alist) (let ((oport (open-output-file (opt 'write-alist)))) (display "(" oport) ((m.for-each-ascending tree) (lambda (x) (let ((x1 (cons (car x) (map (lambda (lst) (let* ((submission (cdr lst)) (time-seconds (if submission (lookup-def 'time-seconds submission) (error 'form2txt "invalid submission" lst))) (fields (let recur ((fields (lookup-def 'fields submission))) (cond ((and (pair? fields) (pair? (car fields)) (symbol? (car (car fields)))) fields) ((pair? fields) (recur (car fields))) (else fields)))) (pf (lambda (f) (and (pair? f) (pair? (cdr f)) (if (string? (second f)) f (list (first f) (list->string (second f)))))))) `(submission (time-seconds ,time-seconds) (fields ,(filter-map pf fields))))) (cdr x))))) (pp x1 oport)))) (display ")" oport) )) (pp-formular-tree/text tree (or (opt 'prefix) (defopt 'prefix)) (opt 'order) (opt 'fields) (opt 'exclude) (opt 'after-date) )) )) ) (main opts (opt '@))