;;
;;
;; A program to produce wiki pages from email form submissions
;; generated by the FormMail.pl script.
;;
;; Copyright 2008-2012 Ivan Raikov and the Okinawa Institute of
;; Science and Technology.
;;
;; This program is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; A full copy of the GPL license can be found at
;; .
;;
(import scheme chicken data-structures srfi-1)
(require-extension posix utils srfi-13 srfi-14)
(require-extension typeclass input-classes rb-tree formular byte-blob byte-blob-stream)
(require-extension fmt ssax getopt-long )
(require-library abnf internet-message mbox)
(import (only abnf
CharLex->CoreABNF Input->Token
Token->CharLex
)
(only internet-message CoreABNF->InetMessage/UTF8)
(only mbox Input+.CoreABNF->Mbox/UTF8)
)
(foreign-declare "#include ")
(define log10 (foreign-lambda double "log10" double))
(define lookup-def
(lambda (k lst . rest)
(let-optionals rest ((default #f))
(let ((v (alist-ref k lst eq? default)))
(and v (if (or (atom? v) (and (pair? v) (pair? (cdr v)))) v
(car v)))))))
(define (alist->rb-tree alst)
(define (s<= x y)
(cond ((string-ci< x y) -1)
((string-ci= x y) 0)
(else 1)))
(let* ((tree (make-ephemeral-map s<=))
(update! (tree 'put!)))
(for-each (lambda (x)
(let ((k (car x))
(v (cdr x)))
(update! k v)))
alst)
tree))
(define s+ string-append)
(define s$ string->symbol)
(define parser-error error)
(define (xml:str-handler fragment foll-fragment seed)
(if (string-null? foll-fragment) (cons fragment seed)
(cons* foll-fragment fragment seed)))
(define (entity-ref->string entity)
(case entity
((quot) "\"")
((apos) "'")
((amp) "&")
((lt) "<")
((gt) ">")
(else (error 'entity-ref->string "unknown entity " entity))))
(define (xml:read-string s)
(let loop ((lst (list)) (iport (open-input-string s)))
(if (eof-object? (peek-char iport))
(string-concatenate (reverse lst))
(let-values (((lst token) (ssax:read-char-data iport #t xml:str-handler lst)))
(let ((v (and (not (eof-object? token ))
(list (xml-token-kind token) (xml-token-head token)))))
(cond ((and (pair? v) (eq? (car v) 'ENTITY-REF))
(let ((entity (cadr v)))
(loop (cons (entity-ref->string entity) lst) iport)))
(else (loop lst iport))))))))
(define (string-take-alphanumeric str n)
(let loop ((i n) (lst (string->list str)) (res (list)))
(if (or (null? lst) (zero? i)) (list->string (reverse res))
(if (positive? i)
(loop (if (or (char-alphabetic? (car lst)) (char-numeric? (car lst))) (- i 1) i)
(cdr lst) (cons (car lst) res))))))
(define (string-drop-alphanumeric str n)
(let loop ((i n) (lst (string->list str)))
(cond ((null? lst) #f)
((zero? i) (list->string lst))
((positive? i)
(loop (if (or (char-alphabetic? (car lst)) (char-numeric? (car lst))) (- i 1) i)
(cdr lst))))))
(define (pp-submission/wiki id from-address time-seconds fields . rest)
(let-optionals rest ((include-fields #f) (field-limits #f) )
(let ((wiki-fields (filter-map identity
(if include-fields
(map (lambda (x) (assoc x fields)) include-fields)
fields))))
(print (s+ "== " id " ==\n"))
(for-each (lambda (field)
(if (>= (length field) 2)
(let ((section-limit (lookup-def (first field) field-limits)))
(let* ((section-title (->string (first field)))
(section-content (second field))
(section-content
(string-trim-both
(xml:read-string
(if (pair? section-content)
(list->string section-content)
section-content))
char-set:whitespace))
(section-overlimit
((lambda (x) (and (string? x) (string-split x "\n" #t)))
((lambda (x) (and section-limit (string-drop-alphanumeric x section-limit)))
section-content))))
(print (s+ "=== " section-title " ==="))
(print section-content)
(if section-overlimit
(begin
(print (s+ "==== " section-title " (over character limit) ===="))
(print section-overlimit)))
(print)))))
wiki-fields))))
(define (make-id i width id-prefix idnum )
(s+ id-prefix " " (fmt #f (pad-char #\0 (fit/left width idnum))) "-" (->string i)))
(define (pp-formular-tree/wiki tree . rest)
(let-optionals rest ((id-prefix "Form Submission") (id-order #f) (include-fields #f)
(exclude #f) (include #f) (field-limits #f) (after-date #f))
(let* ((keys (tree 'list-keys))
(order (or id-order
(and (positive? (length keys)) (inexact->exact (ceiling (log10 (length keys)))))
3)))
(if (null? keys) (error 'pp-formular-tree/text "empty list of forms"))
((tree 'foldi)
(lambda (from-address lst i)
(cond ((and exclude (member (s$ from-address) exclude)) i)
((or (not include) (member (s$ from-address) include))
(let loop ((lst lst))
(if (null? lst) i
(let* ((submission (cdr (last lst)))
(time-seconds (lookup-def 'time-seconds submission))
(fields (let recur ((fields (lookup-def 'fields submission)))
(cond ((and (pair? fields)
(pair? (car fields))
(symbol? (car (car fields))))
fields)
((pair? fields)
(recur (car fields)))
(else fields))))
)
(if (or (not after-date) (> time-seconds after-date ))
(let* ((width (if (positive? order) order 1))
(idnum (inexact->exact (- time-seconds 10e8)))
(id (let loop ((i 1) (id (make-id 1 width id-prefix idnum)))
(if (file-exists? id)
(loop (+ 1 i) (make-id (+ 1 i) width id-prefix idnum)) id))))
(if (> (length lst) 1)
(print "Multiple submissions by " from-address ": using submission from "
(seconds->string time-seconds)))
(with-output-to-port (open-output-file id)
(lambda ()
(pp-submission/wiki id from-address time-seconds fields
include-fields field-limits)))))
(+ i 1)))))
(else i))) 1)
)))
(define opt-defaults
`(
(order . 3)
(mbox-path . "mbox")
(title-prefix . "Form Submission")
(field-limits . "")
))
(define (defopt x)
(lookup-def x opt-defaults))
(define opt-grammar
`(
(exclude "specify a colon-separated list of entries to exclude (default is none)"
(value (required "FROM1:FROM2")
(transformer ,(lambda (x) (list (map string->symbol (string-split x ":")))))))
(include "specify a colon-separated list of entries to include (default is all)"
(value (required "FROM1:FROM2")
(transformer ,(lambda (x) (list (map string->symbol (string-split x ":")))))))
(after-date "omit entries before given date (in seconds)"
(value (required "N")
(predicate ,string->number)
(transformer ,string->number)))
(fields "specify a colon-separated list of fields to process (default is all fields)"
(value (required "FIELD1:FIELD2:...")
(transformer ,(lambda (x) (list (map string->symbol (string-split x ":")))))))
(flimits "specify a comma-separated list of fields and character limits (default is none )"
(value
(required "FIELD1:LIMIT1,...")
(default ,(defopt 'field-limits))
(transformer
,(lambda (x)
(let loop ((lst (string-split (->string x) ","))
(field-limits (list)))
(if (null? lst) field-limits
(let ((v (string-split (car lst) ":")))
(cond ((and (pair? v) (pair? (cdr v)))
(let ((field (car v)) (limit (cadr v)))
(let ((field-sym (s$ field))
(limit-num (string->number limit)))
(if (and (symbol? field-sym) (number? limit-num))
(loop (cdr lst) (cons (list field-sym limit-num) field-limits))
(error "invalid field:limit pair: " (list field limit))))))
(else (error "invalid field:limit list " lst))))))))))
(mbox-path ,(string-append "specify path to input mbox (default: " (defopt 'mbox-path) ")")
(value (required "PATH")
(default ,(defopt 'mbox-path))
))
(order ,(string-append "specify order of form id (default: " (->string (defopt 'order)) ")")
(value
(required "N")
(default ,(defopt 'order))
(transformer ,(lambda (x) (inexact->exact (string->number x))))))
(title-prefix ,(string-append "specify prefix for text page title (default: " (defopt 'title-prefix) ")")
(value (required "STRING")
(default ,(defopt 'title-prefix))))
(write-alist "write an alist representation to FILE"
(value (required "FILE")))
(read-alist "read an alist representation from FILE"
(value (required "FILE")))
(stream "use I/O interface based on streams")
(help (single-char #\h))
))
;; Use args:usage to generate a formatted list of options (from OPTS),
;; suitable for embedding into help text.
(define (form2wiki:usage)
(print "Usage: form2wiki [options...] operands ")
(newline)
(print "The following options are recognized: ")
(newline)
(print (parameterize ((indent 2) (width 32)) (usage opt-grammar)))
(exit 1))
;; Process arguments and collate options and arguments into OPTIONS
;; alist, and operands (filenames) into OPERANDS.
(define opts (getopt-long (command-line-arguments) opt-grammar))
(define opt (make-option-dispatch opts opt-grammar))
;; from SRFI-33, useful in splitting up the bit patterns used to
;; represent unicode values in utf8
(define-inline (extract-bit-field size position n)
(bitwise-and (bitwise-not (arithmetic-shift -1 size))
(arithmetic-shift n (- position))))
;; The following is borrowed from the utf8 library by Alex Shinn:
;; number of total bytes in a utf8 char given the 1st byte
(define utf8-start-byte-length
'#(
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 0x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 1x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 2x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 3x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 4x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 5x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 6x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 7x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 8x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; 9x
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; ax
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ; bx
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; cx
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ; dx
3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ; ex
4 4 4 4 4 4 4 4 5 5 5 5 6 6 0 0 ; fx
))
;; The following two routines are based on the read-utf8-char routine
;; from the utf8 library by Alex Shinn:
(define (byte-blob-char-car b)
(let ((b1 (fxand 255 (byte-blob-car b) )))
(let ((len (vector-ref utf8-start-byte-length b1)))
(if (<= len 1)
(integer->char b1)
(let loop ((res (extract-bit-field (- 7 len) 0 b1))
(i (- len 1)))
(if (zero? i)
(integer->char res)
(let ((b2 (fxand 255 (byte-blob-car b))))
(cond
((not (= #b10 (extract-bit-field 2 6 b2)))
(error 'byte-blob-char-car "invalid utf8 sequence"))
(else
(loop (bitwise-ior (arithmetic-shift res 6)
(bitwise-and #b00111111 b2))
(- i 1)))
))
))
)
)))
(define (byte-blob-char-cdr x)
(let ((b (fxand 255 (byte-blob-car x))))
(let ((n (vector-ref utf8-start-byte-length b)))
(if (fx= n 1) (byte-blob-cdr x)
(byte-blob-drop x n))
)))
(define (byte-blob-stream-char-car b)
(let ((b1 (fxand 255 (byte-blob-stream-car b) )))
(let ((len (vector-ref utf8-start-byte-length b1)))
(if (<= len 1)
(integer->char b1)
(let loop ((res (extract-bit-field (- 7 len) 0 b1))
(i (- len 1)))
(if (zero? i)
(integer->char res)
(let ((b2 (fxand 255 (byte-blob-stream-car b))))
(cond
((not (= #b10 (extract-bit-field 2 6 b2)))
(error 'byte-blob-stream-char-car "invalid utf8 sequence"))
(else
(loop (bitwise-ior (arithmetic-shift res 6)
(bitwise-and #b00111111 b2))
(- i 1)))
))
))
)
)))
(define (byte-blob-stream-char-cdr x)
(let ((b (fxand 255 (byte-blob-stream-car x))))
(let ((n (vector-ref utf8-start-byte-length b)))
(if (fx= n 1) (byte-blob-stream-cdr x)
(byte-blob-stream-drop x n))
)))
(define byte-blob-stream-
(make- byte-blob-stream-empty?
byte-blob-stream-char-car
byte-blob-stream-char-cdr))
(define byte-blob-stream-
(Input->Token byte-blob-stream-))
(define byte-blob-stream-
(Token->CharLex byte-blob-stream-))
(define byte-blob-stream-
(CharLex->CoreABNF byte-blob-stream-))
(define byte-blob-stream-
(CoreABNF->InetMessage/UTF8 byte-blob-stream- ))
(define byte-blob-stream-
(make- byte-blob-stream-
byte-blob-stream-find
(compose blob->byte-blob string->blob)
file->byte-blob-stream
))
(define byte-blob-stream-
(Input+.CoreABNF->Mbox/UTF8 byte-blob-stream-
byte-blob-stream-
))
(define byte-blob-
(make- byte-blob-empty?
byte-blob-char-car
byte-blob-char-cdr))
(define byte-blob-
(Input->Token byte-blob-))
(define byte-blob-
(Token->CharLex byte-blob-))
(define byte-blob-
(CharLex->CoreABNF byte-blob-))
(define byte-blob-
(CoreABNF->InetMessage/UTF8 byte-blob- ))
(define byte-blob-
(make- byte-blob-
byte-blob-find
(compose blob->byte-blob string->blob)
file->byte-blob
))
(define byte-blob-
(Input+.CoreABNF->Mbox/UTF8 byte-blob-
byte-blob-
))
(define (main options operands)
(if (opt 'help) (form2wiki:usage))
(let ((mbox-instance (if (opt 'stream) byte-blob-stream- byte-blob-))
(inet-message-instance (if (opt 'stream) byte-blob-stream- byte-blob-)))
(with-instance (( mbox-instance))
(let* ((mbox-messages->form-tree
(mbox-messages->form-tree inet-message-instance))
(tree (if (opt 'read-alist)
(alist->rb-tree (read (open-input-file (opt 'read-alist))))
(mbox-messages->form-tree
(mbox-file->messages (opt 'mbox-path))))))
(pp-formular-tree/wiki tree
(or (opt 'title-prefix) (defopt 'title-prefix))
(opt 'order)
(opt 'fields)
(opt 'exclude)
(opt 'include)
(opt 'flimits)
(opt 'after-date)
))))
)
(main opts (opt '@))