;; ;; ;; A set of routines to read and extract fields from email form ;; submissions generated by the FormMail.pl script. ;; ;; Copyright 2008-2010 Ivan Raikov and the Okinawa Institute of ;; Science and Technology. ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . ;; (module formular (form-delim-start form-delim-end field-delim form-field form mbox-messages->form-tree) (import scheme chicken data-structures srfi-1 srfi-14) (import (only srfi-13 string-ci< string-ci= string-trim-both)) (require-extension mbox rb-tree ) (require-library abnf abnf-consumers ) (import (prefix abnf abnf:) (prefix abnf-consumers abnf:) ) (define consumed-objects-lift-any (abnf:consumed-objects-lift (abnf:consumed-objects identity))) (define (char-list-titlecase x) (if (null? x) x (cons (char-upcase (car x)) (map char-downcase (cdr x))))) ;; construct symbols from consumed chars (define consumed-chars->tsymbol (abnf:consumed-chars->list (compose string->symbol list->string char-list-titlecase abnf:trim-ws-char-list))) ;; shortcut for (abnf:bind consumed-chars->tsymbol (abnf:longest ... )) (define-syntax bind-consumed->tsymbol (syntax-rules () ((_ p) (abnf:bind consumed-chars->tsymbol (abnf:longest p))) )) (define field-delim (make-parameter #\:)) (define form-delim-start (make-parameter "---------------------------------------------------------------------------")) (define form-delim-end (make-parameter "---------------------------------------------------------------------------")) (define (formpar p) (let ((v (p))) (cond ((char? v) (abnf:char v)) ((string? v) (abnf:lit v)) (else (error 'formpar "parameter must be one of char or string" v))))) (define (ftext) (abnf:set (char-set-difference char-set:graphic (let ((v (field-delim))) (cond ((char? v) (char-set v)) ((string? v) (abnf:set-from-string v)) (else v)))))) (define (field-name) (bind-consumed->tsymbol (abnf:repetition1 (ftext)))) ;;; Matches any non-control US-ASCII or extended character except for \ and " (define char-set:quoted (char-set-difference (char-set-union char-set:printing (ucs-range->char-set 128 255)) (char-set #\\ #\"))) (define qtext (abnf:set char-set:quoted)) ;; Matches either qtext or quoted-pair (define qcontent (abnf:repetition1 (abnf:alternatives qtext abnf:quoted-pair))) ;; Matches any number of qcontent between double quotes. (define quoted-string (abnf:bind-consumed->list (abnf:concatenation (abnf:drop-consumed abnf:dquote) (abnf:longest qcontent) (abnf:drop-consumed abnf:dquote)))) (define unstructured (abnf:bind-consumed->list (abnf:repetition (abnf:alternatives (abnf:set (char-set-union (ucs-range->char-set 128 255) char-set:graphic (char-set #\space #\tab))) (abnf:drop-consumed (abnf:lit "\n ")))))) (define field-value (abnf:alternatives quoted-string unstructured)) (define (form-field) (abnf:bind (consumed-objects-lift-any) (abnf:concatenation (field-name) (abnf:drop-consumed (abnf:concatenation (formpar field-delim) (abnf:repetition lwsp))) field-value (abnf:drop-consumed (abnf:repetition lwsp))))) (define lwsp (abnf:set-from-string " \r\n\t")) (define (form) (abnf:bind-consumed-pairs->list (abnf:concatenation (abnf:drop-consumed (abnf:concatenation (abnf:repetition lwsp) (formpar form-delim-start) (abnf:repetition lwsp))) (abnf:repetition (form-field))))) ;; ;; Given a list of messages returned by the file->messages procedure ;; from the mbox library, returns an ordered dictionary structure, ;; where the key is the email address of the form sender, and the ;; value is the list of all forms submitted by that sender. The API of ;; the tree object follows that of the e.g. treap and rb-tree ;; libraries. ;; (define (lookup-def x lst) (let ((v (alist-ref x lst))) (and v (if (pair? (cdr v)) v (car v))))) (define (mbox-messages->form-tree messages) (define (s<= x y) (cond ((string-ci< x y) -1) ((string-ci= x y) 0) (else 1))) (define (subm< x y) (let ((x-seconds (lookup-def 'time-seconds (cdr x))) (y-seconds (lookup-def 'time-seconds (cdr y)))) (< x-seconds y-seconds))) (let loop ((tree (make-rb-tree s<=)) (msgs messages)) (let ((lookup (tree 'get)) (update (tree 'put))) (if (null? msgs) tree (let ((message (car msgs))) (let ((envelope (message-envelope message)) (headers (message-headers message)) (body (message-body message))) (let ((time-seconds (lookup-def 'time-seconds envelope)) (from-address (let ((address (lookup-def 'address envelope))) (string-append (string-trim-both (lookup-def 'local-part address) char-set:whitespace) "@" (string-trim-both (lookup-def 'domain address) char-set:whitespace))))) (let ((exists (lookup from-address #f)) (submission `(submission (time-seconds ,time-seconds) (fields . ,(body mbox-message-body: (form)))))) (if exists (loop (update from-address (merge (list submission) (cdr exists) subm<)) (cdr msgs)) (loop (update from-address (list submission)) (cdr msgs))))))) )))) )