;; ;; ;; A parser for mbox database files. ;; ;; Based on RFC 4155, "The application/mbox Media Type". ;; ;; Copyright 2010 Ivan Raikov and the Okinawa Institute of Science ;; and Technology. ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . (module mbox (mbox-file->messages mbox-file->messages/byte-blob-stream message? message-envelope message-headers message-body) (import scheme chicken data-structures posix srfi-1 srfi-14) (require-extension byte-blob byte-blob-stream) (require-library abnf abnf-consumers internet-message ) (import (prefix abnf abnf:) (prefix abnf-consumers abnf:) (only internet-message addr-spec date-time fields ftext) ) (define-record-type message (make-message envelope headers body ) message? (envelope message-envelope ) (headers message-headers ) (body message-body ) ) ;; Unix ctime date and time specification ;; Parses a date and time specification of the form ;; ;; Tue Jan 5 18:29:55 2010 ;; Matches the abbreviated weekday names (define day-name (abnf:alternatives (abnf:lit "Mon") (abnf:lit "Tue") (abnf:lit "Wed") (abnf:lit "Thu") (abnf:lit "Fri") (abnf:lit "Sat") (abnf:lit "Sun"))) ;; Matches a day-name (define day-of-week (abnf:bind-consumed-strings->list 'day-of-week (abnf:bind-consumed->string day-name))) ;; Matches a four digit decimal number (define year (abnf:bind-consumed-strings->list 'year (abnf:bind-consumed->string (abnf:repetition-n 4 abnf:decimal)))) ;; Matches the abbreviated month names (define month-name (abnf:alternatives (abnf:lit "Jan") (abnf:lit "Feb") (abnf:lit "Mar") (abnf:lit "Apr") (abnf:lit "May") (abnf:lit "Jun") (abnf:lit "Jul") (abnf:lit "Aug") (abnf:lit "Sep") (abnf:lit "Oct") (abnf:lit "Nov") (abnf:lit "Dec"))) ;; Matches a month-name (define month (abnf:bind-consumed-strings->list 'month (abnf:bind-consumed->string month-name))) ;; Matches a one or two digit number (define day (abnf:bind-consumed-strings->list 'day (abnf:bind-consumed->string (abnf:variable-repetition 1 2 abnf:decimal)))) ;; Matches a two-digit number (define hour (abnf:bind-consumed->string (abnf:repetition-n 2 abnf:decimal))) (define minute (abnf:bind-consumed->string (abnf:repetition-n 2 abnf:decimal))) (define isecond (abnf:bind-consumed->string (abnf:repetition-n 2 abnf:decimal))) ;; Matches a time-of-day specification of hh:mm or hh:mm:ss. (define time-of-day (abnf:bind-consumed-strings->list 'time-of-day (abnf:concatenation hour (abnf:drop-consumed (abnf:char #\:)) minute (abnf:optional-sequence (abnf:concatenation (abnf:drop-consumed (abnf:char #\:)) isecond))))) (define (between-sp-drop p) (abnf:concatenation (abnf:drop-consumed (abnf:repetition1 abnf:sp)) p (abnf:drop-consumed (abnf:repetition abnf:sp)))) (define ctime (abnf:bind-consumed-pairs->list 'ctime (abnf:concatenation day-of-week (between-sp-drop month) day (between-sp-drop time-of-day) year))) (define abbrtime (abnf:bind-consumed-pairs->list 'abbrtime (abnf:concatenation day-of-week (between-sp-drop day) year (between-sp-drop month)))) (define address (abnf:bind-consumed-pairs->list 'address (abnf:alternatives addr-spec (abnf:bind-consumed-strings->list 'local-part (abnf:concatenation (abnf:bind-consumed->string (abnf:concatenation (abnf:repetition1 ftext))) (abnf:drop-consumed (abnf:repetition abnf:wsp))))))) (define mbox-envelope (abnf:bind-consumed-pairs->list 'envelope (abnf:concatenation (abnf:drop-consumed (abnf:concatenation (abnf:optional-sequence (abnf:char #\newline)) (abnf:lit "From ") )) address (abnf:alternatives ctime abbrtime) (abnf:drop-consumed abnf:lf)))) (define stream-consumed car) (define stream-rest cdr) (define-inline (byte->char b) (integer->char (fxand 255 b))) (define (byte-blob-stream->parser-stream d) (if (pair? d) `((() ,d)) (letrec ((d:empty? byte-blob-stream-empty?) (d:take byte-blob-stream-take) (d:drop byte-blob-stream-drop) (d:car byte-blob-stream-car) (d:cdr byte-blob-stream-cdr) (succ (lambda (x) (if (d:empty? (d:cdr x)) (list (list (byte->char (d:car x)))) (list (cons (byte->char (d:car x)) (d:cdr x)) succ)))) (strm (succ d))) `((() . ,strm))))) (define mbox-message-fields (fields crlf: abnf:lf)) (define (month->number month) (case (string->symbol month) ((Jan) 0) ((Feb) 1) ((Mar) 2) ((Apr) 3) ((May) 4) ((Jun) 5) ((Jul) 6) ((Aug) 7) ((Sep) 8) ((Oct) 9) ((Nov) 10) ((Dec) 11))) (define (lookup-def x lst) (let ((v (alist-ref x lst))) (and v (if (pair? (cdr v)) v (car v))))) (define (ctime->seconds lst) (let ((month (lookup-def 'month lst)) (day (lookup-def 'day lst)) (time-of-day (lookup-def 'time-of-day lst)) (year (lookup-def 'year lst))) (and month day time-of-day year (let ((month (month->number month)) (day (string->number day)) (time-of-day (map string->number time-of-day)) (year (string->number year))) (let ((t (list->vector (append time-of-day (list day month (- year 1900) 0 0 #f 0))))) (local-time->seconds t) ))))) (define (abbrtime->seconds lst) (let ((month (lookup-def 'month lst)) (day (lookup-def 'day lst)) (year (lookup-def 'year lst))) (and month day year (let ((month (month->number month)) (day (string->number day)) (year (string->number year))) (local-time->seconds (list->vector (append (list 0 0 0) (list day month (- year 1900) 0 0 #f 0)))))))) (define (make-mbox-envelope x) (let ((alst (and (pair? x) (eq? 'envelope (car x)) (cdr x)))) (and alst (let ((ctime (lookup-def 'ctime alst)) (abbrtime (lookup-def 'abbrtime alst))) (let ((time-seconds (cond (ctime (ctime->seconds ctime)) (abbrtime (abbrtime->seconds abbrtime)) (else #f)))) `((time-seconds ,time-seconds) . ,alst)))))) (define (make-mbox-message s) (let* ((res (byte-blob-stream-find (blob->byte-blob (string->blob "\n\n")) s)) (pre `((() ,(byte-blob-stream->list (car res) byte->char)))) (s1 (mbox-envelope identity pre)) (s2 (cadr res))) (and (pair? s1) (make-message (make-mbox-envelope (first (stream-consumed (car s1)))) (lambda () (mbox-message-fields (lambda (x) (stream-consumed (car x))) `((() . ,(stream-rest (car s1)))))) (let ((parts (map (compose (lambda (b) (byte-blob-stream->list b byte->char)) first) s2))) (lambda (#!key (mbox-message-body (lambda (cont s) (cont s)))) (concatenate (map (lambda (part) (concatenate (filter-map identity (map (compose (lambda (x) (and (pair? x) x)) stream-consumed ) (mbox-message-body identity `((() ,part))))))) parts)))) )))) (define (mbox-file->messages filename ) (let* ((strm (file->byte-blob-stream filename)) (res (byte-blob-stream-find (blob->byte-blob (string->blob "\nFrom ")) strm))) (filter-map (compose make-mbox-message car) (cons (list (car res)) (cadr res))))) (define (make-mbox-message/byte-blob-stream s) (let* ((res (byte-blob-stream-find (blob->byte-blob (string->blob "\n\n")) s)) (s1 (mbox-envelope identity (byte-blob-stream->parser-stream (car res)))) (s2 (cadr res))) (and (pair? s1) (make-message (make-mbox-envelope (first (stream-consumed (car s1)))) (lambda () (mbox-message-fields (lambda (x) (stream-consumed (car x))) `((() . ,(stream-rest (car s1)))))) (let ((parts (map (compose byte-blob-stream->parser-stream first) s2))) (lambda (#!key (mbox-message-body (lambda (cont s) (cont s)))) (concatenate (map (lambda (part) (concatenate (filter-map identity (map (compose (lambda (x) (and (pair? x) x)) stream-consumed ) (mbox-message-body identity part))))) parts)))) )))) (define (mbox-file->messages/byte-blob-stream filename ) (let* ((strm (file->byte-blob-stream filename)) (res (byte-blob-stream-find (blob->byte-blob (string->blob "\nFrom ")) strm))) (filter-map (compose make-mbox-message/byte-blob-stream car) (cons (list (car res)) (cadr res))))) )