;; ;; ;; A parser for mbox database files. ;; ;; Based on RFC 4155, "The application/mbox Media Type". ;; ;; Copyright 2010-2012 Ivan Raikov and the Okinawa Institute of Science ;; and Technology. ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . (module mbox ( mbox-file->messages message? message-envelope message-headers message-body Input+.CoreABNF->Mbox Input+.CoreABNF->Mbox/UTF8 ) (import scheme chicken data-structures posix srfi-1 srfi-14) (require-extension typeclass input-classes) (require-library extras abnf abnf-consumers internet-message ) (import (prefix abnf abnf: ) (prefix abnf-consumers abnf: ) (only abnf CharLex->CoreABNF Input->Token Token->CharLex ) (only internet-message CoreABNF->InetMessage CoreABNF->InetMessage/UTF8) (only extras pp) ) (define-record-type message (make-message envelope headers body ) message? (envelope message-envelope ) (headers message-headers ) (body message-body ) ) (define-class ( M) mbox-envelope mbox-message-fields mbox-message mbox-file->messages ) ;; Unix ctime date and time specification ;; Parses a date and time specification of the form ;; ;; Tue Jan 5 18:29:55 2010 ;; Matches the abbreviated weekday names (define=> (day-name ) (abnf:alternatives (lit "Mon") (lit "Tue") (lit "Wed") (lit "Thu") (lit "Fri") (lit "Sat") (lit "Sun"))) ;; Matches a day-name (define=> (day-of-week ) (lambda (day-name) (abnf:bind-consumed-strings->list 'day-of-week (abnf:bind-consumed->string day-name)))) ;; Matches a four digit decimal number (define=> (year ) (abnf:bind-consumed-strings->list 'year (abnf:bind-consumed->string (abnf:repetition-n 4 decimal)))) ;; Matches the abbreviated month names (define=> (month-name ) (abnf:alternatives (lit "Jan") (lit "Feb") (lit "Mar") (lit "Apr") (lit "May") (lit "Jun") (lit "Jul") (lit "Aug") (lit "Sep") (lit "Oct") (lit "Nov") (lit "Dec"))) ;; Matches a month-name (define=> (month ) (lambda (month-name) (abnf:bind-consumed-strings->list 'month (abnf:bind-consumed->string month-name)))) ;; Matches a one or two digit number (define=> (day ) (abnf:bind-consumed-strings->list 'day (abnf:bind-consumed->string (abnf:variable-repetition 1 2 decimal)))) ;; Matches a two-digit number (define=> (hour ) (abnf:bind-consumed->string (abnf:repetition-n 2 decimal))) (define=> (minute ) (abnf:bind-consumed->string (abnf:repetition-n 2 decimal))) (define=> (isecond ) (abnf:bind-consumed->string (abnf:repetition-n 2 decimal))) ;; Matches a time-of-day specification of hh:mm or hh:mm:ss. (define=> (time-of-day ) (lambda (hour minute isecond) (abnf:bind-consumed-strings->list 'time-of-day (abnf:concatenation hour (abnf:drop-consumed (char #\:)) minute (abnf:optional-sequence (abnf:concatenation (abnf:drop-consumed (char #\:)) isecond)))))) (define=> (between-sp-drop ) (lambda (p) (abnf:concatenation (abnf:drop-consumed (abnf:repetition1 sp)) p (abnf:drop-consumed (abnf:repetition sp))))) (define=> (ctime ) (lambda (between-sp-drop day-of-week month day time-of-day year) (abnf:bind-consumed-pairs->list 'ctime (abnf:concatenation day-of-week (between-sp-drop month) day (between-sp-drop time-of-day) year)))) (define=> (abbrtime ) (lambda (between-sp-drop day-of-week day year month) (abnf:bind-consumed-pairs->list 'abbrtime (abnf:concatenation day-of-week (between-sp-drop day) year (between-sp-drop month))))) (define=> (mbox-message-fields ) (fields crlf: lf)) (define=> (address ) (abnf:bind-consumed-pairs->list 'address (abnf:alternatives addr-spec (abnf:bind-consumed-strings->list 'local-part (abnf:concatenation (abnf:bind-consumed->string (abnf:concatenation (abnf:repetition1 ftext))) (abnf:drop-consumed (abnf:repetition wsp))))))) (define=> (mbox-envelope ) (lambda (address ctime abbrtime) (abnf:bind-consumed-pairs->list 'envelope (abnf:concatenation (abnf:drop-consumed (abnf:concatenation (abnf:optional-sequence (char #\newline)) (lit "From ") )) address (abnf:alternatives ctime abbrtime) (abnf:drop-consumed lf))))) (define stream-consumed car) (define stream-rest cdr) (define (month->number month) (case (string->symbol month) ((Jan) 0) ((Feb) 1) ((Mar) 2) ((Apr) 3) ((May) 4) ((Jun) 5) ((Jul) 6) ((Aug) 7) ((Sep) 8) ((Oct) 9) ((Nov) 10) ((Dec) 11))) (define (lookup-def x lst) (let ((v (alist-ref x lst))) (and v (if (pair? (cdr v)) v (car v))))) (define (ctime->seconds lst) (let ((month (lookup-def 'month lst)) (day (lookup-def 'day lst)) (time-of-day (lookup-def 'time-of-day lst)) (year (lookup-def 'year lst))) (and month day time-of-day year (let ((month (month->number month)) (day (string->number day)) (time-of-day (map string->number time-of-day)) (year (string->number year))) (let ((t (list->vector (append time-of-day (list day month (- year 1900) 0 0 #f 0))))) (local-time->seconds t) ))))) (define (abbrtime->seconds lst) (let ((month (lookup-def 'month lst)) (day (lookup-def 'day lst)) (year (lookup-def 'year lst))) (and month day year (let ((month (month->number month)) (day (string->number day)) (year (string->number year))) (local-time->seconds (list->vector (append (list 0 0 0) (list day month (- year 1900) 0 0 #f 0)))))))) (define (make-mbox-envelope x) (let ((alst (and (pair? x) (eq? 'envelope (car x)) (cdr x)))) (and alst (let ((ctime (lookup-def 'ctime alst)) (abbrtime (lookup-def 'abbrtime alst))) (let ((time-seconds (cond (ctime (ctime->seconds ctime)) (abbrtime (abbrtime->seconds abbrtime)) (else #f)))) `((time-seconds ,time-seconds) . ,alst)))))) (define=> (mbox-message ) (lambda (mbox-envelope mbox-message-fields ) (lambda (s) (let* ((res (find (string->input-stream "\n\n") s)) (s1 (mbox-envelope identity error `(() ,(car res)))) (s2 (cadr res))) (and (pair? s1) (make-message (make-mbox-envelope (car (stream-consumed s1))) (lambda () (mbox-message-fields stream-consumed error `(() ,(car (stream-rest s1))))) (let ((parts (map (lambda (x) `(() ,(first x))) s2))) (lambda (#!key (mbox-message-body (lambda (sk fk s) (sk s)))) (concatenate (filter-map (lambda (part) (mbox-message-body stream-consumed (lambda _ #f) part)) parts)))) )))) )) (define=> (mbox-file->messages ) (lambda (mbox-message) (lambda (filename) (let* ((strm (file->input-stream filename 1048476)) (res (find (string->input-stream "\nFrom ") strm))) (let ((lst (cons (list (car res)) (cadr res)))) (filter-map (compose mbox-message car) lst) )) ))) (define (Input+.CoreABNF->Mbox II A) (let* ((M (CoreABNF->InetMessage A)) (between-sp-drop (between-sp-drop M)) (day-name (day-name M)) (day-of-week ((day-of-week M) day-name)) (day (day M)) (month-name (month-name M)) (month ((month M) month-name)) (year (year M)) (hour (hour M)) (minute (minute M)) (isecond (isecond M)) (time-of-day ((time-of-day M) hour minute isecond)) (ctime ((ctime M) between-sp-drop day-of-week month day time-of-day year)) (abbrtime ((abbrtime M) between-sp-drop day-of-week day year month)) (address (address M)) (mbox-message-fields (mbox-message-fields M)) (mbox-envelope ((mbox-envelope M) address ctime abbrtime)) (mbox-message ((mbox-message II) mbox-envelope mbox-message-fields )) (mbox-file->messages ((mbox-file->messages II) mbox-message)) ) (make- M mbox-envelope mbox-message-fields mbox-message mbox-file->messages ) )) (define (Input+.CoreABNF->Mbox/UTF8 II A) (let* ((M (CoreABNF->InetMessage/UTF8 A)) (between-sp-drop (between-sp-drop M)) (day-name (day-name M)) (day-of-week ((day-of-week M) day-name)) (day (day M)) (month-name (month-name M)) (month ((month M) month-name)) (year (year M)) (hour (hour M)) (minute (minute M)) (isecond (isecond M)) (time-of-day ((time-of-day M) hour minute isecond)) (ctime ((ctime M) between-sp-drop day-of-week month day time-of-day year)) (abbrtime ((abbrtime M) between-sp-drop day-of-week day year month)) (address (address M)) (mbox-message-fields (mbox-message-fields M)) (mbox-envelope ((mbox-envelope M) address ctime abbrtime)) (mbox-message ((mbox-message II) mbox-envelope mbox-message-fields )) (mbox-file->messages ((mbox-file->messages II) mbox-message)) ) (make- M mbox-envelope mbox-message-fields mbox-message mbox-file->messages ) )) )