;;
;;
;; A parser for mbox database files.
;;
;; Based on RFC 4155, "The application/mbox Media Type".
;;
;; Copyright 2010-2011 Ivan Raikov and the Okinawa Institute of Science
;; and Technology.
;;
;; This program is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; A full copy of the GPL license can be found at
;; .
(module mbox
(
mbox-file->messages
message? message-envelope message-headers message-body
Input+.CoreABNF->Mbox
)
(import scheme chicken data-structures posix srfi-1 srfi-14)
(require-extension typeclass input-classes)
(require-library abnf abnf-consumers internet-message )
(import (prefix abnf abnf: )
(prefix abnf-consumers abnf: )
(only abnf
Token.CharLex->CoreABNF Input->Token
Token->CharLex
)
(only internet-message CoreABNF->InetMessage)
)
(define-record-type message
(make-message envelope headers body )
message?
(envelope message-envelope )
(headers message-headers )
(body message-body )
)
(define-class ( M)
mbox-envelope mbox-message-fields
mbox-message mbox-file->messages )
;; Unix ctime date and time specification
;; Parses a date and time specification of the form
;;
;; Tue Jan 5 18:29:55 2010
;; Matches the abbreviated weekday names
(define=> (day-name )
(abnf:alternatives
(lit "Mon")
(lit "Tue")
(lit "Wed")
(lit "Thu")
(lit "Fri")
(lit "Sat")
(lit "Sun")))
;; Matches a day-name
(define=> (day-of-week )
(lambda (day-name)
(abnf:bind-consumed-strings->list
'day-of-week
(abnf:bind-consumed->string day-name))))
;; Matches a four digit decimal number
(define=> (year )
(abnf:bind-consumed-strings->list 'year
(abnf:bind-consumed->string
(abnf:repetition-n 4 decimal))))
;; Matches the abbreviated month names
(define=> (month-name )
(abnf:alternatives
(lit "Jan")
(lit "Feb")
(lit "Mar")
(lit "Apr")
(lit "May")
(lit "Jun")
(lit "Jul")
(lit "Aug")
(lit "Sep")
(lit "Oct")
(lit "Nov")
(lit "Dec")))
;; Matches a month-name
(define=> (month )
(lambda (month-name)
(abnf:bind-consumed-strings->list 'month
(abnf:bind-consumed->string month-name))))
;; Matches a one or two digit number
(define=> (day )
(abnf:bind-consumed-strings->list 'day
(abnf:bind-consumed->string
(abnf:variable-repetition 1 2 decimal))))
;; Matches a two-digit number
(define=> (hour )
(abnf:bind-consumed->string (abnf:repetition-n 2 decimal)))
(define=> (minute )
(abnf:bind-consumed->string (abnf:repetition-n 2 decimal)))
(define=> (isecond )
(abnf:bind-consumed->string (abnf:repetition-n 2 decimal)))
;; Matches a time-of-day specification of hh:mm or hh:mm:ss.
(define=> (time-of-day )
(lambda (hour minute isecond)
(abnf:bind-consumed-strings->list 'time-of-day
(abnf:concatenation
hour (abnf:drop-consumed (char #\:))
minute (abnf:optional-sequence
(abnf:concatenation
(abnf:drop-consumed (char #\:))
isecond))))))
(define=> (between-sp-drop )
(lambda (p)
(abnf:concatenation
(abnf:drop-consumed (abnf:repetition1 sp))
p
(abnf:drop-consumed (abnf:repetition sp)))))
(define=> (ctime )
(lambda (between-sp-drop day-of-week month day time-of-day year)
(abnf:bind-consumed-pairs->list 'ctime
(abnf:concatenation
day-of-week
(between-sp-drop month)
day
(between-sp-drop time-of-day)
year))))
(define=> (abbrtime )
(lambda (between-sp-drop day-of-week day year month)
(abnf:bind-consumed-pairs->list 'abbrtime
(abnf:concatenation
day-of-week
(between-sp-drop day)
year
(between-sp-drop month)))))
(define=> (mbox-message-fields )
(fields crlf: lf))
(define=> (address )
(abnf:bind-consumed-pairs->list 'address
(abnf:alternatives
addr-spec
(abnf:bind-consumed-strings->list 'local-part
(abnf:concatenation
(abnf:bind-consumed->string
(abnf:concatenation
(abnf:repetition1 ftext)))
(abnf:drop-consumed
(abnf:repetition wsp)))))))
(define=> (mbox-envelope )
(lambda (address ctime abbrtime)
(abnf:bind-consumed-pairs->list 'envelope
(abnf:concatenation
(abnf:drop-consumed
(abnf:concatenation
(abnf:optional-sequence (char #\newline))
(lit "From ")
))
address
(abnf:alternatives ctime abbrtime)
(abnf:drop-consumed lf)))))
(define stream-consumed car)
(define stream-rest cdr)
(define (month->number month)
(case (string->symbol month)
((Jan) 0)
((Feb) 1)
((Mar) 2)
((Apr) 3)
((May) 4)
((Jun) 5)
((Jul) 6)
((Aug) 7)
((Sep) 8)
((Oct) 9)
((Nov) 10)
((Dec) 11)))
(define (lookup-def x lst)
(let ((v (alist-ref x lst)))
(and v (if (pair? (cdr v)) v (car v)))))
(define (ctime->seconds lst)
(let ((month (lookup-def 'month lst))
(day (lookup-def 'day lst))
(time-of-day (lookup-def 'time-of-day lst))
(year (lookup-def 'year lst)))
(and month day time-of-day year
(let ((month (month->number month))
(day (string->number day))
(time-of-day (map string->number time-of-day))
(year (string->number year)))
(let ((t (list->vector
(append time-of-day
(list day month (- year 1900)
0 0 #f 0)))))
(local-time->seconds t)
)))))
(define (abbrtime->seconds lst)
(let ((month (lookup-def 'month lst))
(day (lookup-def 'day lst))
(year (lookup-def 'year lst)))
(and month day year
(let ((month (month->number month))
(day (string->number day))
(year (string->number year)))
(local-time->seconds
(list->vector
(append (list 0 0 0)
(list day month (- year 1900)
0 0 #f 0))))))))
(define (make-mbox-envelope x)
(let ((alst (and (pair? x) (eq? 'envelope (car x)) (cdr x))))
(and alst
(let ((ctime (lookup-def 'ctime alst))
(abbrtime (lookup-def 'abbrtime alst)))
(let ((time-seconds
(cond (ctime (ctime->seconds ctime))
(abbrtime (abbrtime->seconds abbrtime))
(else #f))))
`((time-seconds ,time-seconds) . ,alst))))))
(define=> (mbox-message )
(lambda (mbox-envelope mbox-message-fields )
(lambda (s)
(let* ((res (find (string->input-stream "\n\n") s))
(s1 (mbox-envelope (compose stream-consumed car) error `(() ,(car res))))
(s2 (cadr res)))
(and (pair? s1)
(make-message
(make-mbox-envelope s1)
(lambda ()
(mbox-message-fields stream-consumed error
`(() ,(first (car s2)))))
(let ((parts (map (lambda (x) `(() ,(first x))) (cdr s2))))
(lambda (#!key (mbox-message-body (lambda (sk fk s) (sk s))))
(concatenate
(filter-map
(lambda (part) (mbox-message-body stream-consumed (lambda _ #f) part))
parts))))
))))
))
(define=> (mbox-file->messages )
(lambda (mbox-message)
(lambda (filename)
(let* ((strm (file->input-stream filename))
(res (find (string->input-stream "\nFrom ") strm)))
(filter-map (compose mbox-message car)
(cons (list (car res)) (cadr res)))))
))
(define (Input+.CoreABNF->Mbox II A)
(let* ((M (CoreABNF->InetMessage A))
(between-sp-drop (between-sp-drop M))
(day-name (day-name M))
(day-of-week ((day-of-week M) day-name))
(day (day M))
(month-name (month-name M))
(month ((month M) month-name))
(year (year M))
(hour (hour M))
(minute (minute M))
(isecond (isecond M))
(time-of-day ((time-of-day M) hour minute isecond))
(ctime ((ctime M) between-sp-drop day-of-week month day time-of-day year))
(abbrtime ((abbrtime M) between-sp-drop day-of-week day year month))
(address (address M))
(mbox-message-fields (mbox-message-fields M))
(mbox-envelope ((mbox-envelope M) address ctime abbrtime))
(mbox-message ((mbox-message II)
mbox-envelope mbox-message-fields ))
(mbox-file->messages ((mbox-file->messages II)
mbox-message))
)
(make- M mbox-envelope mbox-message-fields
mbox-message mbox-file->messages )
))
)