;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; eMail Address Parsers ;;; ;;; email-address provides eMail address handling procedures for reading eMail ;;; addresses in RFC 822 format as well as reading lists of addresses, such as ;;; commonly found in a 'To:' header. ;;; ;;; The address specification that we conform to when reading and parsing ;;; addresses is taken from RFC 822, Section 6. ;;; http://tools.ietf.org/html/rfc822#section-6 ;;; ;;; The specification for the format of the 'To:' header is taken from ;;; RFC 822, Section 4.1. ;;; http://tools.ietf.org/html/rfc822#section-4.1 ;;; ;;; Some of the examples used for conformance testing have been drawn from ;;; RFC 3696, Section 3. ;;; http://tools.ietf.org/html/rfc3696#section-3 ;;; ;;; ;;; Copyright (C) 2014-2019, Andy Bennett ;;; All rights reserved. ;;; ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions are met: ;;; ;;; Redistributions of source code must retain the above copyright notice, this ;;; list of conditions and the following disclaimer. ;;; Redistributions in binary form must reproduce the above copyright notice, ;;; this list of conditions and the following disclaimer in the documentation ;;; and/or other materials provided with the distribution. ;;; Neither the name of the author nor the names of its contributors may be ;;; used to endorse or promote products derived from this software without ;;; specific prior written permission. ;;; ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ;;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ;;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE ;;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ;;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ;;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ;;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ;;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ;;; POSSIBILITY OF SUCH DAMAGE. ;;; ;;; Andy Bennett , 2014/03/26 18:16 ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (module email-address (email-address email-address? email-address-group? email-address-list name name->string local-part local-part->string local-part->string-ci domain-part domain-part->string addr-spec->string route mailboxes) (import chicken scheme) ; Units - http://api.call-cc.org/doc/chicken/language (use srfi-1 extras srfi-14 srfi-13 data-structures) ; Eggs - http://wiki.call-cc.org/chicken-projects/egg-index-4.html (use comparse records) ; EMAIL-ADDRESS ADTs ; EMAIL-ADDRESS (define email-address-fields '(name local-part domain-part route)) (define email-address-rtd (make-record-type 'EMAIL-ADDRESS email-address-fields)) (define (new-email-address) (apply (record-constructor email-address-rtd email-address-fields) (make-list (length email-address-fields) '()))) (define email-address? (record-predicate email-address-rtd)) (define-record-printer (EMAIL-ADDRESS e out) (fprintf out "#" (name e) (local-part e) (domain-part e) (route e))) (define (set e field value) ((record-modifier email-address-rtd field) e value)) (define (set-addr-spec e x) (set e 'local-part (first x)) (set e 'domain-part (second x))) (define (mailbox->email-address name route-addr) (let ((e (new-email-address))) (assert-phrase name) (assert-route-addr route-addr) (set e 'name name) (let ((route (first route-addr)) (addr-spec (second route-addr))) (assert-route route) (assert-addr-spec addr-spec) (set e 'route route) (set-addr-spec e addr-spec)) e)) (define (addr-spec->email-address addr-spec) (assert-addr-spec addr-spec) (let ((e (new-email-address))) (set-addr-spec e addr-spec) e)) ; EMAIL-ADDRESS-GROUP (define email-group-fields '(name mailboxes)) (define email-group-rtd (make-record-type 'EMAIL-ADDRESS-GROUP email-group-fields)) (define make-email-group (record-constructor email-group-rtd '(name mailboxes))) (define email-address-group? (record-predicate email-group-rtd)) (define-record-printer (EMAIL-ADDRESS-GROUP g out) (fprintf out "#" (name g) (mailboxes g))) (define (group->email-group name mailboxes) (assert (list? mailboxes)) (assert-phrase name) (make-email-group name mailboxes)) ; ADT Accessors (define (name address-or-group) (assert (or (email-address? address-or-group) (email-address-group? address-or-group)) "Not a structure of the required type") ((record-accessor (cond ((email-address? address-or-group) email-address-rtd) ((email-address-group? address-or-group) email-group-rtd)) 'name) address-or-group)) (define local-part (record-accessor email-address-rtd 'local-part)) (define domain-part (record-accessor email-address-rtd 'domain-part)) (define route (record-accessor email-address-rtd 'route)) (define mailboxes (record-accessor email-group-rtd 'mailboxes)) ; PARSERS ; fIorz's separated-by parser : (define (separated-by sep-parser field-parser) (sequence* ((head field-parser) (tail (zero-or-more (preceded-by sep-parser field-parser)))) (result (cons head tail)))) ; RFC 822, Section 3.3 : http://tools.ietf.org/html/rfc822#section-3.3 ; ; ucs-range->char-set lower and upper bounds define a half-open range ; [LOWER,UPPER). The ranges in RFC822 are inclusive. (define CHAR (in (ucs-range->char-set 0 128))) ; 0 -> 127 ; char-set:ascii? (define ALPHA (in (char-set-union (ucs-range->char-set 65 91) ; 65 -> 90 (ucs-range->char-set 97 123)))) ; 97 -> 122 (define DIGIT (in (ucs-range->char-set 48 58))) ; 48 -> 57 (define CTL (in (char-set-union (ucs-range->char-set 0 32) ; 0 -> 31 (ucs-range->char-set 127 128)))) ; 127 (define CR (in (ucs-range->char-set 13 14))) ; 13 (define LF (in (ucs-range->char-set 10 11))) ; 10 (define SPACE (in (ucs-range->char-set 32 33))) ; 32 (define HTAB (in (ucs-range->char-set 9 10))) ; 9 (define DQUOTE (in (ucs-range->char-set 34 35))) ; 34 (define CRLF (sequence CR LF)) (define LWSP-char (any-of SPACE HTAB)) ; 1*([CRLF] LWSP-char) (define linear-white-space (one-or-more (any-of (sequence CRLF LWSP-char) LWSP-char))) (define specials (in (string->char-set "()<>@,;:\\\".[]"))) ; text ; ; => atoms, specials, comments and quoted-strings are NOT recognized. ; Preserves case (RFC 822, Section 3.4.7). (define text (none-of* CRLF CHAR)) ; atom ; 1* (define atom (as-string (one-or-more (none-of* specials SPACE CTL CHAR)))) (define BSLASH (in (string->char-set "\\"))) ; qtext ; , "\" & CR, and including linear-white-space>. ; Preserves case (RFC 822, Section 3.4.7). (define qtext (any-of linear-white-space (none-of* DQUOTE BSLASH CR CHAR))) ; quoted-pair ; "\" CHAR ; Preserves case (RFC 822, Section 3.4.7). (define quoted-pair (sequence* ((_ (char-seq "\\")) (char CHAR)) (result char))) ; quoted-string ; <"> *(qtext/quoted-pair) <"> (define quoted-string (sequence* ((_ DQUOTE) (string (as-string (zero-or-more (any-of qtext quoted-pair)))) (_ DQUOTE)) (result string))) (define BRACKETS (in (string->char-set "[]"))) ; dtext ; Preserves case (define dtext (any-of linear-white-space (none-of* BRACKETS BSLASH CR CHAR))) ; domain-literal ; "[" *(dtext / quoted-pair) "]" (define domain-literal (sequence (char-seq "[") (zero-or-more (any-of dtext quoted-pair)) (char-seq "]"))) (define PARENS (in (string->char-set "()"))) ; ctext ; Preserves case (RFC 822, Section 3.4.7). (define ctext (any-of linear-white-space (none-of* PARENS BSLASH CR CHAR))) ; comment ; From RFC 822, Section 3.4.2 : http://tools.ietf.org/html/rfc822#section-3.4.3 ; When a comment acts as the delimiter between a sequence of two lexical ; symbols, such as two atoms, it is lexically equivalent with a single ; SPACE, for the purposes of regenerating the sequence, such as when ; passing the sequence onto a mail protocol server. (define comment (recursive-parser (sequence (char-seq "(") (zero-or-more (any-of ctext quoted-pair comment)) (char-seq ")")))) ; delimiters ; These do not require a general parser as specific delimiters are required in ; specific circumstances. We use the delimited-by parser to parse the correct ; delimiter and the whitespace and comments are are allowed around it. ;(define delimiters (any-of specials linear-white-space comment)) ; word (define word (any-of atom quoted-string)) ; phrase ; 1*word ; Sequence of words ; Assume words can be separated by whitespace that will be preserved. ; Use linear-white-space as per RFC 822, Section 3.1.1 (define phrase (separated-by (one-or-more (any-of linear-white-space comment)) word)) (define (assert-phrase x) (assert (list? x))) ; RFC 822, Section 3.1.4 : http://tools.ietf.org/html/rfc822#section-3.1.4 ; Support whitespace around delimiters (define (delimited-by delimiter) (sequence* ((_ (zero-or-more (any-of linear-white-space comment))) (delimiter (char-seq delimiter)) (_ (zero-or-more (any-of linear-white-space comment)))) (result delimiter))) ; RFC 822, Section 6. : http://tools.ietf.org/html/rfc822#section-6 ; ; local-part ; Preserves case (RFC 822, Section 3.4.7). ; word *("." word) (define local-part-parser (separated-by (delimited-by ".") word)) ; domain-ref (define domain-ref atom) ; sub-domain ; domain-ref / domain-literal (define sub-domain (any-of domain-ref domain-literal)) ; domain ; sub-domain *("." sub-domain) (define domain (separated-by (delimited-by ".") sub-domain)) ; addr-spec ; local-part "@" domain ; Result: '(local-part domain-part) (define addr-spec (sequence* ((local-part local-part-parser) (_ (delimited-by "@")) (domain-part domain)) (result (list local-part domain-part)))) (define (assert-addr-spec x) (assert (list? x)) (assert (= 2 (length x)))) ; route ; 1#("@" domain) ":" (define route-parser (sequence* ((domains (separated-by (delimited-by ",") (maybe (preceded-by (delimited-by "@") domain)))) (_ (delimited-by ":"))) (let ((domains (remove not domains))) (if (null? domains) fail (result domains))))) (define (assert-route x) (assert (list x))) ; route-addr ; "<" [route] addr-spec ">" ; Result: '(route addr-spec) (define route-addr (sequence* ((_ (delimited-by "<")) (route (maybe route-parser)) (addr-spec addr-spec) (_ (delimited-by ">"))) (result (list (or route '()) addr-spec)))) (define (assert-route-addr x) (assert (list? x)) (assert (= 2 (length x)))) ; mailbox ; addr-spec / phrase route-addr ; addr-spec ; Result: EMAIL-ADDRESS (define mailbox-1 (sequence* ((addr-spec addr-spec)) (result (addr-spec->email-address addr-spec)))) ; phrase route-addr ; Result: EMAIL-ADDRESS (define mailbox-2 (sequence* ((phrase phrase) (_ (zero-or-more linear-white-space)) (route-addr route-addr)) (result (mailbox->email-address phrase route-addr)))) (define mailbox (any-of mailbox-1 mailbox-2)) ; group ; phrase ":" [#mailbox] ";" ; Result: EMAIL-ADDRESS-GROUP (define group (sequence* ((phrase phrase) (_ (delimited-by ":")) (mailboxes (separated-by (delimited-by ",") (maybe ; support null elements mailbox))) (_ (delimited-by ";"))) (let ((mailboxes (remove not mailboxes))) (result (group->email-group phrase mailboxes))))) ; address ; mailbox / group (define address (any-of mailbox group)) (define one-address (sequence* ((address address) (_ (none-of item))) ; end of data (result address))) ; Serialisers (define (is-only parser part) (parse (sequence parser (none-of item)) part)) (define (quote-string str) (list->string (reverse (cons #\" (string-fold (lambda (v s) (cond ((is-only (any-of DQUOTE BSLASH CR) (->string v)) (cons* v #\\ s)) ((is-only CHAR (->string v)) (cons v s)) (else (abort (conc "Illegal character in string: " v))))) '(#\") str))))) (define (quote-words words) (map (lambda (word) (if (is-only atom word) word (quote-string word))) words)) (define (name->string e) (let ((parts (quote-words (name e)))) (string-intersperse parts " "))) (define (local-part->string e) (let ((local-part (quote-words (local-part e)))) (string-intersperse local-part "."))) (define (local-part->string-ci e) (let ((local-part (quote-words (map string-downcase (local-part e))))) (string-intersperse local-part "."))) (define (quote-domain-parts parts) (map (lambda (part) (if (is-only sub-domain part) part (abort (conc "Illegal character in domain: " part)))) parts)) (define (domain-part->string e) (let ((domain-part (quote-domain-parts (map string-downcase (domain-part e))))) (string-intersperse domain-part "."))) (define (addr-spec->string e) (conc (local-part->string e) "@" (domain-part->string e))) ; API (define (email-address str-port-or-seq) (receive (result rest) (parse one-address str-port-or-seq) result)) ; A list of addresses separated by commas and or whitespace, potentially with ; unparsable trailing data. (define (email-address-list str-port-or-seq) (parse (sequence* ((addresses (separated-by (any-of (delimited-by ",") linear-white-space CR LF) (maybe ; support null elements address)))) (let ((addresses (remove not addresses))) (result addresses))) str-port-or-seq)) )