;;;; locale-posix.scm ;;; Kon Lovett, Dec '05 ;; Issues ;; ;; - Does not interact w/ setlocale or tzset ;; ;; - If LC_ALL or LANG is not set but any LC_* is set then (current-locale) ;; will still be #f, while some locale-categories will be valued (module locale-posix (;export ; seconds->h:m:s-string ; make-posix-timezone ; posix-timezone-string->timezone-components posix-locale-string->locale-components gnu-language-string->locale-components ; posix-load-timezone posix-load-locale gnu-load-locale) (import chicken scheme) (require-extension srfi-1 srfi-13 regex data-structures files locale-categories locale-components) (declare (fixnum) (inline) (no-procedure-checks) ) ;;; (define-constant SEC/HR 3600) (define-constant SEC/MIN 60) ;; (define (nonnull-getenv varnam) (let ((str (getenv varnam))) (and (string? str) (not (string-null? str)) str ) ) ) ;;; Utility (define (seconds->h:m:s-string secs) (let* ((asecs (abs secs)) (rsecs (remainder asecs SEC/HR)) ) (conc (if (negative? secs) #\- #\+) (quotient asecs SEC/HR) #\: (quotient rsecs SEC/MIN) #\: (remainder rsecs SEC/MIN)) ) ) (define (make-posix-timezone dst-tzn dst-off std-tzn std-off) (string-append dst-tzn (seconds->h:m:s-string dst-off) std-tzn (seconds->h:m:s-string std-off)) ) ;;; Timezone ;; Splits an IEEEÊStdÊ1003.1-2001 TZ specifier string into components. ;; ;; Returns a timezone components object or #f, indicating a parse error. ;; ;; - doesn't handle implementation defined entries ;; ;; - cannot differentiate a relative timezone filename that is a valid ;; timezone specifier (define parse-posix-literal-timezone (let ((name-re (regexp "(^[^<:][^0-9,+-]+)|^<([^>]+)>")) (offset-re (regexp "^(\\+|-)?([0-9]+)(:[0-9]+)?(:[0-9]+)?")) ; For compatibility with System V Release 3.1, a semicolon (`;') may be ; used to separate the rule from the rest of the specification. ; Allow it to separate the "to DST" & "from DST" segments since no harm, no foul. (date-re (regexp "^[;,]([JM])?([0-9]+)(\\.[0-9]+)?(\\.[0-9]+)?")) (time-re (regexp "^/([0-9]+)(:[0-9]+)?(:[0-9]+)?")) (+0200hrs+ (* 2 SEC/HR)) ) (lambda (tz str) (let ((strpos 0) (strend (string-length str)) ) (letrec ( (fail (lambda (msg) (error 'parse-posix-literal-timezone msg str) ) ) (next-match (lambda (re) (and-let* ((ml (string-search re str strpos))) (set! strpos (+ strpos (string-length (car ml)))) ml ) ) ) (all-parsed (lambda () (or (<= strend strpos) (fail "bad timezone format") ) ) ) (parse-number (lambda (numstr) (cond ((not numstr) 0 ) ((char-numeric? (string-ref numstr 0)) (string->number numstr) ) (else (fail "bad timezone number") ) ) ) ) (parse-delmcomp (lambda (numstr delm) (parse-number (if (not (and numstr (string-prefix? delm numstr))) numstr (string-trim numstr (string-ref delm 0)) ) ) ) ) (parse-timecomp (lambda (numstr) (parse-delmcomp numstr ":")) ) (parse-daterulecomp (lambda (numstr) (parse-delmcomp numstr ".")) ) (hms->offset (lambda (sgnstr hms-lst) (and-let* ((hr (parse-number (car hms-lst))) (mn (parse-timecomp (cadr hms-lst))) (sc (parse-timecomp (caddr hms-lst))) ) (let ((secs (+ (* hr SEC/HR) (* mn SEC/MIN) sc))) (if (and sgnstr (string=? sgnstr "-")) (- secs) secs)) ) ) ) (decode-dst-rule (lambda (rulstr dat-lst off) ; Must begin w/ a valid integer. Interpreted later. (and-let* ((n1 (parse-number (car dat-lst)))) (cond ((not rulstr) ;Julian Leap rule (make-timezone-dst-rule-julian-leap n1 off) ) ; select rule kind & interpret rest of match (else (case (string-ref rulstr 0) ((#\J) ; Julian No-Leap rule (make-timezone-dst-rule-julian-noleap n1 off) ) ((#\M) ; Date (and-let* ((n (parse-daterulecomp (cadr dat-lst))) (d (parse-daterulecomp (caddr dat-lst))) ) (make-timezone-dst-rule-mwd n1 n d off) ) ) (else (fail "unknown timezone DST rule type") ) ) ) ) ) ) ) (parse-dst-rule (lambda (key) (and-let* ((d-m (next-match date-re))) ; Time component is optional & defaults to 02:00:00 (let* ((t-m (next-match time-re)) (off (if t-m (hms->offset #f (cdr t-m)) +0200hrs+)) ) (set-timezone-component! tz key (decode-dst-rule (cadr d-m) (cddr d-m) off)) #t ) ) ) ) (dst-parse (lambda () ; DST section is optional (let ((n-m (next-match name-re))) (or (not n-m) ; Offset is optional & defaults to 1hr (let* ((o-m (next-match offset-re)) (off (if o-m (hms->offset (cadr o-m) (cddr o-m)) ;XXX What does "ahead" mean? (+ (timezone-component-ref tz 'std-offset) SEC/HR) ) ) ) (set-timezone-component! tz 'dst-name (cadr n-m)) (set-timezone-component! tz 'dst-offset off) ; Rule, if present, must be complete (if (parse-dst-rule 'dst-start) (parse-dst-rule 'dst-end) #t ) ) ) ) ) ) (std-parse (lambda () ; Must have name & offset components (let ((n-m (next-match name-re))) (cond ((not n-m) (fail "bad timezone STD name") ) (else (let ((o-m (next-match offset-re))) (cond ((not o-m) (fail "bad timezone STD offset") ) (else (set-timezone-component! tz 'std-name (cadr n-m)) (set-timezone-component! tz 'std-offset (hms->offset (cadr o-m) (cddr o-m))) #t ) ) ) ) ) ) ) ) ) ; Walk the match set (cond ((string-null? str) (fail "empty timezone") ) (else (and (std-parse) ; Required (dst-parse) ; Optional (all-parsed) ; Must have successfully scanned entire string ; Then valid timezone info tz ) ) ) ) ) ) ) ) ;; #| ;NOT YET (cond-expand (macosx (define *system-timezone-directory* "/usr/share/zoneinfo") ) (else (define *system-timezone-directory* #f) ) ) (define (parse-posix-tzfile tz pn) (warning "cannot understand Posix pathname timezone" pn) #f ) (define (parse-posix-pathname-timezone tz str) (let ((pn (if (string-prefix? "/" str) str (make-pathname *system-timezone-directory* str)))) (if (file-exists? pn) (parse-posix-tzfile tz pn) #f ) ) ) |# (define (parse-posix-implementation-defined-timezone tz str) (warning "cannot understand Posix implementation-defined timezone" str) #f #; ;NOT YET (or (parse-posix-pathname-timezone tz (substring str 1)) (begin (warning "cannot understand Posix implementation-defined timezone" str) #f ) ) ) ;; (define (posix-timezone-string->timezone-components str . src) (let ((tz (make-timezone-components str (optional src "POSIX")))) (cond ((and (string? str) (string-prefix? ":" str)) (parse-posix-implementation-defined-timezone tz str) ) ((or (not (string? str)) (string=? "" str)) #f ) (else (parse-posix-literal-timezone tz str) ) ) ) ) ;;; Locale ;; Splits an IEEEÊStdÊ1003.1-2001 locale specifier string into ;; string components. The standard is extended to support a RFC 3066bis ;; Script specifier. ;; ;; Returns a locale-components object or #f, indicating a parse error. ;; ;; name: language(-script)(_territory)(.codeset)(@modifier) ;; language: ISO 639-1 or ISO 639-2 ;; script: RFC 3066bis ;; region: ISO 3166-1 ;; codeset: ;; modifier: (define parse-posix-literal-locale (let ((locale-re (regexp "([a-zA-Z]+)(-[a-zA-Z]+)?(_[a-zA-Z]+)?(\\.[^@]+)?(@.+)?"))) (lambda (lc str) (let ((matched-len 0)) (and-let* ((r (string-match locale-re str))) (let ((l (cadr r)) (s (caddr r)) (t (cadddr r)) (c (car (cddddr r))) (m (cadr (cddddr r))) (inc-matched-len (lambda (v) (set! matched-len (+ matched-len (string-length v)))))) (when l (inc-matched-len l) (set-locale-component! lc 'language (string-downcase l))) (when s (inc-matched-len s) (set-locale-component! lc 'script (string-titlecase (substring s 1)))) (when t (inc-matched-len t) (set-locale-component! lc 'region (string-upcase (substring t 1)))) (when c (inc-matched-len c) (set-locale-component! lc 'codeset (substring c 1))) (when m (inc-matched-len m) (set-locale-component! lc 'modifier (substring m 1))) (and (= matched-len (string-length str)) lc ) ) ) ) ) ) ) ;; #| ;NOT YET (cond-expand (macosx (define *system-locale-directory* "/usr/share/locale") ) (else (define *system-locale-directory* #f) ) ) (define (parse-posix-localefile lc pn) (warning "cannot understand Posix pathname locale" pn) #f ) (define (parse-posix-pathname-locale lc str) (let ((pn (if (string-prefix? "/" str) str (make-pathname *system-locale-directory* str)))) (if (file-exists? pn) (parse-posix-localefile lc pn) #f ) ) ) |# ;; (define (posix-locale-string->locale-components str . args) (let-optionals args ((src "POSIX") (tag 'locale)) (let ((lc (make-locale-components str src tag))) (cond ((or (not (string? str)) (string=? "" str)) #f ) ((or (string=? str "C") (string=? str "POSIX")) ;FIXME - #f so BUILTIN source used but ... #f ) (else (parse-posix-literal-locale lc str) ) ) ) ) ) ;;; The POSIX/GNU locale categories (define *posix-locale-category-names* '(("LC_COLLATE" . collate) ("LC_CTYPE" . character) ("LC_MESSAGES" . messages) ("LC_MONETARY" . monetary) ("LC_NUMERIC" . numberic) ("LC_ADDRESS" . address) ("LC_IDENTIFICATION" . identification) ("LC_MEASUREMENT" . measurement) ("LC_NAME" . name) ("LC_PAPER" . paper) ("LC_TELEPHONE" . telephone) ("LC_TIME" . time)) ) (define (set-posix-locale-categories func) (for-each (lambda (cell) (let ((cat (cdr cell))) (cond ((func (car cell) cat) => (cute set-locale-category! cat <>))) ) ) *posix-locale-category-names*) ) ;; (define (gnu-language-string->locale-components str . args) (and (string? str) (not (string=? "" str)) (let-optionals args ((src "GNU") (tag 'language)) (let* ((lc (make-locale-components str src tag)) (lang (string-upcase (locale-component-ref lc 'language)))) (update-locale-components! lc 'locales (map (lambda (str) (let ((rlc (posix-locale-string->locale-components str src))) (set-locale-component! rlc 'region lang) rlc ) ) (string-split str ":"))) lc ) ) ) ) ;;; ;; Sets the current timezone posix style (define (posix-load-timezone) (and-let* ((str (nonnull-getenv "TZ"))) (set-locale-category! 'timezone (posix-timezone-string->timezone-components str (list "POSIX" "TZ"))) ) ) ;; Create all local category values from the environment (define (posix-load-locale) (let ((str (nonnull-getenv "LC_ALL"))) (if str ; Then LC_ALL overrides (let ((lc (posix-locale-string->locale-components str '("POSIX" "LC_ALL")))) (set-posix-locale-categories (lambda (e c) lc)) ) ; Else set individually, w/ LANG as default (let* ((str (nonnull-getenv "LANG")) (lc (and str (posix-locale-string->locale-components str '("POSIX" "LANG"))))) (set-posix-locale-categories (lambda (e c) (cond ((nonnull-getenv e) => (cut posix-locale-string->locale-components <> `("POSIX" ,e))) (else lc)))) ) ) ) ) ;; GNU LANGUAGE (PATH-sytle list of LANG) (define (gnu-load-locale) (and-let* ((str (nonnull-getenv "LANGUAGE"))) (let ((lc (gnu-language-string->locale-components str '("GNU" "LANGUAGE") 'language))) (set-locale-category! 'language lc) ) ) ) ) ;module locale-posix