;;;; locale-posix.scm ;;;; Kon Lovett, Sep '21 ;;;; Kon Lovett, Mar '19 ;;;; Kon Lovett, Dec '05 ;; Issues ;; ;; - #f is generated for any parse problems. ;; ;; - Does not interact w/ setlocale or tzset ;; ;; - If LC_ALL or LANG is not set but any LC_* is set then (current-locale) ;; will still be #f, while some locale-categories will be valued (module locale-posix (;export ; seconds->hms ; make-posix-timezone ; posix-timezone-string->timezone-components posix-locale-string->locale-components gnu-language-string->locale-components ; posix-load-timezone posix-load-locale gnu-load-locale) (import scheme) (import (chicken base)) (import (chicken fixnum)) (import (chicken string)) (import (chicken condition)) (import (chicken type)) (import (chicken process-context)) (import (only (chicken time posix) seconds->local-time local-timezone-abbreviation)) (import (srfi 1)) (import (srfi 13)) (import locale-categories) (import locale-components) ;;; ;;fx-utils (: fxnegative? (fixnum --> boolean)) (: fxabs (fixnum --> fixnum)) (define (fxnegative? n) (fx> 0 n)) (define (fxabs n) (if (fxnegative? n) (fxneg n) n)) ;;regex (import (chicken irregex)) (define (regexp pat #!optional caseless extended utf8) (apply irregex pat (let ((opts '())) (when caseless (set! opts (cons 'i opts))) (when extended (set! opts (cons 'x opts))) (when utf8 (set! opts (cons 'utf8 opts))) opts))) (define (string-match rx str) (and-let* ((m (irregex-match rx str))) (let loop ((i (irregex-match-num-submatches m)) (res '())) (if (fx<= i 0) (cons str res) (loop (fx- i 1) (cons (irregex-match-substring m i) res)))))) (define (string-search rx str #!optional (start 0) (range (string-length str))) (let ((n (string-length str))) (and-let* ((m (irregex-search rx str start (min n (fx+ start range))))) (let loop ((i (irregex-match-num-submatches m)) (res '())) (if (fx< i 0) res (loop (fx- i 1) (cons (irregex-match-substring m i) res))))))) ;; (define-type locale-components list) ;; (define-constant SEC/HR 3600) (define-constant SEC/MIN 60) ;; (define (nonnull-getenv varnam) (let ( (str (get-environment-variable varnam)) ) (and (string? str) (not (string-null? str)) str ) ) ) ;;; Utility (define (seconds->hms secs) (let* ( (asecs (fxabs secs)) (rsecs (fxmod asecs SEC/HR)) ) (conc (if (fxnegative? secs) #\- #\+) (fx/ asecs SEC/HR) #\: (fx/ rsecs SEC/MIN) #\: (fxmod rsecs SEC/MIN)) ) ) (define (make-posix-timezone std-tzn std-off dst-tzn dst-off) (string-append std-tzn (seconds->hms std-off) dst-tzn (seconds->hms dst-off)) ) ;;; Timezone ;; Splits an IEEEÊStdÊ1003.1-2001 TZ specifier string into components. ;; ;; Returns a timezone components object or #f, indicating a parse error. ;; ;; - doesn't handle implementation defined entries ;; ;; - cannot differentiate a relative timezone filename that is a valid ;; timezone specifier (define-constant INT-NAME-RE "(^[^<:0-9,+-][^0-9,+-]*)|^<([^>]+)>") (define-constant OFFSET-NAME-RE "(^[+-][0-9]+)") (define parse-posix-literal-timezone (let ( (ext-name-re (regexp (string-append OFFSET-NAME-RE "|" INT-NAME-RE))) (int-name-re (regexp INT-NAME-RE)) (offset-re (regexp "^(\\+|-)?([0-9]+)(:[0-9]+)?(:[0-9]+)?")) ;For compatibility with System V Release 3.1, a semicolon (`;') may be ;used to separate the rule from the rest of the specification. ;Allow it to separate the "to DST" & "from DST" segments since no harm, no foul. (date-re (regexp "^[;,]([JM])?([0-9]+)(\\.[0-9]+)?(\\.[0-9]+)?")) (time-re (regexp "^/([0-9]+)(:[0-9]+)?(:[0-9]+)?")) (+0200hrs+ (fx* 2 SEC/HR)) ) ; (lambda (tz str) (let ( (strpos 0) (strend (string-length str)) ) ; (letrec ( ; (fail (lambda (msg) (error 'parse-posix-literal-timezone msg str) ) ) ; (next-match (lambda (re) (and-let* ( (ml (string-search re str strpos)) ) (set! strpos (fx+ strpos (string-length (car ml)))) ml ) ) ) ; (all-parsed (lambda () (or (<= strend strpos) (fail "bad timezone format") ) ) ) ; (parse-number (lambda (numstr) (cond ((not numstr) 0 ) ((char-numeric? (string-ref numstr 0)) (string->number numstr) ) (else (fail "bad timezone number") ) ) ) ) ; (parse-delmcomp (lambda (numstr delm) (parse-number (if (not (and numstr (string-prefix? delm numstr))) numstr (string-trim numstr (string-ref delm 0) ) ) ) ) ) ; (parse-timecomp (lambda (numstr) (parse-delmcomp numstr ":") ) ) ; (parse-daterulecomp (lambda (numstr) (parse-delmcomp numstr ".") ) ) ; (hms->offset (lambda (sgnstr hms-lst) (and-let* ( (hr (parse-number (first hms-lst))) (mn (parse-timecomp (second hms-lst))) (sc (parse-timecomp (third hms-lst))) ) (let ( (secs (fx+ (fx* hr SEC/HR) (fx+ (fx* mn SEC/MIN) sc))) ) (if (and sgnstr (string=? sgnstr "-")) (fxneg secs) secs ) ) ) ) ) ; (decode-dst-rule (lambda (rulstr dat-lst off) ;Must begin w/ a valid integer. Interpreted later. (and-let* ( (n1 (parse-number (first dat-lst))) ) (cond ((not rulstr) ;Julian Leap rule (make-timezone-dst-rule-julian-leap n1 off) ) ;select rule kind & interpret rest of match (else (case (string-ref rulstr 0) ((#\J) ;Julian No-Leap rule (make-timezone-dst-rule-julian-noleap n1 off) ) ((#\M) ;Date (and-let* ( (n (parse-daterulecomp (second dat-lst))) (d (parse-daterulecomp (third dat-lst))) ) (make-timezone-dst-rule-mwd n1 n d off) ) ) (else (fail "unknown timezone DST rule type") ) ) ) ) ) ) ) ; (parse-dst-rule (lambda (key) (and-let* ( (d-m (next-match date-re)) ) ;Time component is optional & defaults to 02:00:00 (let* ( (t-m (next-match time-re)) (off (if t-m (hms->offset #f (cdr t-m)) +0200hrs+)) ) (set-timezone-component! tz key (decode-dst-rule (cadr d-m) (cddr d-m) off)) #t ) ) ) ) ; (dst-parse (lambda () ;DST section is optional (let ( (n-m (next-match int-name-re)) ) (or (not n-m) ;Offset is optional & defaults to 1hr (let* ( (o-m (next-match offset-re)) (off (if o-m (hms->offset (cadr o-m) (cddr o-m)) (fx- (timezone-component-ref tz 'std-offset) SEC/HR) #; ;XXX What does "ahead" mean? (fx+ (timezone-component-ref tz 'std-offset) SEC/HR))) ) (set-timezone-component! tz 'dst-name (car n-m)) (set-timezone-component! tz 'dst-offset off) ;Rule, if present, must be complete (if (parse-dst-rule 'dst-start) (parse-dst-rule 'dst-end) #t ) ) ) ) ) ) ; (std-parse (lambda () ;Must have name & offset components (let ( (n-m (next-match ext-name-re)) ) (cond ((not n-m) (fail "bad timezone STD name") ) (else (let ( (o-m (next-match offset-re)) ) (cond ((not o-m) (fail "bad timezone STD offset") ) (else (set-timezone-component! tz 'std-name (car n-m)) (set-timezone-component! tz 'std-offset (hms->offset (cadr o-m) (cddr o-m))) #t ) ) ) ) ) ) ) ) ) ; ;walk the match set (cond ((string-null? str) (fail "empty timezone") ) (else (and (std-parse) ;required (dst-parse) ;optional (all-parsed) ;must have successfully scanned entire string ;then valid timezone info tz ) ) ) ) ) ) ) ) ;; #| ;NOT YET (cond-expand (macosx (define *system-timezone-directory* "/usr/share/zoneinfo") ) (else (define *system-timezone-directory* #f) ) ) (define (parse-posix-tzfile tz pn) (warning "cannot understand Posix pathname timezone" pn) #f ) (define (parse-posix-pathname-timezone tz str) (let ( (pn (if (string-prefix? "/" str) str (make-pathname *system-timezone-directory* str))) ) (if (file-exists? pn) (parse-posix-tzfile tz pn) #f ) ) ) |# (define (parse-posix-implementation-defined-timezone tz str) (warning "cannot understand Posix implementation-defined timezone" str) #f #; ;NOT YET (or (parse-posix-pathname-timezone tz (substring str 1)) (begin (warning "cannot understand Posix implementation-defined timezone" str) #f ) ) ) ;; (define (posix-timezone-string->timezone-components tz-str . args) (let-optionals args ( (tz-src "POSIX") ) (let ( (tz (make-timezone-components tz-str tz-src)) ) (cond ((and (string? tz-str) (string-prefix? ":" tz-str)) (parse-posix-implementation-defined-timezone tz tz-str) ) ((or (not (string? tz-str)) (string-null? tz-str)) #f ) (else (parse-posix-literal-timezone tz tz-str) ) ) ) ) ) ;yes, ugly (define *posix-timezone-string-parse-failure-info* #f) (define (*maybe-posix-timezone-string->timezone-components str . args) (let-optionals args ( (tz-src "POSIX") ) (set! *posix-timezone-string-parse-failure-info* #f) (handle-exceptions exn (begin (set! *posix-timezone-string-parse-failure-info* exn) #f) (posix-timezone-string->timezone-components str tz-src)) ) ) ;; (define (tm-dst? tm) (vector-ref tm 8)) (define (tm-off tm) (vector-ref tm 9)) (define (synthetic-posix-timezone-components tz-str tz-src) (let* ( (tz (make-timezone-components tz-str tz-src)) (tm (seconds->local-time)) (keys (if (tm-dst? tm) '(dst-name . dst-offset) '(std-name . std-offset))) ) (set-timezone-component! tz 'dst? (tm-dst? tm)) (set-timezone-component! tz (car keys) (local-timezone-abbreviation)) (set-timezone-component! tz (cdr keys) (tm-off tm)) tz ) ) (define (synthetic-posix-timezone) (and-let* ( (tz-str (nonnull-getenv "TZ")) ) (let ( (tz (synthetic-posix-timezone-components tz-str (list "POSIX" "TZ"))) ) (set-locale-category! 'timezone tz) ) ) ) ;;; Locale ;; Splits an IEEEÊStdÊ1003.1-2001 locale specifier string into ;; string components. The standard is extended to support a RFC 3066bis ;; Script specifier. ;; ;; Returns a locale-components object or #f, indicating a parse error. ;; ;; name: language(-script)(_country)(-subdivision)(.codeset)(@modifier) ;; language: ISO 639-1 or ISO 639-2 ;; script: RFC 3066bis ;; region: ISO 3166-1 or ISO 3166-2 (territory in IEEE Std 1003.1-2001) ;; country & subdivision ;; codeset: ;; modifier: (define-constant POSIX-LOCALE-REGEX "([a-zA-Z]+)(-[a-zA-Z]+)?(_[a-zA-Z]+)?(-[a-zA-Z0-9]+)?(\\.[^@]+)?(@.+)?") (: parse-posix-literal-locale (locale-components string -> (or boolean locale-components))) ; (define parse-posix-literal-locale (let ( (locale-re (regexp POSIX-LOCALE-REGEX)) ) (lambda (lc str) (let ( (matched-len 0) ) (and-let* ( (r (string-match locale-re str)) ) (let ( (language (second r)) (script (third r)) (country (fourth r)) (subdivision (fifth r)) (codeset (sixth r)) (modifier (seventh r)) (inc-matched-len (lambda (v) (set! matched-len (fx+ matched-len (string-length v))) ) ) ) (when language (inc-matched-len language) (set-locale-component! lc 'language (string-downcase language)) ) (when script (inc-matched-len script) (set-locale-component! lc 'script (string-titlecase (substring script 1))) ) (when country (inc-matched-len country) (set-locale-component! lc 'country (string-upcase (substring country 1))) ) (when subdivision (inc-matched-len subdivision) (set-locale-component! lc 'subdivision (string-upcase (substring subdivision 1))) ) (when codeset (inc-matched-len codeset) (set-locale-component! lc 'codeset (substring codeset 1)) ) (when modifier (inc-matched-len modifier) (set-locale-component! lc 'modifier (substring modifier 1)) ) ;Synthetic component (when country (set-locale-component! lc 'region (string-append (or (locale-component-ref lc 'country) "") (let ( (str (locale-component-ref lc 'subdivision)) ) (if str (string-append "-" str) "")))) ) ;Must be at the end of string (and (fx= matched-len (string-length str)) lc ) ) ) ) ) ) ) ;; #| ;NOT YET (cond-expand (macosx (define *system-locale-directory* "/usr/share/locale") ) (else (define *system-locale-directory* #f) ) ) (define (parse-posix-localefile lc pn) (warning "cannot understand Posix pathname locale" pn) #f ) (define (parse-posix-pathname-locale lc str) (let ( (pn (if (string-prefix? "/" str) str (make-pathname *system-locale-directory* str))) ) (if (file-exists? pn) (parse-posix-localefile lc pn) #f ) ) ) |# ;; (define (posix-locale-string->locale-components str . args) (let-optionals args ( (src "POSIX") (tag 'locale) ) (let ( (lc (make-locale-components str src tag)) ) (cond ((or (not (string? str)) (string-null? str)) #f ) ((or (string=? str "C") (string=? str "POSIX")) ;FIXME - #f so BUILTIN source used but ... #f ) (else (parse-posix-literal-locale lc str) ) ) ) ) ) ;;; The POSIX/GNU locale categories (define *posix-locale-category-names* '( ("LC_COLLATE" . collate) ("LC_CTYPE" . character) ("LC_MESSAGES" . messages) ("LC_MONETARY" . monetary) ("LC_NUMERIC" . numberic) ("LC_ADDRESS" . address) ("LC_IDENTIFICATION" . identification) ("LC_MEASUREMENT" . measurement) ("LC_NAME" . name) ("LC_PAPER" . paper) ("LC_TELEPHONE" . telephone) ("LC_TIME" . time)) ) (define (set-posix-locale-categories func) (for-each (lambda (cell) (let ( (cat (cdr cell)) ) (cond ((func (car cell) cat) => (cut set-locale-category! cat <>))) ) ) *posix-locale-category-names*) ) ;; (define (gnu-language-string->locale-components str . args) (and (string? str) (not (string-null? str)) (let-optionals args ( (src "GNU") (tag 'language) ) (let ( (lc (make-locale-components str src tag)) ) (update-locale-components! lc 'locales ;Keep in priority order (reverse! (foldl ;May not have a 'country or 'region. Should use locale's? (lambda (ls str) ;Ignore when no parse (let ( (lc (posix-locale-string->locale-components str src)) ) (if lc (cons lc ls) ls) ) ) '() (string-split str ":")))) ) ) ) ) ;;; ;; Sets the current timezone posix style (define (posix-load-timezone) (and-let* ( (tz-str (nonnull-getenv "TZ")) ) (let* ( (tz-src (list "POSIX" "TZ")) (tz (*maybe-posix-timezone-string->timezone-components tz-str tz-src)) (tz (or tz (synthetic-posix-timezone-components tz-str tz-src))) ) (set-locale-category! 'timezone tz) ) ) ) ;; Create all local category values from the environment (define (posix-load-locale) (let ( (str (nonnull-getenv "LC_ALL")) ) (if str ;then LC_ALL overrides (let ( (lc (posix-locale-string->locale-components str '("POSIX" "LC_ALL"))) ) (set-locale-category! 'current lc) (set-posix-locale-categories (lambda (e c) lc)) ) ;else set individually, w/ LANG as default (let ( (str (nonnull-getenv "LANG")) ) (when str (let ( (lc (posix-locale-string->locale-components str '("POSIX" "LANG"))) ) (set-locale-category! 'current lc) (set-posix-locale-categories (lambda (e c) (cond ((nonnull-getenv e) => (cut posix-locale-string->locale-components <> `("POSIX" ,e))) (else lc)))) ) ) ) ) ) ) ;; GNU LANGUAGE (PATH-sytle list of LANG) (define (gnu-load-locale) (and-let* ( (str (nonnull-getenv "LANGUAGE")) ) (let ( (lc (gnu-language-string->locale-components str '("GNU" "LANGUAGE") 'language)) ) (set-locale-category! 'language lc) ) ) ) ) ;module locale-posix