;;;; locale-posix.scm ;;;; Kon Lovett, Sep '21 ;;;; Kon Lovett, Mar '19 ;;;; Kon Lovett, Dec '05 ;; Issues ;; ;; - #f is generated for any parse problems. ;; ;; - Does not interact w/ setlocale or tzset ;; ;; - If LC_ALL or LANG is not set but any LC_* is set then (current-locale) ;; will still be #f, while some locale-categories will be valued (module locale-posix (;export ; seconds->hms ; make-posix-timezone synthetic-posix-timezone ; posix-timezone-string->timezone-components posix-locale-string->locale-components gnu-language-string->locale-components ; posix-load-timezone posix-load-locale gnu-load-locale) (import scheme utf8 (chicken base) (chicken fixnum) (chicken string) (chicken condition) (chicken type) (chicken irregex) (chicken process-context) (only (chicken time posix) seconds->local-time local-timezone-abbreviation) (srfi 1) utf8-srfi-13 locale-categories locale-components) ;;; ;;fx-utils (: fxnegative? (fixnum -> boolean)) (: fxabs (fixnum -> fixnum)) (define (fxnegative? n) (fx> 0 n)) (define (fxabs n) (if (fxnegative? n) (fxneg n) n)) ;; (define-type locale-components list) (: parse-posix-literal-locale (locale-components string -> (or false locale-components))) ;; (define-constant SEC/HR 3600) (define-constant SEC/MIN 60) ;; (define (nonnull-getenv varnam) (let ((str (get-environment-variable varnam))) (and (string? str) (not (string-null? str)) str ) ) ) ;;; Utility (define (seconds->hms secs) (let* ((asecs (fxabs secs)) (rsecs (fxmod asecs SEC/HR)) ) (conc (if (fxnegative? secs) #\- #\+) (fx/ asecs SEC/HR) #\: (fx/ rsecs SEC/MIN) #\: (fxmod rsecs SEC/MIN)) ) ) (define (make-posix-timezone std-tzn std-off dst-tzn dst-off) (string-append std-tzn (seconds->hms std-off) dst-tzn (seconds->hms dst-off)) ) ;;; Timezone ;; Splits an IEEEÊStdÊ1003.1-2001 TZ specifier string into components. ;; ;; Returns a timezone components object or #f, indicating a parse error. ;; ;; - doesn't handle implementation defined entries ;; ;; - cannot differentiate a relative timezone filename that is a valid ;; timezone specifier (define-constant INT-NAME-RE "(^[^<:0-9,+-][^0-9,+-]*)|^<([^>]+)>") (define-constant OFFSET-NAME-RE "(^[+-][0-9]+)") (define (parse-posix-literal-timezone tz str) (let ((ext-name-re (string->irregex (string-append OFFSET-NAME-RE "|" INT-NAME-RE))) (int-name-re (string->irregex INT-NAME-RE)) (offset-re (string->irregex "^(\\+|-)?([0-9]+)(:[0-9]+)?(:[0-9]+)?")) ;For compatibility with System V Release 3.1, a semicolon (`;') may be ;used to separate the rule from the rest of the specification. ;Allow it to separate the "to DST" & "from DST" segments since no harm, no foul. (date-re (string->irregex "^[;,]([JM])?([0-9]+)(\\.[0-9]+)?(\\.[0-9]+)?")) (time-re (string->irregex "^/([0-9]+)(:[0-9]+)?(:[0-9]+)?")) (+0200hrs+ (fx* 2 SEC/HR)) (strpos 0) (strend (string-length str)) ) ; (letrec ((fail (lambda (msg) (error 'parse-posix-literal-timezone msg str) ) ) ; (next-match (lambda (re) (and-let* ((ml (string-search re str strpos))) (set! strpos (fx+ strpos (string-length (car ml)))) ml ) ) ) ; (all-parsed (lambda () (or (<= strend strpos) (fail "bad timezone format") ) ) ) ; (parse-number (lambda (numstr) (cond ((not numstr) 0 ) ((char-numeric? (string-ref numstr 0)) (string->number numstr) ) (else (fail "bad timezone number") ) ) ) ) ; (parse-delmcomp (lambda (numstr delm) (parse-number (if (not (and numstr (string-prefix? delm numstr))) numstr (string-trim numstr (string-ref delm 0) ) ) ) ) ) ; (parse-timecomp (lambda (numstr) (parse-delmcomp numstr ":") ) ) ; (parse-daterulecomp (lambda (numstr) (parse-delmcomp numstr ".") ) ) ; (hms->offset (lambda (sgnstr hms-lst) (and-let* ((hr (parse-number (first hms-lst))) (mn (parse-timecomp (second hms-lst))) (sc (parse-timecomp (third hms-lst))) ) (let ((secs (fx+ (fx* hr SEC/HR) (fx+ (fx* mn SEC/MIN) sc)))) (if (and sgnstr (string=? sgnstr "-")) (fxneg secs) secs ) ) ) ) ) ; (decode-dst-rule (lambda (rulstr dat-lst off) ;Must begin w/ a valid integer. Interpreted later. (and-let* ((n1 (parse-number (first dat-lst)))) (cond ((not rulstr) ;Julian Leap rule (make-timezone-dst-rule-julian-leap n1 off) ) ;select rule kind & interpret rest of match (else (case (string-ref rulstr 0) ((#\J) ;Julian No-Leap rule (make-timezone-dst-rule-julian-noleap n1 off) ) ((#\M) ;Date (and-let* ((n (parse-daterulecomp (second dat-lst))) (d (parse-daterulecomp (third dat-lst))) ) (make-timezone-dst-rule-mwd n1 n d off) ) ) (else (fail "unknown timezone DST rule type") ) ) ) ) ) ) ) ; (parse-dst-rule (lambda (key) (and-let* ((d-m (next-match date-re))) ;Time component is optional & defaults to 02:00:00 (let* ((t-m (next-match time-re)) (off (if t-m (hms->offset #f (cdr t-m)) +0200hrs+)) ) (set-timezone-component! tz key (decode-dst-rule (cadr d-m) (cddr d-m) off)) #t ) ) ) ) ; (dst-parse (lambda () ;DST section is optional (let ((n-m (next-match int-name-re))) (or (not n-m) ;Offset is optional & defaults to 1hr (let* ((o-m (next-match offset-re)) (off (if o-m (hms->offset (cadr o-m) (cddr o-m)) (fx- (timezone-component-ref tz 'std-offset) SEC/HR) #; ;XXX What does "ahead" mean? (fx+ (timezone-component-ref tz 'std-offset) SEC/HR))) ) (set-timezone-component! tz 'dst-name (car n-m)) (set-timezone-component! tz 'dst-offset off) ;Rule, if present, must be complete (if (parse-dst-rule 'dst-start) (parse-dst-rule 'dst-end) #t ) ) ) ) ) ) ; (std-parse (lambda () ;Must have name & offset components (let ((n-m (next-match ext-name-re))) (cond ((not n-m) (fail "bad timezone STD name") ) (else (let ((o-m (next-match offset-re))) (cond ((not o-m) (fail "bad timezone STD offset") ) (else (set-timezone-component! tz 'std-name (car n-m)) (set-timezone-component! tz 'std-offset (hms->offset (cadr o-m) (cddr o-m))) #t ) ) ) ) ) ) ) ) ) ; ;walk the match set (cond ((string-null? str) (fail "empty timezone") ) (else (and (std-parse) ;required (dst-parse) ;optional (all-parsed) ;must have successfully scanned entire string ;then valid timezone info tz ) ) ) ) ) ) ;; #| ;NOT YET (cond-expand (macosx (define-constant SYSTEM-TIMEZONE-DIRECTORY "/usr/share/zoneinfo") ) (else (define-constant SYSTEM-TIMEZONE-DIRECTORY #f) ) ) (define (parse-posix-tzfile tz pn) (warning "cannot understand Posix pathname timezone" pn) #f ) (define (parse-posix-pathname-timezone tz str) (let ((pn (if (string-prefix? "/" str) str (make-pathname SYSTEM-TIMEZONE-DIRECTORY str)))) (and (file-exists? pn) (parse-posix-tzfile tz pn)) ) ) |# (define (parse-posix-implementation-defined-timezone tz str) (warning "cannot understand Posix implementation-defined timezone" str) #f #; ;NOT YET (or (parse-posix-pathname-timezone tz (substring str 1)) (begin (warning "cannot understand Posix implementation-defined timezone" str) #f ) ) ) ;; (define (posix-timezone-string->timezone-components tz-str . args) (let-optionals args ((tz-src "POSIX")) (let ((tz (make-timezone-components tz-str tz-src))) (cond ((and (string? tz-str) (string-prefix? ":" tz-str)) (parse-posix-implementation-defined-timezone tz tz-str) ) ((or (not (string? tz-str)) (string-null? tz-str)) #f ) (else (parse-posix-literal-timezone tz tz-str) ) ) ) ) ) ;ugly, & unused, for now (define *posix-timezone-string-parse-failure-info* #f) (define (*maybe-posix-timezone-string->timezone-components str . args) (let-optionals args ((tz-src "POSIX")) (set! *posix-timezone-string-parse-failure-info* #f) (handle-exceptions exn (begin (set! *posix-timezone-string-parse-failure-info* exn) #f) (posix-timezone-string->timezone-components str tz-src)) ) ) ;; (define (tm-dst? tm) (vector-ref tm 8)) (define (tm-off tm) (vector-ref tm 9)) (define (synthetic-posix-timezone-components tz-str tz-src) (let* ((tz (make-timezone-components tz-str tz-src)) (tm (seconds->local-time)) (keys (if (tm-dst? tm) '(dst-name . dst-offset) '(std-name . std-offset))) ) (set-timezone-component! tz 'dst? (tm-dst? tm)) (set-timezone-component! tz (car keys) (local-timezone-abbreviation)) (set-timezone-component! tz (cdr keys) (tm-off tm)) tz ) ) (define (synthetic-posix-timezone) (and-let* ((tz-str (nonnull-getenv "TZ"))) (let ((tz (synthetic-posix-timezone-components tz-str (list "POSIX" "TZ")))) (set-locale-category! 'timezone tz) ) ) ) ;;; Locale ;; Splits an IEEEÊStdÊ1003.1-2001 locale specifier string into ;; string components. The standard is extended to support a RFC 3066bis ;; Script specifier. ;; ;; Returns a locale-components object or #f, indicating a parse error. ;; ;; name: language(-script)(_country)(-subdivision)(.codeset)(@modifier) ;; language: ISO 639-1 or ISO 639-2 ;; script: RFC 3066bis ;; region: ISO 3166-1 or ISO 3166-2 (territory in IEEE Std 1003.1-2001) ;; country & subdivision ;; codeset: ;; modifier: (define-constant POSIX-LOCALE-REGEX "([a-zA-Z]+)(-[a-zA-Z]+)?(_[a-zA-Z]+)?(-[a-zA-Z0-9]+)?(\\.[^@]+)?(@.+)?") (define (parse-posix-literal-locale lc str) (let ((locale-re (string->irregex POSIX-LOCALE-REGEX)) (matched-len 0) ) (and-let* ((r (string-match locale-re str))) (let ((language (second r)) (script (third r)) (country (fourth r)) (subdivision (fifth r)) (codeset (sixth r)) (modifier (seventh r)) (inc-matched-len (lambda (v) (set! matched-len (fx+ matched-len (string-length v))) ) ) ) (when language (inc-matched-len language) (set-locale-component! lc 'language (string-downcase language)) ) (when script (inc-matched-len script) (set-locale-component! lc 'script (string-titlecase (substring script 1))) ) (when country (inc-matched-len country) (set-locale-component! lc 'country (string-upcase (substring country 1))) ) (when subdivision (inc-matched-len subdivision) (set-locale-component! lc 'subdivision (string-upcase (substring subdivision 1))) ) (when codeset (inc-matched-len codeset) (set-locale-component! lc 'codeset (substring codeset 1)) ) (when modifier (inc-matched-len modifier) (set-locale-component! lc 'modifier (substring modifier 1)) ) ;Synthetic component (when country (set-locale-component! lc 'region (string-append (or (locale-component-ref lc 'country) "") (let ( (str (locale-component-ref lc 'subdivision)) ) (if str (string-append "-" str) "")))) ) ;Must be at the end of string (and (fx= matched-len (string-length str)) lc ) ) ) ) ) ;; #| ;NOT YET (cond-expand (macosx (define *system-locale-directory* "/usr/share/locale") ) (else (define *system-locale-directory* #f) ) ) (define (parse-posix-localefile lc pn) (warning "cannot understand Posix pathname locale" pn) #f ) (define (parse-posix-pathname-locale lc str) (let ((pn (if (string-prefix? "/" str) str (make-pathname *system-locale-directory* str))) ) (if (file-exists? pn) (parse-posix-localefile lc pn) #f ) ) ) |# ;; (define (posix-locale-string->locale-components str . args) (let-optionals args ((src "POSIX") (tag 'locale) ) (let ((lc (make-locale-components str src tag))) (cond ((or (not (string? str)) (string-null? str)) #f ) ((or (string=? str "C") (string=? str "POSIX")) ;FIXME - #f so BUILTIN source used but ... #f ) (else (parse-posix-literal-locale lc str) ) ) ) ) ) ;;; The POSIX/GNU locale categories ;FIXME parameter w/ copied argument & return values - not shared (define *posix-locale-category-names* '( ("LC_COLLATE" . collate) ("LC_CTYPE" . character) ("LC_MESSAGES" . messages) ("LC_MONETARY" . monetary) ("LC_NUMERIC" . numberic) ("LC_ADDRESS" . address) ("LC_IDENTIFICATION" . identification) ("LC_MEASUREMENT" . measurement) ("LC_NAME" . name) ("LC_PAPER" . paper) ("LC_TELEPHONE" . telephone) ("LC_TIME" . time)) ) (define (set-posix-locale-categories func) (for-each (lambda (cell) (let ((cat (cdr cell))) (cond ((func (car cell) cat) => (cut set-locale-category! cat <>))))) *posix-locale-category-names*) ) ;; (define (gnu-language-string->locale-components str . args) (and (string? str) (not (string-null? str)) (let-optionals args ((src "GNU") (tag 'language) ) (let ((lc (make-locale-components str src tag))) (update-locale-components! lc 'locales ;Keep in priority order (reverse! (foldl ;May not have a 'country or 'region. Should use locale's? (lambda (ls str) ;Ignore when no parse (let ((lc (posix-locale-string->locale-components str src))) (if lc (cons lc ls) ls) ) ) '() (string-split str ":")))) ) ) ) ) ;;; ;; Sets the current timezone posix style (define (posix-load-timezone) (and-let* ((tz-str (nonnull-getenv "TZ"))) (let* ((tz-src (list "POSIX" "TZ")) (tz (*maybe-posix-timezone-string->timezone-components tz-str tz-src)) (tz (or tz (synthetic-posix-timezone-components tz-str tz-src))) ) (set-locale-category! 'timezone tz) ) ) ) ;; Create all local category values from the environment (define (posix-load-locale) (let ((str (nonnull-getenv "LC_ALL"))) (if str ;then LC_ALL overrides (let ((lc (posix-locale-string->locale-components str '("POSIX" "LC_ALL")))) (set-locale-category! 'current lc) (set-posix-locale-categories (lambda (e c) lc)) ) ;else set individually, w/ LANG as default (let ((str (nonnull-getenv "LANG"))) (when str (let ((lc (posix-locale-string->locale-components str '("POSIX" "LANG")))) (set-locale-category! 'current lc) (set-posix-locale-categories (lambda (e c) (cond ((nonnull-getenv e) => (cut posix-locale-string->locale-components <> `("POSIX" ,e))) (else lc)))) ) ) ) ) ) ) ;; GNU LANGUAGE (PATH-sytle list of LANG) (define (gnu-load-locale) (and-let* ((str (nonnull-getenv "LANGUAGE"))) (let ((lc (gnu-language-string->locale-components str '("GNU" "LANGUAGE") 'language))) (set-locale-category! 'language lc) ) ) ) ) ;module locale-posix