;;;; unicode-utils.scm -*- Scheme -*- ;;;; Kon Lovett, Jul '18 ;;;; Kon Lovett, Aug '10 ;; Issues ;; ;; - Implies Unicode support that is not present. (declare (bound-to-procedure ##sys#size ##sys#fragments->string ##sys#make-string ##sys#char->utf8-string ##sys#unicode-surrogate? ##sys#surrogates->codepoint)) (module unicode-utils (;export ascii-codepoint? unicode-surrogate? char->unicode-string unicode-string *unicode-string generic-make-string unicode-make-string *unicode-make-string unicode-surrogates->codepoint ; unicode-char->string) (import scheme) (import (chicken base)) (import (chicken type)) (import (only srfi-1 every make-list)) (import (only (check-errors sys) check-list check-fixnum check-char check-exact-unsigned-integer)) (define-inline (check-natural-fixnum loc obj) (check-exact-unsigned-integer loc (check-fixnum loc obj)) ) (: ascii-codepoint? (* -> boolean)) (: unicode-surrogate? (* -> boolean)) (: char->unicode-string (char -> string)) (: unicode-char->string (deprecated char->unicode-string)) (: *unicode-string ((list-of char) -> string)) (: unicode-string (#!rest -> string)) (: *unicode-make-string (fixnum char -> string)) (: unicode-make-string (fixnum #!optional char -> string)) (: unicode-surrogates->codepoint (fixnum fixnum -> (or false fixnum))) ;; Simple UTF 8 ;nul is not accepted! (define (ascii-codepoint? ch) (and (char? ch) (<= 0 (char->integer ch) #x7f) ) ) (define (unicode-surrogate? n) (and (fixnum? n) (##sys#unicode-surrogate? n) ) ) (define (char->unicode-string ch) (##sys#char->utf8-string (check-char 'char->unicode-string ch)) ) (define unicode-char->string char->unicode-string) (define (*unicode-string chs) (cond ((null? chs) "" ) ((null? (cdr chs)) (##sys#char->utf8-string (car chs)) ) (else (let* ((sts (map ##sys#char->utf8-string chs)) (cnt (foldl (lambda (l s) (+ l (the fixnum (##sys#size s)))) 0 sts)) ) (##sys#fragments->string cnt sts) ) ) ) ) ;inefficient (define (unicode-string . chs) (if (null? chs) "" (begin (if (null? (cdr chs)) (check-char 'unicode-string (car chs)) #;(every (cut check-char 'unicode-string <>) chs) (check-list 'unicode-string chs)) (*unicode-string chs) ) ) ) (define (*unicode-make-string len fill) (cond ((= 0 len) "" ) ;ascii-codepoint < char ((not (ascii-codepoint? fill)) (*unicode-string (the (list-of char) (make-list len fill))) ) (else (##sys#make-string len fill) ) ) ) (define (unicode-make-string len #!optional (fill #\space)) (*unicode-make-string (check-natural-fixnum 'unicode-make-string len) (check-char 'unicode-make-string fill)) ) (define generic-make-string unicode-make-string) (define (unicode-surrogates->codepoint hi lo) ;NOTE use of `sys' checks introduces ambiguity (##sys#surrogates->codepoint (check-natural-fixnum 'unicode-surrogates->codepoint hi) (check-natural-fixnum 'unicode-surrogates->codepoint lo)) ) ) ;module unicode-utils