(module icu (char-from-name char-string-name char-digit-value char-decomposition char-numeric-value char-category char-direction char-combining-class char-mirror char-bidi-paired-bracket char->lower char->upper char->title ;; char-fold-case char-digit char-for-digit string-normalize char-mirrored? char-ualphabetic? char-ulowercase? char-uuppercase? char-uwhitespace? char-whitespace? char-java-space? char-space? char-blank? char-lower? char-upper? char-digit? char-alpha? char-alnum? char-xdigit? char-punct? char-graph? char-defined? char-cntrl? char-iso-control? char-print? char-base?) (import (except scheme char-alphabetic? char-whitespace?) (only chicken.module reexport) (only chicken.base when error) chicken.foreign (only srfi-13 string-null? string-upcase) (only srfi-1 take list-index) srfi-4 (only foreigners define-foreign-enum-type)) (reexport utf8) (foreign-declare "#include ") (foreign-declare "#include ") (define (char-from-name name) (let ((char ((foreign-lambda* char (((const c-string) name)) "UErrorCode status = U_ZERO_ERROR;" "UCharNameChoice choice = U_UNICODE_CHAR_NAME;" "UChar32 res = u_charFromName(choice, name, &status);" "if(U_SUCCESS(status)) {" " C_return(res);" "} else {" " C_return(0);" "}") (string-upcase name)))) (if (char=? #\null char) #f char))) (define (char-string-name char) (let ((name ((foreign-lambda* c-string ((int32 code)) "char dbuffer[150];" "UErrorCode status = U_ZERO_ERROR;" "UCharNameChoice choice = U_UNICODE_CHAR_NAME;" "int32_t len = u_charName(code, choice, dbuffer, 150, &status);" ;; "if(len == 0) {" ;; " status = U_ZERO_ERROR;" ;; " choice = U_CHAR_NAME_ALIAS;" ;; " len = u_charName(code, choice, dbuffer, 1, &status);" ;; ;; "printf(\"%d\\n\", len);" ;; "}" ;; "char* buffer = malloc(sizeof(char) * len);" ;; "status = U_ZERO_ERROR;" ;; "len = u_charName(code, choice, buffer, len, &status);" "C_return(dbuffer);") (char->integer char)))) (if (string-null? name) #f name))) (define (char-digit-value char) (let ((decimal ((foreign-lambda* int32 ((int32 code)) "C_return(u_charDigitValue(code));") (char->integer char)))) (if (= decimal -1) #f decimal))) ;; (define digit decimal) (define (char-decomposition char) (let* ((res (make-u16vector 16 0)) (len ((foreign-lambda* int32 ((int32 code) (u16vector dbuffer)) "UErrorCode status = U_ZERO_ERROR;" "UNormalizer2 const *norm2 = unorm2_getInstance(NULL, \"nfkc\", UNORM2_COMPOSE, &status);" "int32_t len = unorm2_getDecomposition(norm2, code, dbuffer, 16, &status);" "C_return(len);") (char->integer char) res))) (if (negative? len) #f (let ((lst (u16vector->list res))) (map integer->char (take lst (list-index zero? lst))))))) (define (char-numeric-value char) (let ((decimal ((foreign-lambda* double ((int32 code)) "C_return(u_getNumericValue(code));") (char->integer char)))) (if (= decimal (foreign-value U_NO_NUMERIC_VALUE double)) #f decimal))) (define-foreign-enum-type (category int) (category->int int->category) ((unassigned category/unassigned) U_UNASSIGNED) ((uppercase-letter category/uppercase-letter) U_UPPERCASE_LETTER) ((lowercase-letter category/lowercase-letter) U_LOWERCASE_LETTER) ((titlecase-letter category/titlecase-letter) U_TITLECASE_LETTER) ((modifier-letter category/modifier-letter) U_MODIFIER_LETTER) ((other-letter category/other-letter) U_OTHER_LETTER) ((non-spacing-mark category/non-spacing-mark) U_NON_SPACING_MARK) ((enclosing-mark category/enclosing-mark) U_ENCLOSING_MARK) ((combining-spacing-mark category/combining-spacing-mark) U_COMBINING_SPACING_MARK) ((decimal-digit-number category/decimal-digit-number) U_DECIMAL_DIGIT_NUMBER) ((letter-number category/letter-number) U_LETTER_NUMBER) ((other-number category/other-number) U_OTHER_NUMBER) ((space-separator category/space-separator) U_SPACE_SEPARATOR) ((line-separator category/line-separator) U_LINE_SEPARATOR) ((paragraph-separator category/paragraph-separator) U_PARAGRAPH_SEPARATOR) ((control-char category/control-char) U_CONTROL_CHAR) ((format-char category/format-char) U_FORMAT_CHAR) ((private-use-char category/private-use-char) U_PRIVATE_USE_CHAR) ((surrogate category/surrogate) U_SURROGATE) ((dash-punctuation category/dash-punctuation) U_DASH_PUNCTUATION) ((start-punctuation category/start-punctuation) U_START_PUNCTUATION) ((end-punctuation category/end-punctuation) U_END_PUNCTUATION) ((connector-punctuation category/connector-punctuation) U_CONNECTOR_PUNCTUATION) ((other-punctuation category/other-punctuation) U_OTHER_PUNCTUATION) ((math-symbol category/math-symbol) U_MATH_SYMBOL) ((currency-symbol category/currency-symbol) U_CURRENCY_SYMBOL) ((modifier-symbol category/modifier-symbol) U_MODIFIER_SYMBOL) ((other-symbol category/other-symbol) U_OTHER_SYMBOL) ((initial-punctuation category/initial-punctuation) U_INITIAL_PUNCTUATION) ((final-punctuation category/final-punctuation) U_FINAL_PUNCTUATION) ((char-category-count category/char-category-count) U_CHAR_CATEGORY_COUNT)) (define (char-category char) (let ((result ((foreign-lambda* int ((int32 code)) "C_return(u_charType(code));") (char->integer char)))) (int->category result))) (define-foreign-enum-type (direction int) (direction->int int->direction) ((left-to-right direction/left-to-right ) U_LEFT_TO_RIGHT) ((right-to-left direction/right-to-left ) U_RIGHT_TO_LEFT) ((european-number direction/european-number ) U_EUROPEAN_NUMBER) ((european-number-separator direction/european-number-separator ) U_EUROPEAN_NUMBER_SEPARATOR) ((european-number-terminator direction/european-number-terminator) U_EUROPEAN_NUMBER_TERMINATOR) ((arabic-number direction/arabic-number ) U_ARABIC_NUMBER) ((common-number-separator direction/common-number-separator ) U_COMMON_NUMBER_SEPARATOR) ((block-separator direction/block-separator ) U_BLOCK_SEPARATOR) ((segment-separator direction/segment-separator ) U_SEGMENT_SEPARATOR) ((white-space-neutral direction/white-space-neutral ) U_WHITE_SPACE_NEUTRAL) ((other-neutral direction/other-neutral ) U_OTHER_NEUTRAL) ((left-to-right-embedding direction/left-to-right-embedding ) U_LEFT_TO_RIGHT_EMBEDDING) ((left-to-right-override direction/left-to-right-override ) U_LEFT_TO_RIGHT_OVERRIDE) ((right-to-left-arabic direction/right-to-left-arabic ) U_RIGHT_TO_LEFT_ARABIC) ((right-to-left-embedding direction/right-to-left-embedding ) U_RIGHT_TO_LEFT_EMBEDDING) ((right-to-left-override direction/right-to-left-override ) U_RIGHT_TO_LEFT_OVERRIDE) ((pop-directional-format direction/pop-directional-format ) U_POP_DIRECTIONAL_FORMAT) ((dir-non-spacing-mark direction/dir-non-spacing-mark ) U_DIR_NON_SPACING_MARK) ((boundary-neutral direction/boundary-neutral ) U_BOUNDARY_NEUTRAL) ((first-strong-isolate direction/first-strong-isolate ) U_FIRST_STRONG_ISOLATE) ((left-to-right-isolate direction/left-to-right-isolate ) U_LEFT_TO_RIGHT_ISOLATE) ((right-to-left-isolate direction/right-to-left-isolate ) U_RIGHT_TO_LEFT_ISOLATE) ((pop-directional-isolate direction/pop-directional-isolate ) U_POP_DIRECTIONAL_ISOLATE) ((char-direction-count direction/char-direction-count ) U_CHAR_DIRECTION_COUNT)) (define (char-direction char) (let ((result ((foreign-lambda* int ((int32 code)) "C_return(u_charDirection(code));") (char->integer char)))) (int->direction result))) (define (char-combining-class char) ((foreign-lambda* unsigned-byte ((int32 code)) "C_return(u_getCombiningClass(code));") (char->integer char))) (define (char-mirror char) ((foreign-lambda* char ((int32 code)) "C_return(u_charMirror(code));") (char->integer char))) (define (char-bidi-paired-bracket char) ((foreign-lambda* char ((int32 code)) "C_return(u_getBidiPairedBracket(code));") (char->integer char))) (define (char->lower char) ((foreign-lambda* char ((int32 code)) "C_return(u_tolower(code));") (char->integer char))) (define (char->upper char) ((foreign-lambda* char ((int32 code)) "C_return(u_toupper(code));") (char->integer char))) (define (char->title char) ((foreign-lambda* char ((int32 code)) "C_return(u_totitle(code));") (char->integer char))) ;; (define (char-fold-case char) ;; ((foreign-lambda* char ((int32 code)) ;; "C_return(u_foldCase(code));") ;; (char->integer char))) (define (char-digit char radix) ((foreign-lambda* int32 ((int32 code) (byte radix)) "C_return(u_digit(code, radix));") (char->integer char) radix)) (define char-for-digit (foreign-lambda* char ((int32 digit) (byte radix)) "C_return(u_forDigit(digit, radix));")) (define (string-normalize input #!optional (form "nfkc")) (when (not (member form '("nfkc" "nfc" "nfkd" "nfd"))) (error 'normalize "no such normalization form" form)) (let* ((input-len (string-length input)) (input (list->u16vector (map char->integer (string->list input)))) (mode (if (or (string=? form "nfc") (string=? form "nfkc")) (foreign-value UNORM2_COMPOSE int) (foreign-value UNORM2_DECOMPOSE int))) (form (cond ((string=? form "nfd") "nfc") ((string=? form "nfkd") "nfkc") (else form))) (output-len ((foreign-lambda* int32 ((c-string form) (u16vector input) (int32 input_len) (int mode)) "UErrorCode status = U_ZERO_ERROR;" "UNormalizer2 const *norm2 = unorm2_getInstance(NULL, form, mode, &status);" "UChar dest[1];" "int32_t output_len = unorm2_normalize(norm2, input, input_len, dest, 1, &status);" "C_return(output_len);") form input input-len mode)) (output (make-u16vector output-len 0))) ((foreign-lambda* void ((c-string form) (u16vector input) (int32 input_len) (u16vector output) (int32 output_len) (int mode) ) "UErrorCode status = U_ZERO_ERROR;" "UNormalizer2 const *norm2 = unorm2_getInstance(NULL, form, mode, &status);" "int32_t len = unorm2_normalize(norm2, input, input_len, output, output_len, &status);") form input input-len output output-len mode) (list->string (map integer->char (u16vector->list output))))) (define (char-mirrored? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isMirrored(code));") (char->integer char))) (define (char-ualphabetic? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isUAlphabetic(code));") (char->integer char))) (define (char-ulowercase? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isULowercase(code));") (char->integer char))) (define (char-uuppercase? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isUUppercase(code));") (char->integer char))) ;; Whitespace (define (char-uwhitespace? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isUWhiteSpace(code));") (char->integer char))) (define (char-whitespace? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isWhitespace(code));") (char->integer char))) (define (char-java-space? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isJavaSpaceChar(code));") (char->integer char))) (define (char-space? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isspace(code));") (char->integer char))) (define (char-blank? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isblank(code));") (char->integer char))) (define (char-lower? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_islower(code));") (char->integer char))) (define (char-upper? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isupper(code));") (char->integer char))) (define (char-digit? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isdigit(code));") (char->integer char))) (define (char-alpha? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isalpha(code));") (char->integer char))) (define (char-alnum? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isalnum(code));") (char->integer char))) (define (char-xdigit? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isxdigit(code));") (char->integer char))) (define (char-punct? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_ispunct(code));") (char->integer char))) (define (char-graph? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isgraph(code));") (char->integer char))) (define (char-defined? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isdefined(code));") (char->integer char))) (define (char-cntrl? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_iscntrl(code));") (char->integer char))) (define (char-iso-control? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isISOControl(code));") (char->integer char))) (define (char-print? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isprint(code));") (char->integer char))) (define (char-base? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isbase(code));") (char->integer char))) ;; (define (string-normalized? form input) ;; ) ;; (print (char-alphabetic? #\x) ;; ) )