(module icu (char-from-name char-string-name char-digit-value char-decomposition char-numeric-value char-category category->integer integer->category category/unassigned category/uppercase-letter category/lowercase-letter category/titlecase-letter category/modifier-letter category/other-letter category/non-spacing-mark category/enclosing-mark category/combining-spacing-mark category/decimal-digit-number category/letter-number category/other-number category/space-separator category/line-separator category/paragraph-separator category/control-char category/format-char category/private-use-char category/surrogate category/dash-punctuation category/start-punctuation category/end-punctuation category/connector-punctuation category/other-punctuation category/math-symbol category/currency-symbol category/modifier-symbol category/other-symbol category/initial-punctuation category/final-punctuation category/char-category-count char-direction direction->integer integer->direction direction/left-to-right direction/right-to-left direction/european-number direction/european-number-separator direction/european-number-terminator direction/arabic-number direction/common-number-separator direction/block-separator direction/segment-separator direction/white-space-neutral direction/other-neutral direction/left-to-right-embedding direction/left-to-right-override direction/right-to-left-arabic direction/right-to-left-embedding direction/right-to-left-override direction/pop-directional-format direction/dir-non-spacing-mark direction/boundary-neutral direction/first-strong-isolate direction/left-to-right-isolate direction/right-to-left-isolate direction/pop-directional-isolate direction/char-direction-count char-combining-class char-mirror char-bidi-paired-bracket char->lower char->upper char->title ;; char-fold-case char-digit char-for-digit string-normalize char-mirrored? char-ualphabetic? char-ulowercase? char-uuppercase? char-uwhitespace? char-whitespace? char-java-space? char-space? char-blank? char-lower? char-upper? char-digit? char-alpha? char-alnum? char-xdigit? char-punct? char-graph? char-defined? char-cntrl? char-iso-control? char-print? char-base?) (import (except scheme char-alphabetic? char-whitespace?) (only chicken.module reexport) (only chicken.base when error) chicken.foreign (only srfi-13 string-null? string-upcase) (only srfi-1 take list-index) srfi-4 (only foreigners define-foreign-enum-type)) (reexport utf8) (foreign-declare "#include ") (foreign-declare "#include ") (define (char-from-name name) (let ((char ((foreign-lambda* char (((const c-string) name)) "UErrorCode status = U_ZERO_ERROR;" "UCharNameChoice choice = U_UNICODE_CHAR_NAME;" "UChar32 res = u_charFromName(choice, name, &status);" "if(U_SUCCESS(status)) {" " C_return(res);" "} else {" " C_return(0);" "}") (string-upcase name)))) (if (char=? #\null char) #f char))) (define (char-string-name char) (let ((name ((foreign-lambda* c-string ((int32 code)) "char dbuffer[150];" "UErrorCode status = U_ZERO_ERROR;" "UCharNameChoice choice = U_UNICODE_CHAR_NAME;" "int32_t len = u_charName(code, choice, dbuffer, 150, &status);" ;; "if(len == 0) {" ;; " status = U_ZERO_ERROR;" ;; " choice = U_CHAR_NAME_ALIAS;" ;; " len = u_charName(code, choice, dbuffer, 1, &status);" ;; ;; "printf(\"%d\\n\", len);" ;; "}" ;; "char* buffer = malloc(sizeof(char) * len);" ;; "status = U_ZERO_ERROR;" ;; "len = u_charName(code, choice, buffer, len, &status);" "C_return(dbuffer);") (char->integer char)))) (if (string-null? name) #f name))) (define (char-digit-value char) (let ((decimal ((foreign-lambda* int32 ((int32 code)) "C_return(u_charDigitValue(code));") (char->integer char)))) (if (= decimal -1) #f decimal))) ;; (define digit decimal) (define (char-decomposition char) (let* ((res (make-u16vector 16 0)) (len ((foreign-lambda* int32 ((int32 code) (u16vector dbuffer)) "UErrorCode status = U_ZERO_ERROR;" "UNormalizer2 const *norm2 = unorm2_getInstance(NULL, \"nfkc\", UNORM2_COMPOSE, &status);" "int32_t len = unorm2_getDecomposition(norm2, code, dbuffer, 16, &status);" "C_return(len);") (char->integer char) res))) (if (negative? len) #f (let ((lst (u16vector->list res))) (map integer->char (take lst (list-index zero? lst))))))) (define (char-numeric-value char) (let ((decimal ((foreign-lambda* double ((int32 code)) "C_return(u_getNumericValue(code));") (char->integer char)))) (if (= decimal (foreign-value U_NO_NUMERIC_VALUE double)) #f decimal))) (define-foreign-enum-type (category int) (category->integer integer->category) ((category/unassigned U_UNASSIGNED) U_UNASSIGNED) ((category/uppercase-letter U_UPPERCASE_LETTER) U_UPPERCASE_LETTER) ((category/lowercase-letter U_LOWERCASE_LETTER) U_LOWERCASE_LETTER) ((category/titlecase-letter U_TITLECASE_LETTER) U_TITLECASE_LETTER) ((category/modifier-letter U_MODIFIER_LETTER) U_MODIFIER_LETTER) ((category/other-letter U_OTHER_LETTER) U_OTHER_LETTER) ((category/non-spacing-mark U_NON_SPACING_MARK) U_NON_SPACING_MARK) ((category/enclosing-mark U_ENCLOSING_MARK) U_ENCLOSING_MARK) ((category/combining-spacing-mark U_COMBINING_SPACING_MARK) U_COMBINING_SPACING_MARK) ((category/decimal-digit-number U_DECIMAL_DIGIT_NUMBER) U_DECIMAL_DIGIT_NUMBER) ((category/letter-number U_LETTER_NUMBER) U_LETTER_NUMBER) ((category/other-number U_OTHER_NUMBER) U_OTHER_NUMBER) ((category/space-separator U_SPACE_SEPARATOR) U_SPACE_SEPARATOR) ((category/line-separator U_LINE_SEPARATOR) U_LINE_SEPARATOR) ((category/paragraph-separator U_PARAGRAPH_SEPARATOR) U_PARAGRAPH_SEPARATOR) ((category/control-char U_CONTROL_CHAR) U_CONTROL_CHAR) ((category/format-char U_FORMAT_CHAR) U_FORMAT_CHAR) ((category/private-use-char U_PRIVATE_USE_CHAR) U_PRIVATE_USE_CHAR) ((category/surrogate U_SURROGATE) U_SURROGATE) ((category/dash-punctuation U_DASH_PUNCTUATION) U_DASH_PUNCTUATION) ((category/start-punctuation U_START_PUNCTUATION) U_START_PUNCTUATION) ((category/end-punctuation U_END_PUNCTUATION) U_END_PUNCTUATION) ((category/connector-punctuation U_CONNECTOR_PUNCTUATION) U_CONNECTOR_PUNCTUATION) ((category/other-punctuation U_OTHER_PUNCTUATION) U_OTHER_PUNCTUATION) ((category/math-symbol U_MATH_SYMBOL) U_MATH_SYMBOL) ((category/currency-symbol U_CURRENCY_SYMBOL) U_CURRENCY_SYMBOL) ((category/modifier-symbol U_MODIFIER_SYMBOL) U_MODIFIER_SYMBOL) ((category/other-symbol U_OTHER_SYMBOL) U_OTHER_SYMBOL) ((category/initial-punctuation U_INITIAL_PUNCTUATION) U_INITIAL_PUNCTUATION) ((category/final-punctuation U_FINAL_PUNCTUATION) U_FINAL_PUNCTUATION) ((category/char-category-count U_CHAR_CATEGORY_COUNT) U_CHAR_CATEGORY_COUNT)) (define category/unassigned U_UNASSIGNED) (define category/uppercase-letter U_UPPERCASE_LETTER) (define category/lowercase-letter U_LOWERCASE_LETTER) (define category/titlecase-letter U_TITLECASE_LETTER) (define category/modifier-letter U_MODIFIER_LETTER) (define category/other-letter U_OTHER_LETTER) (define category/non-spacing-mark U_NON_SPACING_MARK) (define category/enclosing-mark U_ENCLOSING_MARK) (define category/combining-spacing-mark U_COMBINING_SPACING_MARK) (define category/decimal-digit-number U_DECIMAL_DIGIT_NUMBER) (define category/letter-number U_LETTER_NUMBER) (define category/other-number U_OTHER_NUMBER) (define category/space-separator U_SPACE_SEPARATOR) (define category/line-separator U_LINE_SEPARATOR) (define category/paragraph-separator U_PARAGRAPH_SEPARATOR) (define category/control-char U_CONTROL_CHAR) (define category/format-char U_FORMAT_CHAR) (define category/private-use-char U_PRIVATE_USE_CHAR) (define category/surrogate U_SURROGATE) (define category/dash-punctuation U_DASH_PUNCTUATION) (define category/start-punctuation U_START_PUNCTUATION) (define category/end-punctuation U_END_PUNCTUATION) (define category/connector-punctuation U_CONNECTOR_PUNCTUATION) (define category/other-punctuation U_OTHER_PUNCTUATION) (define category/math-symbol U_MATH_SYMBOL) (define category/currency-symbol U_CURRENCY_SYMBOL) (define category/modifier-symbol U_MODIFIER_SYMBOL) (define category/other-symbol U_OTHER_SYMBOL) (define category/initial-punctuation U_INITIAL_PUNCTUATION) (define category/final-punctuation U_FINAL_PUNCTUATION) (define category/char-category-count U_CHAR_CATEGORY_COUNT) (define (char-category char) ((foreign-lambda* int ((int32 code)) "C_return(u_charType(code));") (char->integer char))) (define-foreign-enum-type (direction int) (direction->integer integer->direction) ((direction/left-to-right U_LEFT_TO_RIGHT) U_LEFT_TO_RIGHT) ((direction/right-to-left U_RIGHT_TO_LEFT) U_RIGHT_TO_LEFT) ((direction/european-number U_EUROPEAN_NUMBER) U_EUROPEAN_NUMBER) ((direction/european-number-separator U_EUROPEAN_NUMBER_SEPARATOR) U_EUROPEAN_NUMBER_SEPARATOR) ((direction/european-number-terminator U_EUROPEAN_NUMBER_TERMINATOR) U_EUROPEAN_NUMBER_TERMINATOR) ((direction/arabic-number U_ARABIC_NUMBER) U_ARABIC_NUMBER) ((direction/common-number-separator U_COMMON_NUMBER_SEPARATOR) U_COMMON_NUMBER_SEPARATOR) ((direction/block-separator U_BLOCK_SEPARATOR) U_BLOCK_SEPARATOR) ((direction/segment-separator U_SEGMENT_SEPARATOR) U_SEGMENT_SEPARATOR) ((direction/white-space-neutral U_WHITE_SPACE_NEUTRAL) U_WHITE_SPACE_NEUTRAL) ((direction/other-neutral U_OTHER_NEUTRAL) U_OTHER_NEUTRAL) ((direction/left-to-right-embedding U_LEFT_TO_RIGHT_EMBEDDING) U_LEFT_TO_RIGHT_EMBEDDING) ((direction/left-to-right-override U_LEFT_TO_RIGHT_OVERRIDE) U_LEFT_TO_RIGHT_OVERRIDE) ((direction/right-to-left-arabic U_RIGHT_TO_LEFT_ARABIC) U_RIGHT_TO_LEFT_ARABIC) ((direction/right-to-left-embedding U_RIGHT_TO_LEFT_EMBEDDING) U_RIGHT_TO_LEFT_EMBEDDING) ((direction/right-to-left-override U_RIGHT_TO_LEFT_OVERRIDE) U_RIGHT_TO_LEFT_OVERRIDE) ((direction/pop-directional-format U_POP_DIRECTIONAL_FORMAT) U_POP_DIRECTIONAL_FORMAT) ((direction/dir-non-spacing-mark U_DIR_NON_SPACING_MARK) U_DIR_NON_SPACING_MARK) ((direction/boundary-neutral U_BOUNDARY_NEUTRAL) U_BOUNDARY_NEUTRAL) ((direction/first-strong-isolate U_FIRST_STRONG_ISOLATE) U_FIRST_STRONG_ISOLATE) ((direction/left-to-right-isolate U_LEFT_TO_RIGHT_ISOLATE) U_LEFT_TO_RIGHT_ISOLATE) ((direction/right-to-left-isolate U_RIGHT_TO_LEFT_ISOLATE) U_RIGHT_TO_LEFT_ISOLATE) ((direction/pop-directional-isolate U_POP_DIRECTIONAL_ISOLATE) U_POP_DIRECTIONAL_ISOLATE) ((direction/char-direction-count U_CHAR_DIRECTION_COUNT) U_CHAR_DIRECTION_COUNT)) (define direction/left-to-right U_LEFT_TO_RIGHT) (define direction/right-to-left U_RIGHT_TO_LEFT) (define direction/european-number U_EUROPEAN_NUMBER) (define direction/european-number-separator U_EUROPEAN_NUMBER_SEPARATOR) (define direction/european-number-terminator U_EUROPEAN_NUMBER_TERMINATOR) (define direction/arabic-number U_ARABIC_NUMBER) (define direction/common-number-separator U_COMMON_NUMBER_SEPARATOR) (define direction/block-separator U_BLOCK_SEPARATOR) (define direction/segment-separator U_SEGMENT_SEPARATOR) (define direction/white-space-neutral U_WHITE_SPACE_NEUTRAL) (define direction/other-neutral U_OTHER_NEUTRAL) (define direction/left-to-right-embedding U_LEFT_TO_RIGHT_EMBEDDING) (define direction/left-to-right-override U_LEFT_TO_RIGHT_OVERRIDE) (define direction/right-to-left-arabic U_RIGHT_TO_LEFT_ARABIC) (define direction/right-to-left-embedding U_RIGHT_TO_LEFT_EMBEDDING) (define direction/right-to-left-override U_RIGHT_TO_LEFT_OVERRIDE) (define direction/pop-directional-format U_POP_DIRECTIONAL_FORMAT) (define direction/dir-non-spacing-mark U_DIR_NON_SPACING_MARK) (define direction/boundary-neutral U_BOUNDARY_NEUTRAL) (define direction/first-strong-isolate U_FIRST_STRONG_ISOLATE) (define direction/left-to-right-isolate U_LEFT_TO_RIGHT_ISOLATE) (define direction/right-to-left-isolate U_RIGHT_TO_LEFT_ISOLATE) (define direction/pop-directional-isolate U_POP_DIRECTIONAL_ISOLATE) (define direction/char-direction-count U_CHAR_DIRECTION_COUNT) (define (char-direction char) ((foreign-lambda* int ((int32 code)) "C_return(u_charDirection(code));") (char->integer char))) (define (char-combining-class char) ((foreign-lambda* unsigned-byte ((int32 code)) "C_return(u_getCombiningClass(code));") (char->integer char))) (define (char-mirror char) ((foreign-lambda* char ((int32 code)) "C_return(u_charMirror(code));") (char->integer char))) (define (char-bidi-paired-bracket char) ((foreign-lambda* char ((int32 code)) "C_return(u_getBidiPairedBracket(code));") (char->integer char))) (define (char->lower char) ((foreign-lambda* char ((int32 code)) "C_return(u_tolower(code));") (char->integer char))) (define (char->upper char) ((foreign-lambda* char ((int32 code)) "C_return(u_toupper(code));") (char->integer char))) (define (char->title char) ((foreign-lambda* char ((int32 code)) "C_return(u_totitle(code));") (char->integer char))) ;; (define (char-fold-case char) ;; ((foreign-lambda* char ((int32 code)) ;; "C_return(u_foldCase(code));") ;; (char->integer char))) (define (char-digit char radix) ((foreign-lambda* int32 ((int32 code) (byte radix)) "C_return(u_digit(code, radix));") (char->integer char) radix)) (define char-for-digit (foreign-lambda* char ((int32 digit) (byte radix)) "C_return(u_forDigit(digit, radix));")) (define (string-normalize input #!optional (form "nfkc")) (when (not (member form '("nfkc" "nfc" "nfkd" "nfd"))) (error 'normalize "no such normalization form" form)) (let* ((input-len (string-length input)) (input (list->u16vector (map char->integer (string->list input)))) (mode (if (or (string=? form "nfc") (string=? form "nfkc")) (foreign-value UNORM2_COMPOSE int) (foreign-value UNORM2_DECOMPOSE int))) (form (cond ((string=? form "nfd") "nfc") ((string=? form "nfkd") "nfkc") (else form))) (output-len ((foreign-lambda* int32 ((c-string form) (u16vector input) (int32 input_len) (int mode)) "UErrorCode status = U_ZERO_ERROR;" "UNormalizer2 const *norm2 = unorm2_getInstance(NULL, form, mode, &status);" "UChar dest[1];" "int32_t output_len = unorm2_normalize(norm2, input, input_len, dest, 1, &status);" "C_return(output_len);") form input input-len mode)) (output (make-u16vector output-len 0))) ((foreign-lambda* void ((c-string form) (u16vector input) (int32 input_len) (u16vector output) (int32 output_len) (int mode) ) "UErrorCode status = U_ZERO_ERROR;" "UNormalizer2 const *norm2 = unorm2_getInstance(NULL, form, mode, &status);" "int32_t len = unorm2_normalize(norm2, input, input_len, output, output_len, &status);") form input input-len output output-len mode) (list->string (map integer->char (u16vector->list output))))) (define (char-mirrored? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isMirrored(code));") (char->integer char))) (define (char-ualphabetic? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isUAlphabetic(code));") (char->integer char))) (define (char-ulowercase? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isULowercase(code));") (char->integer char))) (define (char-uuppercase? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isUUppercase(code));") (char->integer char))) ;; Whitespace (define (char-uwhitespace? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isUWhiteSpace(code));") (char->integer char))) (define (char-whitespace? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isWhitespace(code));") (char->integer char))) (define (char-java-space? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isJavaSpaceChar(code));") (char->integer char))) (define (char-space? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isspace(code));") (char->integer char))) (define (char-blank? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isblank(code));") (char->integer char))) (define (char-lower? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_islower(code));") (char->integer char))) (define (char-upper? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isupper(code));") (char->integer char))) (define (char-digit? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isdigit(code));") (char->integer char))) (define (char-alpha? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isalpha(code));") (char->integer char))) (define (char-alnum? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isalnum(code));") (char->integer char))) (define (char-xdigit? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isxdigit(code));") (char->integer char))) (define (char-punct? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_ispunct(code));") (char->integer char))) (define (char-graph? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isgraph(code));") (char->integer char))) (define (char-defined? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isdefined(code));") (char->integer char))) (define (char-cntrl? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_iscntrl(code));") (char->integer char))) (define (char-iso-control? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isISOControl(code));") (char->integer char))) (define (char-print? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isprint(code));") (char->integer char))) (define (char-base? char) ((foreign-lambda* bool ((int32 code)) "C_return(u_isbase(code));") (char->integer char))) ;; (define (string-normalized? form input) ;; ) ;; (print (char-alphabetic? #\x) ;; ) )