;;;; This is free software. See LICENSE for copyright information. (define-record-type (cursor offset) string-cursor? (offset cursor-offset : fixnum)) (define-type cursor (struct )) (define-type cursdex (or cursor fixnum)) ;;;; Cursor/index utility (: index-or-cursor? (* --> boolean)) (define (index-or-cursor? x) (or (string-cursor? x) (exact-integer? x))) ;; Calls 'proc' on the string 's' and two offset arguments defining ;; a valid range of 's'. These are drawn from 'opt', which can give ;; up to two of them as cursors or indices. (: call-with-optional-range (symbol string list procedure -> *)) (define (call-with-optional-range loc s opt proc) (let-values (((start end) (case (length opt) ((0) (values 0 (string-length s))) ((1) (assert-type loc (index-or-cursor? (car opt))) (values (%offsetify loc s (car opt)) (string-length s))) ((2) (let ((a (car opt)) (b (cadr opt))) (assert-type loc (index-or-cursor? a)) (assert-type loc (index-or-cursor? b)) (%quick-check-range loc s a b) (values (%offsetify loc s a) (%offsetify loc s b)))) (else (arity-exception loc opt))))) (proc s start end))) ;;; Cursor <-/-> index conversions. This are tuned to use the ;;; utf8-lolevel conversions rather than the (type-checking) exported ;;; forms. ;; Convert an index or cursor on 's' into an offset into s. (: %offsetify (symbol string cursdex --> fixnum)) (define (%offsetify loc s c-or-i) (cond ((string-cursor? c-or-i) (%check-cursor loc s c-or-i) (cursor-offset c-or-i)) (else (handle-exceptions junk (bounds-exception loc "invalid index" s c-or-i) (utf8-index->offset s c-or-i))))) ;; Convert an index or cursor on 's' into an index into s. (: %indexify (symbol string cursdex --> fixnum)) (define (%indexify loc s c-or-i) (cond ((string-cursor? c-or-i) (handle-exceptions junk (bounds-exception loc "invalid cursor" s c-or-i) (utf8-offset->index s (cursor-offset c-or-i)))) ((exact-integer? c-or-i) c-or-i) (else (type-exception loc "not an index or cursor" c-or-i)))) ;; Convert an index or cursor on 's' to a cursor into 's'. (: %cursorify (symbol string cursdex --> cursor)) (define (%cursorify loc s c-or-i) (cond ((string-cursor? c-or-i) c-or-i) ((exact-integer? c-or-i) (handle-exceptions junk (bounds-exception loc "invalid index" s c-or-i) (cursor (utf8-index->offset s c-or-i)))) (else (type-exception loc "not an index or cursor" c-or-i)))) ;;;; Checks & exceptions ;; Ensure cursor 'curs' is valid for 's'. This doesn't even come ;; close to dealing with all of the ways in which cursors can be ;; misused, but at least it's fast. (define (%check-cursor loc s curs) (unless (<= 0 (cursor-offset curs) (string-length s)) (bounds-exception loc "invalid cursor" curs s))) ;; If start and end are cursors, ensure that they describe a ;; valid range of s. If they're both indices, then a basic sanity ;; check is performed. ;; ;; This avoids the O(n) index->offset conversion, and thus only ;; catches cursor-cursor mistakes reliably. (: %quick-check-range (symbol string cursdex cursdex -> undefined)) (define (%quick-check-range loc s start end) (cond ((and (string-cursor? start) (string-cursor? end)) (let ((a (cursor-offset start)) (b (cursor-offset end))) (unless (<= 0 a b (string-length s)) (bounds-exception loc "invalid range" s start end)))) ((and (fixnum? start) (fixnum? end)) (unless (<= 0 start end) ; can't check upper bound quickly (bounds-exception loc "invalid range" s start end))))) ;;;; Cursor operations (: string-cursor-start (string --> cursor)) (define (string-cursor-start s) (assert-type 'string-cursor-start (string? s)) (cursor 0)) ;; string-length is expected to produce the number of bytes in s. (: string-cursor-end (string --> cursor)) (define (string-cursor-end s) (assert-type 'string-cursor-end (string? s)) (cursor (string-length s))) (: string-cursor-next (string cursor --> cursor)) (define (string-cursor-next s curs) (assert-type 'string-cursor-next (string? s)) (assert-type 'string-cursor-next (string-cursor? curs)) (cond ((utf8-next-char s (cursor-offset curs)) => cursor) (else (bounds-exception 'string-cursor-next "invalid cursor" curs s)))) (: string-cursor-prev (string cursor --> cursor)) (define (string-cursor-prev s curs) (assert-type 'string-cursor-prev (string? s)) (assert-type 'string-cursor-prev (string-cursor? curs)) (cond ((utf8-prev-char s (cursor-offset curs)) => cursor) (else (bounds-exception 'string-cursor-prev "invalid cursor" curs s)))) (: string-cursor-forward (string cursor fixnum --> cursor)) (define (string-cursor-forward s curs n) (assert-type 'string-cursor-forward (string? s)) (assert-type 'string-cursor-forward (string-cursor? curs)) (assert-type 'string-cursor-forward (exact-natural? n)) (let lp ((off (cursor-offset curs)) (n n)) (if (zero? n) (cursor off) (lp (utf8-next-char s off) (- n 1))))) (: string-cursor-back (string cursor fixnum --> cursor)) (define (string-cursor-back s curs n) (assert-type 'string-cursor-back (string? s)) (assert-type 'string-cursor-back (string-cursor? curs)) (assert-type 'string-cursor-back (exact-natural? n)) (let lp ((off (cursor-offset curs)) (n n)) (if (zero? n) (cursor off) (lp (utf8-prev-char s off) (- n 1))))) (define-syntax define-cursor-comparison (syntax-rules () ((define-cursor-comparison name op) (begin (: name (cursor cursor -> boolean)) (define (name c1 c2) (assert-type 'name (string-cursor? c1)) (assert-type 'name (string-cursor? c2)) (op (cursor-offset c1) (cursor-offset c2))))))) (define-cursor-comparison string-cursor=? =) (define-cursor-comparison string-cursor? >) (define-cursor-comparison string-cursor<=? <=) (define-cursor-comparison string-cursor>=? >=) (: string-cursor-diff (string cursor cursor --> fixnum)) (define (string-cursor-diff s start end) (assert-type 'string-cursor-diff (string? s)) (assert-type 'string-cursor-diff (index-or-cursor? start)) (assert-type 'string-cursor-diff (index-or-cursor? end)) (%quick-check-range 'string-cursor-diff s start end) (- (string-cursor->index s end) (string-cursor->index s start))) (: string-cursor->index (string cursor --> fixnum)) (define (string-cursor->index s curs) (assert-type 'string-cursor->index (string? s)) (assert-type 'string-cursor->index (string-cursor? curs)) (%check-cursor 'string-cursor->index s curs) (utf8-offset->index s (cursor-offset curs))) (: string-index->cursor (string fixnum --> cursor)) (define (string-index->cursor s i) (assert-type 'string-index->cursor (string? s)) (assert-type 'string-index->cursor (fixnum? i)) (cursor (handle-exceptions junk (bounds-exception 'string-index->cursor "invalid index" s i) (utf8-index->offset s i)))) ;;;; Predicates (: string-null? (string -> boolean)) (define (string-null? s) (assert-type 'string-null? (string? s)) (string=? s "")) (: string-every (procedure string #!optional fixnum fixnum -> *)) (define (string-every pred s . opt) (assert-type 'string-every (procedure? pred)) (assert-type 'string-every (string? s)) (call-with-current-continuation (lambda (return) (apply %string-fold 'string-every (lambda (c junk) (or (pred c) (return #f))) #t s opt)))) (: string-any (procedure string #!optional fixnum fixnum -> *)) (define (string-any pred s . opt) (assert-type 'string-any (procedure? pred)) (assert-type 'string-any (string? s)) (call-with-current-continuation (lambda (return) (apply %string-fold 'string-any (lambda (c junk) (cond ((pred c) => return) (else #f))) #f s opt)))) ;;;; Constructors (: string-tabulate (procedure fixnum -> string)) (define (string-tabulate proc len) (assert-type 'string-tabulate (procedure? proc)) (assert-type 'string-tabulate (fixnum? len)) (utf8:string-tabulate proc len)) (: string-unfold (procedure procedure procedure * #!optional string procedure -> string)) (define string-unfold (case-lambda ((p f g seed) (string-unfold p f g seed "" (constantly ""))) ((p f g seed base) (string-unfold p f g seed base (constantly ""))) ((p f g seed base make-final) (assert-type 'string-unfold (procedure? p)) (assert-type 'string-unfold (procedure? f)) (assert-type 'string-unfold (procedure? g)) (assert-type 'string-unfold (string? base)) (assert-type 'string-unfold (procedure? make-final)) (utf8:string-unfold p f g seed base make-final)))) (: string-unfold-right (procedure procedure procedure * #!optional string procedure -> string)) (define string-unfold-right (case-lambda ((p f g seed) (string-unfold-right p f g seed "" (constantly ""))) ((p f g seed base) (string-unfold-right p f g seed base (constantly ""))) ((p f g seed base make-final) (assert-type 'string-unfold-right (procedure? p)) (assert-type 'string-unfold-right (procedure? f)) (assert-type 'string-unfold-right (procedure? g)) (assert-type 'string-unfold-right (string? base)) (assert-type 'string-unfold-right (procedure? make-final)) (utf8:string-unfold-right p f g seed base make-final)))) ;;;; Conversion (: string->list/cursors (string #!optional cursdex cursdex -> (list-of char))) (define (string->list/cursors s . opt) (assert-type 'string->list/cursors (string? s)) (apply %string-fold-right 'string->list/cursors cons '() s opt)) (: string->vector/cursors (string #!optional cursdex cursdex -> (vector-of char))) (define (string->vector/cursors s . opt) (assert-type 'string->vector/cursors (string? s)) (call-with-optional-range 'string->vector/cursors s opt (lambda (s start end) (let* ((len (- (utf8-offset->index s end) (utf8-offset->index s start))) (vec (make-vector len))) (let lp ((i 0) (b start)) (cond ((>= i len) vec) (else (vector-set! vec i (sp-ref s b)) (lp (+ i 1) (utf8-next-char s b))))))))) (: reverse-list->string ((list-of char) -> string)) (define (reverse-list->string lis) (assert-type 'reverse-list->string (pair-or-null? lis)) (string-unfold-right null? car cdr lis)) (: string-join ((list-of string) #!optional string symbol -> string)) (define string-join (case-lambda ((ss) (string-join ss " " 'infix)) ((ss delim) (string-join ss delim 'infix)) ((ss delim gram) (assert-type 'string-join (pair-or-null? ss)) (assert-type 'string-join (string? delim)) (assert-type 'string-join (symbol? gram)) (unless (memv gram '(infix strict-infix suffix prefix)) (error 'string-join "invalid grammar" gram)) (and (null? ss) (eqv? gram 'strict-infix) (error 'string-join "empty list with strict-infix grammar")) (utf8:string-join ss delim gram)))) ;;;; Selection ;; With a cursor argument this should be O(1), a major advantage of ;; cursors containing offsets. (: string-ref/cursor (string cursdex --> char)) (define (string-ref/cursor s cdex) (assert-type 'string-ref/cursor (string? s)) (assert-type 'string-ref/cursor (index-or-cursor? cdex)) (sp-ref s (%offsetify 'string-ref/cursor s cdex))) (: substring/cursors (string cursdex cursdex -> string)) (define (substring/cursors s start end) (assert-type 'substring/cursors (string? s)) (assert-type 'substring/cursors (index-or-cursor? start)) (assert-type 'substring/cursors (index-or-cursor? end)) (%quick-check-range 'substring/cursors s start end) (substring s (%offsetify 'substring/cursors s start) (%offsetify 'substring/cursors s end))) ;; Needs byte-wise string-copy (: string-copy/cursors (string #!optional cursdex cursdex -> string)) (define (string-copy/cursors s . opt) (assert-type 'string-copy/cursors (string? s)) (call-with-optional-range 'string-copy/cursors s opt byte:string-copy)) ;;; Since the utf8 egg doesn't localize the error when a short string ;;; is passed to the take/drop forms, we raise our own exception. (: string-take (string fixnum -> string)) (define (string-take s nchars) (assert-type 'string-take (string? s)) (assert-type 'string-take (fixnum? nchars)) (handle-exceptions con (if ((condition-predicate 'bounds) con) (bounds-exception 'string-take "length out of range" nchars s) (signal con)) (utf8:string-take s nchars))) (: string-take-right (string fixnum -> string)) (define (string-take-right s nchars) (assert-type 'string-take-right (string? s)) (assert-type 'string-take-right (fixnum? nchars)) (handle-exceptions con (if ((condition-predicate 'bounds) con) (bounds-exception 'string-take-right "length out of range" nchars s) (signal con)) (utf8:string-take-right s nchars))) (: string-drop (string fixnum -> string)) (define (string-drop s nchars) (assert-type 'string-drop (string? s)) (assert-type 'string-drop (fixnum? nchars)) (handle-exceptions con (if ((condition-predicate 'bounds) con) (bounds-exception 'string-drop "length out of range" nchars s) (signal con)) (utf8:string-drop s nchars))) (: string-drop-right (string fixnum -> string)) (define (string-drop-right s nchars) (assert-type 'string-drop-right (string? s)) (assert-type 'string-drop-right (fixnum? nchars)) (handle-exceptions con (if ((condition-predicate 'bounds) con) (bounds-exception 'string-drop-right "length out of range" nchars s) (signal con)) (utf8:string-drop-right s nchars))) ;;; Pad & pad-right ;;; In these cases, it's easier to work directly with indices, ;;; so it's more efficient to wrap the utf8-srfi-13 procedures. (: string-pad (string fixnum #!optional char cursdex cursdex -> string)) (define string-pad (case-lambda ((s len) (string-pad s len #\space 0 (utf8:string-length s))) ((s len char) (string-pad s len char 0 (utf8:string-length s))) ((s len char start) (string-pad s len char start (utf8:string-length s))) ((s len char start end) (assert-type 'string-pad (string? s)) (assert-type 'string-pad (fixnum? len)) (assert-type 'string-pad (char? char)) (assert-type 'string-pad (index-or-cursor? start)) (assert-type 'string-pad (index-or-cursor? end)) (%quick-check-range 'string-pad s start end) (let ((start-idx (%indexify 'string-pad s start)) (end-idx (%indexify 'string-pad s end))) (utf8:string-pad s len char start-idx end-idx))))) (: string-pad-right (string fixnum #!optional char cursdex cursdex -> string)) (define string-pad-right (case-lambda ((s len) (string-pad-right s len #\space 0 (utf8:string-length s))) ((s len char) (string-pad-right s len char 0 (utf8:string-length s))) ((s len char start) (string-pad-right s len char start (utf8:string-length s))) ((s len char start end) (assert-type 'string-pad-right (string? s)) (assert-type 'string-pad-right (fixnum? len)) (assert-type 'string-pad-right (char? char)) (assert-type 'string-pad-right (index-or-cursor? start)) (assert-type 'string-pad-right (index-or-cursor? end)) (%quick-check-range 'string-pad-right s start end) (let ((start-idx (%indexify 'string-pad-right s start)) (end-idx (%indexify 'string-pad-right s end))) (utf8:string-pad-right s len char start-idx end-idx))))) ;;; Trim & trim-right ;; Skips over all characters on the left of 's' that satisfy 'pred' ;; and returns the offset of the first that doesn't. (: %trim-left-offset (string procedure fixnum fixnum --> fixnum)) (define (%trim-left-offset s pred start end) (let lp ((i start)) (if (or (>= i end) (not (pred (sp-ref s i)))) (min i end) (lp (sp-next s i))))) ;; Skips over all characters on the right of 's' that satisfy 'pred' ;; and returns the offset one codepoint beyond the first that doesn't. (: %trim-right-offset (string procedure fixnum fixnum --> fixnum)) (define (%trim-right-offset s pred start end) (let lp ((i (sp-prev s end)) (j end)) (if (or (negative? i) (not (pred (sp-ref s i)))) (max start j) (lp (sp-prev s i) i)))) (: string-trim (string procedure #!optional char cursdex cursdex -> string)) (define string-trim (case-lambda ((s) (string-trim s char-whitespace?)) ((s pred . opt) (assert-type 'string-trim (string? s)) (assert-type 'string-trim (procedure? pred)) (call-with-optional-range 'string-trim s opt (lambda (s start end) (substring s (%trim-left-offset s pred start end) end)))))) (: string-trim-right (string procedure #!optional char cursdex cursdex -> string)) (define string-trim-right (case-lambda ((s) (string-trim-right s char-whitespace?)) ((s pred . opt) (assert-type 'string-trim-right (string? s)) (assert-type 'string-trim-right (procedure? pred)) (call-with-optional-range 'string-trim-right s opt (lambda (s start end) (substring s start (%trim-right-offset s pred start end))))))) (: string-trim-both (string procedure #!optional char cursdex cursdex -> string)) (define string-trim-both (case-lambda ((s) (string-trim-both s char-whitespace?)) ((s pred . opt) (assert-type 'string-trim-both (string? s)) (assert-type 'string-trim-both (procedure? pred)) (call-with-optional-range 'string-trim-both s opt (lambda (s start end) (let ((l-off (%trim-left-offset s pred start end))) (substring s l-off (%trim-right-offset s pred l-off end)))))))) ;;;; Prefixes & suffixes ;; All offset arguments are assumed valid. (: %string-prefix-length/offsets (string fixnum fixnum string fixnum fixnum -> fixnum)) (define (%string-prefix-length/offsets s1 start1 end1 s2 start2 end2) (let* ((start1-idx (utf8-offset->index s1 start1)) (delta (min (- (utf8-offset->index s1 end1) start1-idx) (- (utf8-offset->index s2 end2) (utf8-offset->index s2 start2)))) (limit (utf8-index->offset s1 (+ start1-idx delta)))) (if (and (eq? s1 s2) (= start1 start2)) delta (let lp ((i start1) (j start2) (n start1-idx)) (if (or (>= i limit) (not (char=? (sp-ref s1 i) (sp-ref s2 j)))) (- n start1-idx) (lp (utf8-next-char s1 i) (utf8-next-char s2 j) (+ n 1))))))) (: string-prefix-length (string string #!optional cursdex cursdex cursdex cursdex -> fixnum)) (define (string-prefix-length s1 s2 . opt) (define (->off s x) (%offsetify 'string-prefix-length s x)) (unless (< (length opt) 5) (arity-exception 'string-prefix-length opt)) (let-optionals* opt ((start1 (string-cursor-start s1)) (end1 (string-cursor-end s1)) (start2 (string-cursor-start s2)) (end2 (string-cursor-end s2))) (assert-type 'string-prefix-length (string? s1)) (assert-type 'string-prefix-length (string? s2)) (assert-type 'string-prefix-length (index-or-cursor? start1)) (assert-type 'string-prefix-length (index-or-cursor? end1)) (assert-type 'string-prefix-length (index-or-cursor? start2)) (assert-type 'string-prefix-length (index-or-cursor? end2)) (%quick-check-range 'string-prefix-length s1 start1 end1) (%quick-check-range 'string-prefix-length s2 start2 end2) (%string-prefix-length/offsets s1 (->off s1 start1) (->off s1 end1) s2 (->off s2 start2) (->off s2 end2)))) (: string-prefix? (string string #!optional cursdex cursdex cursdex cursdex -> boolean)) (define (string-prefix? s1 s2 . opt) (define (->off s x) (%offsetify 'string-prefix? s x)) (define (->idx s x) (%indexify 'string-prefix? s x)) (unless (< (length opt) 5) (arity-exception 'string-prefix? opt)) (let-optionals* opt ((start1 (string-cursor-start s1)) (end1 (string-cursor-end s1)) (start2 (string-cursor-start s2)) (end2 (string-cursor-end s2))) (assert-type 'string-prefix? (string? s1)) (assert-type 'string-prefix? (string? s2)) (assert-type 'string-prefix? (index-or-cursor? start1)) (assert-type 'string-prefix? (index-or-cursor? end1)) (assert-type 'string-prefix? (index-or-cursor? start2)) (assert-type 'string-prefix? (index-or-cursor? end2)) (%quick-check-range 'string-prefix? s1 start1 end1) (%quick-check-range 'string-prefix? s2 start2 end2) (let ((len1 (- (->idx s1 end1) (->idx s1 start1)))) ;; Try to eliminate strings with incompatible lengths. (and (<= len1 (- (->idx s2 end2) (->idx s2 start2))) (= len1 (%string-prefix-length/offsets s1 (->off s1 start1) (->off s1 end1) s2 (->off s2 start2) (->off s2 end2))))))) ;; All offset arguments are assumed valid. (: %string-suffix-length/offsets (string fixnum fixnum string fixnum fixnum -> fixnum)) (define (%string-suffix-length/offsets s1 start1 end1 s2 start2 end2) ;; Workaround (define (utf8-prev/min s off) (or (utf8-prev-char s off) -1)) (let* ((end1-idx (utf8-offset->index s1 end1)) (delta (min (- end1-idx (utf8-offset->index s1 start1)) (- (utf8-offset->index s2 end2) (utf8-offset->index s2 start2)))) (base (utf8-index->offset s1 (- end1-idx delta)))) (if (and (eq? s1 s2) (= end1 end2)) delta (let lp ((i (utf8-prev/min s1 end1)) (j (utf8-prev/min s2 end2)) (n (- end1-idx 1))) (if (or (< i base) (not (char=? (sp-ref s1 i) (sp-ref s2 j)))) (- (- end1-idx n) 1) (lp (utf8-prev/min s1 i) (utf8-prev/min s2 j) (- n 1))))))) (: string-suffix-length (string string #!optional cursdex cursdex cursdex cursdex -> fixnum)) (define (string-suffix-length s1 s2 . opt) (define (->off s x) (%offsetify 'string-suffix-length s x)) (unless (< (length opt) 5) (arity-exception 'string-suffix-length opt)) (let-optionals* opt ((start1 (string-cursor-start s1)) (end1 (string-cursor-end s1)) (start2 (string-cursor-start s2)) (end2 (string-cursor-end s2))) (assert-type 'string-suffix-length (string? s1)) (assert-type 'string-suffix-length (string? s2)) (assert-type 'string-suffix-length (index-or-cursor? start1)) (assert-type 'string-suffix-length (index-or-cursor? end1)) (assert-type 'string-suffix-length (index-or-cursor? start2)) (assert-type 'string-suffix-length (index-or-cursor? end2)) (%quick-check-range 'string-suffix-length s1 start1 end1) (%quick-check-range 'string-suffix-length s2 start2 end2) (%string-suffix-length/offsets s1 (->off s1 start1) (->off s1 end1) s2 (->off s2 start2) (->off s2 end2)))) (: string-suffix? (string string #!optional cursdex cursdex cursdex cursdex -> boolean)) (define (string-suffix? s1 s2 . opt) (define (->off s x) (%offsetify 'string-suffix? s x)) (define (->index s x) (%indexify 'string-suffix? s x)) (unless (< (length opt) 5) (arity-exception 'string-suffix? opt)) (let-optionals* opt ((start1 (string-cursor-start s1)) (end1 (string-cursor-end s1)) (start2 (string-cursor-start s2)) (end2 (string-cursor-end s2))) (assert-type 'string-suffix? (string? s1)) (assert-type 'string-suffix? (string? s2)) (assert-type 'string-suffix? (index-or-cursor? start1)) (assert-type 'string-suffix? (index-or-cursor? end1)) (assert-type 'string-suffix? (index-or-cursor? start2)) (assert-type 'string-suffix? (index-or-cursor? end2)) (%quick-check-range 'string-suffix? s1 start1 end1) (%quick-check-range 'string-suffix? s2 start2 end2) (let ((len1 (- (->index s1 end1) (->index s1 start1)))) ;; Try to eliminate strings with incompatible lengths. (and (<= len1 (- (->index s2 end2) (->index s2 start2))) (= len1 (%string-suffix-length/offsets s1 (->off s1 start1) (->off s1 end1) s2 (->off s2 start2) (->off s2 end2))))))) ;;;; Searching (: string-index (string procedure #!optional cursdex cursdex -> cursor)) (define (string-index s pred . opt) (assert-type 'string-index (string? s)) (assert-type 'string-index (procedure? pred)) (call-with-optional-range 'string-index s opt (lambda (s start end) (let lp ((b start)) (cond ((>= b end) (cursor end)) ; failure ((pred (sp-ref s b)) (cursor b)) (else (lp (utf8-next-char s b)))))))) (: string-index-right (string procedure #!optional cursdex cursdex -> cursor)) (define (string-index-right s pred . opt) (assert-type 'string-index-right (string? s)) (assert-type 'string-index-right (procedure? pred)) (call-with-optional-range 'string-index-right s opt (lambda (s start end) (let lp ((b (utf8-prev-char s end))) (cond ((or (not b) (< b start)) (cursor start)) ; failure ((pred (sp-ref s b)) (cursor b)) (else (lp (utf8-prev-char s b)))))))) (: string-skip (string procedure #!optional cursdex cursdex -> cursor)) (define (string-skip s pred . opt) (assert-type 'string-skip (string? s)) (assert-type 'string-skip (procedure? pred)) (call-with-optional-range 'string-skip s opt (lambda (s start end) (cursor (%trim-left-offset s pred start end))))) (: string-skip-right (string procedure #!optional cursdex cursdex -> cursor)) (define (string-skip-right s pred . opt) (assert-type 'string-skip-right (string? s)) (assert-type 'string-skip-right (procedure? pred)) (call-with-optional-range 'string-skip-right s opt (lambda (s start end) (cond ((utf8-prev-char s (%trim-right-offset s pred start end)) => cursor) (else (string-cursor-start s)))))) ;; Like the utf8 egg's version, this makes use of CHICKEN's very fast ;; substring-index instead of a nice algorithm. KMP searching is ;; probably the best way to go, but it'd be a project. (: string-contains (string string #!optional cursdex cursdex cursdex cursdex -> (or cursor false))) (define string-contains (case-lambda ((s1 s2) (assert-type 'string-contains (string? s1)) (assert-type 'string-contains (string? s2)) (cond ((substring-index s2 s1) => cursor) (else #f))) ((s1 s2 start1) (assert-type 'string-contains (string? s1)) (assert-type 'string-contains (string? s2)) (assert-type 'string-contains (index-or-cursor? start1)) (let ((off (%offsetify 'string-contains s1 start1))) (cond ((substring-index s2 s1 off) => cursor) (else #f)))) ((s1 s2 start1 end1 . opt) (define ->off (cut %offsetify 'string-contains <> <>)) (assert-type 'string-contains (string? s1)) (assert-type 'string-contains (string? s2)) (let-optionals opt ((start2 (string-cursor-start s2)) (end2 (string-cursor-end s2))) (assert-type 'string-contains (index-or-cursor? start1)) (assert-type 'string-contains (index-or-cursor? end1)) (assert-type 'string-contains (index-or-cursor? start2)) (assert-type 'string-contains (index-or-cursor? end2)) (%quick-check-range 'string-contains s1 start1 end1) (%quick-check-range 'string-contains s2 start2 end2) (let ((start1-off (->off s1 start1)) (s2 (if (null? opt) s2 (substring s2 (->off s2 start2) (->off s2 end2))))) (cond ((substring-index s2 (substring s1 start1-off (->off s1 end1))) => (lambda (off) (cursor (+ off start1-off)))) (else #f))))))) ;; This could be made cleaner, but, for now, leaning on substring-index ;; (as most implementations do) works well enough. ;; I wonder if there's even been a Scheme search-right implementation ;; that *didn't* cut corners? (: string-contains-right (string string #!optional cursdex cursdex cursdex cursdex -> (or cursor false))) (define string-contains-right (case-lambda ((s1 s2) (string-contains-right s1 s2 (string-cursor-start s1) (string-cursor-end s1))) ((s1 s2 start1) (string-contains-right s1 s2 start1 (string-cursor-end s1))) ((s1 s2 start1 end1 . opt) (assert-type 'string-contains-right (string? s1)) (assert-type 'string-contains-right (string? s2)) (let-optionals opt ((start2 (string-cursor-start s2)) (end2 (string-cursor-end s2))) (assert-type 'string-contains-right (index-or-cursor? start1)) (assert-type 'string-contains-right (index-or-cursor? end1)) (assert-type 'string-contains-right (index-or-cursor? start2)) (assert-type 'string-contains-right (index-or-cursor? end2)) (%quick-check-range 'string-contains-right s1 start1 end1) (%quick-check-range 'string-contains-right s2 start2 end2) (if (= (%offsetify 'string-contains-right s2 start2) (%offsetify 'string-contains-right s2 end2)) (%cursorify 'string-contains-right s1 end1) (%string-search-right (substring/cursors s1 start1 end1) (substring/cursors s2 start2 end2) (%offsetify 'string-contains-right s1 start1))))))) (: %string-search-right (string string fixnum -> (or cursor false))) (define (%string-search-right s1 s2 off) (let ((end-off (string-length s1))) (let lp ((i #f) (j (substring-index s2 s1))) (if (and j (< j end-off)) (lp j (substring-index s2 s1 (+ j 1))) (and i (cursor (+ i off))))))) ;;;; The whole string ;; Useless! (: string-reverse (string #!optional cursdex cursdex -> string)) (define string-reverse (case-lambda ((s) (assert-type 'string-reverse (string? s)) (utf8:string-reverse s)) ((s start) (assert-type 'string-reverse (string? s)) (assert-type 'string-reverse (index-or-cursor? start)) (utf8:string-reverse s (%indexify 'string-reverse s start))) ((s start end) (assert-type 'string-reverse (string? s)) (assert-type 'string-reverse (index-or-cursor? start)) (assert-type 'string-reverse (index-or-cursor? end)) (%quick-check-range 'string-reverse s start end) (utf8:string-reverse s (%indexify 'string-reverse s start) (%indexify 'string-reverse s end))))) (: string-concatenate ((list-of string) -> string)) (define (string-concatenate ss) (assert-type 'string-concatenate (pair-or-null? ss)) (assert-type 'string-concatenate (every string? ss)) (string-intersperse ss "")) ;; Because 'string-intersperse' is so fast, there's no reason to ;; avoid a 'reverse' here. (A little benchmarking showed that ;; the left fold version is much slower.) (: string-concatenate-reverse ((list-of string) -> string)) (define string-concatenate-reverse (case-lambda ((ss) (assert-type 'string-concatenate-reverse (pair-or-null? ss)) (assert-type 'string-concatenate-reverse (every string? ss)) (string-intersperse (reverse ss) "")) ((ss final) (assert-type 'string-concatenate-reverse (pair-or-null? ss)) (assert-type 'string-concatenate-reverse (string? final)) (string-intersperse (reverse (cons final ss)) "")) ((ss final end) (assert-type 'string-concatenate-reverse (pair-or-null? ss)) (assert-type 'string-concatenate-reverse (every string? ss)) (assert-type 'string-concatenate-reverse (string? final)) (assert-type 'string-concatenate-reverse (index-or-cursor? end)) (string-intersperse (reverse (cons (substring/cursors final 0 end) ss)) "")))) (: string-for-each-cursor ((cursor -> *) string #!optional cursdex cursdex -> undefined)) (define (string-for-each-cursor proc s . opt) (assert-type 'string-for-each-cursor (procedure? proc)) (assert-type 'string-for-each-cursor (string? s)) (call-with-optional-range 'string-for-each-cursor s opt (lambda (s start end) (let lp ((curs (cursor start))) (unless (>= (cursor-offset curs) end) (proc curs) (lp (string-cursor-next s curs))))))) ;;;; Folds ;;; Several other procedures use these, so here are versions ;;; with no type checking and a location argument for error ;;; reporting. (Bounds checks are still done on the optional ;;; string range.) (: %string-fold (symbol procedure * string #!optional cursdex cursdex -> *)) (define (%string-fold loc kons knil s . opt) (call-with-optional-range loc s opt (lambda (s start end) (let lp ((b start) (acc knil)) (if (>= b end) acc (lp (utf8-next-char s b) (kons (sp-ref s b) acc))))))) (: %string-fold-right (symbol procedure * string #!optional cursdex cursdex -> *)) (define (%string-fold-right loc kons knil s . opt) (call-with-optional-range loc s opt (lambda (s start end) (let lp ((b (utf8-prev-char s end)) (acc knil)) (if (or (not b) (< b start)) acc (lp (utf8-prev-char s b) (kons (sp-ref s b) acc))))))) ;;; Exported procedures (: string-fold (procedure * string #!optional cursdex cursdex -> *)) (define (string-fold kons knil s . opt) (assert-type 'string-fold (procedure? kons)) (assert-type 'string-fold (string? s)) (apply %string-fold 'string-fold kons knil s opt)) (: string-fold-right (procedure * string #!optional cursdex cursdex -> *)) (define (string-fold-right kons knil s . opt) (assert-type 'string-fold-right (procedure? kons)) (assert-type 'string-fold-right (string? s)) (apply %string-fold-right 'string-fold-right kons knil s opt)) (: string-replicate (string fixnum fixnum #!optional cursdex cursdex -> string)) (define string-replicate (case-lambda ((s from to) (string-replicate s from to (string-cursor-start s) (string-cursor-end s))) ((s from to start) (string-replicate s from to start (string-cursor-end s))) ((s from to start end) (assert-type 'string-replicate (string? s)) (assert-type 'string-replicate (exact-integer? from)) (assert-type 'string-replicate (exact-integer? to)) (assert-type 'string-replicate (index-or-cursor? start)) (assert-type 'string-replicate (index-or-cursor? end)) (unless (<= from to) (error 'string-replicate "invalid range" from to)) (utf8:xsubstring s from to (%indexify 'string-replicate s start) (%indexify 'string-replicate s end))))) (: string-count (string (char -> *) #!optional cursdex cursdex -> fixnum)) (define (string-count s pred . opt) (assert-type 'string-count (string? s)) (assert-type 'string-count (procedure? pred)) (apply %string-fold 'string-count (lambda (c i) (if (pred c) (+ i 1) i)) 0 s opt)) ;; Simple but suboptimal. (: string-replace (string string cursdex cursdex #!optional cursdex cursdex -> string)) (define (string-replace s1 s2 start1 end1 . opt) (define ->off (cut %offsetify 'string-replace <> <>)) (assert-type 'string-replace (string? s1)) (assert-type 'string-replace (string? s2)) (assert-type 'string-replace (index-or-cursor? start1)) (assert-type 'string-replace (index-or-cursor? end1)) (%quick-check-range 'string-replace s1 start1 end1) (call-with-optional-range 'string-replace s2 opt (lambda (s2 start2 end2) (string-append (substring s1 0 (->off s1 start1)) (if (and (zero? start2) ; try to avoid substring (= end2 (string-length s2))) s2 (substring s2 start2 end2)) (substring s1 (->off s1 end1)))))) ;; Most of the complexity (and inefficiency) here is in handling ;; the 'limit' argument. (: string-split (string string #!optional symbol (or fixnum false) cursdex cursdex -> (list-of string))) (define string-split (case-lambda ((s delim) (string-split s delim 'infix #f)) ((s delim grammar) (string-split s delim grammar #f)) ((s delim grammar limit start) (string-split (substring/cursors s start (string-cursor-end s)) delim grammar limit)) ((s delim grammar limit start end) (string-split (substring/cursors s start end) delim grammar limit)) ((s delim grammar limit) (assert-type 'string-split (string? s)) (assert-type 'string-split (string? delim)) (assert-type 'string-split (symbol? grammar)) (assert-type 'string-split (or (exact-natural? limit) (not limit))) (unless (memv grammar '(prefix infix strict-infix suffix)) (error 'string-split "invalid grammar" grammar)) (when (and (eqv? grammar 'strict-infix) (equal? s "")) (error 'string-split "empty string with strict-infix grammar")) (%string-split s delim grammar limit)))) ;; Internal routine with fixed arguments and no range. (: %string-split (string string symbol (or fixnum false) -> (list-of string))) (define (%string-split s delim grammar limit) (let* ((limit (or limit (utf8:string-length s))) (splits (if (equal? "" delim) (%string-split-into-chars s limit) (%string-split-on-word s delim limit)))) (case grammar ((infix strict-infix) splits) ((prefix) (if (and (pair? splits) (equal? "" (car splits))) (cdr splits) splits)) ((suffix) (if (and (pair? splits) (equal? "" (last splits))) (butlast splits) splits)) (else (error '%string-split "can't happen: invalid grammar" grammar))))) (: %string-split-into-chars (string fixnum -> (list-of string))) (define (%string-split-into-chars s limit) (let ((len (utf8:string-length s))) (if (> len (+ limit 1)) (append (%string-split-into-chars (utf8:substring s 0 limit) limit) (list (utf8:substring s limit len))) (map utf8:string (utf8:string->list s))))) (: %string-split-on-word (string string fixnum -> (list-of string))) (define (%string-split-on-word s delim limit) (let ((delim-byte-len (string-length delim))) (letrec ((build (lambda (s limit) (cond ((zero? limit) (list s)) ((substring-index delim s) => (lambda (off) (cons (substring s 0 off) (build (substring s (+ off delim-byte-len)) (- limit 1))))) (else (list s)))))) (build s limit)))) (: string-filter (procedure string #!optional cursdex cursdex -> string)) (define (string-filter pred s . opt) (assert-type 'string-filter (procedure? pred)) (assert-type 'string-filter (string? s)) (utf8:list->string (apply %string-fold-right 'string-filter (lambda (c cs) (if (pred c) (cons c cs) cs)) '() s opt))) (: string-remove (procedure string #!optional cursdex cursdex -> string)) (define (string-remove pred s . opt) (assert-type 'string-remove (procedure? pred)) (assert-type 'string-remove (string? s)) (utf8:list->string (apply %string-fold-right 'string-remove (lambda (c cs) (if (pred c) cs (cons c cs))) '() s opt)))