;;;; string-utils.scm -*- Scheme -*- ;;;; Kon Lovett, Jan '21 ;;;; Kon Lovett, Jul '18 ;;;; Kon Lovett, Aug '10 (module string-utils (;export string-split-chars string-unzip string-zip string-trim-whitespace-both list-as-string number->padded-string string-fixed-length string-subsequence? string-longest-prefix string-longest-suffix string-longest-common-prefix string-longest-common-suffix) (import scheme utf8 (chicken base) (chicken type) (chicken sort) (only (chicken port) with-output-to-string) (only (chicken string) string-split) (chicken fixnum) (only (srfi 1) first any append! reverse! map! append-map filter reduce) (only utf8-srfi-13 string-filter string-index string-null? string-take string-prefix-length string-trim-both string-pad string-reverse) (only utf8-srfi-14 char-set:whitespace) (only (check-errors sys) check-list check-char check-string check-fixnum)) (: string-split-chars (string #!optional string -> (list-of string) (list-of char))) (: string-unzip (string #!optional string -> (list-of string) (list-of string))) (: string-zip ((list-of string) (list-of string) -> string)) (: string-trim-whitespace-both (string -> string)) (: list-as-string (list -> string)) (: number->padded-string (number fixnum #!optional char fixnum -> string)) (: string-fixed-length (string fixnum #!rest -> string)) (: string-subsequence? (string string --> boolean)) (: string-longest-common-prefix ((list-of string) -> string)) (: string-longest-common-suffix ((list-of string) -> string)) (: string-longest-prefix (string (list-of string) -> (or false string))) (: string-longest-suffix (string (list-of string) -> (or false string))) ;; (define DEFAULT-PUNCS ".,") ;NOTE irregex-split doesn't preserve "missing" so punct vs parts is ambiguous ;"..." => string-parts char-punct ;"a.3,c" => ("a" "3" "c") (#\. #\,) (define (string-split-chars str #!optional (puncs DEFAULT-PUNCS)) (let ((parts (string-split str puncs #t)) (punct (string->list (string-filter (cut string-index puncs <>) str))) ) (values parts punct) ) ) ;"..." => parts punct ;"a.b,c" => ("a" "b" "c") ("." ",") (define (string-unzip str #!optional (puncs DEFAULT-PUNCS)) (let-values (((parts punct) (string-split-chars str puncs))) (values parts (map string punct)) ) ) ;punct parts => ? (define (string-zip parts punct) (let loop ((punct punct) (parts parts) (ls '())) (cond ((and (null? punct) (null? parts)) (apply string-append (reverse! ls)) ) ((fx= (length punct) (length parts)) (loop (cdr punct) parts (cons (car punct) ls)) ) (else (loop punct (cdr parts) (cons (car parts) ls)) ) ) ) ) ;; (define (string-trim-whitespace-both str) (string-trim-both str char-set:whitespace) ) (define (list-as-string ls) (with-output-to-string (lambda () (write ls))) ) (define (number->padded-string n wid #!optional (ch #\0) (base 10)) (string-pad (number->string n base) wid ch) ) (define (string-fixed-length s n #!key (pad-char #\space) (trailing "...")) (let ((rem (fx- (check-fixnum 'string-fixed-length n) (string-length (check-string 'string-fixed-length s)))) ) (if (fx< 0 rem) (string-append s (make-string rem (check-char 'string-fixed-length pad-char))) (let ((lim (fx- n (string-length (check-string 'string-fixed-length trailing))))) (if (fx< 0 lim) (string-append (substring s 0 lim) trailing) trailing ) ) ) ) ) ;; ;mit swank (define (string-longest-common-prefix strings) (reduce (lambda (s1 s2) (substring s1 0 (string-prefix-length s1 s2))) "" strings)) (define (string-longest-common-suffix strs) (string-reverse (string-longest-common-prefix (map string-reverse strs))) ) ;; (define (*string-longest-prefix cand others handler) ;-> ( <∈ others>) (define (prelength-tag item) (cons (string-prefix-length cand item) item) ) ;descending sort order! (define (prelength-tag> a b) (assume ((a (pair fixnum string)) (b (pair fixnum string)) ) (fx> (car a) (car a)) ) ) ;descending sort order! (define (prelength-coalesced> a b) (assume ((a (pair fixnum (list-of string))) (b (pair fixnum (list-of string))) ) (if (fx= (car a) (car b)) ;then distinguish by length (fx> (length (cdr a)) (length (cdr b))) ;else distinguish by char (fx> (car a) (car b))) ) ) ; (let ((coalesced (foldl (lambda (coalesced cell) (assume ((cell (pair fixnum string))) (let* ((len (car cell)) (str (cdr cell)) (strs `(,str)) ) (if (null? coalesced) `((,len . ,strs) . ,coalesced) (let ((coalesced-cell (car coalesced))) (if (fx= len (car coalesced-cell)) ;share same (begin (set-cdr! coalesced-cell (append! strs (cdr coalesced-cell))) coalesced) ;changing of the guard `((,len . ,strs) . ,coalesced) ) ) ) ) ) ) '() (sort (map prelength-tag others) prelength-tag>))) ) ;longest (handler (sort! coalesced prelength-coalesced>)) ) ) (define (*1st-prefix als) (and (not (null? als)) (let* ((cell (car als)) (len (car cell)) (strs (cdr cell)) ) (and (not (fx= 0 len)) (string-take (car strs) len) ) ) ) ) ;; ;leet392-1 (define (string-subsequence? s t) (let ((slen (string-length s)) (tlen (string-length t))) (let subseq? ((si 0) (ti 0)) (or (fx= si slen) (and-let* (((not (fx= ti tlen))) (ri (string-index t (string-ref s si) ti)) ) (subseq? (fx+ si 1) ri) ) ) ) ) ) (define (string-longest-prefix cand others) (*string-longest-prefix (check-string 'string-longest-prefix cand) (check-list 'string-longest-prefix others) *1st-prefix) ) (define (string-longest-suffix cand others) (and-let* ((pre (*string-longest-prefix (string-reverse (check-string 'string-longest-suffix cand)) (map string-reverse (check-list 'string-longest-suffix others)) *1st-prefix)) ) ;reversing in-place a utf-8 string is problematic (string-reverse pre) ) ) ) ;module string-utils #| ;this just a complicated scratchpad dump! (: string-longest-prefix+ (string (list-of string) -> (list-of string))) ; (define (string-longest-prefix+ cand others) (define (longest coalesced) (filter identity ;????? (append-map (lambda (cell) (let ((len (car cell)) (strs (cdr cell)) ) (if (zero? len) `(#f) (map (cut string-take <> len) strs) ) ) ) coalesced)) ) (*string-longest-prefix cand others longest) ) |# #| ;BUG '("ba" "bar" "fooba") should be '(bar" "fooba") ! ==== string-longest-prefixes (string-longest-prefixes STRINGS) -> (list-of string) Returns the longest comment prefixes amongst the {{STRINGS}}. ; STRINGS : {{(list-of string)}} ;; (: string-longest-prefixes ((list-of string) -> (list-of string))) ; (define (string-longest-prefixes strs) (let loop ((strs strs) (pres '())) (if (null? strs) pres (let* ((pre (string-longest-prefix (car strs) (cdr strs))) (pres (if (not pre) pres (cons pre pres))) ) (loop (cdr strs) pres) ) ) ) ) (let ((strs )) #; ;BUG '("ba" "bar" "fooba") should be '(bar" "fooba") ! (test '("ba" "bar" "fooba") (string-longest-prefixes strs)) ) |# #| (import (only utf8-srfi-13 string-take string-index-right string-drop)) (import (only utf8-srfi-14 char-set-union char-set:whitespace char-set:punctuation)) (: char-set-ws+punct char-set) (: string-delete-word-left (string #!optional fixnum -> string fixnum)) (define char-set-ws+punct (char-set-union char-set:whitespace char-set:punctuation) ) (define (string-delete-word-left line #!optional (end (string-length line))) (let* ((del-end (or (string-index-right line char-set-ws+punct 0 end) 0 ) ) (left-part (if (not (positive? del-end)) "" (string-take line del-end) ) ) ) (values (string-append left-part (string-drop line end)) del-end) ) ) |#