;;;; string-utils.scm -*- Scheme -*- ;;;; Kon Lovett, Jan '21 ;;;; Kon Lovett, Jul '18 ;;;; Kon Lovett, Aug '10 (module string-utils (;export string-unzip string-zip string-trim-whitespace-both list-as-string number->padded-string string-fixed-length string-longest-prefix string-longest-suffix string-longest-common-prefix string-longest-common-suffix) (import scheme utf8 (chicken base) (chicken type) (chicken sort) (only (chicken port) with-output-to-string) (only (chicken string) string-split) (only (srfi 1) first any append! reverse! map! append-map filter) (only utf8-srfi-13 string-filter string-index string-null? string-take string-prefix-length string-trim-both string-pad string-reverse) (only utf8-srfi-14 char-set:whitespace) (only type-checks check-list check-char check-string check-fixnum)) ;; (: string-unzip (string string -> (list-of string) (list-of string))) (: string-zip ((list-of string) (list-of string) -> string)) (: string-trim-whitespace-both (string -> string)) (: list-as-string (list -> string)) (: number->padded-string (number fixnum #!optional char fixnum -> string)) (: string-fixed-length (string fixnum #!rest -> string)) (: *string-longest-common-prefix ((list-of (list-of char)) -> (list-of char))) (: string-longest-common-prefix ((list-of string) -> string)) (: string-longest-common-suffix ((list-of string) -> string)) (: *string-longest-prefix (string (list-of string) procedure -> *)) (: string-longest-prefix (string (list-of string) -> (or boolean string))) (: string-longest-suffix (string (list-of string) -> (or boolean string))) ;; ;(binary-predicate-reduce p? ls) ;=> (and (p? (1st ls) (2nd ls)) (p? (2nd ls) (3rd ls)) ...) ; (define (binary-predicate-reduce pred? ls) (or (null? ls) (let loop ((ls (cdr ls)) (prev (first ls))) (or (null? ls) (let ((curr (first ls))) (and (pred? prev curr) (loop (cdr ls) curr) ) ) ) ) ) ) ;; ;NOTE irregex-split doesn't preserve "missing" so punct vs parts is ambiguous ;"..." => parts punct ;"a.b,c" => ("a" "b" "c") ("." ",") (define (string-unzip str punct-str) (let ( (parts (string-split str punct-str #t)) (punct (map string (string->list (string-filter (cut string-index punct-str <>) str)))) ) (values parts punct) ) ) ;punct parts => ? (define (string-zip parts punct) (let loop ((punct punct) (parts parts) (ls '())) (cond ((and (null? punct) (null? parts)) (apply string-append (reverse! ls)) ) ((= (length punct) (length parts)) (loop (cdr punct) parts (cons (car punct) ls)) ) (else (loop punct (cdr parts) (cons (car parts) ls)) ) ) ) ) ;; (define (string-trim-whitespace-both str) (string-trim-both str char-set:whitespace) ) (define (list-as-string ls) (with-output-to-string (cut write ls)) ) (define (number->padded-string n wid #!optional (ch #\0) (base 10)) (string-pad (number->string n base) wid ch) ) (define (string-fixed-length s n #!key (pad-char #\space) (trailing "...")) (let ( (rem (- (check-fixnum 'string-fixed-length n) (string-length (check-string 'string-fixed-length s)))) ) (if (positive? rem) (string-append s (make-string rem (check-char 'string-fixed-length pad-char))) (let ( (lim (- n (string-length (check-string 'string-fixed-length trailing)))) ) (if (positive? lim) (string-append (substring s 0 lim) trailing) trailing ) ) ) ) ) ;; (define (*string-longest-common-prefix ls) (let loop ((ls ls) (pre '())) (if (any null? ls) pre (let ((1st-chrs (map first ls))) (if (not (binary-predicate-reduce char=? 1st-chrs)) pre (loop (map cdr ls) (cons (first 1st-chrs) pre)) ) ) ) ) ) (define (string-longest-common-prefix strs) (let ((ls (map string->list strs))) (list->string (reverse! (*string-longest-common-prefix ls))) ) ) (define (string-longest-common-suffix strs) (let ((ls (map! reverse! (map string->list strs)))) (list->string (*string-longest-common-prefix ls)) ) ) ;; (define (*string-longest-prefix cand others handler) ;-> ( <∈ others>) (define (prelength-tag item) (cons (string-prefix-length cand item) item) ) ;NOTE descending sort order! (define (prelength-tag> a b) (assume ( (a (pair fixnum string)) (b (pair fixnum string)) ) (> (car a) (car a)) ) ) ;NOTE descending sort order! (define (prelength-coalesced> a b) (assume ( (a (pair fixnum list)) (b (pair fixnum list)) ) (if (= (car a) (car b)) (> (length (cdr a)) (length (cdr b))) (> (car a) (car b))) ) ) ; (let* ( (cells (map prelength-tag others)) (cells (sort cells prelength-tag>)) (coalesced (foldl (lambda (coalesced cell) (assume ( (cell (pair fixnum string)) ) (let* ( (len (car cell)) (str (cdr cell)) (strs `(,str)) ) (if (null? coalesced) (cons (cons len strs) coalesced) (let ( (coalesced-cell (car coalesced)) ) (if (= len (car coalesced-cell)) ;share same (begin (set-cdr! coalesced-cell (append! strs (cdr coalesced-cell))) coalesced) ;changing of the guard (cons (cons len strs) coalesced) ) ) ) ) ) ) '() cells)) ) ;longest (handler (sort coalesced prelength-coalesced>)) ) ) (define (*1st-prefix als) (and (not (null? als)) (let* ( (cell (car als)) (len (car cell)) (strs (cdr cell)) ) (and (not (zero? len)) (string-take (car strs) len) ) ) ) ) ;; (define (string-longest-prefix cand others) (*string-longest-prefix (check-string 'string-longest-prefix cand) (check-list 'string-longest-prefix others) *1st-prefix) ) (define (string-longest-suffix cand others) (and-let* ( (pre (*string-longest-prefix (string-reverse (check-string 'string-longest-suffix cand)) (map string-reverse (check-list 'string-longest-suffix others)) *1st-prefix)) ) (string-reverse pre) ) ) #| ;this just a complicated scratchpad dump! (: string-longest-prefix+ (string (list-of string) --> (list-of string))) ; (define (string-longest-prefix+ cand others) (define (longest coalesced) (filter identity (append-map (lambda (cell) (let ( (len (car cell)) (strs (cdr cell)) ) (if (zero? len) `(#f) (map (cut string-take <> len) strs) ) ) ) coalesced)) ) (*string-longest-prefix cand others longest) ) |# #| ;BUG '("ba" "bar" "fooba") should be '(bar" "fooba") ! ==== string-longest-prefixes (string-longest-prefixes STRINGS) --> (list-of string) Returns the longest comment prefixes amongst the {{STRINGS}}. ; STRINGS : {{(list-of string)}} ;; (: string-longest-prefixes ((list-of string) --> (list-of string))) ; (define (string-longest-prefixes strs) (let loop ((strs strs) (pres '())) (if (null? strs) pres (let* ( (pre (string-longest-prefix (car strs) (cdr strs))) (pres (if (not pre) pres (cons pre pres))) ) (loop (cdr strs) pres) ) ) ) ) (let ((strs )) #; ;BUG '("ba" "bar" "fooba") should be '(bar" "fooba") ! (test '("ba" "bar" "fooba") (string-longest-prefixes strs)) ) |# ) ;module string-utils