;;;; string-utils.scm -*- Scheme -*- ;;;; Kon Lovett, Jan '21 ;;;; Kon Lovett, Jul '18 ;;;; Kon Lovett, Aug '10 (module string-utils (;export string-split-chars string-unzip string-zip string-trim-whitespace-both list-as-string number->padded-string string-fixed-length string-longest-prefix string-longest-suffix string-longest-common-prefix string-longest-common-suffix) (import scheme utf8 (chicken base) (chicken type) (chicken sort) (only (chicken port) with-output-to-string) (only (chicken string) string-split) (chicken fixnum) (only (srfi 1) first any append! reverse! map! append-map filter) (only utf8-srfi-13 string-filter string-index string-null? string-take string-prefix-length string-trim-both string-pad string-reverse) (only utf8-srfi-14 char-set:whitespace) (only (check-errors sys) check-list check-char check-string check-fixnum)) (: string-split-chars (string #!optional string -> (list-of string) (list-of char))) (: string-unzip (string #!optional string -> (list-of string) (list-of string))) (: string-zip ((list-of string) (list-of string) -> string)) (: string-trim-whitespace-both (string -> string)) (: list-as-string (list -> string)) (: number->padded-string (number fixnum #!optional char fixnum -> string)) (: string-fixed-length (string fixnum #!rest -> string)) (: *string-longest-common-prefix ((list-of (list-of char)) -> (list-of char))) (: string-longest-common-prefix ((list-of string) -> string)) (: string-longest-common-suffix ((list-of string) -> string)) (: *string-longest-prefix (string (list-of string) procedure -> *)) (: string-longest-prefix (string (list-of string) -> (or false string))) (: string-longest-suffix (string (list-of string) -> (or false string))) ;; (: binary-reduce (('a 'a -> boolean) (list-of 'a) -> boolean)) ;(1st 2nd 3rd ...) ;=> (and (p? (1st ls) (2nd ls)) (p? (2nd ls) (3rd ls)) ...) ;() ;=> #t ;(1st) ;=> #t -- FIXME should an error! ; (define (binary-reduce pred? ls) (or (null? ls) (let loop ((ls (cdr ls)) (prev (car ls))) (or (null? ls) (let ((curr (car ls))) (and (pred? prev curr) (loop (cdr ls) curr) ) ) ) ) ) ) ;; (define DEFAULT-PUNCS ".,") ;NOTE irregex-split doesn't preserve "missing" so punct vs parts is ambiguous ;"..." => string-parts char-punct ;"a.3,c" => ("a" "3" "c") (#\. #\,) (define (string-split-chars str #!optional (puncs DEFAULT-PUNCS)) (let ((parts (string-split str puncs #t)) (punct (string->list (string-filter (cut string-index puncs <>) str))) ) (values parts punct) ) ) ;"..." => parts punct ;"a.b,c" => ("a" "b" "c") ("." ",") (define (string-unzip str #!optional (puncs DEFAULT-PUNCS)) (let-values (((parts punct) (string-split-chars str puncs))) (values parts (map string punct)) ) ) ;punct parts => ? (define (string-zip parts punct) (let loop ((punct punct) (parts parts) (ls '())) (cond ((and (null? punct) (null? parts)) (apply string-append (reverse! ls)) ) ((fx= (length punct) (length parts)) (loop (cdr punct) parts (cons (car punct) ls)) ) (else (loop punct (cdr parts) (cons (car parts) ls)) ) ) ) ) ;; (define (string-trim-whitespace-both str) (string-trim-both str char-set:whitespace) ) (define (list-as-string ls) (with-output-to-string (cut write ls)) ) (define (number->padded-string n wid #!optional (ch #\0) (base 10)) (string-pad (number->string n base) wid ch) ) (define (string-fixed-length s n #!key (pad-char #\space) (trailing "...")) (let ((rem (fx- (check-fixnum 'string-fixed-length n) (string-length (check-string 'string-fixed-length s)))) ) (if (positive? rem) (string-append s (make-string rem (check-char 'string-fixed-length pad-char))) (let ((lim (fx- n (string-length (check-string 'string-fixed-length trailing))))) (if (positive? lim) (string-append (substring s 0 lim) trailing) trailing ) ) ) ) ) ;; (define (*string-longest-common-prefix ls) (let loop ((ls ls) (pre '())) (if (any null? ls) pre (let ((1st-chrs (map first ls))) (if (not (binary-reduce char=? 1st-chrs)) pre (loop (map cdr ls) (cons (first 1st-chrs) pre)) ) ) ) ) ) (define (string-longest-common-prefix strs) (let ((ls (map string->list strs))) (list->string (reverse! (*string-longest-common-prefix ls))) ) ) (define (string-longest-common-suffix strs) (let ((ls (map! reverse! (map string->list strs)))) (list->string (*string-longest-common-prefix ls)) ) ) ;; (define (*string-longest-prefix cand others handler) ;-> ( <∈ others>) (define (prelength-tag item) (cons (string-prefix-length cand item) item) ) ;NOTE descending sort order! (define (prelength-tag> a b) (assume ((a (pair fixnum string)) (b (pair fixnum string)) ) (fx> (car a) (car a)) ) ) ;NOTE descending sort order! (define (prelength-coalesced> a b) (assume ((a (pair fixnum list)) (b (pair fixnum list)) ) (if (fx= (car a) (car b)) ;then distinguish by length (fx> (length (cdr a)) (length (cdr b))) ;else distinguish by char (fx> (car a) (car b))) ) ) ; (let* ((cells (map prelength-tag others)) (cells (sort cells prelength-tag>)) (coalesced (foldl (lambda (coalesced cell) (assume ((cell (pair fixnum string))) (let* ((len (car cell)) (str (cdr cell)) (strs `(,str)) ) (if (null? coalesced) (cons (cons len strs) coalesced) (let ((coalesced-cell (car coalesced))) (if (fx= len (car coalesced-cell)) ;share same (begin (set-cdr! coalesced-cell (append! strs (cdr coalesced-cell))) coalesced) ;changing of the guard (cons (cons len strs) coalesced) ) ) ) ) ) ) '() cells)) ) ;longest (handler (sort coalesced prelength-coalesced>)) ) ) (define (*1st-prefix als) (and (not (null? als)) (let* ((cell (car als)) (len (car cell)) (strs (cdr cell)) ) (and (not (zero? len)) (string-take (car strs) len) ) ) ) ) ;; (define (string-longest-prefix cand others) (*string-longest-prefix (check-string 'string-longest-prefix cand) (check-list 'string-longest-prefix others) *1st-prefix) ) (define (string-longest-suffix cand others) (and-let* ((pre (*string-longest-prefix (string-reverse (check-string 'string-longest-suffix cand)) (map string-reverse (check-list 'string-longest-suffix others)) *1st-prefix)) ) (string-reverse pre) ) ) #| ;this just a complicated scratchpad dump! (: string-longest-prefix+ (string (list-of string) -> (list-of string))) ; (define (string-longest-prefix+ cand others) (define (longest coalesced) (filter identity ;????? (append-map (lambda (cell) (let ((len (car cell)) (strs (cdr cell)) ) (if (zero? len) `(#f) (map (cut string-take <> len) strs) ) ) ) coalesced)) ) (*string-longest-prefix cand others longest) ) |# #| ;BUG '("ba" "bar" "fooba") should be '(bar" "fooba") ! ==== string-longest-prefixes (string-longest-prefixes STRINGS) -> (list-of string) Returns the longest comment prefixes amongst the {{STRINGS}}. ; STRINGS : {{(list-of string)}} ;; (: string-longest-prefixes ((list-of string) -> (list-of string))) ; (define (string-longest-prefixes strs) (let loop ((strs strs) (pres '())) (if (null? strs) pres (let* ((pre (string-longest-prefix (car strs) (cdr strs))) (pres (if (not pre) pres (cons pre pres))) ) (loop (cdr strs) pres) ) ) ) ) (let ((strs )) #; ;BUG '("ba" "bar" "fooba") should be '(bar" "fooba") ! (test '("ba" "bar" "fooba") (string-longest-prefixes strs)) ) |# #| (import (only utf8-srfi-13 string-take string-index-right string-drop)) (import (only utf8-srfi-14 char-set-union char-set:whitespace char-set:punctuation)) (: char-set-ws+punct char-set) (: string-delete-word-left (string #!optional fixnum -> string fixnum)) (define char-set-ws+punct (char-set-union char-set:whitespace char-set:punctuation) ) (define (string-delete-word-left line #!optional (end (string-length line))) (let* ((del-end (or (string-index-right line char-set-ws+punct 0 end) 0 ) ) (left-part (if (not (positive? del-end)) "" (string-take line del-end) ) ) ) (values (string-append left-part (string-drop line end)) del-end) ) ) |# ) ;module string-utils