;;; Procedures modeled on those from various CHICKEN libraries, ;;; (chicken string) in particular. ;;; ;;; These are extensions to SRFI 207. (define-library (srfi 207 extensions) (import (scheme base) (scheme char) r7rs (chicken base) (chicken condition) (chicken type) (only (srfi 1) list-index unfold fold every) ) (export bytestring-translate bytestring-substitute bytestring-chomp subbytestring=? bytestring-compare3 bytestring-prefix-length bytestring-suffix-length bytestring-prefix? bytestring-suffix? bytestring-segment bytestring-contains bytestring-contains-right bytestring-concatenate-reverse bytestring-replicate ) (begin (include "exceptions.scm") (include "utility.scm") (define-type bytevector u8vector) (: check-range (symbol bytevector fixnum fixnum -> undefined)) (define (check-range loc bstring start end) (unless (<= 0 start end (bytevector-length bstring)) (bounds-exception loc "invalid range" bstring start end))) (: %to-byte (symbol (or fixnum char) -> fixnum)) (define (%to-byte loc x) (cond ((u8? x) x) ((char-ascii? x) (char->integer x)) (else (bytestring-error loc "invalid bytestring element" x)))) (: bytestring-translate (bytevector (or char fixnum list) (or char fixnum list) -> bytevector)) (define (bytestring-translate bstring from to) (assert-type 'bytestring-translate (or (char? from) (fixnum? from) (pair-or-null? from))) (assert-type 'bytestring-translate (or (char? to) (fixnum? to) (pair-or-null? to))) (if (and (pair? from) (pair? to)) (unless (<= (length from) (length to)) (error 'bytestring-translate "'to' must contain at least as many elements as 'from'" from to))) (let ((index (%make-indexer from)) (tos (if (pair-or-null? to) to (list to)))) (u8vector-map (lambda (b) (cond ((index b) => (lambda (j) (%to-byte 'bytestring-translate (list-ref tos j)))) (else b))) bstring))) (: %make-indexer ((or fixnum char list) -> procedure)) (define (%make-indexer obj) (cond ((u8? obj) (lambda (b) (and (= obj b) 0))) ((char-ascii? obj) (lambda (b) (and (= b (char->integer obj)) 0))) ((null? obj) (lambda (b) #f)) ((pair? obj) (lambda (b) (list-index (lambda (c) (= b (%to-byte 'bytestring-translate c))) obj))) (else (type-exception 'bytestring-translate "invalid 'from' object" obj)))) ;; Based on string-translate*, but simpler: only maps bytes to bytes. ;; tmap may mix bytes and characters. (: bytestring-substitute (bytevector (list-of pair) -> bytevector)) (define (bytestring-substitute bstring tmap) (assert-type 'bytestring-substitute (bytevector? bstring)) (assert-type 'bytestring-substitute (pair-or-null? tmap)) (let ((tmap-bytes (map (lambda (p) (cons (%to-byte 'bytestring-substitute (car p)) (%to-byte 'bytestring-substitute (cdr p)))) tmap))) (u8vector-map (lambda (b) (cond ((assv b tmap-bytes) => cdr) (else b))) bstring))) (: bytestring-chomp (bytevector #!optional bytevector -> bytevector)) (define bytestring-chomp (case-lambda ((bstring) (bytestring-chomp bstring #u8(#xa))) ((bstring suffix) (assert-type 'bytestring-chomp (bytevector? bstring)) (assert-type 'bytestring-chomp (bytevector? suffix)) (let ((blen (bytevector-length bstring)) (slen (bytevector-length suffix))) (if (bytestring-suffix? suffix bstring) (bytevector-copy bstring 0 (- blen slen)) (bytevector-copy bstring)))))) (: subbytestring=? (bytevector bytevector #!optional fixnum fixnum fixnum --> boolean)) (define (subbytestring=? bs1 bs2 . opt) (assert-type 'subbytestring=? (bytevector? bs1)) (assert-type 'subbytestring=? (bytevector? bs2)) (if (null? opt) (equal? bs1 bs2) (let-optionals* opt ((start1 0) (start2 0) (len (- (bytevector-length bs1) start1))) (assert-type 'subbytestring=? (fixnum? start1)) (assert-type 'subbytestring=? (fixnum? start2)) (assert-type 'subbytestring=? (fixnum? len)) (check-index 'subbytestring=? bs1 start1) (check-index 'subbytestring=? bs2 start2) (unless (<= 0 len (bytevector-length bs1)) (error 'subbytestring=? "invalid length" len)) (%subbytestring=? = bs1 bs2 start1 start2 len)))) ;; All bounds assumed correct. (: %subbytestring=? (procedure bytevector bytevector fixnum fixnum fixnum -> boolean)) (define (%subbytestring=? comp bs1 bs2 start1 start2 len) (let ((len1 (bytevector-length bs1)) (len2 (bytevector-length bs2))) (let loop ((i start1) (j start2) (k 0)) (cond ((= k len) #t) ((or (>= i len1) (>= j len2)) #f) ((comp (bytevector-u8-ref bs1 i) (bytevector-u8-ref bs2 j)) (loop (+ i 1) (+ j 1) (+ k 1))) (else #f))))) (: bytestring-compare3 (bytevector bytevector --> fixnum)) (define (bytestring-compare3 bs1 bs2) (assert-type 'bytestring-compare3 (bytevector? bs1)) (assert-type 'bytestring-compare3 (bytevector? bs2)) (%bytestring-compare bs1 bs2 -1 0 1)) ;;; Bytestring procedures derived from SRFI 152. Many implementations ;;; are based on SRFI 178 adaptations of the SRFI 1 sample impl.. ;;; TODO: start/end indices. (: bytestring-prefix-length (bytevector bytevector --> fixnum)) (define (bytestring-prefix-length bvec1 bvec2) (assert-type 'bytestring-prefix-length (bytevector? bvec1)) (assert-type 'bytestring-prefix-length (bytevector? bvec2)) (let ((end (min (bytevector-length bvec1) (bytevector-length bvec2)))) (if (eq? bvec1 bvec2) end (let lp ((i 0)) (if (or (>= i end) (not (= (bytevector-u8-ref bvec1 i) (bytevector-u8-ref bvec2 i)))) i (lp (+ i 1))))))) (: bytestring-suffix-length (bytevector bytevector --> fixnum)) (define (bytestring-suffix-length bvec1 bvec2) (assert-type 'bytestring-suffix-length (bytevector? bvec1)) (assert-type 'bytestring-suffix-length (bytevector? bvec2)) (let ((end1 (bytevector-length bvec1)) (end2 (bytevector-length bvec2))) (let* ((delta (min end1 end2)) (start (- end1 delta))) (if (eq? bvec1 bvec2) delta (let lp ((i (- end1 1)) (j (- end2 1))) (if (or (< i start) (not (= (bytevector-u8-ref bvec1 i) (bytevector-u8-ref bvec2 j)))) (- (- end1 i) 1) (lp (- i 1) (- j 1)))))))) (: bytestring-prefix? (bytevector bytevector --> boolean)) (define (bytestring-prefix? bvec1 bvec2) (assert-type 'bytestring-prefix? (bytevector? bvec1)) (assert-type 'bytestring-prefix? (bytevector? bvec2)) (let ((len1 (bytevector-length bvec1))) (and (<= len1 (bytevector-length bvec2)) (= (bytestring-prefix-length bvec1 bvec2) len1)))) (: bytestring-suffix? (bytevector bytevector --> boolean)) (define (bytestring-suffix? bvec1 bvec2) (assert-type 'bytestring-suffix? (bytevector? bvec1)) (assert-type 'bytestring-suffix? (bytevector? bvec2)) (let ((len1 (bytevector-length bvec1))) (and (<= len1 (bytevector-length bvec2)) (= (bytestring-suffix-length bvec1 bvec2) len1)))) ;; Known as "chop" in (chicken string). (: bytestring-segment (bytevector fixnum -> (list-of bytevector))) (define (bytestring-segment bvec k) (assert-type 'bytestring-segment (bytevector? bvec)) (assert-type 'bytestring-segment (fixnum? k)) (unless (> k 0) (error 'bytestring-segment "segment length must be a positive integer" k)) (let ((len (bytevector-length bvec))) (unfold (lambda (i) (>= i len)) (lambda (i) (bytevector-copy bvec i (min (+ i k) len))) (lambda (i) (+ i k)) 0))) ;; FIXME: Inefficient. We should correctly implement ;; Knuth-Morris-Pratt searching and rewrite this. (: bytestring-contains (bytevector bytevector #!optional fixnum fixnum fixnum fixnum --> (or fixnum false))) (define (bytestring-contains bs1 bs2 . opt) (assert-type 'bytestring-contains (bytevector? bs1)) (assert-type 'bytestring-contains (bytevector? bs2)) (let ((len1 (bytevector-length bs1)) (len2 (bytevector-length bs2))) (let-optionals opt ((start1 0) (end1 len1) (start2 0) (end2 len2)) (when (pair? opt) (assert-type 'bytestring-contains (fixnum? start1)) (assert-type 'bytestring-contains (fixnum? end1)) (assert-type 'bytestring-contains (fixnum? start2)) (assert-type 'bytestring-contains (fixnum? end2)) (check-range 'bytestring-contains bs1 start1 end1) (check-range 'bytestring-contains bs2 start2 end2)) (if (= start2 end2) start1 (let* ((sub-len (- end2 start2)) (i-bound (- end1 sub-len))) (let loop ((i start1)) (and (<= i i-bound) (if (subbytestring=? bs1 bs2 i start2 sub-len) i (loop (+ i 1)))))))))) (: bytestring-contains-right (bytevector bytevector #!optional fixnum fixnum fixnum fixnum --> (or fixnum false))) (define (bytestring-contains-right bs1 bs2 . opt) (assert-type 'bytestring-contains-right (bytevector? bs1)) (assert-type 'bytestring-contains-right (bytevector? bs2)) (let ((len1 (bytevector-length bs1)) (len2 (bytevector-length bs2))) (let-optionals opt ((start1 0) (end1 len1) (start2 0) (end2 len2)) (when (pair? opt) (assert-type 'bytestring-contains-right (fixnum? start1)) (assert-type 'bytestring-contains-right (fixnum? end1)) (assert-type 'bytestring-contains-right (fixnum? start2)) (assert-type 'bytestring-contains-right (fixnum? end2)) (check-range 'bytestring-contains-right bs1 start1 end1) (check-range 'bytestring-contains-right bs2 start2 end2)) (if (= start2 end2) end1 (let* ((sub-len (- end2 start2)) (i-bound start1)) (let loop ((i (- end1 sub-len))) (and (>= i i-bound) (if (subbytestring=? bs1 bs2 i start2 sub-len) i (loop (- i 1)))))))))) (: bytestring-concatenate-reverse ((list-of bytevector) #!optional bytevector fixnum -> bytevector)) (define (bytestring-concatenate-reverse bss . opt) (let-optionals* opt ((final #u8()) (end (bytevector-length final))) (assert-type 'bytestring-concatenate-reverse (pair-or-null? bss)) (when (pair? opt) (assert-type 'bytestring-concatenate-reverse (bytevector? final)) (assert-type 'bytestring-concatenate-reverse (fixnum? end)) (unless (<= 0 end (bytevector-length final)) (bounds-exception 'bytestring-concatenate-reverse "end index out of range" final end))) (let ((len (fold + 0 (map bytevector-length bss)))) (%concat-reverse len bss final end)))) (: %concat-reverse (fixnum (list-of bytevector) bytevector fixnum -> bytevector)) (define (%concat-reverse len bss final end) (let ((res (make-bytevector (+ len end)))) (unless (zero? end) (bytevector-copy! res len final 0 end)) (fold (lambda (bs i) (assert-type '%concat-reverse (bytevector? bs)) (let* ((l (bytevector-length bs)) (i* (- i l))) (bytevector-copy! res i* bs 0 l) i*)) len bss) res)) (: bytestring-replicate (bytevector fixnum fixnum #!optional fixnum fixnum -> bytevector)) (define (bytestring-replicate bvec from to . opt) (assert-type 'bytestring-replicate (bytevector? bvec)) (assert-type 'bytestring-replicate (fixnum? from)) (assert-type 'bytestring-replicate (fixnum? to)) (let ((full-len (bytevector-length bvec))) (let-optionals opt ((start 0) (end full-len)) (when (pair? opt) (assert-type 'bytestring-replicate (fixnum? start)) (assert-type 'bytestring-replicate (fixnum? end)) (check-range 'bytestring-replicate bvec start end)) (let ((len (- end start)) (reslen (- to from))) (cond ((zero? reslen) #u8()) ; OK if end = start here. ((= len 1) ; Fast path. (make-bytevector reslen (bytevector-u8-ref bvec start))) ((= (floor (/ from len)) (floor (/ to len))) (bytevector-copy bvec ; All within one span. (+ start (modulo from len)) (+ start (modulo to len)))) ((positive? len) (let ((res (make-bytevector reslen))) (%multispan-repcopy! res 0 bvec from to start end) res)) (else (error 'bytestring-replicate "invalid substring length" start end))))))) ;; Adapted from SRFI 13. Thanks, Olin! (: %multispan-repcopy! (bytevector fixnum bytevector fixnum fixnum fixnum fixnum -> bytevector)) (define (%multispan-repcopy! target tstart bs from to start end) (let* ((len (- end start)) (i0 (+ start (modulo from len))) (total-chars (- to from))) ;; Copy the initial, partial span. (bytevector-copy! target tstart bs i0 end) (let* ((ncopied (- end i0)) ; # of chars copied so far. (nleft (- total-chars ncopied)) ; # of chars left to copy. (nspans (quotient nleft len))) ; # of whole spans to copy. ;; Copy the central, whole spans. (do ((i (+ tstart ncopied) (+ i len)) ; Current target index. (nspans nspans (- nspans 1))) ; # of spans to copy. ((zero? nspans) ;; Copy the ending partial span. (let ((frag-end (+ start (- total-chars (- i tstart))))) (bytevector-copy! target i bs start frag-end))) ;; Copy a whole span. (bytevector-copy! target i bs start end))))) ))