;;; Copyright (C) 2020 Wolfgang Corcoran-Mathe ;;; ;;; Permission is hereby granted, free of charge, to any person obtaining a ;;; copy of this software and associated documentation files (the ;;; "Software"), to deal in the Software without restriction, including ;;; without limitation the rights to use, copy, modify, merge, publish, ;;; distribute, sublicense, and/or sell copies of the Software, and to ;;; permit persons to whom the Software is furnished to do so, subject to ;;; the following conditions: ;;; ;;; The above copyright notice and this permission notice shall be included ;;; in all copies or substantial portions of the Software. ;;; ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ;;; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (define-library (srfi 207 core) (import (scheme base) (scheme case-lambda) (scheme char) (chicken base) (chicken condition) (chicken type) (only (srfi 1) list-tabulate fold-right unfold) (only (srfi 4) u8vector->list) (only (srfi 13) string-every string-concatenate) (srfi 151) ) (export bytestring bytevector->hex-string bytestring->list make-bytestring make-bytestring! hex-string->bytevector make-bytestring-generator bytestring-pad bytestring-pad-right bytestring-trim bytestring-trim-right bytestring-trim-both bytestring-replace bytestring-index bytestring-index-right bytestring-break bytestring-span bytestring>? bytestring=? bytestring-error? ; in exceptions.scm bytestring-join bytestring-split write-textual-bytestring write-binary-bytestring ) (cond-expand ((library (srfi 158)) (import (only (srfi 158) list->generator))) (else (begin (define (list->generator xs) (lambda () (if (null? xs) (eof-object) (let ((x (car xs))) (set! xs (cdr xs)) x))))))) (cond-expand ((library (srfi 160 u8)) (import (only (srfi 160 u8) u8vector-for-each u8vector-unfold u8vector-map))) (else (begin (define (u8vector-for-each proc bvec) (let ((len (bytevector-length bvec))) (let lp ((i 0)) (cond ((= i len) (if #f #f)) (else (proc (bytevector-u8-ref bvec i)) (lp (+ i 1))))))) (define (u8vector-unfold f len seed) (let ((u8vec (make-bytevector len))) (let lp ((i 0) (seed seed)) (unless (= i len) (let-values (((b seed*) (f i seed))) (bytevector-u8-set! u8vec i b) (lp (+ i 1) seed*)))) u8vec))))) (begin (include "exceptions.scm") (include "utility.scm") (define-type bytevector u8vector) (define-type bstring-elt (or char fixnum bytevector string)) ;;;; Utility (: u8-or-ascii-char? (* -> boolean)) (define u8-or-ascii-char? (disjoin u8? char-ascii?)) (: string-ascii? (* -> boolean)) (define (string-ascii? obj) (and (string? obj) (string-every (lambda (c) (char<=? c #\delete)) obj) #t)) (: valid-bytestring-segment? (* -> boolean)) (define valid-bytestring-segment? (disjoin u8? bytevector? char-ascii? string-ascii?)) (: %bytestring-null? (bytevector -> boolean)) (define (%bytestring-null? bstring) (zero? (bytevector-length bstring))) ;;;; Constructors (: make-bytestring ((list-of bstring-elt) -> bytevector)) (define (make-bytestring lis) (assert-type 'make-bytestring (or (pair? lis) (null? lis))) (call-with-port (open-output-bytevector) (lambda (out) (for-each (lambda (seg) (%write-bytestring-segment 'make-bytestring seg out)) lis) (get-output-bytevector out)))) (: make-bytestring! (bytevector fixnum (list-of bstring-elt) -> undefined)) (define (make-bytestring! bvec at lis) (assert-type 'make-bytestring! (bytevector? bvec)) (assert-type 'make-bytestring! (fixnum? at)) (unless (< at (bytevector-length bvec)) (bounds-exception 'make-bytestring! "index out of bounds" at bvec)) (bytevector-copy! bvec at (make-bytestring lis))) (: %write-bytestring-segment (symbol bstring-elt output-port -> undefined)) (define (%write-bytestring-segment loc obj port) ((cond ((u8? obj) write-u8) ((char-ascii? obj) write-char) ((bytevector? obj) write-bytevector) ((string-ascii? obj) write-string) (else (bytestring-error loc "invalid bytestring element" obj))) obj port)) (: bytestring (#!rest bstring-elt -> bytevector)) (define (bytestring . args) (if (null? args) (bytevector) (make-bytestring args))) ;;;; Conversion ;;; Hex string conversion (: integer->hex-string (integer -> string)) (define (integer->hex-string n) (cond ((number->string n 16) => (lambda (res) (if (even? (string-length res)) res (string-append "0" res)))) (else (bytestring-error 'bytevector->hex-string "not an integer" n)))) (: bytevector->hex-string (bytevector -> string)) (define (bytevector->hex-string bv) (assert-type 'bytevector->hex-string (bytevector? bv)) (string-concatenate (list-tabulate (bytevector-length bv) (lambda (i) (integer->hex-string (bytevector-u8-ref bv i)))))) (: hex-string->bytevector (string -> bytevector)) (define (hex-string->bytevector hex-str) (assert-type 'hex-string->bytevector (string? hex-str)) (let ((len (string-length hex-str))) (unless (even? len) (bytestring-error 'hex-string->bytevector "incomplete hexadecimal string" hex-str)) (u8vector-unfold (lambda (_ i) (let* ((end (+ i 2)) (s (substring hex-str i end)) (n (string->number s 16))) (if n (values n end) (bytestring-error 'hex-string->bytevector "invalid hexadecimal sequence" s)))) (truncate-quotient len 2) 0))) (: bytestring->list (bytevector #!optional fixnum fixnum -> (list-of (or char fixnum)))) (define bytestring->list (case-lambda ((bstring) (bytestring->list bstring 0 (bytevector-length bstring))) ((bstring start) (bytestring->list bstring start (bytevector-length bstring))) ((bstring start end) (assert-type 'bytestring->list (bytevector? bstring)) (assert-type 'bytestring->list (fixnum? start)) (assert-type 'bytestring->list (fixnum? end)) (unless (<= 0 start end (bytevector-length bstring)) (bounds-exception 'bytestring->list "invalid range" start end bstring)) (unfold (lambda (i) (= i end)) (lambda (i) (let ((b (bytevector-u8-ref bstring i))) (if (and (>= b #x20) (< b #x7f)) (integer->char b) b))) (lambda (i) (+ i 1)) start)))) ;; Lazily generate the bytestring constructed from objs. (: make-bytestring-generator (#!rest bstring-elt -> procedure)) (define (make-bytestring-generator . objs) (list->generator (flatten-bytestring-segments objs))) ;; Convert and flatten chars and strings, and flatten bytevectors ;; to yield a flat list of bytes. (define (flatten-bytestring-segments objs) (fold-right (lambda (x res) (cond ((u8? x) (cons x res)) ((char-ascii? x) (cons (char->integer x) res)) ((bytevector? x) (append (u8vector->list x) res)) ((string-ascii? x) (append (map char->integer (string->list x)) res)) (else (bytestring-error 'make-bytestring-generator "invalid bytestring segment" x)))) '() objs)) ;;;; Selection (: %bytestring-pad (symbol bytevector fixnum (or char fixnum) boolean -> bytevector)) (define (%bytestring-pad loc bstring len char-or-u8 right) (assert-type loc (bytevector? bstring)) (assert-type loc (fixnum? len)) (assert-type loc (u8-or-ascii-char? char-or-u8)) (when (negative? len) (error loc "invalid length" len)) (let ((pad-len (- len (bytevector-length bstring))) (pad-byte (if (char? char-or-u8) (char->integer char-or-u8) char-or-u8))) (if (<= pad-len 0) (bytevector-copy bstring) (let ((padded (make-bytevector len pad-byte))) (bytevector-copy! padded (if right 0 pad-len) bstring) padded)))) (: bytestring-pad (bytevector fixnum (or char fixnum) -> bytevector)) (define bytestring-pad (cut %bytestring-pad 'bytestring-pad <> <> <> #f)) (: bytestring-pad-right (bytevector fixnum (or char fixnum) -> bytevector)) (define bytestring-pad-right (cut %bytestring-pad 'bytestring-pad-right <> <> <> #t)) (: bytestring-trim (bytevector (fixnum -> boolean) -> bytevector)) (define (bytestring-trim bstring pred) (assert-type 'bytestring-trim (bytevector? bstring)) (assert-type 'bytestring-trim (procedure? pred)) (let ((new-start (bytestring-index bstring (complement pred)))) (if new-start (bytevector-copy bstring new-start) (bytevector)))) (: bytestring-trim-right (bytevector (integer -> boolean) -> bytevector)) (define (bytestring-trim-right bstring pred) (assert-type 'bytestring-trim-right (bytevector? bstring)) (assert-type 'bytestring-trim-right (procedure? pred)) (cond ((bytestring-index-right bstring (complement pred)) => (lambda (end-1) (bytevector-copy bstring 0 (+ 1 end-1)))) (else (bytevector)))) (: bytestring-trim-both (bytevector (integer -> boolean) -> bytevector)) (define (bytestring-trim-both bstring pred) (assert-type 'bytestring-trim-both (bytevector? bstring)) (assert-type 'bytestring-trim-both (procedure? pred)) (let ((neg-pred (complement pred))) (cond ((bytestring-index bstring neg-pred) => (lambda (start) (bytevector-copy bstring start (+ (bytestring-index-right bstring neg-pred) 1)))) (else (bytevector))))) ;;;; Replacement (: bytestring-replace (bytevector bytevector fixnum fixnum #!optional fixnum fixnum -> bytevector)) (define bytestring-replace (case-lambda ((bstring1 bstring2 start end) (bytestring-replace bstring1 bstring2 start end 0 (bytevector-length bstring2))) ((bstring1 bstring2 start1 end1 start2 end2) (assert-type 'bytestring-replace (bytevector? bstring1)) (assert-type 'bytestring-replace (bytevector? bstring2)) (assert-type 'bytestring-replace (fixnum? start1)) (assert-type 'bytestring-replace (fixnum? end1)) (assert-type 'bytestring-replace (fixnum? start2)) (assert-type 'bytestring-replace (fixnum? end2)) (check-range 'bytestring-replace bstring1 start1 end1) (check-range 'bytestring-replace bstring2 start2 end2) (if (and (= start1 end1) (= start2 end2)) (bytevector-copy bstring1) ; replace no bits with no bits (let* ((b1-len (bytevector-length bstring1)) (sub-len (- end2 start2)) (new-len (+ sub-len (- b1-len (- end1 start1)))) (bs-new (make-bytevector new-len))) (bytevector-copy! bs-new 0 bstring1 0 start1) (bytevector-copy! bs-new start1 bstring2 start2 end2) (bytevector-copy! bs-new (+ start1 sub-len) bstring1 end1 b1-len) bs-new))))) ;;;; Comparison (: bytestring boolean)) (define (bytestring? (bytevector bytevector #!rest bytevector --> boolean)) (define (bytestring>? bvec1 bvec2 . rest) (assert-type 'bytestring>? (bytevector? bvec1)) (assert-type 'bytestring>? (bytevector? bvec2)) (if (null? rest) ; fast path (and (not (eq? bvec1 bvec2)) ; quick check (%bytestring-compare bvec1 bvec2 #f #f #t)) (let loop ((bs bvec1) (bss (cons bvec2 rest))) ; variadic path (or (null? bss) (let ((bs* (car bss))) (and (not (eq? bs bs*)) (%bytestring-compare bs bs* #f #f #t) (loop bs* (cdr bss)))))))) (: bytestring<=? (bytevector bytevector #!rest bytevector --> boolean)) (define (bytestring<=? bvec1 bvec2 . rest) (assert-type 'bytestring<=? (bytevector? bvec1)) (assert-type 'bytestring<=? (bytevector? bvec2)) (if (null? rest) ; fast path (or (eq? bvec1 bvec2) ; quick check (%bytestring-compare bvec1 bvec2 #t #t #f)) (let loop ((bs bvec1) (bss (cons bvec2 rest))) ; variadic path (or (null? bss) (let ((bs* (car bss))) (and (or (eq? bs bs*) (%bytestring-compare bs bs* #t #t #f)) (loop bs* (cdr bss)))))))) (: bytestring>=? (bytevector bytevector #!rest bytevector --> boolean)) (define (bytestring>=? bvec1 bvec2 . rest) (assert-type 'bytestring>=? (bytevector? bvec1)) (assert-type 'bytestring>=? (bytevector? bvec2)) (if (null? rest) ; fast path (or (eq? bvec1 bvec2) ; quick check (%bytestring-compare bvec1 bvec2 #f #t #t)) (let loop ((bs bvec1) (bss (cons bvec2 rest))) ; variadic path (or (null? bss) (let ((bs* (car bss))) (and (or (eq? bs bs*) (%bytestring-compare bs bs* #f #t #t)) (loop bs* (cdr bss)))))))) ;;;; Searching (: bytestring-index (bytevector (integer -> boolean) #!optional fixnum fixnum --> (or fixnum false))) (define bytestring-index (case-lambda ((bstring pred) (bytestring-index bstring pred 0)) ((bstring pred start) (bytestring-index bstring pred start (bytevector-length bstring))) ((bstring pred start end) (assert-type 'bytestring-index (bytevector? bstring)) (assert-type 'bytestring-index (procedure? pred)) (assert-type 'bytestring-index (fixnum? start)) (assert-type 'bytestring-index (fixnum? end)) (check-range 'bytestring-index bstring start end) (let lp ((i start)) (and (< i end) (if (pred (bytevector-u8-ref bstring i)) i (lp (+ i 1)))))))) (: bytestring-index-right (bytevector (integer -> boolean) #!optional fixnum fixnum --> (or fixnum false))) (define bytestring-index-right (case-lambda ((bstring pred) (bytestring-index-right bstring pred 0)) ((bstring pred start) (bytestring-index-right bstring pred start (bytevector-length bstring))) ((bstring pred start end) (assert-type 'bytestring-index-right (bytevector? bstring)) (assert-type 'bytestring-index-right (procedure? pred)) (assert-type 'bytestring-index-right (fixnum? start)) (assert-type 'bytestring-index-right (fixnum? end)) (check-range 'bytestring-index bstring start end) (let lp ((i (- end 1))) (and (>= i start) (if (pred (bytevector-u8-ref bstring i)) i (lp (- i 1)))))))) (: bytestring-break (bytevector (integer -> boolean) -> bytevector bytevector)) (define (bytestring-break bstring pred) (assert-type 'bytestring-break (bytevector? bstring)) (assert-type 'bytestring-break (procedure? pred)) (cond ((bytestring-index bstring pred) => (lambda (len) (values (bytevector-copy bstring 0 len) (bytevector-copy bstring len)))) (else (values (bytevector-copy bstring) (bytevector))))) (: bytestring-span (bytevector (integer -> boolean) -> bytevector bytevector)) (define (bytestring-span bstring pred) (assert-type 'bytestring-span (bytevector? bstring)) (assert-type 'bytestring-span (procedure? pred)) (cond ((bytestring-index bstring (complement pred)) => (lambda (len) (values (bytevector-copy bstring 0 len) (bytevector-copy bstring len)))) (else (values (bytevector-copy bstring) (bytevector))))) ;;;; Joining & Splitting (: %bytestring-join-nonempty ((list-of bytevector) bytevector symbol -> bytevector)) (define (%bytestring-join-nonempty bstrings delimiter grammar) (call-with-port (open-output-bytevector) (lambda (out) (when (eqv? grammar 'prefix) (write-bytevector delimiter out)) (write-bytevector (car bstrings) out) (for-each (lambda (bstr) (write-bytevector delimiter out) (write-bytevector bstr out)) (cdr bstrings)) (when (eqv? grammar 'suffix) (write-bytevector delimiter out)) (get-output-bytevector out)))) (: bytestring-join ((list-of bytevector) bstring-elt #!optional symbol -> bytevector)) (define bytestring-join (case-lambda ((bstrings delimiter) (bytestring-join bstrings delimiter 'infix)) ((bstrings delimiter grammar) (assert-type 'bytestring-join (or (pair? bstrings) (null? bstrings))) (unless (memv grammar '(infix strict-infix prefix suffix)) (bytestring-error "invalid grammar" grammar)) ; FIXME (let ((delim-bstring (bytestring delimiter))) (if (pair? bstrings) (%bytestring-join-nonempty bstrings delim-bstring grammar) (if (eqv? grammar 'strict-infix) (bytestring-error 'bytestring-join "empty list with strict-infix grammar") (bytevector))))))) (: %find-right (bytevector fixnum fixnum -> (or fixnum false))) (define (%find-right bstring byte end) (bytestring-index-right bstring (lambda (b) (= b byte)) 0 end)) (: %bytestring-infix-split (bytevector fixnum -> (list-of bytevector))) (define (%bytestring-infix-split bstring delimiter) (let lp ((token-end (bytevector-length bstring)) (split '())) (cond ((< token-end 0) split) ((%find-right bstring delimiter token-end) => (lambda (token-start-1) (lp token-start-1 (cons (bytevector-copy bstring (+ 1 token-start-1) token-end) split)))) (else (cons (bytevector-copy bstring 0 token-end) split))))) (: %trim-byte (bytevector fixnum -> bytevector)) (define (%trim-byte bstring byte) (bytestring-trim bstring (lambda (b) (= b byte)))) (: %trim-right-byte (bytevector fixnum -> bytevector)) (define (%trim-right-byte bstring byte) (bytestring-trim-right bstring (lambda (b) (= b byte)))) (: %bytestring-split/trim-outliers (bytevector fixnum symbol -> (list-of bytevector))) (define (%bytestring-split/trim-outliers bstring delimiter grammar) (let ((trimmed (case grammar ((infix strict-infix) bstring) ((prefix) (%trim-byte bstring delimiter)) ((suffix) (%trim-right-byte bstring delimiter))))) (%bytestring-infix-split trimmed delimiter))) (: bytestring-split (bytevector bstring-elt #!optional symbol -> (list-of bytevector))) (define bytestring-split (case-lambda ((bstring delimiter) (bytestring-split bstring delimiter 'infix)) ((bstring delimiter grammar) (assert-type 'bytestring-split (bytevector? bstring)) (assert-type 'bytestring-split (u8-or-ascii-char? delimiter)) (unless (memv grammar '(infix strict-infix prefix suffix)) (bytestring-error "invalid grammar" grammar)) ; FIXME (if (%bytestring-null? bstring) '() (%bytestring-split/trim-outliers bstring (if (char? delimiter) (char->integer delimiter) delimiter) grammar))))) ;;;; I/O (: backslash-codepoints (list-of (pair fixnum char))) (define backslash-codepoints '((7 . #\a) (8 . #\b) (9 . #\t) (10 . #\n) (13 . #\r) (34 . #\") (92 . #\\) (124 . #\|))) (: write-textual-bytestring (bytevector #!optional output-port -> undefined)) (define write-textual-bytestring (case-lambda ((bstring) (write-textual-bytestring bstring (current-output-port))) ((bstring port) (parameterize ((current-output-port port)) (write-string "#u8\"") (u8vector-for-each (lambda (b) (cond ((assv b backslash-codepoints) => (lambda (p) (write-char #\\) (write-char (cdr p)))) ((and (>= b #x20) (<= b #x7e)) (write-char (integer->char b))) (else (write-string "\\x") (write-string (number->string b 16)) (write-char #\;)))) bstring) (write-char #\"))))) (: write-binary-bytestring (bytevector output-port #!rest bstring-elt -> undefined)) (define (write-binary-bytestring port . args) (assert-type 'write-binary-bytestring (binary-port? port)) (for-each (lambda (arg) (unless (valid-bytestring-segment? arg) (bytestring-error 'write-binary-bytestring "invalid bytestring element" arg))) args) (for-each (lambda (seg) (%write-bytestring-segment 'write-binary-bytestring seg port)) args)) ))