;; ;; Utility procedures for manipulating SRFI-41 streams containing ;; byte blobs. ;; ;; Copyright 2009-2010 Ivan Raikov and the Okinawa Institute of ;; Science and Technology. ;; ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . ;; (module byte-blob-stream ( byte-blob-stream? byte-blob-stream-empty byte-blob-stream-empty? byte-blob-stream-length byte-blob-stream-cons byte-blob-stream-car byte-blob-stream-cdr byte-blob-stream-ref byte-blob-stream-append byte-blob-stream-reverse byte-blob-stream-intersperse byte-blob-stream-take byte-blob-stream-drop byte-blob-stream-span byte-blob-stream-map byte-blob-stream-fold-left byte-blob-stream-fold-right byte-blob-stream-find byte-blob-stream->list list->byte-blob-stream file->byte-blob-stream ) (import scheme chicken foreign data-structures) (require-extension streams streams-utils byte-blob) (define (byte-blob-stream? x) (and (stream? x) (or (stream-null? x) (byte-blob? (stream-car x))))) (define (byte-blob-stream-empty) stream-null) (define (byte-blob-stream-empty? sb) (stream-null? sb)) (define (byte-blob-stream-length sb) (stream-fold (lambda (b sum) (+ sum (byte-blob-length b))) 0 sb)) (define (byte-blob-stream-cons b sb) (assert (byte-blob? b)) (stream-cons b sb)) (define byte-blob-stream-car (compose byte-blob-car stream-car)) (define (byte-blob-stream-cdr sb) (let ((b (stream-car sb))) (assert (byte-blob? b)) (if (<= (byte-blob-length b) 1) (stream-cdr sb) (stream-cons (byte-blob-cdr b) (stream-cdr sb))))) (define byte-blob-stream-append stream-append) (define (byte-blob-stream-reverse sb) (define-stream (reverse1 a b) (if (stream-null? b) a (let ((bb (stream-car b))) (assert (byte-blob? bb)) (reverse1 (stream-cons (byte-blob-reverse bb) a) (stream-cdr b))))) (reverse1 stream-null sb)) (define (byte-blob-stream-intersperse sb w) (assert (fixnum? w)) (define-stream (intersperse1 sb) (if (stream-null? sb) sb (let ((b (stream-car sb))) (assert (byte-blob? b)) (let ((b1 (byte-blob-intersperse b w))) (if (stream-null? (stream-cdr sb)) (stream-cons b1 stream-null) (stream-cons b1 (stream-cons (list->byte-blob (list w)) (intersperse1 (stream-cdr sb)))) ))))) (intersperse1 sb)) (define-stream (byte-blob-stream-take sb n) (if (positive? n) (let ((b (stream-car sb))) (assert (byte-blob? b)) (let ((bn (byte-blob-length b))) (if (< n bn) (stream-cons (byte-blob-take b n) stream-null) (stream-cons b (byte-blob-stream-take (stream-cdr sb) (- n bn)))))) stream-null)) (define (byte-blob-stream-drop sb n) (if (positive? n) (let ((b (stream-car sb))) (assert (byte-blob? b)) (let ((bn (byte-blob-length b))) (cond ((= n bn) (stream-cdr sb)) ((< n bn) (stream-cons (byte-blob-drop b n) (stream-cdr sb))) (else (byte-blob-stream-drop (stream-cdr sb) (- n bn)))))) stream-null)) (define (byte-blob-stream-span b start end) (assert (and (or (zero? start) (positive? start)) (positive? end) (< start end))) (byte-blob-stream-take (byte-blob-stream-drop b start) (- end start))) (define (byte-blob-stream-map f sb) (define-stream (map1 sb) (if (stream-null? sb) sb (let ((b (stream-car sb))) (assert (byte-blob? b)) (stream-cons (byte-blob-map f b) (map1 (stream-cdr sb)))))) (map1 sb)) (define (byte-blob-stream-fold-left f init sb) (define (foldl1 ax sb) (if (stream-null? sb) ax (let ((b (stream-car sb))) (assert (byte-blob? b)) (foldl1 (byte-blob-fold-left f ax b) (stream-cdr sb))))) (foldl1 init sb)) (define (byte-blob-stream-fold-right f init sb) (define (f1 bb ax) (byte-blob-fold-right f ax bb)) (define (foldr1 sb) (if (stream-null? sb) init (let ((b (stream-car sb))) (assert (byte-blob? b)) (f1 b (foldr1 (stream-cdr sb)))))) (foldr1 sb)) (define (byte-blob-stream-ref sb i) (if (stream-null? sb) #f (let ((b (stream-car sb))) (assert (byte-blob? b)) (let ((len (byte-blob-length b))) (if (< i len) (byte-blob-ref b i) (byte-blob-stream-ref (stream-cdr sb) (- i len))))))) ;; Fast sub-sequence search, based on work by Boyer, Moore, Horspool, ;; Sunday, and Lundh. ;; ;; Based on code from the Haskell text library by Tom Harper and Bryan ;; O'Sullivan. http://hackage.haskell.org/package/text (define (subsequence-search needle haystack) (let* ((nlen (byte-blob-length needle)) (nlast (- nlen 1)) (nindex (lambda (i) (byte-blob-ref needle i)))) (let* ((z (nindex nlast)) (tbl (make-table nlast nindex nlen z))) (let-values (((mask skip) (tbl 0 0 (- nlen 2)))) (cond ((zero? nlen) '()) ((= 1 nlen) (scan1 (nindex 0) 0 haystack)) (else ((scan mask skip nlen nlast nindex z) 0 0 haystack))))))) (define swizzle (foreign-lambda* int ((integer k)) #<= i nlast) (values (bitwise-ior msk (swizzle z)) skp)) (else (let* ((c (nindex i)) (skp1 (cond ((= c z) (- nlen i 2)) (else skp)))) (loop (+ 1 i) (bitwise-ior msk (swizzle c)) skp1))))))) (define (scan1 c i sb) (let loop ((h 0) (sb sb) (ax '())) (let ((b (stream-car sb))) (assert (byte-blob? b)) (let ((len (byte-blob-length b))) (if (>= h len) (if (stream-null? (stream-cdr sb)) (reverse ax) (scan1 c (+ i len) (stream-cdr sb))) (let ((on (byte-blob-ref b h))) (if (= on c) (loop (+ 1 h) sb (cons (+ i h) ax)) (loop (+ 1 h) sb ax)))))))) (define (scan mask skip nlen nlast nindex z) ;; check whether an attempt to index into the haystack at the ;; given offset would fail (define (lacking-hay? q sb) (let loop ((p 0) (sb sb)) (let* ((b (stream-car sb)) (len (byte-blob-length b)) (p1 (+ p len))) (and (<= p1 q) (cond ((stream-null? (stream-cdr sb)) #t) (else (loop p1 (stream-cdr sb)))))))) (define (candidate-match sb i j) (cond ((>= j nlast) #t) ((not (= (byte-blob-stream-ref sb (+ i j)) (nindex j))) #f) (else (candidate-match sb i (+ 1 j))))) (lambda (g i sb) (let loop ((g g) (i i) (sb sb) (ax '())) (let* ((b (stream-car sb)) (m (byte-blob-length b))) (cond ((>= i m) (if (stream-null? (stream-cdr sb)) (reverse ax) (loop g (- i m) (stream-cdr sb) ax))) ((lacking-hay? (+ i nlen) sb) (reverse ax)) (else (let ((c (byte-blob-stream-ref sb (+ i nlast)))) (if (and (= c z) (candidate-match sb i 0)) (loop (+ g nlen) (+ i nlen) sb (cons g ax)) (let* ((next-in-pattern? (zero? (bitwise-and mask (swizzle (byte-blob-stream-ref sb (+ i nlen)))))) (delta (cond (next-in-pattern? (+ 1 nlen)) ((= c z) (+ 1 skip)) (else 1)))) (loop (+ g delta) (+ i delta) sb ax)) ))) )))) ) ;; ;; Based on code from the Haskell text library by Tom Harper and Bryan ;; O'Sullivan. http://hackage.haskell.org/package/text ;; ;; /O(n+m)/ Find all non-overlapping instances of needle in ;; haystack. The first element of the returned pair is the prefix ;; of haystack prior to any matches of needle. The second is a ;; list of pairs. (define (byte-blob-stream-find needle haystack) (assert (byte-blob? needle)) (assert (byte-blob-stream? haystack)) (if (byte-blob-empty? needle) (error 'byte-blob-stream-find "empty search pattern")) (let ((r (subsequence-search needle haystack))) (if (null? r) (list haystack '()) (let-values (((h t) (split-at (car r) haystack))) (letrec ((loop (lambda (i xs cs ax) (if (null? xs) (reverse (cons (list cs cs) ax)) (let-values (((h t) (split-at (- (car xs) i) cs))) (loop (car xs) (cdr xs) t (cons (list h cs) ax))))))) (list h (loop (car r) (cdr r) t '()))))))) ;; (split-at n t) returns a strict pair whose first element is a ;; prefix of t whose chunks contain n bytes, and whose second is the ;; remainder of the byte-blob-stream (define (split-at n t) (cond ((zero? n) (values stream-null t)) ((stream-null? t) (values stream-null stream-null)) (else (let ((b (stream-car t))) (assert (byte-blob? b)) (let ((len (byte-blob-length b))) (if (>= n len) (let-values (((h t) (split-at (- n len) (stream-cdr t)))) (values (stream-cons b h) t)) (let ((b1 (byte-blob-take b n)) (b2 (byte-blob-drop b n))) (values (stream-cons b1 stream-null) (stream-cons b2 (stream-cdr t)))) )) )))) (define (byte-blob-stream->list x . rest) (let-optionals rest ((fmap identity)) (let loop ((lst (stream->list x))) (if (null? lst) '() (append (byte-blob->list (car lst) fmap) (loop (cdr lst))))))) (define (list->byte-blob-stream x) (let ((b1 (list->byte-blob x))) (stream-cons b1 stream-null))) (define (file->byte-blob-stream filename . rest) (let-optionals rest ((blocksize 64)) (let ((reader (lambda (port) (byte-blob-read port blocksize)))) (file->stream filename reader)))) )