;;
;; Utility procedures for manipulating SRFI-41 streams containing
;; byte blobs.
;;
;; Copyright 2009-2010 Ivan Raikov and the Okinawa Institute of
;; Science and Technology.
;;
;;
;; This program is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; A full copy of the GPL license can be found at
;; .
;;
(module byte-blob-stream
(
byte-blob-stream?
byte-blob-stream-empty
byte-blob-stream-empty?
byte-blob-stream-length
byte-blob-stream-cons
byte-blob-stream-car
byte-blob-stream-cdr
byte-blob-stream-ref
byte-blob-stream-append
byte-blob-stream-reverse
byte-blob-stream-intersperse
byte-blob-stream-take
byte-blob-stream-drop
byte-blob-stream-span
byte-blob-stream-map
byte-blob-stream-fold-left
byte-blob-stream-fold-right
byte-blob-stream-find
byte-blob-stream->list
list->byte-blob-stream
file->byte-blob-stream
)
(import scheme chicken foreign data-structures)
(require-extension streams streams-utils byte-blob)
(define (byte-blob-stream? x)
(and (stream? x) (or (stream-null? x)
(byte-blob? (stream-car x)))))
(define (byte-blob-stream-empty)
stream-null)
(define (byte-blob-stream-empty? sb)
(stream-null? sb))
(define (byte-blob-stream-length sb)
(stream-fold (lambda (b sum) (+ sum (byte-blob-length b))) 0 sb))
(define (byte-blob-stream-cons b sb)
(assert (byte-blob? b))
(stream-cons b sb))
(define byte-blob-stream-car
(compose byte-blob-car stream-car))
(define (byte-blob-stream-cdr sb)
(let ((b (stream-car sb)))
(assert (byte-blob? b))
(if (<= (byte-blob-length b) 1)
(stream-cdr sb)
(stream-cons (byte-blob-cdr b) (stream-cdr sb)))))
(define byte-blob-stream-append stream-append)
(define (byte-blob-stream-reverse sb)
(define-stream (reverse1 a b)
(if (stream-null? b) a
(let ((bb (stream-car b)))
(assert (byte-blob? bb))
(reverse1 (stream-cons (byte-blob-reverse bb) a)
(stream-cdr b)))))
(reverse1 stream-null sb))
(define (byte-blob-stream-intersperse sb w)
(assert (fixnum? w))
(define-stream (intersperse1 sb)
(if (stream-null? sb) sb
(let ((b (stream-car sb)))
(assert (byte-blob? b))
(let ((b1 (byte-blob-intersperse b w)))
(if (stream-null? (stream-cdr sb))
(stream-cons b1 stream-null)
(stream-cons b1 (stream-cons (list->byte-blob (list w))
(intersperse1 (stream-cdr sb))))
)))))
(intersperse1 sb))
(define-stream (byte-blob-stream-take sb n)
(if (positive? n)
(let ((b (stream-car sb)))
(assert (byte-blob? b))
(let ((bn (byte-blob-length b)))
(if (< n bn)
(stream-cons (byte-blob-take b n) stream-null)
(stream-cons b (byte-blob-stream-take (stream-cdr sb) (- n bn))))))
stream-null))
(define (byte-blob-stream-drop sb n)
(if (positive? n)
(let ((b (stream-car sb)))
(assert (byte-blob? b))
(let ((bn (byte-blob-length b)))
(cond ((= n bn) (stream-cdr sb))
((< n bn)
(stream-cons (byte-blob-drop b n) (stream-cdr sb)))
(else
(byte-blob-stream-drop (stream-cdr sb) (- n bn))))))
stream-null))
(define (byte-blob-stream-span b start end)
(assert (and (or (zero? start) (positive? start)) (positive? end) (< start end)))
(byte-blob-stream-take (byte-blob-stream-drop b start) (- end start)))
(define (byte-blob-stream-map f sb)
(define-stream (map1 sb)
(if (stream-null? sb) sb
(let ((b (stream-car sb)))
(assert (byte-blob? b))
(stream-cons (byte-blob-map f b)
(map1 (stream-cdr sb))))))
(map1 sb))
(define (byte-blob-stream-fold-left f init sb)
(define (foldl1 ax sb)
(if (stream-null? sb) ax
(let ((b (stream-car sb)))
(assert (byte-blob? b))
(foldl1 (byte-blob-fold-left f ax b)
(stream-cdr sb)))))
(foldl1 init sb))
(define (byte-blob-stream-fold-right f init sb)
(define (f1 bb ax)
(byte-blob-fold-right f ax bb))
(define (foldr1 sb)
(if (stream-null? sb) init
(let ((b (stream-car sb)))
(assert (byte-blob? b))
(f1 b (foldr1 (stream-cdr sb))))))
(foldr1 sb))
(define (byte-blob-stream-ref sb i)
(if (stream-null? sb) #f
(let ((b (stream-car sb)))
(assert (byte-blob? b))
(let ((len (byte-blob-length b)))
(if (< i len)
(byte-blob-ref b i)
(byte-blob-stream-ref (stream-cdr sb) (- i len)))))))
;; Fast sub-sequence search, based on work by Boyer, Moore, Horspool,
;; Sunday, and Lundh.
;;
;; Based on code from the Haskell text library by Tom Harper and Bryan
;; O'Sullivan. http://hackage.haskell.org/package/text
(define (subsequence-search needle haystack)
(let* ((nlen (byte-blob-length needle))
(nlast (- nlen 1))
(nindex (lambda (i) (byte-blob-ref needle i))))
(let* ((z (nindex nlast))
(tbl (make-table nlast nindex nlen z)))
(let-values
(((mask skip) (tbl 0 0 (- nlen 2))))
(cond ((zero? nlen)
'())
((= 1 nlen)
(scan1 (nindex 0) 0 haystack))
(else
((scan mask skip nlen nlast nindex z) 0 0 haystack)))))))
(define swizzle
(foreign-lambda* int ((integer k))
#<= i nlast)
(values (bitwise-ior msk (swizzle z)) skp))
(else
(let* ((c (nindex i))
(skp1 (cond ((= c z) (- nlen i 2))
(else skp))))
(loop (+ 1 i) (bitwise-ior msk (swizzle c)) skp1)))))))
(define (scan1 c i sb)
(let loop ((h 0) (sb sb) (ax '()))
(let ((b (stream-car sb)))
(assert (byte-blob? b))
(let ((len (byte-blob-length b)))
(if (>= h len)
(if (stream-null? (stream-cdr sb))
(reverse ax)
(scan1 c (+ i len) (stream-cdr sb)))
(let ((on (byte-blob-ref b h)))
(if (= on c) (loop (+ 1 h) sb (cons (+ i h) ax))
(loop (+ 1 h) sb ax))))))))
(define (scan mask skip nlen nlast nindex z)
;; check whether an attempt to index into the haystack at the
;; given offset would fail
(define (lacking-hay? q sb)
(let loop ((p 0) (sb sb))
(let* ((b (stream-car sb))
(len (byte-blob-length b))
(p1 (+ p len)))
(and (<= p1 q) (cond ((stream-null? (stream-cdr sb)) #t)
(else (loop p1 (stream-cdr sb))))))))
(define (candidate-match sb i j)
(cond ((>= j nlast) #t)
((not (= (byte-blob-stream-ref sb (+ i j)) (nindex j))) #f)
(else (candidate-match sb i (+ 1 j)))))
(lambda (g i sb)
(let loop ((g g) (i i) (sb sb) (ax '()))
(let* ((b (stream-car sb))
(m (byte-blob-length b)))
(cond ((>= i m)
(if (stream-null? (stream-cdr sb))
(reverse ax)
(loop g (- i m) (stream-cdr sb) ax)))
((lacking-hay? (+ i nlen) sb)
(reverse ax))
(else
(let ((c (byte-blob-stream-ref sb (+ i nlast))))
(if (and (= c z) (candidate-match sb i 0))
(loop (+ g nlen) (+ i nlen) sb (cons g ax))
(let* ((next-in-pattern?
(zero? (bitwise-and mask (swizzle (byte-blob-stream-ref sb (+ i nlen))))))
(delta (cond (next-in-pattern? (+ 1 nlen))
((= c z) (+ 1 skip))
(else 1))))
(loop (+ g delta) (+ i delta) sb ax))
)))
))))
)
;;
;; Based on code from the Haskell text library by Tom Harper and Bryan
;; O'Sullivan. http://hackage.haskell.org/package/text
;;
;; /O(n+m)/ Find all non-overlapping instances of needle in
;; haystack. The first element of the returned pair is the prefix
;; of haystack prior to any matches of needle. The second is a
;; list of pairs.
(define (byte-blob-stream-find needle haystack)
(assert (byte-blob? needle))
(assert (byte-blob-stream? haystack))
(if (byte-blob-empty? needle)
(error 'byte-blob-stream-find "empty search pattern"))
(let ((r (subsequence-search needle haystack)))
(if (null? r)
(list haystack '())
(let-values (((h t) (split-at (car r) haystack)))
(letrec ((loop (lambda (i xs cs ax)
(if (null? xs)
(reverse (cons (list cs cs) ax))
(let-values (((h t) (split-at (- (car xs) i) cs)))
(loop (car xs) (cdr xs) t (cons (list h cs) ax)))))))
(list h (loop (car r) (cdr r) t '())))))))
;; (split-at n t) returns a strict pair whose first element is a
;; prefix of t whose chunks contain n bytes, and whose second is the
;; remainder of the byte-blob-stream
(define (split-at n t)
(cond ((zero? n) (values stream-null t))
((stream-null? t) (values stream-null stream-null))
(else
(let ((b (stream-car t)))
(assert (byte-blob? b))
(let ((len (byte-blob-length b)))
(if (>= n len)
(let-values (((h t) (split-at (- n len) (stream-cdr t))))
(values (stream-cons b h) t))
(let ((b1 (byte-blob-take b n))
(b2 (byte-blob-drop b n)))
(values (stream-cons b1 stream-null)
(stream-cons b2 (stream-cdr t))))
))
))))
(define (byte-blob-stream->list x . rest)
(let-optionals rest ((fmap identity))
(let loop ((lst (stream->list x)))
(if (null? lst) '()
(append (byte-blob->list (car lst) fmap)
(loop (cdr lst)))))))
(define (list->byte-blob-stream x)
(let ((b1 (list->byte-blob x)))
(stream-cons b1 stream-null)))
(define (file->byte-blob-stream filename . rest)
(let-optionals rest ((blocksize 64))
(let ((reader (lambda (port) (byte-blob-read port blocksize))))
(file->stream filename reader))))
)