;; ;; ;; A dictionary structure implemented with a counting Bloom filter. ;; ;; Based on the Shared-node Fast Hash Table (SFHT) data structure ;; described by Song, et al., in _Fast Hash Table Lookup Using ;; Extended Bloom Filter: An Aid to Network Processing_. (SIGCOMM'05) ;; ;; ;; Copyright 2007-2009 Ivan Raikov. ;; ;; ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; .")))) ;; (module sfht (make-sfht) (import scheme chicken data-structures) (require-extension srfi-1 matchable sparse-vectors) (define ln06185 (log 0.6185)) (define ln05 (log 0.5)) (define (sfht:error x . rest) (let ((port (open-output-string))) (let loop ((objs (cons x rest))) (if (null? objs) (begin (newline port) (error 'sfht (get-output-string port))) (begin (display (car objs) port) (display " " port) (loop (cdr objs))))))) (define (make-sfht n p make-random-state random! key->vector key-vector-ref key-vector-length . rest) (let-optionals rest ((key-equal? equal?)) (define m (round (+ 1.0 (* (* n (log p)) ln06185)))) (define k (inexact->exact (round (+ 0.5 (/ (log p) ln05))))) (define default (list 0 (list))) (define ba (make-sparse-vector default)) (define size 0) ;; Hash functions based on uniform pseudo-random numbers (define rng-states (list-tabulate k (lambda (i) (make-random-state i)))) ;; Pre-calculate hash function coefficients for vectors of size up ;; to 127 elements (define H 127) (define random-coeffs (map (lambda (st) (list-tabulate H (lambda (x) (random! st)))) rng-states)) (define (extend-random-coeffs! H1) (let ((d (- H1 H))) (if (< d 1) (sfht:error 'extend-random-coeffs! ": the new number of coefficients is less than the old one")) (let ((cfs1 (map (lambda (st cfs) (let ((cfs1 (list-tabulate d (lambda (x) (random! st))))) (append cfs cfs1))) rng-states random-coeffs))) (set! random-coeffs cfs1) (set! H H1)))) (define (and-coeffs h kv) (let loop ((i 0) (h h) (ax (list))) (if (null? h) (reverse ax) (loop (fx+ 1 i) (cdr h) (cons (* (car h) (key-vector-ref kv i)) ax))))) (define (hash key k n) (let* ((kv (key->vector key)) (kvl (key-vector-length kv))) (if (fx> kvl H) (extend-random-coeffs! kvl)) (if (fx= kvl 0) (map car random-coeffs) (let loop ((i 0) (hh random-coeffs) (ax (list))) (if (null? hh) (map (lambda (lst) (apply bitwise-xor lst)) (reverse ax)) (loop (fx+ 1 i) (cdr hh) (cons (and-coeffs (car hh) kv) ax))))))) (define (insert! key x) (let ((h (hash key k n)) (b (cons (cons key x) (list)))) (let loop ((i h)) (if (not (null? i)) (let* ((index (car i)) (bkt (sparse-vector-ref ba index))) (let-values (((sz lst) (match bkt ((sz lst) (values sz lst)) (else (sfht:error 'insert! ": invalid bucket " bkt " at index " index))))) (if (fx= 0 sz) (sparse-vector-set! ba index (list 1 (list (cons key x)))) (begin (let tail ((k sz) (lst lst) (prev #f)) (if (null? lst) (let ((b (list (cons key x)))) (if prev (set-cdr! prev b) (set-cdr! bkt b))) (if (fx<= k 0) (set-cdr! bkt (cons (cons key x) (cdr bkt))) (tail (fx- k 1) (cdr lst) lst)))) (set-car! bkt (fx+ 1 (car bkt))))) (loop (cdr i)))))) (set! size (fx+ 1 size)) #f)) (define (delete! key) (define found? #f) (let ((h (hash key k n))) (let loop ((i h)) (if (not (null? i)) (let* ((index (car i)) (bkt (sparse-vector-ref ba index))) (let-values (((sz lst) (match bkt ((sz lst) (values sz lst)) (else (sfht:error 'remove! ": invalid bucket " bkt " at index " i))))) (let bktloop ((k sz) (lst lst) (prev #f)) (if (not (null? lst)) (if (fx< 0 k) (match lst (((key1 . _) . rest) (if (key-equal? key key1) (begin (set! found? #t) (set-car! bkt (fx- (car bkt) 1)) (if prev (set-cdr! prev rest) (set-cdr! bkt (list rest)))) (bktloop (fx- k 1) rest lst))) (else (sfht:error 'remove! ": invalid bucket list " lst))))))) (loop (cdr i))))) (if found? (set! size (fx- size 1))) found?)) (define (min-bucket bkts . rest) (let-optionals rest ((minb (or (null? bkts) (car bkts)))) (if (null? bkts) minb (let ((bkt (car bkts))) (if (fx< (car bkt) (car minb)) (min-bucket (cdr bkts) bkt) (min-bucket (cdr bkts) minb)))))) (define (find key) (let* ((h (hash key k n)) (bkts (map (lambda (i) (sparse-vector-ref ba i)) h)) (minb (min-bucket bkts))) (let loop ((k (car minb)) (lst (cadr minb))) (if (or (fx= 0 k) (null? lst)) #f (match lst (((key1 . _) . rest) (if (key-equal? key key1) (car lst) (loop (fx- k 1) rest))) (else (sfht:error 'find ": invalid bucket list " lst))))))) (define (debugprint) (let ((bkts (sparse-vector->list ba))) (for-each (lambda (bkt) (display "bucket:") (display " sz = ") (display (car bkt)) (display " lst = ") (display (cdr bkt)) (display #\newline)) bkts))) (define (apply-default-clause src key default-clause) (cond ((null? default-clause) (sfht:error src ": key " key " was not found in the SFHT ")) ((procedure? (car default-clause)) ((car default-clause))) (else (car default-clause)))) ;; Dispatcher (lambda (selector) (case selector ((get) (lambda (key . default-clause) (or (find key) (apply-default-clause 'get key default-clause)))) ((delete!) (lambda (key . default-clause) (or (delete! key) (apply-default-clause 'delete! key default-clause)))) ((put!) insert!) ((empty?) (fx= size 0)) ((size) size) ((clear!) (begin (set! ba (make-sparse-vector default)) (set! size 0))) ((debugprint) (debugprint)) (else (sfht:error "Unknown message " selector " sent to an SFHT")))))) )