;;;; bloom-filter-test.scm -*- Scheme -*- ;;;; Kon Lovett, Mar '20 ;;;; Kon Lovett, Jul '18 (import test) (import (only (chicken format) format)) (include "test-gloss.incl") ;; (test-begin "Bloom Filter") (import bloom-filter) ;;; (import scheme) (import (chicken base)) (import (chicken type)) (import (chicken syntax)) (import (only (chicken fixnum) most-positive-fixnum fx<)) (import (only (chicken sort) sort sort!)) (import (only (chicken random) pseudo-random-integer)) (import (only (srfi 1) every filter list-copy map!)) (import (only (srfi 13) string-reverse)) (import message-digest-primitive) ;;; ;; (: shuffle ((list-of *) #!optional (procedure (fixnum) fixnum) -> (list-of *))) ; (define (shuffle ls #!optional (rand pseudo-random-integer)) (define (tag-gen) (rand most-positive-fixnum)) (define (tag-rnd x) (cons (tag-gen) x)) (define (tag< x y) (fx< (car x) (car y))) (map! cdr (sort! (map tag-rnd ls) tag<)) ) ;; (: read-file ((or string input-port) #!optional (procedure (input-port) *) fixnum -> list)) ; (define (read-file inp-or-fil #!optional (reader read) max) (import (chicken io)) (let ( (inp (if (input-port? inp-or-fil) inp-or-fil (open-input-file inp-or-fil))) ) (dynamic-wind (lambda () (void)) (lambda () (read-list inp reader max)) (lambda () (unless (input-port? inp-or-fil) (close-input-port inp)) ) ) ) ) ;; (define-syntax test-success (syntax-rules () ((test-success ?expr) (test-assert (begin ?expr #t)) ) ((test-success ?mesg ?expr) (test-assert ?mesg (begin ?expr #t)) ) ) ) ;;; (import sha1 sha2 md5) (define mdps-basic (list (sha512-primitive) (md5-primitive) (sha1-primitive)) ) (import tiger-hash ripemd #;blake2) (define mdps-xtra (list (tiger192-primitive) (ripemd128-primitive) (ripemd160-primitive) #;(blake2s-primitive) #;(blake2b-primitive)) ) #| (import APHash BKDRHash BRPHash CRCHash DEKHash DJBHash ELFHash FNVAHash FNVHash ISPLHash JSHash NDJBHash PHSFHash PJWHash PYHash RJL3Hash RJMXHash RSHash SDBMHash TWMGMXHash TWMXHash TWSHMLMXHash TWSHMXHash) (define mdps-hashes (list (APHash-primitive) (BKDRHash-primitive) (BRPHash-primitive) (CRCHash-primitive) (DEKHash-primitive) (DJBHash-primitive) (ELFHash-primitive) (FNVAHash-primitive) (FNVHash-primitive) (ISPLHash-primitive) (JSHash-primitive) (NDJBHash-primitive) (PHSFHash-primitive) (PJWHash-primitive) (PYHash-primitive) (RJL3Hash-primitive) (RJMXHash-primitive) (RSHash-primitive) (SDBMHash-primitive) (TWMGMXHash-primitive) (TWMXHash-primitive) (TWSHMLMXHash-primitive) (TWSHMXHash-primitive)) ) |# ;need more hashers for 64bit (define mdps (append mdps-basic mdps-xtra)) (define palindromic-word-list '( "bob" "elle" "evil" "flow" "harrah" "hexeh" "js" "live" "map" "non" "pam" "pop" "radar" "sj" "wolf")) ;; (define test-word-list (read-file "bloom-filter-word-list.txt")) (define mirrored-word-list (map string-reverse test-word-list)) #; ;Compute above from word file (define palindromic-word-list (begin (import (srfi 69)) (let ((word-tbl (alist->hash-table (map (cut cons <> #t) test-word-list)))) (foldl (lambda (ls wrd) (if (hash-table-exists? word-tbl wrd) (cons wrd ls) ls) ) '() mirrored-word-list)))) ;Shuffle the Hashers (set! mdps (shuffle mdps)) (gloss) (glossf "digests: ~A" (map message-digest-primitive-name mdps)) (define N (length test-word-list)) (define P 2.47E-05) (let ( (MK #f) (bf #f) (false-positives '()) ) (test-group "Words In List, All K" (test-assert (= 2 (length (receive (optimum-size P N))))) (set! MK (receive (optimum-size P N))) (glossf "N = ~A, M = ~A, K = ~A, P = ~A" N (car MK) (cadr MK) P) (test-success "Make" (set! bf (make-bloom-filter P N mdps))) (test-success "Add Bloom Filter" (for-each (cut bloom-filter-set! bf <>) test-word-list)) (test-assert "Exists in Bloom Filter?" (every (cut bloom-filter-exists? bf <>) test-word-list)) (test-success "False positives" (set! false-positives (filter (cut bloom-filter-exists? bf <>) mirrored-word-list))) (glossf "Calced Palindromic words: ~A" (sort false-positives string) test-word-list)) (test-assert "Exists in Bloom Filter?" (every (cut bloom-filter-exists? bf <>) test-word-list)) (test-success "False positives" (set! false-positives (filter (cut bloom-filter-exists? bf <>) mirrored-word-list))) ;"FIXME enola" crops up sometimes ? (glossf "Calced Palindromic words: ~A" (sort false-positives string) test-word-list)) (test-assert "Exists in Bloom Filter?" (every (cut bloom-filter-exists? bf <>) test-word-list)) (test-success "False positives" (set! false-positives (filter (cut bloom-filter-exists? bf <>) mirrored-word-list))) (glossf "Calced Palindromic words: ~A" (sort false-positives string