;;; srfi.69.hash.scm - Optional non-standard extensions ; ; Copyright (c) 2008-2021, The Chicken Team ; Copyright (c) 2000-2007, Felix L. Winkelmann ; All rights reserved. ; ; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following ; conditions are met: ; ; Redistributions of source code must retain the above copyright notice, this list of conditions and the following ; disclaimer. ; Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following ; disclaimer in the documentation and/or other materials provided with the distribution. ; Neither the name of the author nor the names of its contributors may be used to endorse or promote ; products derived from this software without specific prior written permission. ; ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS ; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY ; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR ; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ; POSSIBILITY OF SUCH DAMAGE. (declare (usual-integrations)) (foreign-declare "#define C_rnd_fix() (C_fix(rand()))") (define ##sys#number-hash-hook) ; was used in the "numbers" egg (module (srfi 69 hash) (wrap-hash/std *make-hash-function hash-bound-default wrap-hash/triple32 number-hash object-uid-hash symbol-hash keyword-hash eq?-hash eqv?-hash recursive-hash-max-depth recursive-hash-max-length equal?-hash hash string-hash string-ci-hash string-hash-ci hash-by-identity) (import (scheme) (chicken base) (chicken fixnum) (chicken flonum) (chicken keyword) (chicken foreign)) (import-for-syntax (chicken fixnum)) ;;; Naming Conventions: ;; %foo - inline primitive ;; %%foo - local inline (no such thing but at least it looks different) ;; $foo - local macro ;; *foo - local unchecked variant of a checked procedure ;; ##sys#foo - public, but undocumented, un-checked procedure ;; foo - public checked procedure ;;; Core Inlines: (define-inline (%fix wrd) (##core#inline "C_fix" wrd) ) (define-inline (%block? obj) (##core#inline "C_blockp" obj) ) (define-inline (%immediate? obj) (not (##core#inline "C_blockp" obj)) ) (define-inline (%special? obj) (##core#inline "C_specialp" obj) ) (define-inline (%port? obj) (##core#inline "C_portp" obj) ) (define-inline (%byte-block? obj) (##core#inline "C_byteblockp" obj) ) (define-inline (%string-hash str rnd) (##core#inline "C_u_i_string_hash" str rnd) ) (define-inline (%string-ci-hash str rnd) (##core#inline "C_u_i_string_ci_hash" str rnd) ) (define-inline (%subbyte bytvec i) (##core#inline "C_subbyte" bytvec i) ) ;;; Generation of hash-values: ;; All '%foo-hash' return a fixnum, not necessarily positive. The "overflow" of ;; a, supposedly, unsigned hash value into negative is not checked during ;; intermediate computation. ;; ;; The body of '*eq?-hash' is duplicated in '*eqv?-hash' and the body of '*eqv?-hash' ;; is duplicated in '*equal?-hash' to save on procedure calls. ;; Fixed hash-values: (define-constant other-hash-value 99) (define-constant true-hash-value 256) (define-constant false-hash-value 257) (define-constant null-hash-value 258) (define-constant eof-hash-value 259) (define-constant input-port-hash-value 260) (define-constant output-port-hash-value 261) (define-constant unknown-immediate-hash-value 262) (define-constant hash-default-bound 536870912) ;; The salt used by hash functions for optional argument ;; Note that wrap-hash/std creates a new salt. (define hash-default-randomization (##core#inline "C_rnd_fix")) (define (hash-bound-default) hash-default-bound) ;; Force Hash to Bounded Fixnum: (define-inline (fxabs n) (if (fx< n 0) (fxneg n) n)) (define-inline (%hash/limit hsh lim) (fxmod (fxabs hsh) lim)) ;; Number Hash: (define-constant flonum-magic 331804471) (define-syntax $flonum-hash (er-macro-transformer (lambda (form r c) (let ((flo (cadr form)) (_%subbyte (r '%subbyte)) (_flonum-magic (r 'flonum-magic)) (_fx+ (r 'fx+)) (_fx* (r 'fx*)) (_fxshl (r 'fxshl)) ) `(,_fx* ,_flonum-magic ,(let loop ((idx (fx- (##sys#size 1.0) 1))) (if (fx= 0 idx) `(,_%subbyte ,flo 0) `(,_fx+ (,_%subbyte ,flo ,idx) (,_fxshl ,(loop (fx- idx 1)) 1)) ) ) ) ) ) ) ) (define-inline (%non-fixnum-number-hash obj rnd) (cond [(flonum? obj) ($flonum-hash obj rnd)] [else (%fix (##sys#number-hash-hook obj rnd))] ) ) (define-inline (%number-hash obj rnd) (cond [(fixnum? obj) (fxxor obj rnd)] [else (%non-fixnum-number-hash obj rnd)] ) ) (define (number-hash obj #!optional (bound hash-default-bound) (randomization hash-default-randomization)) (unless (number? obj) (##sys#signal-hook #:type 'number-hash "invalid number" obj) ) (##sys#check-fixnum bound 'number-hash) (%hash/limit (%number-hash obj randomization) bound) ) ;; Object UID Hash: #; ;NOT YET (no weak-reference) (define-inline (%object-uid-hash obj) (%uid-hash (##sys#object->uid obj)) ) (define-inline (%object-uid-hash obj rnd) (*equal?-hash obj rnd) ) (define (object-uid-hash obj #!optional (bound hash-default-bound) (randomization hash-default-randomization)) (##sys#check-fixnum bound 'object-uid-hash) (%hash/limit (%object-uid-hash obj randomization) bound) ) ;; Symbol Hash: #; ;NOT YET (no unique-symbol-hash) (define-inline (%symbol-hash obj) (##sys#slot obj INDEX-OF-UNIQUE-HASH-VALUE-COMPUTED-DURING-SYMBOL-CREATION) ) (define-inline (%symbol-hash obj rnd) (%string-hash (##sys#slot obj 1) rnd) ) (define (symbol-hash obj #!optional (bound hash-default-bound) (randomization hash-default-randomization)) (##sys#check-symbol obj 'symbol-hash) (##sys#check-fixnum bound 'symbol-hash) (%hash/limit (%symbol-hash obj randomization) bound) ) ;; Keyword Hash: (define (##sys#check-keyword x . y) (unless (keyword? x) (##sys#signal-hook #:type-error (and (not (null? y)) (car y)) "bad argument type - not a keyword" x) ) ) #; ;NOT YET (no unique-keyword-hash) (define-inline (%keyword-hash obj) (##sys#slot obj INDEX-OF-UNIQUE-HASH-VALUE-COMPUTED-DURING-KEYWORD-CREATION) ) (define-inline (%keyword-hash obj rnd) (%string-hash (##sys#slot obj 1) rnd) ) (define (keyword-hash obj #!optional (bound hash-default-bound) (randomization hash-default-randomization)) (##sys#check-keyword obj 'keyword-hash) (##sys#check-fixnum bound 'keyword-hash) (%hash/limit (%keyword-hash obj randomization) bound) ) ;; Eq Hash: (define-inline (%eq?-hash-object? obj) (or (%immediate? obj) (symbol? obj) (keyword? obj) ) ) (define (*eq?-hash obj rnd) (cond [(fixnum? obj) (fxxor obj rnd)] [(char? obj) (fxxor (char->integer obj) rnd)] [(eq? obj #t) (fxxor true-hash-value rnd)] [(eq? obj #f) (fxxor false-hash-value rnd)] [(null? obj) (fxxor null-hash-value rnd)] [(eof-object? obj) (fxxor eof-hash-value rnd)] [(symbol? obj) (%symbol-hash obj rnd)] [(keyword? obj) (%keyword-hash obj rnd)] [(%immediate? obj) (fxxor unknown-immediate-hash-value rnd)] [else (%object-uid-hash obj rnd) ] ) ) (define (eq?-hash obj #!optional (bound hash-default-bound) (randomization hash-default-randomization)) (##sys#check-fixnum bound 'eq?-hash) (%hash/limit (*eq?-hash obj randomization) bound) ) (define hash-by-identity eq?-hash) ;; Eqv Hash: (define-inline (%eqv?-hash-object? obj) (or (%eq?-hash-object? obj) (number? obj) ) ) (define (*eqv?-hash obj rnd) (cond [(fixnum? obj) (fxxor obj rnd)] [(char? obj) (fxxor (char->integer obj) rnd)] [(eq? obj #t) (fxxor true-hash-value rnd)] [(eq? obj #f) (fxxor false-hash-value rnd)] [(null? obj) (fxxor null-hash-value rnd)] [(eof-object? obj) (fxxor eof-hash-value rnd)] [(symbol? obj) (%symbol-hash obj rnd)] [(keyword? obj) (%keyword-hash obj rnd)] [(number? obj) (%non-fixnum-number-hash obj rnd)] [(%immediate? obj) (fxxor unknown-immediate-hash-value rnd)] [else (%object-uid-hash obj rnd) ] ) ) (define (eqv?-hash obj #!optional (bound hash-default-bound) (randomization hash-default-randomization)) (##sys#check-fixnum bound 'eqv?-hash) (%hash/limit (*eqv?-hash obj randomization) bound) ) ;; Equal Hash: (define-constant default-recursive-hash-max-depth 4) (define-constant default-recursive-hash-max-length 4) (define *recursive-hash-max-depth* default-recursive-hash-max-depth) (define (recursive-hash-max-depth #!optional x) (if (and (fixnum? x) (positive? x)) (begin (set! *recursive-hash-max-depth* x) x ) *recursive-hash-max-depth*)) (define *recursive-hash-max-length* default-recursive-hash-max-length) (define (recursive-hash-max-length #!optional x) (if (and (fixnum? x) (positive? x)) (begin (set! *recursive-hash-max-length* x) x ) *recursive-hash-max-length*)) ;; NOTE - These refer to identifiers available only within the body of '*equal?-hash'. (define-inline (%%pair-hash obj rnd) (fx+ (fxshl (recursive-hash (##sys#slot obj 0) (fx+ depth 1) rnd) 16) (recursive-hash (##sys#slot obj 1) (fx+ depth 1) rnd)) ) (define-inline (%%port-hash obj rnd) (fx+ (fxxor (fxshl (##sys#peek-fixnum obj 0) 4) rnd) ; Little extra "identity" (if (input-port? obj) input-port-hash-value output-port-hash-value)) ) (define-inline (%%special-vector-hash obj rnd) (vector-hash obj (##sys#peek-fixnum obj 0) depth 1 rnd) ) (define-inline (%%regular-vector-hash obj rnd) (vector-hash obj 0 depth 0 rnd) ) (define (*equal?-hash obj rnd) ; Recurse into some portion of the vector's slots (define (vector-hash obj seed depth start rnd) (let ([len (##sys#size obj)]) (let loop ([hsh (fx+ len (fxxor seed rnd))] [i start] [len (fx- (fxmax start (fxmin *recursive-hash-max-length* len)) start)] ) (if (fx= len 0) hsh (loop (fx+ hsh (fx+ (fxshl hsh 4) (recursive-hash (##sys#slot obj i) (fx+ depth 1) rnd))) (fx+ i 1) (fx- len 1) ) ) ) ) ) ; Recurse into structured objects (define (recursive-hash obj depth rnd) (cond [(fx>= depth *recursive-hash-max-depth*) (fxxor other-hash-value rnd)] [(fixnum? obj) (fxxor obj rnd)] [(char? obj) (fxxor (char->integer obj) rnd)] [(eq? obj #t) (fxxor true-hash-value rnd)] [(eq? obj #f) (fxxor false-hash-value rnd)] [(null? obj) (fxxor null-hash-value rnd)] [(eof-object? obj) (fxxor eof-hash-value rnd)] [(symbol? obj) (%symbol-hash obj rnd)] [(keyword? obj) (%keyword-hash obj rnd)] [(flonum? obj) (%non-fixnum-number-hash obj rnd)] [(%immediate? obj) (fxxor unknown-immediate-hash-value rnd)] [(%byte-block? obj) (%string-hash obj rnd)] [(pair? obj) (%%pair-hash obj rnd)] [(%port? obj) (%%port-hash obj rnd)] [(%special? obj) (%%special-vector-hash obj rnd)] [else (%%regular-vector-hash obj rnd)] ) ) ; (recursive-hash obj 0 rnd) ) (define (equal?-hash obj #!optional (bound hash-default-bound) (randomization hash-default-randomization)) (##sys#check-fixnum bound 'hash) (%hash/limit (*equal?-hash obj randomization) bound) ) (define hash equal?-hash) ;; String Hash: (define (string-hash str #!optional (bound hash-default-bound) start end (randomization hash-default-randomization)) (##sys#check-string str 'string-hash) (##sys#check-fixnum bound 'string-hash) (let ((str (if start (let ((end (or end (##sys#size str)))) (##sys#check-range start 0 (##sys#size str) 'string-hash) (##sys#check-range end 0 (##sys#size str) 'string-hash) (##sys#substring str start end)) str)) ) (%hash/limit (%string-hash str randomization) bound) ) ) (define (string-ci-hash str #!optional (bound hash-default-bound) start end (randomization hash-default-randomization)) (##sys#check-string str 'string-ci-hash) (##sys#check-fixnum bound 'string-ci-hash) (let ((str (if start (let ((end (or end (##sys#size str)))) (##sys#check-range start 0 (##sys#size str) 'string-hash) (##sys#check-range end 0 (##sys#size str) 'string-hash) (##sys#substring str start end)) str)) ) (%hash/limit (%string-ci-hash str randomization) bound) ) ) (define string-hash-ci string-ci-hash) ;; (define wrap-hash/std (let ((builtins (list eq?-hash eqv?-hash equal?-hash hash string-hash string-hash-ci number-hash object-uid-hash symbol-hash keyword-hash)) (string-builtins (list string-hash string-hash-ci)) ) (lambda (user-function) ;; Don't add unnecessary bounds checks for procedures known to be ;; well-behaved (these are not user-*created* functions) (if (memq user-function builtins) ;; A new salt every time (let ((randomization (##core#inline "C_rnd_fix"))) ;; String functions have differing signatures; treat them specially (if (memq user-function string-builtins) (lambda (obj bound) (user-function obj bound #f #f randomization)) (lambda (obj bound) (user-function obj bound randomization)))) (lambda (obj bound) (let ((hash (user-function obj bound))) (##sys#check-fixnum hash 'hash) (if (and (fx<= 0 hash) (fx< hash bound)) hash (##sys#signal-hook #:bounds-error 'hash "Hash value out of bounds" bound hash user-function) ))))))) ;deprecated (define *make-hash-function wrap-hash/std) ;; #; ;https://nullprogram.com/blog/2018/07/31/ (define (triple32 u32) (let* ((u32 (bitwise-xor u32 (arithmetic-shift u32 -17))) (u32 (bitwise-and (* u32 #xed5ad4bb) 2^32-1)) (u32 (bitwise-xor u32 (arithmetic-shift u32 -11))) (u32 (bitwise-and (* u32 #xac4c1b51) 2^32-1)) (u32 (bitwise-xor u32 (arithmetic-shift u32 -15))) (u32 (bitwise-and (* u32 #x31848bab) 2^32-1)) (u32 (bitwise-xor u32 (arithmetic-shift u32 -14))) ) u32 ) ) #; (define triple32 (foreign-lambda* unsigned-int32 ((unsigned-int32 u32)) "u32 ^= u32 >> 17;\n" "u32 *= 0xed5ad4bbU;\n" "u32 ^= u32 >> 11;\n" "u32 *= 0xac4c1b51U;\n" "u32 ^= u32 >> 15;\n" "u32 *= 0x31848babU;\n" "u32 ^= u32 >> 14;\n" "C_return( u32 );")) ;restricted input & mixers to 30bits! #; ;ehh, makes little diff (define (triple30 u32) (let* ((u32 (fxxor u32 (fxshr u32 17))) (u32 (fx* u32 #x3d5ad4bb)) (u32 (fxxor u32 (fxshr u32 11))) (u32 (fx* u32 #x3c4c1b51)) (u32 (fxxor u32 (fxshr u32 15))) (u32 (fx* u32 #x31848bab)) (u32 (fxxor u32 (fxshr u32 14))) ) u32 ) ) (define triple30 (foreign-lambda* int32 ((unsigned-int32 u32)) "u32 ^= u32 >> 17;\n" "u32 *= 0x3d5ad4bbU;\n" "u32 ^= u32 >> 11;\n" "u32 *= 0x3c4c1b51U;\n" "u32 ^= u32 >> 15;\n" "u32 *= 0x31848babU;\n" "u32 ^= u32 >> 14;\n" "C_return( (int32_t)u32 );")) (define ((wrap-hash/triple32 hf) obj bound) (%hash/limit (triple30 (hf obj bound)) bound) ) ;; Hooks ;provide a procedure binding (define ##sys#number-hash-hook *equal?-hash) ) ;module (srfi 69 hash)