;;;; rabin-karp.scm ;;;; Kon Lovett, Apr '06 ;;;; Kon Lovett, Aug '10 ;;;; Kon Lovett, Aug '22 (module rabin-karp (;export make-string-search collect-string-search) (import scheme utf8 (chicken base) (only (chicken sort) sort!) (chicken fixnum) (chicken type) (only (srfi 1) unfold-right reverse!) (only utf8-srfi-13 substring/shared) (only (srfi 69) string-hash make-hash-table hash-table-set! hash-table-exists?) (check-errors sys)) (define-type search-result (pair string (pair fixnum fixnum))) (define-type string-searcher (string #!optional fixnum fixnum -> (or false search-result))) (define-type string-hasher (string #!optional fixnum -> fixnum)) (define-type string-comparer (string string -> boolean)) (: make-string-search ((list-of string) #!optional string-comparer string-hasher -> string-searcher)) (: collect-string-search (string-searcher string -> (list-of search-result))) ;; Rabin-Karp hashing string search. ;; ;; Takes a list of strings, the search substrings, and two optional arguments, an ;; equivalence function and a hash function. ;; ;; Returns a procedure of one argument, the search string. The search procedure ;; returns a list, the matching substring & a list of the start & end positions of ;; the match in the search string, or #f when no match. (define-inline (search-result s p l) `(,s . (,p . ,l))) (define-inline (search-result-str r) (car r)) (define-inline (search-result-pos r) (cadr r)) (define-inline (search-result-last r) (cddr r)) (define (make-string-search substrs #!optional (test string=?) (hash string-hash)) ;Return a matching procedure for the given search strings. ;Total order of search string lengths. ;Ensures fewer character comparisons are performed to effect ;a match. (let ((sublens (sort! (map (lambda (s) (string-length (check-string 'make-string-search s))) (check-list 'make-string-search substrs)) <) ) ;Search string lookup table (subtbl (make-hash-table (check-procedure 'make-string-search test) (check-procedure 'make-string-search hash)) ) ) ;Load search string lookup table from the search strings. (for-each (cut hash-table-set! subtbl <> #t) substrs) ;Return a procedure returning the position of a matching search ;string in the target string, otherwise #f. (lambda (str #!optional (start 0) (end (string-length str))) ;Any matching search string at this position? (let ((match@ (lambda (pos) ;Any matching search string of this length at this position? (let ((substr@ (lambda (sublen) (let ((last (fx+ pos sublen))) (and (fx<= last end) (let ((substr (substring/shared str pos last))) (and (hash-table-exists? subtbl substr) (search-result substr pos last) ) ) ) ) ) ) ) (let loop ((lens sublens)) (and (not (null? lens)) (or (substr@ (car lens)) (loop (cdr lens)) ) ) ) ) ) ) ) ;Any matching search string? (let loop ((pos start)) (and (fx< pos end) (or (match@ pos) (loop (fx+ pos 1)) ) ) ) ) ) ) ) (define (collect-string-search srchr str) (reverse! (unfold-right not identity ;start search at end of last search (lambda (r) (srchr str (search-result-last r))) (srchr str))) ) ) ;module rabin-karp