;;;; rabin-karp.scm ;;;; Kon Lovett, Apr '06 ;;;; Kon Lovett, Aug '10 ;;;; Kon Lovett, Aug '22 (module rabin-karp (;export make-string-search collect-string-search) (import scheme utf8 (chicken base) (chicken sort) (chicken type) (srfi 1) utf8-srfi-13 srfi-69 (check-errors sys)) (define-type search-result (pair string (pair fixnum fixnum))) (define-type string-searcher (string #!optional fixnum fixnum --> (or false search-result))) (define-type string-hasher (string #!optional fixnum --> fixnum)) (define-type string-comparer (string string --> boolean)) (: make-string-search ((list-of string) #!optional string-comparer string-hasher --> string-searcher)) (: collect-string-search (string-searcher string --> (list-of search-result))) ;; Rabin-Karp hashing string search. ;; ;; Takes a list of strings, the search substrings, and two optional arguments, an ;; equivalence function and a hash function. ;; ;; Returns a procedure of one argument, the search string. The search procedure ;; returns a list, the matching substring & a list of the start & end positions of ;; the match in the search string, or #f when no match. (define (make-string-search substrs #!optional (test string=?) (hash string-hash)) ; (check-list 'make-string-search substrs) (check-procedure 'make-string-search test) (check-procedure 'make-string-search hash) ; Return a matching procedure for the given search strings. ; Total order of search string lengths. ; Ensures fewer character comparisons are performed to effect ; a match. (let ((substrs-lens (sort! (map (lambda (s) (string-length (check-string 'make-string-search s))) substrs) <) ) ; Search string lookup table (substrs-tbl (make-hash-table test hash) ) ) ; Load search string lookup table from the search strings. (for-each (cut hash-table-set! substrs-tbl <> #t) substrs) ; Return a procedure returning the position of a matching search ; string in the target string, otherwise #f. (lambda (str #!optional (start 0) (end (string-length str))) ; Any matching search string at this position? (let ((match@ (lambda (pos) ; Any matching search string of this length at this position? (let ((substr@ (lambda (sublen) (let ((last (+ pos sublen))) (and (<= last end) (let ((substr (substring/shared str pos last))) (and (hash-table-exists? substrs-tbl substr) `(,substr . (,pos . ,last)) ) ) ) ) ) ) ) (let loop ((lens substrs-lens)) (and (not (null? lens)) (or (substr@ (car lens)) (loop (cdr lens)) ) ) ) ) ) ) ) ; Any matching search string? (let loop ((pos start)) (and (< pos end) (or (match@ pos) (loop (add1 pos)) ) ) ) ) ) ) ) (define (collect-string-search srchr str) (reverse! (unfold-right not identity (lambda (x) ;start search at end of last search (srchr str (cddr x))) (srchr str))) ) ) ;module rabin-karp