;;;; levenshtein-operators.scm -*- Scheme -*- ;;;; Kon Lovett, Mar '20 ;;;; Kon Lovett, Apr '12 ;;;; Kon Lovett, Sep '05 ;; Issues ;; ;; - So as not to force use of "numbers" egg the 'cost' parameter is ;; not validated! (module levenshtein-operators (;export levenshtein-operator make-levenshtein-operator clone-levenshtein-operator levenshtein-operator? levenshtein-operator-key levenshtein-operator-name levenshtein-operator-cost levenshtein-operator-above levenshtein-operator-left levenshtein-operator=? check-levenshtein-operator error-levenshtein-operator levenshtein-operator-ref levenshtein-operator-set! levenshtein-operator-delete! levenshtein-base-operator? levenshtein-insert-operator? levenshtein-delete-operator? levenshtein-substitute-operator? levenshtein-required-operator? levenshtein-operator-reset levenshtein-required-operators levenshtein-base-operators levenshtein-extended-operators) (import scheme (chicken base) (chicken type) (chicken syntax) (chicken format) (chicken read-syntax) (only record-variants define-record-type-variant) (only (check-errors basic) define-check+error-type error-argument-type) (srfi 1) (srfi 69) vector-lib) (define-type levenshtein-operator (struct levenshtein-operator)) (: make-levenshtein-operator (symbol string number fixnum fixnum -> levenshtein-operator)) (: clone-levenshtein-operator ((or symbol levenshtein-operator) #!key #:key symbol #:name string #:cost number #:above fixnum #:left fixnum -> levenshtein-operator)) (: levenshtein-operator? (* -> boolean : levenshtein-operator)) (: levenshtein-operator-key (levenshtein-operator --> symbol)) (: levenshtein-operator-name (levenshtein-operator --> string)) (: levenshtein-operator-cost (levenshtein-operator --> number)) ;FIXME should be * (: levenshtein-operator-above (levenshtein-operator --> fixnum)) (: levenshtein-operator-left (levenshtein-operator --> fixnum)) (: levenshtein-operator=? (levenshtein-operator levenshtein-operator --> boolean)) ;(: check-levenshtein-operator ( -> )) ;(: error-levenshtein-operator ( -> )) (: levenshtein-operator-ref (symbol --> levenshtein-operator)) (: levenshtein-operator-set! (levenshtein-operator -> void)) (: levenshtein-operator-delete! ((or symbol levenshtein-operator) -> void)) (: levenshtein-base-operator? (levenshtein-operator --> boolean)) (: levenshtein-insert-operator? (levenshtein-operator --> boolean)) (: levenshtein-delete-operator? (levenshtein-operator --> boolean)) (: levenshtein-substitute-operator? (levenshtein-operator --> boolean)) (: levenshtein-required-operator? (levenshtein-operator --> boolean)) (: levenshtein-operator-reset (-> void)) (: levenshtein-required-operators (--> (vector-of levenshtein-operator))) (: levenshtein-base-operators (--> (vector-of levenshtein-operator))) (: levenshtein-extended-operators ((list-of levenshtein-operator) --> (vector-of levenshtein-operator))) ;;(std-prelude) (cond-expand ((or chicken-5.0 chicken-5.1) (define (set-record-printer! tag proc) (##sys#register-record-printer tag proc) ) ) (else) ) ;; (define (natural-fixnum? x) (and (fixnum? x) (not (negative? x)))) (define-check+error-type string) (define-check+error-type symbol) (define-check+error-type number) (define-check+error-type natural-fixnum) ;std unhygienic prelude ? (define-for-syntax (symbol-component arg) (cond ((string? arg) arg) ((symbol? arg) (symbol->string arg)) (else (error "invalid symbol component" arg args)) ) ) (define-for-syntax (symbol-make . args) (let loop ((args args) (ls '())) (if (null? args) (string->symbol (string-append (reverse ls))) (loop (cdr args) (cons (symbol-component (car args)) ls)) ) ) ) ;;; ; N & M mean length of source & target vectors respectively. ; ; Edit operation offset - ( Ni Mj) ; where is {Insert Delete Substitute Transpose ...}, ; Ni and Mj are fixnums >= 0, ; interpreted as D[I - Ni, J - Mj], S[I - Ni], and T[J - Mj], ; where Ni = 0 or Mj = 0 means no equivalence test required. ; ; Edit cost - (( Cost) ...) where Cost is number, usually 1. ; ; ?Need source & target indicies separate from above & left? ; Operator type ;create tag & ctor arity (define levenshtein-operator 'levenshtein-operator) (define-record-type-variant levenshtein-operator (unchecked inline unsafe) (%make-levenshtein-operator key name cost above left) %levenshtein-operator? (key %levenshtein-operator-key) (name %levenshtein-operator-name) (cost %levenshtein-operator-cost) (above %levenshtein-operator-above) (left %levenshtein-operator-left)) ;; Levenshtein operator table (memoization) (define clear-lo-table) (define *lo-ref) (define *lo-set!) (define *lo-del!) (let ((+lo-table+ (the (or false hash-table) #f))) (set! clear-lo-table (lambda () (set! +lo-table+ (make-hash-table eq? hash-by-identity 5)) ) ) (set! *lo-ref (lambda (key) (hash-table-ref/default +lo-table+ key #f) ) ) (set! *lo-set! (lambda (lo) (hash-table-set! +lo-table+ (%levenshtein-operator-key lo) lo) ) ) (set! *lo-del! (lambda (lo) (hash-table-delete! +lo-table+ (%levenshtein-operator-key lo)) ;FIXME 'hash-table-delete!' -> boolean but should be void per SRFI-69 doc (void) ) ) ) ;;public, checked, interface (define (make-levenshtein-operator key name cost above left) (%make-levenshtein-operator (check-symbol 'make-levenshtein-operator key "key") (check-string 'make-levenshtein-operator name "name") (check-number 'make-levenshtein-operator cost "cost") (check-natural-fixnum 'make-levenshtein-operator above "above") (check-natural-fixnum 'make-levenshtein-operator left "left")) ) (define (levenshtein-operator? obj) (%levenshtein-operator? obj)) (define-check+error-type levenshtein-operator) ;record literal (define (srfi-10-levenshtein-operator-printer lo #!optional (out (current-output-port))) #; ;we know type (check-levenshtein-operator 'srfi-10-levenshtein-operator-printer lo) (format out "#,(levenshtein-operator ~A ~S ~A ~A ~A)" (%levenshtein-operator-key lo) (%levenshtein-operator-name lo) (%levenshtein-operator-cost lo) (%levenshtein-operator-above lo) (%levenshtein-operator-left lo)) ) (set-record-printer! levenshtein-operator srfi-10-levenshtein-operator-printer) (define-reader-ctor 'levenshtein-operator make-levenshtein-operator) ;field ref (define (levenshtein-operator-key lo) (%levenshtein-operator-key (check-levenshtein-operator 'levenshtein-operator-key lo))) (define (levenshtein-operator-name lo) (%levenshtein-operator-name (check-levenshtein-operator 'levenshtein-operator-name lo))) (define (levenshtein-operator-cost lo) (%levenshtein-operator-cost (check-levenshtein-operator 'levenshtein-operator-cost lo))) (define (levenshtein-operator-above lo) (%levenshtein-operator-above (check-levenshtein-operator 'levenshtein-operator-above lo))) (define (levenshtein-operator-left lo) (%levenshtein-operator-left (check-levenshtein-operator 'levenshtein-operator-left lo))) ;; Operator table access (define (levenshtein-operator=? a b) (check-levenshtein-operator 'levenshtein-operator=? a) (check-levenshtein-operator 'levenshtein-operator=? b) (and (eq? (%levenshtein-operator-key a) (%levenshtein-operator-key b)) (string=? (%levenshtein-operator-name a) (%levenshtein-operator-name b)) (= (%levenshtein-operator-cost a) (%levenshtein-operator-cost b)) (= (%levenshtein-operator-above a) (%levenshtein-operator-above b)) (= (%levenshtein-operator-left a) (%levenshtein-operator-left b))) ) (define (get-levenshtein-operator loc key) (or (*lo-ref key) (error loc "couldn't find levenshtein operator for key" key) ) ) (define (get-levenshtein-operator-arg loc lo) (cond ((levenshtein-operator? lo) lo) ((symbol? lo) (get-levenshtein-operator loc lo)) (else (error-argument-type loc lo "levenshtein operator or an operator key" "oper") ) ) ) (define (levenshtein-operator-ref key) (get-levenshtein-operator 'levenshtein-operator-ref (check-symbol 'levenshtein-operator-ref key "key")) ) (define (levenshtein-operator-set! lo) (*lo-set! (check-levenshtein-operator 'levenshtein-operator-set! lo "oper")) ) (define (levenshtein-operator-delete! lo) (*lo-del! (get-levenshtein-operator-arg 'levenshtein-operator-delete! lo)) ) ;let-syntax & (let ((KEY (or KEY (levenshtein-operator-KEY EO)))... ) ...) (define-syntax set-oper-key-var (ir-macro-transformer (lambda (exp inj cmp) (let* ((key-var (cadr exp)) (lo (caddr exp)) (sym (symbol-append 'levenshtein-operator- (strip-syntax key-var))) ) `(unless ,key-var (set! ,key-var (,(inj sym) ,lo)) ) ) ) ) ) (define (clone-levenshtein-operator lo #!key key name cost above left) (let ((lo (get-levenshtein-operator-arg 'clone-levenshtein-operator lo))) (set-oper-key-var key lo) (set-oper-key-var name lo) (set-oper-key-var cost lo) (set-oper-key-var above lo) (set-oper-key-var left lo) (make-levenshtein-operator key name cost above left) ) ) ;; Base levenshtein operators predicates (define (levenshtein-base-operator? lo) (and (levenshtein-operator? lo) (<= (%levenshtein-operator-above lo) 1) (<= (%levenshtein-operator-left lo) 1)) ) (define (levenshtein-insert-operator? lo) (and (levenshtein-operator? lo) (= (%levenshtein-operator-above lo) 0) (= (%levenshtein-operator-left lo) 1)) ) (define (levenshtein-delete-operator? lo) (and (levenshtein-operator? lo) (= (%levenshtein-operator-above lo) 1) (= (%levenshtein-operator-left lo) 0)) ) (define (levenshtein-substitute-operator? lo) (and (levenshtein-operator? lo) (= (%levenshtein-operator-above lo) 1) (= (%levenshtein-operator-left lo) 1)) ) (define levenshtein-operator-reset) (define levenshtein-required-operators) (define levenshtein-base-operators) (let ((+base-opers+ (the (or false vector) #f))) (set! levenshtein-operator-reset (lambda () ; insert must be 1st! (set! +base-opers+ (vector (%make-levenshtein-operator 'Insert "Insert" 1 0 1) (%make-levenshtein-operator 'Delete "Delete" 1 1 0) (%make-levenshtein-operator 'Substitute "Substitute" 1 1 1))) (clear-lo-table) (*lo-set! (vector-ref +base-opers+ 0)) (*lo-set! (vector-ref +base-opers+ 1)) (*lo-set! (vector-ref +base-opers+ 2)) ; Standard, but not part of the base set (*lo-set! (%make-levenshtein-operator 'Transpose "Transpose" 1 2 2)) ) ) (set! levenshtein-required-operators (lambda () (vector (vector-ref +base-opers+ 0) (vector-ref +base-opers+ 1)))) (set! levenshtein-base-operators (lambda () (vector-copy +base-opers+))) ) (define levenshtein-required-operator? (disjoin levenshtein-insert-operator? levenshtein-delete-operator?)) (define (levenshtein-extended-operators operlist) (for-each (cut check-levenshtein-operator 'levenshtein-extended-operators <>) operlist) (if (or (null? operlist) (every levenshtein-base-operator? operlist)) (levenshtein-base-operators) (let ((insopr (or (find levenshtein-insert-operator? operlist) (levenshtein-operator-ref 'Insert))) (delopr (or (find levenshtein-delete-operator? operlist) (levenshtein-operator-ref 'Delete))) (rstops (filter (complement levenshtein-required-operator?) operlist)) ) (list->vector `(,insopr ,delopr ,@rstops)) ) ) ) ;;; ;;; Module init ;;; (levenshtein-operator-reset) ) ;module levenshtein-operators