;;;; levenshtein-operators.scm -*- Scheme -*- ;;;; Kon Lovett, Mar '20 ;;;; Kon Lovett, Apr '12 ;;;; Kon Lovett, Sep '05 ;; Issues ;; ;; - So as not to force use of "numbers" egg the 'cost' parameter is ;; not validated! (module levenshtein-operators (;export make-levenshtein-operator clone-levenshtein-operator levenshtein-operator? levenshtein-operator-key levenshtein-operator-name levenshtein-operator-cost levenshtein-operator-above levenshtein-operator-left levenshtein-operator=? check-levenshtein-operator error-levenshtein-operator levenshtein-operator-ref levenshtein-operator-set! levenshtein-operator-delete! levenshtein-base-operator? levenshtein-insert-operator? levenshtein-delete-operator? levenshtein-operator-reset levenshtein-base-operators-vector) (import scheme) (import (chicken base)) (import (chicken type)) (import (chicken syntax)) (import (chicken format)) (import (chicken read-syntax)) (import (only type-checks check-string check-symbol check-natural-fixnum check-number define-check+error-type)) (import (only type-errors error-argument-type)) (import (srfi 69)) (import vector-lib) ;;; ;; Types (define-type levenshtein-operator (struct levenshtein-operator)) (: make-levenshtein-operator (symbol string number fixnum fixnum -> levenshtein-operator)) (: clone-levenshtein-operator ((or symbol levenshtein-operator) #!rest -> levenshtein-operator)) (: levenshtein-operator? (* -> boolean : levenshtein-operator)) (: levenshtein-operator-key (levenshtein-operator -> symbol)) (: levenshtein-operator-name (levenshtein-operator -> string)) (: levenshtein-operator-cost (levenshtein-operator -> number)) (: levenshtein-operator-above (levenshtein-operator -> fixnum)) (: levenshtein-operator-left (levenshtein-operator -> fixnum)) (: levenshtein-operator=? (levenshtein-operator levenshtein-operator -> boolean)) ;(: check-levenshtein-operator ( -> )) ;(: error-levenshtein-operator ( -> )) (: levenshtein-operator-ref (symbol -> levenshtein-operator)) (: levenshtein-operator-set! (levenshtein-operator -> void)) (: levenshtein-operator-delete! ((or symbol levenshtein-operator) -> void)) (: levenshtein-base-operator? (levenshtein-operator -> boolean)) (: levenshtein-insert-operator? (levenshtein-operator -> boolean)) (: levenshtein-delete-operator? (levenshtein-operator -> boolean)) (: levenshtein-operator-reset (-> void)) (: levenshtein-base-operators-vector (-> vector)) ;std unhygienic prelude ? (define-for-syntax (symbol-component arg) (cond ((string? arg) arg) ((symbol? arg) (symbol->string arg)) (else (error "invalid symbol component" arg args)) ) ) (define-for-syntax (symbol-make . args) (let loop ((args args) (ls '())) (if (null? args) (string->symbol (string-append (reverse ls))) (loop (cdr args) (cons (symbol-component (car args)) ls)) ) ) ) ;;; ; N & M mean length of source & target vectors respectively. ; ; Edit operation offset - ( Ni Mj) ; where is {Insert Delete Substitute Transpose ...}, ; Ni and Mj are fixnums >= 0, ; interpreted as D[I - Ni, J - Mj], S[I - Ni], and T[J - Mj], ; where Ni = 0 or Mj = 0 means no equivalence test required. ; ; Edit cost - (( Cost) ...) where Cost is number, usually 1. ; ; ?Need source & target indicies separate from above & left? ; Operator type ;create tag & ctor arity (define-record levenshtein-operator key name cost above left) (define-record-type levenshtein-operator (*make-levenshtein-operator key name cost above left) levenshtein-operator? (key levenshtein-operator-key) (name levenshtein-operator-name) (cost levenshtein-operator-cost) (above levenshtein-operator-above) (left levenshtein-operator-left)) (define-check+error-type levenshtein-operator) ; The operator printer (define-record-printer (levenshtein-operator lo out) (fprintf out "#,(levenshtein-operator ~A ~S ~A ~A ~A)" (levenshtein-operator-key lo) (levenshtein-operator-name lo) (levenshtein-operator-cost lo) (levenshtein-operator-above lo) (levenshtein-operator-left lo)) ) ; No validation of reader input, only for printer output! (define-reader-ctor 'levenshtein-operator *make-levenshtein-operator) ;; The levenshtein operator table (define clear-lo-table) (define *lo-ref) (define *lo-set!) (define *lo-del!) (let ((+lo-table+ #f)) (set! clear-lo-table (lambda () (set! +lo-table+ (make-hash-table eq? hash-by-identity 5)) ) ) (set! *lo-ref (lambda (key) (hash-table-ref/default +lo-table+ key #f) ) ) (set! *lo-set! (lambda (lo) (hash-table-set! +lo-table+ (levenshtein-operator-key lo) lo) ) ) (set! *lo-del! (lambda (lo) (hash-table-delete! +lo-table+ (levenshtein-operator-key lo)) ;FIXME 'hash-table-delete!' -> boolean but should be void per SRFI-69 doc (void) ) ) ) ;; Operator table access (define (levenshtein-operator=? a b) (check-levenshtein-operator 'levenshtein-operator=? a) (check-levenshtein-operator 'levenshtein-operator=? b) (and (eq? (levenshtein-operator-key a) (levenshtein-operator-key b)) (string=? (levenshtein-operator-name a) (levenshtein-operator-name b)) (= (levenshtein-operator-cost a) (levenshtein-operator-cost b)) (= (levenshtein-operator-above a) (levenshtein-operator-above b)) (= (levenshtein-operator-left a) (levenshtein-operator-left b))) ) (define (get-levenshtein-operator loc key) (or (*lo-ref key) (error loc "couldn't find levenshtein operator for key" key) ) ) (define (get-levenshtein-operator-arg loc lo) (cond ((levenshtein-operator? lo) lo) ((symbol? lo) (get-levenshtein-operator loc lo)) (else (error-argument-type loc lo "levenshtein operator or an operator key" "oper") ) ) #; ;FIXME in C4 !!!! (unless (levenshtein-operator? lo) (if (symbol? lo) (get-levenshtein-operator loc lo) (error-argument-type loc lo "levenshtein operator or an operator key" "oper")) ) ) (define (levenshtein-operator-ref key) (get-levenshtein-operator 'levenshtein-operator-ref (check-symbol 'levenshtein-operator-ref key "key")) ) (define (levenshtein-operator-set! lo) (*lo-set! (check-levenshtein-operator 'levenshtein-operator-set! lo "oper")) ) (define (levenshtein-operator-delete! lo) (*lo-del! (get-levenshtein-operator-arg 'levenshtein-operator-delete! lo)) ) (define (make-levenshtein-operator key name cost above left) (*make-levenshtein-operator (check-symbol 'make-levenshtein-operator key "key") (check-string 'make-levenshtein-operator name "name") (check-number 'make-levenshtein-operator cost "cost") (check-natural-fixnum 'make-levenshtein-operator above "above") (check-natural-fixnum 'make-levenshtein-operator left "left")) ) ;let-syntax & (let ((KEY (or KEY (levenshtein-operator-KEY EO)))... ) ...) (define-syntax set-oper-key-var (ir-macro-transformer (lambda (exp inj cmp) (let* ( (key-var (cadr exp)) (lo (caddr exp)) (sym (symbol-append 'levenshtein-operator- (strip-syntax key-var))) ) `(unless ,key-var (set! ,key-var (,(inj sym) ,lo)) ) ) ) ) ) (define (clone-levenshtein-operator lo #!key key name cost above left) (let ((lo (get-levenshtein-operator-arg 'clone-levenshtein-operator lo))) (set-oper-key-var key lo) (set-oper-key-var name lo) (set-oper-key-var cost lo) (set-oper-key-var above lo) (set-oper-key-var left lo) (make-levenshtein-operator key name cost above left) ) ) ;; Base levenshtein operators predicates (define (levenshtein-base-operator? lo) (and (<= (levenshtein-operator-above lo) 1) (<= (levenshtein-operator-left lo) 1))) (define (levenshtein-insert-operator? lo) (and (levenshtein-operator? lo) (= (levenshtein-operator-above lo) 0) (= (levenshtein-operator-left lo) 1))) (define (levenshtein-delete-operator? lo) (and (levenshtein-operator? lo) (= (levenshtein-operator-above lo) 1) (= (levenshtein-operator-left lo) 0))) (define levenshtein-operator-reset) (define levenshtein-base-operators-vector) (let ((+base-opers+ #f)) (set! levenshtein-operator-reset (lambda () ; insert must be 1st! (set! +base-opers+ (vector (*make-levenshtein-operator 'Insert "Insert" 1 0 1) (*make-levenshtein-operator 'Delete "Delete" 1 1 0) (*make-levenshtein-operator 'Substitute "Substitute" 1 1 1))) (clear-lo-table) (*lo-set! (vector-ref +base-opers+ 0)) (*lo-set! (vector-ref +base-opers+ 1)) (*lo-set! (vector-ref +base-opers+ 2)) ; Standard, but not part of the base set (*lo-set! (*make-levenshtein-operator 'Transpose "Transpose" 1 2 2)) ) ) (set! levenshtein-base-operators-vector (lambda () (vector-copy +base-opers+) ) ) ) ;;; ;;; Module init ;;; (levenshtein-operator-reset) ) ;module levenshtein-operators