;;;; levenshtein-test.scm -*- Scheme -*- (import test) (import (only (chicken format) format) (test-utils gloss)) (test-begin "Levenshtein") ;;; ;until R⁷RS (define (string->vector s) (list->vector (string->list s))) ;;; ;;; (import levenshtein-operators) (test-begin "Levenshtein Operators") (test-group "Operations" (test "Insert ref" (make-levenshtein-operator 'Insert "Insert" 1 0 1) (levenshtein-operator-ref 'Insert)) (test "Transpose ref" (make-levenshtein-operator 'Transpose "Transpose" 1 2 2) (levenshtein-operator-ref 'Transpose)) ;#; ;why bother (test-assert "Make foo" (levenshtein-operator=? (make-levenshtein-operator 'foo "foo" 0.5 12 1) (make-levenshtein-operator 'foo "foo" 0.5 12 1))) ;-- Do not re-order these (test-assert "Set! foo" (levenshtein-operator-set! (make-levenshtein-operator 'foo "foo" 0.5 12 1))) (test-assert "Ref foo" (levenshtein-operator=? (make-levenshtein-operator 'foo "foo" 0.5 12 1) (levenshtein-operator-ref 'foo))) (test-assert "Delete! foo" (levenshtein-operator-delete! 'foo)) ;-- (test-assert "Delete clone =" (levenshtein-operator=? (make-levenshtein-operator 'Delete "Delete" 0.5 1 0) (clone-levenshtein-operator 'Delete cost: 0.5))) ) (import (chicken port)) (test-group "Printing" (test "Insert ref" "#,(levenshtein-operator Insert \"Insert\" 1 0 1)" (with-output-to-string (cut write (levenshtein-operator-ref 'Insert)))) (test "Transpose ref" "#,(levenshtein-operator Transpose \"Transpose\" 1 2 2)" (with-output-to-string (cut write (levenshtein-operator-ref 'Transpose)))) (test "Make foo" "#,(levenshtein-operator foo \"foo\" 0.5 12 1)" (with-output-to-string (cut write (make-levenshtein-operator 'foo "foo" 0.5 12 1)))) ) (test-end "Levenshtein Operators") ;;; (import (chicken pretty-print) (srfi 63)) (import levenshtein-vector) (define (print-array arr) (pretty-print (array->list arr))) (define (cost-and-oper-matrix-match? l) (and (= 2 (length l)) (= 6 (car l)) (array? (cadr l)))) (test-begin "Levenshtein Vector") (let () (test-group "levenshtein" (let ((YWCQPGK (string->vector "YWCQPGK")) (LAWYQQKPGKA (string->vector "LAWYQQKPGKA")) ) (test "distance" 6 (let-values (((cost _) (levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA))) cost)) (test "distance limit" #f (let-values (((cost _) (levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA #:limit-cost 4))) cost)) (test-assert "cost-and-oper-matrix-match" (cost-and-oper-matrix-match? (receive (levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t)))) (test-assert "cost-and-oper-matrix-match: explicit opers" (cost-and-oper-matrix-match? (receive (levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t)))) ) ) ) (test-end "Levenshtein Vector") ;; (test-begin "Levenshtein Vector Functor") (import levenshtein-vector-functor) (import levenshtein-cost-fixnum) (module levenshtein-vector-fixnum = (levenshtein-vector-functor levenshtein-cost-fixnum)) (import levenshtein-cost-flonum) (module levenshtein-vector-flonum = (levenshtein-vector-functor levenshtein-cost-flonum)) (import levenshtein-vector-functor levenshtein-cost-fixnum) (module levenshtein-vector-fixnum = (levenshtein-vector-functor levenshtein-cost-fixnum)) (import (prefix levenshtein-vector-fixnum vcfx:)) (test "should be 2" 2 (let-values (((cost _) (vcfx:levenshtein-distance/vector* (string->vector "ctas") (string->vector "cats") #:elm-eql char=?)) ) cost)) (test "should be 1" 1 (let-values (((cost _) (vcfx:levenshtein-distance/vector* (string->vector "ctas") (string->vector "cats") (levenshtein-operator-ref 'Substitute) (levenshtein-operator-ref 'Transpose) #:elm-eql char=?)) ) cost)) (let () (import (prefix levenshtein-vector-fixnum fixn:)) (import (prefix levenshtein-vector-flonum flon:)) (test-group "fixnum cost" (let ((YWCQPGK (string->vector "YWCQPGK")) (LAWYQQKPGKA (string->vector "LAWYQQKPGKA")) ) (test "distance" 6 (let-values (((cost _) (fixn:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA))) cost)) (test "distance limit" #f (let-values (((cost _) (fixn:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA #:limit-cost 4))) cost)) (test-assert "cost-and-oper-matrix-match" (cost-and-oper-matrix-match? (receive (fixn:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t)))) (test-assert "cost-and-oper-matrix-match: explicit opers" (cost-and-oper-matrix-match? (receive (fixn:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t (levenshtein-operator-ref 'Insert) (levenshtein-operator-ref 'Delete) (levenshtein-operator-ref 'Substitute) (levenshtein-operator-ref 'Transpose))))) ) ) (test-group "flonum cost" (let ((YWCQPGK (string->vector "YWCQPGK")) (LAWYQQKPGKA (string->vector "LAWYQQKPGKA")) ) ;must override cost type (test "distance" 6.0 (let-values (((cost _) (flon:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA (clone-levenshtein-operator 'Insert cost: 1.0) (clone-levenshtein-operator 'Delete cost: 1.0) (clone-levenshtein-operator 'Substitute cost: 1.0) (clone-levenshtein-operator 'Transpose cost: 1.0)))) cost)) (test-assert "cost-and-oper-matrix-match" (cost-and-oper-matrix-match? (receive (flon:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t (clone-levenshtein-operator 'Insert cost: 1.0) (clone-levenshtein-operator 'Delete cost: 1.0) (clone-levenshtein-operator 'Substitute cost: 1.0) (clone-levenshtein-operator 'Transpose cost: 1.0))))) ) ) ) (test-end "Levenshtein Vector Functor") ;;; (import levenshtein-path-iterator) (import levenshtein-print) (test-begin "Levenshtein Path Iterator") (let () ;FIXME these are a transpose of the actual PM since source length < target length ; see YWCQPGK & LAWYQQKPGKA above, shortest used for target ; not a problem in practice since path is the same (define (costs-list) (list 1 2 3 3 4 5 6 7 8 9 10 2 2 2 3 4 5 6 7 8 9 10 3 3 3 3 4 5 6 7 8 9 10 4 4 4 4 3 4 5 6 7 8 9 5 5 5 5 4 4 5 5 6 7 8 6 6 6 6 5 5 5 6 5 6 7 7 7 7 7 6 6 5 6 6 5 6)) (define (opers-list) (let ((io (levenshtein-operator-ref 'Insert)) (so (levenshtein-operator-ref 'Substitute)) (do (levenshtein-operator-ref 'Delete))) (list so do do so do do do do do do do io so so do do do do do do do do io io io so do do do do do do do io io io io so do do do do do do io io io io io so do so do do do io io io io io io so io so do do io io io io io io so do io so do))) (define (gen-test-pm) (let ((rs 7) (cs 11)) (let ((pm (make-array '#() rs cs)) (costs (costs-list)) (opers (opers-list))) (do ((r 0 (add1 r))) ((= r rs) pm) (do ((c 0 (add1 c))) ((= c cs)) (array-set! pm (cons (car costs) (car opers)) r c) (set! costs (cdr costs)) (set! opers (cdr opers)) ) ) ) ) ) (define *test-pm* (gen-test-pm)) (define (gen-real-iter-vec) (let ((vec (make-vector 6)) (iter (levenshtein-path-iterator *test-pm*))) ;FIXME path-iterator goes +1 too far (do ((r (iter) (iter)) (i 0 (add1 i))) ((or (not r) (= i (vector-length vec))) vec) (vector-set! vec i r)))) (define (gen-test-iter-vec) (let ((vec (make-vector 6)) (io (levenshtein-operator-ref 'Insert)) (so (levenshtein-operator-ref 'Substitute)) (do (levenshtein-operator-ref 'Delete))) (vector-set! vec 0 `((1 0 0 ,so) (2 1 1 ,so) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 1 ;`((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 1 0 ,io) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 2 ;`((1 0 0 ,so) (2 1 1 ,so) (3 2 3 ,so) (4 3 5 ,do) (5 4 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 3 ;`((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 3 5 ,do) (5 4 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 4 ;`((1 0 0 ,so) (2 1 1 ,so) (3 2 3 ,so) (4 3 5 ,do) (5 3 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 1 1 ,so) (3 2 2 ,io) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 5 ;`((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 3 5 ,do) (5 3 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 1 0 ,io) (3 2 2 ,io) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) vec)) (gloss "A Path Matrix (strapped)") (print-levenshtein-matrix *test-pm*) (test-assert "iterator proc" (procedure? (levenshtein-path-iterator *test-pm*))) (test-group "Iterator Path (strapped)" (let ((vec (gen-test-iter-vec)) (rvec (gen-real-iter-vec)) ) (test "gen 0" (vector-ref vec 0) (vector-ref rvec 0)) (test "gen 1" (vector-ref vec 1) (vector-ref rvec 1)) (test "gen 2" (vector-ref vec 2) (vector-ref rvec 2)) (test "gen 3" (vector-ref vec 3) (vector-ref rvec 3)) (test "gen 4" (vector-ref vec 4) (vector-ref rvec 4)) (test "gen 5" (vector-ref vec 5) (vector-ref rvec 5)) ) ) ) #; ;NOTE too long output (let () (import (prefix levenshtein-vector-fixnum fixn:)) (test-group "Iterator Path (\"YWCQPGK\" <> \"LAWYQQKPGKA\")" (let-values (((cost pm) (fixn:levenshtein-distance/vector* (string->vector "YWCQPGK") (string->vector "LAWYQQKPGKA") operations: #t)) ) (gloss "A Path Matrix:") (print-levenshtein-matrix pm) (let loop ((iter (levenshtein-path-iterator pm))) (and-let* ((res (iter))) (gloss res) (loop iter) ) ) ) ) ) (test-end "Levenshtein Path Iterator") ;;; (test-begin "Levenshtein Sequence Functor") (import levenshtein-sequence-functor levenshtein-cost-fixnum levenshtein-sequence-vector) (module levenshtein-sequence-fixnum-vector = (levenshtein-sequence-functor levenshtein-cost-fixnum levenshtein-sequence-vector)) (import (prefix levenshtein-sequence-fixnum-vector fxvc:)) (test "should be 2" 2 (fxvc:levenshtein-distance/sequence (string->vector "ctas") (string->vector "cats") #:elm-eql char=?)) (import levenshtein-sequence-functor levenshtein-cost-flonum levenshtein-sequence-string) (module levenshtein-sequence-flonum-string = (levenshtein-sequence-functor levenshtein-cost-flonum levenshtein-sequence-string)) (import (prefix levenshtein-sequence-flonum-string flnmstrn:)) (test-group "flonum cost & byte string" (test 6.0 (flnmstrn:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=?)) (test #f (flnmstrn:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=? #:limit-cost 5.0)) (test 2.75 (flnmstrn:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:insert-cost 0.5 #:delete-cost 0.25 #:substitute-cost 0.75 #:elm-eql char=?)) ) (import levenshtein-sequence-functor levenshtein-cost-number levenshtein-sequence-string) (module levenshtein-sequence-number-string = (levenshtein-sequence-functor levenshtein-cost-number levenshtein-sequence-string)) (import (prefix levenshtein-sequence-number-string numbrstrn:)) (test-group "number cost & utf8 string" (test 6 (numbrstrn:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:elm-eql char=?)) (test #f (numbrstrn:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:elm-eql char=? #:limit-cost 5)) (test 11/4 (numbrstrn:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:insert-cost 1/2 #:delete-cost 1/4 #:substitute-cost 3/4 #:elm-eql char=?)) (test 9 (numbrstrn:levenshtein-distance/sequence "強食" "弱肉強食秋冬あいうえお" #:elm-eql char=?)) (test #f (numbrstrn:levenshtein-distance/sequence "強食" "弱肉強食秋冬あいうえお" #:elm-eql char=? #:limit-cost 5)) (test 11/4 (numbrstrn:levenshtein-distance/sequence "強食" "弱肉強食秋冬あいうえお" #:insert-cost 1/2 #:delete-cost 1/4 #:substitute-cost 3/4 #:elm-eql char=?)) ) (import levenshtein-sequence-functor levenshtein-cost-flonum levenshtein-sequence-vector) (module levenshtein-sequence-flonum-vector = (levenshtein-sequence-functor levenshtein-cost-flonum levenshtein-sequence-vector)) (import (prefix levenshtein-sequence-flonum-vector flnmvect:)) (test-group "flonum cost & vector" (let ((YWCQPGK (string->vector "YWCQPGK")) (LAWYQQKPGKA (string->vector "LAWYQQKPGKA")) ) (test 6.0 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=?)) (test #f (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=? #:limit-cost 5.0)) (test 2.75 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 0.5 #:delete-cost 0.25 #:substitute-cost 0.75 #:elm-eql char=?)) ) ) (import (prefix levenshtein-sequence-vector sv:)) (test-group "flonum cost & subvector" (let ((YWCQPGK (sv:subsequence/shared (string->vector "00YWCQPGK000") 2 9)) (LAWYQQKPGKA (sv:subsequence/shared (string->vector "000LAWYQQKPGKA00") 3 14)) ) (test 6.0 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=?)) (test #f (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=? #:limit-cost 5.0)) (test 2.75 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 0.5 #:delete-cost 0.25 #:substitute-cost 0.75 #:elm-eql char=?)) ) ) (test-end "Levenshtein Sequence Functor") ;;; (test-end "Levenshtein") (test-exit)