;;;; levenshtein-test.scm -*- Scheme -*- (import test) (import (chicken format)) (include "test-gloss.incl") (test-begin "Levenshtein") ;;; ;until R⁷RS (define (string->vector s) (list->vector (string->list s))) ;;; ;;; (import levenshtein-operators) (test-begin "Levenshtein Operators") (test-group "Operations" (test "Insert ref" (make-levenshtein-operator 'Insert "Insert" 1 0 1) (levenshtein-operator-ref 'Insert)) (test "Transpose ref" (make-levenshtein-operator 'Transpose "Transpose" 1 2 2) (levenshtein-operator-ref 'Transpose)) ;#; ;why bother (test-assert "Make foo" (levenshtein-operator=? (make-levenshtein-operator 'foo "foo" 0.5 12 1) (make-levenshtein-operator 'foo "foo" 0.5 12 1))) ;-- Do not re-order these (test-assert "Set! foo" (levenshtein-operator-set! (make-levenshtein-operator 'foo "foo" 0.5 12 1))) (test-assert "Ref foo" (levenshtein-operator=? (make-levenshtein-operator 'foo "foo" 0.5 12 1) (levenshtein-operator-ref 'foo))) (test-assert "Delete! foo" (levenshtein-operator-delete! 'foo)) ;-- (test-assert "Delete clone =" (levenshtein-operator=? (make-levenshtein-operator 'Delete "Delete" 0.5 1 0) (clone-levenshtein-operator 'Delete cost: 0.5))) ) (import (chicken port)) (test-group "Printing" (test "Insert ref" "#,(levenshtein-operator Insert \"Insert\" 1 0 1)" (with-output-to-string (cut write (levenshtein-operator-ref 'Insert)))) (test "Transpose ref" "#,(levenshtein-operator Transpose \"Transpose\" 1 2 2)" (with-output-to-string (cut write (levenshtein-operator-ref 'Transpose)))) (test "Make foo" "#,(levenshtein-operator foo \"foo\" 0.5 12 1)" (with-output-to-string (cut write (make-levenshtein-operator 'foo "foo" 0.5 12 1)))) ) (test-end "Levenshtein Operators") ;;; (import (chicken pretty-print) (srfi 63)) (import levenshtein-vector) (define (print-array arr) (pretty-print (array->list arr))) (define (cost-and-oper-matrix-match? l) (and (= 2 (length l)) (= 6 (car l)) (array? (cadr l)))) (test-begin "Levenshtein Vector") (let () (test-group "levenshtein" (let ((YWCQPGK (string->vector "YWCQPGK")) (LAWYQQKPGKA (string->vector "LAWYQQKPGKA")) ) (test "distance" 6 (let-values (((cost _) (levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA))) cost)) (test "distance limit" 4 (let-values (((cost _) (levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA #:limit-cost 4))) cost)) (test-assert "cost-and-oper-matrix-match" (cost-and-oper-matrix-match? (receive (levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t)))) (test-assert "cost-and-oper-matrix-match: explicit opers" (cost-and-oper-matrix-match? (receive (levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t)))) ) ) ) (test-end "Levenshtein Vector") ;; (cond-expand (functors (test-begin "Levenshtein Vector Functor") (import levenshtein-vector-functor) (include "levenshtein-cost-fixnum") (module levenshtein-vector-fixnum = (levenshtein-vector-functor levenshtein-cost-fixnum)) (include "levenshtein-cost-flonum") (module levenshtein-vector-flonum = (levenshtein-vector-functor levenshtein-cost-flonum)) (let () (import (prefix levenshtein-vector-fixnum fixn:)) (import (prefix levenshtein-vector-flonum flon:)) (test-group "fixnum cost" (let ((YWCQPGK (string->vector "YWCQPGK")) (LAWYQQKPGKA (string->vector "LAWYQQKPGKA")) ) (test "distance" 6 (let-values (((cost _) (fixn:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA))) cost)) (test "distance limit" 4 (let-values (((cost _) (fixn:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA #:limit-cost 4))) cost)) (test-assert "cost-and-oper-matrix-match" (cost-and-oper-matrix-match? (receive (fixn:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t)))) (test-assert "cost-and-oper-matrix-match: explicit opers" (cost-and-oper-matrix-match? (receive (fixn:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t (levenshtein-operator-ref 'Insert) (levenshtein-operator-ref 'Delete) (levenshtein-operator-ref 'Substitute) (levenshtein-operator-ref 'Transpose))))) ) ) (test-group "flonum cost" (let ((YWCQPGK (string->vector "YWCQPGK")) (LAWYQQKPGKA (string->vector "LAWYQQKPGKA")) ) ;must override cost type (test "distance" 6.0 (let-values (((cost _) (flon:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA (clone-levenshtein-operator 'Insert cost: 1.0) (clone-levenshtein-operator 'Delete cost: 1.0) (clone-levenshtein-operator 'Substitute cost: 1.0) (clone-levenshtein-operator 'Transpose cost: 1.0)))) cost)) (test-assert "cost-and-oper-matrix-match" (cost-and-oper-matrix-match? (receive (flon:levenshtein-distance/vector* YWCQPGK LAWYQQKPGKA operations: #t (clone-levenshtein-operator 'Insert cost: 1.0) (clone-levenshtein-operator 'Delete cost: 1.0) (clone-levenshtein-operator 'Substitute cost: 1.0) (clone-levenshtein-operator 'Transpose cost: 1.0))))) ) ) ) (test-end "Levenshtein Vector Functor") ) (else (gloss) (gloss "Skip \"Levenshtein Vector Functor\" testing due to scheme-include install bug") (gloss) ) ) ;;; (import levenshtein-path-iterator) (import levenshtein-print) (test-begin "Levenshtein Path Iterator") (let () ;FIXME these are a transpose of the actual PM since source length < target length ; see YWCQPGK & LAWYQQKPGKA above, shortest used for target ; not a problem in practice since path is the same (define (costs-list) (list 1 2 3 3 4 5 6 7 8 9 10 2 2 2 3 4 5 6 7 8 9 10 3 3 3 3 4 5 6 7 8 9 10 4 4 4 4 3 4 5 6 7 8 9 5 5 5 5 4 4 5 5 6 7 8 6 6 6 6 5 5 5 6 5 6 7 7 7 7 7 6 6 5 6 6 5 6)) (define (opers-list) (let ((io (levenshtein-operator-ref 'Insert)) (so (levenshtein-operator-ref 'Substitute)) (do (levenshtein-operator-ref 'Delete))) (list so do do so do do do do do do do io so so do do do do do do do do io io io so do do do do do do do io io io io so do do do do do do io io io io io so do so do do do io io io io io io so io so do do io io io io io io so do io so do))) (define (gen-test-pm) (let ((rs 7) (cs 11)) (let ((pm (make-array '#() rs cs)) (costs (costs-list)) (opers (opers-list))) (do ((r 0 (add1 r))) ((= r rs) pm) (do ((c 0 (add1 c))) ((= c cs)) (array-set! pm (cons (car costs) (car opers)) r c) (set! costs (cdr costs)) (set! opers (cdr opers)) ) ) ) ) ) (define *test-pm* (gen-test-pm)) (define (gen-real-iter-vec) (let ((vec (make-vector 6)) (iter (levenshtein-path-iterator *test-pm*))) ;FIXME path-iterator goes +1 too far (do ((r (iter) (iter)) (i 0 (add1 i))) ((or (not r) (= i (vector-length vec))) vec) (vector-set! vec i r)))) (define (gen-test-iter-vec) (let ((vec (make-vector 6)) (io (levenshtein-operator-ref 'Insert)) (so (levenshtein-operator-ref 'Substitute)) (do (levenshtein-operator-ref 'Delete))) (vector-set! vec 0 `((1 0 0 ,so) (2 1 1 ,so) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 1 ;`((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 1 0 ,io) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 2 ;`((1 0 0 ,so) (2 1 1 ,so) (3 2 3 ,so) (4 3 5 ,do) (5 4 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 3 ;`((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 3 5 ,do) (5 4 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 4 ;`((1 0 0 ,so) (2 1 1 ,so) (3 2 3 ,so) (4 3 5 ,do) (5 3 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 1 1 ,so) (3 2 2 ,io) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) (vector-set! vec 5 ;`((1 0 0 ,so) (2 0 1 ,do) (3 2 3 ,so) (4 3 5 ,do) (5 3 6 ,do) (6 6 10 ,do)) `((1 0 0 ,so) (2 1 0 ,io) (3 2 2 ,io) (4 4 5 ,so) (5 4 6 ,do) (6 6 10 ,do))) vec)) (gloss "A Path Matrix (strapped)") (print-levenshtein-matrix *test-pm*) (test-assert "iterator proc" (procedure? (levenshtein-path-iterator *test-pm*))) (test-group "Iterator Path (strapped)" (let ((vec (gen-test-iter-vec)) (rvec (gen-real-iter-vec)) ) (test "gen 0" (vector-ref vec 0) (vector-ref rvec 0)) (test "gen 1" (vector-ref vec 1) (vector-ref rvec 1)) (test "gen 2" (vector-ref vec 2) (vector-ref rvec 2)) (test "gen 3" (vector-ref vec 3) (vector-ref rvec 3)) (test "gen 4" (vector-ref vec 4) (vector-ref rvec 4)) (test "gen 5" (vector-ref vec 5) (vector-ref rvec 5)) ) ) ) #; ;works, just very long (let () (import (prefix levenshtein-vector-fixnum fixn:)) (test-group "Iterator Path (\"YWCQPGK\" <> \"LAWYQQKPGKA\")" (let-values (((cost pm) (fixn:levenshtein-distance/vector* (string->vector "YWCQPGK") (string->vector "LAWYQQKPGKA") operations: #t)) ) (gloss "A Path Matrix:") (print-levenshtein-matrix pm) (let loop ((iter (levenshtein-path-iterator pm))) (and-let* ((res (iter))) (gloss res) (loop iter) ) ) ) ) ) (test-end "Levenshtein Path Iterator") ;;; ;FIXME UTF-8 Chars! (cond-expand (functors (test-begin "Levenshtein Sequence Functor") (include "levenshtein-cost-number") (include "levenshtein-sequence-utf8") (import levenshtein-sequence-functor) (module levenshtein-sequence-number-utf8 = (levenshtein-sequence-functor levenshtein-cost-number levenshtein-sequence-utf8)) (import (prefix levenshtein-sequence-number-utf8 numbrutf8:)) (test-group "number cost & utf8 string" (test 6 (numbrutf8:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:elm-eql char=?)) (test 5 (numbrutf8:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:elm-eql char=? #:limit-cost 5)) (test 11/4 (numbrutf8:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:insert-cost 1/2 #:delete-cost 1/4 #:substitute-cost 3/4 #:elm-eql char=?)) (test 9 (numbrutf8:levenshtein-distance/sequence "強食" "弱肉強食秋冬あいうえお" #:elm-eql char=?)) (test 5 (numbrutf8:levenshtein-distance/sequence "強食" "弱肉強食秋冬あいうえお" #:elm-eql char=? #:limit-cost 5)) (test 11/4 (numbrutf8:levenshtein-distance/sequence "強食" "弱肉強食秋冬あいうえお" #:insert-cost 1/2 #:delete-cost 1/4 #:substitute-cost 3/4 #:elm-eql char=?)) ) (include "levenshtein-cost-flonum") (include "levenshtein-sequence-string") (import levenshtein-sequence-functor) (module levenshtein-sequence-flonum-string = (levenshtein-sequence-functor levenshtein-cost-flonum levenshtein-sequence-string)) (import (prefix levenshtein-sequence-flonum-string flnmstrn:)) (test-group "flonum cost & byte string" (test 6.0 (flnmstrn:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=?)) (test 5.0 (flnmstrn:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=? #:limit-cost 5.0)) (test 2.75 (flnmstrn:levenshtein-distance/sequence "YWCQPGK" "LAWYQQKPGKA" #:insert-cost 0.5 #:delete-cost 0.25 #:substitute-cost 0.75 #:elm-eql char=?)) ) (include "levenshtein-cost-flonum") (include "levenshtein-sequence-vector") (import levenshtein-sequence-functor) (module levenshtein-sequence-flonum-vector = (levenshtein-sequence-functor levenshtein-cost-flonum levenshtein-sequence-vector)) (import (prefix levenshtein-sequence-flonum-vector flnmvect:)) (test-group "flonum cost & vector" (let ((YWCQPGK (string->vector "YWCQPGK")) (LAWYQQKPGKA (string->vector "LAWYQQKPGKA")) ) (test 6.0 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=?)) (test 5.0 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=? #:limit-cost 5.0)) (test 2.75 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 0.5 #:delete-cost 0.25 #:substitute-cost 0.75 #:elm-eql char=?)) ) ) (import (prefix levenshtein-sequence-vector sv:)) (test-group "flonum cost & subvector" (let ((YWCQPGK (sv:subsequence/shared (string->vector "00YWCQPGK000") 2 9)) (LAWYQQKPGKA (sv:subsequence/shared (string->vector "000LAWYQQKPGKA00") 3 14)) ) (test 6.0 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=?)) (test 5.0 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 1.0 #:delete-cost 1.0 #:substitute-cost 1.0 #:elm-eql char=? #:limit-cost 5.0)) (test 2.75 (flnmvect:levenshtein-distance/sequence YWCQPGK LAWYQQKPGKA #:insert-cost 0.5 #:delete-cost 0.25 #:substitute-cost 0.75 #:elm-eql char=?)) ) ) (test-end "Levenshtein Sequence Functor") ) (else (gloss) (gloss "Skip \"Levenshtein Sequence Functor\" testing due to scheme-include install bug") (gloss) ) ) ;;; (test-end "Levenshtein") (test-exit)