;; ;; ;; An implementation of suffix tree, a data structure for representing ;; sets of lists efficiently, provided there is an ordering relation ;; on the elements of lists. ;; ;; Copyright 2011 Ivan Raikov and the Okinawa Institute of Science and ;; Technology. ;; ;; ;; A suffix tree is a tree with arcs labeled by elements from the ;; element type of the lists and with branches ordered on the basis of ;; their arc labels; moreover, only one branch per distinct label ;; value is allowed per node. Ends of lists are designated by an ;; "EOL" marker; a value may be associated with the EOL symbol. ;; ;; (module suffix-tree ( make-suffix-tree suffix-tree-equal? ) (import scheme chicken) (require-library srfi-1 data-structures) (import (only srfi-1 every) (only data-structures identity)) (require-extension datatype matchable) (define (list-of pred) (lambda (x) (every pred x))) (define-datatype branch branch? (EOL (v identity)) (BRN (label identity) (branches (list-of branch?)))) (define suffix-tree? (list-of branch?)) (define (suffix-tree-equal? t1 t2) (let ((t1 (t1 'repr)) (t2 (t2 'repr))) (let ((aeq (car t1)) (tr1 (caddr t1)) (beq (car t2)) (tr2 (caddr t2))) (let recur ((tr1 tr1) (tr2 tr2)) (match (list tr1 tr2) ((() ()) #t) (((($ branch 'EOL b1) . tr1) (($ branch 'EOL b2) . tr2)) (and (beq b1 b2) (recur tr1 tr2))) (((($ branch 'BRN a1 tr11) . tr1) (($ branch 'BRN a2 tr21) . tr2)) (and (aeq a1 a2) (recur tr11 tr21) (recur tr1 tr2))) (else #f)) )) )) (define (make-suffix-tree leq key->list . rest) (let-optionals rest ((tr '())) (assert (suffix-tree? tr)) (define empty '()) ;; Inserts list into tr and associates bval with the EOL indicator for the list (define (insert lst bval tr) (match (list lst bval tr) ((() b ()) (list (EOL b))) (((a . t) b ()) (list (BRN a (insert t b '())))) ((() b (($ branch 'EOL _) . _)) (error 'insert "element already in tree" )) (((and a (_ . _)) b (($ branch 'EOL b1) . tr)) (cons (EOL b1) (insert a b tr))) ((() b tr) (cons (EOL b) tr)) (((and al (a . t)) b (and tr (($ branch 'BRN a1 tr1) . tr2))) (if (leq a a1) (if (leq a1 a) (cons (BRN a1 (insert t b tr1)) tr2) (cons (BRN a (insert t b '())) tr)) (cons (BRN a1 tr1) (insert al b tr2)) )) )) ;; Returns the value associated with lst in tr (define (lookup k tr . rest) (let-optionals rest ((partial #f)) (let recur ((lst k) (tr tr)) (match (list lst tr) ((_ ()) (error 'lookup "not found" k)) ((() (($ branch 'EOL b) . tr)) b) (((and al (_ . _)) (($ branch 'EOL _) . tr)) (recur al tr)) ((() tr) (if (not partial) (error 'lookup "not found" k) (partial tr) )) (((and al (a . t)) (($ branch 'BRN a1 tr1) . tr2)) (if (leq a a1) (if (leq a1 a) (recur t tr1) (error 'lookup "not found" k)) (recur al tr2))) )) )) ;; Removes lst from tr. Any branches having a null subsuffix-tree ;; associated with them are deleted. (define (remove lst tr) (match (list lst tr) ((() ((EOL _) . tr1)) tr1) (((and al (_ . _)) (($ branch 'EOL b) . tr1)) (cons (EOL b) (remove al tr1))) ((() tr1) tr1) (((and al (a . t)) (and tr (($ branch 'BRN a1 tr1) . tr2))) (if (leq a a1) (if (leq a1 a) (let ((tr3 (remove t tr1))) (if (null? tr3) tr2 (cons (BRN a1 tr3) tr2))) tr) (cons (BRN a1 tr1) (remove al tr2)))) )) ;; Merges tr1 and tr2. If there is a list that appears in both ;; suffix-trees, an exception is raised. (define (merge tr1 tr2) (match (list tr1 tr2) ((() tr2) tr2) ((tr1 ()) tr1) (((($ branch 'EOL b1) . _) (($ branch 'EOL _) . _)) (error "already in suffix-tree" tr1 tr2)) (((($ branch 'EOL b1) . tr11) tr2) (cons (EOL b1) (merge tr11 tr2))) ((tr1 (($ branch 'EOL b2) . tr21)) (cons (EOL b2) (merge tr1 tr21))) (((and tr1 (($ branch 'BRN a1 tr11) . tr12)) (and tr2 (($ branch 'BRN a2 tr21) . tr22))) (if (leq a1 a2) (if (leq a2 a1) (cons (BRN a1 (merge tr11 tr21)) (merge tr12 tr22)) (cons (BRN a1 tr11) (merge tr12 tr2))) (cons (BRN a2 tr21) (merge tr1 tr22)))) )) ;; Splits tr into three suffix-trees on the basis of a. The first suffix-tree ;; consists of branches headed by actions less than a (plus any EOL ;; symbol), the second contains the branch (if any) associated with a, ;; and the third consists of branches headed by actions greater than a. (define (partition a tr) (match (list tr a) ((() a) (list '() '() '())) (((($ branch 'EOL b) . tr1) a) (match-let (((tr1 tr2 tr3) (partition a tr1))) (list (cons (EOL b) tr1) tr2 tr3))) (((and tr (($ branch 'BRN a1 tr1) . tr2)) a) (if (leq a a1) (if (leq a1 a) (list '() (list (BRN a tr1)) tr2) (list '() '() tr)) (match-let (((tr1 tr2 tr3) (partition a tr2))) (list (cons (BRN a1 tr1) tr1) tr2 tr3)))) )) ;; Message dispatcher (lambda (selector) (case selector ((insert) (lambda (k bval) (make-suffix-tree leq key->list (insert (key->list k) bval tr)))) ((lookup) (lambda (k) (lookup (key->list k) tr))) ((lookup/partial) (lambda (k) (let ((v (lookup (key->list k) tr identity))) (if (suffix-tree? v) (make-suffix-tree leq key->list v) v)))) ((remove) (lambda (k) (make-suffix-tree leq key->list (remove (key->list k) tr)))) ((merge) (lambda (x) (make-suffix-tree leq key->list (merge tr x)))) ((partition) (lambda (a) (partition a tr))) ((repr) (lambda () (list leq key->list tr))) )) )) )