;;;; sexp-diff.scm - stolen from Racket/unstable ;;; diff-sexp.lisp -- diffs s-expressions based on Levenshtein-like edit distance. ;; Author: Michael Weber ;; Date: 2005-09-03 ;; Modified: 2005-09-04 ;; Modified: 2005-09-07 ;; Modified: 2005-09-15 ;; Modified: 2010-06-22 (Ported to racket by Vincent St-Amour) ;; ;; This code is in the Public Domain. ;;; Description: ;; DIFF-SEXP computes a diff between two s-expressions which minimizes ;; the number of atoms in the result tree, also counting edit ;; conditionals #:new, #:old. ;;; Examples: ;; > (sexp-diff ;; '(DEFUN F (X) (+ (* X 2) 1)) ;; '(DEFUN F (X) (- (* X 2) 3 1))) ;; ((DEFUN F (X) (#:new - #:old + (* X 2) #:new 3 1))) ;; > (sexp-diff ;; '(DEFUN F (X) (+ (* X 2) 4 1)) ;; '(DEFUN F (X) (- (* X 2) 5 3 1))) ;; ((DEFUN F (X) (#:new - #:old + (* X 2) #:new 5 #:new 3 #:old 4 1))) ;; > (sexp-diff ;; '(DEFUN F (X) (+ (* X 2) 4 4 1)) ;; '(DEFUN F (X) (- (* X 2) 5 5 3 1))) ;; ((DEFUN F (X) #:new (- (* X 2) 5 5 3 1) #:old (+ (* X 2) 4 4 1))) ;;; Todo: ;; * Support for moved subtrees ;; * The algorithm treats vectors, arrays, etc. as opaque objects ;; * This article might describe a better method (unchecked): ;; Hélène Touzet: "A linear tree edit distance algorithm for similar ordered trees" ;; LIFL - UMR CNRS 8022 - Université Lille 1 ;; 59 655 Villeneuve d'Ascq cedex, France ;; Helene.Touzet@lifl.fr (module sexp-diff (sexp-diff sexp-diff-markers) (import scheme (chicken base)) (import (chicken fixnum)) (import (chicken keyword)) (import srfi-1) ;; Computes the number of atoms contained in TREE. (define (tree-size tree) (if (pair? tree) (apply + 1 (map tree-size tree)) ;(reduce fx+ 1 ...) ehh 1)) (define-record edit-record type ; 'unchanged | 'deletion | 'insertion | 'update | 'compound edit-distance change) ; CHANGE | (OLD . NEW) | (CHANGE ...) (define (make-unchanged-record change) (make-edit-record 'unchanged (tree-size change) change)) (define (make-deletion-record change) (make-edit-record 'deletion (fx+ (tree-size change) 1) change)) (define (make-insertion-record change) (make-edit-record 'insertion (fx+ (tree-size change) 1) change)) (define (make-update-record old new) (make-edit-record 'update (fx+ (fx+ (tree-size old) 1) (fx+ (tree-size new) 1)) (cons old new))) (define (make-compound-record changes) (make-edit-record 'compound (apply + (map edit-record-edit-distance changes)) ;(reduce fx+ 0 ...) ehh changes)) (define (make-empty-compound-record) (make-compound-record '())) (define (make-extend-compound-record r0 record) (make-compound-record (cons record (edit-record-change r0)))) (define (unchanged-record? r) (eq? (edit-record-type r) 'unchanged)) (define (deletion-record? r) (eq? (edit-record-type r) 'deletion)) (define (insertion-record? r) (eq? (edit-record-type r) 'insertion)) (define (compound-record? r) (eq? (edit-record-type r) 'compound)) (define (update-record? r) (eq? (edit-record-type r) 'update)) (define sexp-diff-markers (make-parameter (list #:old #:new) (lambda (x) (assert (and (list? x) (= 2 (length x)) ;too specific? (keyword? (car x)) (keyword? (cadr x))) 'sexp-diff-markers "invalid old & new markers" x) x))) (define (render-difference! record) (define old-marker) (define new-marker) (let ((markers (sexp-diff-markers))) (set! old-marker (car markers)) (set! new-marker (cadr markers)) ) ;Note the "linear-update variant" use ;edit-record-change must be voided since ownership transfered ;assumes all lists are new, which they are (cond ((insertion-record? record) (list new-marker (edit-record-change record))) ((deletion-record? record) (list old-marker (edit-record-change record))) ((update-record? record) (let ((change (edit-record-change record))) (list old-marker (car change) new-marker (cdr change)))) ((unchanged-record? record) (list (edit-record-change record))) ((compound-record? record) (list (foldl (lambda (res r) (append! res (render-difference! r))) '() (let ((cs (reverse! (edit-record-change record)))) (edit-record-change-set! record #f) ;invalidate cs))))) ) ;; Returns record with minimum edit distance. (define (min/edit record . records) (foldr (lambda (a b) (if (fx<= (edit-record-edit-distance a) (edit-record-edit-distance b)) a b)) record records)) ;; Prepares initial data vectors for Levenshtein algorithm from LIST. (define (initial-distance func lst) (let ((seq (make-vector (fx+ (length lst) 1) (make-empty-compound-record)))) (do ((i 0 (fx+ i 1)) (elt lst (cdr elt))) ((null? elt)) (vector-set! seq (fx+ i 1) (make-extend-compound-record (vector-ref seq i) (func (car elt))))) seq)) ;; Calculates the minimal edits needed to transform OLD-TREE into NEW-TREE. ;; It minimizes the number of atoms in the result tree, also counting ;; edit conditionals. (define (levenshtein-tree-edit old-tree new-tree) (cond ((equal? old-tree new-tree) (make-unchanged-record old-tree)) ((not (and (pair? old-tree) (pair? new-tree))) (make-update-record old-tree new-tree)) (else (min/edit (make-update-record old-tree new-tree) (let* ((best-edit #f) (row (initial-distance make-deletion-record old-tree)) (col (initial-distance make-insertion-record new-tree))) (do ((new-part new-tree (cdr new-part)) (currentlist (cdr (vector->list col)) (cdr currentlist))) ((or (null? new-part) (null? currentlist))) (let ((current (car currentlist))) (do ((old-part old-tree (cdr old-part)) (row-idx 0 (fx+ row-idx 1))) ((null? old-part)) (set! best-edit (min/edit (make-extend-compound-record (vector-ref row (fx+ row-idx 1)) (make-insertion-record (car new-part))) (make-extend-compound-record current (make-deletion-record (car old-part))) (make-extend-compound-record (vector-ref row row-idx) (levenshtein-tree-edit (car old-part) (car new-part))))) (vector-set! row row-idx current) (set! current best-edit))) (vector-set! row (fx- (vector-length row) 1) best-edit)) best-edit))))) ;; Computes a diff between OLD-TREE and NEW-TREE which minimizes the ;; number of atoms in the result tree, also counting inserted edit conditionals ;; #:new, #:old. (define (sexp-diff old-tree new-tree) (render-difference! (levenshtein-tree-edit old-tree new-tree))) )