;; ;; ;; Output text diff scripts in different formats. ;; ;; Copyright 2007-2010 Ivan Raikov. ;; ;; ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or (at ;; your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; ;; (module format-textdiff (export textdiff textdiff->sexp make-format-textdiff) (import scheme chicken data-structures (only extras pp)) (require-extension srfi-1 srfi-4 srfi-13 matchable vector-lib dyn-vector npdiff) (define (textdiff text1 text2 . rest) (let-optionals rest ((context-len 0)) (let ((A (list->vector text1)) (B (list->vector text2))) (let ((vector-slice (lambda (v start end) (if (zero? end) '() (vector->list (vector-copy v start end)))))) ((make-npdiff string=? vector-ref vector-length (make-hunks vector-ref vector-length vector-slice)) A B context-len))))) (define (make-format-textdiff type) (case type ((ed) edformat) ((normal) normalformat) ((rcs) rcsformat) ((context) contextformat) (else (error 'make-format-textdiff "unknown format type " type)))) ;; ;; Generate s-expressions for the patch egg: ;; ;; ([c|a|d] start-line finish-line new-start-line new-finish-line (lines to be deleted) (lines to be inserted)) ;; ;; (define (textdiff->sexp hunks) (define (format h) (match h (($ diffop 'Insert target source seq) (match-let (((l . r) source)) `(a ,target ,target ,l ,r ,(list) ,seq))) (($ diffop 'Remove target seq) (match-let (((l . r) target)) `(d ,(fx+ 1 l) ,r #f #f ,seq ,(list)))) (($ diffop 'Change target source seqin seqout contextin contextout) (match-let (((l . r) source) ((l1 . r1) target)) `(c ,(fx+ 1 l1) ,r1 ,(fx+ 1 l) ,r ,seqout ,seqin))) )) (map format hunks)) ;; ed script (define (edformat out hunks) (define (pair->string p) (match-let (((a . b) p)) (let ((a (fx+ 1 a))) (if (fx= a b) (number->string a) (conc a "," b))))) (define (format-lines lines out) (let ((escape #f)) (for-each (lambda (l) (if (string=? l ".") (begin (set! escape #t) (display "..\n.\ns/.//\n" out)) (display (string-concatenate (list (if escape (begin (set! escape #f) "a\n") "") l "\n")) out))) lines))) (define (format hs out) (if (not (null? hs)) (let ((h (car hs))) (match h (($ diffop 'Insert target source seq) (begin (display (conc target "a\n") out) (format-lines seq out) (display ".\n" out))) (($ diffop 'Remove target seq) (begin (display (pair->string target) out) (display "d\n" out))) (($ diffop 'Change target source seqin seqout contextin contextout) (begin (display (pair->string target) out) (display "c\n" out) (format-lines seqin out) (display ".\n" out)))) (format (cdr hs) out)))) (format (reverse hunks) out)) ;; normal diff format (define (normalformat out hunks) (define (pair->string p) (match-let (((a . b) p)) (let ((a (fx+ 1 a))) (if (fx= a b) (number->string a) (conc a "," b))))) (define (format-lines prefix lines out) (for-each (lambda (l) (display prefix out) (display l out) (display "\n" out)) lines)) (define (format h n out) (match h (($ diffop 'Insert target source seq) (match-let (((l . r) source)) (display target out) (display (string-concatenate (list "a" (pair->string source) "\n")) out) (format-lines "> " seq out) (fx+ n (fx- r l)))) (($ diffop 'Remove target seq) (match-let (((l . r) target)) (display (pair->string target) out) (display "d" out) (display (fx+ l n) out) (display "\n" out) (format-lines "< " seq out) (fx- n (fx- r l)))) (($ diffop 'Change target source seqin seqout contextin contextout) (match-let (((l . r) source) ((l1 . r1) target)) (display (string-concatenate (list (pair->string target) "c" (pair->string source) "\n")) out) (format-lines "< " seqout out) (display "---\n" out) (format-lines "> " seqin out) (fx+ n (fx- (fx- r l) (fx- r1 l1) )))) )) (fold (lambda (h n) (format h n out)) 0 hunks)) ;; RCS format (define (rcsformat out hunks) (define (pair->string p) (match-let (((a . b) p)) (let ((a (fx+ 1 a))) (if (fx= a b) (number->string a) (conc a "," b))))) (define (format-lines lines out) (for-each (lambda (l) (display l out) (display "\n" out)) lines)) (define (format h out) (match h (($ diffop 'Insert target source seq) (match-let (((l . r) source)) (display (string-concatenate (list "a" (number->string target) " ")) out) (display (fx- r l) out) (display "\n" out) (format-lines seq out))) (($ diffop 'Remove target seq) (match-let (((l . r) target)) (display "d" out) (display (fx+ 1 l) out) (display " " out) (display (fx- r l) out) (display "\n" out))) (($ diffop 'Change target source seqin seqout contextin contextout) (match-let (((l . r) target) ((l1 . r1) source)) (display "d" out) (display (fx+ 1 l) out) (display " " out) (display (fx- r l) out) (display "\n" out) (display "a" out) (display (fx+ l (fx- r l)) out) (display " " out) (display (fx- r1 l1) out) (display "\n" out) (format-lines seqin out))))) (for-each (lambda (h) (format h out)) hunks)) ;; Context format (patch) (define (contextformat out hunks fname1 tstamp1 fname2 tstamp2) (define hunkhead "***************\n") (define fromhead "*** ") (define fromtail " ****\n") (define tohead "--- ") (define totail " ----\n") (define (pair->string p) (match-let (((a . b) p)) (if (fx= a b) (number->string a) (let ((a (fx+ 1 a))) (if (fx= a b) (number->string a) (conc a "," b)))) )) ;; compute the line ranges of context hunks (define (get-target-range h) (match h (($ diffop 'Insert x source data (before . after)) (let ((na (or (and after (length after)) 0)) (nb (or (and before (length before)) 0))) (cons (fx- x nb) (fx+ x na)))) (($ diffop 'Remove (x . y) data (before . after)) (let ((na (or (and after (length after)) 0)) (nb (or (and before (length before)) 0))) (cons (fx- x nb) (fx+ y (max 0 (fx- na (fx- y x))))))) (($ diffop 'Change (x . y) source datain dataout contextin (before . after)) (let ((na (or (and after (length after)) 0)) (nb (or (and before (length before)) 0))) (cons (fx- x nb) (fx+ y na)))) )) (define (get-source-range h) (match h (($ diffop 'Insert target (x . y) data (before . after)) (let ((na (or (and after (length after)) 0)) (nb (or (and before (length before)) 0))) (cons (fx- x nb) (fx+ y na)))) (($ diffop 'Remove (x . y) data (before . after)) (let ((na (or (and after (length after)) 0)) (nb (or (and before (length before)) 0))) (cons (fx- x nb) (fx+ x (max 0 (fx- na (fx- y x))))))) (($ diffop 'Change target (x . y) datain dataout (before . after) contextout) (let ((na (or (and after (length after)) 0)) (nb (or (and before (length before)) 0))) (cons (fx- x nb) (fx+ y na)))) )) ;; Counts all lines in v that are not #f (define (line-count v) (dynvector-fold (lambda (i state vv) (if vv (fx+ state 1) state)) 0 v)) ;; converts a hunk to a vector of lines where each line can be ;; prefixed by - + ! or nothing (define (hunk->vector h . rest) (let-optionals rest ((target-vect #f) (source-vect #f) (target-range (get-target-range h)) (source-range (get-source-range h)) (target-start #f) (source-start #f)) (match h (($ diffop 'Insert target source data (before . after)) (let ((invect (or source-vect (make-dynvector (fx- (cdr source-range) (car source-range)) #f))) (outvect (or target-vect (make-dynvector (fx+ (length after) (length before)) #f))) (source-start (or source-start 0)) (target-start (or target-start 0))) (fold (lambda (s i) (match (dynvector-ref invect i) ((or #f (#f . _)) (dynvector-set! invect i (cons #f s))) (else (void))) (fx+ 1 i)) source-start before) (fold (lambda (s i) (dynvector-set! invect i (cons '+ s)) (fx+ 1 i)) (fx+ source-start (length before)) data) (fold (lambda (s i) (match (dynvector-ref invect i) ((or #f (#f . _)) (dynvector-set! invect i (cons #f s))) (else (void))) (fx+ 1 i)) (fx+ source-start (fx+ (length before) (length data))) after) (fold (lambda (s i) (match (dynvector-ref outvect i) ((or #f (#f . _)) (dynvector-set! outvect i (cons #f s))) (else (void))) (fx+ 1 i)) target-start before) (fold (lambda (s i) (match (dynvector-ref outvect i) ((or #f (#f . _)) (dynvector-set! outvect i (cons #f s))) (else (void))) (fx+ 1 i)) (fx+ target-start (length before)) after) (list invect outvect (cons (car source-range) (fx+ (car source-range) (line-count invect))) (cons (car target-range) (fx+ (car target-range) (line-count outvect)))) )) (($ diffop 'Remove (x . y) data (before . after)) (let ((invect (or source-vect (make-dynvector (fx+ (length after) (length before)) #f))) (outvect (or target-vect (make-dynvector (fx- (cdr target-range) (car target-range)) #f))) (start (or target-start 0))) (fold (lambda (s i) (if (not (dynvector-ref outvect i)) (dynvector-set! outvect i (cons #f s))) (fx+ 1 i)) start before) (fold (lambda (s i) (dynvector-set! outvect i (cons '- s)) (fx+ 1 i)) (fx+ start (length before)) data) (fold (lambda (s i) (if (not (dynvector-ref outvect i)) (dynvector-set! outvect i (cons #f s))) (fx+ 1 i)) (fx- (fx+ start (fx+ (length data) (length before))) 1) after) (let ((nb (length before))) (let loop ((i (if (fx>= start nb) (fx- start nb) start)) (ss before)) (if (pair? ss) (let ((v (dynvector-ref outvect i))) (if (not (dynvector-ref invect i)) (match v (((or '+ '! #f) . _) (dynvector-set! invect i (cons #f (car ss)))) (else (void)) )) (loop (fx+ 1 i) (cdr ss) ) )))) (let ((nd (fx- y x)) (n (fx+ start (length after)))) (let loop ((i (fx+ 1 start)) (j (fx+ 1 (fx+ nd start)))) (let ((v (dynvector-ref outvect j))) (if (and (not (dynvector-ref invect i)) v (match v (((or '+ '! #f) . _) #t) (else #f))) (dynvector-set! invect i (cons #f (cdr v)))) (if (fx<= i n) (loop (fx+ 1 i) (fx+ 1 j) )) ))) (list invect outvect (cons (car source-range) (fx+ (car source-range) (line-count invect))) (cons (car target-range) (fx+ (car target-range) (line-count outvect)))) )) (($ diffop 'Change (x . y) (w . z) datain dataout (beforein . afterin) (beforeout . afterout)) (let ((outvect (or target-vect (make-dynvector (fx- (cdr target-range) (car target-range)) #f))) (invect (or source-vect (make-dynvector (fx- (cdr source-range) (car source-range)) #f))) (outstart (or target-start 0)) (instart (or source-start 0))) (fold (lambda (s i) (match (dynvector-ref outvect i) ((or #f (#f . _)) (dynvector-set! outvect i (cons #f s))) (else (void))) (fx+ 1 i)) outstart beforeout) (fold (lambda (s i) (dynvector-set! outvect i (cons '! s)) (fx+ 1 i)) (fx+ outstart (length beforeout)) dataout) (fold (lambda (s i) (match (dynvector-ref outvect i) ((or #f (#f . _)) (dynvector-set! outvect i (cons #f s))) (else (void))) (fx+ 1 i)) (fx+ outstart (fx+ (length dataout) (length beforeout))) afterout) (fold (lambda (s i) (match (dynvector-ref invect i) ((or #f (#f . _)) (dynvector-set! invect i (cons #f s))) (else (void))) (fx+ 1 i)) instart beforein) (fold (lambda (s i) (dynvector-set! invect i (cons '! s)) (fx+ 1 i)) (fx+ instart (length beforein)) datain) (fold (lambda (s i) (match (dynvector-ref invect i) ((or #f (#f . _)) (dynvector-set! invect i (cons #f s))) (else (void))) (fx+ 1 i)) (fx+ instart (fx+ (length datain) (length beforein))) afterin) (list invect outvect source-range target-range))) (else (error 'hunk->vector "invalid hunk" h)) ))) ;; checks if hunk ranges overlap or are adjacent (define (adjacent? range1 range2) (and (and range1 range2) (fx>= 0 (fx- (car range2) (cdr range1))))) ;; incorporates hunk h into the given source/target vectors (define (merge h target-vect source-vect target-range source-range) (let ((h-target-range (get-target-range h)) (h-source-range (get-source-range h))) (hunk->vector h target-vect source-vect ;; merge the ranges (let ((target-range (cond ((and target-range h-target-range) (cons (min (car target-range) (car h-target-range)) (max (cdr target-range) (cdr h-target-range)))) (target-range target-range) (h-target-range h-target-range) (else (error "context diff merge: invalid target range"))))) target-range) (let ((source-range (cond ((and source-range h-source-range) (cons (min (car source-range) (car h-source-range)) (max (cdr source-range) (cdr h-source-range)))) (source-range source-range) (h-source-range h-source-range) (else (error "context diff merge: invalid source range"))))) source-range) ;; determine start index (and h-target-range target-range (let ((hx (car h-target-range)) (x (car target-range))) (and (fx> hx x) (fx- hx x)))) (and h-source-range source-range (let ((hx (car h-source-range)) (x (car source-range))) (and (fx> hx x) (fx- hx x))))))) (define (format source-vect target-vect source-range target-range out) (let ((target-vect-change? (and target-vect (dynvector-any (lambda (x) (and x (car x))) target-vect))) (source-vect-change? (and source-vect (dynvector-any (lambda (x) (and x (car x))) source-vect)))) (cond ((and source-vect-change? target-vect-change?) ;; change hunk (display hunkhead out) (display fromhead out) (display (pair->string target-range) out) (display fromtail out) (dynvector-for-each (lambda (i l) (if l (let ((p (car l))) (display (conc (or p " ") " ") out) (display (cdr l) out) (display "\n" out)))) target-vect) (display tohead out) (display (pair->string source-range) out) (display totail out) (dynvector-for-each (lambda (i l) (if l (let ((p (car l))) (display (conc (or p " ") " ") out) (display (cdr l) out) (display "\n" out)) )) source-vect)) (target-vect-change? ;; remove hunk (display hunkhead out) (display fromhead out) (display (pair->string target-range) out) (display fromtail out) (dynvector-for-each (lambda (i l) (if l (let ((p (car l))) (display (conc (or p " ") " ") out) (display (cdr l) out) (display "\n" out)) )) target-vect) (display tohead out) (display (pair->string source-range) out) (display totail out)) (source-vect-change? ;; insert hunk (display hunkhead out) (display fromhead out) (display (pair->string target-range) out) (display fromtail out) (display tohead out) (display (pair->string source-range) out) (display totail out) (dynvector-for-each (lambda (i l) (let ((p (car l))) (display (conc (or p " ") " ") out) (display (cdr l) out) (display "\n" out))) source-vect)) (else (void))))) (display (string-concatenate (list fromhead fname1 " " tstamp1 "\n")) out) (display (string-concatenate (list tohead fname2 " " tstamp2 "\n")) out) (if (not (null? hunks)) (match-let (((source-vect target-vect source-range target-range) (hunk->vector (car hunks)))) (let loop ((hunks (cdr hunks)) (source-vect source-vect) (target-vect target-vect) (source-range source-range) (target-range target-range)) (if (null? hunks) (format source-vect target-vect source-range target-range out) (let* ((h (car hunks)) (h-target-range (get-target-range h))) (if (adjacent? target-range h-target-range) ;; merge contiguous hunks and recurse (match-let (((source-vect1 target-vect1 source-range1 target-range1) (merge h target-vect source-vect target-range source-range))) (loop (cdr hunks) source-vect1 target-vect1 source-range1 target-range1)) ;; print current hunk and recurse (match-let (((source-vect1 target-vect1 source-range1 target-range1) (hunk->vector h))) (format source-vect target-vect source-range target-range out) (loop (cdr hunks) source-vect1 target-vect1 source-range1 target-range1))))))))) )