;;
;; Compute the longest common subsequence of two sequences 
;;
;; Copyright 2007-2010 Ivan Raikov.
;;
;; This program is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.

;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; A full copy of the GPL license can be found at
;; <http://www.gnu.org/licenses/>.

(module npdiff

 (export diffop? Insert Remove Change
	 make-npdiff make-hunks)
		   
 (import scheme chicken data-structures  )

 (require-extension srfi-1 srfi-4 datatype )

(include "box.scm")
(include "stack.scm")

(define (psplit2 lst) (values (car lst) (cdr lst)))

(define (intpair? x)
  (and (pair? x) (fixnum? (car x)) (fixnum? (cdr x))))


; Datatype: diffop
;
; A representation of the three diff operations; insert, remove, change. 
;
; TARGET is the line or range of lines that is being operated on
;
; SOURCE is the range of lines that is used as input of the insert and
; change commands.  
;
; DATA, DATAIN, DATAOUT is a sequence of the
; elements (e.g. lines) that are being inserted or replaced.
;
; CONTEXT, CONTEXTIN, CONTEXTOUT is optional context; these are pairs
; in which the car is a list of elements preceding the operation, and
; the cdr is a list of elements following the operation.
;
(define-datatype diffop diffop?
  (Insert   (target fixnum?) (source intpair?)
	    (seq list?) (context (lambda (x) (or (not x) (list? x)))) )
  (Remove   (target intpair?)
	    (seq list?) (context (lambda (x) (or (not x) (list? x)))) )
  (Change   (target intpair?)
	    (source intpair?)
	    (seqin list?)
	    (seqout list?)
	    (contextin (lambda (x) (or (not x) (list? x))))
	    (contextout (lambda (x) (or (not x) (list? x))))))


(define-record-printer (diffop x out)
  (cases diffop x 
	 (Insert (target source seq context)
		 (display "#(Insert" out)
		 (display (conc " target=" target) out)
		 (display (conc " source=" source) out)
		 (display (conc " seq=" seq) out)
		 (display (conc " context=" context) out)
		 (display ")" out))
	 
	 (Remove (target seq context)
		 (display "#(Remove " out)
		 (display (conc " target=" target) out)
		 (display (conc " seq=" seq) out)
		 (display (conc " context=" context) out)
		 (display ")" out))
	 
	 (Change (target source seqin seqout contextin contextout)
		 (display "#(Change" out)
		 (display (conc " target=" target) out)
		 (display (conc " source=" source) out)
		 (display (conc " seqin=" seqin) out)
		 (display (conc " seqout=" seqout) out)
		 (display (conc " contextin=" contextin) out)
		 (display (conc " contextout=" contextout) out)
		 (display ")" out))))


;;
;;
;;  S. Wu, U. Manber, and E. Myers. An O(NP) sequence comparison
;;  algorithm. In Information Processing Letters, volume 35, pages
;;  317--323, September 1990.
;;
;;
(define (make-npdiff equal? is-ref is-length hunks) 
  (lambda (A B . rest)
   (let-optionals  rest ((context-len 0))
    (define css (make-stack))

    (let ((M (is-length A))
	  (N (is-length B)))

      (let-values (((A B M N swap) 
		    (if (fx> M N) (values B A N M #t)
			(values A B M N #f))))

       ;; The algorithm outlined in the paper calls for the creation
       ;; of an array that contains the furthest paths, and that is
       ;; defined as [-(M+1),(N+1)].

       ;; Since the vector library in Scheme does not support negative
       ;; array indices, we are going to have to bump everything by
       ;; offset M+1 whenever accessing array FP

       (define (compare delta offset fp p) 
	 
	 (define (update k)
	   (s32vector-set! fp (fx+ k offset)
			   (snake k (max (fx+ 1 (s32vector-ref fp (fx+ offset (fx- k 1))))
					 (s32vector-ref fp (fx+ offset (fx+ 1 k)))))))
      
	 (define (lowerloop k)
	   (if (fx<= k (fx- delta 1))
	       (begin
		 (update k)
		 (lowerloop (fx+ 1 k)))))
	 
	 (define (upperloop k)
	   (if (fx>= k (fx+ 1 delta))
	       (begin
		 (update k)
		 (upperloop (fx- k 1)))))
	 
	 (let ((p (fx+ p 1)))
	   (lowerloop (fx* -1 p))
	   (upperloop (fx+ delta p))
	   (update delta)
	   (if (not (fx= N (s32vector-ref fp (fx+ offset delta))))
	       (compare delta offset fp p))))
       
		  
       (define (snake k y)
	 (let ((a (fx- y k))
	       (b y))
	   (let-values ((( x y ) 
			 (let loop ((x  a)  (y  b))
			   (if (and (fx< x M) (fx< y N)
				    (equal? (is-ref A x) (is-ref B y)))
			       (loop (fx+ 1 x) (fx+ y 1))
			       (values x y)))))
		       (if (or (not (fx= a x)) (not (fx= b y)))
			   (let-values (((lasta lastb)
					 (if (stack-empty? css) (values -1 -1)
					     (psplit2 (car (stack-rest css))))))
			       (if (and (fx< lasta a) (fx< lastb b))
				   ;; we have found a common substring; push the end
				   ;; and start pairs onto the common substring stack
				   (if swap
				       (begin
					 (stack-push! css (cons b a))
					 (stack-push! css (cons y x)))
				       (begin
					 (stack-push! css (cons a b))
					 (stack-push! css (cons x y)))))))
		       y)))


       (let ((offset (fx+ 1 M))
	     (fp     (make-s32vector (fx+ 3 (fx+ M N)) -1))
	     (delta  (fx- N M))
	     (p       -1))
	 (compare delta offset fp p)
	 (if swap 
	     (values (hunks B A css context-len) B A)
	     (values (hunks A B css context-len) A B))))))))


;;  Pop matching pairs from the given stack, and fill in the gaps
;;  between them with insert/change/remove hunks.
;;
;;  This function expects the following stack layout:
;;
;; 	endpair n
;; 	startpair n
;; 	endpair n-1
;;      startpair n-1
;; 	.
;; 	.
;; 	.
;; 	endpair 1
;; 	startpair 1
;;			 
;;  i.e. the one constructed by function `npdiff' above. endpair
;;  marks the end of a common substring. startpair marks the beginning
;;  of a common substring. Each pair has the form (x,y) where x is a
;;  line number in text A, and y is a line number in text B.
;;
;;  If substring n (i.e. the one at the top of the stack) does not
;;  reach the last line of text A (its endpair does NOT have the last
;;  line number in A as x coordinate) that means we have some extra
;;  lines at the end of text A that need to be removed, so we make a
;;  remove hunk for them. If instead the y component does not reach
;;  the end of B, we make an insert hunk.
;;
;;  If substring 1 (i.e. the one at the bottom of the stack) does not
;;  start from the first line of text A (its endpair does NOT have 0
;;  as y coordinate) that means we have some extra lines at the
;;  beginning of text B that need to be inserted, so we make an insert
;;  hunk for them. If instead the x component is non-zero, we make a
;;  remove hunk.
;;
;;  For all other cases, we make change hunks that fill in the gaps
;;  between any two common substrings.  
(define (make-hunks is-ref is-length is-slice)
  
  (lambda (A B css . rest)
    (let-optionals  rest ((context-len 0))
     (let ((M (is-length A))
	   (N (is-length B))
	   (context? (fx> context-len 0)))
       
       (define (make-context seq len start end)
	 (if (or (fx> start len) (fx< end start)) (list)
	     (let ((start (if (fx< start 0) 0 start))
		   (end   (if (fx< len end) len end)))
	       (is-slice seq start end))))
       
       (define (loop css hunks)
	 (if (stack-empty? css) hunks
	    ;; make a change hunk and recurse
	    (let-values (((endpair startpair)  (stack-ppeek css)))
	      (let ((k (stack-depth css)))
		(let-values (((x y)  (psplit2 startpair))
			     ((w z)  (psplit2 endpair)))
		    ;; are these the the last two elements of the stack?
		    (if (fx= 1 k)
			(cond ((and (fx= 0 x) (fx= 0 y))   hunks)

			      ((fx= 0 x) (cons (Insert x (cons 0 y) (is-slice B 0 y) 
					       (and context? (cons (list) (make-context B N y (fx+ y context-len)))))
					       hunks))

			      ((fx= 0 y) (cons (Remove (cons 0 x) (is-slice A 0 x)
						       (cons (list) (make-context A M x (fx+ x context-len))))
					       hunks))

			      (else (cons (Change (cons 0 x) (cons 0 y)
						  (is-slice B 0 y) (is-slice A 0 x) 
						  (and context? (cons (list) (make-context B N y (fx+ y context-len))))
						  (and context? (cons (list) (make-context A M x (fx+ x context-len)))))
					  hunks)))
			(begin
			  (stack-pop! css)
			  (stack-pop! css)
			  (let-values (((w z) (values x y))
				       ((x y) (psplit2 (stack-peek css))))
                            (let ((newhunk  (cond ((fx= y z)  
						   (Remove (cons x w) (is-slice A x w)
							   (and context? 
								(cons (make-context A M (fx- x context-len) x)
								      (make-context A M x (fx+ x context-len))))))
						  
						  ((fx= x w)  
						   (Insert x (cons y z) (is-slice B y z)
							   (and context? 
								(cons (make-context B N (fx- y context-len) y)
								      (make-context B N z (fx+ z context-len))))))

						  (else (Change (cons x w) (cons y z)
								(is-slice B y z ) (is-slice A x w)
								(and context? 
								     (cons (make-context B N (fx- y context-len) y)
									   (make-context B N z (fx+ z context-len))))
								(and context?
								     (cons (make-context A M (fx- x context-len) x)
									   (make-context A M w (fx+ w context-len)))))))))
;;			      (match hunks
;;				     ((h . rest)  (loop css (merge newhunk h rest)))
			      (loop css (if newhunk (cons newhunk hunks) hunks)))))))))))

      (if (stack-empty? css)

	  (cond ((and (zero? M) (zero? N)) ;; both sequences are empty
		 (list))
		
		((zero? M) ;; sequence A is empty
		 (list (Insert 0 (cons 0 N) (is-slice B 0 N) (and context? `(())))))

		((zero? N) ;; sequence B is empty
		 (list (Remove (cons 0 M) (is-slice A 0 M)   (and context? `(())))))

		;; the two sequences are completely different
		(list (Change (cons 1 M) (cons 1 N)
			      (is-slice B 0 N)
			      (is-slice A 0 M)
			      (and context? (cons (make-context B N 0 N) (list)))
			      (and context? (cons (make-context A M 0 M) (list)))))
		)

	  (let-values (((endpair startpair)  (stack-ppeek css)))
	     (let ((k (stack-depth css)))
		(let-values (((x y)  (psplit2 startpair))
			     ((w z)  (psplit2 endpair)))

                  (cond ((and (fx= w M) (fx= z N))

			 (loop css (list)))

			((fx= z N)
			 (loop css (list (Remove (cons w M) (is-slice A w M)
						 (and context? (cons (make-context A M w (fx- w context-len))
								     (list)))))))

			((fx= w M)
			 (loop css (list (Insert w (cons z N) (is-slice B z N)
						 (and context? (cons (make-context B N (fx- z context-len) z)
								     (list)))))))

			(else (loop css (list (Change (cons w M) (cons z N)
						      (is-slice B z N )
						      (is-slice A w M)
						      (and context? (cons (make-context B N (fx- z context-len) z)
									  (list)))
						      (and context? (cons (make-context A M (fx- w context-len) w)
									  (list)))
						      )))
			      ))
		  ))
	     ))))))


)