diff.go raw

   1  // Copyright 2013 Google Inc.  All rights reserved.
   2  //
   3  // Licensed under the Apache License, Version 2.0 (the "License");
   4  // you may not use this file except in compliance with the License.
   5  // You may obtain a copy of the License at
   6  //
   7  //     http://www.apache.org/licenses/LICENSE-2.0
   8  //
   9  // Unless required by applicable law or agreed to in writing, software
  10  // distributed under the License is distributed on an "AS IS" BASIS,
  11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  // See the License for the specific language governing permissions and
  13  // limitations under the License.
  14  
  15  // Package diff implements a linewise diff algorithm.
  16  package diff
  17  
  18  import (
  19  	"bytes"
  20  	"fmt"
  21  	"strings"
  22  )
  23  
  24  // Chunk represents a piece of the diff.  A chunk will not have both added and
  25  // deleted lines.  Equal lines are always after any added or deleted lines.
  26  // A Chunk may or may not have any lines in it, especially for the first or last
  27  // chunk in a computation.
  28  type Chunk struct {
  29  	Added   []string
  30  	Deleted []string
  31  	Equal   []string
  32  }
  33  
  34  func (c *Chunk) empty() bool {
  35  	return len(c.Added) == 0 && len(c.Deleted) == 0 && len(c.Equal) == 0
  36  }
  37  
  38  // Diff returns a string containing a line-by-line unified diff of the linewise
  39  // changes required to make A into B.  Each line is prefixed with '+', '-', or
  40  // ' ' to indicate if it should be added, removed, or is correct respectively.
  41  func Diff(A, B string) string {
  42  	aLines := strings.Split(A, "\n")
  43  	bLines := strings.Split(B, "\n")
  44  
  45  	chunks := DiffChunks(aLines, bLines)
  46  
  47  	buf := new(bytes.Buffer)
  48  	for _, c := range chunks {
  49  		for _, line := range c.Added {
  50  			fmt.Fprintf(buf, "+%s\n", line)
  51  		}
  52  		for _, line := range c.Deleted {
  53  			fmt.Fprintf(buf, "-%s\n", line)
  54  		}
  55  		for _, line := range c.Equal {
  56  			fmt.Fprintf(buf, " %s\n", line)
  57  		}
  58  	}
  59  	return strings.TrimRight(buf.String(), "\n")
  60  }
  61  
  62  // DiffChunks uses an O(D(N+M)) shortest-edit-script algorithm
  63  // to compute the edits required from A to B and returns the
  64  // edit chunks.
  65  func DiffChunks(a, b []string) []Chunk {
  66  	// algorithm: http://www.xmailserver.org/diff2.pdf
  67  
  68  	// We'll need these quantities a lot.
  69  	alen, blen := len(a), len(b) // M, N
  70  
  71  	// At most, it will require len(a) deletions and len(b) additions
  72  	// to transform a into b.
  73  	maxPath := alen + blen // MAX
  74  	if maxPath == 0 {
  75  		// degenerate case: two empty lists are the same
  76  		return nil
  77  	}
  78  
  79  	// Store the endpoint of the path for diagonals.
  80  	// We store only the a index, because the b index on any diagonal
  81  	// (which we know during the loop below) is aidx-diag.
  82  	// endpoint[maxPath] represents the 0 diagonal.
  83  	//
  84  	// Stated differently:
  85  	// endpoint[d] contains the aidx of a furthest reaching path in diagonal d
  86  	endpoint := make([]int, 2*maxPath+1) // V
  87  
  88  	saved := make([][]int, 0, 8) // Vs
  89  	save := func() {
  90  		dup := make([]int, len(endpoint))
  91  		copy(dup, endpoint)
  92  		saved = append(saved, dup)
  93  	}
  94  
  95  	var editDistance int // D
  96  dLoop:
  97  	for editDistance = 0; editDistance <= maxPath; editDistance++ {
  98  		// The 0 diag(onal) represents equality of a and b.  Each diagonal to
  99  		// the left is numbered one lower, to the right is one higher, from
 100  		// -alen to +blen.  Negative diagonals favor differences from a,
 101  		// positive diagonals favor differences from b.  The edit distance to a
 102  		// diagonal d cannot be shorter than d itself.
 103  		//
 104  		// The iterations of this loop cover either odds or evens, but not both,
 105  		// If odd indices are inputs, even indices are outputs and vice versa.
 106  		for diag := -editDistance; diag <= editDistance; diag += 2 { // k
 107  			var aidx int // x
 108  			switch {
 109  			case diag == -editDistance:
 110  				// This is a new diagonal; copy from previous iter
 111  				aidx = endpoint[maxPath-editDistance+1] + 0
 112  			case diag == editDistance:
 113  				// This is a new diagonal; copy from previous iter
 114  				aidx = endpoint[maxPath+editDistance-1] + 1
 115  			case endpoint[maxPath+diag+1] > endpoint[maxPath+diag-1]:
 116  				// diagonal d+1 was farther along, so use that
 117  				aidx = endpoint[maxPath+diag+1] + 0
 118  			default:
 119  				// diagonal d-1 was farther (or the same), so use that
 120  				aidx = endpoint[maxPath+diag-1] + 1
 121  			}
 122  			// On diagonal d, we can compute bidx from aidx.
 123  			bidx := aidx - diag // y
 124  			// See how far we can go on this diagonal before we find a difference.
 125  			for aidx < alen && bidx < blen && a[aidx] == b[bidx] {
 126  				aidx++
 127  				bidx++
 128  			}
 129  			// Store the end of the current edit chain.
 130  			endpoint[maxPath+diag] = aidx
 131  			// If we've found the end of both inputs, we're done!
 132  			if aidx >= alen && bidx >= blen {
 133  				save() // save the final path
 134  				break dLoop
 135  			}
 136  		}
 137  		save() // save the current path
 138  	}
 139  	if editDistance == 0 {
 140  		return nil
 141  	}
 142  	chunks := make([]Chunk, editDistance+1)
 143  
 144  	x, y := alen, blen
 145  	for d := editDistance; d > 0; d-- {
 146  		endpoint := saved[d]
 147  		diag := x - y
 148  		insert := diag == -d || (diag != d && endpoint[maxPath+diag-1] < endpoint[maxPath+diag+1])
 149  
 150  		x1 := endpoint[maxPath+diag]
 151  		var x0, xM, kk int
 152  		if insert {
 153  			kk = diag + 1
 154  			x0 = endpoint[maxPath+kk]
 155  			xM = x0
 156  		} else {
 157  			kk = diag - 1
 158  			x0 = endpoint[maxPath+kk]
 159  			xM = x0 + 1
 160  		}
 161  		y0 := x0 - kk
 162  
 163  		var c Chunk
 164  		if insert {
 165  			c.Added = b[y0:][:1]
 166  		} else {
 167  			c.Deleted = a[x0:][:1]
 168  		}
 169  		if xM < x1 {
 170  			c.Equal = a[xM:][:x1-xM]
 171  		}
 172  
 173  		x, y = x0, y0
 174  		chunks[d] = c
 175  	}
 176  	if x > 0 {
 177  		chunks[0].Equal = a[:x]
 178  	}
 179  	if chunks[0].empty() {
 180  		chunks = chunks[1:]
 181  	}
 182  	if len(chunks) == 0 {
 183  		return nil
 184  	}
 185  	return chunks
 186  }
 187