dict_decoder.mx raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package flate
   6  
   7  // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
   8  // LZ77 decompresses data through sequences of two forms of commands:
   9  //
  10  //   - Literal insertions: Runs of one or more symbols are inserted into the data
  11  //     stream as is. This is accomplished through the writeByte method for a
  12  //     single symbol, or combinations of writeSlice/writeMark for multiple symbols.
  13  //     Any valid stream must start with a literal insertion if no preset dictionary
  14  //     is used.
  15  //
  16  //   - Backward copies: Runs of one or more symbols are copied from previously
  17  //     emitted data. Backward copies come as the tuple (dist, length) where dist
  18  //     determines how far back in the stream to copy from and length determines how
  19  //     many bytes to copy. Note that it is valid for the length to be greater than
  20  //     the distance. Since LZ77 uses forward copies, that situation is used to
  21  //     perform a form of run-length encoding on repeated runs of symbols.
  22  //     The writeCopy and tryWriteCopy are used to implement this command.
  23  //
  24  // For performance reasons, this implementation performs little to no sanity
  25  // checks about the arguments. As such, the invariants documented for each
  26  // method call must be respected.
  27  type dictDecoder struct {
  28  	hist []byte // Sliding window history
  29  
  30  	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
  31  	wrPos int  // Current output position in buffer
  32  	rdPos int  // Have emitted hist[:rdPos] already
  33  	full  bool // Has a full window length been written yet?
  34  }
  35  
  36  // init initializes dictDecoder to have a sliding window dictionary of the given
  37  // size. If a preset dict is provided, it will initialize the dictionary with
  38  // the contents of dict.
  39  func (dd *dictDecoder) init(size int, dict []byte) {
  40  	*dd = dictDecoder{hist: dd.hist}
  41  
  42  	if cap(dd.hist) < size {
  43  		dd.hist = []byte{:size}
  44  	}
  45  	dd.hist = dd.hist[:size]
  46  
  47  	if len(dict) > len(dd.hist) {
  48  		dict = dict[len(dict)-len(dd.hist):]
  49  	}
  50  	dd.wrPos = copy(dd.hist, dict)
  51  	if dd.wrPos == len(dd.hist) {
  52  		dd.wrPos = 0
  53  		dd.full = true
  54  	}
  55  	dd.rdPos = dd.wrPos
  56  }
  57  
  58  // histSize reports the total amount of historical data in the dictionary.
  59  func (dd *dictDecoder) histSize() int {
  60  	if dd.full {
  61  		return len(dd.hist)
  62  	}
  63  	return dd.wrPos
  64  }
  65  
  66  // availRead reports the number of bytes that can be flushed by readFlush.
  67  func (dd *dictDecoder) availRead() int {
  68  	return dd.wrPos - dd.rdPos
  69  }
  70  
  71  // availWrite reports the available amount of output buffer space.
  72  func (dd *dictDecoder) availWrite() int {
  73  	return len(dd.hist) - dd.wrPos
  74  }
  75  
  76  // writeSlice returns a slice of the available buffer to write data to.
  77  //
  78  // This invariant will be kept: len(s) <= availWrite()
  79  func (dd *dictDecoder) writeSlice() []byte {
  80  	return dd.hist[dd.wrPos:]
  81  }
  82  
  83  // writeMark advances the writer pointer by cnt.
  84  //
  85  // This invariant must be kept: 0 <= cnt <= availWrite()
  86  func (dd *dictDecoder) writeMark(cnt int) {
  87  	dd.wrPos += cnt
  88  }
  89  
  90  // writeByte writes a single byte to the dictionary.
  91  //
  92  // This invariant must be kept: 0 < availWrite()
  93  func (dd *dictDecoder) writeByte(c byte) {
  94  	dd.hist[dd.wrPos] = c
  95  	dd.wrPos++
  96  }
  97  
  98  // writeCopy copies a string at a given (dist, length) to the output.
  99  // This returns the number of bytes copied and may be less than the requested
 100  // length if the available space in the output buffer is too small.
 101  //
 102  // This invariant must be kept: 0 < dist <= histSize()
 103  func (dd *dictDecoder) writeCopy(dist, length int) int {
 104  	dstBase := dd.wrPos
 105  	dstPos := dstBase
 106  	srcPos := dstPos - dist
 107  	endPos := dstPos + length
 108  	if endPos > len(dd.hist) {
 109  		endPos = len(dd.hist)
 110  	}
 111  
 112  	// Copy non-overlapping section after destination position.
 113  	//
 114  	// This section is non-overlapping in that the copy length for this section
 115  	// is always less than or equal to the backwards distance. This can occur
 116  	// if a distance refers to data that wraps-around in the buffer.
 117  	// Thus, a backwards copy is performed here; that is, the exact bytes in
 118  	// the source prior to the copy is placed in the destination.
 119  	if srcPos < 0 {
 120  		srcPos += len(dd.hist)
 121  		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
 122  		srcPos = 0
 123  	}
 124  
 125  	// Copy possibly overlapping section before destination position.
 126  	//
 127  	// This section can overlap if the copy length for this section is larger
 128  	// than the backwards distance. This is allowed by LZ77 so that repeated
 129  	// strings can be succinctly represented using (dist, length) pairs.
 130  	// Thus, a forwards copy is performed here; that is, the bytes copied is
 131  	// possibly dependent on the resulting bytes in the destination as the copy
 132  	// progresses along. This is functionally equivalent to the following:
 133  	//
 134  	//	for i := 0; i < endPos-dstPos; i++ {
 135  	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
 136  	//	}
 137  	//	dstPos = endPos
 138  	//
 139  	for dstPos < endPos {
 140  		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
 141  	}
 142  
 143  	dd.wrPos = dstPos
 144  	return dstPos - dstBase
 145  }
 146  
 147  // tryWriteCopy tries to copy a string at a given (distance, length) to the
 148  // output. This specialized version is optimized for short distances.
 149  //
 150  // This method is designed to be inlined for performance reasons.
 151  //
 152  // This invariant must be kept: 0 < dist <= histSize()
 153  func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
 154  	dstPos := dd.wrPos
 155  	endPos := dstPos + length
 156  	if dstPos < dist || endPos > len(dd.hist) {
 157  		return 0
 158  	}
 159  	dstBase := dstPos
 160  	srcPos := dstPos - dist
 161  
 162  	// Copy possibly overlapping section before destination position.
 163  	for dstPos < endPos {
 164  		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
 165  	}
 166  
 167  	dd.wrPos = dstPos
 168  	return dstPos - dstBase
 169  }
 170  
 171  // readFlush returns a slice of the historical buffer that is ready to be
 172  // emitted to the user. The data returned by readFlush must be fully consumed
 173  // before calling any other dictDecoder methods.
 174  func (dd *dictDecoder) readFlush() []byte {
 175  	toRead := dd.hist[dd.rdPos:dd.wrPos]
 176  	dd.rdPos = dd.wrPos
 177  	if dd.wrPos == len(dd.hist) {
 178  		dd.wrPos, dd.rdPos = 0, 0
 179  		dd.full = true
 180  	}
 181  	return toRead
 182  }
 183