enc_base.go raw

   1  package zstd
   2  
   3  import (
   4  	"fmt"
   5  	"math/bits"
   6  
   7  	"github.com/klauspost/compress/zstd/internal/xxhash"
   8  )
   9  
const (
	// dictShardBits is a bit count used for sharding dictionary tables.
	// It is not referenced in this file.
	// NOTE(review): presumably each shard covers 1<<dictShardBits table
	// entries in the concrete encoders; confirm against their usage.
	dictShardBits = 7
)
  13  
// fastBase holds the state shared by the encoder implementations:
// the history buffer, the running position offset, the checksum digest
// and the block currently being encoded.
type fastBase struct {
	// cur is the offset at the start of hist.
	// It is advanced by addBlock when history is moved down,
	// and by resetBase to put old positions out of reach.
	cur int32
	// maximum offset. Should be at least 2x block size.
	// This is also the number of history bytes kept when hist is moved down.
	maxMatchOff int32
	// bufferReset is the threshold cur is compared against in addBlock
	// (debug assertion) and resetBase. It is not assigned in this file.
	bufferReset int32
	// hist is the history buffer; addBlock appends incoming blocks to it.
	hist []byte
	// crc is the checksum digest. resetBase creates it on first use
	// and resets it on subsequent calls.
	crc *xxhash.Digest
	// tmp is scratch space that receives the digest output in AppendCRC.
	tmp [8]byte
	// blk is the current block encoder, see Block and UseBlock.
	blk *blockEnc
	// lastDictID is not read or written in this file.
	// NOTE(review): presumably the ID of the most recently loaded dictionary.
	lastDictID uint32
	// lowMem makes ensureHist allocate a smaller history buffer
	// and is passed on to blockEnc when resetBase creates one.
	lowMem bool
}
  27  
  28  // CRC returns the underlying CRC writer.
  29  func (e *fastBase) CRC() *xxhash.Digest {
  30  	return e.crc
  31  }
  32  
  33  // AppendCRC will append the CRC to the destination slice and return it.
  34  func (e *fastBase) AppendCRC(dst []byte) []byte {
  35  	crc := e.crc.Sum(e.tmp[:0])
  36  	dst = append(dst, crc[7], crc[6], crc[5], crc[4])
  37  	return dst
  38  }
  39  
  40  // WindowSize returns the window size of the encoder,
  41  // or a window size small enough to contain the input size, if > 0.
  42  func (e *fastBase) WindowSize(size int64) int32 {
  43  	if size > 0 && size < int64(e.maxMatchOff) {
  44  		b := max(
  45  			// Keep minimum window.
  46  			int32(1)<<uint(bits.Len(uint(size))), 1024)
  47  		return b
  48  	}
  49  	return e.maxMatchOff
  50  }
  51  
  52  // Block returns the current block.
  53  func (e *fastBase) Block() *blockEnc {
  54  	return e.blk
  55  }
  56  
  57  func (e *fastBase) addBlock(src []byte) int32 {
  58  	if debugAsserts && e.cur > e.bufferReset {
  59  		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
  60  	}
  61  	// check if we have space already
  62  	if len(e.hist)+len(src) > cap(e.hist) {
  63  		if cap(e.hist) == 0 {
  64  			e.ensureHist(len(src))
  65  		} else {
  66  			if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) {
  67  				panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff))
  68  			}
  69  			// Move down
  70  			offset := int32(len(e.hist)) - e.maxMatchOff
  71  			copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
  72  			e.cur += offset
  73  			e.hist = e.hist[:e.maxMatchOff]
  74  		}
  75  	}
  76  	s := int32(len(e.hist))
  77  	e.hist = append(e.hist, src...)
  78  	return s
  79  }
  80  
  81  // ensureHist will ensure that history can keep at least this many bytes.
  82  func (e *fastBase) ensureHist(n int) {
  83  	if cap(e.hist) >= n {
  84  		return
  85  	}
  86  	l := e.maxMatchOff
  87  	if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize {
  88  		l += maxCompressedBlockSize
  89  	} else {
  90  		l += e.maxMatchOff
  91  	}
  92  	// Make it at least 1MB.
  93  	if l < 1<<20 && !e.lowMem {
  94  		l = 1 << 20
  95  	}
  96  	// Make it at least the requested size.
  97  	if l < int32(n) {
  98  		l = int32(n)
  99  	}
 100  	e.hist = make([]byte, 0, l)
 101  }
 102  
 103  // useBlock will replace the block with the provided one,
 104  // but transfer recent offsets from the previous.
 105  func (e *fastBase) UseBlock(enc *blockEnc) {
 106  	enc.reset(e.blk)
 107  	e.blk = enc
 108  }
 109  
 110  func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
 111  	if debugAsserts {
 112  		if s < 0 {
 113  			err := fmt.Sprintf("s (%d) < 0", s)
 114  			panic(err)
 115  		}
 116  		if t < 0 {
 117  			err := fmt.Sprintf("t (%d) < 0", t)
 118  			panic(err)
 119  		}
 120  		if s-t > e.maxMatchOff {
 121  			err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
 122  			panic(err)
 123  		}
 124  		if len(src)-int(s) > maxCompressedBlockSize {
 125  			panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
 126  		}
 127  	}
 128  	return int32(matchLen(src[s:], src[t:]))
 129  }
 130  
 131  // Reset the encoding table.
 132  func (e *fastBase) resetBase(d *dict, singleBlock bool) {
 133  	if e.blk == nil {
 134  		e.blk = &blockEnc{lowMem: e.lowMem}
 135  		e.blk.init()
 136  	} else {
 137  		e.blk.reset(nil)
 138  	}
 139  	e.blk.initNewEncode()
 140  	if e.crc == nil {
 141  		e.crc = xxhash.New()
 142  	} else {
 143  		e.crc.Reset()
 144  	}
 145  	e.blk.dictLitEnc = nil
 146  	if d != nil {
 147  		low := e.lowMem
 148  		if singleBlock {
 149  			e.lowMem = true
 150  		}
 151  		e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
 152  		e.lowMem = low
 153  	}
 154  
 155  	// We offset current position so everything will be out of reach.
 156  	// If above reset line, history will be purged.
 157  	if e.cur < e.bufferReset {
 158  		e.cur += e.maxMatchOff + int32(len(e.hist))
 159  	}
 160  	e.hist = e.hist[:0]
 161  	if d != nil {
 162  		// Set offsets (currently not used)
 163  		for i, off := range d.offsets {
 164  			e.blk.recentOffsets[i] = uint32(off)
 165  			e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i]
 166  		}
 167  		// Transfer litenc.
 168  		e.blk.dictLitEnc = d.litEnc
 169  		e.hist = append(e.hist, d.content...)
 170  	}
 171  }
 172