encode.go raw

   1  // Copyright 2011 The Snappy-Go Authors. All rights reserved.
   2  // Copyright (c) 2019 Klaus Post. All rights reserved.
   3  // Use of this source code is governed by a BSD-style
   4  // license that can be found in the LICENSE file.
   5  
   6  package s2
   7  
   8  import (
   9  	"encoding/binary"
  10  	"math"
  11  	"math/bits"
  12  	"sync"
  13  
  14  	"github.com/klauspost/compress/internal/race"
  15  )
  16  
  17  // Encode returns the encoded form of src. The returned slice may be a sub-
  18  // slice of dst if dst was large enough to hold the entire encoded block.
  19  // Otherwise, a newly allocated slice will be returned.
  20  //
  21  // The dst and src must not overlap. It is valid to pass a nil dst.
  22  //
  23  // The blocks will require the same amount of memory to decode as encoding,
  24  // and does not make for concurrent decoding.
  25  // Also note that blocks do not contain CRC information, so corruption may be undetected.
  26  //
  27  // If you need to encode larger amounts of data, consider using
  28  // the streaming interface which gives all of these features.
  29  func Encode(dst, src []byte) []byte {
  30  	if n := MaxEncodedLen(len(src)); n < 0 {
  31  		panic(ErrTooLarge)
  32  	} else if cap(dst) < n {
  33  		dst = make([]byte, n)
  34  	} else {
  35  		dst = dst[:n]
  36  	}
  37  
  38  	// The block starts with the varint-encoded length of the decompressed bytes.
  39  	d := binary.PutUvarint(dst, uint64(len(src)))
  40  
  41  	if len(src) == 0 {
  42  		return dst[:d]
  43  	}
  44  	if len(src) < minNonLiteralBlockSize {
  45  		d += emitLiteral(dst[d:], src)
  46  		return dst[:d]
  47  	}
  48  	n := encodeBlock(dst[d:], src)
  49  	if n > 0 {
  50  		d += n
  51  		return dst[:d]
  52  	}
  53  	// Not compressible
  54  	d += emitLiteral(dst[d:], src)
  55  	return dst[:d]
  56  }
  57  
  58  var estblockPool [2]sync.Pool
  59  
  60  // EstimateBlockSize will perform a very fast compression
  61  // without outputting the result and return the compressed output size.
  62  // The function returns -1 if no improvement could be achieved.
  63  // Using actual compression will most often produce better compression than the estimate.
  64  func EstimateBlockSize(src []byte) (d int) {
  65  	if len(src) <= inputMargin || int64(len(src)) > 0xffffffff {
  66  		return -1
  67  	}
  68  	if len(src) <= 1024 {
  69  		const sz, pool = 2048, 0
  70  		tmp, ok := estblockPool[pool].Get().(*[sz]byte)
  71  		if !ok {
  72  			tmp = &[sz]byte{}
  73  		}
  74  		race.WriteSlice(tmp[:])
  75  		defer estblockPool[pool].Put(tmp)
  76  
  77  		d = calcBlockSizeSmall(src, tmp)
  78  	} else {
  79  		const sz, pool = 32768, 1
  80  		tmp, ok := estblockPool[pool].Get().(*[sz]byte)
  81  		if !ok {
  82  			tmp = &[sz]byte{}
  83  		}
  84  		race.WriteSlice(tmp[:])
  85  		defer estblockPool[pool].Put(tmp)
  86  
  87  		d = calcBlockSize(src, tmp)
  88  	}
  89  
  90  	if d == 0 {
  91  		return -1
  92  	}
  93  	// Size of the varint encoded block size.
  94  	d += (bits.Len64(uint64(len(src))) + 7) / 7
  95  
  96  	if d >= len(src) {
  97  		return -1
  98  	}
  99  	return d
 100  }
 101  
 102  // EncodeBetter returns the encoded form of src. The returned slice may be a sub-
 103  // slice of dst if dst was large enough to hold the entire encoded block.
 104  // Otherwise, a newly allocated slice will be returned.
 105  //
 106  // EncodeBetter compresses better than Encode but typically with a
 107  // 10-40% speed decrease on both compression and decompression.
 108  //
 109  // The dst and src must not overlap. It is valid to pass a nil dst.
 110  //
 111  // The blocks will require the same amount of memory to decode as encoding,
 112  // and does not make for concurrent decoding.
 113  // Also note that blocks do not contain CRC information, so corruption may be undetected.
 114  //
 115  // If you need to encode larger amounts of data, consider using
 116  // the streaming interface which gives all of these features.
 117  func EncodeBetter(dst, src []byte) []byte {
 118  	if n := MaxEncodedLen(len(src)); n < 0 {
 119  		panic(ErrTooLarge)
 120  	} else if cap(dst) < n {
 121  		dst = make([]byte, n)
 122  	} else {
 123  		dst = dst[:n]
 124  	}
 125  
 126  	// The block starts with the varint-encoded length of the decompressed bytes.
 127  	d := binary.PutUvarint(dst, uint64(len(src)))
 128  
 129  	if len(src) == 0 {
 130  		return dst[:d]
 131  	}
 132  	if len(src) < minNonLiteralBlockSize {
 133  		d += emitLiteral(dst[d:], src)
 134  		return dst[:d]
 135  	}
 136  	n := encodeBlockBetter(dst[d:], src)
 137  	if n > 0 {
 138  		d += n
 139  		return dst[:d]
 140  	}
 141  	// Not compressible
 142  	d += emitLiteral(dst[d:], src)
 143  	return dst[:d]
 144  }
 145  
 146  // EncodeBest returns the encoded form of src. The returned slice may be a sub-
 147  // slice of dst if dst was large enough to hold the entire encoded block.
 148  // Otherwise, a newly allocated slice will be returned.
 149  //
 150  // EncodeBest compresses as good as reasonably possible but with a
 151  // big speed decrease.
 152  //
 153  // The dst and src must not overlap. It is valid to pass a nil dst.
 154  //
 155  // The blocks will require the same amount of memory to decode as encoding,
 156  // and does not make for concurrent decoding.
 157  // Also note that blocks do not contain CRC information, so corruption may be undetected.
 158  //
 159  // If you need to encode larger amounts of data, consider using
 160  // the streaming interface which gives all of these features.
 161  func EncodeBest(dst, src []byte) []byte {
 162  	if n := MaxEncodedLen(len(src)); n < 0 {
 163  		panic(ErrTooLarge)
 164  	} else if cap(dst) < n {
 165  		dst = make([]byte, n)
 166  	} else {
 167  		dst = dst[:n]
 168  	}
 169  
 170  	// The block starts with the varint-encoded length of the decompressed bytes.
 171  	d := binary.PutUvarint(dst, uint64(len(src)))
 172  
 173  	if len(src) == 0 {
 174  		return dst[:d]
 175  	}
 176  	if len(src) < minNonLiteralBlockSize {
 177  		d += emitLiteral(dst[d:], src)
 178  		return dst[:d]
 179  	}
 180  	n := encodeBlockBest(dst[d:], src, nil)
 181  	if n > 0 {
 182  		d += n
 183  		return dst[:d]
 184  	}
 185  	// Not compressible
 186  	d += emitLiteral(dst[d:], src)
 187  	return dst[:d]
 188  }
 189  
 190  // EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
 191  // slice of dst if dst was large enough to hold the entire encoded block.
 192  // Otherwise, a newly allocated slice will be returned.
 193  //
 194  // The output is Snappy compatible and will likely decompress faster.
 195  //
 196  // The dst and src must not overlap. It is valid to pass a nil dst.
 197  //
 198  // The blocks will require the same amount of memory to decode as encoding,
 199  // and does not make for concurrent decoding.
 200  // Also note that blocks do not contain CRC information, so corruption may be undetected.
 201  //
 202  // If you need to encode larger amounts of data, consider using
 203  // the streaming interface which gives all of these features.
 204  func EncodeSnappy(dst, src []byte) []byte {
 205  	if n := MaxEncodedLen(len(src)); n < 0 {
 206  		panic(ErrTooLarge)
 207  	} else if cap(dst) < n {
 208  		dst = make([]byte, n)
 209  	} else {
 210  		dst = dst[:n]
 211  	}
 212  
 213  	// The block starts with the varint-encoded length of the decompressed bytes.
 214  	d := binary.PutUvarint(dst, uint64(len(src)))
 215  
 216  	if len(src) == 0 {
 217  		return dst[:d]
 218  	}
 219  	if len(src) < minNonLiteralBlockSize {
 220  		d += emitLiteral(dst[d:], src)
 221  		return dst[:d]
 222  	}
 223  
 224  	n := encodeBlockSnappy(dst[d:], src)
 225  	if n > 0 {
 226  		d += n
 227  		return dst[:d]
 228  	}
 229  	// Not compressible
 230  	d += emitLiteral(dst[d:], src)
 231  	return dst[:d]
 232  }
 233  
 234  // EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
 235  // slice of dst if dst was large enough to hold the entire encoded block.
 236  // Otherwise, a newly allocated slice will be returned.
 237  //
 238  // The output is Snappy compatible and will likely decompress faster.
 239  //
 240  // The dst and src must not overlap. It is valid to pass a nil dst.
 241  //
 242  // The blocks will require the same amount of memory to decode as encoding,
 243  // and does not make for concurrent decoding.
 244  // Also note that blocks do not contain CRC information, so corruption may be undetected.
 245  //
 246  // If you need to encode larger amounts of data, consider using
 247  // the streaming interface which gives all of these features.
 248  func EncodeSnappyBetter(dst, src []byte) []byte {
 249  	if n := MaxEncodedLen(len(src)); n < 0 {
 250  		panic(ErrTooLarge)
 251  	} else if cap(dst) < n {
 252  		dst = make([]byte, n)
 253  	} else {
 254  		dst = dst[:n]
 255  	}
 256  
 257  	// The block starts with the varint-encoded length of the decompressed bytes.
 258  	d := binary.PutUvarint(dst, uint64(len(src)))
 259  
 260  	if len(src) == 0 {
 261  		return dst[:d]
 262  	}
 263  	if len(src) < minNonLiteralBlockSize {
 264  		d += emitLiteral(dst[d:], src)
 265  		return dst[:d]
 266  	}
 267  
 268  	n := encodeBlockBetterSnappy(dst[d:], src)
 269  	if n > 0 {
 270  		d += n
 271  		return dst[:d]
 272  	}
 273  	// Not compressible
 274  	d += emitLiteral(dst[d:], src)
 275  	return dst[:d]
 276  }
 277  
 278  // EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
 279  // slice of dst if dst was large enough to hold the entire encoded block.
 280  // Otherwise, a newly allocated slice will be returned.
 281  //
 282  // The output is Snappy compatible and will likely decompress faster.
 283  //
 284  // The dst and src must not overlap. It is valid to pass a nil dst.
 285  //
 286  // The blocks will require the same amount of memory to decode as encoding,
 287  // and does not make for concurrent decoding.
 288  // Also note that blocks do not contain CRC information, so corruption may be undetected.
 289  //
 290  // If you need to encode larger amounts of data, consider using
 291  // the streaming interface which gives all of these features.
 292  func EncodeSnappyBest(dst, src []byte) []byte {
 293  	if n := MaxEncodedLen(len(src)); n < 0 {
 294  		panic(ErrTooLarge)
 295  	} else if cap(dst) < n {
 296  		dst = make([]byte, n)
 297  	} else {
 298  		dst = dst[:n]
 299  	}
 300  
 301  	// The block starts with the varint-encoded length of the decompressed bytes.
 302  	d := binary.PutUvarint(dst, uint64(len(src)))
 303  
 304  	if len(src) == 0 {
 305  		return dst[:d]
 306  	}
 307  	if len(src) < minNonLiteralBlockSize {
 308  		d += emitLiteral(dst[d:], src)
 309  		return dst[:d]
 310  	}
 311  
 312  	n := encodeBlockBestSnappy(dst[d:], src)
 313  	if n > 0 {
 314  		d += n
 315  		return dst[:d]
 316  	}
 317  	// Not compressible
 318  	d += emitLiteral(dst[d:], src)
 319  	return dst[:d]
 320  }
 321  
 322  // ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
 323  // If the destination is nil or too small, a new will be allocated.
 324  // The blocks are not validated, so garbage in = garbage out.
 325  // dst may not overlap block data.
 326  // Any data in dst is preserved as is, so it will not be considered a block.
 327  func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
 328  	totalSize := uint64(0)
 329  	compSize := 0
 330  	for _, b := range blocks {
 331  		l, hdr, err := decodedLen(b)
 332  		if err != nil {
 333  			return nil, err
 334  		}
 335  		totalSize += uint64(l)
 336  		compSize += len(b) - hdr
 337  	}
 338  	if totalSize == 0 {
 339  		dst = append(dst, 0)
 340  		return dst, nil
 341  	}
 342  	if totalSize > math.MaxUint32 {
 343  		return nil, ErrTooLarge
 344  	}
 345  	var tmp [binary.MaxVarintLen32]byte
 346  	hdrSize := binary.PutUvarint(tmp[:], totalSize)
 347  	wantSize := hdrSize + compSize
 348  
 349  	if cap(dst)-len(dst) < wantSize {
 350  		dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
 351  	}
 352  	dst = append(dst, tmp[:hdrSize]...)
 353  	for _, b := range blocks {
 354  		_, hdr, err := decodedLen(b)
 355  		if err != nil {
 356  			return nil, err
 357  		}
 358  		dst = append(dst, b[hdr:]...)
 359  	}
 360  	return dst, nil
 361  }
 362  
 363  // inputMargin is the minimum number of extra input bytes to keep, inside
 364  // encodeBlock's inner loop. On some architectures, this margin lets us
 365  // implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
 366  // literals can be implemented as a single load to and store from a 16-byte
 367  // register. That literal's actual length can be as short as 1 byte, so this
 368  // can copy up to 15 bytes too much, but that's OK as subsequent iterations of
 369  // the encoding loop will fix up the copy overrun, and this inputMargin ensures
 370  // that we don't overrun the dst and src buffers.
 371  const inputMargin = 8
 372  
 373  // minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
 374  // will be accepted by the encoder.
 375  const minNonLiteralBlockSize = 32
 376  
 377  const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)
 378  
 379  // MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
 380  // Blocks this big are highly discouraged, though.
 381  // Half the size on 32 bit systems.
 382  const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5
 383  
 384  // MaxEncodedLen returns the maximum length of a snappy block, given its
 385  // uncompressed length.
 386  //
 387  // It will return a negative value if srcLen is too large to encode.
 388  // 32 bit platforms will have lower thresholds for rejecting big content.
 389  func MaxEncodedLen(srcLen int) int {
 390  	n := uint64(srcLen)
 391  	if intReduction == 1 {
 392  		// 32 bits
 393  		if n > math.MaxInt32 {
 394  			// Also includes negative.
 395  			return -1
 396  		}
 397  	} else if n > 0xffffffff {
 398  		// 64 bits
 399  		// Also includes negative.
 400  		return -1
 401  	}
 402  	// Size of the varint encoded block size.
 403  	n = n + uint64((bits.Len64(n)+7)/7)
 404  
 405  	// Add maximum size of encoding block as literals.
 406  	n += uint64(literalExtraSize(int64(srcLen)))
 407  	if intReduction == 1 {
 408  		// 32 bits
 409  		if n > math.MaxInt32 {
 410  			return -1
 411  		}
 412  	} else if n > 0xffffffff {
 413  		// 64 bits
 414  		// Also includes negative.
 415  		return -1
 416  	}
 417  	return int(n)
 418  }
 419