encode_amd64.go raw

   1  //go:build !appengine && !noasm && gc
   2  // +build !appengine,!noasm,gc
   3  
   4  package s2
   5  
   6  import (
   7  	"sync"
   8  
   9  	"github.com/klauspost/compress/internal/race"
  10  )
  11  
// hasAmd64Asm is true in this file, which is only compiled when the build
// constraint at the top holds (gc toolchain, neither the appengine nor the
// noasm tag set).
const hasAmd64Asm = true
  13  
// encPools recycles the fixed-size scratch arrays that encodeBlock and
// encodeBlockSnappy pass to the assembly encoders. Each index is used with a
// single array type: 0 -> *[65536]byte, 1 -> *[16384]byte, 2 -> *[4096]byte,
// 3 -> *[1024]byte.
var encPools [4]sync.Pool
  15  
  16  // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
  17  // assumes that the varint-encoded length of the decompressed bytes has already
  18  // been written.
  19  //
  20  // It also assumes that:
  21  //
  22  //	len(dst) >= MaxEncodedLen(len(src)) &&
  23  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  24  func encodeBlock(dst, src []byte) (d int) {
  25  	race.ReadSlice(src)
  26  	race.WriteSlice(dst)
  27  
  28  	const (
  29  		// Use 12 bit table when less than...
  30  		limit12B = 16 << 10
  31  		// Use 10 bit table when less than...
  32  		limit10B = 4 << 10
  33  		// Use 8 bit table when less than...
  34  		limit8B = 512
  35  	)
  36  
  37  	if len(src) >= 4<<20 {
  38  		const sz, pool = 65536, 0
  39  		tmp, ok := encPools[pool].Get().(*[sz]byte)
  40  		if !ok {
  41  			tmp = &[sz]byte{}
  42  		}
  43  		race.WriteSlice(tmp[:])
  44  		defer encPools[pool].Put(tmp)
  45  		return encodeBlockAsm(dst, src, tmp)
  46  	}
  47  	if len(src) >= limit12B {
  48  		const sz, pool = 65536, 0
  49  		tmp, ok := encPools[pool].Get().(*[sz]byte)
  50  		if !ok {
  51  			tmp = &[sz]byte{}
  52  		}
  53  		race.WriteSlice(tmp[:])
  54  		defer encPools[pool].Put(tmp)
  55  		return encodeBlockAsm4MB(dst, src, tmp)
  56  	}
  57  	if len(src) >= limit10B {
  58  		const sz, pool = 16384, 1
  59  		tmp, ok := encPools[pool].Get().(*[sz]byte)
  60  		if !ok {
  61  			tmp = &[sz]byte{}
  62  		}
  63  		race.WriteSlice(tmp[:])
  64  		defer encPools[pool].Put(tmp)
  65  		return encodeBlockAsm12B(dst, src, tmp)
  66  	}
  67  	if len(src) >= limit8B {
  68  		const sz, pool = 4096, 2
  69  		tmp, ok := encPools[pool].Get().(*[sz]byte)
  70  		if !ok {
  71  			tmp = &[sz]byte{}
  72  		}
  73  		race.WriteSlice(tmp[:])
  74  		defer encPools[pool].Put(tmp)
  75  		return encodeBlockAsm10B(dst, src, tmp)
  76  	}
  77  	if len(src) < minNonLiteralBlockSize {
  78  		return 0
  79  	}
  80  	const sz, pool = 1024, 3
  81  	tmp, ok := encPools[pool].Get().(*[sz]byte)
  82  	if !ok {
  83  		tmp = &[sz]byte{}
  84  	}
  85  	race.WriteSlice(tmp[:])
  86  	defer encPools[pool].Put(tmp)
  87  	return encodeBlockAsm8B(dst, src, tmp)
  88  }
  89  
// encBetterPools recycles the fixed-size scratch arrays that
// encodeBlockBetter and encodeBlockBetterSnappy pass to the assembly
// encoders. Callers type-assert the pooled value to the array size they need
// and allocate a fresh array when the assertion fails.
var encBetterPools [5]sync.Pool
  91  
  92  // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
  93  // assumes that the varint-encoded length of the decompressed bytes has already
  94  // been written.
  95  //
  96  // It also assumes that:
  97  //
  98  //	len(dst) >= MaxEncodedLen(len(src)) &&
  99  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
 100  func encodeBlockBetter(dst, src []byte) (d int) {
 101  	race.ReadSlice(src)
 102  	race.WriteSlice(dst)
 103  
 104  	const (
 105  		// Use 12 bit table when less than...
 106  		limit12B = 16 << 10
 107  		// Use 10 bit table when less than...
 108  		limit10B = 4 << 10
 109  		// Use 8 bit table when less than...
 110  		limit8B = 512
 111  	)
 112  
 113  	if len(src) > 4<<20 {
 114  		const sz, pool = 589824, 0
 115  		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 116  		if !ok {
 117  			tmp = &[sz]byte{}
 118  		}
 119  		race.WriteSlice(tmp[:])
 120  		defer encBetterPools[pool].Put(tmp)
 121  		return encodeBetterBlockAsm(dst, src, tmp)
 122  	}
 123  	if len(src) >= limit12B {
 124  		const sz, pool = 589824, 0
 125  		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 126  		if !ok {
 127  			tmp = &[sz]byte{}
 128  		}
 129  		race.WriteSlice(tmp[:])
 130  		defer encBetterPools[pool].Put(tmp)
 131  
 132  		return encodeBetterBlockAsm4MB(dst, src, tmp)
 133  	}
 134  	if len(src) >= limit10B {
 135  		const sz, pool = 81920, 0
 136  		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 137  		if !ok {
 138  			tmp = &[sz]byte{}
 139  		}
 140  		race.WriteSlice(tmp[:])
 141  		defer encBetterPools[pool].Put(tmp)
 142  
 143  		return encodeBetterBlockAsm12B(dst, src, tmp)
 144  	}
 145  	if len(src) >= limit8B {
 146  		const sz, pool = 20480, 1
 147  		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 148  		if !ok {
 149  			tmp = &[sz]byte{}
 150  		}
 151  		race.WriteSlice(tmp[:])
 152  		defer encBetterPools[pool].Put(tmp)
 153  		return encodeBetterBlockAsm10B(dst, src, tmp)
 154  	}
 155  	if len(src) < minNonLiteralBlockSize {
 156  		return 0
 157  	}
 158  
 159  	const sz, pool = 5120, 2
 160  	tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 161  	if !ok {
 162  		tmp = &[sz]byte{}
 163  	}
 164  	race.WriteSlice(tmp[:])
 165  	defer encBetterPools[pool].Put(tmp)
 166  	return encodeBetterBlockAsm8B(dst, src, tmp)
 167  }
 168  
 169  // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
 170  // assumes that the varint-encoded length of the decompressed bytes has already
 171  // been written.
 172  //
 173  // It also assumes that:
 174  //
 175  //	len(dst) >= MaxEncodedLen(len(src)) &&
 176  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
 177  func encodeBlockSnappy(dst, src []byte) (d int) {
 178  	race.ReadSlice(src)
 179  	race.WriteSlice(dst)
 180  
 181  	const (
 182  		// Use 12 bit table when less than...
 183  		limit12B = 16 << 10
 184  		// Use 10 bit table when less than...
 185  		limit10B = 4 << 10
 186  		// Use 8 bit table when less than...
 187  		limit8B = 512
 188  	)
 189  	if len(src) > 65536 {
 190  		const sz, pool = 65536, 0
 191  		tmp, ok := encPools[pool].Get().(*[sz]byte)
 192  		if !ok {
 193  			tmp = &[sz]byte{}
 194  		}
 195  		race.WriteSlice(tmp[:])
 196  		defer encPools[pool].Put(tmp)
 197  		return encodeSnappyBlockAsm(dst, src, tmp)
 198  	}
 199  	if len(src) >= limit12B {
 200  		const sz, pool = 65536, 0
 201  		tmp, ok := encPools[pool].Get().(*[sz]byte)
 202  		if !ok {
 203  			tmp = &[sz]byte{}
 204  		}
 205  		race.WriteSlice(tmp[:])
 206  		defer encPools[pool].Put(tmp)
 207  		return encodeSnappyBlockAsm64K(dst, src, tmp)
 208  	}
 209  	if len(src) >= limit10B {
 210  		const sz, pool = 16384, 1
 211  		tmp, ok := encPools[pool].Get().(*[sz]byte)
 212  		if !ok {
 213  			tmp = &[sz]byte{}
 214  		}
 215  		race.WriteSlice(tmp[:])
 216  		defer encPools[pool].Put(tmp)
 217  		return encodeSnappyBlockAsm12B(dst, src, tmp)
 218  	}
 219  	if len(src) >= limit8B {
 220  		const sz, pool = 4096, 2
 221  		tmp, ok := encPools[pool].Get().(*[sz]byte)
 222  		if !ok {
 223  			tmp = &[sz]byte{}
 224  		}
 225  		race.WriteSlice(tmp[:])
 226  		defer encPools[pool].Put(tmp)
 227  		return encodeSnappyBlockAsm10B(dst, src, tmp)
 228  	}
 229  	if len(src) < minNonLiteralBlockSize {
 230  		return 0
 231  	}
 232  	const sz, pool = 1024, 3
 233  	tmp, ok := encPools[pool].Get().(*[sz]byte)
 234  	if !ok {
 235  		tmp = &[sz]byte{}
 236  	}
 237  	race.WriteSlice(tmp[:])
 238  	defer encPools[pool].Put(tmp)
 239  	return encodeSnappyBlockAsm8B(dst, src, tmp)
 240  }
 241  
 242  // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
 243  // assumes that the varint-encoded length of the decompressed bytes has already
 244  // been written.
 245  //
 246  // It also assumes that:
 247  //
 248  //	len(dst) >= MaxEncodedLen(len(src)) &&
 249  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
 250  func encodeBlockBetterSnappy(dst, src []byte) (d int) {
 251  	race.ReadSlice(src)
 252  	race.WriteSlice(dst)
 253  
 254  	const (
 255  		// Use 12 bit table when less than...
 256  		limit12B = 16 << 10
 257  		// Use 10 bit table when less than...
 258  		limit10B = 4 << 10
 259  		// Use 8 bit table when less than...
 260  		limit8B = 512
 261  	)
 262  	if len(src) > 65536 {
 263  		const sz, pool = 589824, 0
 264  		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 265  		if !ok {
 266  			tmp = &[sz]byte{}
 267  		}
 268  		race.WriteSlice(tmp[:])
 269  		defer encBetterPools[pool].Put(tmp)
 270  		return encodeSnappyBetterBlockAsm(dst, src, tmp)
 271  	}
 272  
 273  	if len(src) >= limit12B {
 274  		const sz, pool = 294912, 4
 275  		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 276  		if !ok {
 277  			tmp = &[sz]byte{}
 278  		}
 279  		race.WriteSlice(tmp[:])
 280  		defer encBetterPools[pool].Put(tmp)
 281  
 282  		return encodeSnappyBetterBlockAsm64K(dst, src, tmp)
 283  	}
 284  	if len(src) >= limit10B {
 285  		const sz, pool = 81920, 0
 286  		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 287  		if !ok {
 288  			tmp = &[sz]byte{}
 289  		}
 290  		race.WriteSlice(tmp[:])
 291  		defer encBetterPools[pool].Put(tmp)
 292  
 293  		return encodeSnappyBetterBlockAsm12B(dst, src, tmp)
 294  	}
 295  	if len(src) >= limit8B {
 296  		const sz, pool = 20480, 1
 297  		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 298  		if !ok {
 299  			tmp = &[sz]byte{}
 300  		}
 301  		race.WriteSlice(tmp[:])
 302  		defer encBetterPools[pool].Put(tmp)
 303  		return encodeSnappyBetterBlockAsm10B(dst, src, tmp)
 304  	}
 305  	if len(src) < minNonLiteralBlockSize {
 306  		return 0
 307  	}
 308  
 309  	const sz, pool = 5120, 2
 310  	tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
 311  	if !ok {
 312  		tmp = &[sz]byte{}
 313  	}
 314  	race.WriteSlice(tmp[:])
 315  	defer encBetterPools[pool].Put(tmp)
 316  	return encodeSnappyBetterBlockAsm8B(dst, src, tmp)
 317  }
 318