seqdec_amd64.go raw

   1  //go:build amd64 && !appengine && !noasm && gc
   2  // +build amd64,!appengine,!noasm,gc
   3  
   4  package zstd
   5  
   6  import (
   7  	"fmt"
   8  	"io"
   9  
  10  	"github.com/klauspost/compress/internal/cpuinfo"
  11  )
  12  
  13  type decodeSyncAsmContext struct {
  14  	llTable     []decSymbol
  15  	mlTable     []decSymbol
  16  	ofTable     []decSymbol
  17  	llState     uint64
  18  	mlState     uint64
  19  	ofState     uint64
  20  	iteration   int
  21  	litRemain   int
  22  	out         []byte
  23  	outPosition int
  24  	literals    []byte
  25  	litPosition int
  26  	history     []byte
  27  	windowSize  int
  28  	ll          int // set on error (not for all errors, please refer to _generate/gen.go)
  29  	ml          int // set on error (not for all errors, please refer to _generate/gen.go)
  30  	mo          int // set on error (not for all errors, please refer to _generate/gen.go)
  31  }
  32  
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
//
// The function has no Go body in this file. It receives the decoder, its bit
// reader and a decodeSyncAsmContext, and returns an int status code that
// decodeSyncSimple compares against noError and the error* constants.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
  39  
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
//
// decodeSyncSimple only considers this variant when cpuinfo.HasBMI2() reports
// true. The returned int is a status code compared against noError and the
// error* constants.
//
//go:noescape
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
  44  
// sequenceDecs_decodeSync_safe_amd64 does the same as sequenceDecs_decodeSync_amd64,
// but does not write more than output buffer.
//
// decodeSyncSimple calls it when useSafe is set and cpuinfo.HasBMI2() reports
// false.
//
//go:noescape
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
  49  
// sequenceDecs_decodeSync_safe_bmi2 does the same as sequenceDecs_decodeSync_bmi2,
// but does not write more than output buffer.
//
// decodeSyncSimple calls it when useSafe is set and cpuinfo.HasBMI2() reports
// true.
//
//go:noescape
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
  54  
  55  // decode sequences from the stream with the provided history but without a dictionary.
  56  func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
  57  	if len(s.dict) > 0 {
  58  		return false, nil
  59  	}
  60  	if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
  61  		return false, nil
  62  	}
  63  
  64  	// FIXME: Using unsafe memory copies leads to rare, random crashes
  65  	// with fuzz testing. It is therefore disabled for now.
  66  	const useSafe = true
  67  	/*
  68  		useSafe := false
  69  		if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
  70  			useSafe = true
  71  		}
  72  		if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
  73  			useSafe = true
  74  		}
  75  		if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
  76  			useSafe = true
  77  		}
  78  	*/
  79  
  80  	br := s.br
  81  
  82  	maxBlockSize := min(s.windowSize, maxCompressedBlockSize)
  83  
  84  	ctx := decodeSyncAsmContext{
  85  		llTable:     s.litLengths.fse.dt[:maxTablesize],
  86  		mlTable:     s.matchLengths.fse.dt[:maxTablesize],
  87  		ofTable:     s.offsets.fse.dt[:maxTablesize],
  88  		llState:     uint64(s.litLengths.state.state),
  89  		mlState:     uint64(s.matchLengths.state.state),
  90  		ofState:     uint64(s.offsets.state.state),
  91  		iteration:   s.nSeqs - 1,
  92  		litRemain:   len(s.literals),
  93  		out:         s.out,
  94  		outPosition: len(s.out),
  95  		literals:    s.literals,
  96  		windowSize:  s.windowSize,
  97  		history:     hist,
  98  	}
  99  
 100  	s.seqSize = 0
 101  	startSize := len(s.out)
 102  
 103  	var errCode int
 104  	if cpuinfo.HasBMI2() {
 105  		if useSafe {
 106  			errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
 107  		} else {
 108  			errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
 109  		}
 110  	} else {
 111  		if useSafe {
 112  			errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
 113  		} else {
 114  			errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
 115  		}
 116  	}
 117  	switch errCode {
 118  	case noError:
 119  		break
 120  
 121  	case errorMatchLenOfsMismatch:
 122  		return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
 123  
 124  	case errorMatchLenTooBig:
 125  		return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
 126  
 127  	case errorMatchOffTooBig:
 128  		return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
 129  			ctx.mo, ctx.outPosition+len(hist)-startSize)
 130  
 131  	case errorNotEnoughLiterals:
 132  		return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
 133  			ctx.ll, ctx.litRemain+ctx.ll)
 134  
 135  	case errorOverread:
 136  		return true, io.ErrUnexpectedEOF
 137  
 138  	case errorNotEnoughSpace:
 139  		size := ctx.outPosition + ctx.ll + ctx.ml
 140  		if debugDecoder {
 141  			println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
 142  		}
 143  		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 144  
 145  	default:
 146  		return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode)
 147  	}
 148  
 149  	s.seqSize += ctx.litRemain
 150  	if s.seqSize > maxBlockSize {
 151  		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 152  	}
 153  	err := br.close()
 154  	if err != nil {
 155  		printf("Closing sequences: %v, %+v\n", err, *br)
 156  		return true, err
 157  	}
 158  
 159  	s.literals = s.literals[ctx.litPosition:]
 160  	t := ctx.outPosition
 161  	s.out = s.out[:t]
 162  
 163  	// Add final literals
 164  	s.out = append(s.out, s.literals...)
 165  	if debugDecoder {
 166  		t += len(s.literals)
 167  		if t != len(s.out) {
 168  			panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
 169  		}
 170  	}
 171  
 172  	return true, nil
 173  }
 174  
 175  // --------------------------------------------------------------------------------
 176  
 177  type decodeAsmContext struct {
 178  	llTable   []decSymbol
 179  	mlTable   []decSymbol
 180  	ofTable   []decSymbol
 181  	llState   uint64
 182  	mlState   uint64
 183  	ofState   uint64
 184  	iteration int
 185  	seqs      []seqVals
 186  	litRemain int
 187  }
 188  
 189  const noError = 0
 190  
 191  // error reported when mo == 0 && ml > 0
 192  const errorMatchLenOfsMismatch = 1
 193  
 194  // error reported when ml > maxMatchLen
 195  const errorMatchLenTooBig = 2
 196  
 197  // error reported when mo > available history or mo > s.windowSize
 198  const errorMatchOffTooBig = 3
 199  
 200  // error reported when the sum of literal lengths exeeceds the literal buffer size
 201  const errorNotEnoughLiterals = 4
 202  
 203  // error reported when capacity of `out` is too small
 204  const errorNotEnoughSpace = 5
 205  
 206  // error reported when bits are overread.
 207  const errorOverread = 6
 208  
// sequenceDecs_decode_amd64 implements the main loop of sequenceDecs in x86 asm.
//
// The function has no Go body in this file. decode calls it when
// cpuinfo.HasBMI2() reports false and the 56-bit condition checked there does
// not hold. The returned int is a status code compared against the error*
// constants; zero means success.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 215  
// sequenceDecs_decode_56_amd64 implements the main loop of sequenceDecs in x86 asm.
//
// decode selects this variant, without BMI2, when s.maxBits plus the actual
// table logs of the offset, match length and literal length tables add up to
// at most 56.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 222  
// sequenceDecs_decode_bmi2 implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//
// decode calls it when cpuinfo.HasBMI2() reports true and the 56-bit condition
// checked there does not hold.
//
//go:noescape
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 227  
// sequenceDecs_decode_56_bmi2 implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//
// decode selects this variant when cpuinfo.HasBMI2() reports true and
// s.maxBits plus the actual table logs of the offset, match length and literal
// length tables add up to at most 56.
//
//go:noescape
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 232  
 233  // decode sequences from the stream without the provided history.
 234  func (s *sequenceDecs) decode(seqs []seqVals) error {
 235  	br := s.br
 236  
 237  	maxBlockSize := min(s.windowSize, maxCompressedBlockSize)
 238  
 239  	ctx := decodeAsmContext{
 240  		llTable:   s.litLengths.fse.dt[:maxTablesize],
 241  		mlTable:   s.matchLengths.fse.dt[:maxTablesize],
 242  		ofTable:   s.offsets.fse.dt[:maxTablesize],
 243  		llState:   uint64(s.litLengths.state.state),
 244  		mlState:   uint64(s.matchLengths.state.state),
 245  		ofState:   uint64(s.offsets.state.state),
 246  		seqs:      seqs,
 247  		iteration: len(seqs) - 1,
 248  		litRemain: len(s.literals),
 249  	}
 250  
 251  	if debugDecoder {
 252  		println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
 253  	}
 254  
 255  	s.seqSize = 0
 256  	lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
 257  	var errCode int
 258  	if cpuinfo.HasBMI2() {
 259  		if lte56bits {
 260  			errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
 261  		} else {
 262  			errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
 263  		}
 264  	} else {
 265  		if lte56bits {
 266  			errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
 267  		} else {
 268  			errCode = sequenceDecs_decode_amd64(s, br, &ctx)
 269  		}
 270  	}
 271  	if errCode != 0 {
 272  		i := len(seqs) - ctx.iteration - 1
 273  		switch errCode {
 274  		case errorMatchLenOfsMismatch:
 275  			ml := ctx.seqs[i].ml
 276  			return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
 277  
 278  		case errorMatchLenTooBig:
 279  			ml := ctx.seqs[i].ml
 280  			return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
 281  
 282  		case errorNotEnoughLiterals:
 283  			ll := ctx.seqs[i].ll
 284  			return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
 285  		case errorOverread:
 286  			return io.ErrUnexpectedEOF
 287  		}
 288  
 289  		return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode)
 290  	}
 291  
 292  	if ctx.litRemain < 0 {
 293  		return fmt.Errorf("literal count is too big: total available %d, total requested %d",
 294  			len(s.literals), len(s.literals)-ctx.litRemain)
 295  	}
 296  
 297  	s.seqSize += ctx.litRemain
 298  	if s.seqSize > maxBlockSize {
 299  		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 300  	}
 301  	if debugDecoder {
 302  		println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
 303  	}
 304  	err := br.close()
 305  	if err != nil {
 306  		printf("Closing sequences: %v, %+v\n", err, *br)
 307  	}
 308  	return err
 309  }
 310  
 311  // --------------------------------------------------------------------------------
 312  
 313  type executeAsmContext struct {
 314  	seqs        []seqVals
 315  	seqIndex    int
 316  	out         []byte
 317  	history     []byte
 318  	literals    []byte
 319  	outPosition int
 320  	litPosition int
 321  	windowSize  int
 322  }
 323  
// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
//
// The function has no Go body in this file. All inputs and outputs travel
// through ctx; after the call executeSimple reads back ctx.seqIndex,
// ctx.outPosition and ctx.litPosition.
//
// Returns false if a match offset is too big.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
 332  
// sequenceDecs_executeSimple_safe_amd64 is the same as
// sequenceDecs_executeSimple_amd64, but with safe memcopies.
//
// executeSimple picks this variant when the literals slice has less than
// compressedBlockOverAlloc bytes of spare capacity.
//
//go:noescape
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
 337  
 338  // executeSimple handles cases when dictionary is not used.
 339  func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
 340  	// Ensure we have enough output size...
 341  	if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
 342  		addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
 343  		s.out = append(s.out, make([]byte, addBytes)...)
 344  		s.out = s.out[:len(s.out)-addBytes]
 345  	}
 346  
 347  	if debugDecoder {
 348  		printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
 349  	}
 350  
 351  	var t = len(s.out)
 352  	out := s.out[:t+s.seqSize]
 353  
 354  	ctx := executeAsmContext{
 355  		seqs:        seqs,
 356  		seqIndex:    0,
 357  		out:         out,
 358  		history:     hist,
 359  		outPosition: t,
 360  		litPosition: 0,
 361  		literals:    s.literals,
 362  		windowSize:  s.windowSize,
 363  	}
 364  	var ok bool
 365  	if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
 366  		ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
 367  	} else {
 368  		ok = sequenceDecs_executeSimple_amd64(&ctx)
 369  	}
 370  	if !ok {
 371  		return fmt.Errorf("match offset (%d) bigger than current history (%d)",
 372  			seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
 373  	}
 374  	s.literals = s.literals[ctx.litPosition:]
 375  	t = ctx.outPosition
 376  
 377  	// Add final literals
 378  	copy(out[t:], s.literals)
 379  	if debugDecoder {
 380  		t += len(s.literals)
 381  		if t != len(out) {
 382  			panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
 383  		}
 384  	}
 385  	s.out = out
 386  
 387  	return nil
 388  }
 389