seqdec_amd64.go raw
1 //go:build amd64 && !appengine && !noasm && gc
2 // +build amd64,!appengine,!noasm,gc
3
4 package zstd
5
6 import (
7 "fmt"
8 "io"
9
10 "github.com/klauspost/compress/internal/cpuinfo"
11 )
12
// decodeSyncAsmContext carries the working state shared between
// decodeSyncSimple and the sequenceDecs_decodeSync_* assembly routines.
//
// NOTE(review): the assembly receives a pointer to this struct, so the field
// order and sizes presumably have to stay in sync with _generate/gen.go -
// confirm before changing the layout.
type decodeSyncAsmContext struct {
	llTable     []decSymbol // literal length decoding table (s.litLengths.fse.dt[:maxTablesize])
	mlTable     []decSymbol // match length decoding table (s.matchLengths.fse.dt[:maxTablesize])
	ofTable     []decSymbol // offset decoding table (s.offsets.fse.dt[:maxTablesize])
	llState     uint64      // initial literal length state (s.litLengths.state.state)
	mlState     uint64      // initial match length state (s.matchLengths.state.state)
	ofState     uint64      // initial offset state (s.offsets.state.state)
	iteration   int         // initialised to s.nSeqs - 1
	litRemain   int         // initialised to len(s.literals); added to s.seqSize after a successful run
	out         []byte      // output buffer (s.out)
	outPosition int         // starts at len(s.out); after a successful run s.out is truncated to this length
	literals    []byte      // literal bytes of the block (s.literals)
	litPosition int         // after a successful run, s.literals[litPosition:] are the trailing literals
	history     []byte      // history passed to decodeSyncSimple
	windowSize  int         // s.windowSize
	ll          int         // set on error (not for all errors, please refer to _generate/gen.go)
	ml          int         // set on error (not for all errors, please refer to _generate/gen.go)
	mo          int         // set on error (not for all errors, please refer to _generate/gen.go)
}
32
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
//
// The returned int is an error code that decodeSyncSimple compares against
// noError and the error* constants. decodeSyncSimple picks this variant when
// cpuinfo.HasBMI2() is false and useSafe is false; useSafe is currently
// hard-coded to true there, so this variant is not selected at the moment.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
39
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
//
// The returned int is an error code that decodeSyncSimple compares against
// noError and the error* constants. decodeSyncSimple picks this variant when
// cpuinfo.HasBMI2() is true and useSafe is false; useSafe is currently
// hard-coded to true there, so this variant is not selected at the moment.
//
//go:noescape
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
44
// sequenceDecs_decodeSync_safe_amd64 implements the main loop of
// sequenceDecs.decodeSync in x86 asm, like sequenceDecs_decodeSync_amd64,
// but does not write more than output buffer.
//
// The returned int is an error code that decodeSyncSimple compares against
// noError and the error* constants. decodeSyncSimple picks this variant when
// cpuinfo.HasBMI2() is false and useSafe is true.
//
//go:noescape
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
49
// sequenceDecs_decodeSync_safe_bmi2 implements the main loop of
// sequenceDecs.decodeSync in x86 asm with BMI2 extensions, like
// sequenceDecs_decodeSync_bmi2, but does not write more than output buffer.
//
// The returned int is an error code that decodeSyncSimple compares against
// noError and the error* constants. decodeSyncSimple picks this variant when
// cpuinfo.HasBMI2() is true and useSafe is true.
//
//go:noescape
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
54
55 // decode sequences from the stream with the provided history but without a dictionary.
56 func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
57 if len(s.dict) > 0 {
58 return false, nil
59 }
60 if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
61 return false, nil
62 }
63
64 // FIXME: Using unsafe memory copies leads to rare, random crashes
65 // with fuzz testing. It is therefore disabled for now.
66 const useSafe = true
67 /*
68 useSafe := false
69 if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
70 useSafe = true
71 }
72 if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
73 useSafe = true
74 }
75 if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
76 useSafe = true
77 }
78 */
79
80 br := s.br
81
82 maxBlockSize := min(s.windowSize, maxCompressedBlockSize)
83
84 ctx := decodeSyncAsmContext{
85 llTable: s.litLengths.fse.dt[:maxTablesize],
86 mlTable: s.matchLengths.fse.dt[:maxTablesize],
87 ofTable: s.offsets.fse.dt[:maxTablesize],
88 llState: uint64(s.litLengths.state.state),
89 mlState: uint64(s.matchLengths.state.state),
90 ofState: uint64(s.offsets.state.state),
91 iteration: s.nSeqs - 1,
92 litRemain: len(s.literals),
93 out: s.out,
94 outPosition: len(s.out),
95 literals: s.literals,
96 windowSize: s.windowSize,
97 history: hist,
98 }
99
100 s.seqSize = 0
101 startSize := len(s.out)
102
103 var errCode int
104 if cpuinfo.HasBMI2() {
105 if useSafe {
106 errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
107 } else {
108 errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
109 }
110 } else {
111 if useSafe {
112 errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
113 } else {
114 errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
115 }
116 }
117 switch errCode {
118 case noError:
119 break
120
121 case errorMatchLenOfsMismatch:
122 return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
123
124 case errorMatchLenTooBig:
125 return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
126
127 case errorMatchOffTooBig:
128 return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
129 ctx.mo, ctx.outPosition+len(hist)-startSize)
130
131 case errorNotEnoughLiterals:
132 return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
133 ctx.ll, ctx.litRemain+ctx.ll)
134
135 case errorOverread:
136 return true, io.ErrUnexpectedEOF
137
138 case errorNotEnoughSpace:
139 size := ctx.outPosition + ctx.ll + ctx.ml
140 if debugDecoder {
141 println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
142 }
143 return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
144
145 default:
146 return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode)
147 }
148
149 s.seqSize += ctx.litRemain
150 if s.seqSize > maxBlockSize {
151 return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
152 }
153 err := br.close()
154 if err != nil {
155 printf("Closing sequences: %v, %+v\n", err, *br)
156 return true, err
157 }
158
159 s.literals = s.literals[ctx.litPosition:]
160 t := ctx.outPosition
161 s.out = s.out[:t]
162
163 // Add final literals
164 s.out = append(s.out, s.literals...)
165 if debugDecoder {
166 t += len(s.literals)
167 if t != len(s.out) {
168 panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
169 }
170 }
171
172 return true, nil
173 }
174
175 // --------------------------------------------------------------------------------
176
// decodeAsmContext carries the working state shared between
// sequenceDecs.decode and the sequenceDecs_decode_* assembly routines.
//
// NOTE(review): the assembly receives a pointer to this struct, so the field
// order and sizes presumably have to stay in sync with the generated
// assembly - confirm before changing the layout.
type decodeAsmContext struct {
	llTable   []decSymbol // literal length decoding table (s.litLengths.fse.dt[:maxTablesize])
	mlTable   []decSymbol // match length decoding table (s.matchLengths.fse.dt[:maxTablesize])
	ofTable   []decSymbol // offset decoding table (s.offsets.fse.dt[:maxTablesize])
	llState   uint64      // initial literal length state (s.litLengths.state.state)
	mlState   uint64      // initial match length state (s.matchLengths.state.state)
	ofState   uint64      // initial offset state (s.offsets.state.state)
	iteration int         // initialised to len(seqs) - 1; decode derives the failing index as len(seqs) - iteration - 1
	seqs      []seqVals   // the seqs slice passed to decode; decode reads ll/ml from it when reporting errors
	litRemain int         // initialised to len(s.literals); decode rejects a negative value after the run
}
188
// Error codes returned by the assembly decoding routines in this file.
// The values are spelled out explicitly because they are compared against
// the plain int returned from assembly.
const (
	// noError is returned when decoding finished successfully.
	noError = 0

	// errorMatchLenOfsMismatch is reported when mo == 0 && ml > 0.
	errorMatchLenOfsMismatch = 1

	// errorMatchLenTooBig is reported when ml > maxMatchLen.
	errorMatchLenTooBig = 2

	// errorMatchOffTooBig is reported when mo > available history or mo > s.windowSize.
	errorMatchOffTooBig = 3

	// errorNotEnoughLiterals is reported when the sum of literal lengths
	// exceeds the literal buffer size.
	errorNotEnoughLiterals = 4

	// errorNotEnoughSpace is reported when capacity of `out` is too small.
	errorNotEnoughSpace = 5

	// errorOverread is reported when bits are overread.
	errorOverread = 6
)
208
// sequenceDecs_decode_amd64 implements the main loop of sequenceDecs in x86 asm.
//
// The returned int is an error code; sequenceDecs.decode treats 0 as success
// and maps the error* constants to Go errors. decode picks this variant when
// cpuinfo.HasBMI2() is false and its lte56bits check is false.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
215
// sequenceDecs_decode_56_amd64 implements the main loop of sequenceDecs in x86 asm.
//
// The returned int is an error code; sequenceDecs.decode treats 0 as success
// and maps the error* constants to Go errors. decode picks this variant when
// cpuinfo.HasBMI2() is false and s.maxBits plus the actual table logs of the
// offset, match length and literal length tables is at most 56.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
222
// sequenceDecs_decode_bmi2 implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//
// The returned int is an error code; sequenceDecs.decode treats 0 as success
// and maps the error* constants to Go errors. decode picks this variant when
// cpuinfo.HasBMI2() is true and its lte56bits check is false.
//
//go:noescape
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
227
// sequenceDecs_decode_56_bmi2 implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//
// The returned int is an error code; sequenceDecs.decode treats 0 as success
// and maps the error* constants to Go errors. decode picks this variant when
// cpuinfo.HasBMI2() is true and s.maxBits plus the actual table logs of the
// offset, match length and literal length tables is at most 56.
//
//go:noescape
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
232
233 // decode sequences from the stream without the provided history.
234 func (s *sequenceDecs) decode(seqs []seqVals) error {
235 br := s.br
236
237 maxBlockSize := min(s.windowSize, maxCompressedBlockSize)
238
239 ctx := decodeAsmContext{
240 llTable: s.litLengths.fse.dt[:maxTablesize],
241 mlTable: s.matchLengths.fse.dt[:maxTablesize],
242 ofTable: s.offsets.fse.dt[:maxTablesize],
243 llState: uint64(s.litLengths.state.state),
244 mlState: uint64(s.matchLengths.state.state),
245 ofState: uint64(s.offsets.state.state),
246 seqs: seqs,
247 iteration: len(seqs) - 1,
248 litRemain: len(s.literals),
249 }
250
251 if debugDecoder {
252 println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
253 }
254
255 s.seqSize = 0
256 lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
257 var errCode int
258 if cpuinfo.HasBMI2() {
259 if lte56bits {
260 errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
261 } else {
262 errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
263 }
264 } else {
265 if lte56bits {
266 errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
267 } else {
268 errCode = sequenceDecs_decode_amd64(s, br, &ctx)
269 }
270 }
271 if errCode != 0 {
272 i := len(seqs) - ctx.iteration - 1
273 switch errCode {
274 case errorMatchLenOfsMismatch:
275 ml := ctx.seqs[i].ml
276 return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
277
278 case errorMatchLenTooBig:
279 ml := ctx.seqs[i].ml
280 return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
281
282 case errorNotEnoughLiterals:
283 ll := ctx.seqs[i].ll
284 return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
285 case errorOverread:
286 return io.ErrUnexpectedEOF
287 }
288
289 return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode)
290 }
291
292 if ctx.litRemain < 0 {
293 return fmt.Errorf("literal count is too big: total available %d, total requested %d",
294 len(s.literals), len(s.literals)-ctx.litRemain)
295 }
296
297 s.seqSize += ctx.litRemain
298 if s.seqSize > maxBlockSize {
299 return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
300 }
301 if debugDecoder {
302 println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
303 }
304 err := br.close()
305 if err != nil {
306 printf("Closing sequences: %v, %+v\n", err, *br)
307 }
308 return err
309 }
310
311 // --------------------------------------------------------------------------------
312
// executeAsmContext carries the working state shared between
// sequenceDecs.executeSimple and the sequenceDecs_executeSimple_* assembly
// routines.
//
// NOTE(review): the assembly receives a pointer to this struct, so the field
// order and sizes presumably have to stay in sync with the generated
// assembly - confirm before changing the layout.
type executeAsmContext struct {
	seqs        []seqVals // sequences to execute
	seqIndex    int       // starts at 0; on failure executeSimple reports seqs[seqIndex].mo as the offending offset
	out         []byte    // output buffer, s.out extended by s.seqSize bytes
	history     []byte    // history passed to executeSimple
	literals    []byte    // literal bytes of the block (s.literals)
	outPosition int       // starts at len(s.out); after the run, the trailing literals are copied to out[outPosition:]
	litPosition int       // starts at 0; after the run, s.literals[litPosition:] are the trailing literals
	windowSize  int       // s.windowSize
}
323
// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
//
// Returns false if a match offset is too big. executeSimple then reports
// seqs[ctx.seqIndex].mo as the offending offset.
//
// executeSimple picks this variant only when cap(s.literals) is at least
// len(s.literals)+compressedBlockOverAlloc.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
332
// sequenceDecs_executeSimple_safe_amd64 implements the main loop of
// sequenceDecs.executeSimple in x86 asm, like
// sequenceDecs_executeSimple_amd64, but with safe memcopies.
//
// Returns false if a match offset is too big. executeSimple then reports
// seqs[ctx.seqIndex].mo as the offending offset.
//
// executeSimple picks this variant when cap(s.literals) is smaller than
// len(s.literals)+compressedBlockOverAlloc.
//
//go:noescape
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
337
338 // executeSimple handles cases when dictionary is not used.
339 func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
340 // Ensure we have enough output size...
341 if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
342 addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
343 s.out = append(s.out, make([]byte, addBytes)...)
344 s.out = s.out[:len(s.out)-addBytes]
345 }
346
347 if debugDecoder {
348 printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
349 }
350
351 var t = len(s.out)
352 out := s.out[:t+s.seqSize]
353
354 ctx := executeAsmContext{
355 seqs: seqs,
356 seqIndex: 0,
357 out: out,
358 history: hist,
359 outPosition: t,
360 litPosition: 0,
361 literals: s.literals,
362 windowSize: s.windowSize,
363 }
364 var ok bool
365 if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
366 ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
367 } else {
368 ok = sequenceDecs_executeSimple_amd64(&ctx)
369 }
370 if !ok {
371 return fmt.Errorf("match offset (%d) bigger than current history (%d)",
372 seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
373 }
374 s.literals = s.literals[ctx.litPosition:]
375 t = ctx.outPosition
376
377 // Add final literals
378 copy(out[t:], s.literals)
379 if debugDecoder {
380 t += len(s.literals)
381 if t != len(out) {
382 panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
383 }
384 }
385 s.out = out
386
387 return nil
388 }
389