enc_dfast.go raw
1 // Copyright 2019+ Klaus Post. All rights reserved.
2 // License information can be found in the LICENSE file.
3 // Based on work by Yann Collet, released under BSD License.
4
5 package zstd
6
7 import "fmt"
8
9 const (
10 dFastLongTableBits = 17 // Bits used in the long match table
11 dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
12 dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
13 dFastLongLen = 8 // Bytes used for table hash
14
15 dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
16 dLongTableShardSize = dFastLongTableSize / dLongTableShardCnt // Size of an individual shard
17
18 dFastShortTableBits = tableBits // Bits used in the short match table
19 dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
20 dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
21 dFastShortLen = 5 // Bytes used for table hash
22
23 )
24
25 type doubleFastEncoder struct {
26 fastEncoder
27 longTable [dFastLongTableSize]tableEntry
28 }
29
30 type doubleFastEncoderDict struct {
31 fastEncoderDict
32 longTable [dFastLongTableSize]tableEntry
33 dictLongTable []tableEntry
34 longTableShardDirty [dLongTableShardCnt]bool
35 }
36
37 // Encode mimmics functionality in zstd_dfast.c
38 func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
39 const (
40 // Input margin is the number of bytes we read (8)
41 // and the maximum we will read ahead (2)
42 inputMargin = 8 + 2
43 minNonLiteralBlockSize = 16
44 )
45
46 // Protect against e.cur wraparound.
47 for e.cur >= e.bufferReset-int32(len(e.hist)) {
48 if len(e.hist) == 0 {
49 e.table = [dFastShortTableSize]tableEntry{}
50 e.longTable = [dFastLongTableSize]tableEntry{}
51 e.cur = e.maxMatchOff
52 break
53 }
54 // Shift down everything in the table that isn't already too far away.
55 minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
56 for i := range e.table[:] {
57 v := e.table[i].offset
58 if v < minOff {
59 v = 0
60 } else {
61 v = v - e.cur + e.maxMatchOff
62 }
63 e.table[i].offset = v
64 }
65 for i := range e.longTable[:] {
66 v := e.longTable[i].offset
67 if v < minOff {
68 v = 0
69 } else {
70 v = v - e.cur + e.maxMatchOff
71 }
72 e.longTable[i].offset = v
73 }
74 e.cur = e.maxMatchOff
75 break
76 }
77
78 s := e.addBlock(src)
79 blk.size = len(src)
80 if len(src) < minNonLiteralBlockSize {
81 blk.extraLits = len(src)
82 blk.literals = blk.literals[:len(src)]
83 copy(blk.literals, src)
84 return
85 }
86
87 // Override src
88 src = e.hist
89 sLimit := int32(len(src)) - inputMargin
90 // stepSize is the number of bytes to skip on every main loop iteration.
91 // It should be >= 1.
92 const stepSize = 1
93
94 const kSearchStrength = 8
95
96 // nextEmit is where in src the next emitLiteral should start from.
97 nextEmit := s
98 cv := load6432(src, s)
99
100 // Relative offsets
101 offset1 := int32(blk.recentOffsets[0])
102 offset2 := int32(blk.recentOffsets[1])
103
104 addLiterals := func(s *seq, until int32) {
105 if until == nextEmit {
106 return
107 }
108 blk.literals = append(blk.literals, src[nextEmit:until]...)
109 s.litLen = uint32(until - nextEmit)
110 }
111 if debugEncoder {
112 println("recent offsets:", blk.recentOffsets)
113 }
114
115 encodeLoop:
116 for {
117 var t int32
118 // We allow the encoder to optionally turn off repeat offsets across blocks
119 canRepeat := len(blk.sequences) > 2
120
121 for {
122 if debugAsserts && canRepeat && offset1 == 0 {
123 panic("offset0 was 0")
124 }
125
126 nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
127 nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
128 candidateL := e.longTable[nextHashL]
129 candidateS := e.table[nextHashS]
130
131 const repOff = 1
132 repIndex := s - offset1 + repOff
133 entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
134 e.longTable[nextHashL] = entry
135 e.table[nextHashS] = entry
136
137 if canRepeat {
138 if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
139 // Consider history as well.
140 var seq seq
141 length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
142
143 seq.matchLen = uint32(length - zstdMinMatch)
144
145 // We might be able to match backwards.
146 // Extend as long as we can.
147 start := s + repOff
148 // We end the search early, so we don't risk 0 literals
149 // and have to do special offset treatment.
150 startLimit := nextEmit + 1
151
152 tMin := max(s-e.maxMatchOff, 0)
153 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
154 repIndex--
155 start--
156 seq.matchLen++
157 }
158 addLiterals(&seq, start)
159
160 // rep 0
161 seq.offset = 1
162 if debugSequences {
163 println("repeat sequence", seq, "next s:", s)
164 }
165 blk.sequences = append(blk.sequences, seq)
166 s += length + repOff
167 nextEmit = s
168 if s >= sLimit {
169 if debugEncoder {
170 println("repeat ended", s, length)
171
172 }
173 break encodeLoop
174 }
175 cv = load6432(src, s)
176 continue
177 }
178 }
179 // Find the offsets of our two matches.
180 coffsetL := s - (candidateL.offset - e.cur)
181 coffsetS := s - (candidateS.offset - e.cur)
182
183 // Check if we have a long match.
184 if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
185 // Found a long match, likely at least 8 bytes.
186 // Reference encoder checks all 8 bytes, we only check 4,
187 // but the likelihood of both the first 4 bytes and the hash matching should be enough.
188 t = candidateL.offset - e.cur
189 if debugAsserts && s <= t {
190 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
191 }
192 if debugAsserts && s-t > e.maxMatchOff {
193 panic("s - t >e.maxMatchOff")
194 }
195 if debugMatches {
196 println("long match")
197 }
198 break
199 }
200
201 // Check if we have a short match.
202 if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
203 // found a regular match
204 // See if we can find a long match at s+1
205 const checkAt = 1
206 cv := load6432(src, s+checkAt)
207 nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
208 candidateL = e.longTable[nextHashL]
209 coffsetL = s - (candidateL.offset - e.cur) + checkAt
210
211 // We can store it, since we have at least a 4 byte match.
212 e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
213 if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
214 // Found a long match, likely at least 8 bytes.
215 // Reference encoder checks all 8 bytes, we only check 4,
216 // but the likelihood of both the first 4 bytes and the hash matching should be enough.
217 t = candidateL.offset - e.cur
218 s += checkAt
219 if debugMatches {
220 println("long match (after short)")
221 }
222 break
223 }
224
225 t = candidateS.offset - e.cur
226 if debugAsserts && s <= t {
227 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
228 }
229 if debugAsserts && s-t > e.maxMatchOff {
230 panic("s - t >e.maxMatchOff")
231 }
232 if debugAsserts && t < 0 {
233 panic("t<0")
234 }
235 if debugMatches {
236 println("short match")
237 }
238 break
239 }
240
241 // No match found, move forward in input.
242 s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
243 if s >= sLimit {
244 break encodeLoop
245 }
246 cv = load6432(src, s)
247 }
248
249 // A 4-byte match has been found. Update recent offsets.
250 // We'll later see if more than 4 bytes.
251 offset2 = offset1
252 offset1 = s - t
253
254 if debugAsserts && s <= t {
255 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
256 }
257
258 if debugAsserts && canRepeat && int(offset1) > len(src) {
259 panic("invalid offset")
260 }
261
262 // Extend the 4-byte match as long as possible.
263 l := e.matchlen(s+4, t+4, src) + 4
264
265 // Extend backwards
266 tMin := max(s-e.maxMatchOff, 0)
267 for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
268 s--
269 t--
270 l++
271 }
272
273 // Write our sequence
274 var seq seq
275 seq.litLen = uint32(s - nextEmit)
276 seq.matchLen = uint32(l - zstdMinMatch)
277 if seq.litLen > 0 {
278 blk.literals = append(blk.literals, src[nextEmit:s]...)
279 }
280 seq.offset = uint32(s-t) + 3
281 s += l
282 if debugSequences {
283 println("sequence", seq, "next s:", s)
284 }
285 blk.sequences = append(blk.sequences, seq)
286 nextEmit = s
287 if s >= sLimit {
288 break encodeLoop
289 }
290
291 // Index match start+1 (long) and start+2 (short)
292 index0 := s - l + 1
293 // Index match end-2 (long) and end-1 (short)
294 index1 := s - 2
295
296 cv0 := load6432(src, index0)
297 cv1 := load6432(src, index1)
298 te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
299 te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
300 e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
301 e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
302 cv0 >>= 8
303 cv1 >>= 8
304 te0.offset++
305 te1.offset++
306 te0.val = uint32(cv0)
307 te1.val = uint32(cv1)
308 e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
309 e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
310
311 cv = load6432(src, s)
312
313 if !canRepeat {
314 continue
315 }
316
317 // Check offset 2
318 for {
319 o2 := s - offset2
320 if load3232(src, o2) != uint32(cv) {
321 // Do regular search
322 break
323 }
324
325 // Store this, since we have it.
326 nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
327 nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
328
329 // We have at least 4 byte match.
330 // No need to check backwards. We come straight from a match
331 l := 4 + e.matchlen(s+4, o2+4, src)
332
333 entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
334 e.longTable[nextHashL] = entry
335 e.table[nextHashS] = entry
336 seq.matchLen = uint32(l) - zstdMinMatch
337 seq.litLen = 0
338
339 // Since litlen is always 0, this is offset 1.
340 seq.offset = 1
341 s += l
342 nextEmit = s
343 if debugSequences {
344 println("sequence", seq, "next s:", s)
345 }
346 blk.sequences = append(blk.sequences, seq)
347
348 // Swap offset 1 and 2.
349 offset1, offset2 = offset2, offset1
350 if s >= sLimit {
351 // Finished
352 break encodeLoop
353 }
354 cv = load6432(src, s)
355 }
356 }
357
358 if int(nextEmit) < len(src) {
359 blk.literals = append(blk.literals, src[nextEmit:]...)
360 blk.extraLits = len(src) - int(nextEmit)
361 }
362 blk.recentOffsets[0] = uint32(offset1)
363 blk.recentOffsets[1] = uint32(offset2)
364 if debugEncoder {
365 println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
366 }
367 }
368
369 // EncodeNoHist will encode a block with no history and no following blocks.
370 // Most notable difference is that src will not be copied for history and
371 // we do not need to check for max match length.
372 func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
373 const (
374 // Input margin is the number of bytes we read (8)
375 // and the maximum we will read ahead (2)
376 inputMargin = 8 + 2
377 minNonLiteralBlockSize = 16
378 )
379
380 // Protect against e.cur wraparound.
381 if e.cur >= e.bufferReset {
382 for i := range e.table[:] {
383 e.table[i] = tableEntry{}
384 }
385 for i := range e.longTable[:] {
386 e.longTable[i] = tableEntry{}
387 }
388 e.cur = e.maxMatchOff
389 }
390
391 s := int32(0)
392 blk.size = len(src)
393 if len(src) < minNonLiteralBlockSize {
394 blk.extraLits = len(src)
395 blk.literals = blk.literals[:len(src)]
396 copy(blk.literals, src)
397 return
398 }
399
400 // Override src
401 sLimit := int32(len(src)) - inputMargin
402 // stepSize is the number of bytes to skip on every main loop iteration.
403 // It should be >= 1.
404 const stepSize = 1
405
406 const kSearchStrength = 8
407
408 // nextEmit is where in src the next emitLiteral should start from.
409 nextEmit := s
410 cv := load6432(src, s)
411
412 // Relative offsets
413 offset1 := int32(blk.recentOffsets[0])
414 offset2 := int32(blk.recentOffsets[1])
415
416 addLiterals := func(s *seq, until int32) {
417 if until == nextEmit {
418 return
419 }
420 blk.literals = append(blk.literals, src[nextEmit:until]...)
421 s.litLen = uint32(until - nextEmit)
422 }
423 if debugEncoder {
424 println("recent offsets:", blk.recentOffsets)
425 }
426
427 encodeLoop:
428 for {
429 var t int32
430 for {
431
432 nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
433 nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
434 candidateL := e.longTable[nextHashL]
435 candidateS := e.table[nextHashS]
436
437 const repOff = 1
438 repIndex := s - offset1 + repOff
439 entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
440 e.longTable[nextHashL] = entry
441 e.table[nextHashS] = entry
442
443 if len(blk.sequences) > 2 {
444 if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
445 // Consider history as well.
446 var seq seq
447 //length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
448 length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:]))
449
450 seq.matchLen = uint32(length - zstdMinMatch)
451
452 // We might be able to match backwards.
453 // Extend as long as we can.
454 start := s + repOff
455 // We end the search early, so we don't risk 0 literals
456 // and have to do special offset treatment.
457 startLimit := nextEmit + 1
458
459 tMin := max(s-e.maxMatchOff, 0)
460 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
461 repIndex--
462 start--
463 seq.matchLen++
464 }
465 addLiterals(&seq, start)
466
467 // rep 0
468 seq.offset = 1
469 if debugSequences {
470 println("repeat sequence", seq, "next s:", s)
471 }
472 blk.sequences = append(blk.sequences, seq)
473 s += length + repOff
474 nextEmit = s
475 if s >= sLimit {
476 if debugEncoder {
477 println("repeat ended", s, length)
478
479 }
480 break encodeLoop
481 }
482 cv = load6432(src, s)
483 continue
484 }
485 }
486 // Find the offsets of our two matches.
487 coffsetL := s - (candidateL.offset - e.cur)
488 coffsetS := s - (candidateS.offset - e.cur)
489
490 // Check if we have a long match.
491 if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
492 // Found a long match, likely at least 8 bytes.
493 // Reference encoder checks all 8 bytes, we only check 4,
494 // but the likelihood of both the first 4 bytes and the hash matching should be enough.
495 t = candidateL.offset - e.cur
496 if debugAsserts && s <= t {
497 panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
498 }
499 if debugAsserts && s-t > e.maxMatchOff {
500 panic("s - t >e.maxMatchOff")
501 }
502 if debugMatches {
503 println("long match")
504 }
505 break
506 }
507
508 // Check if we have a short match.
509 if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
510 // found a regular match
511 // See if we can find a long match at s+1
512 const checkAt = 1
513 cv := load6432(src, s+checkAt)
514 nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
515 candidateL = e.longTable[nextHashL]
516 coffsetL = s - (candidateL.offset - e.cur) + checkAt
517
518 // We can store it, since we have at least a 4 byte match.
519 e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
520 if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
521 // Found a long match, likely at least 8 bytes.
522 // Reference encoder checks all 8 bytes, we only check 4,
523 // but the likelihood of both the first 4 bytes and the hash matching should be enough.
524 t = candidateL.offset - e.cur
525 s += checkAt
526 if debugMatches {
527 println("long match (after short)")
528 }
529 break
530 }
531
532 t = candidateS.offset - e.cur
533 if debugAsserts && s <= t {
534 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
535 }
536 if debugAsserts && s-t > e.maxMatchOff {
537 panic("s - t >e.maxMatchOff")
538 }
539 if debugAsserts && t < 0 {
540 panic("t<0")
541 }
542 if debugMatches {
543 println("short match")
544 }
545 break
546 }
547
548 // No match found, move forward in input.
549 s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
550 if s >= sLimit {
551 break encodeLoop
552 }
553 cv = load6432(src, s)
554 }
555
556 // A 4-byte match has been found. Update recent offsets.
557 // We'll later see if more than 4 bytes.
558 offset2 = offset1
559 offset1 = s - t
560
561 if debugAsserts && s <= t {
562 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
563 }
564
565 // Extend the 4-byte match as long as possible.
566 //l := e.matchlen(s+4, t+4, src) + 4
567 l := int32(matchLen(src[s+4:], src[t+4:])) + 4
568
569 // Extend backwards
570 tMin := max(s-e.maxMatchOff, 0)
571 for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
572 s--
573 t--
574 l++
575 }
576
577 // Write our sequence
578 var seq seq
579 seq.litLen = uint32(s - nextEmit)
580 seq.matchLen = uint32(l - zstdMinMatch)
581 if seq.litLen > 0 {
582 blk.literals = append(blk.literals, src[nextEmit:s]...)
583 }
584 seq.offset = uint32(s-t) + 3
585 s += l
586 if debugSequences {
587 println("sequence", seq, "next s:", s)
588 }
589 blk.sequences = append(blk.sequences, seq)
590 nextEmit = s
591 if s >= sLimit {
592 break encodeLoop
593 }
594
595 // Index match start+1 (long) and start+2 (short)
596 index0 := s - l + 1
597 // Index match end-2 (long) and end-1 (short)
598 index1 := s - 2
599
600 cv0 := load6432(src, index0)
601 cv1 := load6432(src, index1)
602 te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
603 te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
604 e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
605 e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
606 cv0 >>= 8
607 cv1 >>= 8
608 te0.offset++
609 te1.offset++
610 te0.val = uint32(cv0)
611 te1.val = uint32(cv1)
612 e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
613 e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
614
615 cv = load6432(src, s)
616
617 if len(blk.sequences) <= 2 {
618 continue
619 }
620
621 // Check offset 2
622 for {
623 o2 := s - offset2
624 if load3232(src, o2) != uint32(cv) {
625 // Do regular search
626 break
627 }
628
629 // Store this, since we have it.
630 nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen)
631 nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
632
633 // We have at least 4 byte match.
634 // No need to check backwards. We come straight from a match
635 //l := 4 + e.matchlen(s+4, o2+4, src)
636 l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
637
638 entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
639 e.longTable[nextHashL] = entry
640 e.table[nextHashS] = entry
641 seq.matchLen = uint32(l) - zstdMinMatch
642 seq.litLen = 0
643
644 // Since litlen is always 0, this is offset 1.
645 seq.offset = 1
646 s += l
647 nextEmit = s
648 if debugSequences {
649 println("sequence", seq, "next s:", s)
650 }
651 blk.sequences = append(blk.sequences, seq)
652
653 // Swap offset 1 and 2.
654 offset1, offset2 = offset2, offset1
655 if s >= sLimit {
656 // Finished
657 break encodeLoop
658 }
659 cv = load6432(src, s)
660 }
661 }
662
663 if int(nextEmit) < len(src) {
664 blk.literals = append(blk.literals, src[nextEmit:]...)
665 blk.extraLits = len(src) - int(nextEmit)
666 }
667 if debugEncoder {
668 println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
669 }
670
671 // We do not store history, so we must offset e.cur to avoid false matches for next user.
672 if e.cur < e.bufferReset {
673 e.cur += int32(len(src))
674 }
675 }
676
677 // Encode will encode the content, with a dictionary if initialized for it.
678 func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
679 const (
680 // Input margin is the number of bytes we read (8)
681 // and the maximum we will read ahead (2)
682 inputMargin = 8 + 2
683 minNonLiteralBlockSize = 16
684 )
685
686 // Protect against e.cur wraparound.
687 for e.cur >= e.bufferReset-int32(len(e.hist)) {
688 if len(e.hist) == 0 {
689 for i := range e.table[:] {
690 e.table[i] = tableEntry{}
691 }
692 for i := range e.longTable[:] {
693 e.longTable[i] = tableEntry{}
694 }
695 e.markAllShardsDirty()
696 e.cur = e.maxMatchOff
697 break
698 }
699 // Shift down everything in the table that isn't already too far away.
700 minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
701 for i := range e.table[:] {
702 v := e.table[i].offset
703 if v < minOff {
704 v = 0
705 } else {
706 v = v - e.cur + e.maxMatchOff
707 }
708 e.table[i].offset = v
709 }
710 for i := range e.longTable[:] {
711 v := e.longTable[i].offset
712 if v < minOff {
713 v = 0
714 } else {
715 v = v - e.cur + e.maxMatchOff
716 }
717 e.longTable[i].offset = v
718 }
719 e.markAllShardsDirty()
720 e.cur = e.maxMatchOff
721 break
722 }
723
724 s := e.addBlock(src)
725 blk.size = len(src)
726 if len(src) < minNonLiteralBlockSize {
727 blk.extraLits = len(src)
728 blk.literals = blk.literals[:len(src)]
729 copy(blk.literals, src)
730 return
731 }
732
733 // Override src
734 src = e.hist
735 sLimit := int32(len(src)) - inputMargin
736 // stepSize is the number of bytes to skip on every main loop iteration.
737 // It should be >= 1.
738 const stepSize = 1
739
740 const kSearchStrength = 8
741
742 // nextEmit is where in src the next emitLiteral should start from.
743 nextEmit := s
744 cv := load6432(src, s)
745
746 // Relative offsets
747 offset1 := int32(blk.recentOffsets[0])
748 offset2 := int32(blk.recentOffsets[1])
749
750 addLiterals := func(s *seq, until int32) {
751 if until == nextEmit {
752 return
753 }
754 blk.literals = append(blk.literals, src[nextEmit:until]...)
755 s.litLen = uint32(until - nextEmit)
756 }
757 if debugEncoder {
758 println("recent offsets:", blk.recentOffsets)
759 }
760
761 encodeLoop:
762 for {
763 var t int32
764 // We allow the encoder to optionally turn off repeat offsets across blocks
765 canRepeat := len(blk.sequences) > 2
766
767 for {
768 if debugAsserts && canRepeat && offset1 == 0 {
769 panic("offset0 was 0")
770 }
771
772 nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
773 nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
774 candidateL := e.longTable[nextHashL]
775 candidateS := e.table[nextHashS]
776
777 const repOff = 1
778 repIndex := s - offset1 + repOff
779 entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
780 e.longTable[nextHashL] = entry
781 e.markLongShardDirty(nextHashL)
782 e.table[nextHashS] = entry
783 e.markShardDirty(nextHashS)
784
785 if canRepeat {
786 if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
787 // Consider history as well.
788 var seq seq
789 length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
790
791 seq.matchLen = uint32(length - zstdMinMatch)
792
793 // We might be able to match backwards.
794 // Extend as long as we can.
795 start := s + repOff
796 // We end the search early, so we don't risk 0 literals
797 // and have to do special offset treatment.
798 startLimit := nextEmit + 1
799
800 tMin := max(s-e.maxMatchOff, 0)
801 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
802 repIndex--
803 start--
804 seq.matchLen++
805 }
806 addLiterals(&seq, start)
807
808 // rep 0
809 seq.offset = 1
810 if debugSequences {
811 println("repeat sequence", seq, "next s:", s)
812 }
813 blk.sequences = append(blk.sequences, seq)
814 s += length + repOff
815 nextEmit = s
816 if s >= sLimit {
817 if debugEncoder {
818 println("repeat ended", s, length)
819
820 }
821 break encodeLoop
822 }
823 cv = load6432(src, s)
824 continue
825 }
826 }
827 // Find the offsets of our two matches.
828 coffsetL := s - (candidateL.offset - e.cur)
829 coffsetS := s - (candidateS.offset - e.cur)
830
831 // Check if we have a long match.
832 if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
833 // Found a long match, likely at least 8 bytes.
834 // Reference encoder checks all 8 bytes, we only check 4,
835 // but the likelihood of both the first 4 bytes and the hash matching should be enough.
836 t = candidateL.offset - e.cur
837 if debugAsserts && s <= t {
838 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
839 }
840 if debugAsserts && s-t > e.maxMatchOff {
841 panic("s - t >e.maxMatchOff")
842 }
843 if debugMatches {
844 println("long match")
845 }
846 break
847 }
848
849 // Check if we have a short match.
850 if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
851 // found a regular match
852 // See if we can find a long match at s+1
853 const checkAt = 1
854 cv := load6432(src, s+checkAt)
855 nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
856 candidateL = e.longTable[nextHashL]
857 coffsetL = s - (candidateL.offset - e.cur) + checkAt
858
859 // We can store it, since we have at least a 4 byte match.
860 e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
861 e.markLongShardDirty(nextHashL)
862 if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
863 // Found a long match, likely at least 8 bytes.
864 // Reference encoder checks all 8 bytes, we only check 4,
865 // but the likelihood of both the first 4 bytes and the hash matching should be enough.
866 t = candidateL.offset - e.cur
867 s += checkAt
868 if debugMatches {
869 println("long match (after short)")
870 }
871 break
872 }
873
874 t = candidateS.offset - e.cur
875 if debugAsserts && s <= t {
876 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
877 }
878 if debugAsserts && s-t > e.maxMatchOff {
879 panic("s - t >e.maxMatchOff")
880 }
881 if debugAsserts && t < 0 {
882 panic("t<0")
883 }
884 if debugMatches {
885 println("short match")
886 }
887 break
888 }
889
890 // No match found, move forward in input.
891 s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
892 if s >= sLimit {
893 break encodeLoop
894 }
895 cv = load6432(src, s)
896 }
897
898 // A 4-byte match has been found. Update recent offsets.
899 // We'll later see if more than 4 bytes.
900 offset2 = offset1
901 offset1 = s - t
902
903 if debugAsserts && s <= t {
904 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
905 }
906
907 if debugAsserts && canRepeat && int(offset1) > len(src) {
908 panic("invalid offset")
909 }
910
911 // Extend the 4-byte match as long as possible.
912 l := e.matchlen(s+4, t+4, src) + 4
913
914 // Extend backwards
915 tMin := max(s-e.maxMatchOff, 0)
916 for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
917 s--
918 t--
919 l++
920 }
921
922 // Write our sequence
923 var seq seq
924 seq.litLen = uint32(s - nextEmit)
925 seq.matchLen = uint32(l - zstdMinMatch)
926 if seq.litLen > 0 {
927 blk.literals = append(blk.literals, src[nextEmit:s]...)
928 }
929 seq.offset = uint32(s-t) + 3
930 s += l
931 if debugSequences {
932 println("sequence", seq, "next s:", s)
933 }
934 blk.sequences = append(blk.sequences, seq)
935 nextEmit = s
936 if s >= sLimit {
937 break encodeLoop
938 }
939
940 // Index match start+1 (long) and start+2 (short)
941 index0 := s - l + 1
942 // Index match end-2 (long) and end-1 (short)
943 index1 := s - 2
944
945 cv0 := load6432(src, index0)
946 cv1 := load6432(src, index1)
947 te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
948 te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
949 longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
950 longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
951 e.longTable[longHash1] = te0
952 e.longTable[longHash2] = te1
953 e.markLongShardDirty(longHash1)
954 e.markLongShardDirty(longHash2)
955 cv0 >>= 8
956 cv1 >>= 8
957 te0.offset++
958 te1.offset++
959 te0.val = uint32(cv0)
960 te1.val = uint32(cv1)
961 hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen)
962 hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen)
963 e.table[hashVal1] = te0
964 e.markShardDirty(hashVal1)
965 e.table[hashVal2] = te1
966 e.markShardDirty(hashVal2)
967
968 cv = load6432(src, s)
969
970 if !canRepeat {
971 continue
972 }
973
974 // Check offset 2
975 for {
976 o2 := s - offset2
977 if load3232(src, o2) != uint32(cv) {
978 // Do regular search
979 break
980 }
981
982 // Store this, since we have it.
983 nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
984 nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
985
986 // We have at least 4 byte match.
987 // No need to check backwards. We come straight from a match
988 l := 4 + e.matchlen(s+4, o2+4, src)
989
990 entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
991 e.longTable[nextHashL] = entry
992 e.markLongShardDirty(nextHashL)
993 e.table[nextHashS] = entry
994 e.markShardDirty(nextHashS)
995 seq.matchLen = uint32(l) - zstdMinMatch
996 seq.litLen = 0
997
998 // Since litlen is always 0, this is offset 1.
999 seq.offset = 1
1000 s += l
1001 nextEmit = s
1002 if debugSequences {
1003 println("sequence", seq, "next s:", s)
1004 }
1005 blk.sequences = append(blk.sequences, seq)
1006
1007 // Swap offset 1 and 2.
1008 offset1, offset2 = offset2, offset1
1009 if s >= sLimit {
1010 // Finished
1011 break encodeLoop
1012 }
1013 cv = load6432(src, s)
1014 }
1015 }
1016
1017 if int(nextEmit) < len(src) {
1018 blk.literals = append(blk.literals, src[nextEmit:]...)
1019 blk.extraLits = len(src) - int(nextEmit)
1020 }
1021 blk.recentOffsets[0] = uint32(offset1)
1022 blk.recentOffsets[1] = uint32(offset2)
1023 if debugEncoder {
1024 println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
1025 }
1026 // If we encoded more than 64K mark all dirty.
1027 if len(src) > 64<<10 {
1028 e.markAllShardsDirty()
1029 }
1030 }
1031
1032 // ResetDict will reset and set a dictionary if not nil
1033 func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
1034 e.fastEncoder.Reset(d, singleBlock)
1035 if d != nil {
1036 panic("doubleFastEncoder: Reset with dict not supported")
1037 }
1038 }
1039
1040 // ResetDict will reset and set a dictionary if not nil
1041 func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
1042 allDirty := e.allDirty
1043 e.fastEncoderDict.Reset(d, singleBlock)
1044 if d == nil {
1045 return
1046 }
1047
1048 // Init or copy dict table
1049 if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
1050 if len(e.dictLongTable) != len(e.longTable) {
1051 e.dictLongTable = make([]tableEntry, len(e.longTable))
1052 }
1053 if len(d.content) >= 8 {
1054 cv := load6432(d.content, 0)
1055 e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
1056 val: uint32(cv),
1057 offset: e.maxMatchOff,
1058 }
1059 end := int32(len(d.content)) - 8 + e.maxMatchOff
1060 for i := e.maxMatchOff + 1; i < end; i++ {
1061 cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
1062 e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
1063 val: uint32(cv),
1064 offset: i,
1065 }
1066 }
1067 }
1068 e.lastDictID = d.id
1069 allDirty = true
1070 }
1071 // Reset table to initial state
1072 e.cur = e.maxMatchOff
1073
1074 dirtyShardCnt := 0
1075 if !allDirty {
1076 for i := range e.longTableShardDirty {
1077 if e.longTableShardDirty[i] {
1078 dirtyShardCnt++
1079 }
1080 }
1081 }
1082
1083 if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
1084 //copy(e.longTable[:], e.dictLongTable)
1085 e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
1086 for i := range e.longTableShardDirty {
1087 e.longTableShardDirty[i] = false
1088 }
1089 return
1090 }
1091 for i := range e.longTableShardDirty {
1092 if !e.longTableShardDirty[i] {
1093 continue
1094 }
1095
1096 // copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
1097 *(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])
1098
1099 e.longTableShardDirty[i] = false
1100 }
1101 }
1102
1103 func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
1104 e.longTableShardDirty[entryNum/dLongTableShardSize] = true
1105 }
1106