enc_better.go raw
1 // Copyright 2019+ Klaus Post. All rights reserved.
2 // License information can be found in the LICENSE file.
3 // Based on work by Yann Collet, released under BSD License.
4
5 package zstd
6
7 import "fmt"
8
9 const (
10 betterLongTableBits = 19 // Bits used in the long match table
11 betterLongTableSize = 1 << betterLongTableBits // Size of the table
12 betterLongLen = 8 // Bytes used for table hash
13
14 // Note: Increasing the short table bits or making the hash shorter
15 // can actually lead to compression degradation since it will 'steal' more from the
16 // long match table and match offsets are quite big.
17 // This greatly depends on the type of input.
18 betterShortTableBits = 13 // Bits used in the short match table
19 betterShortTableSize = 1 << betterShortTableBits // Size of the table
20 betterShortLen = 5 // Bytes used for table hash
21
22 betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table
23 betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
24
25 betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits) // Number of shards in the table
26 betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
27 )
28
29 type prevEntry struct {
30 offset int32
31 prev int32
32 }
33
34 // betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
35 // The long match table contains the previous entry with the same hash,
36 // effectively making it a "chain" of length 2.
37 // When we find a long match we choose between the two values and select the longest.
38 // When we find a short match, after checking the long, we check if we can find a long at n+1
39 // and that it is longer (lazy matching).
40 type betterFastEncoder struct {
41 fastBase
42 table [betterShortTableSize]tableEntry
43 longTable [betterLongTableSize]prevEntry
44 }
45
46 type betterFastEncoderDict struct {
47 betterFastEncoder
48 dictTable []tableEntry
49 dictLongTable []prevEntry
50 shortTableShardDirty [betterShortTableShardCnt]bool
51 longTableShardDirty [betterLongTableShardCnt]bool
52 allDirty bool
53 }
54
55 // Encode improves compression...
56 func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
57 const (
58 // Input margin is the number of bytes we read (8)
59 // and the maximum we will read ahead (2)
60 inputMargin = 8 + 2
61 minNonLiteralBlockSize = 16
62 )
63
64 // Protect against e.cur wraparound.
65 for e.cur >= e.bufferReset-int32(len(e.hist)) {
66 if len(e.hist) == 0 {
67 e.table = [betterShortTableSize]tableEntry{}
68 e.longTable = [betterLongTableSize]prevEntry{}
69 e.cur = e.maxMatchOff
70 break
71 }
72 // Shift down everything in the table that isn't already too far away.
73 minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
74 for i := range e.table[:] {
75 v := e.table[i].offset
76 if v < minOff {
77 v = 0
78 } else {
79 v = v - e.cur + e.maxMatchOff
80 }
81 e.table[i].offset = v
82 }
83 for i := range e.longTable[:] {
84 v := e.longTable[i].offset
85 v2 := e.longTable[i].prev
86 if v < minOff {
87 v = 0
88 v2 = 0
89 } else {
90 v = v - e.cur + e.maxMatchOff
91 if v2 < minOff {
92 v2 = 0
93 } else {
94 v2 = v2 - e.cur + e.maxMatchOff
95 }
96 }
97 e.longTable[i] = prevEntry{
98 offset: v,
99 prev: v2,
100 }
101 }
102 e.cur = e.maxMatchOff
103 break
104 }
105 // Add block to history
106 s := e.addBlock(src)
107 blk.size = len(src)
108
109 // Check RLE first
110 if len(src) > zstdMinMatch {
111 ml := matchLen(src[1:], src)
112 if ml == len(src)-1 {
113 blk.literals = append(blk.literals, src[0])
114 blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
115 return
116 }
117 }
118
119 if len(src) < minNonLiteralBlockSize {
120 blk.extraLits = len(src)
121 blk.literals = blk.literals[:len(src)]
122 copy(blk.literals, src)
123 return
124 }
125
126 // Override src
127 src = e.hist
128 sLimit := int32(len(src)) - inputMargin
129 // stepSize is the number of bytes to skip on every main loop iteration.
130 // It should be >= 1.
131 const stepSize = 1
132
133 const kSearchStrength = 9
134
135 // nextEmit is where in src the next emitLiteral should start from.
136 nextEmit := s
137 cv := load6432(src, s)
138
139 // Relative offsets
140 offset1 := int32(blk.recentOffsets[0])
141 offset2 := int32(blk.recentOffsets[1])
142
143 addLiterals := func(s *seq, until int32) {
144 if until == nextEmit {
145 return
146 }
147 blk.literals = append(blk.literals, src[nextEmit:until]...)
148 s.litLen = uint32(until - nextEmit)
149 }
150 if debugEncoder {
151 println("recent offsets:", blk.recentOffsets)
152 }
153
154 encodeLoop:
155 for {
156 var t int32
157 // We allow the encoder to optionally turn off repeat offsets across blocks
158 canRepeat := len(blk.sequences) > 2
159 var matched, index0 int32
160
161 for {
162 if debugAsserts && canRepeat && offset1 == 0 {
163 panic("offset0 was 0")
164 }
165
166 nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
167 nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
168 candidateL := e.longTable[nextHashL]
169 candidateS := e.table[nextHashS]
170
171 const repOff = 1
172 repIndex := s - offset1 + repOff
173 off := s + e.cur
174 e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
175 e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
176 index0 = s + 1
177
178 if canRepeat {
179 if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
180 // Consider history as well.
181 var seq seq
182 length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
183
184 seq.matchLen = uint32(length - zstdMinMatch)
185
186 // We might be able to match backwards.
187 // Extend as long as we can.
188 start := s + repOff
189 // We end the search early, so we don't risk 0 literals
190 // and have to do special offset treatment.
191 startLimit := nextEmit + 1
192
193 tMin := max(s-e.maxMatchOff, 0)
194 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
195 repIndex--
196 start--
197 seq.matchLen++
198 }
199 addLiterals(&seq, start)
200
201 // rep 0
202 seq.offset = 1
203 if debugSequences {
204 println("repeat sequence", seq, "next s:", s)
205 }
206 blk.sequences = append(blk.sequences, seq)
207
208 // Index match start+1 (long) -> s - 1
209 index0 := s + repOff
210 s += length + repOff
211
212 nextEmit = s
213 if s >= sLimit {
214 if debugEncoder {
215 println("repeat ended", s, length)
216
217 }
218 break encodeLoop
219 }
220 // Index skipped...
221 for index0 < s-1 {
222 cv0 := load6432(src, index0)
223 cv1 := cv0 >> 8
224 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
225 off := index0 + e.cur
226 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
227 e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
228 index0 += 2
229 }
230 cv = load6432(src, s)
231 continue
232 }
233 const repOff2 = 1
234
235 // We deviate from the reference encoder and also check offset 2.
236 // Still slower and not much better, so disabled.
237 // repIndex = s - offset2 + repOff2
238 if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
239 // Consider history as well.
240 var seq seq
241 length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
242
243 seq.matchLen = uint32(length - zstdMinMatch)
244
245 // We might be able to match backwards.
246 // Extend as long as we can.
247 start := s + repOff2
248 // We end the search early, so we don't risk 0 literals
249 // and have to do special offset treatment.
250 startLimit := nextEmit + 1
251
252 tMin := max(s-e.maxMatchOff, 0)
253 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
254 repIndex--
255 start--
256 seq.matchLen++
257 }
258 addLiterals(&seq, start)
259
260 // rep 2
261 seq.offset = 2
262 if debugSequences {
263 println("repeat sequence 2", seq, "next s:", s)
264 }
265 blk.sequences = append(blk.sequences, seq)
266
267 s += length + repOff2
268 nextEmit = s
269 if s >= sLimit {
270 if debugEncoder {
271 println("repeat ended", s, length)
272
273 }
274 break encodeLoop
275 }
276
277 // Index skipped...
278 for index0 < s-1 {
279 cv0 := load6432(src, index0)
280 cv1 := cv0 >> 8
281 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
282 off := index0 + e.cur
283 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
284 e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
285 index0 += 2
286 }
287 cv = load6432(src, s)
288 // Swap offsets
289 offset1, offset2 = offset2, offset1
290 continue
291 }
292 }
293 // Find the offsets of our two matches.
294 coffsetL := candidateL.offset - e.cur
295 coffsetLP := candidateL.prev - e.cur
296
297 // Check if we have a long match.
298 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
299 // Found a long match, at least 8 bytes.
300 matched = e.matchlen(s+8, coffsetL+8, src) + 8
301 t = coffsetL
302 if debugAsserts && s <= t {
303 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
304 }
305 if debugAsserts && s-t > e.maxMatchOff {
306 panic("s - t >e.maxMatchOff")
307 }
308 if debugMatches {
309 println("long match")
310 }
311
312 if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
313 // Found a long match, at least 8 bytes.
314 prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
315 if prevMatch > matched {
316 matched = prevMatch
317 t = coffsetLP
318 }
319 if debugAsserts && s <= t {
320 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
321 }
322 if debugAsserts && s-t > e.maxMatchOff {
323 panic("s - t >e.maxMatchOff")
324 }
325 if debugMatches {
326 println("long match")
327 }
328 }
329 break
330 }
331
332 // Check if we have a long match on prev.
333 if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
334 // Found a long match, at least 8 bytes.
335 matched = e.matchlen(s+8, coffsetLP+8, src) + 8
336 t = coffsetLP
337 if debugAsserts && s <= t {
338 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
339 }
340 if debugAsserts && s-t > e.maxMatchOff {
341 panic("s - t >e.maxMatchOff")
342 }
343 if debugMatches {
344 println("long match")
345 }
346 break
347 }
348
349 coffsetS := candidateS.offset - e.cur
350
351 // Check if we have a short match.
352 if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
353 // found a regular match
354 matched = e.matchlen(s+4, coffsetS+4, src) + 4
355
356 // See if we can find a long match at s+1
357 const checkAt = 1
358 cv := load6432(src, s+checkAt)
359 nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
360 candidateL = e.longTable[nextHashL]
361 coffsetL = candidateL.offset - e.cur
362
363 // We can store it, since we have at least a 4 byte match.
364 e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
365 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
366 // Found a long match, at least 8 bytes.
367 matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
368 if matchedNext > matched {
369 t = coffsetL
370 s += checkAt
371 matched = matchedNext
372 if debugMatches {
373 println("long match (after short)")
374 }
375 break
376 }
377 }
378
379 // Check prev long...
380 coffsetL = candidateL.prev - e.cur
381 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
382 // Found a long match, at least 8 bytes.
383 matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
384 if matchedNext > matched {
385 t = coffsetL
386 s += checkAt
387 matched = matchedNext
388 if debugMatches {
389 println("prev long match (after short)")
390 }
391 break
392 }
393 }
394 t = coffsetS
395 if debugAsserts && s <= t {
396 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
397 }
398 if debugAsserts && s-t > e.maxMatchOff {
399 panic("s - t >e.maxMatchOff")
400 }
401 if debugAsserts && t < 0 {
402 panic("t<0")
403 }
404 if debugMatches {
405 println("short match")
406 }
407 break
408 }
409
410 // No match found, move forward in input.
411 s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
412 if s >= sLimit {
413 break encodeLoop
414 }
415 cv = load6432(src, s)
416 }
417
418 // Try to find a better match by searching for a long match at the end of the current best match
419 if s+matched < sLimit {
420 // Allow some bytes at the beginning to mismatch.
421 // Sweet spot is around 3 bytes, but depends on input.
422 // The skipped bytes are tested in Extend backwards,
423 // and still picked up as part of the match if they do.
424 const skipBeginning = 3
425
426 nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
427 s2 := s + skipBeginning
428 cv := load3232(src, s2)
429 candidateL := e.longTable[nextHashL]
430 coffsetL := candidateL.offset - e.cur - matched + skipBeginning
431 if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
432 // Found a long match, at least 4 bytes.
433 matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
434 if matchedNext > matched {
435 t = coffsetL
436 s = s2
437 matched = matchedNext
438 if debugMatches {
439 println("long match at end-of-match")
440 }
441 }
442 }
443
444 // Check prev long...
445 if true {
446 coffsetL = candidateL.prev - e.cur - matched + skipBeginning
447 if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
448 // Found a long match, at least 4 bytes.
449 matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
450 if matchedNext > matched {
451 t = coffsetL
452 s = s2
453 matched = matchedNext
454 if debugMatches {
455 println("prev long match at end-of-match")
456 }
457 }
458 }
459 }
460 }
461 // A match has been found. Update recent offsets.
462 offset2 = offset1
463 offset1 = s - t
464
465 if debugAsserts && s <= t {
466 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
467 }
468
469 if debugAsserts && canRepeat && int(offset1) > len(src) {
470 panic("invalid offset")
471 }
472
473 // Extend the n-byte match as long as possible.
474 l := matched
475
476 // Extend backwards
477 tMin := max(s-e.maxMatchOff, 0)
478 for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
479 s--
480 t--
481 l++
482 }
483
484 // Write our sequence
485 var seq seq
486 seq.litLen = uint32(s - nextEmit)
487 seq.matchLen = uint32(l - zstdMinMatch)
488 if seq.litLen > 0 {
489 blk.literals = append(blk.literals, src[nextEmit:s]...)
490 }
491 seq.offset = uint32(s-t) + 3
492 s += l
493 if debugSequences {
494 println("sequence", seq, "next s:", s)
495 }
496 blk.sequences = append(blk.sequences, seq)
497 nextEmit = s
498 if s >= sLimit {
499 break encodeLoop
500 }
501
502 // Index match start+1 (long) -> s - 1
503 off := index0 + e.cur
504 for index0 < s-1 {
505 cv0 := load6432(src, index0)
506 cv1 := cv0 >> 8
507 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
508 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
509 e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
510 index0 += 2
511 off += 2
512 }
513
514 cv = load6432(src, s)
515 if !canRepeat {
516 continue
517 }
518
519 // Check offset 2
520 for {
521 o2 := s - offset2
522 if load3232(src, o2) != uint32(cv) {
523 // Do regular search
524 break
525 }
526
527 // Store this, since we have it.
528 nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
529 nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
530
531 // We have at least 4 byte match.
532 // No need to check backwards. We come straight from a match
533 l := 4 + e.matchlen(s+4, o2+4, src)
534
535 e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
536 e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
537 seq.matchLen = uint32(l) - zstdMinMatch
538 seq.litLen = 0
539
540 // Since litlen is always 0, this is offset 1.
541 seq.offset = 1
542 s += l
543 nextEmit = s
544 if debugSequences {
545 println("sequence", seq, "next s:", s)
546 }
547 blk.sequences = append(blk.sequences, seq)
548
549 // Swap offset 1 and 2.
550 offset1, offset2 = offset2, offset1
551 if s >= sLimit {
552 // Finished
553 break encodeLoop
554 }
555 cv = load6432(src, s)
556 }
557 }
558
559 if int(nextEmit) < len(src) {
560 blk.literals = append(blk.literals, src[nextEmit:]...)
561 blk.extraLits = len(src) - int(nextEmit)
562 }
563 blk.recentOffsets[0] = uint32(offset1)
564 blk.recentOffsets[1] = uint32(offset2)
565 if debugEncoder {
566 println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
567 }
568 }
569
570 // EncodeNoHist will encode a block with no history and no following blocks.
571 // Most notable difference is that src will not be copied for history and
572 // we do not need to check for max match length.
573 func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
574 e.ensureHist(len(src))
575 e.Encode(blk, src)
576 }
577
578 // Encode improves compression...
579 func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
580 const (
581 // Input margin is the number of bytes we read (8)
582 // and the maximum we will read ahead (2)
583 inputMargin = 8 + 2
584 minNonLiteralBlockSize = 16
585 )
586
587 // Protect against e.cur wraparound.
588 for e.cur >= e.bufferReset-int32(len(e.hist)) {
589 if len(e.hist) == 0 {
590 for i := range e.table[:] {
591 e.table[i] = tableEntry{}
592 }
593 for i := range e.longTable[:] {
594 e.longTable[i] = prevEntry{}
595 }
596 e.cur = e.maxMatchOff
597 e.allDirty = true
598 break
599 }
600 // Shift down everything in the table that isn't already too far away.
601 minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
602 for i := range e.table[:] {
603 v := e.table[i].offset
604 if v < minOff {
605 v = 0
606 } else {
607 v = v - e.cur + e.maxMatchOff
608 }
609 e.table[i].offset = v
610 }
611 for i := range e.longTable[:] {
612 v := e.longTable[i].offset
613 v2 := e.longTable[i].prev
614 if v < minOff {
615 v = 0
616 v2 = 0
617 } else {
618 v = v - e.cur + e.maxMatchOff
619 if v2 < minOff {
620 v2 = 0
621 } else {
622 v2 = v2 - e.cur + e.maxMatchOff
623 }
624 }
625 e.longTable[i] = prevEntry{
626 offset: v,
627 prev: v2,
628 }
629 }
630 e.allDirty = true
631 e.cur = e.maxMatchOff
632 break
633 }
634
635 s := e.addBlock(src)
636 blk.size = len(src)
637 if len(src) < minNonLiteralBlockSize {
638 blk.extraLits = len(src)
639 blk.literals = blk.literals[:len(src)]
640 copy(blk.literals, src)
641 return
642 }
643
644 // Override src
645 src = e.hist
646 sLimit := int32(len(src)) - inputMargin
647 // stepSize is the number of bytes to skip on every main loop iteration.
648 // It should be >= 1.
649 const stepSize = 1
650
651 const kSearchStrength = 9
652
653 // nextEmit is where in src the next emitLiteral should start from.
654 nextEmit := s
655 cv := load6432(src, s)
656
657 // Relative offsets
658 offset1 := int32(blk.recentOffsets[0])
659 offset2 := int32(blk.recentOffsets[1])
660
661 addLiterals := func(s *seq, until int32) {
662 if until == nextEmit {
663 return
664 }
665 blk.literals = append(blk.literals, src[nextEmit:until]...)
666 s.litLen = uint32(until - nextEmit)
667 }
668 if debugEncoder {
669 println("recent offsets:", blk.recentOffsets)
670 }
671
672 encodeLoop:
673 for {
674 var t int32
675 // We allow the encoder to optionally turn off repeat offsets across blocks
676 canRepeat := len(blk.sequences) > 2
677 var matched, index0 int32
678
679 for {
680 if debugAsserts && canRepeat && offset1 == 0 {
681 panic("offset0 was 0")
682 }
683
684 nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
685 nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
686 candidateL := e.longTable[nextHashL]
687 candidateS := e.table[nextHashS]
688
689 const repOff = 1
690 repIndex := s - offset1 + repOff
691 off := s + e.cur
692 e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
693 e.markLongShardDirty(nextHashL)
694 e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
695 e.markShortShardDirty(nextHashS)
696 index0 = s + 1
697
698 if canRepeat {
699 if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
700 // Consider history as well.
701 var seq seq
702 length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
703
704 seq.matchLen = uint32(length - zstdMinMatch)
705
706 // We might be able to match backwards.
707 // Extend as long as we can.
708 start := s + repOff
709 // We end the search early, so we don't risk 0 literals
710 // and have to do special offset treatment.
711 startLimit := nextEmit + 1
712
713 tMin := max(s-e.maxMatchOff, 0)
714 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
715 repIndex--
716 start--
717 seq.matchLen++
718 }
719 addLiterals(&seq, start)
720
721 // rep 0
722 seq.offset = 1
723 if debugSequences {
724 println("repeat sequence", seq, "next s:", s)
725 }
726 blk.sequences = append(blk.sequences, seq)
727
728 // Index match start+1 (long) -> s - 1
729 s += length + repOff
730
731 nextEmit = s
732 if s >= sLimit {
733 if debugEncoder {
734 println("repeat ended", s, length)
735
736 }
737 break encodeLoop
738 }
739 // Index skipped...
740 for index0 < s-1 {
741 cv0 := load6432(src, index0)
742 cv1 := cv0 >> 8
743 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
744 off := index0 + e.cur
745 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
746 e.markLongShardDirty(h0)
747 h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
748 e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
749 e.markShortShardDirty(h1)
750 index0 += 2
751 }
752 cv = load6432(src, s)
753 continue
754 }
755 const repOff2 = 1
756
757 // We deviate from the reference encoder and also check offset 2.
758 // Still slower and not much better, so disabled.
759 // repIndex = s - offset2 + repOff2
760 if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
761 // Consider history as well.
762 var seq seq
763 length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
764
765 seq.matchLen = uint32(length - zstdMinMatch)
766
767 // We might be able to match backwards.
768 // Extend as long as we can.
769 start := s + repOff2
770 // We end the search early, so we don't risk 0 literals
771 // and have to do special offset treatment.
772 startLimit := nextEmit + 1
773
774 tMin := max(s-e.maxMatchOff, 0)
775 for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
776 repIndex--
777 start--
778 seq.matchLen++
779 }
780 addLiterals(&seq, start)
781
782 // rep 2
783 seq.offset = 2
784 if debugSequences {
785 println("repeat sequence 2", seq, "next s:", s)
786 }
787 blk.sequences = append(blk.sequences, seq)
788
789 s += length + repOff2
790 nextEmit = s
791 if s >= sLimit {
792 if debugEncoder {
793 println("repeat ended", s, length)
794
795 }
796 break encodeLoop
797 }
798
799 // Index skipped...
800 for index0 < s-1 {
801 cv0 := load6432(src, index0)
802 cv1 := cv0 >> 8
803 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
804 off := index0 + e.cur
805 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
806 e.markLongShardDirty(h0)
807 h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
808 e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
809 e.markShortShardDirty(h1)
810 index0 += 2
811 }
812 cv = load6432(src, s)
813 // Swap offsets
814 offset1, offset2 = offset2, offset1
815 continue
816 }
817 }
818 // Find the offsets of our two matches.
819 coffsetL := candidateL.offset - e.cur
820 coffsetLP := candidateL.prev - e.cur
821
822 // Check if we have a long match.
823 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
824 // Found a long match, at least 8 bytes.
825 matched = e.matchlen(s+8, coffsetL+8, src) + 8
826 t = coffsetL
827 if debugAsserts && s <= t {
828 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
829 }
830 if debugAsserts && s-t > e.maxMatchOff {
831 panic("s - t >e.maxMatchOff")
832 }
833 if debugMatches {
834 println("long match")
835 }
836
837 if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
838 // Found a long match, at least 8 bytes.
839 prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
840 if prevMatch > matched {
841 matched = prevMatch
842 t = coffsetLP
843 }
844 if debugAsserts && s <= t {
845 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
846 }
847 if debugAsserts && s-t > e.maxMatchOff {
848 panic("s - t >e.maxMatchOff")
849 }
850 if debugMatches {
851 println("long match")
852 }
853 }
854 break
855 }
856
857 // Check if we have a long match on prev.
858 if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
859 // Found a long match, at least 8 bytes.
860 matched = e.matchlen(s+8, coffsetLP+8, src) + 8
861 t = coffsetLP
862 if debugAsserts && s <= t {
863 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
864 }
865 if debugAsserts && s-t > e.maxMatchOff {
866 panic("s - t >e.maxMatchOff")
867 }
868 if debugMatches {
869 println("long match")
870 }
871 break
872 }
873
874 coffsetS := candidateS.offset - e.cur
875
876 // Check if we have a short match.
877 if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
878 // found a regular match
879 matched = e.matchlen(s+4, coffsetS+4, src) + 4
880
881 // See if we can find a long match at s+1
882 const checkAt = 1
883 cv := load6432(src, s+checkAt)
884 nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
885 candidateL = e.longTable[nextHashL]
886 coffsetL = candidateL.offset - e.cur
887
888 // We can store it, since we have at least a 4 byte match.
889 e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
890 e.markLongShardDirty(nextHashL)
891 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
892 // Found a long match, at least 8 bytes.
893 matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
894 if matchedNext > matched {
895 t = coffsetL
896 s += checkAt
897 matched = matchedNext
898 if debugMatches {
899 println("long match (after short)")
900 }
901 break
902 }
903 }
904
905 // Check prev long...
906 coffsetL = candidateL.prev - e.cur
907 if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
908 // Found a long match, at least 8 bytes.
909 matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
910 if matchedNext > matched {
911 t = coffsetL
912 s += checkAt
913 matched = matchedNext
914 if debugMatches {
915 println("prev long match (after short)")
916 }
917 break
918 }
919 }
920 t = coffsetS
921 if debugAsserts && s <= t {
922 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
923 }
924 if debugAsserts && s-t > e.maxMatchOff {
925 panic("s - t >e.maxMatchOff")
926 }
927 if debugAsserts && t < 0 {
928 panic("t<0")
929 }
930 if debugMatches {
931 println("short match")
932 }
933 break
934 }
935
936 // No match found, move forward in input.
937 s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
938 if s >= sLimit {
939 break encodeLoop
940 }
941 cv = load6432(src, s)
942 }
943 // Try to find a better match by searching for a long match at the end of the current best match
944 if s+matched < sLimit {
945 nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
946 cv := load3232(src, s)
947 candidateL := e.longTable[nextHashL]
948 coffsetL := candidateL.offset - e.cur - matched
949 if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
950 // Found a long match, at least 4 bytes.
951 matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
952 if matchedNext > matched {
953 t = coffsetL
954 matched = matchedNext
955 if debugMatches {
956 println("long match at end-of-match")
957 }
958 }
959 }
960
961 // Check prev long...
962 if true {
963 coffsetL = candidateL.prev - e.cur - matched
964 if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
965 // Found a long match, at least 4 bytes.
966 matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
967 if matchedNext > matched {
968 t = coffsetL
969 matched = matchedNext
970 if debugMatches {
971 println("prev long match at end-of-match")
972 }
973 }
974 }
975 }
976 }
977 // A match has been found. Update recent offsets.
978 offset2 = offset1
979 offset1 = s - t
980
981 if debugAsserts && s <= t {
982 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
983 }
984
985 if debugAsserts && canRepeat && int(offset1) > len(src) {
986 panic("invalid offset")
987 }
988
989 // Extend the n-byte match as long as possible.
990 l := matched
991
992 // Extend backwards
993 tMin := max(s-e.maxMatchOff, 0)
994 for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
995 s--
996 t--
997 l++
998 }
999
1000 // Write our sequence
1001 var seq seq
1002 seq.litLen = uint32(s - nextEmit)
1003 seq.matchLen = uint32(l - zstdMinMatch)
1004 if seq.litLen > 0 {
1005 blk.literals = append(blk.literals, src[nextEmit:s]...)
1006 }
1007 seq.offset = uint32(s-t) + 3
1008 s += l
1009 if debugSequences {
1010 println("sequence", seq, "next s:", s)
1011 }
1012 blk.sequences = append(blk.sequences, seq)
1013 nextEmit = s
1014 if s >= sLimit {
1015 break encodeLoop
1016 }
1017
1018 // Index match start+1 (long) -> s - 1
1019 off := index0 + e.cur
1020 for index0 < s-1 {
1021 cv0 := load6432(src, index0)
1022 cv1 := cv0 >> 8
1023 h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
1024 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
1025 e.markLongShardDirty(h0)
1026 h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
1027 e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
1028 e.markShortShardDirty(h1)
1029 index0 += 2
1030 off += 2
1031 }
1032
1033 cv = load6432(src, s)
1034 if !canRepeat {
1035 continue
1036 }
1037
1038 // Check offset 2
1039 for {
1040 o2 := s - offset2
1041 if load3232(src, o2) != uint32(cv) {
1042 // Do regular search
1043 break
1044 }
1045
1046 // Store this, since we have it.
1047 nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
1048 nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
1049
1050 // We have at least 4 byte match.
1051 // No need to check backwards. We come straight from a match
1052 l := 4 + e.matchlen(s+4, o2+4, src)
1053
1054 e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
1055 e.markLongShardDirty(nextHashL)
1056 e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
1057 e.markShortShardDirty(nextHashS)
1058 seq.matchLen = uint32(l) - zstdMinMatch
1059 seq.litLen = 0
1060
1061 // Since litlen is always 0, this is offset 1.
1062 seq.offset = 1
1063 s += l
1064 nextEmit = s
1065 if debugSequences {
1066 println("sequence", seq, "next s:", s)
1067 }
1068 blk.sequences = append(blk.sequences, seq)
1069
1070 // Swap offset 1 and 2.
1071 offset1, offset2 = offset2, offset1
1072 if s >= sLimit {
1073 // Finished
1074 break encodeLoop
1075 }
1076 cv = load6432(src, s)
1077 }
1078 }
1079
1080 if int(nextEmit) < len(src) {
1081 blk.literals = append(blk.literals, src[nextEmit:]...)
1082 blk.extraLits = len(src) - int(nextEmit)
1083 }
1084 blk.recentOffsets[0] = uint32(offset1)
1085 blk.recentOffsets[1] = uint32(offset2)
1086 if debugEncoder {
1087 println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
1088 }
1089 }
1090
1091 // ResetDict will reset and set a dictionary if not nil
1092 func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
1093 e.resetBase(d, singleBlock)
1094 if d != nil {
1095 panic("betterFastEncoder: Reset with dict")
1096 }
1097 }
1098
1099 // ResetDict will reset and set a dictionary if not nil
1100 func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
1101 e.resetBase(d, singleBlock)
1102 if d == nil {
1103 return
1104 }
1105 // Init or copy dict table
1106 if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
1107 if len(e.dictTable) != len(e.table) {
1108 e.dictTable = make([]tableEntry, len(e.table))
1109 }
1110 end := int32(len(d.content)) - 8 + e.maxMatchOff
1111 for i := e.maxMatchOff; i < end; i += 4 {
1112 const hashLog = betterShortTableBits
1113
1114 cv := load6432(d.content, i-e.maxMatchOff)
1115 nextHash := hashLen(cv, hashLog, betterShortLen) // 0 -> 4
1116 nextHash1 := hashLen(cv>>8, hashLog, betterShortLen) // 1 -> 5
1117 nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
1118 nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
1119 e.dictTable[nextHash] = tableEntry{
1120 val: uint32(cv),
1121 offset: i,
1122 }
1123 e.dictTable[nextHash1] = tableEntry{
1124 val: uint32(cv >> 8),
1125 offset: i + 1,
1126 }
1127 e.dictTable[nextHash2] = tableEntry{
1128 val: uint32(cv >> 16),
1129 offset: i + 2,
1130 }
1131 e.dictTable[nextHash3] = tableEntry{
1132 val: uint32(cv >> 24),
1133 offset: i + 3,
1134 }
1135 }
1136 e.lastDictID = d.id
1137 e.allDirty = true
1138 }
1139
1140 // Init or copy dict table
1141 if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
1142 if len(e.dictLongTable) != len(e.longTable) {
1143 e.dictLongTable = make([]prevEntry, len(e.longTable))
1144 }
1145 if len(d.content) >= 8 {
1146 cv := load6432(d.content, 0)
1147 h := hashLen(cv, betterLongTableBits, betterLongLen)
1148 e.dictLongTable[h] = prevEntry{
1149 offset: e.maxMatchOff,
1150 prev: e.dictLongTable[h].offset,
1151 }
1152
1153 end := int32(len(d.content)) - 8 + e.maxMatchOff
1154 off := 8 // First to read
1155 for i := e.maxMatchOff + 1; i < end; i++ {
1156 cv = cv>>8 | (uint64(d.content[off]) << 56)
1157 h := hashLen(cv, betterLongTableBits, betterLongLen)
1158 e.dictLongTable[h] = prevEntry{
1159 offset: i,
1160 prev: e.dictLongTable[h].offset,
1161 }
1162 off++
1163 }
1164 }
1165 e.lastDictID = d.id
1166 e.allDirty = true
1167 }
1168
1169 // Reset table to initial state
1170 {
1171 dirtyShardCnt := 0
1172 if !e.allDirty {
1173 for i := range e.shortTableShardDirty {
1174 if e.shortTableShardDirty[i] {
1175 dirtyShardCnt++
1176 }
1177 }
1178 }
1179 const shardCnt = betterShortTableShardCnt
1180 const shardSize = betterShortTableShardSize
1181 if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
1182 copy(e.table[:], e.dictTable)
1183 for i := range e.shortTableShardDirty {
1184 e.shortTableShardDirty[i] = false
1185 }
1186 } else {
1187 for i := range e.shortTableShardDirty {
1188 if !e.shortTableShardDirty[i] {
1189 continue
1190 }
1191
1192 copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
1193 e.shortTableShardDirty[i] = false
1194 }
1195 }
1196 }
1197 {
1198 dirtyShardCnt := 0
1199 if !e.allDirty {
1200 for i := range e.shortTableShardDirty {
1201 if e.shortTableShardDirty[i] {
1202 dirtyShardCnt++
1203 }
1204 }
1205 }
1206 const shardCnt = betterLongTableShardCnt
1207 const shardSize = betterLongTableShardSize
1208 if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
1209 copy(e.longTable[:], e.dictLongTable)
1210 for i := range e.longTableShardDirty {
1211 e.longTableShardDirty[i] = false
1212 }
1213 } else {
1214 for i := range e.longTableShardDirty {
1215 if !e.longTableShardDirty[i] {
1216 continue
1217 }
1218
1219 copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
1220 e.longTableShardDirty[i] = false
1221 }
1222 }
1223 }
1224 e.cur = e.maxMatchOff
1225 e.allDirty = false
1226 }
1227
1228 func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
1229 e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
1230 }
1231
1232 func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
1233 e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
1234 }
1235