1 // Copyright 2016 The Snappy-Go Authors. All rights reserved.
2 // Copyright (c) 2019 Klaus Post. All rights reserved.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5 6 package s2
7 8 import (
9 "bytes"
10 "encoding/binary"
11 "fmt"
12 "math/bits"
13 14 "github.com/klauspost/compress/internal/le"
15 )
16 17 func load32(b []byte, i int) uint32 {
18 return le.Load32(b, i)
19 }
20 21 func load64(b []byte, i int) uint64 {
22 return le.Load64(b, i)
23 }
24 25 // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
26 // Preferably h should be a constant and should always be <64.
27 func hash6(u uint64, h uint8) uint32 {
28 const prime6bytes = 227718039650203
29 return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
30 }
31 32 func encodeGo(dst, src []byte) []byte {
33 if n := MaxEncodedLen(len(src)); n < 0 {
34 panic(ErrTooLarge)
35 } else if len(dst) < n {
36 dst = make([]byte, n)
37 }
38 39 // The block starts with the varint-encoded length of the decompressed bytes.
40 d := binary.PutUvarint(dst, uint64(len(src)))
41 42 if len(src) == 0 {
43 return dst[:d]
44 }
45 if len(src) < minNonLiteralBlockSize {
46 d += emitLiteral(dst[d:], src)
47 return dst[:d]
48 }
49 var n int
50 if len(src) < 64<<10 {
51 n = encodeBlockGo64K(dst[d:], src)
52 } else {
53 n = encodeBlockGo(dst[d:], src)
54 }
55 if n > 0 {
56 d += n
57 return dst[:d]
58 }
59 // Not compressible
60 d += emitLiteral(dst[d:], src)
61 return dst[:d]
62 }
63 64 // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
65 // assumes that the varint-encoded length of the decompressed bytes has already
66 // been written.
67 //
68 // It also assumes that:
69 //
70 // len(dst) >= MaxEncodedLen(len(src)) &&
71 // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
72 func encodeBlockGo(dst, src []byte) (d int) {
73 // Initialize the hash table.
74 const (
75 tableBits = 14
76 maxTableSize = 1 << tableBits
77 78 debug = false
79 )
80 var table [maxTableSize]uint32
81 82 // sLimit is when to stop looking for offset/length copies. The inputMargin
83 // lets us use a fast path for emitLiteral in the main loop, while we are
84 // looking for copies.
85 sLimit := len(src) - inputMargin
86 87 // Bail if we can't compress to at least this.
88 dstLimit := len(src) - len(src)>>5 - 5
89 90 // nextEmit is where in src the next emitLiteral should start from.
91 nextEmit := 0
92 93 // The encoded form must start with a literal, as there are no previous
94 // bytes to copy, so we start looking for hash matches at s == 1.
95 s := 1
96 cv := load64(src, s)
97 98 // We search for a repeat at -1, but don't output repeats when nextEmit == 0
99 repeat := 1
100 101 for {
102 candidate := 0
103 for {
104 // Next src position to check
105 nextS := s + (s-nextEmit)>>6 + 4
106 if nextS > sLimit {
107 goto emitRemainder
108 }
109 hash0 := hash6(cv, tableBits)
110 hash1 := hash6(cv>>8, tableBits)
111 candidate = int(table[hash0])
112 candidate2 := int(table[hash1])
113 table[hash0] = uint32(s)
114 table[hash1] = uint32(s + 1)
115 hash2 := hash6(cv>>16, tableBits)
116 117 // Check repeat at offset checkRep.
118 const checkRep = 1
119 if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
120 base := s + checkRep
121 // Extend back
122 for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
123 i--
124 base--
125 }
126 127 // Bail if we exceed the maximum size.
128 if d+(base-nextEmit) > dstLimit {
129 return 0
130 }
131 132 d += emitLiteral(dst[d:], src[nextEmit:base])
133 134 // Extend forward
135 candidate := s - repeat + 4 + checkRep
136 s += 4 + checkRep
137 for s <= sLimit {
138 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
139 s += bits.TrailingZeros64(diff) >> 3
140 break
141 }
142 s += 8
143 candidate += 8
144 }
145 if debug {
146 // Validate match.
147 if s <= candidate {
148 panic("s <= candidate")
149 }
150 a := src[base:s]
151 b := src[base-repeat : base-repeat+(s-base)]
152 if !bytes.Equal(a, b) {
153 panic("mismatch")
154 }
155 }
156 if nextEmit > 0 {
157 // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
158 d += emitRepeat(dst[d:], repeat, s-base)
159 } else {
160 // First match, cannot be repeat.
161 d += emitCopy(dst[d:], repeat, s-base)
162 }
163 nextEmit = s
164 if s >= sLimit {
165 goto emitRemainder
166 }
167 cv = load64(src, s)
168 continue
169 }
170 171 if uint32(cv) == load32(src, candidate) {
172 break
173 }
174 candidate = int(table[hash2])
175 if uint32(cv>>8) == load32(src, candidate2) {
176 table[hash2] = uint32(s + 2)
177 candidate = candidate2
178 s++
179 break
180 }
181 table[hash2] = uint32(s + 2)
182 if uint32(cv>>16) == load32(src, candidate) {
183 s += 2
184 break
185 }
186 187 cv = load64(src, nextS)
188 s = nextS
189 }
190 191 // Extend backwards.
192 // The top bytes will be rechecked to get the full match.
193 for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
194 candidate--
195 s--
196 }
197 198 // Bail if we exceed the maximum size.
199 if d+(s-nextEmit) > dstLimit {
200 return 0
201 }
202 203 // A 4-byte match has been found. We'll later see if more than 4 bytes
204 // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
205 // them as literal bytes.
206 207 d += emitLiteral(dst[d:], src[nextEmit:s])
208 209 // Call emitCopy, and then see if another emitCopy could be our next
210 // move. Repeat until we find no match for the input immediately after
211 // what was consumed by the last emitCopy call.
212 //
213 // If we exit this loop normally then we need to call emitLiteral next,
214 // though we don't yet know how big the literal will be. We handle that
215 // by proceeding to the next iteration of the main loop. We also can
216 // exit this loop via goto if we get close to exhausting the input.
217 for {
218 // Invariant: we have a 4-byte match at s, and no need to emit any
219 // literal bytes prior to s.
220 base := s
221 repeat = base - candidate
222 223 // Extend the 4-byte match as long as possible.
224 s += 4
225 candidate += 4
226 for s <= len(src)-8 {
227 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
228 s += bits.TrailingZeros64(diff) >> 3
229 break
230 }
231 s += 8
232 candidate += 8
233 }
234 235 d += emitCopy(dst[d:], repeat, s-base)
236 if debug {
237 // Validate match.
238 if s <= candidate {
239 panic("s <= candidate")
240 }
241 a := src[base:s]
242 b := src[base-repeat : base-repeat+(s-base)]
243 if !bytes.Equal(a, b) {
244 panic("mismatch")
245 }
246 }
247 248 nextEmit = s
249 if s >= sLimit {
250 goto emitRemainder
251 }
252 253 if d > dstLimit {
254 // Do we have space for more, if not bail.
255 return 0
256 }
257 // Check for an immediate match, otherwise start search at s+1
258 x := load64(src, s-2)
259 m2Hash := hash6(x, tableBits)
260 currHash := hash6(x>>16, tableBits)
261 candidate = int(table[currHash])
262 table[m2Hash] = uint32(s - 2)
263 table[currHash] = uint32(s)
264 if debug && s == candidate {
265 panic("s == candidate")
266 }
267 if uint32(x>>16) != load32(src, candidate) {
268 cv = load64(src, s+1)
269 s++
270 break
271 }
272 }
273 }
274 275 emitRemainder:
276 if nextEmit < len(src) {
277 // Bail if we exceed the maximum size.
278 if d+len(src)-nextEmit > dstLimit {
279 return 0
280 }
281 d += emitLiteral(dst[d:], src[nextEmit:])
282 }
283 return d
284 }
285 286 // encodeBlockGo64K is a specialized version for compressing blocks <= 64KB
287 func encodeBlockGo64K(dst, src []byte) (d int) {
288 // Initialize the hash table.
289 const (
290 tableBits = 14
291 maxTableSize = 1 << tableBits
292 293 debug = false
294 )
295 296 var table [maxTableSize]uint16
297 298 // sLimit is when to stop looking for offset/length copies. The inputMargin
299 // lets us use a fast path for emitLiteral in the main loop, while we are
300 // looking for copies.
301 sLimit := len(src) - inputMargin
302 303 // Bail if we can't compress to at least this.
304 dstLimit := len(src) - len(src)>>5 - 5
305 306 // nextEmit is where in src the next emitLiteral should start from.
307 nextEmit := 0
308 309 // The encoded form must start with a literal, as there are no previous
310 // bytes to copy, so we start looking for hash matches at s == 1.
311 s := 1
312 cv := load64(src, s)
313 314 // We search for a repeat at -1, but don't output repeats when nextEmit == 0
315 repeat := 1
316 317 for {
318 candidate := 0
319 for {
320 // Next src position to check
321 nextS := s + (s-nextEmit)>>5 + 4
322 if nextS > sLimit {
323 goto emitRemainder
324 }
325 hash0 := hash6(cv, tableBits)
326 hash1 := hash6(cv>>8, tableBits)
327 candidate = int(table[hash0])
328 candidate2 := int(table[hash1])
329 table[hash0] = uint16(s)
330 table[hash1] = uint16(s + 1)
331 hash2 := hash6(cv>>16, tableBits)
332 333 // Check repeat at offset checkRep.
334 const checkRep = 1
335 if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
336 base := s + checkRep
337 // Extend back
338 for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
339 i--
340 base--
341 }
342 343 // Bail if we exceed the maximum size.
344 if d+(base-nextEmit) > dstLimit {
345 return 0
346 }
347 348 d += emitLiteral(dst[d:], src[nextEmit:base])
349 350 // Extend forward
351 candidate := s - repeat + 4 + checkRep
352 s += 4 + checkRep
353 for s <= sLimit {
354 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
355 s += bits.TrailingZeros64(diff) >> 3
356 break
357 }
358 s += 8
359 candidate += 8
360 }
361 if debug {
362 // Validate match.
363 if s <= candidate {
364 panic("s <= candidate")
365 }
366 a := src[base:s]
367 b := src[base-repeat : base-repeat+(s-base)]
368 if !bytes.Equal(a, b) {
369 panic("mismatch")
370 }
371 }
372 if nextEmit > 0 {
373 // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
374 d += emitRepeat(dst[d:], repeat, s-base)
375 } else {
376 // First match, cannot be repeat.
377 d += emitCopy(dst[d:], repeat, s-base)
378 }
379 nextEmit = s
380 if s >= sLimit {
381 goto emitRemainder
382 }
383 cv = load64(src, s)
384 continue
385 }
386 387 if uint32(cv) == load32(src, candidate) {
388 break
389 }
390 candidate = int(table[hash2])
391 if uint32(cv>>8) == load32(src, candidate2) {
392 table[hash2] = uint16(s + 2)
393 candidate = candidate2
394 s++
395 break
396 }
397 table[hash2] = uint16(s + 2)
398 if uint32(cv>>16) == load32(src, candidate) {
399 s += 2
400 break
401 }
402 403 cv = load64(src, nextS)
404 s = nextS
405 }
406 407 // Extend backwards.
408 // The top bytes will be rechecked to get the full match.
409 for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
410 candidate--
411 s--
412 }
413 414 // Bail if we exceed the maximum size.
415 if d+(s-nextEmit) > dstLimit {
416 return 0
417 }
418 419 // A 4-byte match has been found. We'll later see if more than 4 bytes
420 // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
421 // them as literal bytes.
422 423 d += emitLiteral(dst[d:], src[nextEmit:s])
424 425 // Call emitCopy, and then see if another emitCopy could be our next
426 // move. Repeat until we find no match for the input immediately after
427 // what was consumed by the last emitCopy call.
428 //
429 // If we exit this loop normally then we need to call emitLiteral next,
430 // though we don't yet know how big the literal will be. We handle that
431 // by proceeding to the next iteration of the main loop. We also can
432 // exit this loop via goto if we get close to exhausting the input.
433 for {
434 // Invariant: we have a 4-byte match at s, and no need to emit any
435 // literal bytes prior to s.
436 base := s
437 repeat = base - candidate
438 439 // Extend the 4-byte match as long as possible.
440 s += 4
441 candidate += 4
442 for s <= len(src)-8 {
443 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
444 s += bits.TrailingZeros64(diff) >> 3
445 break
446 }
447 s += 8
448 candidate += 8
449 }
450 451 d += emitCopy(dst[d:], repeat, s-base)
452 if debug {
453 // Validate match.
454 if s <= candidate {
455 panic("s <= candidate")
456 }
457 a := src[base:s]
458 b := src[base-repeat : base-repeat+(s-base)]
459 if !bytes.Equal(a, b) {
460 panic("mismatch")
461 }
462 }
463 464 nextEmit = s
465 if s >= sLimit {
466 goto emitRemainder
467 }
468 469 if d > dstLimit {
470 // Do we have space for more, if not bail.
471 return 0
472 }
473 // Check for an immediate match, otherwise start search at s+1
474 x := load64(src, s-2)
475 m2Hash := hash6(x, tableBits)
476 currHash := hash6(x>>16, tableBits)
477 candidate = int(table[currHash])
478 table[m2Hash] = uint16(s - 2)
479 table[currHash] = uint16(s)
480 if debug && s == candidate {
481 panic("s == candidate")
482 }
483 if uint32(x>>16) != load32(src, candidate) {
484 cv = load64(src, s+1)
485 s++
486 break
487 }
488 }
489 }
490 491 emitRemainder:
492 if nextEmit < len(src) {
493 // Bail if we exceed the maximum size.
494 if d+len(src)-nextEmit > dstLimit {
495 return 0
496 }
497 d += emitLiteral(dst[d:], src[nextEmit:])
498 }
499 return d
500 }
501 502 func encodeBlockSnappyGo(dst, src []byte) (d int) {
503 // Initialize the hash table.
504 const (
505 tableBits = 14
506 maxTableSize = 1 << tableBits
507 )
508 var table [maxTableSize]uint32
509 510 // sLimit is when to stop looking for offset/length copies. The inputMargin
511 // lets us use a fast path for emitLiteral in the main loop, while we are
512 // looking for copies.
513 sLimit := len(src) - inputMargin
514 515 // Bail if we can't compress to at least this.
516 dstLimit := len(src) - len(src)>>5 - 5
517 518 // nextEmit is where in src the next emitLiteral should start from.
519 nextEmit := 0
520 521 // The encoded form must start with a literal, as there are no previous
522 // bytes to copy, so we start looking for hash matches at s == 1.
523 s := 1
524 cv := load64(src, s)
525 526 // We search for a repeat at -1, but don't output repeats when nextEmit == 0
527 repeat := 1
528 529 for {
530 candidate := 0
531 for {
532 // Next src position to check
533 nextS := s + (s-nextEmit)>>6 + 4
534 if nextS > sLimit {
535 goto emitRemainder
536 }
537 hash0 := hash6(cv, tableBits)
538 hash1 := hash6(cv>>8, tableBits)
539 candidate = int(table[hash0])
540 candidate2 := int(table[hash1])
541 table[hash0] = uint32(s)
542 table[hash1] = uint32(s + 1)
543 hash2 := hash6(cv>>16, tableBits)
544 545 // Check repeat at offset checkRep.
546 const checkRep = 1
547 if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
548 base := s + checkRep
549 // Extend back
550 for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
551 i--
552 base--
553 }
554 // Bail if we exceed the maximum size.
555 if d+(base-nextEmit) > dstLimit {
556 return 0
557 }
558 559 d += emitLiteral(dst[d:], src[nextEmit:base])
560 561 // Extend forward
562 candidate := s - repeat + 4 + checkRep
563 s += 4 + checkRep
564 for s <= sLimit {
565 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
566 s += bits.TrailingZeros64(diff) >> 3
567 break
568 }
569 s += 8
570 candidate += 8
571 }
572 573 d += emitCopyNoRepeat(dst[d:], repeat, s-base)
574 nextEmit = s
575 if s >= sLimit {
576 goto emitRemainder
577 }
578 579 cv = load64(src, s)
580 continue
581 }
582 583 if uint32(cv) == load32(src, candidate) {
584 break
585 }
586 candidate = int(table[hash2])
587 if uint32(cv>>8) == load32(src, candidate2) {
588 table[hash2] = uint32(s + 2)
589 candidate = candidate2
590 s++
591 break
592 }
593 table[hash2] = uint32(s + 2)
594 if uint32(cv>>16) == load32(src, candidate) {
595 s += 2
596 break
597 }
598 599 cv = load64(src, nextS)
600 s = nextS
601 }
602 603 // Extend backwards
604 for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
605 candidate--
606 s--
607 }
608 609 // Bail if we exceed the maximum size.
610 if d+(s-nextEmit) > dstLimit {
611 return 0
612 }
613 614 // A 4-byte match has been found. We'll later see if more than 4 bytes
615 // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
616 // them as literal bytes.
617 618 d += emitLiteral(dst[d:], src[nextEmit:s])
619 620 // Call emitCopy, and then see if another emitCopy could be our next
621 // move. Repeat until we find no match for the input immediately after
622 // what was consumed by the last emitCopy call.
623 //
624 // If we exit this loop normally then we need to call emitLiteral next,
625 // though we don't yet know how big the literal will be. We handle that
626 // by proceeding to the next iteration of the main loop. We also can
627 // exit this loop via goto if we get close to exhausting the input.
628 for {
629 // Invariant: we have a 4-byte match at s, and no need to emit any
630 // literal bytes prior to s.
631 base := s
632 repeat = base - candidate
633 634 // Extend the 4-byte match as long as possible.
635 s += 4
636 candidate += 4
637 for s <= len(src)-8 {
638 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
639 s += bits.TrailingZeros64(diff) >> 3
640 break
641 }
642 s += 8
643 candidate += 8
644 }
645 646 d += emitCopyNoRepeat(dst[d:], repeat, s-base)
647 if false {
648 // Validate match.
649 a := src[base:s]
650 b := src[base-repeat : base-repeat+(s-base)]
651 if !bytes.Equal(a, b) {
652 panic("mismatch")
653 }
654 }
655 656 nextEmit = s
657 if s >= sLimit {
658 goto emitRemainder
659 }
660 661 if d > dstLimit {
662 // Do we have space for more, if not bail.
663 return 0
664 }
665 // Check for an immediate match, otherwise start search at s+1
666 x := load64(src, s-2)
667 m2Hash := hash6(x, tableBits)
668 currHash := hash6(x>>16, tableBits)
669 candidate = int(table[currHash])
670 table[m2Hash] = uint32(s - 2)
671 table[currHash] = uint32(s)
672 if uint32(x>>16) != load32(src, candidate) {
673 cv = load64(src, s+1)
674 s++
675 break
676 }
677 }
678 }
679 680 emitRemainder:
681 if nextEmit < len(src) {
682 // Bail if we exceed the maximum size.
683 if d+len(src)-nextEmit > dstLimit {
684 return 0
685 }
686 d += emitLiteral(dst[d:], src[nextEmit:])
687 }
688 return d
689 }
690 691 // encodeBlockSnappyGo64K is a special version of encodeBlockSnappyGo for sizes <64KB
692 func encodeBlockSnappyGo64K(dst, src []byte) (d int) {
693 // Initialize the hash table.
694 const (
695 tableBits = 14
696 maxTableSize = 1 << tableBits
697 )
698 699 var table [maxTableSize]uint16
700 701 // sLimit is when to stop looking for offset/length copies. The inputMargin
702 // lets us use a fast path for emitLiteral in the main loop, while we are
703 // looking for copies.
704 sLimit := len(src) - inputMargin
705 706 // Bail if we can't compress to at least this.
707 dstLimit := len(src) - len(src)>>5 - 5
708 709 // nextEmit is where in src the next emitLiteral should start from.
710 nextEmit := 0
711 712 // The encoded form must start with a literal, as there are no previous
713 // bytes to copy, so we start looking for hash matches at s == 1.
714 s := 1
715 cv := load64(src, s)
716 717 // We search for a repeat at -1, but don't output repeats when nextEmit == 0
718 repeat := 1
719 720 for {
721 candidate := 0
722 for {
723 // Next src position to check
724 nextS := s + (s-nextEmit)>>5 + 4
725 if nextS > sLimit {
726 goto emitRemainder
727 }
728 hash0 := hash6(cv, tableBits)
729 hash1 := hash6(cv>>8, tableBits)
730 candidate = int(table[hash0])
731 candidate2 := int(table[hash1])
732 table[hash0] = uint16(s)
733 table[hash1] = uint16(s + 1)
734 hash2 := hash6(cv>>16, tableBits)
735 736 // Check repeat at offset checkRep.
737 const checkRep = 1
738 if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
739 base := s + checkRep
740 // Extend back
741 for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
742 i--
743 base--
744 }
745 // Bail if we exceed the maximum size.
746 if d+(base-nextEmit) > dstLimit {
747 return 0
748 }
749 750 d += emitLiteral(dst[d:], src[nextEmit:base])
751 752 // Extend forward
753 candidate := s - repeat + 4 + checkRep
754 s += 4 + checkRep
755 for s <= sLimit {
756 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
757 s += bits.TrailingZeros64(diff) >> 3
758 break
759 }
760 s += 8
761 candidate += 8
762 }
763 764 d += emitCopyNoRepeat(dst[d:], repeat, s-base)
765 nextEmit = s
766 if s >= sLimit {
767 goto emitRemainder
768 }
769 770 cv = load64(src, s)
771 continue
772 }
773 774 if uint32(cv) == load32(src, candidate) {
775 break
776 }
777 candidate = int(table[hash2])
778 if uint32(cv>>8) == load32(src, candidate2) {
779 table[hash2] = uint16(s + 2)
780 candidate = candidate2
781 s++
782 break
783 }
784 table[hash2] = uint16(s + 2)
785 if uint32(cv>>16) == load32(src, candidate) {
786 s += 2
787 break
788 }
789 790 cv = load64(src, nextS)
791 s = nextS
792 }
793 794 // Extend backwards
795 for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
796 candidate--
797 s--
798 }
799 800 // Bail if we exceed the maximum size.
801 if d+(s-nextEmit) > dstLimit {
802 return 0
803 }
804 805 // A 4-byte match has been found. We'll later see if more than 4 bytes
806 // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
807 // them as literal bytes.
808 809 d += emitLiteral(dst[d:], src[nextEmit:s])
810 811 // Call emitCopy, and then see if another emitCopy could be our next
812 // move. Repeat until we find no match for the input immediately after
813 // what was consumed by the last emitCopy call.
814 //
815 // If we exit this loop normally then we need to call emitLiteral next,
816 // though we don't yet know how big the literal will be. We handle that
817 // by proceeding to the next iteration of the main loop. We also can
818 // exit this loop via goto if we get close to exhausting the input.
819 for {
820 // Invariant: we have a 4-byte match at s, and no need to emit any
821 // literal bytes prior to s.
822 base := s
823 repeat = base - candidate
824 825 // Extend the 4-byte match as long as possible.
826 s += 4
827 candidate += 4
828 for s <= len(src)-8 {
829 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
830 s += bits.TrailingZeros64(diff) >> 3
831 break
832 }
833 s += 8
834 candidate += 8
835 }
836 837 d += emitCopyNoRepeat(dst[d:], repeat, s-base)
838 if false {
839 // Validate match.
840 a := src[base:s]
841 b := src[base-repeat : base-repeat+(s-base)]
842 if !bytes.Equal(a, b) {
843 panic("mismatch")
844 }
845 }
846 847 nextEmit = s
848 if s >= sLimit {
849 goto emitRemainder
850 }
851 852 if d > dstLimit {
853 // Do we have space for more, if not bail.
854 return 0
855 }
856 // Check for an immediate match, otherwise start search at s+1
857 x := load64(src, s-2)
858 m2Hash := hash6(x, tableBits)
859 currHash := hash6(x>>16, tableBits)
860 candidate = int(table[currHash])
861 table[m2Hash] = uint16(s - 2)
862 table[currHash] = uint16(s)
863 if uint32(x>>16) != load32(src, candidate) {
864 cv = load64(src, s+1)
865 s++
866 break
867 }
868 }
869 }
870 871 emitRemainder:
872 if nextEmit < len(src) {
873 // Bail if we exceed the maximum size.
874 if d+len(src)-nextEmit > dstLimit {
875 return 0
876 }
877 d += emitLiteral(dst[d:], src[nextEmit:])
878 }
879 return d
880 }
881 882 // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
883 // assumes that the varint-encoded length of the decompressed bytes has already
884 // been written.
885 //
886 // It also assumes that:
887 //
888 // len(dst) >= MaxEncodedLen(len(src)) &&
889 // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
890 func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
891 // Initialize the hash table.
892 const (
893 tableBits = 14
894 maxTableSize = 1 << tableBits
895 maxAhead = 8 // maximum bytes ahead without checking sLimit
896 897 debug = false
898 )
899 dict.initFast()
900 901 var table [maxTableSize]uint32
902 903 // sLimit is when to stop looking for offset/length copies. The inputMargin
904 // lets us use a fast path for emitLiteral in the main loop, while we are
905 // looking for copies.
906 sLimit := min(len(src)-inputMargin, MaxDictSrcOffset-maxAhead)
907 908 // Bail if we can't compress to at least this.
909 dstLimit := len(src) - len(src)>>5 - 5
910 911 // nextEmit is where in src the next emitLiteral should start from.
912 nextEmit := 0
913 914 // The encoded form can start with a dict entry (copy or repeat).
915 s := 0
916 917 // Convert dict repeat to offset
918 repeat := len(dict.dict) - dict.repeat
919 cv := load64(src, 0)
920 921 // While in dict
922 searchDict:
923 for {
924 // Next src position to check
925 nextS := s + (s-nextEmit)>>6 + 4
926 hash0 := hash6(cv, tableBits)
927 hash1 := hash6(cv>>8, tableBits)
928 if nextS > sLimit {
929 if debug {
930 fmt.Println("slimit reached", s, nextS)
931 }
932 break searchDict
933 }
934 candidateDict := int(dict.fastTable[hash0])
935 candidateDict2 := int(dict.fastTable[hash1])
936 candidate2 := int(table[hash1])
937 candidate := int(table[hash0])
938 table[hash0] = uint32(s)
939 table[hash1] = uint32(s + 1)
940 hash2 := hash6(cv>>16, tableBits)
941 942 // Check repeat at offset checkRep.
943 const checkRep = 1
944 945 if repeat > s {
946 candidate := len(dict.dict) - repeat + s
947 if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
948 // Extend back
949 base := s
950 for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
951 i--
952 base--
953 }
954 // Bail if we exceed the maximum size.
955 if d+(base-nextEmit) > dstLimit {
956 return 0
957 }
958 959 d += emitLiteral(dst[d:], src[nextEmit:base])
960 if debug && nextEmit != base {
961 fmt.Println("emitted ", base-nextEmit, "literals")
962 }
963 s += 4
964 candidate += 4
965 for candidate < len(dict.dict)-8 && s <= len(src)-8 {
966 if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
967 s += bits.TrailingZeros64(diff) >> 3
968 break
969 }
970 s += 8
971 candidate += 8
972 }
973 d += emitRepeat(dst[d:], repeat, s-base)
974 if debug {
975 fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
976 }
977 nextEmit = s
978 if s >= sLimit {
979 break searchDict
980 }
981 cv = load64(src, s)
982 continue
983 }
984 } else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
985 base := s + checkRep
986 // Extend back
987 for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
988 i--
989 base--
990 }
991 d += emitLiteral(dst[d:], src[nextEmit:base])
992 if debug && nextEmit != base {
993 fmt.Println("emitted ", base-nextEmit, "literals")
994 }
995 996 // Extend forward
997 candidate := s - repeat + 4 + checkRep
998 s += 4 + checkRep
999 for s <= sLimit {
1000 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
1001 s += bits.TrailingZeros64(diff) >> 3
1002 break
1003 }
1004 s += 8
1005 candidate += 8
1006 }
1007 if debug {
1008 // Validate match.
1009 if s <= candidate {
1010 panic("s <= candidate")
1011 }
1012 a := src[base:s]
1013 b := src[base-repeat : base-repeat+(s-base)]
1014 if !bytes.Equal(a, b) {
1015 panic("mismatch")
1016 }
1017 }
1018 1019 if nextEmit > 0 {
1020 // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
1021 d += emitRepeat(dst[d:], repeat, s-base)
1022 } else {
1023 // First match, cannot be repeat.
1024 d += emitCopy(dst[d:], repeat, s-base)
1025 }
1026 1027 nextEmit = s
1028 if s >= sLimit {
1029 break searchDict
1030 }
1031 if debug {
1032 fmt.Println("emitted reg repeat", s-base, "s:", s)
1033 }
1034 cv = load64(src, s)
1035 continue searchDict
1036 }
1037 if s == 0 {
1038 cv = load64(src, nextS)
1039 s = nextS
1040 continue searchDict
1041 }
1042 // Start with table. These matches will always be closer.
1043 if uint32(cv) == load32(src, candidate) {
1044 goto emitMatch
1045 }
1046 candidate = int(table[hash2])
1047 if uint32(cv>>8) == load32(src, candidate2) {
1048 table[hash2] = uint32(s + 2)
1049 candidate = candidate2
1050 s++
1051 goto emitMatch
1052 }
1053 1054 // Check dict. Dicts have longer offsets, so we want longer matches.
1055 if cv == load64(dict.dict, candidateDict) {
1056 table[hash2] = uint32(s + 2)
1057 goto emitDict
1058 }
1059 1060 candidateDict = int(dict.fastTable[hash2])
1061 // Check if upper 7 bytes match
1062 if candidateDict2 >= 1 {
1063 if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
1064 table[hash2] = uint32(s + 2)
1065 candidateDict = candidateDict2
1066 s++
1067 goto emitDict
1068 }
1069 }
1070 1071 table[hash2] = uint32(s + 2)
1072 if uint32(cv>>16) == load32(src, candidate) {
1073 s += 2
1074 goto emitMatch
1075 }
1076 if candidateDict >= 2 {
1077 // Check if upper 6 bytes match
1078 if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
1079 s += 2
1080 goto emitDict
1081 }
1082 }
1083 1084 cv = load64(src, nextS)
1085 s = nextS
1086 continue searchDict
1087 1088 emitDict:
1089 {
1090 if debug {
1091 if load32(dict.dict, candidateDict) != load32(src, s) {
1092 panic("dict emit mismatch")
1093 }
1094 }
1095 // Extend backwards.
1096 // The top bytes will be rechecked to get the full match.
1097 for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
1098 candidateDict--
1099 s--
1100 }
1101 1102 // Bail if we exceed the maximum size.
1103 if d+(s-nextEmit) > dstLimit {
1104 return 0
1105 }
1106 1107 // A 4-byte match has been found. We'll later see if more than 4 bytes
1108 // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
1109 // them as literal bytes.
1110 1111 d += emitLiteral(dst[d:], src[nextEmit:s])
1112 if debug && nextEmit != s {
1113 fmt.Println("emitted ", s-nextEmit, "literals")
1114 }
1115 {
1116 // Invariant: we have a 4-byte match at s, and no need to emit any
1117 // literal bytes prior to s.
1118 base := s
1119 repeat = s + (len(dict.dict)) - candidateDict
1120 1121 // Extend the 4-byte match as long as possible.
1122 s += 4
1123 candidateDict += 4
1124 for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
1125 if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
1126 s += bits.TrailingZeros64(diff) >> 3
1127 break
1128 }
1129 s += 8
1130 candidateDict += 8
1131 }
1132 1133 // Matches longer than 64 are split.
1134 if s <= sLimit || s-base < 8 {
1135 d += emitCopy(dst[d:], repeat, s-base)
1136 } else {
1137 // Split to ensure we don't start a copy within next block
1138 d += emitCopy(dst[d:], repeat, 4)
1139 d += emitRepeat(dst[d:], repeat, s-base-4)
1140 }
1141 if false {
1142 // Validate match.
1143 if s <= candidate {
1144 panic("s <= candidate")
1145 }
1146 a := src[base:s]
1147 b := dict.dict[base-repeat : base-repeat+(s-base)]
1148 if !bytes.Equal(a, b) {
1149 panic("mismatch")
1150 }
1151 }
1152 if debug {
1153 fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
1154 }
1155 nextEmit = s
1156 if s >= sLimit {
1157 break searchDict
1158 }
1159 1160 if d > dstLimit {
1161 // Do we have space for more, if not bail.
1162 return 0
1163 }
1164 1165 // Index and continue loop to try new candidate.
1166 x := load64(src, s-2)
1167 m2Hash := hash6(x, tableBits)
1168 currHash := hash6(x>>8, tableBits)
1169 table[m2Hash] = uint32(s - 2)
1170 table[currHash] = uint32(s - 1)
1171 cv = load64(src, s)
1172 }
1173 continue
1174 }
1175 emitMatch:
1176 1177 // Extend backwards.
1178 // The top bytes will be rechecked to get the full match.
1179 for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
1180 candidate--
1181 s--
1182 }
1183 1184 // Bail if we exceed the maximum size.
1185 if d+(s-nextEmit) > dstLimit {
1186 return 0
1187 }
1188 1189 // A 4-byte match has been found. We'll later see if more than 4 bytes
1190 // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
1191 // them as literal bytes.
1192 1193 d += emitLiteral(dst[d:], src[nextEmit:s])
1194 if debug && nextEmit != s {
1195 fmt.Println("emitted ", s-nextEmit, "literals")
1196 }
1197 // Call emitCopy, and then see if another emitCopy could be our next
1198 // move. Repeat until we find no match for the input immediately after
1199 // what was consumed by the last emitCopy call.
1200 //
1201 // If we exit this loop normally then we need to call emitLiteral next,
1202 // though we don't yet know how big the literal will be. We handle that
1203 // by proceeding to the next iteration of the main loop. We also can
1204 // exit this loop via goto if we get close to exhausting the input.
1205 for {
1206 // Invariant: we have a 4-byte match at s, and no need to emit any
1207 // literal bytes prior to s.
1208 base := s
1209 repeat = base - candidate
1210 1211 // Extend the 4-byte match as long as possible.
1212 s += 4
1213 candidate += 4
1214 for s <= len(src)-8 {
1215 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
1216 s += bits.TrailingZeros64(diff) >> 3
1217 break
1218 }
1219 s += 8
1220 candidate += 8
1221 }
1222 1223 d += emitCopy(dst[d:], repeat, s-base)
1224 if debug {
1225 // Validate match.
1226 if s <= candidate {
1227 panic("s <= candidate")
1228 }
1229 a := src[base:s]
1230 b := src[base-repeat : base-repeat+(s-base)]
1231 if !bytes.Equal(a, b) {
1232 panic("mismatch")
1233 }
1234 }
1235 if debug {
1236 fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
1237 }
1238 nextEmit = s
1239 if s >= sLimit {
1240 break searchDict
1241 }
1242 1243 if d > dstLimit {
1244 // Do we have space for more, if not bail.
1245 return 0
1246 }
1247 // Check for an immediate match, otherwise start search at s+1
1248 x := load64(src, s-2)
1249 m2Hash := hash6(x, tableBits)
1250 currHash := hash6(x>>16, tableBits)
1251 candidate = int(table[currHash])
1252 table[m2Hash] = uint32(s - 2)
1253 table[currHash] = uint32(s)
1254 if debug && s == candidate {
1255 panic("s == candidate")
1256 }
1257 if uint32(x>>16) != load32(src, candidate) {
1258 cv = load64(src, s+1)
1259 s++
1260 break
1261 }
1262 }
1263 }
1264 1265 // Search without dict:
1266 if repeat > s {
1267 repeat = 0
1268 }
1269 1270 // No more dict
1271 sLimit = len(src) - inputMargin
1272 if s >= sLimit {
1273 goto emitRemainder
1274 }
1275 if debug {
1276 fmt.Println("non-dict matching at", s, "repeat:", repeat)
1277 }
1278 cv = load64(src, s)
1279 if debug {
1280 fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
1281 }
1282 for {
1283 candidate := 0
1284 for {
1285 // Next src position to check
1286 nextS := s + (s-nextEmit)>>6 + 4
1287 if nextS > sLimit {
1288 goto emitRemainder
1289 }
1290 hash0 := hash6(cv, tableBits)
1291 hash1 := hash6(cv>>8, tableBits)
1292 candidate = int(table[hash0])
1293 candidate2 := int(table[hash1])
1294 table[hash0] = uint32(s)
1295 table[hash1] = uint32(s + 1)
1296 hash2 := hash6(cv>>16, tableBits)
1297 1298 // Check repeat at offset checkRep.
1299 const checkRep = 1
1300 if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
1301 base := s + checkRep
1302 // Extend back
1303 for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
1304 i--
1305 base--
1306 }
1307 // Bail if we exceed the maximum size.
1308 if d+(base-nextEmit) > dstLimit {
1309 return 0
1310 }
1311 1312 d += emitLiteral(dst[d:], src[nextEmit:base])
1313 if debug && nextEmit != base {
1314 fmt.Println("emitted ", base-nextEmit, "literals")
1315 }
1316 // Extend forward
1317 candidate := s - repeat + 4 + checkRep
1318 s += 4 + checkRep
1319 for s <= sLimit {
1320 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
1321 s += bits.TrailingZeros64(diff) >> 3
1322 break
1323 }
1324 s += 8
1325 candidate += 8
1326 }
1327 if debug {
1328 // Validate match.
1329 if s <= candidate {
1330 panic("s <= candidate")
1331 }
1332 a := src[base:s]
1333 b := src[base-repeat : base-repeat+(s-base)]
1334 if !bytes.Equal(a, b) {
1335 panic("mismatch")
1336 }
1337 }
1338 if nextEmit > 0 {
1339 // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
1340 d += emitRepeat(dst[d:], repeat, s-base)
1341 } else {
1342 // First match, cannot be repeat.
1343 d += emitCopy(dst[d:], repeat, s-base)
1344 }
1345 if debug {
1346 fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
1347 }
1348 nextEmit = s
1349 if s >= sLimit {
1350 goto emitRemainder
1351 }
1352 1353 cv = load64(src, s)
1354 continue
1355 }
1356 1357 if uint32(cv) == load32(src, candidate) {
1358 break
1359 }
1360 candidate = int(table[hash2])
1361 if uint32(cv>>8) == load32(src, candidate2) {
1362 table[hash2] = uint32(s + 2)
1363 candidate = candidate2
1364 s++
1365 break
1366 }
1367 table[hash2] = uint32(s + 2)
1368 if uint32(cv>>16) == load32(src, candidate) {
1369 s += 2
1370 break
1371 }
1372 1373 cv = load64(src, nextS)
1374 s = nextS
1375 }
1376 1377 // Extend backwards.
1378 // The top bytes will be rechecked to get the full match.
1379 for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
1380 candidate--
1381 s--
1382 }
1383 1384 // Bail if we exceed the maximum size.
1385 if d+(s-nextEmit) > dstLimit {
1386 return 0
1387 }
1388 1389 // A 4-byte match has been found. We'll later see if more than 4 bytes
1390 // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
1391 // them as literal bytes.
1392 1393 d += emitLiteral(dst[d:], src[nextEmit:s])
1394 if debug && nextEmit != s {
1395 fmt.Println("emitted ", s-nextEmit, "literals")
1396 }
1397 // Call emitCopy, and then see if another emitCopy could be our next
1398 // move. Repeat until we find no match for the input immediately after
1399 // what was consumed by the last emitCopy call.
1400 //
1401 // If we exit this loop normally then we need to call emitLiteral next,
1402 // though we don't yet know how big the literal will be. We handle that
1403 // by proceeding to the next iteration of the main loop. We also can
1404 // exit this loop via goto if we get close to exhausting the input.
1405 for {
1406 // Invariant: we have a 4-byte match at s, and no need to emit any
1407 // literal bytes prior to s.
1408 base := s
1409 repeat = base - candidate
1410 1411 // Extend the 4-byte match as long as possible.
1412 s += 4
1413 candidate += 4
1414 for s <= len(src)-8 {
1415 if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
1416 s += bits.TrailingZeros64(diff) >> 3
1417 break
1418 }
1419 s += 8
1420 candidate += 8
1421 }
1422 1423 d += emitCopy(dst[d:], repeat, s-base)
1424 if debug {
1425 // Validate match.
1426 if s <= candidate {
1427 panic("s <= candidate")
1428 }
1429 a := src[base:s]
1430 b := src[base-repeat : base-repeat+(s-base)]
1431 if !bytes.Equal(a, b) {
1432 panic("mismatch")
1433 }
1434 }
1435 if debug {
1436 fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
1437 }
1438 nextEmit = s
1439 if s >= sLimit {
1440 goto emitRemainder
1441 }
1442 1443 if d > dstLimit {
1444 // Do we have space for more, if not bail.
1445 return 0
1446 }
1447 // Check for an immediate match, otherwise start search at s+1
1448 x := load64(src, s-2)
1449 m2Hash := hash6(x, tableBits)
1450 currHash := hash6(x>>16, tableBits)
1451 candidate = int(table[currHash])
1452 table[m2Hash] = uint32(s - 2)
1453 table[currHash] = uint32(s)
1454 if debug && s == candidate {
1455 panic("s == candidate")
1456 }
1457 if uint32(x>>16) != load32(src, candidate) {
1458 cv = load64(src, s+1)
1459 s++
1460 break
1461 }
1462 }
1463 }
1464 1465 emitRemainder:
1466 if nextEmit < len(src) {
1467 // Bail if we exceed the maximum size.
1468 if d+len(src)-nextEmit > dstLimit {
1469 return 0
1470 }
1471 d += emitLiteral(dst[d:], src[nextEmit:])
1472 if debug && nextEmit != s {
1473 fmt.Println("emitted ", len(src)-nextEmit, "literals")
1474 }
1475 }
1476 return d
1477 }
1478