encode_all.go raw

   1  // Copyright 2016 The Snappy-Go Authors. All rights reserved.
   2  // Copyright (c) 2019 Klaus Post. All rights reserved.
   3  // Use of this source code is governed by a BSD-style
   4  // license that can be found in the LICENSE file.
   5  
   6  package s2
   7  
   8  import (
   9  	"bytes"
  10  	"encoding/binary"
  11  	"fmt"
  12  	"math/bits"
  13  
  14  	"github.com/klauspost/compress/internal/le"
  15  )
  16  
  17  func load32(b []byte, i int) uint32 {
  18  	return le.Load32(b, i)
  19  }
  20  
  21  func load64(b []byte, i int) uint64 {
  22  	return le.Load64(b, i)
  23  }
  24  
  25  // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
  26  // Preferably h should be a constant and should always be <64.
  27  func hash6(u uint64, h uint8) uint32 {
  28  	const prime6bytes = 227718039650203
  29  	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
  30  }
  31  
  32  func encodeGo(dst, src []byte) []byte {
  33  	if n := MaxEncodedLen(len(src)); n < 0 {
  34  		panic(ErrTooLarge)
  35  	} else if len(dst) < n {
  36  		dst = make([]byte, n)
  37  	}
  38  
  39  	// The block starts with the varint-encoded length of the decompressed bytes.
  40  	d := binary.PutUvarint(dst, uint64(len(src)))
  41  
  42  	if len(src) == 0 {
  43  		return dst[:d]
  44  	}
  45  	if len(src) < minNonLiteralBlockSize {
  46  		d += emitLiteral(dst[d:], src)
  47  		return dst[:d]
  48  	}
  49  	var n int
  50  	if len(src) < 64<<10 {
  51  		n = encodeBlockGo64K(dst[d:], src)
  52  	} else {
  53  		n = encodeBlockGo(dst[d:], src)
  54  	}
  55  	if n > 0 {
  56  		d += n
  57  		return dst[:d]
  58  	}
  59  	// Not compressible
  60  	d += emitLiteral(dst[d:], src)
  61  	return dst[:d]
  62  }
  63  
  64  // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
  65  // assumes that the varint-encoded length of the decompressed bytes has already
  66  // been written.
  67  //
  68  // It also assumes that:
  69  //
  70  //	len(dst) >= MaxEncodedLen(len(src)) &&
  71  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  72  func encodeBlockGo(dst, src []byte) (d int) {
  73  	// Initialize the hash table.
  74  	const (
  75  		tableBits    = 14
  76  		maxTableSize = 1 << tableBits
  77  
  78  		debug = false
  79  	)
  80  	var table [maxTableSize]uint32
  81  
  82  	// sLimit is when to stop looking for offset/length copies. The inputMargin
  83  	// lets us use a fast path for emitLiteral in the main loop, while we are
  84  	// looking for copies.
  85  	sLimit := len(src) - inputMargin
  86  
  87  	// Bail if we can't compress to at least this.
  88  	dstLimit := len(src) - len(src)>>5 - 5
  89  
  90  	// nextEmit is where in src the next emitLiteral should start from.
  91  	nextEmit := 0
  92  
  93  	// The encoded form must start with a literal, as there are no previous
  94  	// bytes to copy, so we start looking for hash matches at s == 1.
  95  	s := 1
  96  	cv := load64(src, s)
  97  
  98  	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
  99  	repeat := 1
 100  
 101  	for {
 102  		candidate := 0
 103  		for {
 104  			// Next src position to check
 105  			nextS := s + (s-nextEmit)>>6 + 4
 106  			if nextS > sLimit {
 107  				goto emitRemainder
 108  			}
 109  			hash0 := hash6(cv, tableBits)
 110  			hash1 := hash6(cv>>8, tableBits)
 111  			candidate = int(table[hash0])
 112  			candidate2 := int(table[hash1])
 113  			table[hash0] = uint32(s)
 114  			table[hash1] = uint32(s + 1)
 115  			hash2 := hash6(cv>>16, tableBits)
 116  
 117  			// Check repeat at offset checkRep.
 118  			const checkRep = 1
 119  			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
 120  				base := s + checkRep
 121  				// Extend back
 122  				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
 123  					i--
 124  					base--
 125  				}
 126  
 127  				// Bail if we exceed the maximum size.
 128  				if d+(base-nextEmit) > dstLimit {
 129  					return 0
 130  				}
 131  
 132  				d += emitLiteral(dst[d:], src[nextEmit:base])
 133  
 134  				// Extend forward
 135  				candidate := s - repeat + 4 + checkRep
 136  				s += 4 + checkRep
 137  				for s <= sLimit {
 138  					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
 139  						s += bits.TrailingZeros64(diff) >> 3
 140  						break
 141  					}
 142  					s += 8
 143  					candidate += 8
 144  				}
 145  				if debug {
 146  					// Validate match.
 147  					if s <= candidate {
 148  						panic("s <= candidate")
 149  					}
 150  					a := src[base:s]
 151  					b := src[base-repeat : base-repeat+(s-base)]
 152  					if !bytes.Equal(a, b) {
 153  						panic("mismatch")
 154  					}
 155  				}
 156  				if nextEmit > 0 {
 157  					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
 158  					d += emitRepeat(dst[d:], repeat, s-base)
 159  				} else {
 160  					// First match, cannot be repeat.
 161  					d += emitCopy(dst[d:], repeat, s-base)
 162  				}
 163  				nextEmit = s
 164  				if s >= sLimit {
 165  					goto emitRemainder
 166  				}
 167  				cv = load64(src, s)
 168  				continue
 169  			}
 170  
 171  			if uint32(cv) == load32(src, candidate) {
 172  				break
 173  			}
 174  			candidate = int(table[hash2])
 175  			if uint32(cv>>8) == load32(src, candidate2) {
 176  				table[hash2] = uint32(s + 2)
 177  				candidate = candidate2
 178  				s++
 179  				break
 180  			}
 181  			table[hash2] = uint32(s + 2)
 182  			if uint32(cv>>16) == load32(src, candidate) {
 183  				s += 2
 184  				break
 185  			}
 186  
 187  			cv = load64(src, nextS)
 188  			s = nextS
 189  		}
 190  
 191  		// Extend backwards.
 192  		// The top bytes will be rechecked to get the full match.
 193  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
 194  			candidate--
 195  			s--
 196  		}
 197  
 198  		// Bail if we exceed the maximum size.
 199  		if d+(s-nextEmit) > dstLimit {
 200  			return 0
 201  		}
 202  
 203  		// A 4-byte match has been found. We'll later see if more than 4 bytes
 204  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
 205  		// them as literal bytes.
 206  
 207  		d += emitLiteral(dst[d:], src[nextEmit:s])
 208  
 209  		// Call emitCopy, and then see if another emitCopy could be our next
 210  		// move. Repeat until we find no match for the input immediately after
 211  		// what was consumed by the last emitCopy call.
 212  		//
 213  		// If we exit this loop normally then we need to call emitLiteral next,
 214  		// though we don't yet know how big the literal will be. We handle that
 215  		// by proceeding to the next iteration of the main loop. We also can
 216  		// exit this loop via goto if we get close to exhausting the input.
 217  		for {
 218  			// Invariant: we have a 4-byte match at s, and no need to emit any
 219  			// literal bytes prior to s.
 220  			base := s
 221  			repeat = base - candidate
 222  
 223  			// Extend the 4-byte match as long as possible.
 224  			s += 4
 225  			candidate += 4
 226  			for s <= len(src)-8 {
 227  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
 228  					s += bits.TrailingZeros64(diff) >> 3
 229  					break
 230  				}
 231  				s += 8
 232  				candidate += 8
 233  			}
 234  
 235  			d += emitCopy(dst[d:], repeat, s-base)
 236  			if debug {
 237  				// Validate match.
 238  				if s <= candidate {
 239  					panic("s <= candidate")
 240  				}
 241  				a := src[base:s]
 242  				b := src[base-repeat : base-repeat+(s-base)]
 243  				if !bytes.Equal(a, b) {
 244  					panic("mismatch")
 245  				}
 246  			}
 247  
 248  			nextEmit = s
 249  			if s >= sLimit {
 250  				goto emitRemainder
 251  			}
 252  
 253  			if d > dstLimit {
 254  				// Do we have space for more, if not bail.
 255  				return 0
 256  			}
 257  			// Check for an immediate match, otherwise start search at s+1
 258  			x := load64(src, s-2)
 259  			m2Hash := hash6(x, tableBits)
 260  			currHash := hash6(x>>16, tableBits)
 261  			candidate = int(table[currHash])
 262  			table[m2Hash] = uint32(s - 2)
 263  			table[currHash] = uint32(s)
 264  			if debug && s == candidate {
 265  				panic("s == candidate")
 266  			}
 267  			if uint32(x>>16) != load32(src, candidate) {
 268  				cv = load64(src, s+1)
 269  				s++
 270  				break
 271  			}
 272  		}
 273  	}
 274  
 275  emitRemainder:
 276  	if nextEmit < len(src) {
 277  		// Bail if we exceed the maximum size.
 278  		if d+len(src)-nextEmit > dstLimit {
 279  			return 0
 280  		}
 281  		d += emitLiteral(dst[d:], src[nextEmit:])
 282  	}
 283  	return d
 284  }
 285  
 286  // encodeBlockGo64K is a specialized version for compressing blocks <= 64KB
 287  func encodeBlockGo64K(dst, src []byte) (d int) {
 288  	// Initialize the hash table.
 289  	const (
 290  		tableBits    = 14
 291  		maxTableSize = 1 << tableBits
 292  
 293  		debug = false
 294  	)
 295  
 296  	var table [maxTableSize]uint16
 297  
 298  	// sLimit is when to stop looking for offset/length copies. The inputMargin
 299  	// lets us use a fast path for emitLiteral in the main loop, while we are
 300  	// looking for copies.
 301  	sLimit := len(src) - inputMargin
 302  
 303  	// Bail if we can't compress to at least this.
 304  	dstLimit := len(src) - len(src)>>5 - 5
 305  
 306  	// nextEmit is where in src the next emitLiteral should start from.
 307  	nextEmit := 0
 308  
 309  	// The encoded form must start with a literal, as there are no previous
 310  	// bytes to copy, so we start looking for hash matches at s == 1.
 311  	s := 1
 312  	cv := load64(src, s)
 313  
 314  	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
 315  	repeat := 1
 316  
 317  	for {
 318  		candidate := 0
 319  		for {
 320  			// Next src position to check
 321  			nextS := s + (s-nextEmit)>>5 + 4
 322  			if nextS > sLimit {
 323  				goto emitRemainder
 324  			}
 325  			hash0 := hash6(cv, tableBits)
 326  			hash1 := hash6(cv>>8, tableBits)
 327  			candidate = int(table[hash0])
 328  			candidate2 := int(table[hash1])
 329  			table[hash0] = uint16(s)
 330  			table[hash1] = uint16(s + 1)
 331  			hash2 := hash6(cv>>16, tableBits)
 332  
 333  			// Check repeat at offset checkRep.
 334  			const checkRep = 1
 335  			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
 336  				base := s + checkRep
 337  				// Extend back
 338  				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
 339  					i--
 340  					base--
 341  				}
 342  
 343  				// Bail if we exceed the maximum size.
 344  				if d+(base-nextEmit) > dstLimit {
 345  					return 0
 346  				}
 347  
 348  				d += emitLiteral(dst[d:], src[nextEmit:base])
 349  
 350  				// Extend forward
 351  				candidate := s - repeat + 4 + checkRep
 352  				s += 4 + checkRep
 353  				for s <= sLimit {
 354  					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
 355  						s += bits.TrailingZeros64(diff) >> 3
 356  						break
 357  					}
 358  					s += 8
 359  					candidate += 8
 360  				}
 361  				if debug {
 362  					// Validate match.
 363  					if s <= candidate {
 364  						panic("s <= candidate")
 365  					}
 366  					a := src[base:s]
 367  					b := src[base-repeat : base-repeat+(s-base)]
 368  					if !bytes.Equal(a, b) {
 369  						panic("mismatch")
 370  					}
 371  				}
 372  				if nextEmit > 0 {
 373  					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
 374  					d += emitRepeat(dst[d:], repeat, s-base)
 375  				} else {
 376  					// First match, cannot be repeat.
 377  					d += emitCopy(dst[d:], repeat, s-base)
 378  				}
 379  				nextEmit = s
 380  				if s >= sLimit {
 381  					goto emitRemainder
 382  				}
 383  				cv = load64(src, s)
 384  				continue
 385  			}
 386  
 387  			if uint32(cv) == load32(src, candidate) {
 388  				break
 389  			}
 390  			candidate = int(table[hash2])
 391  			if uint32(cv>>8) == load32(src, candidate2) {
 392  				table[hash2] = uint16(s + 2)
 393  				candidate = candidate2
 394  				s++
 395  				break
 396  			}
 397  			table[hash2] = uint16(s + 2)
 398  			if uint32(cv>>16) == load32(src, candidate) {
 399  				s += 2
 400  				break
 401  			}
 402  
 403  			cv = load64(src, nextS)
 404  			s = nextS
 405  		}
 406  
 407  		// Extend backwards.
 408  		// The top bytes will be rechecked to get the full match.
 409  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
 410  			candidate--
 411  			s--
 412  		}
 413  
 414  		// Bail if we exceed the maximum size.
 415  		if d+(s-nextEmit) > dstLimit {
 416  			return 0
 417  		}
 418  
 419  		// A 4-byte match has been found. We'll later see if more than 4 bytes
 420  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
 421  		// them as literal bytes.
 422  
 423  		d += emitLiteral(dst[d:], src[nextEmit:s])
 424  
 425  		// Call emitCopy, and then see if another emitCopy could be our next
 426  		// move. Repeat until we find no match for the input immediately after
 427  		// what was consumed by the last emitCopy call.
 428  		//
 429  		// If we exit this loop normally then we need to call emitLiteral next,
 430  		// though we don't yet know how big the literal will be. We handle that
 431  		// by proceeding to the next iteration of the main loop. We also can
 432  		// exit this loop via goto if we get close to exhausting the input.
 433  		for {
 434  			// Invariant: we have a 4-byte match at s, and no need to emit any
 435  			// literal bytes prior to s.
 436  			base := s
 437  			repeat = base - candidate
 438  
 439  			// Extend the 4-byte match as long as possible.
 440  			s += 4
 441  			candidate += 4
 442  			for s <= len(src)-8 {
 443  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
 444  					s += bits.TrailingZeros64(diff) >> 3
 445  					break
 446  				}
 447  				s += 8
 448  				candidate += 8
 449  			}
 450  
 451  			d += emitCopy(dst[d:], repeat, s-base)
 452  			if debug {
 453  				// Validate match.
 454  				if s <= candidate {
 455  					panic("s <= candidate")
 456  				}
 457  				a := src[base:s]
 458  				b := src[base-repeat : base-repeat+(s-base)]
 459  				if !bytes.Equal(a, b) {
 460  					panic("mismatch")
 461  				}
 462  			}
 463  
 464  			nextEmit = s
 465  			if s >= sLimit {
 466  				goto emitRemainder
 467  			}
 468  
 469  			if d > dstLimit {
 470  				// Do we have space for more, if not bail.
 471  				return 0
 472  			}
 473  			// Check for an immediate match, otherwise start search at s+1
 474  			x := load64(src, s-2)
 475  			m2Hash := hash6(x, tableBits)
 476  			currHash := hash6(x>>16, tableBits)
 477  			candidate = int(table[currHash])
 478  			table[m2Hash] = uint16(s - 2)
 479  			table[currHash] = uint16(s)
 480  			if debug && s == candidate {
 481  				panic("s == candidate")
 482  			}
 483  			if uint32(x>>16) != load32(src, candidate) {
 484  				cv = load64(src, s+1)
 485  				s++
 486  				break
 487  			}
 488  		}
 489  	}
 490  
 491  emitRemainder:
 492  	if nextEmit < len(src) {
 493  		// Bail if we exceed the maximum size.
 494  		if d+len(src)-nextEmit > dstLimit {
 495  			return 0
 496  		}
 497  		d += emitLiteral(dst[d:], src[nextEmit:])
 498  	}
 499  	return d
 500  }
 501  
 502  func encodeBlockSnappyGo(dst, src []byte) (d int) {
 503  	// Initialize the hash table.
 504  	const (
 505  		tableBits    = 14
 506  		maxTableSize = 1 << tableBits
 507  	)
 508  	var table [maxTableSize]uint32
 509  
 510  	// sLimit is when to stop looking for offset/length copies. The inputMargin
 511  	// lets us use a fast path for emitLiteral in the main loop, while we are
 512  	// looking for copies.
 513  	sLimit := len(src) - inputMargin
 514  
 515  	// Bail if we can't compress to at least this.
 516  	dstLimit := len(src) - len(src)>>5 - 5
 517  
 518  	// nextEmit is where in src the next emitLiteral should start from.
 519  	nextEmit := 0
 520  
 521  	// The encoded form must start with a literal, as there are no previous
 522  	// bytes to copy, so we start looking for hash matches at s == 1.
 523  	s := 1
 524  	cv := load64(src, s)
 525  
 526  	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
 527  	repeat := 1
 528  
 529  	for {
 530  		candidate := 0
 531  		for {
 532  			// Next src position to check
 533  			nextS := s + (s-nextEmit)>>6 + 4
 534  			if nextS > sLimit {
 535  				goto emitRemainder
 536  			}
 537  			hash0 := hash6(cv, tableBits)
 538  			hash1 := hash6(cv>>8, tableBits)
 539  			candidate = int(table[hash0])
 540  			candidate2 := int(table[hash1])
 541  			table[hash0] = uint32(s)
 542  			table[hash1] = uint32(s + 1)
 543  			hash2 := hash6(cv>>16, tableBits)
 544  
 545  			// Check repeat at offset checkRep.
 546  			const checkRep = 1
 547  			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
 548  				base := s + checkRep
 549  				// Extend back
 550  				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
 551  					i--
 552  					base--
 553  				}
 554  				// Bail if we exceed the maximum size.
 555  				if d+(base-nextEmit) > dstLimit {
 556  					return 0
 557  				}
 558  
 559  				d += emitLiteral(dst[d:], src[nextEmit:base])
 560  
 561  				// Extend forward
 562  				candidate := s - repeat + 4 + checkRep
 563  				s += 4 + checkRep
 564  				for s <= sLimit {
 565  					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
 566  						s += bits.TrailingZeros64(diff) >> 3
 567  						break
 568  					}
 569  					s += 8
 570  					candidate += 8
 571  				}
 572  
 573  				d += emitCopyNoRepeat(dst[d:], repeat, s-base)
 574  				nextEmit = s
 575  				if s >= sLimit {
 576  					goto emitRemainder
 577  				}
 578  
 579  				cv = load64(src, s)
 580  				continue
 581  			}
 582  
 583  			if uint32(cv) == load32(src, candidate) {
 584  				break
 585  			}
 586  			candidate = int(table[hash2])
 587  			if uint32(cv>>8) == load32(src, candidate2) {
 588  				table[hash2] = uint32(s + 2)
 589  				candidate = candidate2
 590  				s++
 591  				break
 592  			}
 593  			table[hash2] = uint32(s + 2)
 594  			if uint32(cv>>16) == load32(src, candidate) {
 595  				s += 2
 596  				break
 597  			}
 598  
 599  			cv = load64(src, nextS)
 600  			s = nextS
 601  		}
 602  
 603  		// Extend backwards
 604  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
 605  			candidate--
 606  			s--
 607  		}
 608  
 609  		// Bail if we exceed the maximum size.
 610  		if d+(s-nextEmit) > dstLimit {
 611  			return 0
 612  		}
 613  
 614  		// A 4-byte match has been found. We'll later see if more than 4 bytes
 615  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
 616  		// them as literal bytes.
 617  
 618  		d += emitLiteral(dst[d:], src[nextEmit:s])
 619  
 620  		// Call emitCopy, and then see if another emitCopy could be our next
 621  		// move. Repeat until we find no match for the input immediately after
 622  		// what was consumed by the last emitCopy call.
 623  		//
 624  		// If we exit this loop normally then we need to call emitLiteral next,
 625  		// though we don't yet know how big the literal will be. We handle that
 626  		// by proceeding to the next iteration of the main loop. We also can
 627  		// exit this loop via goto if we get close to exhausting the input.
 628  		for {
 629  			// Invariant: we have a 4-byte match at s, and no need to emit any
 630  			// literal bytes prior to s.
 631  			base := s
 632  			repeat = base - candidate
 633  
 634  			// Extend the 4-byte match as long as possible.
 635  			s += 4
 636  			candidate += 4
 637  			for s <= len(src)-8 {
 638  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
 639  					s += bits.TrailingZeros64(diff) >> 3
 640  					break
 641  				}
 642  				s += 8
 643  				candidate += 8
 644  			}
 645  
 646  			d += emitCopyNoRepeat(dst[d:], repeat, s-base)
 647  			if false {
 648  				// Validate match.
 649  				a := src[base:s]
 650  				b := src[base-repeat : base-repeat+(s-base)]
 651  				if !bytes.Equal(a, b) {
 652  					panic("mismatch")
 653  				}
 654  			}
 655  
 656  			nextEmit = s
 657  			if s >= sLimit {
 658  				goto emitRemainder
 659  			}
 660  
 661  			if d > dstLimit {
 662  				// Do we have space for more, if not bail.
 663  				return 0
 664  			}
 665  			// Check for an immediate match, otherwise start search at s+1
 666  			x := load64(src, s-2)
 667  			m2Hash := hash6(x, tableBits)
 668  			currHash := hash6(x>>16, tableBits)
 669  			candidate = int(table[currHash])
 670  			table[m2Hash] = uint32(s - 2)
 671  			table[currHash] = uint32(s)
 672  			if uint32(x>>16) != load32(src, candidate) {
 673  				cv = load64(src, s+1)
 674  				s++
 675  				break
 676  			}
 677  		}
 678  	}
 679  
 680  emitRemainder:
 681  	if nextEmit < len(src) {
 682  		// Bail if we exceed the maximum size.
 683  		if d+len(src)-nextEmit > dstLimit {
 684  			return 0
 685  		}
 686  		d += emitLiteral(dst[d:], src[nextEmit:])
 687  	}
 688  	return d
 689  }
 690  
 691  // encodeBlockSnappyGo64K is a special version of encodeBlockSnappyGo for sizes <64KB
 692  func encodeBlockSnappyGo64K(dst, src []byte) (d int) {
 693  	// Initialize the hash table.
 694  	const (
 695  		tableBits    = 14
 696  		maxTableSize = 1 << tableBits
 697  	)
 698  
 699  	var table [maxTableSize]uint16
 700  
 701  	// sLimit is when to stop looking for offset/length copies. The inputMargin
 702  	// lets us use a fast path for emitLiteral in the main loop, while we are
 703  	// looking for copies.
 704  	sLimit := len(src) - inputMargin
 705  
 706  	// Bail if we can't compress to at least this.
 707  	dstLimit := len(src) - len(src)>>5 - 5
 708  
 709  	// nextEmit is where in src the next emitLiteral should start from.
 710  	nextEmit := 0
 711  
 712  	// The encoded form must start with a literal, as there are no previous
 713  	// bytes to copy, so we start looking for hash matches at s == 1.
 714  	s := 1
 715  	cv := load64(src, s)
 716  
 717  	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
 718  	repeat := 1
 719  
 720  	for {
 721  		candidate := 0
 722  		for {
 723  			// Next src position to check
 724  			nextS := s + (s-nextEmit)>>5 + 4
 725  			if nextS > sLimit {
 726  				goto emitRemainder
 727  			}
 728  			hash0 := hash6(cv, tableBits)
 729  			hash1 := hash6(cv>>8, tableBits)
 730  			candidate = int(table[hash0])
 731  			candidate2 := int(table[hash1])
 732  			table[hash0] = uint16(s)
 733  			table[hash1] = uint16(s + 1)
 734  			hash2 := hash6(cv>>16, tableBits)
 735  
 736  			// Check repeat at offset checkRep.
 737  			const checkRep = 1
 738  			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
 739  				base := s + checkRep
 740  				// Extend back
 741  				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
 742  					i--
 743  					base--
 744  				}
 745  				// Bail if we exceed the maximum size.
 746  				if d+(base-nextEmit) > dstLimit {
 747  					return 0
 748  				}
 749  
 750  				d += emitLiteral(dst[d:], src[nextEmit:base])
 751  
 752  				// Extend forward
 753  				candidate := s - repeat + 4 + checkRep
 754  				s += 4 + checkRep
 755  				for s <= sLimit {
 756  					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
 757  						s += bits.TrailingZeros64(diff) >> 3
 758  						break
 759  					}
 760  					s += 8
 761  					candidate += 8
 762  				}
 763  
 764  				d += emitCopyNoRepeat(dst[d:], repeat, s-base)
 765  				nextEmit = s
 766  				if s >= sLimit {
 767  					goto emitRemainder
 768  				}
 769  
 770  				cv = load64(src, s)
 771  				continue
 772  			}
 773  
 774  			if uint32(cv) == load32(src, candidate) {
 775  				break
 776  			}
 777  			candidate = int(table[hash2])
 778  			if uint32(cv>>8) == load32(src, candidate2) {
 779  				table[hash2] = uint16(s + 2)
 780  				candidate = candidate2
 781  				s++
 782  				break
 783  			}
 784  			table[hash2] = uint16(s + 2)
 785  			if uint32(cv>>16) == load32(src, candidate) {
 786  				s += 2
 787  				break
 788  			}
 789  
 790  			cv = load64(src, nextS)
 791  			s = nextS
 792  		}
 793  
 794  		// Extend backwards
 795  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
 796  			candidate--
 797  			s--
 798  		}
 799  
 800  		// Bail if we exceed the maximum size.
 801  		if d+(s-nextEmit) > dstLimit {
 802  			return 0
 803  		}
 804  
 805  		// A 4-byte match has been found. We'll later see if more than 4 bytes
 806  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
 807  		// them as literal bytes.
 808  
 809  		d += emitLiteral(dst[d:], src[nextEmit:s])
 810  
 811  		// Call emitCopy, and then see if another emitCopy could be our next
 812  		// move. Repeat until we find no match for the input immediately after
 813  		// what was consumed by the last emitCopy call.
 814  		//
 815  		// If we exit this loop normally then we need to call emitLiteral next,
 816  		// though we don't yet know how big the literal will be. We handle that
 817  		// by proceeding to the next iteration of the main loop. We also can
 818  		// exit this loop via goto if we get close to exhausting the input.
 819  		for {
 820  			// Invariant: we have a 4-byte match at s, and no need to emit any
 821  			// literal bytes prior to s.
 822  			base := s
 823  			repeat = base - candidate
 824  
 825  			// Extend the 4-byte match as long as possible.
 826  			s += 4
 827  			candidate += 4
 828  			for s <= len(src)-8 {
 829  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
 830  					s += bits.TrailingZeros64(diff) >> 3
 831  					break
 832  				}
 833  				s += 8
 834  				candidate += 8
 835  			}
 836  
 837  			d += emitCopyNoRepeat(dst[d:], repeat, s-base)
 838  			if false {
 839  				// Validate match.
 840  				a := src[base:s]
 841  				b := src[base-repeat : base-repeat+(s-base)]
 842  				if !bytes.Equal(a, b) {
 843  					panic("mismatch")
 844  				}
 845  			}
 846  
 847  			nextEmit = s
 848  			if s >= sLimit {
 849  				goto emitRemainder
 850  			}
 851  
 852  			if d > dstLimit {
 853  				// Do we have space for more, if not bail.
 854  				return 0
 855  			}
 856  			// Check for an immediate match, otherwise start search at s+1
 857  			x := load64(src, s-2)
 858  			m2Hash := hash6(x, tableBits)
 859  			currHash := hash6(x>>16, tableBits)
 860  			candidate = int(table[currHash])
 861  			table[m2Hash] = uint16(s - 2)
 862  			table[currHash] = uint16(s)
 863  			if uint32(x>>16) != load32(src, candidate) {
 864  				cv = load64(src, s+1)
 865  				s++
 866  				break
 867  			}
 868  		}
 869  	}
 870  
 871  emitRemainder:
 872  	if nextEmit < len(src) {
 873  		// Bail if we exceed the maximum size.
 874  		if d+len(src)-nextEmit > dstLimit {
 875  			return 0
 876  		}
 877  		d += emitLiteral(dst[d:], src[nextEmit:])
 878  	}
 879  	return d
 880  }
 881  
 882  // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
 883  // assumes that the varint-encoded length of the decompressed bytes has already
 884  // been written.
 885  //
 886  // It also assumes that:
 887  //
 888  //	len(dst) >= MaxEncodedLen(len(src)) &&
 889  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
 890  func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
 891  	// Initialize the hash table.
 892  	const (
 893  		tableBits    = 14
 894  		maxTableSize = 1 << tableBits
 895  		maxAhead     = 8 // maximum bytes ahead without checking sLimit
 896  
 897  		debug = false
 898  	)
 899  	dict.initFast()
 900  
 901  	var table [maxTableSize]uint32
 902  
 903  	// sLimit is when to stop looking for offset/length copies. The inputMargin
 904  	// lets us use a fast path for emitLiteral in the main loop, while we are
 905  	// looking for copies.
 906  	sLimit := min(len(src)-inputMargin, MaxDictSrcOffset-maxAhead)
 907  
 908  	// Bail if we can't compress to at least this.
 909  	dstLimit := len(src) - len(src)>>5 - 5
 910  
 911  	// nextEmit is where in src the next emitLiteral should start from.
 912  	nextEmit := 0
 913  
 914  	// The encoded form can start with a dict entry (copy or repeat).
 915  	s := 0
 916  
 917  	// Convert dict repeat to offset
 918  	repeat := len(dict.dict) - dict.repeat
 919  	cv := load64(src, 0)
 920  
 921  	// While in dict
 922  searchDict:
 923  	for {
 924  		// Next src position to check
 925  		nextS := s + (s-nextEmit)>>6 + 4
 926  		hash0 := hash6(cv, tableBits)
 927  		hash1 := hash6(cv>>8, tableBits)
 928  		if nextS > sLimit {
 929  			if debug {
 930  				fmt.Println("slimit reached", s, nextS)
 931  			}
 932  			break searchDict
 933  		}
 934  		candidateDict := int(dict.fastTable[hash0])
 935  		candidateDict2 := int(dict.fastTable[hash1])
 936  		candidate2 := int(table[hash1])
 937  		candidate := int(table[hash0])
 938  		table[hash0] = uint32(s)
 939  		table[hash1] = uint32(s + 1)
 940  		hash2 := hash6(cv>>16, tableBits)
 941  
 942  		// Check repeat at offset checkRep.
 943  		const checkRep = 1
 944  
 945  		if repeat > s {
 946  			candidate := len(dict.dict) - repeat + s
 947  			if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
 948  				// Extend back
 949  				base := s
 950  				for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
 951  					i--
 952  					base--
 953  				}
 954  				// Bail if we exceed the maximum size.
 955  				if d+(base-nextEmit) > dstLimit {
 956  					return 0
 957  				}
 958  
 959  				d += emitLiteral(dst[d:], src[nextEmit:base])
 960  				if debug && nextEmit != base {
 961  					fmt.Println("emitted ", base-nextEmit, "literals")
 962  				}
 963  				s += 4
 964  				candidate += 4
 965  				for candidate < len(dict.dict)-8 && s <= len(src)-8 {
 966  					if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
 967  						s += bits.TrailingZeros64(diff) >> 3
 968  						break
 969  					}
 970  					s += 8
 971  					candidate += 8
 972  				}
 973  				d += emitRepeat(dst[d:], repeat, s-base)
 974  				if debug {
 975  					fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
 976  				}
 977  				nextEmit = s
 978  				if s >= sLimit {
 979  					break searchDict
 980  				}
 981  				cv = load64(src, s)
 982  				continue
 983  			}
 984  		} else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
 985  			base := s + checkRep
 986  			// Extend back
 987  			for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
 988  				i--
 989  				base--
 990  			}
 991  			d += emitLiteral(dst[d:], src[nextEmit:base])
 992  			if debug && nextEmit != base {
 993  				fmt.Println("emitted ", base-nextEmit, "literals")
 994  			}
 995  
 996  			// Extend forward
 997  			candidate := s - repeat + 4 + checkRep
 998  			s += 4 + checkRep
 999  			for s <= sLimit {
1000  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
1001  					s += bits.TrailingZeros64(diff) >> 3
1002  					break
1003  				}
1004  				s += 8
1005  				candidate += 8
1006  			}
1007  			if debug {
1008  				// Validate match.
1009  				if s <= candidate {
1010  					panic("s <= candidate")
1011  				}
1012  				a := src[base:s]
1013  				b := src[base-repeat : base-repeat+(s-base)]
1014  				if !bytes.Equal(a, b) {
1015  					panic("mismatch")
1016  				}
1017  			}
1018  
1019  			if nextEmit > 0 {
1020  				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
1021  				d += emitRepeat(dst[d:], repeat, s-base)
1022  			} else {
1023  				// First match, cannot be repeat.
1024  				d += emitCopy(dst[d:], repeat, s-base)
1025  			}
1026  
1027  			nextEmit = s
1028  			if s >= sLimit {
1029  				break searchDict
1030  			}
1031  			if debug {
1032  				fmt.Println("emitted reg repeat", s-base, "s:", s)
1033  			}
1034  			cv = load64(src, s)
1035  			continue searchDict
1036  		}
1037  		if s == 0 {
1038  			cv = load64(src, nextS)
1039  			s = nextS
1040  			continue searchDict
1041  		}
1042  		// Start with table. These matches will always be closer.
1043  		if uint32(cv) == load32(src, candidate) {
1044  			goto emitMatch
1045  		}
1046  		candidate = int(table[hash2])
1047  		if uint32(cv>>8) == load32(src, candidate2) {
1048  			table[hash2] = uint32(s + 2)
1049  			candidate = candidate2
1050  			s++
1051  			goto emitMatch
1052  		}
1053  
1054  		// Check dict. Dicts have longer offsets, so we want longer matches.
1055  		if cv == load64(dict.dict, candidateDict) {
1056  			table[hash2] = uint32(s + 2)
1057  			goto emitDict
1058  		}
1059  
1060  		candidateDict = int(dict.fastTable[hash2])
1061  		// Check if upper 7 bytes match
1062  		if candidateDict2 >= 1 {
1063  			if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
1064  				table[hash2] = uint32(s + 2)
1065  				candidateDict = candidateDict2
1066  				s++
1067  				goto emitDict
1068  			}
1069  		}
1070  
1071  		table[hash2] = uint32(s + 2)
1072  		if uint32(cv>>16) == load32(src, candidate) {
1073  			s += 2
1074  			goto emitMatch
1075  		}
1076  		if candidateDict >= 2 {
1077  			// Check if upper 6 bytes match
1078  			if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
1079  				s += 2
1080  				goto emitDict
1081  			}
1082  		}
1083  
1084  		cv = load64(src, nextS)
1085  		s = nextS
1086  		continue searchDict
1087  
1088  	emitDict:
1089  		{
1090  			if debug {
1091  				if load32(dict.dict, candidateDict) != load32(src, s) {
1092  					panic("dict emit mismatch")
1093  				}
1094  			}
1095  			// Extend backwards.
1096  			// The top bytes will be rechecked to get the full match.
1097  			for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
1098  				candidateDict--
1099  				s--
1100  			}
1101  
1102  			// Bail if we exceed the maximum size.
1103  			if d+(s-nextEmit) > dstLimit {
1104  				return 0
1105  			}
1106  
1107  			// A 4-byte match has been found. We'll later see if more than 4 bytes
1108  			// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
1109  			// them as literal bytes.
1110  
1111  			d += emitLiteral(dst[d:], src[nextEmit:s])
1112  			if debug && nextEmit != s {
1113  				fmt.Println("emitted ", s-nextEmit, "literals")
1114  			}
1115  			{
1116  				// Invariant: we have a 4-byte match at s, and no need to emit any
1117  				// literal bytes prior to s.
1118  				base := s
1119  				repeat = s + (len(dict.dict)) - candidateDict
1120  
1121  				// Extend the 4-byte match as long as possible.
1122  				s += 4
1123  				candidateDict += 4
1124  				for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
1125  					if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
1126  						s += bits.TrailingZeros64(diff) >> 3
1127  						break
1128  					}
1129  					s += 8
1130  					candidateDict += 8
1131  				}
1132  
1133  				// Matches longer than 64 are split.
1134  				if s <= sLimit || s-base < 8 {
1135  					d += emitCopy(dst[d:], repeat, s-base)
1136  				} else {
1137  					// Split to ensure we don't start a copy within next block
1138  					d += emitCopy(dst[d:], repeat, 4)
1139  					d += emitRepeat(dst[d:], repeat, s-base-4)
1140  				}
1141  				if false {
1142  					// Validate match.
1143  					if s <= candidate {
1144  						panic("s <= candidate")
1145  					}
1146  					a := src[base:s]
1147  					b := dict.dict[base-repeat : base-repeat+(s-base)]
1148  					if !bytes.Equal(a, b) {
1149  						panic("mismatch")
1150  					}
1151  				}
1152  				if debug {
1153  					fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
1154  				}
1155  				nextEmit = s
1156  				if s >= sLimit {
1157  					break searchDict
1158  				}
1159  
1160  				if d > dstLimit {
1161  					// Do we have space for more, if not bail.
1162  					return 0
1163  				}
1164  
1165  				// Index and continue loop to try new candidate.
1166  				x := load64(src, s-2)
1167  				m2Hash := hash6(x, tableBits)
1168  				currHash := hash6(x>>8, tableBits)
1169  				table[m2Hash] = uint32(s - 2)
1170  				table[currHash] = uint32(s - 1)
1171  				cv = load64(src, s)
1172  			}
1173  			continue
1174  		}
1175  	emitMatch:
1176  
1177  		// Extend backwards.
1178  		// The top bytes will be rechecked to get the full match.
1179  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
1180  			candidate--
1181  			s--
1182  		}
1183  
1184  		// Bail if we exceed the maximum size.
1185  		if d+(s-nextEmit) > dstLimit {
1186  			return 0
1187  		}
1188  
1189  		// A 4-byte match has been found. We'll later see if more than 4 bytes
1190  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
1191  		// them as literal bytes.
1192  
1193  		d += emitLiteral(dst[d:], src[nextEmit:s])
1194  		if debug && nextEmit != s {
1195  			fmt.Println("emitted ", s-nextEmit, "literals")
1196  		}
1197  		// Call emitCopy, and then see if another emitCopy could be our next
1198  		// move. Repeat until we find no match for the input immediately after
1199  		// what was consumed by the last emitCopy call.
1200  		//
1201  		// If we exit this loop normally then we need to call emitLiteral next,
1202  		// though we don't yet know how big the literal will be. We handle that
1203  		// by proceeding to the next iteration of the main loop. We also can
1204  		// exit this loop via goto if we get close to exhausting the input.
1205  		for {
1206  			// Invariant: we have a 4-byte match at s, and no need to emit any
1207  			// literal bytes prior to s.
1208  			base := s
1209  			repeat = base - candidate
1210  
1211  			// Extend the 4-byte match as long as possible.
1212  			s += 4
1213  			candidate += 4
1214  			for s <= len(src)-8 {
1215  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
1216  					s += bits.TrailingZeros64(diff) >> 3
1217  					break
1218  				}
1219  				s += 8
1220  				candidate += 8
1221  			}
1222  
1223  			d += emitCopy(dst[d:], repeat, s-base)
1224  			if debug {
1225  				// Validate match.
1226  				if s <= candidate {
1227  					panic("s <= candidate")
1228  				}
1229  				a := src[base:s]
1230  				b := src[base-repeat : base-repeat+(s-base)]
1231  				if !bytes.Equal(a, b) {
1232  					panic("mismatch")
1233  				}
1234  			}
1235  			if debug {
1236  				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
1237  			}
1238  			nextEmit = s
1239  			if s >= sLimit {
1240  				break searchDict
1241  			}
1242  
1243  			if d > dstLimit {
1244  				// Do we have space for more, if not bail.
1245  				return 0
1246  			}
1247  			// Check for an immediate match, otherwise start search at s+1
1248  			x := load64(src, s-2)
1249  			m2Hash := hash6(x, tableBits)
1250  			currHash := hash6(x>>16, tableBits)
1251  			candidate = int(table[currHash])
1252  			table[m2Hash] = uint32(s - 2)
1253  			table[currHash] = uint32(s)
1254  			if debug && s == candidate {
1255  				panic("s == candidate")
1256  			}
1257  			if uint32(x>>16) != load32(src, candidate) {
1258  				cv = load64(src, s+1)
1259  				s++
1260  				break
1261  			}
1262  		}
1263  	}
1264  
1265  	// Search without dict:
1266  	if repeat > s {
1267  		repeat = 0
1268  	}
1269  
1270  	// No more dict
1271  	sLimit = len(src) - inputMargin
1272  	if s >= sLimit {
1273  		goto emitRemainder
1274  	}
1275  	if debug {
1276  		fmt.Println("non-dict matching at", s, "repeat:", repeat)
1277  	}
1278  	cv = load64(src, s)
1279  	if debug {
1280  		fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
1281  	}
1282  	for {
1283  		candidate := 0
1284  		for {
1285  			// Next src position to check
1286  			nextS := s + (s-nextEmit)>>6 + 4
1287  			if nextS > sLimit {
1288  				goto emitRemainder
1289  			}
1290  			hash0 := hash6(cv, tableBits)
1291  			hash1 := hash6(cv>>8, tableBits)
1292  			candidate = int(table[hash0])
1293  			candidate2 := int(table[hash1])
1294  			table[hash0] = uint32(s)
1295  			table[hash1] = uint32(s + 1)
1296  			hash2 := hash6(cv>>16, tableBits)
1297  
1298  			// Check repeat at offset checkRep.
1299  			const checkRep = 1
1300  			if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
1301  				base := s + checkRep
1302  				// Extend back
1303  				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
1304  					i--
1305  					base--
1306  				}
1307  				// Bail if we exceed the maximum size.
1308  				if d+(base-nextEmit) > dstLimit {
1309  					return 0
1310  				}
1311  
1312  				d += emitLiteral(dst[d:], src[nextEmit:base])
1313  				if debug && nextEmit != base {
1314  					fmt.Println("emitted ", base-nextEmit, "literals")
1315  				}
1316  				// Extend forward
1317  				candidate := s - repeat + 4 + checkRep
1318  				s += 4 + checkRep
1319  				for s <= sLimit {
1320  					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
1321  						s += bits.TrailingZeros64(diff) >> 3
1322  						break
1323  					}
1324  					s += 8
1325  					candidate += 8
1326  				}
1327  				if debug {
1328  					// Validate match.
1329  					if s <= candidate {
1330  						panic("s <= candidate")
1331  					}
1332  					a := src[base:s]
1333  					b := src[base-repeat : base-repeat+(s-base)]
1334  					if !bytes.Equal(a, b) {
1335  						panic("mismatch")
1336  					}
1337  				}
1338  				if nextEmit > 0 {
1339  					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
1340  					d += emitRepeat(dst[d:], repeat, s-base)
1341  				} else {
1342  					// First match, cannot be repeat.
1343  					d += emitCopy(dst[d:], repeat, s-base)
1344  				}
1345  				if debug {
1346  					fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
1347  				}
1348  				nextEmit = s
1349  				if s >= sLimit {
1350  					goto emitRemainder
1351  				}
1352  
1353  				cv = load64(src, s)
1354  				continue
1355  			}
1356  
1357  			if uint32(cv) == load32(src, candidate) {
1358  				break
1359  			}
1360  			candidate = int(table[hash2])
1361  			if uint32(cv>>8) == load32(src, candidate2) {
1362  				table[hash2] = uint32(s + 2)
1363  				candidate = candidate2
1364  				s++
1365  				break
1366  			}
1367  			table[hash2] = uint32(s + 2)
1368  			if uint32(cv>>16) == load32(src, candidate) {
1369  				s += 2
1370  				break
1371  			}
1372  
1373  			cv = load64(src, nextS)
1374  			s = nextS
1375  		}
1376  
1377  		// Extend backwards.
1378  		// The top bytes will be rechecked to get the full match.
1379  		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
1380  			candidate--
1381  			s--
1382  		}
1383  
1384  		// Bail if we exceed the maximum size.
1385  		if d+(s-nextEmit) > dstLimit {
1386  			return 0
1387  		}
1388  
1389  		// A 4-byte match has been found. We'll later see if more than 4 bytes
1390  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
1391  		// them as literal bytes.
1392  
1393  		d += emitLiteral(dst[d:], src[nextEmit:s])
1394  		if debug && nextEmit != s {
1395  			fmt.Println("emitted ", s-nextEmit, "literals")
1396  		}
1397  		// Call emitCopy, and then see if another emitCopy could be our next
1398  		// move. Repeat until we find no match for the input immediately after
1399  		// what was consumed by the last emitCopy call.
1400  		//
1401  		// If we exit this loop normally then we need to call emitLiteral next,
1402  		// though we don't yet know how big the literal will be. We handle that
1403  		// by proceeding to the next iteration of the main loop. We also can
1404  		// exit this loop via goto if we get close to exhausting the input.
1405  		for {
1406  			// Invariant: we have a 4-byte match at s, and no need to emit any
1407  			// literal bytes prior to s.
1408  			base := s
1409  			repeat = base - candidate
1410  
1411  			// Extend the 4-byte match as long as possible.
1412  			s += 4
1413  			candidate += 4
1414  			for s <= len(src)-8 {
1415  				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
1416  					s += bits.TrailingZeros64(diff) >> 3
1417  					break
1418  				}
1419  				s += 8
1420  				candidate += 8
1421  			}
1422  
1423  			d += emitCopy(dst[d:], repeat, s-base)
1424  			if debug {
1425  				// Validate match.
1426  				if s <= candidate {
1427  					panic("s <= candidate")
1428  				}
1429  				a := src[base:s]
1430  				b := src[base-repeat : base-repeat+(s-base)]
1431  				if !bytes.Equal(a, b) {
1432  					panic("mismatch")
1433  				}
1434  			}
1435  			if debug {
1436  				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
1437  			}
1438  			nextEmit = s
1439  			if s >= sLimit {
1440  				goto emitRemainder
1441  			}
1442  
1443  			if d > dstLimit {
1444  				// Do we have space for more, if not bail.
1445  				return 0
1446  			}
1447  			// Check for an immediate match, otherwise start search at s+1
1448  			x := load64(src, s-2)
1449  			m2Hash := hash6(x, tableBits)
1450  			currHash := hash6(x>>16, tableBits)
1451  			candidate = int(table[currHash])
1452  			table[m2Hash] = uint32(s - 2)
1453  			table[currHash] = uint32(s)
1454  			if debug && s == candidate {
1455  				panic("s == candidate")
1456  			}
1457  			if uint32(x>>16) != load32(src, candidate) {
1458  				cv = load64(src, s+1)
1459  				s++
1460  				break
1461  			}
1462  		}
1463  	}
1464  
1465  emitRemainder:
1466  	if nextEmit < len(src) {
1467  		// Bail if we exceed the maximum size.
1468  		if d+len(src)-nextEmit > dstLimit {
1469  			return 0
1470  		}
1471  		d += emitLiteral(dst[d:], src[nextEmit:])
1472  		if debug && nextEmit != s {
1473  			fmt.Println("emitted ", len(src)-nextEmit, "literals")
1474  		}
1475  	}
1476  	return d
1477  }
1478