encode_best.go raw

   1  // Copyright 2016 The Snappy-Go Authors. All rights reserved.
   2  // Copyright (c) 2019 Klaus Post. All rights reserved.
   3  // Use of this source code is governed by a BSD-style
   4  // license that can be found in the LICENSE file.
   5  
   6  package s2
   7  
   8  import (
   9  	"fmt"
  10  	"math"
  11  	"math/bits"
  12  )
  13  
  14  // encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
  15  // assumes that the varint-encoded length of the decompressed bytes has already
  16  // been written.
  17  //
  18  // It also assumes that:
  19  //
  20  //	len(dst) >= MaxEncodedLen(len(src)) &&
  21  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  22  func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
  23  	// Initialize the hash tables.
  24  	const (
  25  		// Long hash matches.
  26  		lTableBits    = 19
  27  		maxLTableSize = 1 << lTableBits
  28  
  29  		// Short hash matches.
  30  		sTableBits    = 16
  31  		maxSTableSize = 1 << sTableBits
  32  
  33  		inputMargin = 8 + 2
  34  
  35  		debug = false
  36  	)
  37  
  38  	// sLimit is when to stop looking for offset/length copies. The inputMargin
  39  	// lets us use a fast path for emitLiteral in the main loop, while we are
  40  	// looking for copies.
  41  	sLimit := len(src) - inputMargin
  42  	if len(src) < minNonLiteralBlockSize {
  43  		return 0
  44  	}
  45  	sLimitDict := min(len(src)-inputMargin, MaxDictSrcOffset-inputMargin)
  46  
  47  	var lTable [maxLTableSize]uint64
  48  	var sTable [maxSTableSize]uint64
  49  
  50  	// Bail if we can't compress to at least this.
  51  	dstLimit := len(src) - 5
  52  
  53  	// nextEmit is where in src the next emitLiteral should start from.
  54  	nextEmit := 0
  55  
  56  	// The encoded form must start with a literal, as there are no previous
  57  	// bytes to copy, so we start looking for hash matches at s == 1.
  58  	s := 1
  59  	repeat := 1
  60  	if dict != nil {
  61  		dict.initBest()
  62  		s = 0
  63  		repeat = len(dict.dict) - dict.repeat
  64  	}
  65  	cv := load64(src, s)
  66  
  67  	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
  68  	const lowbitMask = 0xffffffff
  69  	getCur := func(x uint64) int {
  70  		return int(x & lowbitMask)
  71  	}
  72  	getPrev := func(x uint64) int {
  73  		return int(x >> 32)
  74  	}
  75  	const maxSkip = 64
  76  
  77  	for {
  78  		type match struct {
  79  			offset    int
  80  			s         int
  81  			length    int
  82  			score     int
  83  			rep, dict bool
  84  		}
  85  		var best match
  86  		for {
  87  			// Next src position to check
  88  			nextS := (s-nextEmit)>>8 + 1
  89  			if nextS > maxSkip {
  90  				nextS = s + maxSkip
  91  			} else {
  92  				nextS += s
  93  			}
  94  			if nextS > sLimit {
  95  				goto emitRemainder
  96  			}
  97  			if dict != nil && s >= MaxDictSrcOffset {
  98  				dict = nil
  99  				if repeat > s {
 100  					repeat = math.MinInt32
 101  				}
 102  			}
 103  			hashL := hash8(cv, lTableBits)
 104  			hashS := hash4(cv, sTableBits)
 105  			candidateL := lTable[hashL]
 106  			candidateS := sTable[hashS]
 107  
 108  			score := func(m match) int {
 109  				// Matches that are longer forward are penalized since we must emit it as a literal.
 110  				score := m.length - m.s
 111  				if nextEmit == m.s {
 112  					// If we do not have to emit literals, we save 1 byte
 113  					score++
 114  				}
 115  				offset := m.s - m.offset
 116  				if m.rep {
 117  					return score - emitRepeatSize(offset, m.length)
 118  				}
 119  				return score - emitCopySize(offset, m.length)
 120  			}
 121  
 122  			matchAt := func(offset, s int, first uint32, rep bool) match {
 123  				if best.length != 0 && best.s-best.offset == s-offset {
 124  					// Don't retest if we have the same offset.
 125  					return match{offset: offset, s: s}
 126  				}
 127  				if load32(src, offset) != first {
 128  					return match{offset: offset, s: s}
 129  				}
 130  				m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
 131  				s += 4
 132  				for s < len(src) {
 133  					if len(src)-s < 8 {
 134  						if src[s] == src[m.length] {
 135  							m.length++
 136  							s++
 137  							continue
 138  						}
 139  						break
 140  					}
 141  					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
 142  						m.length += bits.TrailingZeros64(diff) >> 3
 143  						break
 144  					}
 145  					s += 8
 146  					m.length += 8
 147  				}
 148  				m.length -= offset
 149  				m.score = score(m)
 150  				if m.score <= -m.s {
 151  					// Eliminate if no savings, we might find a better one.
 152  					m.length = 0
 153  				}
 154  				return m
 155  			}
 156  			matchDict := func(candidate, s int, first uint32, rep bool) match {
 157  				if s >= MaxDictSrcOffset {
 158  					return match{offset: candidate, s: s}
 159  				}
 160  				// Calculate offset as if in continuous array with s
 161  				offset := -len(dict.dict) + candidate
 162  				if best.length != 0 && best.s-best.offset == s-offset && !rep {
 163  					// Don't retest if we have the same offset.
 164  					return match{offset: offset, s: s}
 165  				}
 166  
 167  				if load32(dict.dict, candidate) != first {
 168  					return match{offset: offset, s: s}
 169  				}
 170  				m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
 171  				s += 4
 172  				if !rep {
 173  					for s < sLimitDict && m.length < len(dict.dict) {
 174  						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
 175  							if src[s] == dict.dict[m.length] {
 176  								m.length++
 177  								s++
 178  								continue
 179  							}
 180  							break
 181  						}
 182  						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
 183  							m.length += bits.TrailingZeros64(diff) >> 3
 184  							break
 185  						}
 186  						s += 8
 187  						m.length += 8
 188  					}
 189  				} else {
 190  					for s < len(src) && m.length < len(dict.dict) {
 191  						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
 192  							if src[s] == dict.dict[m.length] {
 193  								m.length++
 194  								s++
 195  								continue
 196  							}
 197  							break
 198  						}
 199  						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
 200  							m.length += bits.TrailingZeros64(diff) >> 3
 201  							break
 202  						}
 203  						s += 8
 204  						m.length += 8
 205  					}
 206  				}
 207  				m.length -= candidate
 208  				m.score = score(m)
 209  				if m.score <= -m.s {
 210  					// Eliminate if no savings, we might find a better one.
 211  					m.length = 0
 212  				}
 213  				return m
 214  			}
 215  
 216  			bestOf := func(a, b match) match {
 217  				if b.length == 0 {
 218  					return a
 219  				}
 220  				if a.length == 0 {
 221  					return b
 222  				}
 223  				as := a.score + b.s
 224  				bs := b.score + a.s
 225  				if as >= bs {
 226  					return a
 227  				}
 228  				return b
 229  			}
 230  
 231  			if s > 0 {
 232  				best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
 233  				best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
 234  				best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
 235  			}
 236  			if dict != nil {
 237  				candidateL := dict.bestTableLong[hashL]
 238  				candidateS := dict.bestTableShort[hashS]
 239  				best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
 240  				best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
 241  				best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
 242  				best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
 243  			}
 244  			{
 245  				if (dict == nil || repeat <= s) && repeat > 0 {
 246  					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
 247  				} else if s-repeat < -4 && dict != nil {
 248  					candidate := len(dict.dict) - (repeat - s)
 249  					best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
 250  					candidate++
 251  					best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
 252  				}
 253  
 254  				if best.length > 0 {
 255  					hashS := hash4(cv>>8, sTableBits)
 256  					// s+1
 257  					nextShort := sTable[hashS]
 258  					s := s + 1
 259  					cv := load64(src, s)
 260  					hashL := hash8(cv, lTableBits)
 261  					nextLong := lTable[hashL]
 262  					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
 263  					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
 264  					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
 265  					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
 266  
 267  					// Dict at + 1
 268  					if dict != nil {
 269  						candidateL := dict.bestTableLong[hashL]
 270  						candidateS := dict.bestTableShort[hashS]
 271  
 272  						best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
 273  						best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
 274  					}
 275  
 276  					// s+2
 277  					if true {
 278  						hashS := hash4(cv>>8, sTableBits)
 279  
 280  						nextShort = sTable[hashS]
 281  						s++
 282  						cv = load64(src, s)
 283  						hashL := hash8(cv, lTableBits)
 284  						nextLong = lTable[hashL]
 285  
 286  						if (dict == nil || repeat <= s) && repeat > 0 {
 287  							// Repeat at + 2
 288  							best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
 289  						} else if repeat-s > 4 && dict != nil {
 290  							candidate := len(dict.dict) - (repeat - s)
 291  							best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
 292  						}
 293  						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
 294  						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
 295  						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
 296  						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
 297  
 298  						// Dict at +2
 299  						// Very small gain
 300  						if dict != nil {
 301  							candidateL := dict.bestTableLong[hashL]
 302  							candidateS := dict.bestTableShort[hashS]
 303  
 304  							best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
 305  							best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
 306  						}
 307  					}
 308  					// Search for a match at best match end, see if that is better.
 309  					// Allow some bytes at the beginning to mismatch.
 310  					// Sweet spot is around 1-2 bytes, but depends on input.
 311  					// The skipped bytes are tested in Extend backwards,
 312  					// and still picked up as part of the match if they do.
 313  					const skipBeginning = 2
 314  					const skipEnd = 1
 315  					if sAt := best.s + best.length - skipEnd; sAt < sLimit {
 316  
 317  						sBack := best.s + skipBeginning - skipEnd
 318  						backL := best.length - skipBeginning
 319  						// Load initial values
 320  						cv = load64(src, sBack)
 321  
 322  						// Grab candidates...
 323  						next := lTable[hash8(load64(src, sAt), lTableBits)]
 324  
 325  						if checkAt := getCur(next) - backL; checkAt > 0 {
 326  							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
 327  						}
 328  						if checkAt := getPrev(next) - backL; checkAt > 0 {
 329  							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
 330  						}
 331  						// Disabled: Extremely small gain
 332  						if false {
 333  							next = sTable[hash4(load64(src, sAt), sTableBits)]
 334  							if checkAt := getCur(next) - backL; checkAt > 0 {
 335  								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
 336  							}
 337  							if checkAt := getPrev(next) - backL; checkAt > 0 {
 338  								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
 339  							}
 340  						}
 341  					}
 342  				}
 343  			}
 344  
 345  			// Update table
 346  			lTable[hashL] = uint64(s) | candidateL<<32
 347  			sTable[hashS] = uint64(s) | candidateS<<32
 348  
 349  			if best.length > 0 {
 350  				break
 351  			}
 352  
 353  			cv = load64(src, nextS)
 354  			s = nextS
 355  		}
 356  
 357  		// Extend backwards, not needed for repeats...
 358  		s = best.s
 359  		if !best.rep && !best.dict {
 360  			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
 361  				best.offset--
 362  				best.length++
 363  				s--
 364  			}
 365  		}
 366  		if false && best.offset >= s {
 367  			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
 368  		}
 369  		// Bail if we exceed the maximum size.
 370  		if d+(s-nextEmit) > dstLimit {
 371  			return 0
 372  		}
 373  
 374  		base := s
 375  		offset := s - best.offset
 376  		s += best.length
 377  
 378  		if offset > 65535 && s-base <= 5 && !best.rep {
 379  			// Bail if the match is equal or worse to the encoding.
 380  			s = best.s + 1
 381  			if s >= sLimit {
 382  				goto emitRemainder
 383  			}
 384  			cv = load64(src, s)
 385  			continue
 386  		}
 387  		if debug && nextEmit != base {
 388  			fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
 389  		}
 390  		d += emitLiteral(dst[d:], src[nextEmit:base])
 391  		if best.rep {
 392  			if nextEmit > 0 || best.dict {
 393  				if debug {
 394  					fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
 395  				}
 396  				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
 397  				d += emitRepeat(dst[d:], offset, best.length)
 398  			} else {
 399  				// First match without dict cannot be a repeat.
 400  				if debug {
 401  					fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
 402  				}
 403  				d += emitCopy(dst[d:], offset, best.length)
 404  			}
 405  		} else {
 406  			if debug {
 407  				fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
 408  			}
 409  			d += emitCopy(dst[d:], offset, best.length)
 410  		}
 411  		repeat = offset
 412  
 413  		nextEmit = s
 414  		if s >= sLimit {
 415  			goto emitRemainder
 416  		}
 417  
 418  		if d > dstLimit {
 419  			// Do we have space for more, if not bail.
 420  			return 0
 421  		}
 422  		// Fill tables...
 423  		for i := best.s + 1; i < s; i++ {
 424  			cv0 := load64(src, i)
 425  			long0 := hash8(cv0, lTableBits)
 426  			short0 := hash4(cv0, sTableBits)
 427  			lTable[long0] = uint64(i) | lTable[long0]<<32
 428  			sTable[short0] = uint64(i) | sTable[short0]<<32
 429  		}
 430  		cv = load64(src, s)
 431  	}
 432  
 433  emitRemainder:
 434  	if nextEmit < len(src) {
 435  		// Bail if we exceed the maximum size.
 436  		if d+len(src)-nextEmit > dstLimit {
 437  			return 0
 438  		}
 439  		if debug && nextEmit != s {
 440  			fmt.Println("emitted ", len(src)-nextEmit, "literals")
 441  		}
 442  		d += emitLiteral(dst[d:], src[nextEmit:])
 443  	}
 444  	return d
 445  }
 446  
 447  // encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
 448  // assumes that the varint-encoded length of the decompressed bytes has already
 449  // been written.
 450  //
 451  // It also assumes that:
 452  //
 453  //	len(dst) >= MaxEncodedLen(len(src)) &&
 454  //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
 455  func encodeBlockBestSnappy(dst, src []byte) (d int) {
 456  	// Initialize the hash tables.
 457  	const (
 458  		// Long hash matches.
 459  		lTableBits    = 19
 460  		maxLTableSize = 1 << lTableBits
 461  
 462  		// Short hash matches.
 463  		sTableBits    = 16
 464  		maxSTableSize = 1 << sTableBits
 465  
 466  		inputMargin = 8 + 2
 467  	)
 468  
 469  	// sLimit is when to stop looking for offset/length copies. The inputMargin
 470  	// lets us use a fast path for emitLiteral in the main loop, while we are
 471  	// looking for copies.
 472  	sLimit := len(src) - inputMargin
 473  	if len(src) < minNonLiteralBlockSize {
 474  		return 0
 475  	}
 476  
 477  	var lTable [maxLTableSize]uint64
 478  	var sTable [maxSTableSize]uint64
 479  
 480  	// Bail if we can't compress to at least this.
 481  	dstLimit := len(src) - 5
 482  
 483  	// nextEmit is where in src the next emitLiteral should start from.
 484  	nextEmit := 0
 485  
 486  	// The encoded form must start with a literal, as there are no previous
 487  	// bytes to copy, so we start looking for hash matches at s == 1.
 488  	s := 1
 489  	cv := load64(src, s)
 490  
 491  	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
 492  	repeat := 1
 493  	const lowbitMask = 0xffffffff
 494  	getCur := func(x uint64) int {
 495  		return int(x & lowbitMask)
 496  	}
 497  	getPrev := func(x uint64) int {
 498  		return int(x >> 32)
 499  	}
 500  	const maxSkip = 64
 501  
 502  	for {
 503  		type match struct {
 504  			offset int
 505  			s      int
 506  			length int
 507  			score  int
 508  		}
 509  		var best match
 510  		for {
 511  			// Next src position to check
 512  			nextS := (s-nextEmit)>>8 + 1
 513  			if nextS > maxSkip {
 514  				nextS = s + maxSkip
 515  			} else {
 516  				nextS += s
 517  			}
 518  			if nextS > sLimit {
 519  				goto emitRemainder
 520  			}
 521  			hashL := hash8(cv, lTableBits)
 522  			hashS := hash4(cv, sTableBits)
 523  			candidateL := lTable[hashL]
 524  			candidateS := sTable[hashS]
 525  
 526  			score := func(m match) int {
 527  				// Matches that are longer forward are penalized since we must emit it as a literal.
 528  				score := m.length - m.s
 529  				if nextEmit == m.s {
 530  					// If we do not have to emit literals, we save 1 byte
 531  					score++
 532  				}
 533  				offset := m.s - m.offset
 534  
 535  				return score - emitCopyNoRepeatSize(offset, m.length)
 536  			}
 537  
 538  			matchAt := func(offset, s int, first uint32) match {
 539  				if best.length != 0 && best.s-best.offset == s-offset {
 540  					// Don't retest if we have the same offset.
 541  					return match{offset: offset, s: s}
 542  				}
 543  				if load32(src, offset) != first {
 544  					return match{offset: offset, s: s}
 545  				}
 546  				m := match{offset: offset, s: s, length: 4 + offset}
 547  				s += 4
 548  				for s <= sLimit {
 549  					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
 550  						m.length += bits.TrailingZeros64(diff) >> 3
 551  						break
 552  					}
 553  					s += 8
 554  					m.length += 8
 555  				}
 556  				m.length -= offset
 557  				m.score = score(m)
 558  				if m.score <= -m.s {
 559  					// Eliminate if no savings, we might find a better one.
 560  					m.length = 0
 561  				}
 562  				return m
 563  			}
 564  
 565  			bestOf := func(a, b match) match {
 566  				if b.length == 0 {
 567  					return a
 568  				}
 569  				if a.length == 0 {
 570  					return b
 571  				}
 572  				as := a.score + b.s
 573  				bs := b.score + a.s
 574  				if as >= bs {
 575  					return a
 576  				}
 577  				return b
 578  			}
 579  
 580  			best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
 581  			best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
 582  			best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))
 583  
 584  			{
 585  				best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
 586  				if best.length > 0 {
 587  					// s+1
 588  					nextShort := sTable[hash4(cv>>8, sTableBits)]
 589  					s := s + 1
 590  					cv := load64(src, s)
 591  					nextLong := lTable[hash8(cv, lTableBits)]
 592  					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
 593  					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
 594  					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
 595  					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
 596  					// Repeat at + 2
 597  					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
 598  
 599  					// s+2
 600  					if true {
 601  						nextShort = sTable[hash4(cv>>8, sTableBits)]
 602  						s++
 603  						cv = load64(src, s)
 604  						nextLong = lTable[hash8(cv, lTableBits)]
 605  						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
 606  						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
 607  						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
 608  						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
 609  					}
 610  					// Search for a match at best match end, see if that is better.
 611  					if sAt := best.s + best.length; sAt < sLimit {
 612  						sBack := best.s
 613  						backL := best.length
 614  						// Load initial values
 615  						cv = load64(src, sBack)
 616  						// Search for mismatch
 617  						next := lTable[hash8(load64(src, sAt), lTableBits)]
 618  						//next := sTable[hash4(load64(src, sAt), sTableBits)]
 619  
 620  						if checkAt := getCur(next) - backL; checkAt > 0 {
 621  							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
 622  						}
 623  						if checkAt := getPrev(next) - backL; checkAt > 0 {
 624  							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
 625  						}
 626  					}
 627  				}
 628  			}
 629  
 630  			// Update table
 631  			lTable[hashL] = uint64(s) | candidateL<<32
 632  			sTable[hashS] = uint64(s) | candidateS<<32
 633  
 634  			if best.length > 0 {
 635  				break
 636  			}
 637  
 638  			cv = load64(src, nextS)
 639  			s = nextS
 640  		}
 641  
 642  		// Extend backwards, not needed for repeats...
 643  		s = best.s
 644  		if true {
 645  			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
 646  				best.offset--
 647  				best.length++
 648  				s--
 649  			}
 650  		}
 651  		if false && best.offset >= s {
 652  			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
 653  		}
 654  		// Bail if we exceed the maximum size.
 655  		if d+(s-nextEmit) > dstLimit {
 656  			return 0
 657  		}
 658  
 659  		base := s
 660  		offset := s - best.offset
 661  
 662  		s += best.length
 663  
 664  		if offset > 65535 && s-base <= 5 {
 665  			// Bail if the match is equal or worse to the encoding.
 666  			s = best.s + 1
 667  			if s >= sLimit {
 668  				goto emitRemainder
 669  			}
 670  			cv = load64(src, s)
 671  			continue
 672  		}
 673  		d += emitLiteral(dst[d:], src[nextEmit:base])
 674  		d += emitCopyNoRepeat(dst[d:], offset, best.length)
 675  		repeat = offset
 676  
 677  		nextEmit = s
 678  		if s >= sLimit {
 679  			goto emitRemainder
 680  		}
 681  
 682  		if d > dstLimit {
 683  			// Do we have space for more, if not bail.
 684  			return 0
 685  		}
 686  		// Fill tables...
 687  		for i := best.s + 1; i < s; i++ {
 688  			cv0 := load64(src, i)
 689  			long0 := hash8(cv0, lTableBits)
 690  			short0 := hash4(cv0, sTableBits)
 691  			lTable[long0] = uint64(i) | lTable[long0]<<32
 692  			sTable[short0] = uint64(i) | sTable[short0]<<32
 693  		}
 694  		cv = load64(src, s)
 695  	}
 696  
 697  emitRemainder:
 698  	if nextEmit < len(src) {
 699  		// Bail if we exceed the maximum size.
 700  		if d+len(src)-nextEmit > dstLimit {
 701  			return 0
 702  		}
 703  		d += emitLiteral(dst[d:], src[nextEmit:])
 704  	}
 705  	return d
 706  }
 707  
 708  // emitCopySize returns the size to encode the offset+length
 709  //
 710  // It assumes that:
 711  //
 712  //	1 <= offset && offset <= math.MaxUint32
 713  //	4 <= length && length <= 1 << 24
 714  func emitCopySize(offset, length int) int {
 715  	if offset >= 65536 {
 716  		i := 0
 717  		if length > 64 {
 718  			length -= 64
 719  			if length >= 4 {
 720  				// Emit remaining as repeats
 721  				return 5 + emitRepeatSize(offset, length)
 722  			}
 723  			i = 5
 724  		}
 725  		if length == 0 {
 726  			return i
 727  		}
 728  		return i + 5
 729  	}
 730  
 731  	// Offset no more than 2 bytes.
 732  	if length > 64 {
 733  		if offset < 2048 {
 734  			// Emit 8 bytes, then rest as repeats...
 735  			return 2 + emitRepeatSize(offset, length-8)
 736  		}
 737  		// Emit remaining as repeats, at least 4 bytes remain.
 738  		return 3 + emitRepeatSize(offset, length-60)
 739  	}
 740  	if length >= 12 || offset >= 2048 {
 741  		return 3
 742  	}
 743  	// Emit the remaining copy, encoded as 2 bytes.
 744  	return 2
 745  }
 746  
 747  // emitCopyNoRepeatSize returns the size to encode the offset+length
 748  //
 749  // It assumes that:
 750  //
 751  //	1 <= offset && offset <= math.MaxUint32
 752  //	4 <= length && length <= 1 << 24
 753  func emitCopyNoRepeatSize(offset, length int) int {
 754  	if offset >= 65536 {
 755  		return 5 + 5*(length/64)
 756  	}
 757  
 758  	// Offset no more than 2 bytes.
 759  	if length > 64 {
 760  		// Emit remaining as repeats, at least 4 bytes remain.
 761  		return 3 + 3*(length/60)
 762  	}
 763  	if length >= 12 || offset >= 2048 {
 764  		return 3
 765  	}
 766  	// Emit the remaining copy, encoded as 2 bytes.
 767  	return 2
 768  }
 769  
 770  // emitRepeatSize returns the number of bytes required to encode a repeat.
 771  // Length must be at least 4 and < 1<<24
 772  func emitRepeatSize(offset, length int) int {
 773  	// Repeat offset, make length cheaper
 774  	if length <= 4+4 || (length < 8+4 && offset < 2048) {
 775  		return 2
 776  	}
 777  	if length < (1<<8)+4+4 {
 778  		return 3
 779  	}
 780  	if length < (1<<16)+(1<<8)+4 {
 781  		return 4
 782  	}
 783  	const maxRepeat = (1 << 24) - 1
 784  	length -= (1 << 16) - 4
 785  	left := 0
 786  	if length > maxRepeat {
 787  		left = length - maxRepeat + 4
 788  	}
 789  	if left > 0 {
 790  		return 5 + emitRepeatSize(offset, left)
 791  	}
 792  	return 5
 793  }
 794