decompress.go raw

   1  package huff0
   2  
   3  import (
   4  	"errors"
   5  	"fmt"
   6  	"io"
   7  	"sync"
   8  
   9  	"github.com/klauspost/compress/fse"
  10  )
  11  
  12  type dTable struct {
  13  	single []dEntrySingle
  14  }
  15  
  16  // single-symbols decoding
  17  type dEntrySingle struct {
  18  	entry uint16
  19  }
  20  
  21  // Uses special code for all tables that are < 8 bits.
  22  const use8BitTables = true
  23  
  24  // ReadTable will read a table from the input.
  25  // The size of the input may be larger than the table definition.
  26  // Any content remaining after the table definition will be returned.
  27  // If no Scratch is provided a new one is allocated.
  28  // The returned Scratch can be used for encoding or decoding input using this table.
  29  func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
  30  	s, err = s.prepare(nil)
  31  	if err != nil {
  32  		return s, nil, err
  33  	}
  34  	if len(in) <= 1 {
  35  		return s, nil, errors.New("input too small for table")
  36  	}
  37  	iSize := in[0]
  38  	in = in[1:]
  39  	if iSize >= 128 {
  40  		// Uncompressed
  41  		oSize := iSize - 127
  42  		iSize = (oSize + 1) / 2
  43  		if int(iSize) > len(in) {
  44  			return s, nil, errors.New("input too small for table")
  45  		}
  46  		for n := uint8(0); n < oSize; n += 2 {
  47  			v := in[n/2]
  48  			s.huffWeight[n] = v >> 4
  49  			s.huffWeight[n+1] = v & 15
  50  		}
  51  		s.symbolLen = uint16(oSize)
  52  		in = in[iSize:]
  53  	} else {
  54  		if len(in) < int(iSize) {
  55  			return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
  56  		}
  57  		// FSE compressed weights
  58  		s.fse.DecompressLimit = 255
  59  		hw := s.huffWeight[:]
  60  		s.fse.Out = hw
  61  		b, err := fse.Decompress(in[:iSize], s.fse)
  62  		s.fse.Out = nil
  63  		if err != nil {
  64  			return s, nil, fmt.Errorf("fse decompress returned: %w", err)
  65  		}
  66  		if len(b) > 255 {
  67  			return s, nil, errors.New("corrupt input: output table too large")
  68  		}
  69  		s.symbolLen = uint16(len(b))
  70  		in = in[iSize:]
  71  	}
  72  
  73  	// collect weight stats
  74  	var rankStats [16]uint32
  75  	weightTotal := uint32(0)
  76  	for _, v := range s.huffWeight[:s.symbolLen] {
  77  		if v > tableLogMax {
  78  			return s, nil, errors.New("corrupt input: weight too large")
  79  		}
  80  		v2 := v & 15
  81  		rankStats[v2]++
  82  		// (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
  83  		weightTotal += (1 << v2) >> 1
  84  	}
  85  	if weightTotal == 0 {
  86  		return s, nil, errors.New("corrupt input: weights zero")
  87  	}
  88  
  89  	// get last non-null symbol weight (implied, total must be 2^n)
  90  	{
  91  		tableLog := highBit32(weightTotal) + 1
  92  		if tableLog > tableLogMax {
  93  			return s, nil, errors.New("corrupt input: tableLog too big")
  94  		}
  95  		s.actualTableLog = uint8(tableLog)
  96  		// determine last weight
  97  		{
  98  			total := uint32(1) << tableLog
  99  			rest := total - weightTotal
 100  			verif := uint32(1) << highBit32(rest)
 101  			lastWeight := highBit32(rest) + 1
 102  			if verif != rest {
 103  				// last value must be a clean power of 2
 104  				return s, nil, errors.New("corrupt input: last value not power of two")
 105  			}
 106  			s.huffWeight[s.symbolLen] = uint8(lastWeight)
 107  			s.symbolLen++
 108  			rankStats[lastWeight]++
 109  		}
 110  	}
 111  
 112  	if (rankStats[1] < 2) || (rankStats[1]&1 != 0) {
 113  		// by construction : at least 2 elts of rank 1, must be even
 114  		return s, nil, errors.New("corrupt input: min elt size, even check failed ")
 115  	}
 116  
 117  	// TODO: Choose between single/double symbol decoding
 118  
 119  	// Calculate starting value for each rank
 120  	{
 121  		var nextRankStart uint32
 122  		for n := uint8(1); n < s.actualTableLog+1; n++ {
 123  			current := nextRankStart
 124  			nextRankStart += rankStats[n] << (n - 1)
 125  			rankStats[n] = current
 126  		}
 127  	}
 128  
 129  	// fill DTable (always full size)
 130  	tSize := 1 << tableLogMax
 131  	if len(s.dt.single) != tSize {
 132  		s.dt.single = make([]dEntrySingle, tSize)
 133  	}
 134  	cTable := s.prevTable
 135  	if cap(cTable) < maxSymbolValue+1 {
 136  		cTable = make([]cTableEntry, 0, maxSymbolValue+1)
 137  	}
 138  	cTable = cTable[:maxSymbolValue+1]
 139  	s.prevTable = cTable[:s.symbolLen]
 140  	s.prevTableLog = s.actualTableLog
 141  
 142  	for n, w := range s.huffWeight[:s.symbolLen] {
 143  		if w == 0 {
 144  			cTable[n] = cTableEntry{
 145  				val:   0,
 146  				nBits: 0,
 147  			}
 148  			continue
 149  		}
 150  		length := (uint32(1) << w) >> 1
 151  		d := dEntrySingle{
 152  			entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
 153  		}
 154  
 155  		rank := &rankStats[w]
 156  		cTable[n] = cTableEntry{
 157  			val:   uint16(*rank >> (w - 1)),
 158  			nBits: uint8(d.entry),
 159  		}
 160  
 161  		single := s.dt.single[*rank : *rank+length]
 162  		for i := range single {
 163  			single[i] = d
 164  		}
 165  		*rank += length
 166  	}
 167  
 168  	return s, in, nil
 169  }
 170  
 171  // Decompress1X will decompress a 1X encoded stream.
 172  // The length of the supplied input must match the end of a block exactly.
 173  // Before this is called, the table must be initialized with ReadTable unless
 174  // the encoder re-used the table.
 175  // deprecated: Use the stateless Decoder() to get a concurrent version.
 176  func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
 177  	if cap(s.Out) < s.MaxDecodedSize {
 178  		s.Out = make([]byte, s.MaxDecodedSize)
 179  	}
 180  	s.Out = s.Out[:0:s.MaxDecodedSize]
 181  	s.Out, err = s.Decoder().Decompress1X(s.Out, in)
 182  	return s.Out, err
 183  }
 184  
 185  // Decompress4X will decompress a 4X encoded stream.
 186  // Before this is called, the table must be initialized with ReadTable unless
 187  // the encoder re-used the table.
 188  // The length of the supplied input must match the end of a block exactly.
 189  // The destination size of the uncompressed data must be known and provided.
 190  // deprecated: Use the stateless Decoder() to get a concurrent version.
 191  func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
 192  	if dstSize > s.MaxDecodedSize {
 193  		return nil, ErrMaxDecodedSizeExceeded
 194  	}
 195  	if cap(s.Out) < dstSize {
 196  		s.Out = make([]byte, s.MaxDecodedSize)
 197  	}
 198  	s.Out = s.Out[:0:dstSize]
 199  	s.Out, err = s.Decoder().Decompress4X(s.Out, in)
 200  	return s.Out, err
 201  }
 202  
 203  // Decoder will return a stateless decoder that can be used by multiple
 204  // decompressors concurrently.
 205  // Before this is called, the table must be initialized with ReadTable.
 206  // The Decoder is still linked to the scratch buffer so that cannot be reused.
 207  // However, it is safe to discard the scratch.
 208  func (s *Scratch) Decoder() *Decoder {
 209  	return &Decoder{
 210  		dt:             s.dt,
 211  		actualTableLog: s.actualTableLog,
 212  		bufs:           &s.decPool,
 213  	}
 214  }
 215  
 216  // Decoder provides stateless decoding.
 217  type Decoder struct {
 218  	dt             dTable
 219  	actualTableLog uint8
 220  	bufs           *sync.Pool
 221  }
 222  
 223  func (d *Decoder) buffer() *[4][256]byte {
 224  	buf, ok := d.bufs.Get().(*[4][256]byte)
 225  	if ok {
 226  		return buf
 227  	}
 228  	return &[4][256]byte{}
 229  }
 230  
 231  // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
 232  // The cap of the output buffer will be the maximum decompressed size.
 233  // The length of the supplied input must match the end of a block exactly.
 234  func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 235  	if d.actualTableLog == 8 {
 236  		return d.decompress1X8BitExactly(dst, src)
 237  	}
 238  	var br bitReaderBytes
 239  	err := br.init(src)
 240  	if err != nil {
 241  		return dst, err
 242  	}
 243  	maxDecodedSize := cap(dst)
 244  	dst = dst[:0]
 245  
 246  	// Avoid bounds check by always having full sized table.
 247  	dt := d.dt.single[:256]
 248  
 249  	// Use temp table to avoid bound checks/append penalty.
 250  	bufs := d.buffer()
 251  	buf := &bufs[0]
 252  	var off uint8
 253  
 254  	switch d.actualTableLog {
 255  	case 8:
 256  		const shift = 0
 257  		for br.off >= 4 {
 258  			br.fillFast()
 259  			v := dt[uint8(br.value>>(56+shift))]
 260  			br.advance(uint8(v.entry))
 261  			buf[off+0] = uint8(v.entry >> 8)
 262  
 263  			v = dt[uint8(br.value>>(56+shift))]
 264  			br.advance(uint8(v.entry))
 265  			buf[off+1] = uint8(v.entry >> 8)
 266  
 267  			v = dt[uint8(br.value>>(56+shift))]
 268  			br.advance(uint8(v.entry))
 269  			buf[off+2] = uint8(v.entry >> 8)
 270  
 271  			v = dt[uint8(br.value>>(56+shift))]
 272  			br.advance(uint8(v.entry))
 273  			buf[off+3] = uint8(v.entry >> 8)
 274  
 275  			off += 4
 276  			if off == 0 {
 277  				if len(dst)+256 > maxDecodedSize {
 278  					br.close()
 279  					d.bufs.Put(bufs)
 280  					return nil, ErrMaxDecodedSizeExceeded
 281  				}
 282  				dst = append(dst, buf[:]...)
 283  			}
 284  		}
 285  	case 7:
 286  		const shift = 8 - 7
 287  		for br.off >= 4 {
 288  			br.fillFast()
 289  			v := dt[uint8(br.value>>(56+shift))]
 290  			br.advance(uint8(v.entry))
 291  			buf[off+0] = uint8(v.entry >> 8)
 292  
 293  			v = dt[uint8(br.value>>(56+shift))]
 294  			br.advance(uint8(v.entry))
 295  			buf[off+1] = uint8(v.entry >> 8)
 296  
 297  			v = dt[uint8(br.value>>(56+shift))]
 298  			br.advance(uint8(v.entry))
 299  			buf[off+2] = uint8(v.entry >> 8)
 300  
 301  			v = dt[uint8(br.value>>(56+shift))]
 302  			br.advance(uint8(v.entry))
 303  			buf[off+3] = uint8(v.entry >> 8)
 304  
 305  			off += 4
 306  			if off == 0 {
 307  				if len(dst)+256 > maxDecodedSize {
 308  					br.close()
 309  					d.bufs.Put(bufs)
 310  					return nil, ErrMaxDecodedSizeExceeded
 311  				}
 312  				dst = append(dst, buf[:]...)
 313  			}
 314  		}
 315  	case 6:
 316  		const shift = 8 - 6
 317  		for br.off >= 4 {
 318  			br.fillFast()
 319  			v := dt[uint8(br.value>>(56+shift))]
 320  			br.advance(uint8(v.entry))
 321  			buf[off+0] = uint8(v.entry >> 8)
 322  
 323  			v = dt[uint8(br.value>>(56+shift))]
 324  			br.advance(uint8(v.entry))
 325  			buf[off+1] = uint8(v.entry >> 8)
 326  
 327  			v = dt[uint8(br.value>>(56+shift))]
 328  			br.advance(uint8(v.entry))
 329  			buf[off+2] = uint8(v.entry >> 8)
 330  
 331  			v = dt[uint8(br.value>>(56+shift))]
 332  			br.advance(uint8(v.entry))
 333  			buf[off+3] = uint8(v.entry >> 8)
 334  
 335  			off += 4
 336  			if off == 0 {
 337  				if len(dst)+256 > maxDecodedSize {
 338  					d.bufs.Put(bufs)
 339  					br.close()
 340  					return nil, ErrMaxDecodedSizeExceeded
 341  				}
 342  				dst = append(dst, buf[:]...)
 343  			}
 344  		}
 345  	case 5:
 346  		const shift = 8 - 5
 347  		for br.off >= 4 {
 348  			br.fillFast()
 349  			v := dt[uint8(br.value>>(56+shift))]
 350  			br.advance(uint8(v.entry))
 351  			buf[off+0] = uint8(v.entry >> 8)
 352  
 353  			v = dt[uint8(br.value>>(56+shift))]
 354  			br.advance(uint8(v.entry))
 355  			buf[off+1] = uint8(v.entry >> 8)
 356  
 357  			v = dt[uint8(br.value>>(56+shift))]
 358  			br.advance(uint8(v.entry))
 359  			buf[off+2] = uint8(v.entry >> 8)
 360  
 361  			v = dt[uint8(br.value>>(56+shift))]
 362  			br.advance(uint8(v.entry))
 363  			buf[off+3] = uint8(v.entry >> 8)
 364  
 365  			off += 4
 366  			if off == 0 {
 367  				if len(dst)+256 > maxDecodedSize {
 368  					d.bufs.Put(bufs)
 369  					br.close()
 370  					return nil, ErrMaxDecodedSizeExceeded
 371  				}
 372  				dst = append(dst, buf[:]...)
 373  			}
 374  		}
 375  	case 4:
 376  		const shift = 8 - 4
 377  		for br.off >= 4 {
 378  			br.fillFast()
 379  			v := dt[uint8(br.value>>(56+shift))]
 380  			br.advance(uint8(v.entry))
 381  			buf[off+0] = uint8(v.entry >> 8)
 382  
 383  			v = dt[uint8(br.value>>(56+shift))]
 384  			br.advance(uint8(v.entry))
 385  			buf[off+1] = uint8(v.entry >> 8)
 386  
 387  			v = dt[uint8(br.value>>(56+shift))]
 388  			br.advance(uint8(v.entry))
 389  			buf[off+2] = uint8(v.entry >> 8)
 390  
 391  			v = dt[uint8(br.value>>(56+shift))]
 392  			br.advance(uint8(v.entry))
 393  			buf[off+3] = uint8(v.entry >> 8)
 394  
 395  			off += 4
 396  			if off == 0 {
 397  				if len(dst)+256 > maxDecodedSize {
 398  					d.bufs.Put(bufs)
 399  					br.close()
 400  					return nil, ErrMaxDecodedSizeExceeded
 401  				}
 402  				dst = append(dst, buf[:]...)
 403  			}
 404  		}
 405  	case 3:
 406  		const shift = 8 - 3
 407  		for br.off >= 4 {
 408  			br.fillFast()
 409  			v := dt[uint8(br.value>>(56+shift))]
 410  			br.advance(uint8(v.entry))
 411  			buf[off+0] = uint8(v.entry >> 8)
 412  
 413  			v = dt[uint8(br.value>>(56+shift))]
 414  			br.advance(uint8(v.entry))
 415  			buf[off+1] = uint8(v.entry >> 8)
 416  
 417  			v = dt[uint8(br.value>>(56+shift))]
 418  			br.advance(uint8(v.entry))
 419  			buf[off+2] = uint8(v.entry >> 8)
 420  
 421  			v = dt[uint8(br.value>>(56+shift))]
 422  			br.advance(uint8(v.entry))
 423  			buf[off+3] = uint8(v.entry >> 8)
 424  
 425  			off += 4
 426  			if off == 0 {
 427  				if len(dst)+256 > maxDecodedSize {
 428  					d.bufs.Put(bufs)
 429  					br.close()
 430  					return nil, ErrMaxDecodedSizeExceeded
 431  				}
 432  				dst = append(dst, buf[:]...)
 433  			}
 434  		}
 435  	case 2:
 436  		const shift = 8 - 2
 437  		for br.off >= 4 {
 438  			br.fillFast()
 439  			v := dt[uint8(br.value>>(56+shift))]
 440  			br.advance(uint8(v.entry))
 441  			buf[off+0] = uint8(v.entry >> 8)
 442  
 443  			v = dt[uint8(br.value>>(56+shift))]
 444  			br.advance(uint8(v.entry))
 445  			buf[off+1] = uint8(v.entry >> 8)
 446  
 447  			v = dt[uint8(br.value>>(56+shift))]
 448  			br.advance(uint8(v.entry))
 449  			buf[off+2] = uint8(v.entry >> 8)
 450  
 451  			v = dt[uint8(br.value>>(56+shift))]
 452  			br.advance(uint8(v.entry))
 453  			buf[off+3] = uint8(v.entry >> 8)
 454  
 455  			off += 4
 456  			if off == 0 {
 457  				if len(dst)+256 > maxDecodedSize {
 458  					d.bufs.Put(bufs)
 459  					br.close()
 460  					return nil, ErrMaxDecodedSizeExceeded
 461  				}
 462  				dst = append(dst, buf[:]...)
 463  			}
 464  		}
 465  	case 1:
 466  		const shift = 8 - 1
 467  		for br.off >= 4 {
 468  			br.fillFast()
 469  			v := dt[uint8(br.value>>(56+shift))]
 470  			br.advance(uint8(v.entry))
 471  			buf[off+0] = uint8(v.entry >> 8)
 472  
 473  			v = dt[uint8(br.value>>(56+shift))]
 474  			br.advance(uint8(v.entry))
 475  			buf[off+1] = uint8(v.entry >> 8)
 476  
 477  			v = dt[uint8(br.value>>(56+shift))]
 478  			br.advance(uint8(v.entry))
 479  			buf[off+2] = uint8(v.entry >> 8)
 480  
 481  			v = dt[uint8(br.value>>(56+shift))]
 482  			br.advance(uint8(v.entry))
 483  			buf[off+3] = uint8(v.entry >> 8)
 484  
 485  			off += 4
 486  			if off == 0 {
 487  				if len(dst)+256 > maxDecodedSize {
 488  					d.bufs.Put(bufs)
 489  					br.close()
 490  					return nil, ErrMaxDecodedSizeExceeded
 491  				}
 492  				dst = append(dst, buf[:]...)
 493  			}
 494  		}
 495  	default:
 496  		d.bufs.Put(bufs)
 497  		return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog)
 498  	}
 499  
 500  	if len(dst)+int(off) > maxDecodedSize {
 501  		d.bufs.Put(bufs)
 502  		br.close()
 503  		return nil, ErrMaxDecodedSizeExceeded
 504  	}
 505  	dst = append(dst, buf[:off]...)
 506  
 507  	// br < 4, so uint8 is fine
 508  	bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
 509  	shift := (8 - d.actualTableLog) & 7
 510  
 511  	for bitsLeft > 0 {
 512  		if br.bitsRead >= 64-8 {
 513  			for br.off > 0 {
 514  				br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
 515  				br.bitsRead -= 8
 516  				br.off--
 517  			}
 518  		}
 519  		if len(dst) >= maxDecodedSize {
 520  			br.close()
 521  			d.bufs.Put(bufs)
 522  			return nil, ErrMaxDecodedSizeExceeded
 523  		}
 524  		v := dt[br.peekByteFast()>>shift]
 525  		nBits := uint8(v.entry)
 526  		br.advance(nBits)
 527  		bitsLeft -= int8(nBits)
 528  		dst = append(dst, uint8(v.entry>>8))
 529  	}
 530  	d.bufs.Put(bufs)
 531  	return dst, br.close()
 532  }
 533  
 534  // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
 535  // The cap of the output buffer will be the maximum decompressed size.
 536  // The length of the supplied input must match the end of a block exactly.
 537  func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
 538  	var br bitReaderBytes
 539  	err := br.init(src)
 540  	if err != nil {
 541  		return dst, err
 542  	}
 543  	maxDecodedSize := cap(dst)
 544  	dst = dst[:0]
 545  
 546  	// Avoid bounds check by always having full sized table.
 547  	dt := d.dt.single[:256]
 548  
 549  	// Use temp table to avoid bound checks/append penalty.
 550  	bufs := d.buffer()
 551  	buf := &bufs[0]
 552  	var off uint8
 553  
 554  	const shift = 56
 555  
 556  	//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
 557  	for br.off >= 4 {
 558  		br.fillFast()
 559  		v := dt[uint8(br.value>>shift)]
 560  		br.advance(uint8(v.entry))
 561  		buf[off+0] = uint8(v.entry >> 8)
 562  
 563  		v = dt[uint8(br.value>>shift)]
 564  		br.advance(uint8(v.entry))
 565  		buf[off+1] = uint8(v.entry >> 8)
 566  
 567  		v = dt[uint8(br.value>>shift)]
 568  		br.advance(uint8(v.entry))
 569  		buf[off+2] = uint8(v.entry >> 8)
 570  
 571  		v = dt[uint8(br.value>>shift)]
 572  		br.advance(uint8(v.entry))
 573  		buf[off+3] = uint8(v.entry >> 8)
 574  
 575  		off += 4
 576  		if off == 0 {
 577  			if len(dst)+256 > maxDecodedSize {
 578  				d.bufs.Put(bufs)
 579  				br.close()
 580  				return nil, ErrMaxDecodedSizeExceeded
 581  			}
 582  			dst = append(dst, buf[:]...)
 583  		}
 584  	}
 585  
 586  	if len(dst)+int(off) > maxDecodedSize {
 587  		d.bufs.Put(bufs)
 588  		br.close()
 589  		return nil, ErrMaxDecodedSizeExceeded
 590  	}
 591  	dst = append(dst, buf[:off]...)
 592  
 593  	// br < 4, so uint8 is fine
 594  	bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
 595  	for bitsLeft > 0 {
 596  		if br.bitsRead >= 64-8 {
 597  			for br.off > 0 {
 598  				br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
 599  				br.bitsRead -= 8
 600  				br.off--
 601  			}
 602  		}
 603  		if len(dst) >= maxDecodedSize {
 604  			d.bufs.Put(bufs)
 605  			br.close()
 606  			return nil, ErrMaxDecodedSizeExceeded
 607  		}
 608  		v := dt[br.peekByteFast()]
 609  		nBits := uint8(v.entry)
 610  		br.advance(nBits)
 611  		bitsLeft -= int8(nBits)
 612  		dst = append(dst, uint8(v.entry>>8))
 613  	}
 614  	d.bufs.Put(bufs)
 615  	return dst, br.close()
 616  }
 617  
 618  // Decompress4X will decompress a 4X encoded stream.
 619  // The length of the supplied input must match the end of a block exactly.
 620  // The *capacity* of the dst slice must match the destination size of
 621  // the uncompressed data exactly.
 622  func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
 623  	if d.actualTableLog == 8 {
 624  		return d.decompress4X8bitExactly(dst, src)
 625  	}
 626  
 627  	var br [4]bitReaderBytes
 628  	start := 6
 629  	for i := range 3 {
 630  		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
 631  		if start+length >= len(src) {
 632  			return nil, errors.New("truncated input (or invalid offset)")
 633  		}
 634  		err := br[i].init(src[start : start+length])
 635  		if err != nil {
 636  			return nil, err
 637  		}
 638  		start += length
 639  	}
 640  	err := br[3].init(src[start:])
 641  	if err != nil {
 642  		return nil, err
 643  	}
 644  
 645  	// destination, offset to match first output
 646  	dstSize := cap(dst)
 647  	dst = dst[:dstSize]
 648  	out := dst
 649  	dstEvery := (dstSize + 3) / 4
 650  
 651  	shift := (56 + (8 - d.actualTableLog)) & 63
 652  
 653  	const tlSize = 1 << 8
 654  	single := d.dt.single[:tlSize]
 655  
 656  	// Use temp table to avoid bound checks/append penalty.
 657  	buf := d.buffer()
 658  	var off uint8
 659  	var decoded int
 660  
 661  	// Decode 4 values from each decoder/loop.
 662  	const bufoff = 256
 663  	for {
 664  		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
 665  			break
 666  		}
 667  
 668  		{
 669  			// Interleave 2 decodes.
 670  			const stream = 0
 671  			const stream2 = 1
 672  			br1 := &br[stream]
 673  			br2 := &br[stream2]
 674  			br1.fillFast()
 675  			br2.fillFast()
 676  
 677  			v := single[uint8(br1.value>>shift)].entry
 678  			v2 := single[uint8(br2.value>>shift)].entry
 679  			br1.bitsRead += uint8(v)
 680  			br1.value <<= v & 63
 681  			br2.bitsRead += uint8(v2)
 682  			br2.value <<= v2 & 63
 683  			buf[stream][off] = uint8(v >> 8)
 684  			buf[stream2][off] = uint8(v2 >> 8)
 685  
 686  			v = single[uint8(br1.value>>shift)].entry
 687  			v2 = single[uint8(br2.value>>shift)].entry
 688  			br1.bitsRead += uint8(v)
 689  			br1.value <<= v & 63
 690  			br2.bitsRead += uint8(v2)
 691  			br2.value <<= v2 & 63
 692  			buf[stream][off+1] = uint8(v >> 8)
 693  			buf[stream2][off+1] = uint8(v2 >> 8)
 694  
 695  			v = single[uint8(br1.value>>shift)].entry
 696  			v2 = single[uint8(br2.value>>shift)].entry
 697  			br1.bitsRead += uint8(v)
 698  			br1.value <<= v & 63
 699  			br2.bitsRead += uint8(v2)
 700  			br2.value <<= v2 & 63
 701  			buf[stream][off+2] = uint8(v >> 8)
 702  			buf[stream2][off+2] = uint8(v2 >> 8)
 703  
 704  			v = single[uint8(br1.value>>shift)].entry
 705  			v2 = single[uint8(br2.value>>shift)].entry
 706  			br1.bitsRead += uint8(v)
 707  			br1.value <<= v & 63
 708  			br2.bitsRead += uint8(v2)
 709  			br2.value <<= v2 & 63
 710  			buf[stream][off+3] = uint8(v >> 8)
 711  			buf[stream2][off+3] = uint8(v2 >> 8)
 712  		}
 713  
 714  		{
 715  			const stream = 2
 716  			const stream2 = 3
 717  			br1 := &br[stream]
 718  			br2 := &br[stream2]
 719  			br1.fillFast()
 720  			br2.fillFast()
 721  
 722  			v := single[uint8(br1.value>>shift)].entry
 723  			v2 := single[uint8(br2.value>>shift)].entry
 724  			br1.bitsRead += uint8(v)
 725  			br1.value <<= v & 63
 726  			br2.bitsRead += uint8(v2)
 727  			br2.value <<= v2 & 63
 728  			buf[stream][off] = uint8(v >> 8)
 729  			buf[stream2][off] = uint8(v2 >> 8)
 730  
 731  			v = single[uint8(br1.value>>shift)].entry
 732  			v2 = single[uint8(br2.value>>shift)].entry
 733  			br1.bitsRead += uint8(v)
 734  			br1.value <<= v & 63
 735  			br2.bitsRead += uint8(v2)
 736  			br2.value <<= v2 & 63
 737  			buf[stream][off+1] = uint8(v >> 8)
 738  			buf[stream2][off+1] = uint8(v2 >> 8)
 739  
 740  			v = single[uint8(br1.value>>shift)].entry
 741  			v2 = single[uint8(br2.value>>shift)].entry
 742  			br1.bitsRead += uint8(v)
 743  			br1.value <<= v & 63
 744  			br2.bitsRead += uint8(v2)
 745  			br2.value <<= v2 & 63
 746  			buf[stream][off+2] = uint8(v >> 8)
 747  			buf[stream2][off+2] = uint8(v2 >> 8)
 748  
 749  			v = single[uint8(br1.value>>shift)].entry
 750  			v2 = single[uint8(br2.value>>shift)].entry
 751  			br1.bitsRead += uint8(v)
 752  			br1.value <<= v & 63
 753  			br2.bitsRead += uint8(v2)
 754  			br2.value <<= v2 & 63
 755  			buf[stream][off+3] = uint8(v >> 8)
 756  			buf[stream2][off+3] = uint8(v2 >> 8)
 757  		}
 758  
 759  		off += 4
 760  
 761  		if off == 0 {
 762  			if bufoff > dstEvery {
 763  				d.bufs.Put(buf)
 764  				return nil, errors.New("corruption detected: stream overrun 1")
 765  			}
 766  			// There must at least be 3 buffers left.
 767  			if len(out)-bufoff < dstEvery*3 {
 768  				d.bufs.Put(buf)
 769  				return nil, errors.New("corruption detected: stream overrun 2")
 770  			}
 771  			//copy(out, buf[0][:])
 772  			//copy(out[dstEvery:], buf[1][:])
 773  			//copy(out[dstEvery*2:], buf[2][:])
 774  			*(*[bufoff]byte)(out) = buf[0]
 775  			*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
 776  			*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
 777  			*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
 778  			out = out[bufoff:]
 779  			decoded += bufoff * 4
 780  		}
 781  	}
 782  	if off > 0 {
 783  		ioff := int(off)
 784  		if len(out) < dstEvery*3+ioff {
 785  			d.bufs.Put(buf)
 786  			return nil, errors.New("corruption detected: stream overrun 3")
 787  		}
 788  		copy(out, buf[0][:off])
 789  		copy(out[dstEvery:], buf[1][:off])
 790  		copy(out[dstEvery*2:], buf[2][:off])
 791  		copy(out[dstEvery*3:], buf[3][:off])
 792  		decoded += int(off) * 4
 793  		out = out[off:]
 794  	}
 795  
 796  	// Decode remaining.
 797  	// Decode remaining.
 798  	remainBytes := dstEvery - (decoded / 4)
 799  	for i := range br {
 800  		offset := dstEvery * i
 801  		endsAt := min(offset+remainBytes, len(out))
 802  		br := &br[i]
 803  		bitsLeft := br.remaining()
 804  		for bitsLeft > 0 {
 805  			if br.finished() {
 806  				d.bufs.Put(buf)
 807  				return nil, io.ErrUnexpectedEOF
 808  			}
 809  			if br.bitsRead >= 56 {
 810  				if br.off >= 4 {
 811  					v := br.in[br.off-4:]
 812  					v = v[:4]
 813  					low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 814  					br.value |= uint64(low) << (br.bitsRead - 32)
 815  					br.bitsRead -= 32
 816  					br.off -= 4
 817  				} else {
 818  					for br.off > 0 {
 819  						br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
 820  						br.bitsRead -= 8
 821  						br.off--
 822  					}
 823  				}
 824  			}
 825  			// end inline...
 826  			if offset >= endsAt {
 827  				d.bufs.Put(buf)
 828  				return nil, errors.New("corruption detected: stream overrun 4")
 829  			}
 830  
 831  			// Read value and increment offset.
 832  			v := single[uint8(br.value>>shift)].entry
 833  			nBits := uint8(v)
 834  			br.advance(nBits)
 835  			bitsLeft -= uint(nBits)
 836  			out[offset] = uint8(v >> 8)
 837  			offset++
 838  		}
 839  		if offset != endsAt {
 840  			d.bufs.Put(buf)
 841  			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
 842  		}
 843  		decoded += offset - dstEvery*i
 844  		err = br.close()
 845  		if err != nil {
 846  			d.bufs.Put(buf)
 847  			return nil, err
 848  		}
 849  	}
 850  	d.bufs.Put(buf)
 851  	if dstSize != decoded {
 852  		return nil, errors.New("corruption detected: short output block")
 853  	}
 854  	return dst, nil
 855  }
 856  
 857  // Decompress4X will decompress a 4X encoded stream.
 858  // The length of the supplied input must match the end of a block exactly.
 859  // The *capacity* of the dst slice must match the destination size of
 860  // the uncompressed data exactly.
 861  func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
 862  	var br [4]bitReaderBytes
 863  	start := 6
 864  	for i := range 3 {
 865  		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
 866  		if start+length >= len(src) {
 867  			return nil, errors.New("truncated input (or invalid offset)")
 868  		}
 869  		err := br[i].init(src[start : start+length])
 870  		if err != nil {
 871  			return nil, err
 872  		}
 873  		start += length
 874  	}
 875  	err := br[3].init(src[start:])
 876  	if err != nil {
 877  		return nil, err
 878  	}
 879  
 880  	// destination, offset to match first output
 881  	dstSize := cap(dst)
 882  	dst = dst[:dstSize]
 883  	out := dst
 884  	dstEvery := (dstSize + 3) / 4
 885  
 886  	const shift = 56
 887  	const tlSize = 1 << 8
 888  	single := d.dt.single[:tlSize]
 889  
 890  	// Use temp table to avoid bound checks/append penalty.
 891  	buf := d.buffer()
 892  	var off uint8
 893  	var decoded int
 894  
 895  	// Decode 4 values from each decoder/loop.
 896  	const bufoff = 256
 897  	for {
 898  		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
 899  			break
 900  		}
 901  
 902  		{
 903  			// Interleave 2 decodes.
 904  			const stream = 0
 905  			const stream2 = 1
 906  			br1 := &br[stream]
 907  			br2 := &br[stream2]
 908  			br1.fillFast()
 909  			br2.fillFast()
 910  
 911  			v := single[uint8(br1.value>>shift)].entry
 912  			v2 := single[uint8(br2.value>>shift)].entry
 913  			br1.bitsRead += uint8(v)
 914  			br1.value <<= v & 63
 915  			br2.bitsRead += uint8(v2)
 916  			br2.value <<= v2 & 63
 917  			buf[stream][off] = uint8(v >> 8)
 918  			buf[stream2][off] = uint8(v2 >> 8)
 919  
 920  			v = single[uint8(br1.value>>shift)].entry
 921  			v2 = single[uint8(br2.value>>shift)].entry
 922  			br1.bitsRead += uint8(v)
 923  			br1.value <<= v & 63
 924  			br2.bitsRead += uint8(v2)
 925  			br2.value <<= v2 & 63
 926  			buf[stream][off+1] = uint8(v >> 8)
 927  			buf[stream2][off+1] = uint8(v2 >> 8)
 928  
 929  			v = single[uint8(br1.value>>shift)].entry
 930  			v2 = single[uint8(br2.value>>shift)].entry
 931  			br1.bitsRead += uint8(v)
 932  			br1.value <<= v & 63
 933  			br2.bitsRead += uint8(v2)
 934  			br2.value <<= v2 & 63
 935  			buf[stream][off+2] = uint8(v >> 8)
 936  			buf[stream2][off+2] = uint8(v2 >> 8)
 937  
 938  			v = single[uint8(br1.value>>shift)].entry
 939  			v2 = single[uint8(br2.value>>shift)].entry
 940  			br1.bitsRead += uint8(v)
 941  			br1.value <<= v & 63
 942  			br2.bitsRead += uint8(v2)
 943  			br2.value <<= v2 & 63
 944  			buf[stream][off+3] = uint8(v >> 8)
 945  			buf[stream2][off+3] = uint8(v2 >> 8)
 946  		}
 947  
 948  		{
 949  			const stream = 2
 950  			const stream2 = 3
 951  			br1 := &br[stream]
 952  			br2 := &br[stream2]
 953  			br1.fillFast()
 954  			br2.fillFast()
 955  
 956  			v := single[uint8(br1.value>>shift)].entry
 957  			v2 := single[uint8(br2.value>>shift)].entry
 958  			br1.bitsRead += uint8(v)
 959  			br1.value <<= v & 63
 960  			br2.bitsRead += uint8(v2)
 961  			br2.value <<= v2 & 63
 962  			buf[stream][off] = uint8(v >> 8)
 963  			buf[stream2][off] = uint8(v2 >> 8)
 964  
 965  			v = single[uint8(br1.value>>shift)].entry
 966  			v2 = single[uint8(br2.value>>shift)].entry
 967  			br1.bitsRead += uint8(v)
 968  			br1.value <<= v & 63
 969  			br2.bitsRead += uint8(v2)
 970  			br2.value <<= v2 & 63
 971  			buf[stream][off+1] = uint8(v >> 8)
 972  			buf[stream2][off+1] = uint8(v2 >> 8)
 973  
 974  			v = single[uint8(br1.value>>shift)].entry
 975  			v2 = single[uint8(br2.value>>shift)].entry
 976  			br1.bitsRead += uint8(v)
 977  			br1.value <<= v & 63
 978  			br2.bitsRead += uint8(v2)
 979  			br2.value <<= v2 & 63
 980  			buf[stream][off+2] = uint8(v >> 8)
 981  			buf[stream2][off+2] = uint8(v2 >> 8)
 982  
 983  			v = single[uint8(br1.value>>shift)].entry
 984  			v2 = single[uint8(br2.value>>shift)].entry
 985  			br1.bitsRead += uint8(v)
 986  			br1.value <<= v & 63
 987  			br2.bitsRead += uint8(v2)
 988  			br2.value <<= v2 & 63
 989  			buf[stream][off+3] = uint8(v >> 8)
 990  			buf[stream2][off+3] = uint8(v2 >> 8)
 991  		}
 992  
 993  		off += 4
 994  
 995  		if off == 0 {
 996  			if bufoff > dstEvery {
 997  				d.bufs.Put(buf)
 998  				return nil, errors.New("corruption detected: stream overrun 1")
 999  			}
1000  			// There must at least be 3 buffers left.
1001  			if len(out)-bufoff < dstEvery*3 {
1002  				d.bufs.Put(buf)
1003  				return nil, errors.New("corruption detected: stream overrun 2")
1004  			}
1005  
1006  			//copy(out, buf[0][:])
1007  			//copy(out[dstEvery:], buf[1][:])
1008  			//copy(out[dstEvery*2:], buf[2][:])
1009  			// copy(out[dstEvery*3:], buf[3][:])
1010  			*(*[bufoff]byte)(out) = buf[0]
1011  			*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
1012  			*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
1013  			*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
1014  			out = out[bufoff:]
1015  			decoded += bufoff * 4
1016  		}
1017  	}
1018  	if off > 0 {
1019  		ioff := int(off)
1020  		if len(out) < dstEvery*3+ioff {
1021  			return nil, errors.New("corruption detected: stream overrun 3")
1022  		}
1023  		copy(out, buf[0][:off])
1024  		copy(out[dstEvery:], buf[1][:off])
1025  		copy(out[dstEvery*2:], buf[2][:off])
1026  		copy(out[dstEvery*3:], buf[3][:off])
1027  		decoded += int(off) * 4
1028  		out = out[off:]
1029  	}
1030  
1031  	// Decode remaining.
1032  	remainBytes := dstEvery - (decoded / 4)
1033  	for i := range br {
1034  		offset := dstEvery * i
1035  		endsAt := min(offset+remainBytes, len(out))
1036  		br := &br[i]
1037  		bitsLeft := br.remaining()
1038  		for bitsLeft > 0 {
1039  			if br.finished() {
1040  				d.bufs.Put(buf)
1041  				return nil, io.ErrUnexpectedEOF
1042  			}
1043  			if br.bitsRead >= 56 {
1044  				if br.off >= 4 {
1045  					v := br.in[br.off-4:]
1046  					v = v[:4]
1047  					low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
1048  					br.value |= uint64(low) << (br.bitsRead - 32)
1049  					br.bitsRead -= 32
1050  					br.off -= 4
1051  				} else {
1052  					for br.off > 0 {
1053  						br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
1054  						br.bitsRead -= 8
1055  						br.off--
1056  					}
1057  				}
1058  			}
1059  			// end inline...
1060  			if offset >= endsAt {
1061  				d.bufs.Put(buf)
1062  				return nil, errors.New("corruption detected: stream overrun 4")
1063  			}
1064  
1065  			// Read value and increment offset.
1066  			v := single[br.peekByteFast()].entry
1067  			nBits := uint8(v)
1068  			br.advance(nBits)
1069  			bitsLeft -= uint(nBits)
1070  			out[offset] = uint8(v >> 8)
1071  			offset++
1072  		}
1073  		if offset != endsAt {
1074  			d.bufs.Put(buf)
1075  			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
1076  		}
1077  
1078  		decoded += offset - dstEvery*i
1079  		err = br.close()
1080  		if err != nil {
1081  			d.bufs.Put(buf)
1082  			return nil, err
1083  		}
1084  	}
1085  	d.bufs.Put(buf)
1086  	if dstSize != decoded {
1087  		return nil, errors.New("corruption detected: short output block")
1088  	}
1089  	return dst, nil
1090  }
1091  
1092  // matches will compare a decoding table to a coding table.
1093  // Errors are written to the writer.
1094  // Nothing will be written if table is ok.
1095  func (s *Scratch) matches(ct cTable, w io.Writer) {
1096  	if s == nil || len(s.dt.single) == 0 {
1097  		return
1098  	}
1099  	dt := s.dt.single[:1<<s.actualTableLog]
1100  	tablelog := s.actualTableLog
1101  	ok := 0
1102  	broken := 0
1103  	for sym, enc := range ct {
1104  		errs := 0
1105  		broken++
1106  		if enc.nBits == 0 {
1107  			for _, dec := range dt {
1108  				if uint8(dec.entry>>8) == byte(sym) {
1109  					fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
1110  					errs++
1111  					break
1112  				}
1113  			}
1114  			if errs == 0 {
1115  				broken--
1116  			}
1117  			continue
1118  		}
1119  		// Unused bits in input
1120  		ub := tablelog - enc.nBits
1121  		top := enc.val << ub
1122  		// decoder looks at top bits.
1123  		dec := dt[top]
1124  		if uint8(dec.entry) != enc.nBits {
1125  			fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
1126  			errs++
1127  		}
1128  		if uint8(dec.entry>>8) != uint8(sym) {
1129  			fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
1130  			errs++
1131  		}
1132  		if errs > 0 {
1133  			fmt.Fprintf(w, "%d errors in base, stopping\n", errs)
1134  			continue
1135  		}
1136  		// Ensure that all combinations are covered.
1137  		for i := uint16(0); i < (1 << ub); i++ {
1138  			vval := top | i
1139  			dec := dt[vval]
1140  			if uint8(dec.entry) != enc.nBits {
1141  				fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
1142  				errs++
1143  			}
1144  			if uint8(dec.entry>>8) != uint8(sym) {
1145  				fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
1146  				errs++
1147  			}
1148  			if errs > 20 {
1149  				fmt.Fprintf(w, "%d errors, stopping\n", errs)
1150  				break
1151  			}
1152  		}
1153  		if errs == 0 {
1154  			ok++
1155  			broken--
1156  		}
1157  	}
1158  	if broken > 0 {
1159  		fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
1160  	}
1161  }
1162