1 // Copyright 2018 Klaus Post. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 // Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
5 6 // Package fse provides Finite State Entropy encoding and decoding.
7 //
8 // Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
9 // for byte blocks as implemented in zstd.
10 //
11 // See https://github.com/klauspost/compress/tree/master/fse for more information.
12 package fse
13 14 import (
15 "errors"
16 "fmt"
17 "math/bits"
18 )
19 20 const (
21 /*!MEMORY_USAGE :
22 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
23 * Increasing memory usage improves compression ratio
24 * Reduced memory usage can improve speed, due to cache effect
25 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
26 maxMemoryUsage = 14
27 defaultMemoryUsage = 13
28 29 maxTableLog = maxMemoryUsage - 2
30 maxTablesize = 1 << maxTableLog
31 defaultTablelog = defaultMemoryUsage - 2
32 minTablelog = 5
33 maxSymbolValue = 255
34 )
35 36 var (
37 // ErrIncompressible is returned when input is judged to be too hard to compress.
38 ErrIncompressible = errors.New("input is not compressible")
39 40 // ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
41 ErrUseRLE = errors.New("input is single value repeated")
42 )
43 44 // Scratch provides temporary storage for compression and decompression.
45 type Scratch struct {
46 // Private
47 count [maxSymbolValue + 1]uint32
48 norm [maxSymbolValue + 1]int16
49 br byteReader
50 bits bitReader
51 bw bitWriter
52 ct cTable // Compression tables.
53 decTable []decSymbol // Decompression table.
54 maxCount int // count of the most probable symbol
55 56 // Per block parameters.
57 // These can be used to override compression parameters of the block.
58 // Do not touch, unless you know what you are doing.
59 60 // Out is output buffer.
61 // If the scratch is re-used before the caller is done processing the output,
62 // set this field to nil.
63 // Otherwise the output buffer will be re-used for next Compression/Decompression step
64 // and allocation will be avoided.
65 Out []byte
66 67 // DecompressLimit limits the maximum decoded size acceptable.
68 // If > 0 decompression will stop when approximately this many bytes
69 // has been decoded.
70 // If 0, maximum size will be 2GB.
71 DecompressLimit int
72 73 symbolLen uint16 // Length of active part of the symbol table.
74 actualTableLog uint8 // Selected tablelog.
75 zeroBits bool // no bits has prob > 50%.
76 clearCount bool // clear count
77 78 // MaxSymbolValue will override the maximum symbol value of the next block.
79 MaxSymbolValue uint8
80 81 // TableLog will attempt to override the tablelog for the next block.
82 TableLog uint8
83 }
84 85 // Histogram allows to populate the histogram and skip that step in the compression,
86 // It otherwise allows to inspect the histogram when compression is done.
87 // To indicate that you have populated the histogram call HistogramFinished
88 // with the value of the highest populated symbol, as well as the number of entries
89 // in the most populated entry. These are accepted at face value.
90 // The returned slice will always be length 256.
91 func (s *Scratch) Histogram() []uint32 {
92 return s.count[:]
93 }
94 95 // HistogramFinished can be called to indicate that the histogram has been populated.
96 // maxSymbol is the index of the highest set symbol of the next data segment.
97 // maxCount is the number of entries in the most populated entry.
98 // These are accepted at face value.
99 func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
100 s.maxCount = maxCount
101 s.symbolLen = uint16(maxSymbol) + 1
102 s.clearCount = maxCount != 0
103 }
104 105 // prepare will prepare and allocate scratch tables used for both compression and decompression.
106 func (s *Scratch) prepare(in []byte) (*Scratch, error) {
107 if s == nil {
108 s = &Scratch{}
109 }
110 if s.MaxSymbolValue == 0 {
111 s.MaxSymbolValue = 255
112 }
113 if s.TableLog == 0 {
114 s.TableLog = defaultTablelog
115 }
116 if s.TableLog > maxTableLog {
117 return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
118 }
119 if cap(s.Out) == 0 {
120 s.Out = make([]byte, 0, len(in))
121 }
122 if s.clearCount && s.maxCount == 0 {
123 for i := range s.count {
124 s.count[i] = 0
125 }
126 s.clearCount = false
127 }
128 s.br.init(in)
129 if s.DecompressLimit == 0 {
130 // Max size 2GB.
131 s.DecompressLimit = (2 << 30) - 1
132 }
133 134 return s, nil
135 }
136 137 // tableStep returns the next table index.
138 func tableStep(tableSize uint32) uint32 {
139 return (tableSize >> 1) + (tableSize >> 3) + 3
140 }
141 142 func highBits(val uint32) (n uint32) {
143 return uint32(bits.Len32(val) - 1)
144 }
145