1 // Copyright (c) 2024 The Decred developers
2 // Use of this source code is governed by an ISC
3 // license that can be found in the LICENSE file.
4 //
5 // Main Go code originally written and optimized by Dave Collins May 2020.
6 // Additional cleanup and comments added July 2024.
7 8 // Package blake256 implements BLAKE-256 and BLAKE-224 with SSE2, SSE4.1, and
9 // AVX acceleration and zero allocations.
10 package blake256
11 12 import (
13 "encoding/binary"
14 "fmt"
15 16 "github.com/decred/dcrd/crypto/blake256/internal/compress"
17 )
18 19 const (
20 // BlockSize is the block size of the hash algorithm in bytes.
21 BlockSize = 64
22 23 // Size is the size of a BLAKE-256 hash in bytes.
24 Size = 32
25 26 // Size224 is the size of a BLAKE-224 hash in bytes.
27 Size224 = 28
28 29 // SavedStateSize is the number of bytes of a serialized intermediate state.
30 SavedStateSize = 128
31 )
32 33 // pad provides an efficient means to pad a message.
34 var pad = [64]byte{0x80}
35 36 // hasher implements a zero-allocation rolling BLAKE checksum. It can safely be
37 // copied at any point to save its internal state for use in additional
38 // processing later, without having to write the previously written data again.
39 //
40 // It contains the common logic between BLAKE-224 and BLAKE-256.
41 type hasher struct {
42 state compress.State // the current chain value and salt
43 count uint64 // running total of message bits hashed
44 buf [BlockSize]byte // partial block data buffer
45 nbuf uint32 // number of bytes written to data buffer
46 }
47 48 // makeHasher returns an instance of a rolling hasher initialized with the
49 // provided chain value.
50 func makeHasher(cv [8]uint32) hasher {
51 return hasher{state: compress.State{CV: cv}}
52 }
53 54 // reset resets the state of the rolling hash.
55 func (h *hasher) reset(iv [8]uint32) {
56 h.state.CV = iv
57 h.count = 0
58 h.nbuf = 0
59 }
60 61 // initializeSalt initialize the hasher state with the provided salt. Note that
62 // this must only be done when first creating the hasher state for correct
63 // results.
64 //
65 // It will panic if the provided salt is not 16 bytes.
66 func (h *hasher) initializeSalt(salt []byte) {
67 if len(salt) != 16 {
68 panic("salt length must be 16 bytes")
69 }
70 h.state.S[0] = binary.BigEndian.Uint32(salt)
71 h.state.S[1] = binary.BigEndian.Uint32(salt[4:])
72 h.state.S[2] = binary.BigEndian.Uint32(salt[8:])
73 h.state.S[3] = binary.BigEndian.Uint32(salt[12:])
74 }
75 76 // write adds the given bytes to the rolling hash.
77 //
78 // NOTE: This method only returns an error in order to satisfy the [io.Writer]
79 // and [hash.Hash] interfaces. However, it will never error, meaning the error
80 // will always be nil, so it is safe to ignore.
81 func (h *hasher) write(b []byte) (int, error) {
82 // All bytes will be written.
83 totalWritten := len(b)
84 85 // When a partial block exists and adding the new data would meet or exceed
86 // the size of a block, fill up the partial block and compress it.
87 if h.nbuf > 0 && h.nbuf+uint32(len(b)) >= BlockSize {
88 written := uint32(copy(h.buf[h.nbuf:], b))
89 h.count += BlockSize << 3
90 compress.Blocks(&h.state, h.buf[:], h.count)
91 b = b[written:]
92 h.nbuf = 0
93 }
94 95 // The previous section ensures there is no partial block data remaining.
96 //
97 // Use that fact to compress full blocks directly when the remaining number
98 // of bytes to write will completely fill one or more additional blocks.
99 //
100 // It is perhaps also worth noting that this approach is used over having a
101 // compression function that only accepts a single block because it provides
102 // a rather significant speed advantage on inputs that are larger than the
103 // size of a couple of blocks while only having a negligible impact on small
104 // inputs.
105 if len(b) >= BlockSize {
106 h.count += BlockSize << 3
107 compress.Blocks(&h.state, b, h.count)
108 109 // Update the count of message bits hashed and slice of remaining
110 // unwritten bytes to account for the total number of blocks compressed.
111 bytesHashed := uint64(len(b) &^ (BlockSize - 1))
112 h.count += (bytesHashed - BlockSize) << 3
113 b = b[bytesHashed:]
114 }
115 116 // Write any remaining bytes to the next partial block. Note the number of
117 // remaining bytes is guaranteed to be less than the size of a full block
118 // due to the previous sections.
119 if len(b) > 0 {
120 h.nbuf += uint32(copy(h.buf[h.nbuf:], b))
121 }
122 123 return totalWritten, nil
124 }
125 126 // writeByte adds the given byte to the rolling hash.
127 func (h *hasher) writeByte(b byte) {
128 var buf [1]byte
129 buf[0] = b
130 h.write(buf[:])
131 }
132 133 // writeString adds the given string to the rolling hash.
134 func (h *hasher) writeString(s string) {
135 h.write([]byte(s))
136 }
137 138 // writeUint16LE encodes the given unsigned 16-bit integer as a 2-byte
139 // little-endian byte sequence and adds it to the rolling hash.
140 func (h *hasher) writeUint16LE(v uint16) {
141 var buf [2]byte
142 binary.LittleEndian.PutUint16(buf[:], v)
143 h.write(buf[:])
144 }
145 146 // writeUint16BE encodes the given unsigned 16-bit integer as a 2-byte
147 // big-endian byte sequence and adds it to the rolling hash.
148 func (h *hasher) writeUint16BE(v uint16) {
149 var buf [2]byte
150 binary.BigEndian.PutUint16(buf[:], v)
151 h.write(buf[:])
152 }
153 154 // writeUint32LE encodes the given unsigned 32-bit integer as a 4-byte
155 // little-endian byte sequence and adds it to the rolling hash.
156 func (h *hasher) writeUint32LE(v uint32) {
157 var buf [4]byte
158 binary.LittleEndian.PutUint32(buf[:], v)
159 h.write(buf[:])
160 }
161 162 // writeUint32BE encodes the given unsigned 32-bit integer as a 4-byte
163 // big-endian byte sequence and adds it to the rolling hash.
164 func (h *hasher) writeUint32BE(v uint32) {
165 var buf [4]byte
166 binary.BigEndian.PutUint32(buf[:], v)
167 h.write(buf[:])
168 }
169 170 // writeUint64LE encodes the given unsigned 64-bit integer as an 8-byte
171 // little-endian byte sequence and adds it to the rolling hash.
172 func (h *hasher) writeUint64LE(v uint64) {
173 var buf [8]byte
174 binary.LittleEndian.PutUint64(buf[:], v)
175 h.write(buf[:])
176 }
177 178 // writeUint64BE encodes the given unsigned 64-bit integer as an 8-byte
179 // big-endian byte sequence and adds it to the rolling hash.
180 func (h *hasher) writeUint64BE(v uint64) {
181 var buf [8]byte
182 binary.BigEndian.PutUint64(buf[:], v)
183 h.write(buf[:])
184 }
185 186 // finalize finalizes of the rolling hash by writing any remaining partial block
187 // data and appending the necessary padding.
188 //
189 // The hasher may no longer be used after invoking this method. Callers always
190 // run finalize on a copy of the hasher so the original hasher state is not
191 // modified.
192 //
193 // The length preamble bit MUST be 0 (for BLAKE-224) or 1 (for BLAKE-256).
194 func (h *hasher) finalize(lenPreambleBit uint8) {
195 // Hashing a message consists of padding the message to a multiple of the
196 // block size and processing it block per block by the compression function.
197 //
198 // Padding the message consists of first extending the message so that its
199 // bit length is congruent to 447 modulo 512 by appending a 1 bit followed
200 // by enough 0s to reach the required congruence. Then a length preamble
201 // bit is added (1 for BLAKE-256, 0 for BLAKE-224) followed by the length
202 // of original message encoded as a 64-bit unsigned big-endian integer.
203 // This ensures the message length is a multiple of the block size since
204 // 447+1+64 = 512.
205 //
206 // Note that a special case occurs when the final block contains no original
207 // message bit. In that case, the message bit counter provided to the
208 // compression function is set to zero for that final block. This
209 // guarantees unique blocks.
210 //
211 // This implementation performs iterated hashing by compressing full blocks
212 // as data is written and storing the resulting chain value, total number of
213 // message bits compressed, and any remaining partial block data in the
214 // state.
215 //
216 // Thus, finalization consists of writing any remaining partial block data
217 // that hasn't already been compressed and padding the message out per the
218 // above.
219 //
220 // Since this implementation only allows writing full 8-bit bytes at a time,
221 // the following is optimized to only consider message bit lengths that are
222 // multiples of 8. Concretely, note that floor(447/8) = 55. Therefore, as
223 // long as the remaining partial block data is <= 55, only one compression
224 // is needed. Otherwise a second compression is needed.
225 msgBitLen := h.count + uint64(h.nbuf)<<3
226 switch {
227 // Exactly one padding byte is needed.
228 case h.nbuf == 55:
229 h.buf[55] = 0x80 | lenPreambleBit
230 binary.BigEndian.PutUint64(h.buf[56:], msgBitLen)
231 compress.Blocks(&h.state, h.buf[:], msgBitLen)
232 return
233 234 // Appending the padding to the remaining partial block data will fit
235 // without needing another block.
236 case h.nbuf < 55:
237 copy(h.buf[h.nbuf:55], pad[:])
238 h.buf[55] = lenPreambleBit
239 binary.BigEndian.PutUint64(h.buf[56:], msgBitLen)
240 241 // Per the specification, the counter is set to zero for the final
242 // compression when the final block contains no bits from the original
243 // message.
244 if h.nbuf == 0 {
245 msgBitLen = 0
246 }
247 compress.Blocks(&h.state, h.buf[:], msgBitLen)
248 return
249 }
250 251 // The partial block data plus the padding and message bit length exceed the
252 // size of a block, so two compressions are needed where the second one is
253 // a padding block (all zeros except for the final 8 bytes which house the
254 // original message length encoded as a 64-bit unsigned big-endian integer).
255 256 // Pad the remaining partial block data and compress it.
257 copy(h.buf[h.nbuf:], pad[:])
258 compress.Blocks(&h.state, h.buf[:], msgBitLen)
259 260 // Create the final padding block and compress it.
261 //
262 // Note that since the padding block does not contain any bits from the
263 // original message, the counter is set to zero when performing compression
264 // per the specification.
265 copy(h.buf[:], pad[1:56])
266 h.buf[55] = lenPreambleBit
267 binary.BigEndian.PutUint64(h.buf[56:], msgBitLen)
268 compress.Blocks(&h.state, h.buf[:], 0)
269 }
270 271 // wordsToBytes224 converts an array of 8 32-bit unsigned big-endian words to an
272 // array of 28 bytes. The final word is truncated.
273 func wordsToBytes224(cv [8]uint32) (out [28]byte) {
274 binary.BigEndian.PutUint32(out[24:], cv[6])
275 binary.BigEndian.PutUint32(out[20:], cv[5])
276 binary.BigEndian.PutUint32(out[16:], cv[4])
277 binary.BigEndian.PutUint32(out[12:], cv[3])
278 binary.BigEndian.PutUint32(out[8:], cv[2])
279 binary.BigEndian.PutUint32(out[4:], cv[1])
280 binary.BigEndian.PutUint32(out[0:], cv[0])
281 return out
282 }
283 284 // finalize224 finalizes of the rolling hash by writing any remaining partial
285 // block data and appending the necessary padding for BLAKE-224.
286 //
287 // The hasher may no longer be used after invoking this method. Callers always
288 // run finalize on a copy of the hasher so the original hasher state is not
289 // modified.
290 func (h *hasher) finalize224() [Size224]byte {
291 const lenPreambleBit = 0x00
292 h.finalize(lenPreambleBit)
293 return wordsToBytes224(h.state.CV)
294 }
295 296 // wordsToBytes256 converts an array of 8 32-bit unsigned big-endian words to an
297 // array of 32 bytes.
298 func wordsToBytes256(cv [8]uint32) (out [32]byte) {
299 binary.BigEndian.PutUint32(out[28:], cv[7])
300 binary.BigEndian.PutUint32(out[24:], cv[6])
301 binary.BigEndian.PutUint32(out[20:], cv[5])
302 binary.BigEndian.PutUint32(out[16:], cv[4])
303 binary.BigEndian.PutUint32(out[12:], cv[3])
304 binary.BigEndian.PutUint32(out[8:], cv[2])
305 binary.BigEndian.PutUint32(out[4:], cv[1])
306 binary.BigEndian.PutUint32(out[0:], cv[0])
307 return out
308 }
309 310 // finalize256 finalizes of the rolling hash by writing any remaining partial
311 // block data and appending the necessary padding for BLAKE-256.
312 //
313 // The hasher may no longer be used after invoking this method. Callers always
314 // run finalize on a copy of the hasher so the original hasher state is not
315 // modified.
316 func (h *hasher) finalize256() [Size]byte {
317 const lenPreambleBit = 0x01
318 h.finalize(lenPreambleBit)
319 return wordsToBytes256(h.state.CV)
320 }
321 322 // putSavedState serializes the intermediate state directly into the passed byte
323 // slice. The target slice MUST have at least [SavedStateSize] bytes available
324 // or it will panic.
325 func (h *hasher) putSavedState(target []byte, prefix uint32) {
326 var offset uint32
327 binary.BigEndian.PutUint32(target[offset:], prefix)
328 offset += 4
329 for _, cv := range h.state.CV {
330 binary.BigEndian.PutUint32(target[offset:], cv)
331 offset += 4
332 }
333 for _, s := range h.state.S {
334 binary.BigEndian.PutUint32(target[offset:], s)
335 offset += 4
336 }
337 binary.BigEndian.PutUint64(target[offset:], h.count)
338 offset += 8
339 offset += uint32(copy(target[offset:], h.buf[:]))
340 binary.BigEndian.PutUint32(target[offset:], h.nbuf)
341 }
342 343 // saveState appends the current intermediate state of the rolling hash prefixed
344 // by the passed value to the provided slice and returns the resulting slice.
345 // It does not change the underlying hash state.
346 //
347 // The provided prefix is expected to either be [statePrefix224] or
348 // [statePrefix256] depending on which hash variant is being saved.
349 //
350 // As described by the [hasher] documentation, the hasher instance can simply be
351 // copied to achieve the same result much more efficiently when the caller is
352 // able to keep a copy. Therefore, that approach should be preferred when
353 // possible.
354 //
355 // However, the ability to serialize the state is also provided to enable
356 // sharing it across process boundaries.
357 func (h *hasher) saveState(target []byte, prefix uint32) []byte {
358 // Create a new array and append it to the target when there is not enough
359 // space remaining in the slice. Otherwise, write directly into it.
360 //
361 // Note that this could alternatively just grow the slice if needed and then
362 // write directly into it unconditionally, but this approach is faster for
363 // the two much more common cases of the caller providing a slice that is
364 // already big enough or a nil slice.
365 if needed := SavedStateSize - (cap(target) - len(target)); needed > 0 {
366 var state [SavedStateSize]byte
367 h.putSavedState(state[:], prefix)
368 return append(target, state[:]...)
369 }
370 h.putSavedState(target[len(target):len(target)+SavedStateSize], prefix)
371 return target[:len(target)+SavedStateSize]
372 }
373 374 // loadState restores the rolling hash to the provided serialized intermediate
375 // state. See [hasher.saveState] for more details.
376 //
377 // The provided prefix is expected to either be [statePrefix224] or
378 // [statePrefix256] depending on which hash variant is being loaded.
379 //
380 // [ErrMalformedState] will be returned when the provided serialized state is
381 // not at least the required [SavedStateSize] number of bytes.
382 //
383 // [ErrMismatchedState] will be returned if the prefix in the serialized state
384 // does not match the given required prefix.
385 func (h *hasher) loadState(state []byte, requiredPrefix uint32) error {
386 if len(state) < SavedStateSize {
387 str := fmt.Sprintf("malformed intermediate state - must be at least "+
388 "%d bytes", SavedStateSize)
389 return makeError(ErrMalformedState, str)
390 }
391 var offset uint32
392 if pre := binary.BigEndian.Uint32(state[offset:]); pre != requiredPrefix {
393 hashType := "BLAKE-256"
394 if requiredPrefix != statePrefix256 {
395 hashType = "BLAKE-224"
396 }
397 str := fmt.Sprintf("the provided intermediate state is not for %s",
398 hashType)
399 return makeError(ErrMismatchedState, str)
400 }
401 offset += 4
402 for i := range h.state.CV {
403 h.state.CV[i] = binary.BigEndian.Uint32(state[offset:])
404 offset += 4
405 }
406 for i := range h.state.S {
407 h.state.S[i] = binary.BigEndian.Uint32(state[offset:])
408 offset += 4
409 }
410 h.count = binary.BigEndian.Uint64(state[offset:])
411 offset += 8
412 offset += uint32(copy(h.buf[:], state[offset:]))
413 h.nbuf = binary.BigEndian.Uint32(state[offset:])
414 return nil
415 }
416