//go:build darwin || (linux && !baremetal && !wasip1 && !wasm_unknown && !wasip2 && !nintendoswitch) package runtime // ChaCha12 keystream generator used by the secure allocator to fill its // noise buffer once at process init. // // This is a minimal port of github.com/aead/chacha20/chacha (generic path // only). It lives inside the runtime package because importing a separate // frand package would create a cycle: runtime → frand → encoding/binary // → errors → runtime. The chacha primitive has no external dependencies, // so it sits next to the secure allocator with no fan-out. // // Only the bare-minimum surface is exposed to secalloc.mx: // - secChachaXORKeyStream(dst, src, nonce, key, rounds) // 8-byte nonce only, no XChaCha20, no IETF 96-bit nonce, no Cipher struct. const ( secChachaKeySize = 32 secChachaNonceSize = 8 ) func _secChachaSigma() [4]uint32 { return [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574} } func secChachaXORKeyStream(dst, src, nonce, key []byte, rounds int) { if rounds != 20 && rounds != 12 && rounds != 8 { runtimePanic("secalloc: bad chacha rounds") } if len(key) != secChachaKeySize { runtimePanic("secalloc: bad chacha key length") } if len(nonce) != secChachaNonceSize { runtimePanic("secalloc: bad chacha nonce length") } if len(dst) < len(src) { runtimePanic("secalloc: chacha dst too small") } var state [64]byte var block [64]byte sigma := _secChachaSigma() secPutUint32LE(state[0:], sigma[0]) secPutUint32LE(state[4:], sigma[1]) secPutUint32LE(state[8:], sigma[2]) secPutUint32LE(state[12:], sigma[3]) copy(state[16:48], key) copy(state[56:64], nonce) for len(src) >= 64 { secChachaBlock(&block, &state, rounds) for i, v := range block { dst[i] = src[i] ^ v } src = src[64:] dst = dst[64:] } if n := len(src); n > 0 { secChachaBlock(&block, &state, rounds) for i := 0; i < n; i++ { dst[i] = src[i] ^ block[i] } } } // secChachaBlock runs the ChaCha core on state, writes 64 bytes of // keystream to dst, and increments the 64-bit counter 
// at state[48:56].
//
// state is read as sixteen little-endian 32-bit words. The loop applies one
// column round followed by one diagonal round per iteration, so it executes
// rounds/2 times for an even round count. The original input words are then
// added back to the working words and the sums are stored little-endian in
// dst. The counter in state is updated before dst is written.
func secChachaBlock(dst *[64]byte, state *[64]byte, rounds int) {
	var in [16]uint32
	for i := range in {
		in[i] = secLoadUint32LE(state[4*i:])
	}
	x := in

	// Word indices of the eight quarter rounds in one double round: four
	// columns first, then four diagonals.
	quads := [8][4]int{
		{0, 4, 8, 12}, {1, 5, 9, 13}, {2, 6, 10, 14}, {3, 7, 11, 15},
		{0, 5, 10, 15}, {1, 6, 11, 12}, {2, 7, 8, 13}, {3, 4, 9, 14},
	}
	for r := 0; r < rounds; r += 2 {
		for _, q := range quads {
			a, b, c, d := q[0], q[1], q[2], q[3]
			x[a], x[b], x[c], x[d] = secChachaQuarterRound(x[a], x[b], x[c], x[d])
		}
	}

	// Advance the 64-bit block counter, carrying into the high word only
	// when the low word wraps to zero.
	lo := in[12] + 1
	secPutUint32LE(state[48:], lo)
	if lo == 0 {
		secPutUint32LE(state[52:], in[13]+1)
	}

	// Feed-forward: the keystream is the working state plus the input state
	// (with the counter value from before the increment).
	for i := range x {
		secPutUint32LE(dst[4*i:], x[i]+in[i])
	}
}

// secChachaQuarterRound applies the ChaCha quarter round (add, xor, rotate
// left by 16, 12, 8 and 7 bits) to the four words and returns the updated
// values in the same order.
func secChachaQuarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
	a += b
	d ^= a
	d = d<<16 | d>>16

	c += d
	b ^= c
	b = b<<12 | b>>20

	a += b
	d ^= a
	d = d<<8 | d>>24

	c += d
	b ^= c
	b = b<<7 | b>>25

	return a, b, c, d
}

// secLoadUint32LE decodes the first four bytes of b as a little-endian
// uint32.
func secLoadUint32LE(b []byte) uint32 {
	var w uint32
	for i := 0; i < 4; i++ {
		w |= uint32(b[i]) << (8 * uint(i))
	}
	return w
}

// secPutUint32LE stores v into the first four bytes of b in little-endian
// order.
func secPutUint32LE(b []byte, v uint32) {
	for i := 0; i < 4; i++ {
		b[i] = byte(v >> (8 * uint(i)))
	}
}