secalloc_chacha.mx raw
1 //go:build darwin || (linux && !baremetal && !wasip1 && !wasm_unknown && !wasip2 && !nintendoswitch)
2
3 package runtime
4
5 // ChaCha12 keystream generator used by the secure allocator to fill its
6 // noise buffer once at process init.
7 //
8 // This is a minimal port of github.com/aead/chacha20/chacha (generic path
9 // only). It lives inside the runtime package because importing a separate
10 // frand package would create a cycle: runtime → frand → encoding/binary
11 // → errors → runtime. The chacha primitive has no external dependencies,
12 // so it sits next to the secure allocator with no fan-out.
13 //
14 // Only the bare-minimum surface is exposed to secalloc.mx:
15 // - secChachaXORKeyStream(dst, src, nonce, key, rounds)
16 // 8-byte nonce only, no XChaCha20, no IETF 96-bit nonce, no Cipher struct.
17
const (
	// secChachaKeySize is the key length in bytes (a 256-bit key) that
	// secChachaXORKeyStream insists on.
	secChachaKeySize = 256 / 8

	// secChachaNonceSize is the nonce length in bytes (a 64-bit nonce) that
	// secChachaXORKeyStream insists on.
	secChachaNonceSize = 64 / 8
)
22
// _secChachaSigma returns the four constant words that occupy the first 16
// bytes of the ChaCha state: the ASCII tag "expand 32-byte k" split into
// 4-byte groups, each read as a little-endian uint32.
func _secChachaSigma() [4]uint32 {
	const tag = "expand 32-byte k"

	var words [4]uint32
	for i := range words {
		chunk := tag[4*i : 4*i+4]
		words[i] = uint32(chunk[0]) | uint32(chunk[1])<<8 | uint32(chunk[2])<<16 | uint32(chunk[3])<<24
	}
	return words
}
26
27 func secChachaXORKeyStream(dst, src, nonce, key []byte, rounds int) {
28 if rounds != 20 && rounds != 12 && rounds != 8 {
29 runtimePanic("secalloc: bad chacha rounds")
30 }
31 if len(key) != secChachaKeySize {
32 runtimePanic("secalloc: bad chacha key length")
33 }
34 if len(nonce) != secChachaNonceSize {
35 runtimePanic("secalloc: bad chacha nonce length")
36 }
37 if len(dst) < len(src) {
38 runtimePanic("secalloc: chacha dst too small")
39 }
40
41 var state [64]byte
42 var block [64]byte
43
44 sigma := _secChachaSigma()
45 secPutUint32LE(state[0:], sigma[0])
46 secPutUint32LE(state[4:], sigma[1])
47 secPutUint32LE(state[8:], sigma[2])
48 secPutUint32LE(state[12:], sigma[3])
49 copy(state[16:48], key)
50 copy(state[56:64], nonce)
51
52 for len(src) >= 64 {
53 secChachaBlock(&block, &state, rounds)
54 for i, v := range block {
55 dst[i] = src[i] ^ v
56 }
57 src = src[64:]
58 dst = dst[64:]
59 }
60 if n := len(src); n > 0 {
61 secChachaBlock(&block, &state, rounds)
62 for i := 0; i < n; i++ {
63 dst[i] = src[i] ^ block[i]
64 }
65 }
66 }
67
68 // secChachaBlock runs the ChaCha core on state, writes 64 bytes of
69 // keystream to dst, and increments the 64-bit counter at state[48:56].
70 func secChachaBlock(dst *[64]byte, state *[64]byte, rounds int) {
71 v00 := secLoadUint32LE(state[0:])
72 v01 := secLoadUint32LE(state[4:])
73 v02 := secLoadUint32LE(state[8:])
74 v03 := secLoadUint32LE(state[12:])
75 v04 := secLoadUint32LE(state[16:])
76 v05 := secLoadUint32LE(state[20:])
77 v06 := secLoadUint32LE(state[24:])
78 v07 := secLoadUint32LE(state[28:])
79 v08 := secLoadUint32LE(state[32:])
80 v09 := secLoadUint32LE(state[36:])
81 v10 := secLoadUint32LE(state[40:])
82 v11 := secLoadUint32LE(state[44:])
83 v12 := secLoadUint32LE(state[48:])
84 v13 := secLoadUint32LE(state[52:])
85 v14 := secLoadUint32LE(state[56:])
86 v15 := secLoadUint32LE(state[60:])
87
88 s00, s01, s02, s03, s04, s05, s06, s07 := v00, v01, v02, v03, v04, v05, v06, v07
89 s08, s09, s10, s11, s12, s13, s14, s15 := v08, v09, v10, v11, v12, v13, v14, v15
90
91 for i := 0; i < rounds; i += 2 {
92 // Column round.
93 v00 += v04
94 v12 ^= v00
95 v12 = (v12 << 16) | (v12 >> 16)
96 v08 += v12
97 v04 ^= v08
98 v04 = (v04 << 12) | (v04 >> 20)
99 v00 += v04
100 v12 ^= v00
101 v12 = (v12 << 8) | (v12 >> 24)
102 v08 += v12
103 v04 ^= v08
104 v04 = (v04 << 7) | (v04 >> 25)
105
106 v01 += v05
107 v13 ^= v01
108 v13 = (v13 << 16) | (v13 >> 16)
109 v09 += v13
110 v05 ^= v09
111 v05 = (v05 << 12) | (v05 >> 20)
112 v01 += v05
113 v13 ^= v01
114 v13 = (v13 << 8) | (v13 >> 24)
115 v09 += v13
116 v05 ^= v09
117 v05 = (v05 << 7) | (v05 >> 25)
118
119 v02 += v06
120 v14 ^= v02
121 v14 = (v14 << 16) | (v14 >> 16)
122 v10 += v14
123 v06 ^= v10
124 v06 = (v06 << 12) | (v06 >> 20)
125 v02 += v06
126 v14 ^= v02
127 v14 = (v14 << 8) | (v14 >> 24)
128 v10 += v14
129 v06 ^= v10
130 v06 = (v06 << 7) | (v06 >> 25)
131
132 v03 += v07
133 v15 ^= v03
134 v15 = (v15 << 16) | (v15 >> 16)
135 v11 += v15
136 v07 ^= v11
137 v07 = (v07 << 12) | (v07 >> 20)
138 v03 += v07
139 v15 ^= v03
140 v15 = (v15 << 8) | (v15 >> 24)
141 v11 += v15
142 v07 ^= v11
143 v07 = (v07 << 7) | (v07 >> 25)
144
145 // Diagonal round.
146 v00 += v05
147 v15 ^= v00
148 v15 = (v15 << 16) | (v15 >> 16)
149 v10 += v15
150 v05 ^= v10
151 v05 = (v05 << 12) | (v05 >> 20)
152 v00 += v05
153 v15 ^= v00
154 v15 = (v15 << 8) | (v15 >> 24)
155 v10 += v15
156 v05 ^= v10
157 v05 = (v05 << 7) | (v05 >> 25)
158
159 v01 += v06
160 v12 ^= v01
161 v12 = (v12 << 16) | (v12 >> 16)
162 v11 += v12
163 v06 ^= v11
164 v06 = (v06 << 12) | (v06 >> 20)
165 v01 += v06
166 v12 ^= v01
167 v12 = (v12 << 8) | (v12 >> 24)
168 v11 += v12
169 v06 ^= v11
170 v06 = (v06 << 7) | (v06 >> 25)
171
172 v02 += v07
173 v13 ^= v02
174 v13 = (v13 << 16) | (v13 >> 16)
175 v08 += v13
176 v07 ^= v08
177 v07 = (v07 << 12) | (v07 >> 20)
178 v02 += v07
179 v13 ^= v02
180 v13 = (v13 << 8) | (v13 >> 24)
181 v08 += v13
182 v07 ^= v08
183 v07 = (v07 << 7) | (v07 >> 25)
184
185 v03 += v04
186 v14 ^= v03
187 v14 = (v14 << 16) | (v14 >> 16)
188 v09 += v14
189 v04 ^= v09
190 v04 = (v04 << 12) | (v04 >> 20)
191 v03 += v04
192 v14 ^= v03
193 v14 = (v14 << 8) | (v14 >> 24)
194 v09 += v14
195 v04 ^= v09
196 v04 = (v04 << 7) | (v04 >> 25)
197 }
198
199 v00 += s00
200 v01 += s01
201 v02 += s02
202 v03 += s03
203 v04 += s04
204 v05 += s05
205 v06 += s06
206 v07 += s07
207 v08 += s08
208 v09 += s09
209 v10 += s10
210 v11 += s11
211 v12 += s12
212 v13 += s13
213 v14 += s14
214 v15 += s15
215
216 // Increment 64-bit counter at state[48:56].
217 s12++
218 secPutUint32LE(state[48:], s12)
219 if s12 == 0 {
220 s13++
221 secPutUint32LE(state[52:], s13)
222 }
223
224 secPutUint32LE(dst[0:], v00)
225 secPutUint32LE(dst[4:], v01)
226 secPutUint32LE(dst[8:], v02)
227 secPutUint32LE(dst[12:], v03)
228 secPutUint32LE(dst[16:], v04)
229 secPutUint32LE(dst[20:], v05)
230 secPutUint32LE(dst[24:], v06)
231 secPutUint32LE(dst[28:], v07)
232 secPutUint32LE(dst[32:], v08)
233 secPutUint32LE(dst[36:], v09)
234 secPutUint32LE(dst[40:], v10)
235 secPutUint32LE(dst[44:], v11)
236 secPutUint32LE(dst[48:], v12)
237 secPutUint32LE(dst[52:], v13)
238 secPutUint32LE(dst[56:], v14)
239 secPutUint32LE(dst[60:], v15)
240 }
241
// secLoadUint32LE decodes the first four bytes of b as a little-endian
// uint32. b must hold at least four bytes; anything past the fourth byte is
// ignored.
func secLoadUint32LE(b []byte) uint32 {
	low := uint32(b[0]) | uint32(b[1])<<8
	high := uint32(b[2]) | uint32(b[3])<<8
	return high<<16 | low
}
245
// secPutUint32LE stores v into the first four bytes of b in little-endian
// order (least significant byte first). b must hold at least four bytes;
// bytes past the fourth are not touched.
func secPutUint32LE(b []byte, v uint32) {
	for i := 0; i < 4; i++ {
		b[i] = byte(v)
		v >>= 8
	}
}
252