1 //go:build darwin || (linux && !baremetal && !wasip1 && !wasm_unknown && !wasip2 && !nintendoswitch)
2 3 package runtime
4 5 // Secure allocator — page-guarded arenas with signal-handler wipe.
6 //
7 // Each secure allocation returns a []byte slice backed by its own mmap'd
8 // arena. The arena layout is:
9 //
//   [guard page][data page(s)     ][guard page]
//    PROT_NONE   PROT_RW            PROT_NONE
//                mlock'd,
//                DONTFORK|DONTDUMP
14 //
15 // Any out-of-bounds access via pointer arithmetic hits a guard page and
16 // raises SIGSEGV. runtime_unix.c's signal_handler calls into secalloc.c's
17 // moxie_secalloc_on_fatal_signal() BEFORE the rest of the fatal path, which
18 // synchronously memcpy's a noise pattern over every registered arena and
19 // writes one byte to the lockdown fd. The process then dies; any secret
20 // that lived in the arena has been overwritten before the handler returned,
21 // so no subsequent code (attacker ROP, debugger, kernel core dumper) can
22 // observe it.
23 //
// The noise buffer is first generated by secureInit (reached from the first
// SecureAlloc, SecureClear or SecureLockdown call), seeded from the kernel
// via hardwareRand() and expanded with ChaCha12 (see secalloc_chacha.mx).
// SecureRekey, SecureRotate and SecureLockdown regenerate it. It lives in a
// package-level byte array and is shared across all arenas.
28 //
29 // This is Milestone 1: the lockdown fd defaults to stderr (fd 2) so the
30 // test program can observe the notification byte. Milestone 2 will route
31 // the notification through a spawn-level lockdown channel inherited from
32 // the parent domain.
33 34 import (
35 "internal/gclayout"
36 "unsafe"
37 )
const (
	// secPageSize is the page granularity assumed for the arena layout:
	// each guard is one secPageSize span and secureMap rounds the data
	// region up to a multiple of it.
	// NOTE(review): this is a fixed value, not queried from the OS. On hosts
	// whose real page size is larger (e.g. 16 KiB) the mprotect calls in
	// secureMap may be misaligned — confirm against the supported targets.
	secPageSize = 4096
	// secNoiseSize is the length in bytes of the shared secNoise wipe
	// pattern; wipes of longer regions repeat the pattern.
	secNoiseSize = 4096
)
// secNoise holds the noise pattern used to overwrite arena contents on
// fault. secureGenerateNoise fills it with a ChaCha12 keystream: first from
// secureInit(), and again from SecureRekey (once initialised), SecureRotate
// and SecureLockdown. Lives in .bss until the first fill, which would
// produce a zero-wipe — still secure, just less entropy. The noise buffer
// is shared by every arena, and its address is handed to the C side by
// secureInit, so it must stay a package-level array.
var secNoise [secNoiseSize]byte
// secInited is set once secureInit() has generated the noise buffer and
// configured the C signal handler side. secureInit checks it to stay
// idempotent, and SecureRekey does nothing until it is true.
var secInited bool
// secLockdownFd is the file descriptor that receives the lockdown
// notification (the write end of the notification pipe). Default 2 is
// stderr; spawned children that want a private lockdown channel call
// SetSecureLockdownFd with a pipe fd inherited from the parent domain.
// secureInit passes the current value to the C side when it configures
// the handler.
var secLockdownFd int32 = 2
58 59 // SetSecureLockdownFd routes fault notifications to fd instead of stderr.
60 // Typical use: a parent domain creates a pipe, spawns a child with the
61 // write end inherited via fork, and the child calls SetSecureLockdownFd
62 // with that fd before any SecureAlloc. The parent's event loop watches
63 // the read end and reacts to lockdown bytes (e.g. clearing the UI of any
64 // memory derived from the secret, redrawing a tamper warning).
65 //
66 // Safe to call before or after the first SecureAlloc. Updates both the
67 // Moxie-side var (so a future secureInit picks it up if it hasn't run)
68 // AND the C-side notification fd (so an already-initialized handler
69 // switches over immediately).
70 func SetSecureLockdownFd(fd int32) {
71 secLockdownFd = fd
72 moxie_secalloc_set_lockdown_fd(fd)
73 }
74 75 // secureGenerateNoise fills secNoise with a fresh ChaCha12 keystream
76 // seeded from kernel entropy. Called at first SecureAlloc (via
77 // secureInit) and from SecureRekey / SecureRotate / SecureLockdown
78 // when refreshing the wipe pattern.
79 //
80 // Zeroes the buffer before XORing the keystream: ChaCha XOR against
81 // zero produces the keystream directly, but XOR against a previous
82 // keystream produces two overlapping streams, which is not a clean
83 // fresh keystream. The zero step is the difference between "first
84 // call" semantics (buffer starts clean) and "rekey" semantics
85 // (buffer holds stale noise).
86 func secureGenerateNoise() {
87 var seed [32]byte
88 for i := 0; i < 4; i++ {
89 n, ok := hardwareRand()
90 if !ok {
91 runtimePanic("secalloc: kernel entropy unavailable")
92 }
93 seed[i*8+0] = byte(n)
94 seed[i*8+1] = byte(n >> 8)
95 seed[i*8+2] = byte(n >> 16)
96 seed[i*8+3] = byte(n >> 24)
97 seed[i*8+4] = byte(n >> 32)
98 seed[i*8+5] = byte(n >> 40)
99 seed[i*8+6] = byte(n >> 48)
100 seed[i*8+7] = byte(n >> 56)
101 }
102 for i := range secNoise {
103 secNoise[i] = 0
104 }
105 var nonce [secChachaNonceSize]byte
106 secChachaXORKeyStream(secNoise[:], secNoise[:], nonce[:], seed[:], 12)
107 }
108 109 // secureInit generates the noise buffer and hands it to the C signal
110 // handler. Idempotent.
111 func secureInit() {
112 if secInited {
113 return
114 }
115 secureGenerateNoise()
116 moxie_secalloc_configure(
117 unsafe.Pointer(&secNoise[0]),
118 uintptr(secNoiseSize),
119 secLockdownFd,
120 )
121 secInited = true
122 }
123 124 // SecureRekey regenerates the noise buffer from fresh kernel entropy.
125 // Subsequent wipes (signal-handler, SecureClear, SecureLockdown, or
126 // SecureRotate) will use the new bytes. Existing wiped contents are
127 // NOT re-wiped — rekey only affects the pattern used from now on.
128 //
129 // Callers can invoke this periodically (on a timer, on session
130 // boundaries, on every rotation) to defeat attackers who captured
131 // noise bytes from a prior memory snapshot and would otherwise
132 // correlate them against future wipes. The noise buffer's virtual
133 // address does not change across rekey, so the C-side configuration
134 // pointer remains valid.
135 func SecureRekey() {
136 if !secInited {
137 return
138 }
139 secureGenerateNoise()
140 }
141 142 // SecureClear overwrites buf with the current noise pattern. Targeted
143 // wipe for point-in-time residency minimization: the caller invokes
144 // this at application context-change boundaries — logout, tenant
145 // switch, navigation away from a decrypted conversation, tab
146 // backgrounding — any moment where specific decrypted material is no
147 // longer needed and should not keep occupying memory.
148 //
149 // buf need not be SecureAlloc'd. SecureClear works on any []byte the
150 // caller holds; the guarantee is that on return, the bytes contain
151 // noise rather than the caller's prior contents. For SecureAlloc'd
152 // slices the underlying mapping stays valid (no unmap), so the slice
153 // can be reused immediately for fresh data.
154 //
155 // Policy note: SecureClear does NOT rekey. Frequent small clears
156 // should not pay the entropy cost of refreshing the shared noise
157 // buffer. Callers that want fresh bytes per clear can call SecureRekey
158 // explicitly.
159 func SecureClear(buf []byte) {
160 secureInit()
161 if len(buf) == 0 {
162 return
163 }
164 moxie_secalloc_clear(unsafe.Pointer(&buf[0]), uintptr(len(buf)))
165 }
166 167 // SecureLockdown synchronously wipes every registered arena with the
168 // current noise pattern and writes the lockdown marker to the notify
169 // fd. Unlike the fatal-signal path (which runs this sequence and then
170 // dies) SecureLockdown returns, so the caller stays alive.
171 //
172 // After the wipe, secureGenerateNoise regenerates the noise buffer
173 // from fresh kernel entropy. This bounds the lifetime of any given
174 // noise pattern to at most one lockdown — so if an attacker captured
175 // the noise from a memory snapshot taken before the lockdown, that
176 // snapshot does not help them interpret subsequent wipes.
177 //
178 // Semantics: "something broad happened and every secure arena should
179 // be treated as compromised." Typical triggers:
180 // - Emergency revocation from user or policy engine
181 // - Pre-suspend preparation before kernel puts RAM to sleep
182 // - Detected anomaly that doesn't warrant a full crash
183 //
184 // For routine context changes (wiping one specific buffer when it
185 // goes out of app-level scope) use SecureClear instead — it's
186 // targeted and cheaper.
187 func SecureLockdown() {
188 secureInit()
189 moxie_secalloc_lockdown()
190 secureGenerateNoise()
191 }
192 193 // secureAwareByteAlloc allocates n bytes. If secure is true, the allocation
194 // comes from a fresh SecureAlloc arena (guard-paged, mlocked, wipe-on-fault).
195 // If secure is false, a regular heap allocation is used. Used by bytesConcat
196 // to propagate the secure flag across concatenation: if either operand is
197 // secure, the concatenation result lives in a secure arena too.
198 func secureAwareByteAlloc(n uintptr, secure bool) []byte {
199 if secure {
200 return SecureAlloc(int32(n))
201 }
202 buf := alloc(n, gclayout.NoPtrs.AsPtr())
203 return unsafe.Slice((*byte)(buf), n)
204 }
205 206 // SecureAlloc returns a byte slice of exactly n bytes backed by a freshly
207 // mmap'd guarded arena. The underlying data pages are locked into RAM,
208 // excluded from core dumps, and excluded from fork inheritance. Any
209 // pointer-arithmetic access outside [0, n) hits a guard page and triggers
210 // the wipe-and-die handler.
211 //
212 // No free. Arenas persist for the process lifetime; regeneration must come
213 // from an authoritative source (re-derive, re-decrypt, re-prompt). For
214 // long-lived secrets, see SecureRotate which moves contents to a fresh
215 // mapping and wipes the old one.
216 func SecureAlloc(n int32) []byte {
217 secureInit()
218 if n <= 0 {
219 runtimePanic("secalloc: size must be positive")
220 }
221 dataStart, dataSize := secureMap(n)
222 if moxie_secalloc_register_arena(dataStart, dataSize) != 0 {
223 runtimePanic("secalloc: arena registry full")
224 }
225 return unsafe.Slice((*byte)(dataStart), n)
226 }
227 228 // secureMap mmaps a fresh guarded arena sized for n user bytes. Returns
229 // (dataStart, dataSize) — the first address the caller may write to and
230 // the number of usable bytes before the tail guard. The leading and
231 // trailing guard pages sit immediately before dataStart and immediately
232 // after dataStart+dataSize. Shared by SecureAlloc and SecureRotate.
233 func secureMap(n int32) (unsafe.Pointer, uintptr) {
234 dataSize := (uintptr(n) + secPageSize - 1) &^ (secPageSize - 1)
235 totalSize := dataSize + 2*secPageSize
236 237 addr := mmap(
238 nil,
239 totalSize,
240 flag_PROT_READ|flag_PROT_WRITE,
241 flag_MAP_PRIVATE|flag_MAP_ANONYMOUS,
242 -1,
243 0,
244 )
245 if addr == unsafe.Pointer(^uintptr(0)) {
246 runtimePanic("secalloc: mmap failed")
247 }
248 249 dataStart := unsafe.Add(addr, secPageSize)
250 tailGuard := unsafe.Add(dataStart, dataSize)
251 252 // Gold-standard upgrade: on Linux ≥5.14 replace the anonymous data pages
253 // with memfd_secret(2)-backed secretmem — pages the kernel itself cannot
254 // read through /proc/<pid>/mem or ptrace. Failure is silent and non-fatal;
255 // on Darwin or older Linux the anonymous mapping (with mlock + guards)
256 // remains in place and the arena stays secure via the portable path.
257 _ = moxie_secalloc_try_secretmem(dataStart, dataSize)
258 259 if mprotect(addr, secPageSize, flag_PROT_NONE) != 0 {
260 runtimePanic("secalloc: mprotect head guard failed")
261 }
262 if mprotect(tailGuard, secPageSize, flag_PROT_NONE) != 0 {
263 runtimePanic("secalloc: mprotect tail guard failed")
264 }
265 266 // Pin data pages in RAM (prevent swap leaks), exclude from core dumps
267 // and fork inheritance. Failures on these are advisory, not fatal:
268 // mlock may hit RLIMIT_MEMLOCK, madvise flags may be unsupported on
269 // older kernels. The guard pages and signal wipe still work without
270 // them.
271 _ = mlock(dataStart, dataSize)
272 _ = madvise(dataStart, dataSize, flag_MADV_DONTDUMP)
273 _ = madvise(dataStart, dataSize, flag_MADV_DONTFORK)
274 275 return dataStart, dataSize
276 }
277 278 // SecureRotate moves the contents of an existing SecureAlloc'd slice to a
279 // fresh guarded mapping, wipes the old arena with noise, unmaps it, and
280 // returns the new slice. The input slice must have been returned by
281 // SecureAlloc (directly or via a prior SecureRotate). After the call, the
282 // old backing memory is unmapped — any Moxie code still holding a pointer
283 // into it will SIGSEGV on access, triggering the normal wipe-and-die path.
284 //
285 // Use cases:
286 // - Rotating a long-lived session key so its virtual address changes over
287 // time, defeating adversaries who observed the VA at any prior moment
288 // (e.g. /proc/self/mem leak, core-dump escape, ROP read gadget).
289 // - Forcing eviction from page-cache entries that may have been scanned.
290 //
291 // The caller MUST drop any aliases to the old slice before calling, since
292 // those aliases become dangling pointers into unmapped memory.
293 func SecureRotate(old []byte) []byte {
294 if len(old) == 0 {
295 runtimePanic("secalloc: rotate empty slice")
296 }
297 oldBase := unsafe.Pointer(&old[0])
298 oldDataSize := (uintptr(len(old)) + secPageSize - 1) &^ (secPageSize - 1)
299 300 // Allocate the replacement arena first. If this fails we panic before
301 // touching the old one, leaving the caller's slice valid.
302 newBase, newDataSize := secureMap(int32(len(old)))
303 304 // Copy bytes into the new arena.
305 dst := unsafe.Slice((*byte)(newBase), len(old))
306 copy(dst, old)
307 308 // Register the replacement. If the registry is full the old arena is
309 // still live; unmap the replacement and panic.
310 if moxie_secalloc_register_arena(newBase, newDataSize) != 0 {
311 _ = munmap(unsafe.Add(newBase, -int(secPageSize)), newDataSize+2*secPageSize)
312 runtimePanic("secalloc: arena registry full")
313 }
314 315 // Regenerate the noise buffer BEFORE wiping the old arena. Rotation
316 // is the natural re-key point: both the virtual address (new mmap)
317 // and the wipe signature (new noise) change in one step, so an
318 // attacker who observed either side of the old arena gets no useful
319 // bridge to the new one.
320 secureGenerateNoise()
321 322 // Wipe the old arena with the (now-fresh) noise pattern BEFORE
323 // unmapping. If any page aliases survive in TLB or another thread's
324 // view, they see noise instead of the secret.
325 secureWipe(oldBase, oldDataSize)
326 327 // Drop the old arena from the registry so the signal handler stops
328 // touching it. Must happen before munmap: otherwise a concurrent fault
329 // could hit unmapped memory mid-wipe.
330 moxie_secalloc_unregister_arena(oldBase)
331 332 // Unmap the full three-page mapping (head guard, data, tail guard).
333 _ = munmap(unsafe.Add(oldBase, -int(secPageSize)), oldDataSize+2*secPageSize)
334 335 return dst
336 }
337 338 // secureWipe overwrites length bytes at base with the repeating noise
339 // pattern generated by secureInit. Inlined memcpy loop — no C call,
340 // keeps this usable from contexts where the C side might be re-entered
341 // (not currently the case, but cheap insurance).
342 func secureWipe(base unsafe.Pointer, length uintptr) {
343 if !secInited {
344 return
345 }
346 noise := unsafe.Pointer(&secNoise[0])
347 off := uintptr(0)
348 for off < length {
349 n := length - off
350 if n > secNoiseSize {
351 n = secNoiseSize
352 }
353 dst := unsafe.Slice((*byte)(unsafe.Add(base, off)), n)
354 src := unsafe.Slice((*byte)(noise), n)
355 copy(dst, src)
356 off += n
357 }
358 }
// moxie_secalloc_register_arena hands a new guarded region to the C-side
// signal handler for wipe tracking. Callers in this file pass the data
// start address and page-rounded data size returned by secureMap (guard
// pages excluded). Returns 0 on success, -1 if the registry is full;
// SecureAlloc and SecureRotate panic on any non-zero result. The C side
// reuses NULL'd-out slots left by previous unregister calls so
// long-running domains that rotate arenas don't exhaust the fixed-size
// registry.
//
//export moxie_secalloc_register_arena
func moxie_secalloc_register_arena(base unsafe.Pointer, length uintptr) int32
// moxie_secalloc_unregister_arena marks the slot for an arena as free.
// base is expected to be the data start address the arena was registered
// with. The next register call may reuse the slot. The signal-handler wipe
// loop skips NULL entries, so after unregister the arena is no longer
// wiped. SecureRotate calls this on the old arena after wiping it and
// before unmapping it.
//
//export moxie_secalloc_unregister_arena
func moxie_secalloc_unregister_arena(base unsafe.Pointer)
// moxie_secalloc_configure hands the noise buffer and lockdown fd to the
// C-side signal handler. Called once by secureInit(), which passes the
// address of secNoise[0], secNoiseSize and the current secLockdownFd.
//
//export moxie_secalloc_configure
func moxie_secalloc_configure(noise unsafe.Pointer, noiseLen uintptr, fd int32)
// moxie_secalloc_set_lockdown_fd updates only the lockdown fd on the C side.
// Used by SetSecureLockdownFd to retarget notifications without rewriting
// the noise buffer pointer. SetSecureLockdownFd calls it unconditionally,
// so it may run before secureInit has configured the handler.
//
//export moxie_secalloc_set_lockdown_fd
func moxie_secalloc_set_lockdown_fd(fd int32)
// moxie_secalloc_clear overwrites a single buffer with the current noise
// pattern. No registry mutation, no fd write — targeted per-buffer wipe
// for SecureClear's context-change use case. SecureClear passes the
// address of the slice's first byte and its length, and never calls this
// for an empty slice.
//
//export moxie_secalloc_clear
func moxie_secalloc_clear(base unsafe.Pointer, length uintptr)
// moxie_secalloc_lockdown wipes every registered arena with noise and
// writes the lockdown marker to the notify fd. Shared entry point for
// both the fatal-signal path and the explicit SecureLockdown primitive.
// SecureLockdown regenerates the noise buffer after this returns.
//
//export moxie_secalloc_lockdown
func moxie_secalloc_lockdown()
// moxie_secalloc_try_secretmem attempts to replace the anonymous data pages
// at base with memfd_secret(2)-backed secretmem (Linux ≥5.14). Returns 0 on
// success, -1 on failure. Failure is silent — the existing mmap+mlock
// mapping stays in place. Darwin always returns -1. secureMap calls it on
// the freshly mapped data region, before the guards are protected and
// before mlock/madvise, and ignores the result.
//
//export moxie_secalloc_try_secretmem
func moxie_secalloc_try_secretmem(base unsafe.Pointer, length uintptr) int32
// moxie_secalloc_contains returns 1 if ptr is inside any currently registered
// arena's data region, 0 otherwise. The runtime comparison/concat dispatch
// (stringEqual / stringLess / bytesConcat) calls this once per operand and
// routes through the constant-time path if either returns 1. Fast-out when
// no arenas are registered so non-crypto programs pay one load per compare.
// Runtime code reaches it through the isSecurePtr wrapper, which converts
// the result to a bool.
//
//export moxie_secalloc_contains
func moxie_secalloc_contains(ptr unsafe.Pointer) int32
419 420 // isSecurePtr is the runtime-internal wrapper around moxie_secalloc_contains.
421 // Returns true when ptr is the backing address of a secure allocation (or
422 // any subslice derived from one, since subslicing preserves the pointer's
423 // arena membership). The runtime comparison path calls this on each operand
424 // of stringEqual / stringLess / bytesConcat and promotes the operation to
425 // constant-time when either side is secure.
426 //
427 // On platforms without the secalloc machinery (WASM, baremetal) this symbol
428 // is provided by a stub that always returns false.
429 func isSecurePtr(ptr unsafe.Pointer) bool {
430 return moxie_secalloc_contains(ptr) != 0
431 }
432