secalloc.mx raw

   1  //go:build darwin || (linux && !baremetal && !wasip1 && !wasm_unknown && !wasip2 && !nintendoswitch)
   2  
   3  package runtime
   4  
   5  // Secure allocator — page-guarded arenas with signal-handler wipe.
   6  //
   7  // Each secure allocation returns a []byte slice backed by its own mmap'd
   8  // arena. The arena layout is:
   9  //
  10  //   [guard page][data page(s)][guard page]
  11  //     PROT_NONE    PROT_RW        PROT_NONE
  12  //                 mlock'd,
  13  //                 DONTFORK|DONTDUMP
  14  //
  15  // Any out-of-bounds access via pointer arithmetic hits a guard page and
  16  // raises SIGSEGV. runtime_unix.c's signal_handler calls into secalloc.c's
  17  // moxie_secalloc_on_fatal_signal() BEFORE the rest of the fatal path, which
  18  // synchronously memcpy's a noise pattern over every registered arena and
  19  // writes one byte to the lockdown fd. The process then dies; any secret
  20  // that lived in the arena has been overwritten before the handler returned,
  21  // so no subsequent code (attacker ROP, debugger, kernel core dumper) can
  22  // observe it.
  23  //
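// The noise buffer is first generated by secureInit(), which runs on the
// first SecureAlloc, SecureClear or SecureLockdown call. It is seeded from
// the kernel via hardwareRand() and expanded with ChaCha12 (see
// secalloc_chacha.mx). SecureRekey, SecureRotate and SecureLockdown
// regenerate it from fresh entropy. It lives in a package-level byte array
// and is shared across all arenas.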
  28  //
  29  // This is Milestone 1: the lockdown fd defaults to stderr (fd 2) so the
  30  // test program can observe the notification byte. Milestone 2 will route
  31  // the notification through a spawn-level lockdown channel inherited from
  32  // the parent domain.
  33  
  34  import (
  35  	"internal/gclayout"
  36  	"unsafe"
  37  )
  38  
// Sizing constants for the secure allocator.
//
// secPageSize is the granularity used by secureMap and SecureRotate to round
// the data region and to place the leading/trailing guard pages.
// secNoiseSize is the length of the shared secNoise wipe pattern; secureWipe
// repeats the pattern for regions longer than this.
//
// NOTE(review): secPageSize is hard-coded and therefore assumes the kernel
// page size is 4096 bytes. On targets with larger pages (e.g. 16 KiB pages on
// Apple Silicon, or 16/64 KiB arm64 Linux kernels) the guard mprotect calls
// would presumably be misaligned or cover the data region — confirm, and
// consider querying the page size at runtime.
const (
	secPageSize  = 4096
	secNoiseSize = 4096
)
  43  
// secNoise holds the noise pattern used to overwrite arena contents on
// fault. It is filled with a ChaCha12 keystream by secureGenerateNoise(),
// first from secureInit() and again on every rekey. Before the first fill,
// and briefly while secureGenerateNoise() is zeroing it ahead of a refill,
// the buffer contains zeros; a wipe in that window would be a zero-wipe —
// the secret is still destroyed, only the pattern is predictable. The
// buffer is shared by every arena, and its address is handed to the C side
// by secureInit(), so it must stay a fixed package-level array.
var secNoise [secNoiseSize]byte
  49  
// secInited is set once secureInit() has generated the noise buffer and
// configured the C signal handler side. secureInit() uses it to run only
// once; SecureRekey() and secureWipe() are no-ops while it is false.
var secInited bool
  53  
// secLockdownFd is the file descriptor that receives the lockdown
// notification. The default, 2, is stderr; spawned children that want a
// private lockdown channel call SetSecureLockdownFd with the write end of a
// pipe inherited from the parent domain. secureInit() passes the current
// value to moxie_secalloc_configure.
var secLockdownFd int32 = 2
  58  
  59  // SetSecureLockdownFd routes fault notifications to fd instead of stderr.
  60  // Typical use: a parent domain creates a pipe, spawns a child with the
  61  // write end inherited via fork, and the child calls SetSecureLockdownFd
  62  // with that fd before any SecureAlloc. The parent's event loop watches
  63  // the read end and reacts to lockdown bytes (e.g. clearing the UI of any
  64  // memory derived from the secret, redrawing a tamper warning).
  65  //
  66  // Safe to call before or after the first SecureAlloc. Updates both the
  67  // Moxie-side var (so a future secureInit picks it up if it hasn't run)
  68  // AND the C-side notification fd (so an already-initialized handler
  69  // switches over immediately).
  70  func SetSecureLockdownFd(fd int32) {
  71  	secLockdownFd = fd
  72  	moxie_secalloc_set_lockdown_fd(fd)
  73  }
  74  
  75  // secureGenerateNoise fills secNoise with a fresh ChaCha12 keystream
  76  // seeded from kernel entropy. Called at first SecureAlloc (via
  77  // secureInit) and from SecureRekey / SecureRotate / SecureLockdown
  78  // when refreshing the wipe pattern.
  79  //
  80  // Zeroes the buffer before XORing the keystream: ChaCha XOR against
  81  // zero produces the keystream directly, but XOR against a previous
  82  // keystream produces two overlapping streams, which is not a clean
  83  // fresh keystream. The zero step is the difference between "first
  84  // call" semantics (buffer starts clean) and "rekey" semantics
  85  // (buffer holds stale noise).
  86  func secureGenerateNoise() {
  87  	var seed [32]byte
  88  	for i := 0; i < 4; i++ {
  89  		n, ok := hardwareRand()
  90  		if !ok {
  91  			runtimePanic("secalloc: kernel entropy unavailable")
  92  		}
  93  		seed[i*8+0] = byte(n)
  94  		seed[i*8+1] = byte(n >> 8)
  95  		seed[i*8+2] = byte(n >> 16)
  96  		seed[i*8+3] = byte(n >> 24)
  97  		seed[i*8+4] = byte(n >> 32)
  98  		seed[i*8+5] = byte(n >> 40)
  99  		seed[i*8+6] = byte(n >> 48)
 100  		seed[i*8+7] = byte(n >> 56)
 101  	}
 102  	for i := range secNoise {
 103  		secNoise[i] = 0
 104  	}
 105  	var nonce [secChachaNonceSize]byte
 106  	secChachaXORKeyStream(secNoise[:], secNoise[:], nonce[:], seed[:], 12)
 107  }
 108  
 109  // secureInit generates the noise buffer and hands it to the C signal
 110  // handler. Idempotent.
 111  func secureInit() {
 112  	if secInited {
 113  		return
 114  	}
 115  	secureGenerateNoise()
 116  	moxie_secalloc_configure(
 117  		unsafe.Pointer(&secNoise[0]),
 118  		uintptr(secNoiseSize),
 119  		secLockdownFd,
 120  	)
 121  	secInited = true
 122  }
 123  
 124  // SecureRekey regenerates the noise buffer from fresh kernel entropy.
 125  // Subsequent wipes (signal-handler, SecureClear, SecureLockdown, or
 126  // SecureRotate) will use the new bytes. Existing wiped contents are
 127  // NOT re-wiped — rekey only affects the pattern used from now on.
 128  //
 129  // Callers can invoke this periodically (on a timer, on session
 130  // boundaries, on every rotation) to defeat attackers who captured
 131  // noise bytes from a prior memory snapshot and would otherwise
 132  // correlate them against future wipes. The noise buffer's virtual
 133  // address does not change across rekey, so the C-side configuration
 134  // pointer remains valid.
 135  func SecureRekey() {
 136  	if !secInited {
 137  		return
 138  	}
 139  	secureGenerateNoise()
 140  }
 141  
 142  // SecureClear overwrites buf with the current noise pattern. Targeted
 143  // wipe for point-in-time residency minimization: the caller invokes
 144  // this at application context-change boundaries — logout, tenant
 145  // switch, navigation away from a decrypted conversation, tab
 146  // backgrounding — any moment where specific decrypted material is no
 147  // longer needed and should not keep occupying memory.
 148  //
 149  // buf need not be SecureAlloc'd. SecureClear works on any []byte the
 150  // caller holds; the guarantee is that on return, the bytes contain
 151  // noise rather than the caller's prior contents. For SecureAlloc'd
 152  // slices the underlying mapping stays valid (no unmap), so the slice
 153  // can be reused immediately for fresh data.
 154  //
 155  // Policy note: SecureClear does NOT rekey. Frequent small clears
 156  // should not pay the entropy cost of refreshing the shared noise
 157  // buffer. Callers that want fresh bytes per clear can call SecureRekey
 158  // explicitly.
 159  func SecureClear(buf []byte) {
 160  	secureInit()
 161  	if len(buf) == 0 {
 162  		return
 163  	}
 164  	moxie_secalloc_clear(unsafe.Pointer(&buf[0]), uintptr(len(buf)))
 165  }
 166  
// SecureLockdown synchronously wipes every registered arena with the
// current noise pattern and writes the lockdown marker to the notify
// fd. Unlike the fatal-signal path (which runs this sequence and then
// dies) SecureLockdown returns, so the caller stays alive.
//
// After the wipe, secureGenerateNoise regenerates the noise buffer
// from fresh kernel entropy. This bounds the lifetime of any given
// noise pattern to at most one lockdown — so if an attacker captured
// the noise from a memory snapshot taken before the lockdown, that
// snapshot does not help them interpret subsequent wipes.
//
// Semantics: "something broad happened and every secure arena should
// be treated as compromised." Typical triggers:
//   - Emergency revocation from user or policy engine
//   - Pre-suspend preparation before kernel puts RAM to sleep
//   - Detected anomaly that doesn't warrant a full crash
//
// For routine context changes (wiping one specific buffer when it
// goes out of app-level scope) use SecureClear instead — it's
// targeted and cheaper.
//
// The three calls below are order-dependent: the allocator must be
// configured before the C side can wipe, and the noise is refreshed only
// after the wipe has used the old pattern.
func SecureLockdown() {
	secureInit()
	moxie_secalloc_lockdown()
	secureGenerateNoise()
}
 192  
 193  // secureAwareByteAlloc allocates n bytes. If secure is true, the allocation
 194  // comes from a fresh SecureAlloc arena (guard-paged, mlocked, wipe-on-fault).
 195  // If secure is false, a regular heap allocation is used. Used by bytesConcat
 196  // to propagate the secure flag across concatenation: if either operand is
 197  // secure, the concatenation result lives in a secure arena too.
 198  func secureAwareByteAlloc(n uintptr, secure bool) []byte {
 199  	if secure {
 200  		return SecureAlloc(int32(n))
 201  	}
 202  	buf := alloc(n, gclayout.NoPtrs.AsPtr())
 203  	return unsafe.Slice((*byte)(buf), n)
 204  }
 205  
 206  // SecureAlloc returns a byte slice of exactly n bytes backed by a freshly
 207  // mmap'd guarded arena. The underlying data pages are locked into RAM,
 208  // excluded from core dumps, and excluded from fork inheritance. Any
 209  // pointer-arithmetic access outside [0, n) hits a guard page and triggers
 210  // the wipe-and-die handler.
 211  //
 212  // No free. Arenas persist for the process lifetime; regeneration must come
 213  // from an authoritative source (re-derive, re-decrypt, re-prompt). For
 214  // long-lived secrets, see SecureRotate which moves contents to a fresh
 215  // mapping and wipes the old one.
 216  func SecureAlloc(n int32) []byte {
 217  	secureInit()
 218  	if n <= 0 {
 219  		runtimePanic("secalloc: size must be positive")
 220  	}
 221  	dataStart, dataSize := secureMap(n)
 222  	if moxie_secalloc_register_arena(dataStart, dataSize) != 0 {
 223  		runtimePanic("secalloc: arena registry full")
 224  	}
 225  	return unsafe.Slice((*byte)(dataStart), n)
 226  }
 227  
 228  // secureMap mmaps a fresh guarded arena sized for n user bytes. Returns
 229  // (dataStart, dataSize) — the first address the caller may write to and
 230  // the number of usable bytes before the tail guard. The leading and
 231  // trailing guard pages sit immediately before dataStart and immediately
 232  // after dataStart+dataSize. Shared by SecureAlloc and SecureRotate.
 233  func secureMap(n int32) (unsafe.Pointer, uintptr) {
 234  	dataSize := (uintptr(n) + secPageSize - 1) &^ (secPageSize - 1)
 235  	totalSize := dataSize + 2*secPageSize
 236  
 237  	addr := mmap(
 238  		nil,
 239  		totalSize,
 240  		flag_PROT_READ|flag_PROT_WRITE,
 241  		flag_MAP_PRIVATE|flag_MAP_ANONYMOUS,
 242  		-1,
 243  		0,
 244  	)
 245  	if addr == unsafe.Pointer(^uintptr(0)) {
 246  		runtimePanic("secalloc: mmap failed")
 247  	}
 248  
 249  	dataStart := unsafe.Add(addr, secPageSize)
 250  	tailGuard := unsafe.Add(dataStart, dataSize)
 251  
 252  	// Gold-standard upgrade: on Linux ≥5.14 replace the anonymous data pages
 253  	// with memfd_secret(2)-backed secretmem — pages the kernel itself cannot
 254  	// read through /proc/<pid>/mem or ptrace. Failure is silent and non-fatal;
 255  	// on Darwin or older Linux the anonymous mapping (with mlock + guards)
 256  	// remains in place and the arena stays secure via the portable path.
 257  	_ = moxie_secalloc_try_secretmem(dataStart, dataSize)
 258  
 259  	if mprotect(addr, secPageSize, flag_PROT_NONE) != 0 {
 260  		runtimePanic("secalloc: mprotect head guard failed")
 261  	}
 262  	if mprotect(tailGuard, secPageSize, flag_PROT_NONE) != 0 {
 263  		runtimePanic("secalloc: mprotect tail guard failed")
 264  	}
 265  
 266  	// Pin data pages in RAM (prevent swap leaks), exclude from core dumps
 267  	// and fork inheritance. Failures on these are advisory, not fatal:
 268  	// mlock may hit RLIMIT_MEMLOCK, madvise flags may be unsupported on
 269  	// older kernels. The guard pages and signal wipe still work without
 270  	// them.
 271  	_ = mlock(dataStart, dataSize)
 272  	_ = madvise(dataStart, dataSize, flag_MADV_DONTDUMP)
 273  	_ = madvise(dataStart, dataSize, flag_MADV_DONTFORK)
 274  
 275  	return dataStart, dataSize
 276  }
 277  
 278  // SecureRotate moves the contents of an existing SecureAlloc'd slice to a
 279  // fresh guarded mapping, wipes the old arena with noise, unmaps it, and
 280  // returns the new slice. The input slice must have been returned by
 281  // SecureAlloc (directly or via a prior SecureRotate). After the call, the
 282  // old backing memory is unmapped — any Moxie code still holding a pointer
 283  // into it will SIGSEGV on access, triggering the normal wipe-and-die path.
 284  //
 285  // Use cases:
 286  //   - Rotating a long-lived session key so its virtual address changes over
 287  //     time, defeating adversaries who observed the VA at any prior moment
 288  //     (e.g. /proc/self/mem leak, core-dump escape, ROP read gadget).
 289  //   - Forcing eviction from page-cache entries that may have been scanned.
 290  //
 291  // The caller MUST drop any aliases to the old slice before calling, since
 292  // those aliases become dangling pointers into unmapped memory.
 293  func SecureRotate(old []byte) []byte {
 294  	if len(old) == 0 {
 295  		runtimePanic("secalloc: rotate empty slice")
 296  	}
 297  	oldBase := unsafe.Pointer(&old[0])
 298  	oldDataSize := (uintptr(len(old)) + secPageSize - 1) &^ (secPageSize - 1)
 299  
 300  	// Allocate the replacement arena first. If this fails we panic before
 301  	// touching the old one, leaving the caller's slice valid.
 302  	newBase, newDataSize := secureMap(int32(len(old)))
 303  
 304  	// Copy bytes into the new arena.
 305  	dst := unsafe.Slice((*byte)(newBase), len(old))
 306  	copy(dst, old)
 307  
 308  	// Register the replacement. If the registry is full the old arena is
 309  	// still live; unmap the replacement and panic.
 310  	if moxie_secalloc_register_arena(newBase, newDataSize) != 0 {
 311  		_ = munmap(unsafe.Add(newBase, -int(secPageSize)), newDataSize+2*secPageSize)
 312  		runtimePanic("secalloc: arena registry full")
 313  	}
 314  
 315  	// Regenerate the noise buffer BEFORE wiping the old arena. Rotation
 316  	// is the natural re-key point: both the virtual address (new mmap)
 317  	// and the wipe signature (new noise) change in one step, so an
 318  	// attacker who observed either side of the old arena gets no useful
 319  	// bridge to the new one.
 320  	secureGenerateNoise()
 321  
 322  	// Wipe the old arena with the (now-fresh) noise pattern BEFORE
 323  	// unmapping. If any page aliases survive in TLB or another thread's
 324  	// view, they see noise instead of the secret.
 325  	secureWipe(oldBase, oldDataSize)
 326  
 327  	// Drop the old arena from the registry so the signal handler stops
 328  	// touching it. Must happen before munmap: otherwise a concurrent fault
 329  	// could hit unmapped memory mid-wipe.
 330  	moxie_secalloc_unregister_arena(oldBase)
 331  
 332  	// Unmap the full three-page mapping (head guard, data, tail guard).
 333  	_ = munmap(unsafe.Add(oldBase, -int(secPageSize)), oldDataSize+2*secPageSize)
 334  
 335  	return dst
 336  }
 337  
 338  // secureWipe overwrites length bytes at base with the repeating noise
 339  // pattern generated by secureInit. Inlined memcpy loop — no C call,
 340  // keeps this usable from contexts where the C side might be re-entered
 341  // (not currently the case, but cheap insurance).
 342  func secureWipe(base unsafe.Pointer, length uintptr) {
 343  	if !secInited {
 344  		return
 345  	}
 346  	noise := unsafe.Pointer(&secNoise[0])
 347  	off := uintptr(0)
 348  	for off < length {
 349  		n := length - off
 350  		if n > secNoiseSize {
 351  			n = secNoiseSize
 352  		}
 353  		dst := unsafe.Slice((*byte)(unsafe.Add(base, off)), n)
 354  		src := unsafe.Slice((*byte)(noise), n)
 355  		copy(dst, src)
 356  		off += n
 357  	}
 358  }
 359  
// moxie_secalloc_register_arena hands a new guarded region to the C-side
// signal handler for wipe tracking. base is the start of the arena's data
// region and length its size in bytes (callers in this file pass the pair
// returned by secureMap). Returns 0 on success, -1 if the registry is full;
// callers treat any non-zero result as failure. The C side reuses NULL'd-out
// slots left by previous unregister calls so long-running domains that
// rotate arenas don't exhaust the fixed-size registry.
//
// Declaration only; the implementation is external to this file.
//
//export moxie_secalloc_register_arena
func moxie_secalloc_register_arena(base unsafe.Pointer, length uintptr) int32
 368  
// moxie_secalloc_unregister_arena marks the slot for the arena whose data
// region starts at base as free. The next register call may reuse it. The
// signal-handler wipe loop skips NULL entries, so after unregister the arena
// is no longer wiped. SecureRotate calls this before unmapping the old arena.
//
// Declaration only; the implementation is external to this file.
//
//export moxie_secalloc_unregister_arena
func moxie_secalloc_unregister_arena(base unsafe.Pointer)
 375  
// moxie_secalloc_configure hands the noise buffer and lockdown fd to the
// C-side signal handler. noise and noiseLen describe the shared wipe
// pattern (secureInit passes &secNoise[0] and secNoiseSize); fd is the
// descriptor that receives the lockdown notification. Called once by
// secureInit().
//
// Declaration only; the implementation is external to this file.
//
//export moxie_secalloc_configure
func moxie_secalloc_configure(noise unsafe.Pointer, noiseLen uintptr, fd int32)
 381  
// moxie_secalloc_set_lockdown_fd updates only the lockdown fd on the C side.
// Used by SetSecureLockdownFd to retarget notifications without rewriting
// the noise buffer pointer. SetSecureLockdownFd calls it unconditionally,
// so it may run before moxie_secalloc_configure has been called.
//
// Declaration only; the implementation is external to this file.
//
//export moxie_secalloc_set_lockdown_fd
func moxie_secalloc_set_lockdown_fd(fd int32)
 388  
// moxie_secalloc_clear overwrites the length bytes starting at base with the
// current noise pattern. No registry mutation, no fd write — targeted
// per-buffer wipe for SecureClear's context-change use case. SecureClear
// only calls it for non-empty buffers, and the buffer need not belong to a
// registered arena.
//
// Declaration only; the implementation is external to this file.
//
//export moxie_secalloc_clear
func moxie_secalloc_clear(base unsafe.Pointer, length uintptr)
 395  
// moxie_secalloc_lockdown wipes every registered arena with noise and
// writes the lockdown marker to the notify fd. Shared entry point for
// both the fatal-signal path and the explicit SecureLockdown primitive.
// It does not regenerate the noise; SecureLockdown does that afterwards.
//
// Declaration only; the implementation is external to this file.
//
//export moxie_secalloc_lockdown
func moxie_secalloc_lockdown()
 402  
// moxie_secalloc_try_secretmem attempts to replace the anonymous data pages
// at base (length bytes) with memfd_secret(2)-backed secretmem (Linux ≥5.14).
// Returns 0 on success, -1 on failure. Failure is silent — the existing
// mmap+mlock mapping stays in place. Darwin always returns -1. secureMap
// calls it right after mmap, before the guard pages are protected, and
// ignores the result.
//
// Declaration only; the implementation is external to this file.
//
//export moxie_secalloc_try_secretmem
func moxie_secalloc_try_secretmem(base unsafe.Pointer, length uintptr) int32
 410  
// moxie_secalloc_contains returns 1 if ptr is inside any currently registered
// arena's data region, 0 otherwise. The runtime comparison/concat dispatch
// (stringEqual / stringLess / bytesConcat) calls this once per operand and
// routes through the constant-time path if either returns 1. Fast-out when
// no arenas are registered so non-crypto programs pay one load per compare.
// Within this file it is reached through the isSecurePtr wrapper.
//
// Declaration only; the implementation is external to this file.
//
//export moxie_secalloc_contains
func moxie_secalloc_contains(ptr unsafe.Pointer) int32
 419  
 420  // isSecurePtr is the runtime-internal wrapper around moxie_secalloc_contains.
 421  // Returns true when ptr is the backing address of a secure allocation (or
 422  // any subslice derived from one, since subslicing preserves the pointer's
 423  // arena membership). The runtime comparison path calls this on each operand
 424  // of stringEqual / stringLess / bytesConcat and promotes the operation to
 425  // constant-time when either side is secure.
 426  //
 427  // On platforms without the secalloc machinery (WASM, baremetal) this symbol
 428  // is provided by a stub that always returns false.
 429  func isSecurePtr(ptr unsafe.Pointer) bool {
 430  	return moxie_secalloc_contains(ptr) != 0
 431  }
 432