secalloc.c raw

   1  //go:build none
   2  
   3  // secalloc.c - signal-handler side of the secure allocator.
   4  //
   5  // Design: secalloc.mx mmap's guarded arenas and registers them here via
   6  // moxie_secalloc_register_arena(). At init it also calls moxie_secalloc_configure()
   7  // to hand over the noise buffer and lockdown pipe fd. When a fatal signal fires,
   8  // runtime_unix.c's signal_handler calls moxie_secalloc_on_fatal_signal() which:
   9  //
//   1. Wipes every registered arena with noise bytes — SYNCHRONOUSLY, before
//      the handler returns, so no attacker can observe secret contents after
//      the fault but before teardown.
//   2. Writes a fixed lockdown marker string to the lockdown fd to notify the
//      parent domain.
  14  //
  15  // Only async-signal-safe primitives are used: memcpy (pure compute) and write(2)
  16  // (POSIX-guaranteed). No malloc, no locks, no printf. The arena registry is a
  17  // fixed-size global populated at init; no dynamic allocation from the handler.
  18  //
  19  // This file is included on both Darwin and Linux.
  20  
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
  25  
  26  #ifdef __linux__
  27  #include <sys/mman.h>
  28  #include <sys/syscall.h>
  29  #ifndef SYS_memfd_secret
  30  #define SYS_memfd_secret 447
  31  #endif
  32  #endif
  33  
// Capacity of the arena registry. The registry is a fixed-size static table,
// so moxie_secalloc_register_arena() fails once all of these slots are in use.
#define MOXIE_SECALLOC_MAX_ARENAS 64

// One registry entry: start address and byte length of a registered arena.
// A NULL base marks a free slot (never used, or released by
// moxie_secalloc_unregister_arena()).
struct moxie_secalloc_arena {
	void  *base;
	size_t len;
};

// Registry storage. Statically allocated so the wipe path never allocates.
static struct moxie_secalloc_arena moxie_secalloc_arenas[MOXIE_SECALLOC_MAX_ARENAS];
// Number of slots handed out so far. Only ever incremented in this file:
// unregistering frees a slot for reuse but does not lower this count.
static int    moxie_secalloc_narenas = 0;
// Noise pattern used for wiping, and its length in bytes. Both are set by
// moxie_secalloc_configure(); while either is NULL/0 the wipe routines are
// no-ops.
static const uint8_t *moxie_secalloc_noise = NULL;
static size_t moxie_secalloc_noise_len = 0;
// File descriptor that receives the lockdown marker. Any negative value
// (the initial -1 included) disables notification.
static int    moxie_secalloc_lockdown_fd = -1;
  46  
  47  // Moxie-facing: register a new guarded arena. Must be called from normal
  48  // (non-signal) context, once per arena, before any secret is written into it.
  49  // Scans for a free slot (NULL base, set by unregister) before appending, so
  50  // rotation cycles can reuse slots and the registry doesn't grow unboundedly.
  51  // Returns 0 on success, -1 if the table is full.
  52  int moxie_secalloc_register_arena(void *base, size_t len) {
  53  	for (int i = 0; i < moxie_secalloc_narenas; i++) {
  54  		if (moxie_secalloc_arenas[i].base == NULL) {
  55  			moxie_secalloc_arenas[i].base = base;
  56  			moxie_secalloc_arenas[i].len  = len;
  57  			return 0;
  58  		}
  59  	}
  60  	if (moxie_secalloc_narenas >= MOXIE_SECALLOC_MAX_ARENAS) {
  61  		return -1;
  62  	}
  63  	moxie_secalloc_arenas[moxie_secalloc_narenas].base = base;
  64  	moxie_secalloc_arenas[moxie_secalloc_narenas].len  = len;
  65  	moxie_secalloc_narenas++;
  66  	return 0;
  67  }
  68  
  69  // Moxie-facing: drop a previously registered arena. Marks the slot free so
  70  // moxie_secalloc_register_arena can reuse it on the next call. Called by
  71  // SecureRotate after the old arena has been wiped and munmap'd. Idempotent
  72  // — unknown bases are silently ignored. The signal-handler wipe path skips
  73  // NULL entries so unregistered arenas are no longer touched.
  74  void moxie_secalloc_unregister_arena(void *base) {
  75  	for (int i = 0; i < moxie_secalloc_narenas; i++) {
  76  		if (moxie_secalloc_arenas[i].base == base) {
  77  			moxie_secalloc_arenas[i].base = NULL;
  78  			moxie_secalloc_arenas[i].len  = 0;
  79  			return;
  80  		}
  81  	}
  82  }
  83  
  84  // Moxie-facing: return current arena count so the Moxie side can check for
  85  // full-table conditions before registering. Slot count, not live count —
  86  // includes NULL'd-out entries waiting to be reused.
  87  int moxie_secalloc_arena_count(void) {
  88  	return moxie_secalloc_narenas;
  89  }
  90  
  91  // Moxie-facing: return 1 if ptr is inside any currently registered arena's
  92  // data region, 0 otherwise. Used by the runtime's stringEqual / stringLess /
  93  // bytesConcat dispatch to decide whether to route through the constant-time
  94  // comparison path. Linear scan over the registry; bounded at
  95  // MOXIE_SECALLOC_MAX_ARENAS. Fast-out when no arenas have ever been
  96  // registered: programs that never call SecureAlloc pay one load+branch
  97  // per comparison and nothing else.
  98  //
  99  // Pointer-based detection is the native analogue of JS's Slice.$secure flag.
 100  // Taint propagates implicitly through slicing (a subslice points into the
 101  // same arena) but does NOT propagate through copy() into a heap slice —
 102  // the destination's pointer is outside every registered arena and returns
 103  // 0 here. Callers that need to preserve secrecy across a copy must allocate
 104  // the destination with SecureAlloc.
 105  int moxie_secalloc_contains(const void *ptr) {
 106  	if (moxie_secalloc_narenas == 0) {
 107  		return 0;
 108  	}
 109  	const uint8_t *p = (const uint8_t *)ptr;
 110  	for (int i = 0; i < moxie_secalloc_narenas; i++) {
 111  		const uint8_t *base = (const uint8_t *)moxie_secalloc_arenas[i].base;
 112  		if (base == NULL) {
 113  			continue;
 114  		}
 115  		if (p >= base && p < base + moxie_secalloc_arenas[i].len) {
 116  			return 1;
 117  		}
 118  	}
 119  	return 0;
 120  }
 121  
 122  // Moxie-facing: one-shot configuration. noise must be a buffer of noise_len
 123  // bytes that will live for the rest of the process. lockdown_fd is the write
 124  // end of a pipe inherited from the parent domain; the read end is watched by
 125  // the parent's event loop. Set lockdown_fd = -1 to disable notification.
 126  void moxie_secalloc_configure(const void *noise, size_t noise_len, int lockdown_fd) {
 127  	moxie_secalloc_noise = (const uint8_t *)noise;
 128  	moxie_secalloc_noise_len = noise_len;
 129  	moxie_secalloc_lockdown_fd = lockdown_fd;
 130  }
 131  
 132  // Moxie-facing: update only the lockdown fd. Used after spawn when the child
 133  // receives an inherited pipe fd from the parent domain and needs to route
 134  // fault notifications there instead of stderr. Safe to call before or after
 135  // moxie_secalloc_configure() — if called before, the configure call will not
 136  // override the explicit fd (but the current implementation does set it from
 137  // the Moxie-side secLockdownFd var, so callers should set the var too).
 138  void moxie_secalloc_set_lockdown_fd(int fd) {
 139  	moxie_secalloc_lockdown_fd = fd;
 140  }
 141  
 142  // Wipe every registered arena with noise bytes. Repeats the noise buffer if
 143  // an arena is larger than the noise. Runs from signal context — must be
 144  // async-signal-safe.
 145  static void moxie_secalloc_wipe_all(void) {
 146  	if (moxie_secalloc_noise == NULL || moxie_secalloc_noise_len == 0) {
 147  		return;
 148  	}
 149  	for (int i = 0; i < moxie_secalloc_narenas; i++) {
 150  		uint8_t *dst = (uint8_t *)moxie_secalloc_arenas[i].base;
 151  		size_t   remaining = moxie_secalloc_arenas[i].len;
 152  		size_t   off = 0;
 153  		if (dst == NULL) {
 154  			continue;
 155  		}
 156  		while (remaining > 0) {
 157  			size_t n = remaining;
 158  			if (n > moxie_secalloc_noise_len) {
 159  				n = moxie_secalloc_noise_len;
 160  			}
 161  			memcpy(dst + off, moxie_secalloc_noise, n);
 162  			off += n;
 163  			remaining -= n;
 164  		}
 165  	}
 166  }
 167  
 168  // Write a lockdown marker to the notification fd. Non-blocking: if the
 169  // pipe is full or invalid we just give up — the process is about to die
 170  // anyway and the parent will observe child death as a backstop.
 171  //
 172  // The marker string is async-signal-safe: it's a fixed constant in .rodata,
 173  // not heap data, and write(2) is on POSIX's async-signal-safe list. Writing
 174  // a human-readable string (rather than a single byte) makes the milestone-1
 175  // test observable via stderr; milestone-2 will replace this with a framed
 176  // IPC byte on a spawn-inherited pipe.
 177  static void moxie_secalloc_notify(void) {
 178  	if (moxie_secalloc_lockdown_fd < 0) {
 179  		return;
 180  	}
 181  	static const char marker[] = "MOXIE_SECALLOC_LOCKDOWN\n";
 182  	ssize_t r = write(moxie_secalloc_lockdown_fd, marker, sizeof(marker) - 1);
 183  	(void)r;
 184  }
 185  
 186  // Moxie-facing: run the full lockdown sequence (wipe every registered arena
 187  // with noise, then write the lockdown marker). Shared entry point for both
 188  // the fatal-signal handler (via moxie_secalloc_on_fatal_signal) and the
 189  // explicit SecureLockdown primitive. One body, two triggers — keeps the
 190  // "something fired the wipe" semantics identical regardless of who fired it.
 191  //
 192  // INVARIANT — DO NOT VIOLATE:
 193  // Everything reachable from this function must be async-signal-safe. That
 194  // currently means: memcpy (pure compute, POSIX-safe) and write(2) (on the
 195  // POSIX async-signal-safe list). No malloc, no pthread primitives, no stdio
 196  // (printf/fprintf), no locks, no non-reentrant libc (getenv, localtime,
 197  // strerror, etc.). The registry and noise buffer are fixed at init and
 198  // read-only from here.
 199  //
 200  // This is the constraint that lets ONE function body serve BOTH triggers:
 201  // any signal-safe routine is also regular-safe (the signal-safe subset is
 202  // strictly smaller). If a future modification needs logging, allocation,
 203  // or locking, the signal-safe property breaks and the two callers must be
 204  // split into separate code paths — moxie_secalloc_on_fatal_signal stays
 205  // signal-safe, and the explicit path gets its own relaxed implementation.
 206  // Do not "just add a log line here" without splitting first.
 207  void moxie_secalloc_lockdown(void) {
 208  	moxie_secalloc_wipe_all();
 209  	moxie_secalloc_notify();
 210  }
 211  
 212  // Moxie-facing: overwrite a single caller-supplied buffer with the current
 213  // noise pattern. Unlike moxie_secalloc_lockdown this does not touch the
 214  // arena registry and does not write the notify marker — it is a targeted
 215  // wipe for point-in-time residency minimization, invoked by SecureClear
 216  // at application context-change boundaries (logout, navigation, tab
 217  // backgrounding). The buffer need not be SecureAlloc'd; SecureClear is
 218  // also valid on ordinary heap slices.
 219  //
 220  // Repeats the noise pattern if len > noise_len. No-op if the noise buffer
 221  // has not been configured yet (pre-init caller).
 222  void moxie_secalloc_clear(void *base, size_t len) {
 223  	if (moxie_secalloc_noise == NULL || moxie_secalloc_noise_len == 0) {
 224  		return;
 225  	}
 226  	uint8_t *dst = (uint8_t *)base;
 227  	size_t   off = 0;
 228  	while (len > 0) {
 229  		size_t n = len;
 230  		if (n > moxie_secalloc_noise_len) {
 231  			n = moxie_secalloc_noise_len;
 232  		}
 233  		memcpy(dst + off, moxie_secalloc_noise, n);
 234  		off += n;
 235  		len -= n;
 236  	}
 237  }
 238  
 239  // Called from runtime_unix.c's signal_handler at the very start of a fatal
 240  // signal. Delegates to moxie_secalloc_lockdown so fault-triggered and
 241  // explicit lockdowns share one code path. Must be async-signal-safe.
 242  void moxie_secalloc_on_fatal_signal(void) {
 243  	moxie_secalloc_lockdown();
 244  }
 245  
 246  // Moxie-facing: attempt to replace the anonymous data pages at addr with
 247  // memfd_secret(2)-backed secretmem. Returns 0 on success, -1 on failure.
 248  //
 249  // memfd_secret was added in Linux 5.14. Pages from a secretmem mapping:
 250  //   - are excluded from the kernel direct map — the kernel itself cannot
 251  //     read them through /proc/<pid>/mem or ptrace(PTRACE_PEEKDATA)
 252  //   - are never swapped (implicit mlock, no RLIMIT_MEMLOCK cost)
 253  //   - are destroyed when the last mapping is unmapped or the fd is closed
 254  //   - cannot be shared with another process via fork or file descriptor
 255  //
 256  // The sequence is: memfd_secret → ftruncate to size → mmap over the existing
 257  // VA with MAP_SHARED|MAP_FIXED → close the fd. MAP_FIXED replaces the prior
 258  // anonymous mapping atomically; the backing pages become secretmem while the
 259  // VA is preserved so the guard pages on either side remain in place and the
 260  // caller's pointer into the arena is unchanged. The fd is dropped immediately
 261  // after mmap — the mapping keeps the underlying memfd alive until munmap.
 262  //
 263  // Failure is not an error. On Darwin or on Linux kernels without support the
 264  // caller keeps its existing mmap+mlock mapping, which is still secure via
 265  // guard pages and mlock. The secretmem path is a gold-standard upgrade, not
 266  // a prerequisite — the secalloc API above doesn't know or care which path
 267  // succeeded, and a mixed process (some arenas secretmem, some not) is fine.
 268  int moxie_secalloc_try_secretmem(void *addr, size_t len) {
 269  #ifdef __linux__
 270  	long fd = syscall(SYS_memfd_secret, 0UL);
 271  	if (fd < 0) {
 272  		return -1;
 273  	}
 274  	if (ftruncate((int)fd, (off_t)len) != 0) {
 275  		close((int)fd);
 276  		return -1;
 277  	}
 278  	void *p = mmap(addr, len, PROT_READ | PROT_WRITE,
 279  		MAP_SHARED | MAP_FIXED, (int)fd, 0);
 280  	close((int)fd);
 281  	if (p == MAP_FAILED || p != addr) {
 282  		return -1;
 283  	}
 284  	return 0;
 285  #else
 286  	(void)addr;
 287  	(void)len;
 288  	return -1;
 289  #endif
 290  }
 291