1 //go:build none
2 3 // secalloc.c - signal-handler side of the secure allocator.
4 //
5 // Design: secalloc.mx mmap's guarded arenas and registers them here via
6 // moxie_secalloc_register_arena(). At init it also calls moxie_secalloc_configure()
7 // to hand over the noise buffer and lockdown pipe fd. When a fatal signal fires,
8 // runtime_unix.c's signal_handler calls moxie_secalloc_on_fatal_signal() which:
9 //
10 // 1. Wipes every registered arena with noise bytes — SYNCHRONOUSLY, before
11 // the handler returns, so no attacker can observe secret contents after
12 // the fault but before teardown.
//   2. Writes the lockdown marker string to the lockdown fd to notify the
//      parent domain.
14 //
15 // Only async-signal-safe primitives are used: memcpy (pure compute) and write(2)
16 // (POSIX-guaranteed). No malloc, no locks, no printf. The arena registry is a
17 // fixed-size global populated at init; no dynamic allocation from the handler.
18 //
19 // This file is included on both Darwin and Linux.
20 21 #include <stdint.h>
22 #include <stddef.h>
23 #include <string.h>
24 #include <unistd.h>
25 26 #ifdef __linux__
27 #include <sys/mman.h>
28 #include <sys/syscall.h>
29 #ifndef SYS_memfd_secret
30 #define SYS_memfd_secret 447
31 #endif
32 #endif
// Capacity of the fixed-size arena registry. Registration fails once every
// slot is occupied.
#define MOXIE_SECALLOC_MAX_ARENAS 64

// One registry slot describing a guarded arena.
struct moxie_secalloc_arena {
    void *base;  // start of the arena; NULL marks the slot as free
    size_t len;  // arena length in bytes
};
// Arena registry. Slots [0, moxie_secalloc_narenas) have been handed out at
// least once; a slot whose base is NULL is free and may be reused.
static struct moxie_secalloc_arena moxie_secalloc_arenas[MOXIE_SECALLOC_MAX_ARENAS];
// Number of slots handed out so far. It only grows in the code visible in
// this file: unregistering frees a slot but does not lower the count.
static int moxie_secalloc_narenas = 0;
// Noise pattern used to overwrite memory and its length in bytes. Both are
// set by moxie_secalloc_configure(); wipes are no-ops while either is unset.
static const uint8_t *moxie_secalloc_noise = NULL;
static size_t moxie_secalloc_noise_len = 0;
// File descriptor that receives the lockdown marker. A negative value
// disables notification.
static int moxie_secalloc_lockdown_fd = -1;
46 47 // Moxie-facing: register a new guarded arena. Must be called from normal
48 // (non-signal) context, once per arena, before any secret is written into it.
49 // Scans for a free slot (NULL base, set by unregister) before appending, so
50 // rotation cycles can reuse slots and the registry doesn't grow unboundedly.
51 // Returns 0 on success, -1 if the table is full.
52 int moxie_secalloc_register_arena(void *base, size_t len) {
53 for (int i = 0; i < moxie_secalloc_narenas; i++) {
54 if (moxie_secalloc_arenas[i].base == NULL) {
55 moxie_secalloc_arenas[i].base = base;
56 moxie_secalloc_arenas[i].len = len;
57 return 0;
58 }
59 }
60 if (moxie_secalloc_narenas >= MOXIE_SECALLOC_MAX_ARENAS) {
61 return -1;
62 }
63 moxie_secalloc_arenas[moxie_secalloc_narenas].base = base;
64 moxie_secalloc_arenas[moxie_secalloc_narenas].len = len;
65 moxie_secalloc_narenas++;
66 return 0;
67 }
68 69 // Moxie-facing: drop a previously registered arena. Marks the slot free so
70 // moxie_secalloc_register_arena can reuse it on the next call. Called by
71 // SecureRotate after the old arena has been wiped and munmap'd. Idempotent
72 // — unknown bases are silently ignored. The signal-handler wipe path skips
73 // NULL entries so unregistered arenas are no longer touched.
74 void moxie_secalloc_unregister_arena(void *base) {
75 for (int i = 0; i < moxie_secalloc_narenas; i++) {
76 if (moxie_secalloc_arenas[i].base == base) {
77 moxie_secalloc_arenas[i].base = NULL;
78 moxie_secalloc_arenas[i].len = 0;
79 return;
80 }
81 }
82 }
83 84 // Moxie-facing: return current arena count so the Moxie side can check for
85 // full-table conditions before registering. Slot count, not live count —
86 // includes NULL'd-out entries waiting to be reused.
87 int moxie_secalloc_arena_count(void) {
88 return moxie_secalloc_narenas;
89 }
90 91 // Moxie-facing: return 1 if ptr is inside any currently registered arena's
92 // data region, 0 otherwise. Used by the runtime's stringEqual / stringLess /
93 // bytesConcat dispatch to decide whether to route through the constant-time
94 // comparison path. Linear scan over the registry; bounded at
95 // MOXIE_SECALLOC_MAX_ARENAS. Fast-out when no arenas have ever been
96 // registered: programs that never call SecureAlloc pay one load+branch
97 // per comparison and nothing else.
98 //
99 // Pointer-based detection is the native analogue of JS's Slice.$secure flag.
100 // Taint propagates implicitly through slicing (a subslice points into the
101 // same arena) but does NOT propagate through copy() into a heap slice —
102 // the destination's pointer is outside every registered arena and returns
103 // 0 here. Callers that need to preserve secrecy across a copy must allocate
104 // the destination with SecureAlloc.
105 int moxie_secalloc_contains(const void *ptr) {
106 if (moxie_secalloc_narenas == 0) {
107 return 0;
108 }
109 const uint8_t *p = (const uint8_t *)ptr;
110 for (int i = 0; i < moxie_secalloc_narenas; i++) {
111 const uint8_t *base = (const uint8_t *)moxie_secalloc_arenas[i].base;
112 if (base == NULL) {
113 continue;
114 }
115 if (p >= base && p < base + moxie_secalloc_arenas[i].len) {
116 return 1;
117 }
118 }
119 return 0;
120 }
121 122 // Moxie-facing: one-shot configuration. noise must be a buffer of noise_len
123 // bytes that will live for the rest of the process. lockdown_fd is the write
124 // end of a pipe inherited from the parent domain; the read end is watched by
125 // the parent's event loop. Set lockdown_fd = -1 to disable notification.
126 void moxie_secalloc_configure(const void *noise, size_t noise_len, int lockdown_fd) {
127 moxie_secalloc_noise = (const uint8_t *)noise;
128 moxie_secalloc_noise_len = noise_len;
129 moxie_secalloc_lockdown_fd = lockdown_fd;
130 }
131 132 // Moxie-facing: update only the lockdown fd. Used after spawn when the child
133 // receives an inherited pipe fd from the parent domain and needs to route
134 // fault notifications there instead of stderr. Safe to call before or after
135 // moxie_secalloc_configure() — if called before, the configure call will not
136 // override the explicit fd (but the current implementation does set it from
137 // the Moxie-side secLockdownFd var, so callers should set the var too).
138 void moxie_secalloc_set_lockdown_fd(int fd) {
139 moxie_secalloc_lockdown_fd = fd;
140 }
141 142 // Wipe every registered arena with noise bytes. Repeats the noise buffer if
143 // an arena is larger than the noise. Runs from signal context — must be
144 // async-signal-safe.
145 static void moxie_secalloc_wipe_all(void) {
146 if (moxie_secalloc_noise == NULL || moxie_secalloc_noise_len == 0) {
147 return;
148 }
149 for (int i = 0; i < moxie_secalloc_narenas; i++) {
150 uint8_t *dst = (uint8_t *)moxie_secalloc_arenas[i].base;
151 size_t remaining = moxie_secalloc_arenas[i].len;
152 size_t off = 0;
153 if (dst == NULL) {
154 continue;
155 }
156 while (remaining > 0) {
157 size_t n = remaining;
158 if (n > moxie_secalloc_noise_len) {
159 n = moxie_secalloc_noise_len;
160 }
161 memcpy(dst + off, moxie_secalloc_noise, n);
162 off += n;
163 remaining -= n;
164 }
165 }
166 }
167 168 // Write a lockdown marker to the notification fd. Non-blocking: if the
169 // pipe is full or invalid we just give up — the process is about to die
170 // anyway and the parent will observe child death as a backstop.
171 //
172 // The marker string is async-signal-safe: it's a fixed constant in .rodata,
173 // not heap data, and write(2) is on POSIX's async-signal-safe list. Writing
174 // a human-readable string (rather than a single byte) makes the milestone-1
175 // test observable via stderr; milestone-2 will replace this with a framed
176 // IPC byte on a spawn-inherited pipe.
177 static void moxie_secalloc_notify(void) {
178 if (moxie_secalloc_lockdown_fd < 0) {
179 return;
180 }
181 static const char marker[] = "MOXIE_SECALLOC_LOCKDOWN\n";
182 ssize_t r = write(moxie_secalloc_lockdown_fd, marker, sizeof(marker) - 1);
183 (void)r;
184 }
// Moxie-facing: run the full lockdown sequence: first wipe every registered
// arena with noise, then write the lockdown marker. This is the shared entry
// point for both the fatal-signal handler (via moxie_secalloc_on_fatal_signal)
// and the explicit SecureLockdown primitive. One body, two triggers, so the
// "something fired the wipe" semantics are identical whoever fired it.
//
// INVARIANT — DO NOT VIOLATE:
// Everything reachable from this function must be async-signal-safe. Today
// that means memcpy (pure compute, POSIX-safe) and write(2) (on the POSIX
// async-signal-safe list). No malloc, no pthread primitives, no stdio
// (printf/fprintf), no locks, no non-reentrant libc (getenv, localtime,
// strerror, etc.). The registry and noise buffer are fixed at init and are
// only read from here.
//
// That constraint is what allows ONE function body to serve BOTH triggers:
// any signal-safe routine is also safe in regular context (the signal-safe
// subset is strictly smaller). If a future change needs logging, allocation,
// or locking, the signal-safe property breaks and the two callers must be
// split into separate code paths: moxie_secalloc_on_fatal_signal stays
// signal-safe, and the explicit path gets its own relaxed implementation.
// Do not "just add a log line here" without splitting first.
void moxie_secalloc_lockdown(void) {
    // Order matters: secrets are destroyed before anyone is told about it.
    moxie_secalloc_wipe_all();
    moxie_secalloc_notify();
}
211 212 // Moxie-facing: overwrite a single caller-supplied buffer with the current
213 // noise pattern. Unlike moxie_secalloc_lockdown this does not touch the
214 // arena registry and does not write the notify marker — it is a targeted
215 // wipe for point-in-time residency minimization, invoked by SecureClear
216 // at application context-change boundaries (logout, navigation, tab
217 // backgrounding). The buffer need not be SecureAlloc'd; SecureClear is
218 // also valid on ordinary heap slices.
219 //
220 // Repeats the noise pattern if len > noise_len. No-op if the noise buffer
221 // has not been configured yet (pre-init caller).
222 void moxie_secalloc_clear(void *base, size_t len) {
223 if (moxie_secalloc_noise == NULL || moxie_secalloc_noise_len == 0) {
224 return;
225 }
226 uint8_t *dst = (uint8_t *)base;
227 size_t off = 0;
228 while (len > 0) {
229 size_t n = len;
230 if (n > moxie_secalloc_noise_len) {
231 n = moxie_secalloc_noise_len;
232 }
233 memcpy(dst + off, moxie_secalloc_noise, n);
234 off += n;
235 len -= n;
236 }
237 }
// Fatal-signal hook, called from runtime_unix.c's signal_handler at the very
// start of a fatal signal. It forwards straight to moxie_secalloc_lockdown so
// that fault-triggered and explicit lockdowns share a single code path.
// Must remain async-signal-safe.
void moxie_secalloc_on_fatal_signal(void) {
    moxie_secalloc_lockdown();
}
// Moxie-facing: attempt to replace the anonymous data pages at addr with
// memfd_secret(2)-backed secretmem. Returns 0 on success, -1 on failure.
//
// memfd_secret was added in Linux 5.14. Pages from a secretmem mapping:
//   - are excluded from the kernel direct map — the kernel itself cannot
//     read them through /proc/<pid>/mem or ptrace(PTRACE_PEEKDATA)
//   - are never swapped (implicit mlock, no RLIMIT_MEMLOCK cost)
//   - are destroyed when the last mapping is unmapped or the fd is closed
//   - cannot be shared with another process via fork or file descriptor
//
// Sequence: memfd_secret → ftruncate to len → mmap over the existing VA with
// MAP_SHARED|MAP_FIXED → close the fd. MAP_FIXED replaces the prior anonymous
// mapping atomically; the backing pages become secretmem while the VA is
// preserved, so the guard pages on either side stay in place and the caller's
// pointer into the arena is unchanged. The fd is dropped right after the mmap
// attempt; the mapping keeps the underlying memfd alive until munmap.
//
// Failure is not an error. On Darwin, or on Linux kernels without support,
// the caller keeps its existing mmap+mlock mapping, which is still protected
// by guard pages and mlock. The secretmem path is a gold-standard upgrade,
// not a prerequisite: the rest of the secalloc API neither knows nor cares
// which path succeeded, and a mixed process (some arenas secretmem, some not)
// is fine.
int moxie_secalloc_try_secretmem(void *addr, size_t len) {
#ifdef __linux__
    const long raw_fd = syscall(SYS_memfd_secret, 0UL);
    if (raw_fd < 0) {
        return -1;
    }
    const int fd = (int)raw_fd;

    int result = -1;
    if (ftruncate(fd, (off_t)len) == 0) {
        void *mapped = mmap(addr, len, PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_FIXED, fd, 0);
        // Success only if the mapping landed exactly on the requested VA.
        if (mapped != MAP_FAILED && mapped == addr) {
            result = 0;
        }
    }

    // The fd is no longer needed whether or not the mapping succeeded.
    close(fd);
    return result;
#else
    // No secretmem outside Linux; the caller keeps its existing mapping.
    (void)addr;
    (void)len;
    return -1;
#endif
}
291