/* mark.c */
1 /*
2 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
3 * Copyright (c) 1991-1995 by Xerox Corporation. All rights reserved.
4 * Copyright (c) 2000 by Hewlett-Packard Company. All rights reserved.
5 * Copyright (c) 2008-2022 Ivan Maidanski
6 *
7 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
8 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
9 *
10 * Permission is hereby granted to use or copy this program
11 * for any purpose, provided the above notices are retained on all copies.
12 * Permission to modify the code and to distribute modified code is granted,
13 * provided the above notices are retained, and a notice that the code was
14 * modified is included with the above copyright notice.
15 */
16
17 #include "private/gc_pmark.h"
18
19 /*
20 * Make arguments appear live to compiler. Put here to minimize the
21 * risk of inlining. Used to minimize junk left in registers.
22 */
GC_ATTR_NOINLINE
void
GC_noop6(word arg1, word arg2, word arg3, word arg4, word arg5, word arg6)
{
  /* The arguments are deliberately never read; the call alone makes
     them appear live to the compiler. */
  UNUSED_ARG(arg1);
  UNUSED_ARG(arg2);
  UNUSED_ARG(arg3);
  UNUSED_ARG(arg4);
  UNUSED_ARG(arg5);
  UNUSED_ARG(arg6);
  /* Avoid `GC_noop6` calls to be optimized away. */
#if defined(AO_HAVE_compiler_barrier) && !defined(BASE_ATOMIC_OPS_EMULATED)
  AO_compiler_barrier(); /*< to serve as a special side-effect */
#else
  GC_noop1(0);
#endif
}
40
/*
 * Make the argument appear live to the compiler by storing it into
 * a global sink, so calls cannot be optimized away entirely.
 */
GC_API void GC_CALL
GC_noop1(GC_word x)
{
#if defined(AO_HAVE_store) && defined(THREAD_SANITIZER)
  /* Use an atomic store so TSan does not report the benign racy write. */
  AO_store(&GC_noop_sink, (AO_t)x);
#else
  GC_noop_sink = x;
#endif
}
50
/*
 * Same as `GC_noop1` but for a pointer argument (which may be wider
 * than a `word` when `CPP_PTRSZ > CPP_WORDSZ`).
 */
GC_API void GC_CALL
GC_noop1_ptr(volatile void *p)
{
#if CPP_PTRSZ > CPP_WORDSZ
  /* A pointer does not fit into a word; store it to a pointer sink. */
# if defined(AO_HAVE_store) && defined(THREAD_SANITIZER)
  GC_cptr_store(&GC_noop_sink_ptr, (ptr_t)CAST_AWAY_VOLATILE_PVOID(p));
# else
  GC_noop_sink_ptr = (ptr_t)CAST_AWAY_VOLATILE_PVOID(p);
# endif
#else
  GC_noop1(ADDR(p));
#endif
}
64
65 /*
66 * Initialize `GC_obj_kinds` properly and standard free lists properly.
67 * This must be done statically since they may be accessed before
68 * `GC_init` is called. It is done here, since we need to deal with
69 * mark descriptors. Note: `GC_obj_kinds[NORMAL].ok_descriptor` is
70 * adjusted in `GC_init()` for `EXTRA_BYTES`.
71 */
GC_INNER struct obj_kind GC_obj_kinds[MAXOBJKINDS] = {
  /* `PTRFREE` (atomic, pointer-free objects) */
  { &GC_aobjfreelist[0], 0 /*< filled in dynamically */,
    /* `0 |` */ GC_DS_LENGTH, FALSE,
    FALSE
    /*, */ OK_DISCLAIM_INITZ },
  /* `NORMAL` */
  { &GC_objfreelist[0], 0,
    /* `0 |` */ GC_DS_LENGTH, TRUE /*< add length to descriptor template */,
    TRUE
    /*, */ OK_DISCLAIM_INITZ },
  /* `UNCOLLECTABLE` */
  { &GC_uobjfreelist[0], 0,
    /* `0 |` */ GC_DS_LENGTH, TRUE /*< add length to descriptor template */,
    TRUE
    /*, */ OK_DISCLAIM_INITZ },
#ifdef GC_ATOMIC_UNCOLLECTABLE
  /* `AUNCOLLECTABLE` (pointer-free and uncollectable) */
  { &GC_auobjfreelist[0], 0,
    /* `0 |` */ GC_DS_LENGTH, FALSE,
    FALSE
    /*, */ OK_DISCLAIM_INITZ },
#endif
};
95
#ifndef INITIAL_MARK_STACK_SIZE
/*
 * `INITIAL_MARK_STACK_SIZE * sizeof(mse)` should be a multiple of `HBLKSIZE`.
 * The incremental collector actually likes a larger size, since it wants to
 * push all marked dirty objects before marking anything new.
 * Currently we let it grow dynamically (see `alloc_mark_stack`).
 */
# define INITIAL_MARK_STACK_SIZE (1 * HBLKSIZE)
#endif

#if !defined(GC_DISABLE_INCREMENTAL)
/*
 * The number of dirty pages we marked from, excluding pointer-free pages,
 * etc.  Used for logging only (see `GC_mark_some`).
 */
STATIC word GC_n_rescuing_pages = 0;
#endif
113
114 GC_API void GC_CALL
115 GC_set_pointer_mask(GC_word value)
116 {
117 #ifdef DYNAMIC_POINTER_MASK
118 GC_ASSERT(value >= 0xff); /*< a simple sanity check */
119 GC_pointer_mask = value;
120 #else
121 if (value
122 # ifdef POINTER_MASK
123 != (word)(POINTER_MASK)
124 # else
125 != GC_WORD_MAX
126 # endif
127 ) {
128 ABORT("Dynamic pointer mask/shift is unsupported");
129 }
130 #endif
131 }
132
133 GC_API GC_word GC_CALL
134 GC_get_pointer_mask(void)
135 {
136 #ifdef DYNAMIC_POINTER_MASK
137 GC_word value = GC_pointer_mask;
138
139 if (0 == value) {
140 GC_ASSERT(!GC_is_initialized);
141 value = GC_WORD_MAX;
142 }
143 return value;
144 #elif defined(POINTER_MASK)
145 return POINTER_MASK;
146 #else
147 return GC_WORD_MAX;
148 #endif
149 }
150
151 GC_API void GC_CALL
152 GC_set_pointer_shift(unsigned value)
153 {
154 #ifdef DYNAMIC_POINTER_MASK
155 GC_ASSERT(value < CPP_WORDSZ);
156 GC_pointer_shift = (unsigned char)value;
157 #else
158 if (value
159 # ifdef POINTER_SHIFT
160 != (unsigned)(POINTER_SHIFT)
161 # endif
162 ) {
163 ABORT("Dynamic pointer mask/shift is unsupported");
164 }
165 #endif
166 }
167
/* Retrieve the pointer shift in effect (0 when none is configured). */
GC_API unsigned GC_CALL
GC_get_pointer_shift(void)
{
#ifdef DYNAMIC_POINTER_MASK
  return GC_pointer_shift;
#elif defined(POINTER_SHIFT)
  GC_STATIC_ASSERT((unsigned)(POINTER_SHIFT) < CPP_WORDSZ);
  return POINTER_SHIFT;
#else
  return 0;
#endif
}
180
181 GC_INNER GC_bool
182 GC_collection_in_progress(void)
183 {
184 return GC_mark_state != MS_NONE;
185 }
186
/*
 * Clear all mark bits in the block header, then re-set the "final"
 * mark bit (the one computed by `FINAL_MARK_BIT`, past the last
 * object) and reset the mark counter to zero.
 */
GC_INNER void
GC_clear_hdr_marks(hdr *hhdr)
{
  size_t last_bit;

#ifdef AO_HAVE_load
  /* Atomic access is used to avoid racing with `GC_realloc`. */
  last_bit = FINAL_MARK_BIT(AO_load((volatile AO_t *)&hhdr->hb_sz));
#else
  /*
   * No race as `GC_realloc` holds the allocator lock while updating
   * `hb_sz` field.
   */
  last_bit = FINAL_MARK_BIT(hhdr->hb_sz);
#endif

  BZERO(CAST_AWAY_VOLATILE_PVOID(hhdr->hb_marks), sizeof(hhdr->hb_marks));
  set_mark_bit_from_hdr(hhdr, last_bit);
  hhdr->hb_n_marks = 0;
}
207
/*
 * Set all mark bits in the block header (including the bit one past
 * the end of the block) and set `hb_n_marks` to the matching count.
 */
GC_INNER void
GC_set_hdr_marks(hdr *hhdr)
{
  size_t i;
  size_t sz = hhdr->hb_sz;
  size_t n_marks = FINAL_MARK_BIT(sz);

#ifdef USE_MARK_BYTES
  /* One mark byte per object; step by the per-object mark-bit offset. */
  for (i = 0; i <= n_marks; i += MARK_BIT_OFFSET(sz)) {
    hhdr->hb_marks[i] = 1;
  }
#else
  /*
   * Note that all bits are set even in case of not `MARK_BIT_PER_OBJ`,
   * instead of setting every `n`-th bit where `n` is `MARK_BIT_OFFSET(sz)`.
   * This is done for a performance reason.
   */
  for (i = 0; i < divWORDSZ(n_marks); ++i) {
    hhdr->hb_marks[i] = GC_WORD_MAX;
  }
  /* Set the remaining bits near the end (plus one bit past the end). */
  hhdr->hb_marks[i] = ((((word)1 << modWORDSZ(n_marks)) - 1) << 1) | 1;
#endif
#ifdef MARK_BIT_PER_OBJ
  hhdr->hb_n_marks = n_marks;
#else
  hhdr->hb_n_marks = HBLK_OBJS(sz);
#endif
}
237
/* Clear all mark bits associated with block `h`. */
static void GC_CALLBACK
clear_marks_for_block(struct hblk *h, void *dummy)
{
  hdr *hhdr = HDR(h);

  UNUSED_ARG(dummy);
  if (IS_UNCOLLECTABLE(hhdr->hb_obj_kind)) {
    /*
     * Mark bit for these is cleared only once the object is deallocated
     * explicitly.  This either frees the block, or the bit is cleared
     * once the object is on the free list.
     */
    return;
  }
  GC_clear_hdr_marks(hhdr);
#if defined(CPPCHECK)
  /* Pretend `h` is used so static analysis does not flag the parameter. */
  GC_noop1_ptr(h);
#endif
}
258
259 /* Slow but general routines for setting/clearing/getting mark bits. */
260
261 GC_API void GC_CALL
262 GC_set_mark_bit(const void *p)
263 {
264 struct hblk *h = HBLKPTR(p);
265 hdr *hhdr = HDR(h);
266 size_t bit_no = MARK_BIT_NO((size_t)((ptr_t)p - (ptr_t)h), hhdr->hb_sz);
267
268 if (!mark_bit_from_hdr(hhdr, bit_no)) {
269 set_mark_bit_from_hdr(hhdr, bit_no);
270 INCR_MARKS(hhdr);
271 }
272 }
273
/*
 * Clear the mark bit for the object at `p` and, if it was set,
 * decrement the containing block's mark counter.
 */
GC_API void GC_CALL
GC_clear_mark_bit(const void *p)
{
  struct hblk *h = HBLKPTR(p);
  hdr *hhdr = HDR(h);
  size_t bit_no = MARK_BIT_NO((size_t)((ptr_t)p - (ptr_t)h), hhdr->hb_sz);

  if (mark_bit_from_hdr(hhdr, bit_no)) {
    size_t n_marks = hhdr->hb_n_marks;

    GC_ASSERT(n_marks != 0);
    clear_mark_bit_from_hdr(hhdr, bit_no);
    n_marks--;
#ifdef PARALLEL_MARK
    /*
     * Do not decrement to zero.  The counts are approximate due to
     * concurrency issues, but we need to ensure that a count of zero
     * implies an empty block.
     */
    if (n_marks != 0 || !GC_parallel)
      hhdr->hb_n_marks = n_marks;
#else
    hhdr->hb_n_marks = n_marks;
#endif
  }
}
300
301 GC_API int GC_CALL
302 GC_is_marked(const void *p)
303 {
304 struct hblk *h = HBLKPTR(p);
305 hdr *hhdr = HDR(h);
306 size_t bit_no = MARK_BIT_NO((size_t)((ptr_t)p - (ptr_t)h), hhdr->hb_sz);
307
308 return (int)mark_bit_from_hdr(hhdr, bit_no); /*< 0 or 1 */
309 }
310
/*
 * Clear mark bits in all allocated heap blocks and reset the marker
 * state so that marking restarts from scratch.
 */
GC_INNER void
GC_clear_marks(void)
{
  /* The initialization is needed for `GC_push_roots()`. */
  GC_ASSERT(GC_is_initialized);

  GC_apply_to_all_blocks(clear_marks_for_block, NULL);
  GC_objects_are_marked = FALSE;
  GC_mark_state = MS_INVALID;
  GC_scan_ptr = NULL;
}
322
/*
 * Prepare for a marking cycle: fetch the dirty bits (if incremental
 * collection is enabled) and advance the mark state out of `MS_NONE`.
 */
GC_INNER void
GC_initiate_gc(void)
{
  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_is_initialized);
#ifndef GC_DISABLE_INCREMENTAL
  if (GC_incremental) {
# ifdef CHECKSUMS
    GC_read_dirty(FALSE);
    GC_check_dirty();
# else
    GC_read_dirty(GC_mark_state == MS_INVALID);
# endif
  }
  GC_n_rescuing_pages = 0;
#endif
  if (GC_mark_state == MS_NONE) {
    GC_mark_state = MS_PUSH_RESCUERS;
  } else {
    /* This is really a full collection, and mark bits are invalid. */
    GC_ASSERT(GC_mark_state == MS_INVALID);
  }
  GC_scan_ptr = NULL;
}
347
348 #ifdef PARALLEL_MARK
349 /* Initiate parallel marking. */
350 STATIC void GC_do_parallel_mark(void);
351 #endif
352
353 #ifdef GC_DISABLE_INCREMENTAL
354 # define GC_push_next_marked_dirty(h) GC_push_next_marked(h)
355 #else
356 STATIC struct hblk *GC_push_next_marked_dirty(struct hblk *h);
357 #endif /* !GC_DISABLE_INCREMENTAL */
358
359 STATIC struct hblk *GC_push_next_marked(struct hblk *h);
360 STATIC struct hblk *GC_push_next_marked_uncollectable(struct hblk *h);
361
362 static void alloc_mark_stack(size_t);
363
364 static void
365 push_roots_and_advance(GC_bool push_all, ptr_t cold_gc_frame)
366 {
367 if (GC_scan_ptr != NULL) {
368 /* Not ready to push. */
369 return;
370 }
371 GC_push_roots(push_all, cold_gc_frame);
372 GC_objects_are_marked = TRUE;
373 if (GC_mark_state != MS_INVALID)
374 GC_mark_state = MS_ROOTS_PUSHED;
375 }
376
/* Client callback invoked when the mark stack drains (see `GC_mark_some`). */
STATIC GC_on_mark_stack_empty_proc GC_on_mark_stack_empty;

/* Set the mark-stack-empty callback; takes the allocator lock. */
GC_API void GC_CALL
GC_set_on_mark_stack_empty(GC_on_mark_stack_empty_proc fn)
{
  LOCK();
  GC_on_mark_stack_empty = fn;
  UNLOCK();
}
386
387 GC_API GC_on_mark_stack_empty_proc GC_CALL
388 GC_get_on_mark_stack_empty(void)
389 {
390 GC_on_mark_stack_empty_proc fn;
391
392 READER_LOCK();
393 fn = GC_on_mark_stack_empty;
394 READER_UNLOCK();
395 return fn;
396 }
397
#ifdef WRAP_MARK_SOME
/*
 * For Win32, this is called after we establish a structured exception
 * (or signal) handler, in case Windows unmaps one of our root segments.
 * Note that this code should never generate an incremental GC write fault.
 */
STATIC GC_bool
GC_mark_some_inner(ptr_t cold_gc_frame)
#else
GC_INNER GC_bool
GC_mark_some(ptr_t cold_gc_frame)
#endif
{
  /*
   * Perform a small amount of marking work, driven by the mark-state
   * machine.  Returns TRUE when the mark phase has finished (the state
   * reaches `MS_NONE`), FALSE when more calls are needed.
   */
  GC_ASSERT(I_HOLD_LOCK());
  switch (GC_mark_state) {
  case MS_NONE:
    /* Marking is not in progress; nothing to do. */
    return TRUE;

  case MS_PUSH_RESCUERS:
    if (ADDR_GE((ptr_t)GC_mark_stack_top,
                (ptr_t)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE / 2))) {
      /*
       * Go ahead and mark, even though that might cause us to see more
       * marked dirty objects later on.  Avoid this in the future.
       */
      GC_mark_stack_too_small = TRUE;
      MARK_FROM_MARK_STACK();
    } else {
      GC_scan_ptr = GC_push_next_marked_dirty(GC_scan_ptr);
#ifndef GC_DISABLE_INCREMENTAL
      if (NULL == GC_scan_ptr) {
        /* The scan over dirty pages is complete; log the page count. */
        GC_COND_LOG_PRINTF("Marked from %lu dirty pages\n",
                           (unsigned long)GC_n_rescuing_pages);
      }
#endif
      push_roots_and_advance(FALSE, cold_gc_frame);
    }
    GC_ASSERT(GC_mark_state == MS_PUSH_RESCUERS
              || GC_mark_state == MS_ROOTS_PUSHED
              || GC_mark_state == MS_INVALID);
    break;

  case MS_PUSH_UNCOLLECTABLE:
    if (ADDR_GE((ptr_t)GC_mark_stack_top,
                (ptr_t)(GC_mark_stack + GC_mark_stack_size / 4))) {
#ifdef PARALLEL_MARK
      /* Avoid this, since we do not parallelize the marker here. */
      if (GC_parallel)
        GC_mark_stack_too_small = TRUE;
#endif
      MARK_FROM_MARK_STACK();
    } else {
      GC_scan_ptr = GC_push_next_marked_uncollectable(GC_scan_ptr);
      push_roots_and_advance(TRUE, cold_gc_frame);
    }
    GC_ASSERT(GC_mark_state == MS_PUSH_UNCOLLECTABLE
              || GC_mark_state == MS_ROOTS_PUSHED
              || GC_mark_state == MS_INVALID);
    break;

  case MS_ROOTS_PUSHED:
#ifdef PARALLEL_MARK
    /*
     * Eventually, incremental marking should run asynchronously
     * in multiple threads, without acquiring the allocator lock.
     * For now, parallel marker is disabled if there is a chance that
     * marking could be interrupted by a client-supplied time limit
     * or custom stop function.
     */
    if (GC_parallel && !GC_parallel_mark_disabled) {
      GC_do_parallel_mark();
      GC_ASSERT(ADDR_LT((ptr_t)GC_mark_stack_top, GC_first_nonempty));
      GC_mark_stack_top = GC_mark_stack - 1; /*< empty the global stack */
      if (GC_mark_stack_too_small) {
        alloc_mark_stack(2 * GC_mark_stack_size);
      }
      if (GC_mark_state == MS_ROOTS_PUSHED) {
        GC_mark_state = MS_NONE;
        return TRUE;
      }
      GC_ASSERT(GC_mark_state == MS_INVALID);
      break;
    }
#endif
    if (ADDR_GE((ptr_t)GC_mark_stack_top, (ptr_t)GC_mark_stack)) {
      MARK_FROM_MARK_STACK();
    } else {
      GC_on_mark_stack_empty_proc on_ms_empty = GC_on_mark_stack_empty;

      if (on_ms_empty != 0) {
        GC_mark_stack_top
            = on_ms_empty(GC_mark_stack_top, GC_mark_stack_limit);
        /* If we pushed new items, we need to continue processing. */
        if (ADDR_GE((ptr_t)GC_mark_stack_top, (ptr_t)GC_mark_stack))
          break;
      }
      if (GC_mark_stack_too_small) {
        alloc_mark_stack(2 * GC_mark_stack_size);
      }
      GC_mark_state = MS_NONE;
      return TRUE;
    }
    GC_ASSERT(GC_mark_state == MS_ROOTS_PUSHED || GC_mark_state == MS_INVALID);
    break;

  case MS_INVALID:
  case MS_PARTIALLY_INVALID:
    if (!GC_objects_are_marked) {
      /* Nothing was marked yet; restart from the uncollectable objects. */
      GC_mark_state = MS_PUSH_UNCOLLECTABLE;
      break;
    }
    if (ADDR_GE((ptr_t)GC_mark_stack_top, (ptr_t)GC_mark_stack)) {
      MARK_FROM_MARK_STACK();
      GC_ASSERT(GC_mark_state == MS_PARTIALLY_INVALID
                || GC_mark_state == MS_INVALID);
      break;
    }
    if (NULL == GC_scan_ptr && GC_mark_state == MS_INVALID) {
      /*
       * About to start a heap scan for marked objects.
       * Mark stack is empty.  OK to reallocate.
       */
      if (GC_mark_stack_too_small) {
        alloc_mark_stack(2 * GC_mark_stack_size);
      }
      GC_mark_state = MS_PARTIALLY_INVALID;
    }
    GC_scan_ptr = GC_push_next_marked(GC_scan_ptr);
    if (GC_mark_state == MS_PARTIALLY_INVALID)
      push_roots_and_advance(TRUE, cold_gc_frame);
    GC_ASSERT(GC_mark_state == MS_ROOTS_PUSHED
              || GC_mark_state == MS_PARTIALLY_INVALID
              || GC_mark_state == MS_INVALID);
    break;

  default:
    ABORT("GC_mark_some: bad state");
  }
  return FALSE;
}
538
#ifdef PARALLEL_MARK
/*
 * If set, `GC_mark_some` skips the parallel marker even when
 * `GC_parallel` is on (see the `MS_ROOTS_PUSHED` case there).
 */
GC_INNER GC_bool GC_parallel_mark_disabled = FALSE;
#endif
542
#ifdef WRAP_MARK_SOME
/*
 * Wrapper around `GC_mark_some_inner` that recovers from access
 * violations caused by root segments disappearing mid-collection
 * (via SEH on Win32 or a temporary fault handler elsewhere).
 */
GC_INNER GC_bool
GC_mark_some(ptr_t cold_gc_frame)
{
  GC_bool ret_val;

  if (GC_no_dls) {
    /* No dynamic-library roots, so no risk of vanishing segments. */
    ret_val = GC_mark_some_inner(cold_gc_frame);
  } else {
    /*
     * Windows appears to asynchronously create and remove writable
     * memory mappings, for reasons we have not yet understood.
     * Since we look for writable regions to determine the root set, we
     * may try to mark from an address range that disappeared since we
     * started the collection.  Thus we have to recover from faults here.
     * This code seems to be necessary for WinCE (at least in the case
     * we would decide to add `MEM_PRIVATE` sections to data roots in
     * `GC_register_dynamic_libraries`).  It is conceivable that this is
     * the same issue as with terminating threads that we see with Linux
     * and `USE_PROC_FOR_LIBRARIES`.
     */
# ifndef NO_SEH_AVAILABLE
    __try {
      ret_val = GC_mark_some_inner(cold_gc_frame);
    } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION
                    ? EXCEPTION_EXECUTE_HANDLER
                    : EXCEPTION_CONTINUE_SEARCH) {
      goto handle_ex;
    }
# else
#   if defined(USE_PROC_FOR_LIBRARIES) && !defined(DEFAULT_VDB)
    if (GC_auto_incremental) {
      static GC_bool is_warned = FALSE;

      if (!is_warned) {
        is_warned = TRUE;
        WARN("Incremental GC incompatible with /proc roots\n", 0);
      }
      /* Unclear if this could still work... */
    }
#   endif
    /*
     * If `USE_PROC_FOR_LIBRARIES`, then we are handling the case in
     * which `/proc` is used for root finding, and we have threads.
     * We may find a stack for a thread that is in the process of
     * exiting, and disappears while we are marking it.
     * This seems extremely difficult to avoid otherwise.
     */
    GC_setup_temporary_fault_handler();
    if (SETJMP(GC_jmp_buf) != 0)
      goto handle_ex;
    ret_val = GC_mark_some_inner(cold_gc_frame);
    GC_reset_fault_handler();
# endif
  }

# if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
  /*
   * With `DllMain`-based thread tracking, a thread may have started
   * while we were marking.  This is logically equivalent to the
   * exception case; our results are invalid and we have to start over.
   * This cannot be prevented since we cannot block in `DllMain()`.
   */
  if (GC_started_thread_while_stopped())
    goto handle_thr_start;
# endif
  return ret_val;

handle_ex:
  /* Exception handler starts here for all cases. */
# if defined(NO_SEH_AVAILABLE)
  GC_reset_fault_handler();
# endif
  {
    static word warned_gc_no;

    /* Report caught `ACCESS_VIOLATION`, once per collection. */
    if (warned_gc_no != GC_gc_no) {
      GC_COND_LOG_PRINTF("Memory mapping disappeared at collection #%lu\n",
                         (unsigned long)GC_gc_no + 1);
      warned_gc_no = GC_gc_no;
    }
  }
# if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
handle_thr_start:
# endif
  /*
   * We have bad roots on the mark stack - discard it.
   * Rescan from the marked objects; redetermine the roots.
   */
# ifdef REGISTER_LIBRARIES_EARLY
  START_WORLD();
  GC_cond_register_dynamic_libraries();
  STOP_WORLD();
# endif
  GC_invalidate_mark_state();
  GC_scan_ptr = NULL;
  return FALSE;
}
#endif /* WRAP_MARK_SOME */
643
/* Discard the mark stack contents and force a full rescan. */
GC_INNER void
GC_invalidate_mark_state(void)
{
  GC_mark_state = MS_INVALID;
  GC_mark_stack_top = GC_mark_stack - 1; /*< i.e. an empty mark stack */
}
650
/*
 * Record a mark stack overflow: invalidate the mark state (forcing a
 * rescan later) and back the stack top off by `GC_MARK_STACK_DISCARDS`
 * entries, returning the adjusted top.
 */
STATIC mse *
GC_signal_mark_stack_overflow(mse *msp)
{
  GC_mark_state = MS_INVALID;
#ifdef PARALLEL_MARK
  /*
   * We are using a `local_mark_stack` in parallel mode, so do
   * not signal the global mark stack to be resized.
   * That will be done in `GC_return_mark_stack` if required.
   */
  if (!GC_parallel)
    GC_mark_stack_too_small = TRUE;
#else
  GC_mark_stack_too_small = TRUE;
#endif
  GC_COND_LOG_PRINTF("Mark stack overflow; current size: %lu entries\n",
                     (unsigned long)GC_mark_stack_size);
#if defined(CPPCHECK)
  GC_noop1_ptr(msp);
#endif
  return msp - GC_MARK_STACK_DISCARDS;
}
673
/*
 * The core marking loop: pop entries between `mark_stack` and
 * `mark_stack_top` (inclusive), scan the ranges they describe for
 * plausible heap pointers, and push the referenced objects until the
 * stack drains or the work `credit` is exhausted.  Returns the new
 * mark stack top.
 */
GC_ATTR_NO_SANITIZE_ADDR_MEM_THREAD
GC_INNER mse *
GC_mark_from(mse *mark_stack_top, mse *mark_stack, mse *mark_stack_limit)
{
  GC_signed_word credit = HBLKSIZE; /*< remaining credit for marking work */
  word descr;
  ptr_t current_p;    /*< pointer to the current candidate pointer */
  ptr_t q;            /*< the candidate pointer itself */
  ptr_t limit = NULL; /*< the limit (incl.) of the current candidate range */
  ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
  DECLARE_HDR_CACHE;

#define SPLIT_RANGE_PTRS 128 /*< must be power of 2 */

  GC_objects_are_marked = TRUE;
  INIT_HDR_CACHE;
#if defined(OS2) || CPP_PTRSZ > CPP_WORDSZ
  /* OS/2: use untweaked variant to circumvent a compiler problem. */
  while (ADDR_GE((ptr_t)mark_stack_top, (ptr_t)mark_stack) && credit >= 0)
#else
  /* Branch-free equivalent: loop while neither quantity went negative. */
  while (((((word)mark_stack_top - (word)mark_stack) | (word)credit) & SIGNB)
         == 0)
#endif
  {
    current_p = mark_stack_top->mse_start;
    descr = mark_stack_top->mse_descr;
  retry:
    /*
     * `current_p` and `descr` describe the current object.
     * `*mark_stack_top` is vacant.
     * The following is zero only for small objects described by a simple
     * length descriptor.  For many applications this is the common case,
     * so we try to detect it quickly.
     */
    if (descr & (~(word)(PTRS_TO_BYTES(SPLIT_RANGE_PTRS) - 1) | GC_DS_TAGS)) {
      word tag = descr & GC_DS_TAGS;

      GC_STATIC_ASSERT(GC_DS_TAGS == 0x3);
      switch (tag) {
      case GC_DS_LENGTH:
        /*
         * Large length.  Process part of the range to avoid pushing
         * too much on the stack.
         */

        /* Either it is a heap object or a region outside the heap. */
        GC_ASSERT(descr < GC_greatest_real_heap_addr - GC_least_real_heap_addr
                  || GC_least_real_heap_addr + sizeof(ptr_t)
                         >= ADDR(current_p) + descr
                  || ADDR(current_p) >= GC_greatest_real_heap_addr);
#ifdef PARALLEL_MARK
# define SHARE_BYTES 2048
        /* Leave the second half of the range on the stack for stealing. */
        if (descr > SHARE_BYTES && GC_parallel
            && ADDR_LT((ptr_t)mark_stack_top, (ptr_t)(mark_stack_limit - 1))) {
          word new_size = (descr >> 1) & ~(word)(sizeof(ptr_t) - 1);

          mark_stack_top->mse_start = current_p;
          /* This makes sure we handle misaligned pointers. */
          mark_stack_top->mse_descr
              = (new_size + sizeof(ptr_t)) | GC_DS_LENGTH;
          mark_stack_top++;
# ifdef ENABLE_TRACE
          if (ADDR_INSIDE(GC_trace_ptr, current_p, current_p + descr)) {
            GC_log_printf("GC #%lu: large section; start %p, len %lu,"
                          " splitting (parallel) at %p\n",
                          (unsigned long)GC_gc_no, (void *)current_p,
                          (unsigned long)descr,
                          (void *)(current_p + new_size));
          }
# endif
          current_p += new_size;
          descr -= new_size;
          goto retry;
        }
#endif /* PARALLEL_MARK */
        limit = current_p + PTRS_TO_BYTES(SPLIT_RANGE_PTRS - 1);
        mark_stack_top->mse_start = limit;
        mark_stack_top->mse_descr
            = descr - PTRS_TO_BYTES(SPLIT_RANGE_PTRS - 1);
#ifdef ENABLE_TRACE
        if (ADDR_INSIDE(GC_trace_ptr, current_p, current_p + descr)) {
          GC_log_printf("GC #%lu: large section; start %p, len %lu,"
                        " splitting at %p\n",
                        (unsigned long)GC_gc_no, (void *)current_p,
                        (unsigned long)descr, (void *)limit);
        }
#endif
        /*
         * Make sure that pointers overlapping the two ranges are
         * considered.
         */
        limit += sizeof(ptr_t) - ALIGNMENT;
        break;
      case GC_DS_BITMAP:
        /* Each set bit of the descriptor marks a pointer-holding word. */
        mark_stack_top--;
#ifdef ENABLE_TRACE
        if (ADDR_INSIDE(GC_trace_ptr, current_p,
                        current_p + PTRS_TO_BYTES(BITMAP_BITS))) {
          GC_log_printf("GC #%lu: tracing from %p bitmap descr 0x%lx\n",
                        (unsigned long)GC_gc_no, (void *)current_p,
                        (unsigned long)descr);
        }
#endif
        descr &= ~(word)GC_DS_TAGS;
        credit -= (GC_signed_word)PTRS_TO_BYTES(CPP_PTRSZ / 2); /*< guess */
        for (; descr != 0;
             descr <<= 1, current_p += sizeof(ptr_t)) { /*< not `ALIGNMENT` */
          if ((descr & SIGNB) == 0)
            continue;
          LOAD_PTR_OR_CONTINUE(q, current_p);
          FIXUP_POINTER(q);
          if (ADDR_LT(least_ha, q) && ADDR_LT(q, greatest_ha)) {
            PREFETCH(q);
#ifdef ENABLE_TRACE
            if (GC_trace_ptr == current_p) {
              GC_log_printf("GC #%lu: considering(3) %p -> %p\n",
                            (unsigned long)GC_gc_no, (void *)current_p,
                            (void *)q);
            }
#endif
            PUSH_CONTENTS(q, mark_stack_top, mark_stack_limit, current_p);
          }
        }
        continue;
      case GC_DS_PROC:
        /* Delegate marking of this object to a client-supplied procedure. */
        mark_stack_top--;
#ifdef ENABLE_TRACE
        if (ADDR_GE(GC_trace_ptr, current_p)) {
          const void *base = GC_base(current_p);

          if (base != NULL && GC_base(GC_trace_ptr) == base) {
            GC_log_printf("GC #%lu: tracing from %p, proc descr 0x%lx\n",
                          (unsigned long)GC_gc_no, (void *)current_p,
                          (unsigned long)descr);
          }
        }
#endif
        credit -= GC_PROC_BYTES;
        mark_stack_top = (*PROC(descr))((word *)current_p, mark_stack_top,
                                        mark_stack_limit, ENV(descr));
        continue;
      case GC_DS_PER_OBJECT:
        if (!(descr & SIGNB)) {
          /* Descriptor is in the object. */
          descr = *(word *)(current_p + descr - GC_DS_PER_OBJECT);
        } else {
          /*
           * Descriptor is in the type descriptor pointed to by the first
           * "pointer-sized" word of the object.
           */
          ptr_t type_descr = *(ptr_t *)current_p;

          /*
           * `type_descr` is either a valid pointer to the descriptor
           * structure, or this object was on a free list.
           * If it was anything but the last object on the free list,
           * we will misinterpret the next object on the free list as
           * the type descriptor, and get a zero GC descriptor, which
           * is ideal.  Unfortunately, we need to check for the last
           * object case explicitly.
           */
          if (UNLIKELY(NULL == type_descr)) {
            mark_stack_top--;
            continue;
          }
          descr = *(word *)(type_descr
                            - ((GC_signed_word)descr
                               + (GC_INDIR_PER_OBJ_BIAS - GC_DS_PER_OBJECT)));
        }
        if (0 == descr) {
          /*
           * Can happen either because we generated a zero GC descriptor
           * or we saw a pointer to a free object.
           */
          mark_stack_top--;
          continue;
        }
        goto retry;
      }
    } else {
      /* Small object with length descriptor. */
      mark_stack_top--;
#ifndef SMALL_CONFIG
      if (descr < sizeof(ptr_t))
        continue;
#endif
#ifdef ENABLE_TRACE
      if (ADDR_INSIDE(GC_trace_ptr, current_p, current_p + descr)) {
        GC_log_printf("GC #%lu: small object; start %p, len %lu\n",
                      (unsigned long)GC_gc_no, (void *)current_p,
                      (unsigned long)descr);
      }
#endif
      limit = current_p + descr;
    }
    /* The simple case in which we are scanning a range. */
    GC_ASSERT((ADDR(current_p) & (ALIGNMENT - 1)) == 0);
    credit -= limit - current_p;
    limit -= sizeof(ptr_t);
    {
#define PREF_DIST 4

#if !defined(SMALL_CONFIG) && !(defined(E2K) && defined(USE_PTR_HWTAG))
      ptr_t deferred;

# ifdef CHERI_PURECAP
      /*
       * Check each pointer for validity before dereferencing to prevent
       * capability exceptions.  Utilize the pointer meta-data to speed-up
       * the loop.  If the loop is below the pointer bounds, skip the rest
       * of marking for that chunk.  If the limit capability restricts us to
       * reading fewer than size of a pointer, then there cannot possibly be
       * a pointer at `limit`'s pointer, and reading at that location will
       * raise a capability exception.
       */
      {
        word cap_limit = cheri_base_get(limit) + cheri_length_get(limit);

        if (ADDR(limit) + sizeof(ptr_t) > cap_limit) {
          /* Decrement limit so that it to be within bounds of `current_p`. */
          GC_ASSERT(cap_limit > sizeof(ptr_t));
          limit = (ptr_t)cheri_address_set(
              current_p, (cap_limit - sizeof(ptr_t)) & ~(sizeof(ptr_t) - 1));
          goto check_limit;
        }
      }
# endif
      /*
       * Try to prefetch the next pointer to be examined as soon as possible.
       * Empirically, this also seems to help slightly without prefetches,
       * at least on Linux/i686.  Presumably this loop ends up with less
       * register pressure, and gcc thus ends up generating slightly better
       * code.  Overall gcc code quality for this loop is still not great.
       */
      for (;;) {
        PREFETCH(limit - PREF_DIST * CACHE_LINE_SIZE);
        GC_ASSERT(ADDR_GE(limit, current_p));
# ifdef CHERI_PURECAP
        if (ADDR(limit) < cheri_base_get(limit))
          goto next_object;
        if (!HAS_TAG_AND_PERM_LOAD(limit)) {
          limit -= ALIGNMENT;
          goto check_limit;
        }
# endif
        deferred = *(ptr_t *)limit;
        FIXUP_POINTER(deferred);
        limit -= ALIGNMENT;
# ifdef CHERI_PURECAP
        if (!HAS_TAG_AND_PERM_LOAD(deferred))
          goto check_limit;
# endif
        if (ADDR_LT(least_ha, deferred) && ADDR_LT(deferred, greatest_ha)) {
          PREFETCH(deferred);
          break;
        }
# ifndef CHERI_PURECAP
        if (ADDR_LT(limit, current_p))
          goto next_object;
        /*
         * Unroll once, so we do not do too many of the prefetches based
         * on `limit`.
         */
        deferred = *(ptr_t *)limit;
        FIXUP_POINTER(deferred);
        limit -= ALIGNMENT;
        if (ADDR_LT(least_ha, deferred) && ADDR_LT(deferred, greatest_ha)) {
          PREFETCH(deferred);
          break;
        }
# else
      check_limit:
# endif
        if (ADDR_LT(limit, current_p))
          goto next_object;
      }
#endif

      for (; ADDR_GE(limit, current_p); current_p += ALIGNMENT) {
        /*
         * Empirically, unrolling this loop does not help a lot.
         * Since `PUSH_CONTENTS` expands to a lot of code, we do not.
         */
        LOAD_PTR_OR_CONTINUE(q, current_p);
        FIXUP_POINTER(q);
        PREFETCH(current_p + PREF_DIST * CACHE_LINE_SIZE);
        if (ADDR_LT(least_ha, q) && ADDR_LT(q, greatest_ha)) {
          /*
           * Prefetch the content of the object we just pushed.
           * It is likely we will need them soon.
           */
          PREFETCH(q);
#ifdef ENABLE_TRACE
          if (GC_trace_ptr == current_p) {
            GC_log_printf("GC #%lu: considering(1) %p -> %p\n",
                          (unsigned long)GC_gc_no, (void *)current_p,
                          (void *)q);
          }
#endif
          PUSH_CONTENTS(q, mark_stack_top, mark_stack_limit, current_p);
        }
      }

#if !defined(SMALL_CONFIG) && !(defined(E2K) && defined(USE_PTR_HWTAG))
      /*
       * We still need to mark the entry we previously prefetched.
       * We already know that it passes the preliminary pointer validity test.
       */
# ifdef ENABLE_TRACE
      if (GC_trace_ptr == current_p) {
        GC_log_printf("GC #%lu: considering(2) %p -> %p\n",
                      (unsigned long)GC_gc_no, (void *)current_p,
                      (void *)deferred);
      }
# endif
      PUSH_CONTENTS(deferred, mark_stack_top, mark_stack_limit, current_p);
    next_object:;
#endif
    }
  }
  return mark_stack_top;
}
997
#ifdef PARALLEL_MARK

/* Note: this is protected by the mark lock. */
STATIC GC_bool GC_help_wanted = FALSE;

/* Number of running helpers.  Protected by the mark lock. */
STATIC unsigned GC_helper_count = 0;

/*
 * Number of active helpers.  May increase and decrease within each
 * mark cycle; but once it returns to zero, it stays for the cycle.
 * Protected by the mark lock.
 */
STATIC unsigned GC_active_count = 0;

/* The number of the current mark cycle (used by the parallel marker). */
GC_INNER word GC_mark_no = 0;

# ifdef LINT2
#   define LOCAL_MARK_STACK_SIZE (HBLKSIZE / 8)
# else
/*
 * Under normal circumstances, this is big enough to guarantee we do not
 * overflow half of it in a single call to `GC_mark_from`.
 */
#   define LOCAL_MARK_STACK_SIZE HBLKSIZE
# endif
1024
/*
 * Wait for the marker threads to finish starting up, allocating the
 * main thread's local mark stack first if needed.  No-op when there
 * are no parallel markers.
 */
GC_INNER void
GC_wait_for_markers_init(void)
{
  GC_signed_word count;

  GC_ASSERT(I_HOLD_LOCK());
  if (0 == GC_markers_m1)
    return; /*< nothing to wait for */

# ifndef CAN_HANDLE_FORK
  GC_ASSERT(NULL == GC_main_local_mark_stack);
# else
  if (NULL == GC_main_local_mark_stack)
# endif
  {
    size_t bytes_to_get
        = ROUNDUP_PAGESIZE_IF_MMAP(LOCAL_MARK_STACK_SIZE * sizeof(mse));

    /*
     * Allocate the local mark stack for the thread that holds the
     * allocator lock.
     */
    GC_ASSERT(GC_page_size != 0);
    GC_main_local_mark_stack = (mse *)GC_os_get_mem(bytes_to_get);
    if (NULL == GC_main_local_mark_stack)
      ABORT("Insufficient memory for main local_mark_stack");
  }

  /*
   * Reuse the mark lock and builders count to synchronize marker threads
   * startup.
   */
  GC_acquire_mark_lock();
  GC_fl_builder_count += GC_markers_m1;
  count = GC_fl_builder_count;
  GC_release_mark_lock();
  if (count != 0) {
    GC_ASSERT(count > 0);
    GC_wait_for_reclaim();
  }
}
1066
/*
 * Steal mark stack entries starting at `mse` `low` into mark stack `local`
 * until we either steal `mse` `high`, or we have `n_to_get` entries.
 * Return a pointer to the top of the local mark stack.  `*next` is replaced
 * by a pointer to the next unscanned mark stack entry.
 */
STATIC mse *
GC_steal_mark_stack(mse *low, mse *high, mse *local, size_t n_to_get,
                    mse **next)
{
  mse *p;
  mse *top = local - 1; /*< the local stack starts out empty */
  size_t i = 0;         /*< weighted count of entries stolen so far */

  GC_ASSERT(ADDR_GE((ptr_t)high, (ptr_t)(low - 1))
            && (word)(high - low + 1) <= GC_mark_stack_size);
  for (p = low; ADDR_GE((ptr_t)high, (ptr_t)p) && i <= n_to_get; ++p) {
    word descr = AO_load(&p->mse_descr);

    if (descr != 0) {
      /* Must be ordered after read of `mse_descr`. */
      AO_store_release_write(&p->mse_descr, 0);
      /*
       * More than one thread may get this entry, but that is only
       * a minor performance problem.
       */
      ++top;
      top->mse_start = p->mse_start;
      top->mse_descr = descr;
      GC_ASSERT((descr & GC_DS_TAGS) != GC_DS_LENGTH /* 0 */
                || descr < GC_greatest_real_heap_addr - GC_least_real_heap_addr
                || GC_least_real_heap_addr + sizeof(ptr_t)
                       >= ADDR(p->mse_start) + descr
                || ADDR(p->mse_start) >= GC_greatest_real_heap_addr);
      /* If this is a big object, count it as `descr / 256 + 1` objects. */
      ++i;
      if ((descr & GC_DS_TAGS) == GC_DS_LENGTH)
        i += (size_t)(descr >> 8);
    }
  }
  *next = p;
# if defined(CPPCHECK)
  GC_noop1_ptr(local);
# endif
  return top;
}
1113
/*
 * Copy back a local mark stack.  `low` and `high` are inclusive bounds.
 * If the global mark stack has no room, the local entries are dropped and
 * the collection is flagged to restart with a bigger stack.  Waiting
 * helpers are notified in any case.
 */
STATIC void
GC_return_mark_stack(mse *low, mse *high)
{
  mse *my_top;
  mse *my_start;
  size_t stack_size;

  if (ADDR_LT((ptr_t)high, (ptr_t)low))
    return; /*< empty local stack */
  stack_size = high - low + 1;
  GC_acquire_mark_lock();
  /* Note: the concurrent modification is impossible. */
  my_top = GC_mark_stack_top;
  my_start = my_top + 1;
  if ((word)(my_start - GC_mark_stack + stack_size)
      > (word)GC_mark_stack_size) {
    GC_COND_LOG_PRINTF("No room to copy back mark stack\n");
    GC_mark_state = MS_INVALID;
    GC_mark_stack_too_small = TRUE;
    /* We drop the local mark stack.  We will fix things later. */
  } else {
    BCOPY(low, my_start, stack_size * sizeof(mse));
    GC_ASSERT((mse *)GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top)
              == my_top);
    /* Ensures visibility of previously written stack contents. */
    GC_cptr_store_release_write((volatile ptr_t *)&GC_mark_stack_top,
                                (ptr_t)(my_top + stack_size));
  }
  GC_release_mark_lock();
  GC_notify_all_marker();
}
1146
1147 # ifndef N_LOCAL_ITERS
1148 # define N_LOCAL_ITERS 1
1149 # endif
1150
1151 /*
1152 * Note: called only when the local and the main mark stacks are both
1153 * empty.
1154 */
1155 static GC_bool
1156 has_inactive_helpers(void)
1157 {
1158 GC_bool res;
1159
1160 GC_acquire_mark_lock();
1161 res = GC_active_count < GC_helper_count;
1162 GC_release_mark_lock();
1163 return res;
1164 }
1165
1166 /*
1167 * Mark from the local mark stack. On return, the local mark stack
1168 * is empty. But this may be achieved by copying the local mark stack
1169 * back into the global one. We do not hold the mark lock.
1170 */
1171 STATIC void
1172 GC_do_local_mark(mse *local_mark_stack, mse *local_top)
1173 {
1174 unsigned n;
1175
1176 for (;;) {
1177 for (n = 0; n < N_LOCAL_ITERS; ++n) {
1178 local_top = GC_mark_from(local_top, local_mark_stack,
1179 local_mark_stack + LOCAL_MARK_STACK_SIZE);
1180 if (ADDR_LT((ptr_t)local_top, (ptr_t)local_mark_stack))
1181 return;
1182 if ((word)(local_top - local_mark_stack) >= LOCAL_MARK_STACK_SIZE / 2) {
1183 GC_return_mark_stack(local_mark_stack, local_top);
1184 return;
1185 }
1186 }
1187 if (ADDR_LT(GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top),
1188 GC_cptr_load(&GC_first_nonempty))
1189 && ADDR_LT((ptr_t)(local_mark_stack + 1), (ptr_t)local_top)
1190 && has_inactive_helpers()) {
1191 /*
1192 * Try to share the load, since the main stack is empty, and the helper
1193 * threads are waiting for a refill. The entries near the bottom of
1194 * the stack are likely to require more work. Thus we return those,
1195 * even though it is harder.
1196 */
1197 mse *new_bottom = local_mark_stack + (local_top - local_mark_stack) / 2;
1198
1199 GC_ASSERT(ADDR_LT((ptr_t)local_mark_stack, (ptr_t)new_bottom)
1200 && ADDR_LT((ptr_t)new_bottom, (ptr_t)local_top));
1201 GC_return_mark_stack(local_mark_stack, new_bottom - 1);
1202 memmove(local_mark_stack, new_bottom,
1203 (local_top - new_bottom + 1) * sizeof(mse));
1204 local_top -= new_bottom - local_mark_stack;
1205 }
1206 }
1207 }
1208
1209 # ifndef ENTRIES_TO_GET
1210 # define ENTRIES_TO_GET 5
1211 # endif
1212
1213 /*
1214 * Mark using the local mark stack until the global mark stack is empty and
1215 * there are no active workers. Update `GC_first_nonempty` to reflect the
1216 * progress. Caller holds the mark lock. Caller has already incremented
1217 * `GC_helper_count`; we decrement it, and maintain `GC_active_count`.
1218 */
1219 STATIC void
1220 GC_mark_local(mse *local_mark_stack, int id)
1221 {
1222 mse *my_first_nonempty;
1223
1224 GC_active_count++;
1225 my_first_nonempty = (mse *)GC_cptr_load(&GC_first_nonempty);
1226 GC_ASSERT(ADDR_GE((ptr_t)my_first_nonempty, (ptr_t)GC_mark_stack));
1227 GC_ASSERT(
1228 ADDR_GE(GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top) + sizeof(mse),
1229 (ptr_t)my_first_nonempty));
1230 GC_VERBOSE_LOG_PRINTF("Starting mark helper %d\n", id);
1231 GC_release_mark_lock();
1232 for (;;) {
1233 size_t n_on_stack, n_to_get;
1234 mse *my_top, *local_top;
1235 mse *global_first_nonempty = (mse *)GC_cptr_load(&GC_first_nonempty);
1236
1237 GC_ASSERT(ADDR_GE((ptr_t)my_first_nonempty, (ptr_t)GC_mark_stack)
1238 && ADDR_GE(GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top)
1239 + sizeof(mse),
1240 (ptr_t)my_first_nonempty));
1241 GC_ASSERT(ADDR_GE((ptr_t)global_first_nonempty, (ptr_t)GC_mark_stack));
1242 if (ADDR_LT((ptr_t)my_first_nonempty, (ptr_t)global_first_nonempty)) {
1243 my_first_nonempty = global_first_nonempty;
1244 } else if (ADDR_LT((ptr_t)global_first_nonempty,
1245 (ptr_t)my_first_nonempty)) {
1246 (void)GC_cptr_compare_and_swap(&GC_first_nonempty,
1247 (ptr_t)global_first_nonempty,
1248 (ptr_t)my_first_nonempty);
1249 /*
1250 * If this fails, then we just go ahead, without updating
1251 * `GC_first_nonempty`.
1252 */
1253 }
1254 /*
1255 * Perhaps we should also update `GC_first_nonempty`, if it is less.
1256 * But that would require usage of the atomic updates.
1257 */
1258 my_top = (mse *)GC_cptr_load_acquire((volatile ptr_t *)&GC_mark_stack_top);
1259 if (ADDR_LT((ptr_t)my_top, (ptr_t)my_first_nonempty)) {
1260 GC_acquire_mark_lock();
1261 /*
1262 * Note: asynchronous modification is impossible here, since
1263 * we hold the mark lock.
1264 */
1265 my_top = GC_mark_stack_top;
1266 n_on_stack = my_top - my_first_nonempty + 1;
1267 if (0 == n_on_stack) {
1268 GC_active_count--;
1269 GC_ASSERT(GC_active_count <= GC_helper_count);
1270 /* Other markers may redeposit objects on the stack. */
1271 if (0 == GC_active_count)
1272 GC_notify_all_marker();
1273 while (GC_active_count > 0
1274 && ADDR_LT((ptr_t)GC_mark_stack_top,
1275 GC_cptr_load(&GC_first_nonempty))) {
1276 /*
1277 * We will be notified if either `GC_active_count` reaches zero,
1278 * or if more objects are pushed on the global mark stack.
1279 */
1280 GC_wait_marker();
1281 }
1282 if (0 == GC_active_count
1283 && ADDR_LT((ptr_t)GC_mark_stack_top,
1284 GC_cptr_load(&GC_first_nonempty))) {
1285 GC_bool need_to_notify = FALSE;
1286
1287 /*
1288 * The above conditions cannot be falsified while we hold
1289 * the mark lock, since neither `GC_active_count` nor
1290 * `GC_mark_stack_top` can change. `GC_first_nonempty` can
1291 * only be incremented asynchronously. Thus we know that
1292 * both conditions are actually held simultaneously.
1293 */
1294 GC_helper_count--;
1295 if (0 == GC_helper_count)
1296 need_to_notify = TRUE;
1297 GC_VERBOSE_LOG_PRINTF("Finished mark helper %d\n", id);
1298 if (need_to_notify)
1299 GC_notify_all_marker();
1300 return;
1301 }
1302 /*
1303 * Else there is something on the stack again, or another helper
1304 * may push something.
1305 */
1306 GC_active_count++;
1307 GC_ASSERT(GC_active_count > 0);
1308 GC_release_mark_lock();
1309 continue;
1310 } else {
1311 GC_release_mark_lock();
1312 }
1313 } else {
1314 n_on_stack = my_top - my_first_nonempty + 1;
1315 }
1316 n_to_get = ENTRIES_TO_GET;
1317 if (n_on_stack < 2 * ENTRIES_TO_GET)
1318 n_to_get = 1;
1319 local_top
1320 = GC_steal_mark_stack(my_first_nonempty, my_top, local_mark_stack,
1321 n_to_get, &my_first_nonempty);
1322 GC_ASSERT(ADDR_GE((ptr_t)my_first_nonempty, (ptr_t)GC_mark_stack)
1323 && ADDR_GE(GC_cptr_load((volatile ptr_t *)&GC_mark_stack_top)
1324 + sizeof(mse),
1325 (ptr_t)my_first_nonempty));
1326 GC_do_local_mark(local_mark_stack, local_top);
1327 }
1328 }
1329
1330 /*
1331 * Perform parallel mark. We hold the allocator lock, but not the mark lock.
1332 * Currently runs until the mark stack is empty.
1333 */
1334 STATIC void
1335 GC_do_parallel_mark(void)
1336 {
1337 GC_ASSERT(I_HOLD_LOCK());
1338 GC_acquire_mark_lock();
1339 GC_ASSERT(!GC_help_wanted);
1340 GC_ASSERT(0 == GC_active_count && 0 == GC_helper_count);
1341 GC_VERBOSE_LOG_PRINTF("Starting marking for mark phase number %lu\n",
1342 (unsigned long)GC_mark_no);
1343
1344 GC_cptr_store(&GC_first_nonempty, (ptr_t)GC_mark_stack);
1345 GC_active_count = 0;
1346 GC_helper_count = 1;
1347 GC_help_wanted = TRUE;
1348 /* Wake up potential helpers. */
1349 GC_notify_all_marker();
1350 GC_mark_local(GC_main_local_mark_stack, 0);
1351 GC_help_wanted = FALSE;
1352 /* Done; clean up. */
1353 while (GC_helper_count > 0) {
1354 GC_wait_marker();
1355 }
1356 /* `GC_helper_count` cannot be incremented while not `GC_help_wanted`. */
1357 GC_VERBOSE_LOG_PRINTF("Finished marking for mark phase number %lu\n",
1358 (unsigned long)GC_mark_no);
1359 GC_mark_no++;
1360 GC_release_mark_lock();
1361 GC_notify_all_marker();
1362 }
1363
/*
 * Top-level body of a marker thread for one mark phase: wait until the
 * mark phase numbered `my_mark_no` wants help, register as a helper and
 * mark until that phase is done.
 */
GC_INNER void
GC_help_marker(word my_mark_no)
{
# define my_id my_id_mse.mse_descr
  /*
   * Put `my_id` inside the structure to keep `local_mark_stack` aligned
   * explicitly.
   */
  mse my_id_mse;
  mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
  /* Note: `local_mark_stack` is quite big (up to 128 KiB). */

  GC_ASSERT(I_DONT_HOLD_LOCK());
  GC_ASSERT(GC_parallel);
  /* Wait until our mark phase has started and help is wanted. */
  while (GC_mark_no < my_mark_no
         || (!GC_help_wanted && GC_mark_no == my_mark_no)) {
    GC_wait_marker();
  }
  my_id = GC_helper_count;
  if (GC_mark_no != my_mark_no || my_id > (unsigned)GC_markers_m1) {
    /*
     * The second test is useful only if the original threads can also
     * act as helpers.  Under Linux they cannot.
     */
    return;
  }
  GC_helper_count = (unsigned)my_id + 1;
  GC_mark_local(local_mark_stack, (int)my_id);
  /* `GC_mark_local` decrements `GC_helper_count`. */
# undef my_id
}
1395
1396 #endif /* PARALLEL_MARK */
1397
1398 /*
1399 * Allocate or reallocate space for mark stack of size `n` entries.
1400 * May silently fail.
1401 */
1402 static void
1403 alloc_mark_stack(size_t n)
1404 {
1405 #ifdef GWW_VDB
1406 static GC_bool GC_incremental_at_stack_alloc = FALSE;
1407
1408 GC_bool recycle_old;
1409 #endif
1410 mse *new_stack;
1411
1412 GC_ASSERT(I_HOLD_LOCK());
1413 new_stack = (mse *)GC_scratch_alloc(n * sizeof(struct GC_ms_entry));
1414 #ifdef GWW_VDB
1415 /*
1416 * Do not recycle a stack segment obtained with the wrong flags.
1417 * Win32 `GetWriteWatch` requires the right kind of memory.
1418 */
1419 recycle_old = !GC_auto_incremental || GC_incremental_at_stack_alloc;
1420 GC_incremental_at_stack_alloc = GC_auto_incremental;
1421 #endif
1422
1423 GC_mark_stack_too_small = FALSE;
1424 if (GC_mark_stack != NULL) {
1425 if (new_stack != 0) {
1426 #ifdef GWW_VDB
1427 if (recycle_old)
1428 #endif
1429 {
1430 /* Recycle old space. */
1431 GC_scratch_recycle_inner(
1432 GC_mark_stack, GC_mark_stack_size * sizeof(struct GC_ms_entry));
1433 }
1434 GC_mark_stack = new_stack;
1435 GC_mark_stack_size = n;
1436 /* FIXME: Do we need some way to reset `GC_mark_stack_size`? */
1437 GC_mark_stack_limit = new_stack + n;
1438 GC_COND_LOG_PRINTF("Grew mark stack to %lu frames\n",
1439 (unsigned long)GC_mark_stack_size);
1440 } else {
1441 WARN("Failed to grow mark stack to %" WARN_PRIuPTR " frames\n", n);
1442 }
1443 } else if (NULL == new_stack) {
1444 GC_err_printf("No space for mark stack\n");
1445 EXIT();
1446 } else {
1447 GC_mark_stack = new_stack;
1448 GC_mark_stack_size = n;
1449 GC_mark_stack_limit = new_stack + n;
1450 }
1451 GC_mark_stack_top = GC_mark_stack - 1;
1452 }
1453
GC_INNER void
GC_mark_init(void)
{
  /* Set up the initial global mark stack; it may grow later on overflow. */
  alloc_mark_stack(INITIAL_MARK_STACK_SIZE);
}
1459
1460 GC_API void GC_CALL
1461 GC_push_all(void *bottom, void *top)
1462 {
1463 mse *mark_stack_top;
1464 word length;
1465
1466 bottom = PTR_ALIGN_UP((ptr_t)bottom, ALIGNMENT);
1467 top = PTR_ALIGN_DOWN((ptr_t)top, ALIGNMENT);
1468 if (ADDR_GE((ptr_t)bottom, (ptr_t)top))
1469 return;
1470
1471 mark_stack_top = GC_mark_stack_top + 1;
1472 if (ADDR_GE((ptr_t)mark_stack_top, (ptr_t)GC_mark_stack_limit)) {
1473 ABORT("Unexpected mark stack overflow");
1474 }
1475 length = (word)((ptr_t)top - (ptr_t)bottom);
1476 #if GC_DS_TAGS > ALIGNMENT - 1
1477 length = (length + GC_DS_TAGS) & ~(word)GC_DS_TAGS; /*< round up */
1478 #endif
1479 mark_stack_top->mse_start = (ptr_t)bottom;
1480 mark_stack_top->mse_descr = length | GC_DS_LENGTH;
1481 GC_mark_stack_top = mark_stack_top;
1482 }
1483
1484 GC_API struct GC_ms_entry *GC_CALL
1485 GC_custom_push_range(void *bottom, void *top,
1486 struct GC_ms_entry *mark_stack_top,
1487 struct GC_ms_entry *mark_stack_limit)
1488 {
1489 word length;
1490
1491 bottom = PTR_ALIGN_UP((ptr_t)bottom, ALIGNMENT);
1492 top = PTR_ALIGN_DOWN((ptr_t)top, ALIGNMENT);
1493 if (ADDR_GE((ptr_t)bottom, (ptr_t)top))
1494 return mark_stack_top;
1495
1496 length = (word)((ptr_t)top - (ptr_t)bottom);
1497 #if GC_DS_TAGS > ALIGNMENT - 1
1498 length = (length + GC_DS_TAGS) & ~(word)GC_DS_TAGS; /*< round up */
1499 #endif
1500 return GC_custom_push_proc(length | GC_DS_LENGTH, bottom, mark_stack_top,
1501 mark_stack_limit);
1502 }
1503
1504 GC_API struct GC_ms_entry *GC_CALL
1505 GC_custom_push_proc(GC_word descr, void *obj,
1506 struct GC_ms_entry *mark_stack_top,
1507 struct GC_ms_entry *mark_stack_limit)
1508 {
1509 mark_stack_top++;
1510 if (ADDR_GE((ptr_t)mark_stack_top, (ptr_t)mark_stack_limit)) {
1511 mark_stack_top = GC_signal_mark_stack_overflow(mark_stack_top);
1512 }
1513 mark_stack_top->mse_start = (ptr_t)obj;
1514 mark_stack_top->mse_descr = descr;
1515 return mark_stack_top;
1516 }
1517
/* Push a (`descr`, `obj`) entry onto the global mark stack. */
GC_API void GC_CALL
GC_push_proc(GC_word descr, void *obj)
{
  GC_mark_stack_top = GC_custom_push_proc(descr, obj, GC_mark_stack_top,
                                          GC_mark_stack_limit);
}
1524
1525 #ifndef GC_DISABLE_INCREMENTAL
1526
1527 /*
1528 * Analogous to `GC_push_all`, but push only those pages `h` with
1529 * `dirty_fn(h) != 0`. We use `GC_push_all` to actually push the block.
1530 * Used both to selectively push dirty pages, or to push a block in
1531 * a piecemeal fashion, to allow for more marking concurrency.
1532 * Will not overflow mark stack if `GC_push_all` pushes a small fixed
1533 * number of entries. (This is invoked only if `GC_push_all` pushes
1534 * a single entry, or if it marks each object before pushing it, thus
1535 * ensuring progress in the event of a stack overflow.)
1536 */
1537 STATIC void
1538 GC_push_selected(ptr_t bottom, ptr_t top, GC_bool (*dirty_fn)(struct hblk *))
1539 {
1540 struct hblk *h;
1541
1542 bottom = PTR_ALIGN_UP(bottom, ALIGNMENT);
1543 top = PTR_ALIGN_DOWN(top, ALIGNMENT);
1544 if (ADDR_GE(bottom, top))
1545 return;
1546
1547 h = HBLKPTR(bottom + HBLKSIZE);
1548 if (ADDR_GE((ptr_t)h, top)) {
1549 if ((*dirty_fn)(h - 1)) {
1550 GC_push_all(bottom, top);
1551 }
1552 return;
1553 }
1554 if ((*dirty_fn)(h - 1)) {
1555 if ((word)(GC_mark_stack_top - GC_mark_stack)
1556 > 3 * GC_mark_stack_size / 4) {
1557 GC_push_all(bottom, top);
1558 return;
1559 }
1560 GC_push_all(bottom, h);
1561 }
1562
1563 while (ADDR_GE(top, (ptr_t)(h + 1))) {
1564 if ((*dirty_fn)(h)) {
1565 if ((word)(GC_mark_stack_top - GC_mark_stack)
1566 > 3 * GC_mark_stack_size / 4) {
1567 /* Danger of mark stack overflow. */
1568 GC_push_all(h, top);
1569 return;
1570 } else {
1571 GC_push_all(h, h + 1);
1572 }
1573 }
1574 h++;
1575 }
1576
1577 if ((ptr_t)h != top && (*dirty_fn)(h)) {
1578 GC_push_all(h, top);
1579 }
1580 }
1581
/*
 * Push the range, either entirely (`all` is nonzero) or restricted to
 * its dirty pages.
 */
GC_API void GC_CALL
GC_push_conditional(void *bottom, void *top, int all)
{
  if (!all) {
    GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_dirty);
  } else {
# ifdef PROC_VDB
    if (GC_auto_incremental) {
      /* Pages that were never dirtied cannot contain pointers. */
      GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_page_was_ever_dirty);
    } else
# endif
    /* else */ {
      GC_push_all(bottom, top);
    }
  }
}
1599
1600 # ifndef NO_VDB_FOR_STATIC_ROOTS
1601 # ifndef PROC_VDB
1602 /*
1603 * Same as `GC_page_was_dirty` but `h` is allowed to point to some page
1604 * in the registered static roots only. Not used if the manual VDB is on.
1605 */
1606 STATIC GC_bool
1607 GC_static_page_was_dirty(struct hblk *h)
1608 {
1609 return get_pht_entry_from_index(GC_grungy_pages, PHT_HASH(h));
1610 }
1611 # endif
1612
/*
 * Conditional push for static-root ranges: pushes everything, or only
 * the dirty pages when the virtual-dirty-bits mechanism covers the
 * static roots.
 */
GC_INNER void
GC_push_conditional_static(void *bottom, void *top, GC_bool all)
{
# ifdef PROC_VDB
    /*
     * Just redirect to the generic routine because `PROC_VDB`
     * implementation gets the dirty bits map for the whole process memory.
     */
    GC_push_conditional(bottom, top, all);
# else
    if (all || !GC_is_vdb_for_static_roots()) {
      GC_push_all(bottom, top);
    } else {
      GC_push_selected((ptr_t)bottom, (ptr_t)top, GC_static_page_was_dirty);
    }
# endif
}
1630 # endif /* !NO_VDB_FOR_STATIC_ROOTS */
1631
1632 #else
/* Without incremental collection there are no dirty bits: push it all. */
GC_API void GC_CALL
GC_push_conditional(void *bottom, void *top, int all)
{
  UNUSED_ARG(all);
  GC_push_all(bottom, top);
}
1639 #endif /* GC_DISABLE_INCREMENTAL */
1640
1641 #if defined(DARWIN) && defined(THREADS)
/* Push a single candidate pointer found in a thread register/stack. */
void
GC_push_one(word p)
{
  GC_PUSH_ONE_STACK((ptr_t)p, MARKED_FROM_REGISTER);
}
1647 #endif /* DARWIN && THREADS */
1648
1649 #if defined(GC_WIN32_THREADS)
1650 GC_INNER void
1651 GC_push_many_regs(const word *regs, unsigned count)
1652 {
1653 unsigned i;
1654
1655 for (i = 0; i < count; i++)
1656 GC_PUSH_ONE_STACK((ptr_t)regs[i], MARKED_FROM_REGISTER);
1657 }
1658 #endif /* GC_WIN32_THREADS */
1659
/*
 * Mark the object `obj` (if it is a valid heap object) and push its
 * contents onto the given mark stack; return the new mark stack top.
 * `src` is the location from which `obj` was fetched, used for
 * black-listing.  Invalid candidates are black-listed instead of pushed.
 */
GC_API struct GC_ms_entry *GC_CALL
GC_mark_and_push(void *obj, mse *mark_stack_top, mse *mark_stack_limit,
                 void **src)
{
  hdr *hhdr;

  PREFETCH(obj);
  GET_HDR(obj, hhdr);
  if ((UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(hhdr))
       && (!GC_all_interior_pointers
           /* Retry from the object base for an interior pointer. */
           || NULL == (hhdr = GC_find_header(GC_base(obj)))))
      || UNLIKELY(HBLK_IS_FREE(hhdr))) {
    GC_ADD_TO_BLACK_LIST_NORMAL((ptr_t)obj, (ptr_t)src);
    return mark_stack_top;
  }
  return GC_push_contents_hdr((ptr_t)obj, mark_stack_top, mark_stack_limit,
                              (ptr_t)src, hhdr, TRUE);
}
1678
/*
 * Like `GC_mark_and_push` but for candidate pointers found on a stack
 * (or in registers): always tolerates interior pointers, pushes onto
 * the global mark stack, and black-lists invalid candidates.
 */
GC_ATTR_NO_SANITIZE_ADDR
GC_INNER void
#if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
GC_mark_and_push_stack(ptr_t p, ptr_t source)
#else
GC_mark_and_push_stack(ptr_t p)
# define source ((ptr_t)0)
#endif
{
  hdr *hhdr;
  ptr_t r = p; /*< base of the object containing `p` */

  PREFETCH(p);
  GET_HDR(p, hhdr);
  if (UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(hhdr))) {
    /* Retry the header lookup from the object base. */
    if (NULL == hhdr || (r = (ptr_t)GC_base(p)) == NULL
        || (hhdr = HDR(r)) == NULL) {
      GC_ADD_TO_BLACK_LIST_STACK(p, source);
      return;
    }
  }
  if (UNLIKELY(HBLK_IS_FREE(hhdr))) {
    GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
    return;
  }
#ifdef THREADS
  /*
   * Pointer is on the stack.  We may have dirtied the object it points to,
   * but have not called `GC_dirty` yet.
   */
  GC_dirty(p); /*< entire object */
#endif
  GC_mark_stack_top = GC_push_contents_hdr(
      r, GC_mark_stack_top, GC_mark_stack_limit, source, hhdr, FALSE);
  /*
   * We silently ignore pointers to near the end of a block, which is
   * very mildly suboptimal.
   */
  /* FIXME: We should probably add a header word to address this. */
#undef source
}
1720
1721 #ifdef TRACE_BUF
1722 # ifndef TRACE_ENTRIES
1723 # define TRACE_ENTRIES 1000
1724 # endif
1725
/* One record of the circular tracing buffer (see `GC_add_trace_entry`). */
struct trace_entry {
  const char *caller_fn_name; /*< `NULL` means the slot is unused */
  word gc_no;                 /*< `GC_gc_no` at the time of the event */
  word bytes_allocd;          /*< `GC_bytes_allocd` at the time of the event */
  GC_hidden_pointer arg1;
  GC_hidden_pointer arg2;
} GC_trace_buf[TRACE_ENTRIES] = { { (const char *)NULL, 0, 0, 0, 0 } };
1733
1734 void
1735 GC_add_trace_entry(const char *caller_fn_name, ptr_t arg1, ptr_t arg2)
1736 {
1737 size_t i = GC_trace_buf_pos;
1738
1739 GC_trace_buf[i].caller_fn_name = caller_fn_name;
1740 GC_trace_buf[i].gc_no = GC_gc_no;
1741 GC_trace_buf[i].bytes_allocd = GC_bytes_allocd;
1742 GC_trace_buf[i].arg1 = GC_HIDE_POINTER(arg1);
1743 GC_trace_buf[i].arg2 = GC_HIDE_POINTER(arg2);
1744 i++;
1745 if (i >= TRACE_ENTRIES)
1746 i = 0;
1747 GC_trace_buf_pos = i;
1748 }
1749
/*
 * Print the recorded trace entries, newest first, back to (but not
 * including) the first entry whose `gc_no` precedes the given one.
 * Walks the circular buffer backwards from the current position.
 */
GC_API void GC_CALL
GC_print_trace_inner(GC_word gc_no)
{
  size_t i;

  for (i = GC_trace_buf_pos;; i--) {
    struct trace_entry *p;

    if (0 == i)
      i = TRACE_ENTRIES; /*< wrap around */
    p = &GC_trace_buf[i - 1];
    /*
     * Compare `gc_no` values (`p->gc_no` is less than given `gc_no`)
     * taking into account that the counter may overflow.
     */
    if (((p->gc_no - gc_no) & SIGNB) != 0 || NULL == p->caller_fn_name) {
      return;
    }
    GC_printf("Trace:%s (gc:%lu, bytes:%lu) %p, %p\n", p->caller_fn_name,
              (unsigned long)p->gc_no, (unsigned long)p->bytes_allocd,
              GC_REVEAL_POINTER(p->arg1), GC_REVEAL_POINTER(p->arg2));
    /* Stop after a full circle through the buffer. */
    if (i == GC_trace_buf_pos + 1)
      break;
  }
  GC_printf("Trace incomplete\n");
}
1776
/* Same as `GC_print_trace_inner` but acquires the reader lock. */
GC_API void GC_CALL
GC_print_trace(GC_word gc_no)
{
  READER_LOCK();
  GC_print_trace_inner(gc_no);
  READER_UNLOCK();
}
1784 #endif /* TRACE_BUF */
1785
/*
 * Examine every aligned pointer-sized slot in the given range and push
 * each value that looks like a valid heap pointer.  Unlike `GC_push_all`,
 * nothing is deferred to the mark stack for re-scanning.
 */
GC_ATTR_NO_SANITIZE_ADDR_MEM_THREAD
GC_API void GC_CALL
GC_push_all_eager(void *bottom, void *top)
{
  REGISTER ptr_t current_p;
  REGISTER word lim_addr;
  REGISTER ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  REGISTER ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
/* Make `GC_PUSH_ONE_STACK` below use the cached local copies. */
#define GC_greatest_plausible_heap_addr greatest_ha
#define GC_least_plausible_heap_addr least_ha

  if (NULL == top)
    return;
  /* Check all pointers in range and push if they appear to be valid. */
  current_p = PTR_ALIGN_UP((ptr_t)bottom, ALIGNMENT);
  lim_addr = ADDR(PTR_ALIGN_DOWN((ptr_t)top, ALIGNMENT)) - sizeof(ptr_t);
#ifdef CHERI_PURECAP
  {
    /* Do not read past the limit of the capability. */
    word cap_limit = cheri_base_get(current_p) + cheri_length_get(current_p);

    if (lim_addr >= cap_limit)
      lim_addr = cap_limit - sizeof(ptr_t);
  }
#endif
  for (; ADDR(current_p) <= lim_addr; current_p += ALIGNMENT) {
    REGISTER ptr_t q;

    LOAD_PTR_OR_CONTINUE(q, current_p);
    GC_PUSH_ONE_STACK(q, current_p);
  }
#undef GC_greatest_plausible_heap_addr
#undef GC_least_plausible_heap_addr
}
1819
/*
 * Push a stack range: either lazily via `GC_push_all` (when interior
 * pointers are tolerated anyway and the mark stack has plenty of room)
 * or eagerly, slot by slot.
 */
GC_INNER void
GC_push_all_stack(ptr_t bottom, ptr_t top)
{
  GC_ASSERT(I_HOLD_LOCK());
#ifndef NEED_FIXUP_POINTER
  if (GC_all_interior_pointers
#   if defined(THREADS) && defined(MPROTECT_VDB)
      && !GC_auto_incremental
#   endif
      && ADDR_LT((ptr_t)GC_mark_stack_top,
                 (ptr_t)(GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE / 8))) {
    GC_push_all(bottom, top);
  } else
#endif
  /* else */ {
    GC_push_all_eager(bottom, top);
  }
}
1838
1839 #if defined(WRAP_MARK_SOME) && defined(PARALLEL_MARK)
/*
 * Eager variant of `GC_push_conditional`: scan every aligned slot of the
 * range and push plausible heap pointers onto the global mark stack.
 * The `all` argument is currently ignored (the whole range is scanned).
 */
GC_ATTR_NO_SANITIZE_ADDR_MEM_THREAD
GC_INNER void
GC_push_conditional_eager(void *bottom, void *top, GC_bool all)
{
  REGISTER ptr_t current_p;
  REGISTER ptr_t lim;
  REGISTER ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
  REGISTER ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
  /* Make `GC_PUSH_ONE_HEAP` below use the cached local copies. */
# define GC_greatest_plausible_heap_addr greatest_ha
# define GC_least_plausible_heap_addr least_ha

  if (NULL == top)
    return;

  /* TODO: If not `all`, then scan only dirty pages. */
  (void)all;

  current_p = PTR_ALIGN_UP((ptr_t)bottom, ALIGNMENT);
  lim = PTR_ALIGN_DOWN((ptr_t)top, ALIGNMENT) - sizeof(ptr_t);
  for (; ADDR_GE(lim, current_p); current_p += ALIGNMENT) {
    REGISTER ptr_t q;

    LOAD_PTR_OR_CONTINUE(q, current_p);
    GC_PUSH_ONE_HEAP(q, current_p, GC_mark_stack_top);
  }
# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
}
1868 #endif /* WRAP_MARK_SOME && PARALLEL_MARK */
1869
1870 #if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) \
1871 && !defined(MARK_BIT_PER_OBJ) && GC_GRANULE_PTRS <= 4
# define USE_PUSH_MARKED_ACCELERATORS
  /*
   * `PUSH_GRANULE(q)` pushes the contents of each pointer-sized slot of
   * one granule starting at `q`; one variant per `GC_GRANULE_PTRS` value
   * (1, 2 or 4 pointers per granule).
   */
# if GC_GRANULE_PTRS == 1
#   define PUSH_GRANULE(q) \
      do { \
        ptr_t qcontents = (q)[0]; \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
      } while (0)
# elif GC_GRANULE_PTRS == 2
#   define PUSH_GRANULE(q) \
      do { \
        ptr_t qcontents = (q)[0]; \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
        qcontents = (q)[1]; \
        GC_PUSH_ONE_HEAP(qcontents, (q) + 1, GC_mark_stack_top); \
      } while (0)
# else
#   define PUSH_GRANULE(q) \
      do { \
        ptr_t qcontents = (q)[0]; \
        GC_PUSH_ONE_HEAP(qcontents, q, GC_mark_stack_top); \
        qcontents = (q)[1]; \
        GC_PUSH_ONE_HEAP(qcontents, (q) + 1, GC_mark_stack_top); \
        qcontents = (q)[2]; \
        GC_PUSH_ONE_HEAP(qcontents, (q) + 2, GC_mark_stack_top); \
        qcontents = (q)[3]; \
        GC_PUSH_ONE_HEAP(qcontents, (q) + 3, GC_mark_stack_top); \
      } while (0)
# endif
1900
1901 /*
1902 * Push all objects reachable from marked objects in the given block
1903 * containing objects of size 1 granule.
1904 */
1905 GC_ATTR_NO_SANITIZE_THREAD
1906 STATIC void
1907 GC_push_marked1(struct hblk *h, const hdr *hhdr)
1908 {
1909 const word *mark_word_addr
1910 = (word *)CAST_AWAY_VOLATILE_PVOID(hhdr->hb_marks);
1911 ptr_t *p;
1912 ptr_t plim;
1913
1914 /*
1915 * Allow registers to be used for some frequently accessed global variables.
1916 * Otherwise aliasing issues are likely to prevent that.
1917 */
1918 ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
1919 ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
1920 mse *mark_stack_top = GC_mark_stack_top;
1921 mse *mark_stack_limit = GC_mark_stack_limit;
1922
1923 # undef GC_mark_stack_top
1924 # undef GC_mark_stack_limit
1925 # define GC_mark_stack_top mark_stack_top
1926 # define GC_mark_stack_limit mark_stack_limit
1927 # define GC_greatest_plausible_heap_addr greatest_ha
1928 # define GC_least_plausible_heap_addr least_ha
1929
1930 p = (ptr_t *)h->hb_body;
1931 plim = (ptr_t)h + HBLKSIZE;
1932
1933 /* Go through all granules in block. */
1934 while (ADDR_LT((ptr_t)p, plim)) {
1935 word mark_word = *mark_word_addr++;
1936 ptr_t *q;
1937
1938 for (q = p; mark_word != 0; mark_word >>= 1) {
1939 if ((mark_word & 1) != 0)
1940 PUSH_GRANULE(q);
1941 q += GC_GRANULE_PTRS;
1942 }
1943 p += CPP_WORDSZ * GC_GRANULE_PTRS;
1944 }
1945
1946 # undef GC_greatest_plausible_heap_addr
1947 # undef GC_least_plausible_heap_addr
1948 # undef GC_mark_stack_top
1949 # undef GC_mark_stack_limit
1950 # define GC_mark_stack_limit GC_arrays._mark_stack_limit
1951 # define GC_mark_stack_top GC_arrays._mark_stack_top
1952 GC_mark_stack_top = mark_stack_top;
1953 }
1954
1955 # ifndef UNALIGNED_PTRS
1956 /*
1957 * Push all objects reachable from marked objects in the given block
1958 * of two-granule objects.
1959 */
1960 GC_ATTR_NO_SANITIZE_THREAD
1961 STATIC void
1962 GC_push_marked2(struct hblk *h, const hdr *hhdr)
1963 {
1964 const word *mark_word_addr
1965 = (word *)CAST_AWAY_VOLATILE_PVOID(hhdr->hb_marks);
1966 ptr_t *p;
1967 ptr_t plim;
1968 ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
1969 ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
1970 mse *mark_stack_top = GC_mark_stack_top;
1971 mse *mark_stack_limit = GC_mark_stack_limit;
1972
1973 # undef GC_mark_stack_top
1974 # undef GC_mark_stack_limit
1975 # define GC_mark_stack_top mark_stack_top
1976 # define GC_mark_stack_limit mark_stack_limit
1977 # define GC_greatest_plausible_heap_addr greatest_ha
1978 # define GC_least_plausible_heap_addr least_ha
1979
1980 p = (ptr_t *)h->hb_body;
1981 plim = (ptr_t)h + HBLKSIZE;
1982
1983 /* Go through all granules in block. */
1984 while (ADDR_LT((ptr_t)p, plim)) {
1985 word mark_word = *mark_word_addr++;
1986 ptr_t *q;
1987
1988 for (q = p; mark_word != 0; mark_word >>= 2) {
1989 if (mark_word & 1) {
1990 PUSH_GRANULE(q);
1991 PUSH_GRANULE(q + GC_GRANULE_PTRS);
1992 }
1993 q += 2 * GC_GRANULE_PTRS;
1994 }
1995 p += CPP_WORDSZ * GC_GRANULE_PTRS;
1996 }
1997
1998 # undef GC_greatest_plausible_heap_addr
1999 # undef GC_least_plausible_heap_addr
2000 # undef GC_mark_stack_top
2001 # undef GC_mark_stack_limit
2002 # define GC_mark_stack_limit GC_arrays._mark_stack_limit
2003 # define GC_mark_stack_top GC_arrays._mark_stack_top
2004 GC_mark_stack_top = mark_stack_top;
2005 }
2006
2007 # if GC_GRANULE_PTRS < 4
2008 /*
2009 * Push all objects reachable from marked objects in the given block of
2010 * four-granule objects. There is a risk of mark stack overflow here.
2011 * But we handle that. And only unmarked objects get pushed, so it is
2012 * not very likely.
2013 */
2014 GC_ATTR_NO_SANITIZE_THREAD
2015 STATIC void
2016 GC_push_marked4(struct hblk *h, const hdr *hhdr)
2017 {
2018 const word *mark_word_addr
2019 = (word *)CAST_AWAY_VOLATILE_PVOID(hhdr->hb_marks);
2020 ptr_t *p;
2021 ptr_t plim;
2022 ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr;
2023 ptr_t least_ha = (ptr_t)GC_least_plausible_heap_addr;
2024 mse *mark_stack_top = GC_mark_stack_top;
2025 mse *mark_stack_limit = GC_mark_stack_limit;
2026
2027 # undef GC_mark_stack_top
2028 # undef GC_mark_stack_limit
2029 # define GC_mark_stack_top mark_stack_top
2030 # define GC_mark_stack_limit mark_stack_limit
2031 # define GC_greatest_plausible_heap_addr greatest_ha
2032 # define GC_least_plausible_heap_addr least_ha
2033
2034 p = (ptr_t *)h->hb_body;
2035 plim = (ptr_t)h + HBLKSIZE;
2036
2037 /* Go through all granules in block. */
2038 while (ADDR_LT((ptr_t)p, plim)) {
2039 word mark_word = *mark_word_addr++;
2040 ptr_t *q;
2041
2042 for (q = p; mark_word != 0; mark_word >>= 4) {
2043 if (mark_word & 1) {
2044 PUSH_GRANULE(q);
2045 PUSH_GRANULE(q + GC_GRANULE_PTRS);
2046 PUSH_GRANULE(q + 2 * GC_GRANULE_PTRS);
2047 PUSH_GRANULE(q + 3 * GC_GRANULE_PTRS);
2048 }
2049 q += 4 * GC_GRANULE_PTRS;
2050 }
2051 p += CPP_WORDSZ * GC_GRANULE_PTRS;
2052 }
2053 # undef GC_greatest_plausible_heap_addr
2054 # undef GC_least_plausible_heap_addr
2055 # undef GC_mark_stack_top
2056 # undef GC_mark_stack_limit
2057 # define GC_mark_stack_limit GC_arrays._mark_stack_limit
2058 # define GC_mark_stack_top GC_arrays._mark_stack_top
2059 GC_mark_stack_top = mark_stack_top;
2060 }
2061 # endif
2062 # endif
2063 #endif /* !USE_MARK_BYTES && !MARK_BIT_PER_OBJ && !SMALL_CONFIG */
2064
/* Push all objects reachable from marked objects in the given block. */
STATIC void
GC_push_marked(struct hblk *h, const hdr *hhdr)
{
  size_t sz = hhdr->hb_sz;
  ptr_t p;
  size_t bit_no;
  ptr_t plim;
  mse *mark_stack_top;
  mse *mark_stack_limit = GC_mark_stack_limit;

  /* Some quick shortcuts: */
  if ((/* `0 |` */ GC_DS_LENGTH) == hhdr->hb_descr)
    return; /*< objects of this kind contain no pointers */
  if (GC_block_empty(hhdr))
    return; /*< nothing marked */

#if !defined(GC_DISABLE_INCREMENTAL)
  GC_n_rescuing_pages++;
#endif
  GC_objects_are_marked = TRUE;
  /* Dispatch to an accelerated scanner for small object sizes. */
  switch (BYTES_TO_GRANULES(sz)) {
#ifdef USE_PUSH_MARKED_ACCELERATORS
  case 1:
    GC_push_marked1(h, hhdr);
    break;
# ifndef UNALIGNED_PTRS
  case 2:
    GC_push_marked2(h, hhdr);
    break;
#   if GC_GRANULE_PTRS < 4
  case 4:
    GC_push_marked4(h, hhdr);
    break;
#   endif
# endif /* !UNALIGNED_PTRS */
#else
  case 1: /*< to suppress "switch statement contains no case" warning */
#endif
  default:
    /* Generic path: walk the objects and test each one's mark bit. */
    plim = sz > MAXOBJBYTES ? h->hb_body
                            : CAST_THRU_UINTPTR(ptr_t, (h + 1)->hb_body) - sz;
    mark_stack_top = GC_mark_stack_top;
    for (p = h->hb_body, bit_no = 0; ADDR_GE(plim, p);
         p += sz, bit_no += MARK_BIT_OFFSET(sz)) {
      /* Mark from fields inside the object. */
      if (mark_bit_from_hdr(hhdr, bit_no)) {
        mark_stack_top
            = GC_push_obj(p, hhdr, mark_stack_top, mark_stack_limit);
      }
    }
    GC_mark_stack_top = mark_stack_top;
  }
}
2119
2120 #ifdef ENABLE_DISCLAIM
2121 /*
2122 * Unconditionally mark from all objects that have not been reclaimed.
2123 * This is useful in order to retain pointers reachable from the disclaim
2124 * notifiers. To determine whether an object has been reclaimed, we
 * require that any live object has a nonzero value in one of the two
 * least significant bits of the first "pointer-sized" word. On the other hand,
2127 * the reclaimed object is a member of free lists, and thus contains
2128 * a pointer-aligned next-pointer as the first "pointer-sized" word.
2129 */
2130 GC_ATTR_NO_SANITIZE_THREAD
2131 STATIC void
2132 GC_push_unconditionally(struct hblk *h, const hdr *hhdr)
2133 {
2134 size_t sz = hhdr->hb_sz;
2135 ptr_t p;
2136 ptr_t plim;
2137 mse *mark_stack_top;
2138 mse *mark_stack_limit = GC_mark_stack_limit;
2139
2140 if ((/* `0 |` */ GC_DS_LENGTH) == hhdr->hb_descr)
2141 return;
2142
2143 # if !defined(GC_DISABLE_INCREMENTAL)
2144 GC_n_rescuing_pages++;
2145 # endif
2146 GC_objects_are_marked = TRUE;
2147 plim = sz > MAXOBJBYTES ? h->hb_body
2148 : CAST_THRU_UINTPTR(ptr_t, (h + 1)->hb_body) - sz;
2149 mark_stack_top = GC_mark_stack_top;
2150 for (p = h->hb_body; ADDR_GE(plim, p); p += sz) {
2151 if ((ADDR(*(ptr_t *)p) & 0x3) != 0) {
2152 mark_stack_top = GC_push_obj(p, hhdr, mark_stack_top, mark_stack_limit);
2153 }
2154 }
2155 GC_mark_stack_top = mark_stack_top;
2156 }
2157 #endif /* ENABLE_DISCLAIM */
2158
2159 #ifndef GC_DISABLE_INCREMENTAL
2160 /* Test whether any page in the given block is dirty. */
2161 STATIC GC_bool
2162 GC_block_was_dirty(struct hblk *h, const hdr *hhdr)
2163 {
2164 size_t sz;
2165 ptr_t p;
2166
2167 # ifdef AO_HAVE_load
2168 /* Atomic access is used to avoid racing with `GC_realloc`. */
2169 sz = AO_load((volatile AO_t *)&hhdr->hb_sz);
2170 # else
2171 sz = hhdr->hb_sz;
2172 # endif
2173 if (sz <= MAXOBJBYTES) {
2174 return GC_page_was_dirty(h);
2175 }
2176
2177 for (p = (ptr_t)h; ADDR_LT(p, (ptr_t)h + sz); p += HBLKSIZE) {
2178 if (GC_page_was_dirty((struct hblk *)p))
2179 return TRUE;
2180 }
2181 return FALSE;
2182 }
2183 #endif /* GC_DISABLE_INCREMENTAL */
2184
2185 /*
2186 * Similar to `GC_push_marked`, but skip over unallocated blocks and
2187 * return address of next plausible block.
2188 */
2189 STATIC struct hblk *
2190 GC_push_next_marked(struct hblk *h)
2191 {
2192 hdr *hhdr = HDR(h);
2193
2194 if (UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(hhdr) || HBLK_IS_FREE(hhdr))) {
2195 h = GC_next_block(h, FALSE);
2196 if (NULL == h)
2197 return NULL;
2198 hhdr = GC_find_header(h);
2199 } else {
2200 #ifdef LINT2
2201 if (NULL == h)
2202 ABORT("Bad HDR() definition");
2203 #endif
2204 }
2205 GC_push_marked(h, hhdr);
2206 return h + OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
2207 }
2208
2209 #ifndef GC_DISABLE_INCREMENTAL
2210 /* Identical to `GC_push_next_marked`, but mark only from dirty pages. */
STATIC struct hblk *
GC_push_next_marked_dirty(struct hblk *h)
{
  hdr *hhdr;

  GC_ASSERT(I_HOLD_LOCK());
  if (!GC_incremental)
    ABORT("Dirty bits not set up");
  /*
   * Find the next dirty block.  Note: the loop increment reads `hhdr`,
   * which is always assigned at the top of the loop body before the
   * increment expression executes, so it is never used uninitialized.
   */
  for (;; h += OBJ_SZ_TO_BLOCKS(hhdr->hb_sz)) {
    hhdr = HDR(h);
    if (UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(hhdr) || HBLK_IS_FREE(hhdr))) {
      /* Unallocated space; skip to the next plausible block. */
      h = GC_next_block(h, FALSE);
      if (NULL == h)
        return NULL; /*< no more blocks in the heap */
      hhdr = GC_find_header(h);
    } else {
# ifdef LINT2
      if (NULL == h)
        ABORT("Bad HDR() definition");
# endif
    }
    if (GC_block_was_dirty(h, hhdr))
      break;
  }
# ifdef ENABLE_DISCLAIM
  if ((hhdr->hb_flags & MARK_UNCONDITIONALLY) != 0) {
    GC_push_unconditionally(h, hhdr);

    /*
     * Then we may ask, why not also add the `MARK_UNCONDITIONALLY`
     * case to `GC_push_next_marked`, which is also applied to
     * uncollectible blocks?  But it seems to me that the function
     * does not need to scan uncollectible (and unconditionally
     * marked) blocks since those are already handled in the
     * `MS_PUSH_UNCOLLECTABLE` phase.
     */
  } else
# endif
  /* else */ {
    GC_push_marked(h, hhdr);
  }
  return h + OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
}
2254 #endif /* !GC_DISABLE_INCREMENTAL */
2255
2256 /*
2257 * Similar to `GC_push_next_marked`, but for uncollectible pages.
2258 * Needed since we do not clear marks for such pages, even for full
2259 * collections.
2260 */
2261 STATIC struct hblk *
2262 GC_push_next_marked_uncollectable(struct hblk *h)
2263 {
2264 hdr *hhdr = HDR(h);
2265
2266 for (;;) {
2267 if (UNLIKELY(IS_FORWARDING_ADDR_OR_NIL(hhdr) || HBLK_IS_FREE(hhdr))) {
2268 h = GC_next_block(h, FALSE);
2269 if (NULL == h)
2270 return NULL;
2271 hhdr = GC_find_header(h);
2272 } else {
2273 #ifdef LINT2
2274 if (NULL == h)
2275 ABORT("Bad HDR() definition");
2276 #endif
2277 }
2278 if (hhdr->hb_obj_kind == UNCOLLECTABLE) {
2279 GC_push_marked(h, hhdr);
2280 break;
2281 }
2282 #ifdef ENABLE_DISCLAIM
2283 if ((hhdr->hb_flags & MARK_UNCONDITIONALLY) != 0) {
2284 GC_push_unconditionally(h, hhdr);
2285 break;
2286 }
2287 #endif
2288 h += OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
2289 hhdr = HDR(h);
2290 }
2291 return h + OBJ_SZ_TO_BLOCKS(hhdr->hb_sz);
2292 }
2293