1 /*
2 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
3 * Copyright (c) 2001 by Hewlett-Packard Company. All rights reserved.
4 * Copyright (c) 2008-2022 Ivan Maidanski
5 *
6 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
7 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
8 *
9 * Permission is hereby granted to use or copy this program
10 * for any purpose, provided the above notices are retained on all copies.
11 * Permission to modify the code and to distribute modified code is granted,
12 * provided the above notices are retained, and a notice that the code was
13 * modified is included with the above copyright notice.
14 */
15 16 /*
17 * Private declarations of the collector marker data structures (like the
18 * mark stack) and macros. Needed by the marker and the client-supplied
19 * mark routines. Transitively includes `gc_priv.h` file.
20 */
21 22 #ifndef GC_PMARK_H
23 #define GC_PMARK_H
24 25 #if defined(HAVE_CONFIG_H) && !defined(GC_PRIVATE_H)
26 /*
 * When `gc_pmark.h` file is included from `gc_priv.h` file, some of the
 * macros might be undefined in `gcconfig.h` file, so skip `config.h`
29 * file in this case.
30 */
31 # include "config.h"
32 #endif
33 34 #ifndef GC_BUILD
35 # define GC_BUILD
36 #endif
37 38 #if (defined(__linux__) || defined(__GLIBC__) || defined(__GNU__)) \
39 && !defined(_GNU_SOURCE) && defined(GC_PTHREADS) \
40 && !defined(GC_NO_PTHREAD_SIGMASK)
41 # define _GNU_SOURCE 1
42 #endif
43 44 #if defined(KEEP_BACK_PTRS) || defined(PRINT_BLACK_LIST)
45 # include "dbg_mlc.h"
46 #endif
47 48 #include "gc/gc_mark.h"
49 #include "gc_priv.h"
50 51 EXTERN_C_BEGIN
52 53 /*
54 * The real declarations of the following is in `gc_priv.h` file, so
55 * that we can avoid scanning `GC_mark_procs` table.
56 */
57 58 /*
59 * Mark descriptor stuff that should remain private for now, mostly
 * because it is hard to export `CPP_WORDSZ` macro without including
 * `gcconfig.h` file.
62 */
/* Number of descriptor bits remaining after the tag bits. */
#define BITMAP_BITS (CPP_WORDSZ - GC_DS_TAG_BITS)
/* The mark procedure selected by descriptor `descr` (a `GC_DS_PROC` one). */
#define PROC(descr) \
  (GC_mark_procs[((descr) >> GC_DS_TAG_BITS) & (GC_MAX_MARK_PROCS - 1)])
/* The environment value encoded in descriptor `descr`. */
#define ENV(descr) ((descr) >> (GC_DS_TAG_BITS + GC_LOG_MAX_MARK_PROCS))
/* The largest environment value that fits into a mark descriptor. */
#define MAX_ENV (((word)1 << (BITMAP_BITS - GC_LOG_MAX_MARK_PROCS)) - 1)

/* Number of entries of `GC_mark_procs` table in use; see `gc_priv.h`. */
GC_EXTERN unsigned GC_n_mark_procs;

/* Number of mark stack entries to discard on overflow. */
#define GC_MARK_STACK_DISCARDS (INITIAL_MARK_STACK_SIZE / 8)
73 74 #ifdef PARALLEL_MARK
75 /*
76 * Allow multiple threads to participate in the marking process.
77 * This works roughly as follows:
78 * - The main mark stack never shrinks, but it can grow.
 * - The initiating thread holds the allocator lock and sets
 *   `GC_help_wanted`.
81 * - Other threads:
82 * 1. Update `GC_helper_count` (while holding the mark lock).
83 * 2. Allocate a local mark stack repeatedly:
84 * 2.1. Steal a global mark stack entry by atomically replacing
85 * its descriptor with 0;
86 * 2.2. Copy it to the local stack;
87 * 2.3. Mark on the local stack until it is empty, or it may be
88 * profitable to copy it back;
89 * 2.4. If necessary, copy local stack to global one, holding the
90 * mark lock;
91 * 2.5. Stop when the global mark stack is empty.
92 * 3. Decrement `GC_helper_count` (holding the mark lock).
93 *
94 * This is an experiment to see if we can do something along the lines
95 * of the University of Tokyo SGC in a less intrusive, though probably
96 * also less performant, way.
97 */
98 99 /* `GC_mark_stack_top` is protected by the mark lock. */
100 101 /*
102 * `GC_notify_all_marker()` is used when `GC_help_wanted` is first set,
103 * when the last helper becomes inactive, when something is added to the
104 * global mark stack, and just after `GC_mark_no` is incremented.
105 * This could be split into multiple conditional variables (and probably
106 * should be) to scale to really large numbers of processors.
107 */
108 #endif /* PARALLEL_MARK */
109 110 /*
111 * Push the object `obj` with corresponding heap block header `hhdr`
112 * onto the mark stack. Returns the updated `mark_stack_top` value.
113 */
114 GC_INLINE mse *
115 GC_push_obj(ptr_t obj, const hdr *hhdr, mse *mark_stack_top,
116 mse *mark_stack_limit)
117 {
118 GC_ASSERT(!HBLK_IS_FREE(hhdr));
119 if (!IS_PTRFREE(hhdr)) {
120 mark_stack_top = GC_custom_push_proc(hhdr->hb_descr, obj, mark_stack_top,
121 mark_stack_limit);
122 }
123 return mark_stack_top;
124 }
125 126 /*
127 * Push the contents of `current` onto the mark stack if it is a valid
128 * pointer to a currently unmarked object. Mark it.
129 */
/* Note: the header-cache lookup below may `break` out of the caller's loop. */
#define PUSH_CONTENTS(current, mark_stack_top, mark_stack_limit, source) \
  do {                                                                   \
    hdr *my_hhdr;                                                        \
    HC_GET_HDR(current, my_hhdr, source); /*< contains `break` */        \
    mark_stack_top = GC_push_contents_hdr(current, mark_stack_top,       \
                                          mark_stack_limit, source,      \
                                          my_hhdr, TRUE);                \
  } while (0)
/*
 * Set mark bit (or mark byte) number `bit_no` of the block described by
 * `hhdr`; exit (using a `break` statement) if it is already set.
 */
#ifdef USE_MARK_BYTES
# if defined(PARALLEL_MARK) && defined(AO_HAVE_char_store) \
     && !defined(BASE_ATOMIC_OPS_EMULATED)
    /*
     * There is a race here, and we may set the bit twice in the concurrent
     * case.  This can result in the object being pushed twice.  But that is
     * only a performance issue.
     */
#   define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no) \
      { /*< cannot use `do ... while (0)` here */ \
        volatile unsigned char *mark_byte_addr \
            = (unsigned char *)(hhdr)->hb_marks + (bit_no); \
        /* Unordered atomic load and store are sufficient here. */ \
        if (AO_char_load(mark_byte_addr) != 0) \
          break; /*< go to the enclosing loop end */ \
        AO_char_store(mark_byte_addr, 1); \
      }
# else
    /* Non-atomic variant: a plain byte load and store suffice. */
#   define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no) \
      { /*< cannot use `do ... while (0)` here */ \
        ptr_t mark_byte_addr = (ptr_t)(hhdr)->hb_marks + (bit_no); \
        \
        if (*mark_byte_addr != 0) \
          break; /*< go to the enclosing loop end */ \
        *mark_byte_addr = 1; \
      }
# endif /* !PARALLEL_MARK */
#else
# if defined(PARALLEL_MARK) || (defined(THREAD_SANITIZER) && defined(THREADS))
#   ifdef THREAD_SANITIZER
#     define MARK_WORD_READ(addr) AO_load(addr)
#   else
#     define MARK_WORD_READ(addr) (*(addr))
#   endif
    /*
     * This is used only if we explicitly define `USE_MARK_BITS` macro.
     * The following may fail to exit even if the bit was already set.
     * For our uses, that is benign.
     */
#   define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no) \
      { /*< cannot use `do ... while (0)` here */ \
        volatile AO_t *mark_word_addr = (hhdr)->hb_marks + divWORDSZ(bit_no); \
        word my_bits = (word)1 << modWORDSZ(bit_no); \
        \
        if ((MARK_WORD_READ(mark_word_addr) & my_bits) != 0) \
          break; /*< go to the enclosing loop end */ \
        AO_or(mark_word_addr, my_bits); \
      }
# else /* !PARALLEL_MARK */
    /* Single-threaded variant: read-modify-write without atomics. */
#   define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no) \
      { /*< cannot use `do ... while (0)` here */ \
        word *mark_word_addr = (hhdr)->hb_marks + divWORDSZ(bit_no); \
        word old = *mark_word_addr; \
        word my_bits = (word)1 << modWORDSZ(bit_no); \
        \
        if ((old & my_bits) != 0) \
          break; /*< go to the enclosing loop end */ \
        *(mark_word_addr) = old | my_bits; \
      }
# endif
#endif /* !USE_MARK_BYTES */
#ifdef ENABLE_TRACE
  /* Execute `cmd` if `source` is the address being traced. */
# define TRACE(source, cmd) \
    if (GC_trace_ptr != NULL && (ptr_t)(source) == GC_trace_ptr) \
    cmd
  /* Execute `cmd` if `target` equals the object the traced address refers to. */
# define TRACE_TARGET(target, cmd) \
    if (GC_trace_ptr != NULL && GC_is_heap_ptr(GC_trace_ptr) \
        && (target) == *(ptr_t *)GC_trace_ptr) \
    cmd
#else
# define TRACE(source, cmd)
  /* Parameter renamed from `source` to `target` to match the enabled variant. */
# define TRACE_TARGET(target, cmd)
#endif
213 214 /*
215 * If the mark bit corresponding to `current` is not set, set it, and
216 * push the contents of the object on the mark stack. `current` points
217 * to the beginning of the object. We rely on the fact that the
218 * preceding header calculation will succeed for a pointer past the
219 * first page of an object, only if it is in fact a valid pointer
220 * to the object. Thus we can omit the otherwise necessary tests here.
221 */
GC_INLINE mse *
GC_push_contents_hdr(ptr_t current, mse *mark_stack_top, mse *mark_stack_limit,
                     ptr_t source, hdr *hhdr, GC_bool do_offset_check)
{
  /* Single-pass loop: the embedded macros use `break` as a forward goto. */
  do {
    /*
     * Displacement in the block, in bytes; always within range.
     * Note, in particular, that this value is the displacement from the
     * beginning of the heap block, which may itself be in the interior
     * of a large object.  If `current` does not point to the first block,
     * then we are in the all-interior-pointers mode, and it is safe to
     * use any displacement value.
     */
    size_t displ = HBLKDISPL(current);
    ptr_t base = current; /*< adjusted below to the object's beginning */
#ifdef MARK_BIT_PER_OBJ
    unsigned32 gran_displ; /*< `high_prod` */
    /* Presumably a scaled reciprocal of the object size; `LARGE_INV_SZ`  */
    /* tags large blocks - TODO confirm against `hb_inv_sz` definition.   */
    unsigned32 inv_sz = hhdr->hb_inv_sz;

#else
    size_t gran_displ = BYTES_TO_GRANULES(displ);
    size_t gran_offset = hhdr->hb_map[gran_displ];
    size_t byte_offset = displ & (GC_GRANULE_BYTES - 1);

    /* The following always fails for large block references. */
    if (UNLIKELY((gran_offset | byte_offset) != 0))
#endif
    {
#ifdef MARK_BIT_PER_OBJ
      if (UNLIKELY(inv_sz == LARGE_INV_SZ))
#else
      if ((hhdr->hb_flags & LARGE_BLOCK) != 0)
#endif
      {
        /* `gran_offset` is bogus. */
        size_t obj_displ;

        base = (ptr_t)hhdr->hb_block;
        obj_displ = (size_t)(current - base);
        if (obj_displ != displ) {
          GC_ASSERT(obj_displ < hhdr->hb_sz);
          /*
           * Must be in the all-interior-pointers mode, non-first block
           * already did validity check on cache miss.
           */
        } else if (do_offset_check && !GC_valid_offsets[obj_displ]) {
          /* Invalid interior displacement: black-list and give up. */
          GC_ADD_TO_BLACK_LIST_NORMAL(current, source);
          break;
        }
        GC_ASSERT(hhdr->hb_sz > HBLKSIZE
                  || hhdr->hb_block == HBLKPTR(current));
        GC_ASSERT(ADDR_GE(current, (ptr_t)hhdr->hb_block));
        gran_displ = 0;
      } else {
#ifdef MARK_BIT_PER_OBJ
        unsigned32 low_prod;

        /* Compute object index (and remainder) via multiplication by `inv_sz`. */
        LONG_MULT(gran_displ, low_prod, (unsigned32)displ, inv_sz);
        if ((low_prod >> 16) != 0)
#endif
        {
          size_t obj_displ;

#ifdef MARK_BIT_PER_OBJ
          /* Accurate enough if `HBLKSIZE` is not greater than 2**15. */
          GC_STATIC_ASSERT(HBLKSIZE <= (1 << 15));
          obj_displ = (((low_prod >> 16) + 1) * hhdr->hb_sz) >> 16;
#else
          obj_displ = GRANULES_TO_BYTES(gran_offset) + byte_offset;
#endif

          if (do_offset_check && !GC_valid_offsets[obj_displ]) {
            /* Interior pointer with a disallowed offset: black-list it. */
            GC_ADD_TO_BLACK_LIST_NORMAL(current, source);
            break;
          }
#ifndef MARK_BIT_PER_OBJ
          gran_displ -= gran_offset;
#endif
          /* Back up to the true beginning of the object. */
          base -= obj_displ;
        }
      }
    }
#ifdef MARK_BIT_PER_OBJ
    /*
     * May get here for pointer to start of block not at the beginning
     * of object.  If so, it is valid, and we are fine.
     */
    GC_ASSERT(gran_displ <= HBLK_OBJS(hhdr->hb_sz));
#else
    GC_ASSERT(hhdr == GC_find_header(base));
    GC_ASSERT(gran_displ % BYTES_TO_GRANULES(hhdr->hb_sz) == 0);
#endif
    TRACE(source, GC_log_printf("GC #%lu: passed validity tests\n",
                                (unsigned long)GC_gc_no));
    SET_MARK_BIT_EXIT_IF_SET(hhdr, gran_displ); /*< contains `break` */
    TRACE(source, GC_log_printf("GC #%lu: previously unmarked\n",
                                (unsigned long)GC_gc_no));
    TRACE_TARGET(base, GC_log_printf("GC #%lu: marking %p from %p instead\n",
                                     (unsigned long)GC_gc_no, (void *)base,
                                     (void *)source));
    INCR_MARKS(hhdr);
    /* Record the source pointer for debugging (no-op unless enabled). */
    GC_STORE_BACK_PTR(source, base);
    /* The object was not marked before; push its contents. */
    mark_stack_top = GC_push_obj(base, hhdr, mark_stack_top, mark_stack_limit);
  } while (0);
  return mark_stack_top;
}
/*
 * Push `p` via `GC_mark_and_push_stack()`; `source` (the location `p` was
 * read from) is forwarded only when black-list printing or back-pointer
 * tracking is enabled, and is ignored otherwise.
 */
#if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
# define PUSH_ONE_CHECKED_STACK(p, source) \
    GC_mark_and_push_stack(p, (ptr_t)(source))
#else
# define PUSH_ONE_CHECKED_STACK(p, source) GC_mark_and_push_stack(p)
#endif
335 336 /*
337 * Push a single value onto mark stack. Mark from the object
338 * pointed to by `p`. The argument should be of `ptr_t` type.
339 * Invoke `FIXUP_POINTER()` before any further processing.
 * `p` is considered valid even if it is an interior pointer.
341 * Previously marked objects are not pushed. Hence we make progress
342 * even if the mark stack overflows.
343 */
/* Candidates outside the plausible heap address bounds are rejected cheaply. */
#ifdef NEED_FIXUP_POINTER
  /* Try both the raw variant and the fixed up one. */
# define GC_PUSH_ONE_STACK(p, source) \
    do { \
      ptr_t pp = (p); \
      \
      if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, p) \
          && ADDR_LT(p, (ptr_t)GC_greatest_plausible_heap_addr)) { \
        PUSH_ONE_CHECKED_STACK(p, source); \
      } \
      FIXUP_POINTER(pp); /*< may rewrite `pp` in place */ \
      if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, pp) \
          && ADDR_LT(pp, (ptr_t)GC_greatest_plausible_heap_addr)) { \
        PUSH_ONE_CHECKED_STACK(pp, source); \
      } \
    } while (0)
#else /* !NEED_FIXUP_POINTER */
# define GC_PUSH_ONE_STACK(p, source) \
    do { \
      if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, p) \
          && ADDR_LT(p, (ptr_t)GC_greatest_plausible_heap_addr)) { \
        PUSH_ONE_CHECKED_STACK(p, source); \
      } \
    } while (0)
#endif
369 370 /*
 * Same as `GC_PUSH_ONE_STACK`, but interior pointer recognition is done
 * as for normal heap pointers.
373 */
/* Note: `FIXUP_POINTER` may modify `p` in place before the bounds check. */
#define GC_PUSH_ONE_HEAP(p, source, mark_stack_top) \
  do { \
    FIXUP_POINTER(p); \
    if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, p) \
        && ADDR_LT(p, (ptr_t)GC_greatest_plausible_heap_addr)) \
      mark_stack_top = GC_mark_and_push( \
          p, mark_stack_top, GC_mark_stack_limit, (void **)(source)); \
  } while (0)
382 383 /*
384 * Mark objects pointed to by the regions described by mark stack entries
385 * between `mark_stack` and `mark_stack_top`, inclusive. Assumes the upper
386 * limit of a mark stack entry is never `NULL`. A mark stack entry never
387 * has zero size. Return the new value of `mark_stack_top`.
388 * We try to traverse on the order of a `hblk` of memory before we return.
389 * Caller is responsible for calling this until the mark stack is empty.
390 * Note that this is the most performance critical routine in the collector.
391 * Hence it contains all sorts of ugly hacks to speed things up.
392 * In particular, we avoid procedure calls on the common path, we take
393 * advantage of peculiarities of the mark descriptor encoding, we optionally
394 * maintain a cache for the block address to header mapping, we prefetch
395 * when an object is "grayed", etc.
396 */
GC_INNER mse *GC_mark_from(mse *mark_stack_top, mse *mark_stack,
                           mse *mark_stack_limit);

/* Run one `GC_mark_from()` step over the global mark stack, updating its top. */
#define MARK_FROM_MARK_STACK() \
  GC_mark_stack_top = GC_mark_from(GC_mark_stack_top, GC_mark_stack, \
                                   GC_mark_stack + GC_mark_stack_size);

/* The global mark stack is empty when its top is below the stack base. */
#define GC_mark_stack_empty() \
  ADDR_LT((ptr_t)GC_mark_stack_top, (ptr_t)GC_mark_stack)
406 407 /*
408 * The current state of marking, as follows. We say something is dirty
409 * if it was written since the last time we retrieved dirty bits.
410 * We say it is grungy if it was marked dirty in the last set of bits
411 * we retrieved. Invariant "I": all roots and marked objects `p` are
412 * either dirty, or point to objects `q` that are either marked or
413 * a pointer to `q` appears in a range on the mark stack.
414 */
/* The possible marker states; "I" is the invariant described above. */

/* No marking in progress.  "I" holds.  Mark stack is empty. */
#define MS_NONE 0

/*
 * Rescuing objects are currently being pushed.  "I" holds, except that
 * grungy roots may point to unmarked objects, as may marked grungy objects
 * above `GC_scan_ptr`.
 */
#define MS_PUSH_RESCUERS 1

/*
 * Uncollectible objects are currently being pushed.  "I" holds, except
 * that marked uncollectible objects above `GC_scan_ptr` may point to
 * unmarked objects.  Roots may point to unmarked objects too.
 */
#define MS_PUSH_UNCOLLECTABLE 2

/* "I" holds, mark stack may be nonempty. */
#define MS_ROOTS_PUSHED 3

/*
 * "I" may not hold, e.g. because of the mark stack overflow.  However,
 * marked heap objects below `GC_scan_ptr` point to marked or stacked
 * objects.
 */
#define MS_PARTIALLY_INVALID 4

/* "I" may not hold. */
#define MS_INVALID 5
445 446 EXTERN_C_END
447 448 #endif /* GC_PMARK_H */
449