/*
 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
 * Copyright (c) 2001 by Hewlett-Packard Company. All rights reserved.
 * Copyright (c) 2009-2022 Ivan Maidanski
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/*
 * This contains interfaces to the garbage collector marker that are likely
 * to be useful to clients that provide detailed heap layout information
 * to the collector. This interface should not be used by normal C or C++
 * clients. It will be useful to runtimes for other languages.
 *
 * This is an experts-only interface! There are many ways to break the
 * collector in subtle ways by using this functionality.
 */
25 #ifndef GC_MARK_H
26 #define GC_MARK_H
27 28 #ifndef GC_H
29 # include "gc.h"
30 #endif
31 32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 36 #define GC_PROC_BYTES 100
#if defined(GC_BUILD) || defined(NOT_GCBUILD)
/* The collector internals provide the real definitions. */
struct GC_ms_entry;
struct GC_hblk_s;
#else
/* Opaque stand-ins for client code; only pointers to these are used. */
struct GC_ms_entry {
  void *opaque;
};
struct GC_hblk_s {
  void *opaque;
};
#endif
49 50 /**
51 * A client-supplied mark procedure. Returns new mark stack pointer.
52 * Primary effect should be to push new entries on the mark stack.
53 * Mark stack pointer values are passed and returned explicitly.
54 * Global variables describing mark stack are not necessarily valid.
55 * (This usually saves a few cycles by keeping things in registers.)
56 * Assumed to scan about `GC_PROC_BYTES` on average. If it needs to do
57 * much more work than that, it should do it in smaller pieces by
58 * pushing itself back on the mark stack. Note that it should always
59 * do some work (defined as "marking some objects") before pushing more
60 * than one entry on the mark stack. This is required to ensure
61 * termination in the event of mark stack overflows. This procedure is
62 * always called with at least one empty entry on the mark stack.
63 * Currently we require that mark procedures look for pointers in
64 * a subset of the places the conservative marker would. It must be safe
65 * to invoke the normal mark procedure instead.
66 *
67 * *Warning*: Such a mark procedure may be invoked on an unused object
68 * residing on a free list. Such objects are cleared, except for a
69 * free-list link field (which is located at the beginning of each
70 * object). Thus mark procedures may not count on the presence of a
71 * type descriptor, and must handle this case correctly somehow. Also,
72 * a mark procedure should be prepared to be executed concurrently from
73 * the marker threads (the latter ones are created only if the client
74 * has called `GC_start_mark_threads()` or started a user thread
75 * previously). For the compatibility reason, `addr` is a pointer to
76 * `GC_word`, but it should be treated as a pointer to `void` pointer.
77 */
78 typedef struct GC_ms_entry *(GC_CALLBACK *GC_mark_proc)(
79 GC_word * /* `addr` */, struct GC_ms_entry * /* `mark_stack_top` */,
80 struct GC_ms_entry * /* `mark_stack_limit` */, GC_word /* `env` */);
#define GC_LOG_MAX_MARK_PROCS 6
#define GC_MAX_MARK_PROCS (1 << GC_LOG_MAX_MARK_PROCS)

/*
 * In a few cases it is necessary to assign statically known indices to
 * certain mark procedures. Thus we reserve a few for well known clients.
 * (This is necessary if mark descriptors are compiler generated.)
 */
#define GC_RESERVED_MARK_PROCS 8
#define GC_GCJ_RESERVED_MARK_PROC_INDEX 0

/*
 * Object descriptors on mark stack or in objects. Low-order two bits are
 * tags distinguishing among the following 4 possibilities for the rest
 * (high-order) bits.
 */
#define GC_DS_TAG_BITS 2
#define GC_DS_TAGS ((1U << GC_DS_TAG_BITS) - 1)

/**
 * The entire descriptor is a length in bytes that must be a multiple of 4.
 */
#define GC_DS_LENGTH 0

/**
 * The high-order bits are describing pointer fields. The most
 * significant bit is set if the first "pointer-sized" word is a pointer.
 * (This unconventional ordering sometimes makes the marker slightly faster.)
 * Zeros indicate definite non-pointers; ones indicate possible pointers.
 * *Note*: only usable if pointers are aligned on the size of a pointer (thus,
 * extra care should be taken by client on cris and m68k architectures).
 */
#define GC_DS_BITMAP 1

/**
 * The objects referenced by this object can be pushed on the mark stack by
 * invoking `PROC(descr)`. `ENV(descr)` is passed as the last argument.
 */
#define GC_DS_PROC 2

/**
 * The real descriptor is at the byte displacement from the beginning
 * of the object given by `descr & ~GC_DS_TAGS`. If the descriptor is
 * negative, the real descriptor is at
 * `*<object_start> - (descr & ~GC_DS_TAGS) - GC_INDIR_PER_OBJ_BIAS`.
 * The latter alternative can be used if each object contains a type
 * descriptor at the beginning of the object. Note that in the
 * multi-threaded environments per-object descriptors must be located
 * in either the first two or last two "pointer-sized" words of the
 * object, since only those are guaranteed to be cleared while the
 * allocator lock is held.
 */
#define GC_DS_PER_OBJECT 3

/** Bias used when following a negative `GC_DS_PER_OBJECT` descriptor. */
#define GC_INDIR_PER_OBJ_BIAS 0x10

/**
 * Construct a `GC_DS_PROC` descriptor from a registered mark procedure
 * index (see `GC_new_proc`) and an environment value that is later
 * passed to the procedure as its `env` argument.
 */
#define GC_MAKE_PROC(proc_index, env) \
  ((((((GC_word)(env)) << GC_LOG_MAX_MARK_PROCS) | (unsigned)(proc_index)) \
    << GC_DS_TAG_BITS) \
   | (GC_word)GC_DS_PROC)
142 143 /**
144 * Bounds on the heap. Guaranteed to be valid. Likely to include future
145 * heap expansion. Hence the bounded range usually includes not-yet-mapped
146 * memory, or might overlap with other data roots. The address of any heap
147 * object is larger than `GC_least_plausible_heap_addr` and less than
148 * `GC_greatest_plausible_heap_addr`.
149 */
150 GC_API void *GC_least_plausible_heap_addr;
151 GC_API void *GC_greatest_plausible_heap_addr;
152 153 /**
154 * Specify the pointer address mask. Works only if the collector is
155 * built with `DYNAMIC_POINTER_MASK` macro defined. These primitives
156 * are normally needed only to support systems that use high-order
157 * pointer tags. The setter is expected to be called, if needed,
158 * before the collector initialization or, at least, before the first
159 * object is allocated. Both the setter and the getter are unsynchronized.
160 */
161 GC_API void GC_CALL GC_set_pointer_mask(GC_word);
162 GC_API GC_word GC_CALL GC_get_pointer_mask(void);
163 164 /**
165 * Similar to `GC_set_pointer_mask`/`GC_get_pointer_mask` but for the
166 * pointer address shift. The value should be less than the size of
167 * `GC_word`, in bits. Applied after the mask.
168 */
169 GC_API void GC_CALL GC_set_pointer_shift(unsigned);
170 GC_API unsigned GC_CALL GC_get_pointer_shift(void);
171 172 /**
173 * Handle nested references in a custom mark procedure.
174 * Check if `obj` is a valid object. If so, ensure that it is marked.
175 * If it was not previously marked, push its contents onto the mark
176 * stack for future scanning. The object will then be scanned using
177 * its mark descriptor. Returns the new mark stack pointer.
178 * Handles mark stack overflows correctly. Since this marks first, it
179 * makes progress even if there are mark stack overflows.
180 * `src` is the address of the pointer to `obj`, which is used only
181 * for back pointer-based heap debugging.
182 * It is strongly recommended that most objects be handled without mark
183 * procedures, e.g. with bitmap descriptors, and that mark procedures
184 * be reserved for the exceptional cases. That will ensure that
185 * performance of this call is not extremely performance critical.
186 * (Otherwise we would need to inline `GC_mark_and_push` completely,
187 * which would tie the client code to a fixed collector version.)
188 * Note that mark procedures should explicitly call `FIXUP_POINTER()`
189 * if required.
190 */
191 GC_API struct GC_ms_entry *GC_CALL GC_mark_and_push(
192 void * /* `obj` */, struct GC_ms_entry * /* `mark_stack_top` */,
193 struct GC_ms_entry * /* `mark_stack_limit` */, void ** /* `src` */);
194 195 #define GC_MARK_AND_PUSH(obj, msp, lim, src) \
196 (GC_ADDR_LT((char *)GC_least_plausible_heap_addr, (char *)(obj)) \
197 && GC_ADDR_LT((char *)(obj), \
198 (char *)GC_greatest_plausible_heap_addr) \
199 ? GC_mark_and_push(obj, msp, lim, src) \
200 : (msp))
201 202 GC_API void GC_CALL GC_push_proc(GC_word /* `descr` */, void * /* `obj` */);
203 204 GC_API struct GC_ms_entry *GC_CALL
205 GC_custom_push_proc(GC_word /* `descr` */, void * /* `obj` */,
206 struct GC_ms_entry * /* `mark_stack_top` */,
207 struct GC_ms_entry * /* `mark_stack_limit` */);
208 209 GC_API struct GC_ms_entry *GC_CALL
210 GC_custom_push_range(void * /* `bottom` */, void * /* `top` */,
211 struct GC_ms_entry * /* `mark_stack_top` */,
212 struct GC_ms_entry * /* `mark_stack_limit` */);
213 214 /**
215 * The size of the header added to objects allocated through the `GC_debug_`
216 * routines. Defined as a function so that client mark procedures do not
217 * need to be recompiled for the collector library version changes.
218 */
219 GC_API GC_ATTR_CONST size_t GC_CALL GC_get_debug_header_size(void);
220 #define GC_USR_PTR_FROM_BASE(p) \
221 ((void *)((char *)(p) + GC_get_debug_header_size()))
222 223 /*
224 * The same as `GC_get_debug_header_size()` but defined as a variable.
225 * Exists only for the backward compatibility. Some compilers do not
226 * accept `const` together with `deprecated` or `dllimport` attributes,
227 * so the symbol is exported as a non-constant one.
228 */
229 GC_API GC_ATTR_DEPRECATED
230 #ifdef GC_BUILD
231 const
232 #endif
233 size_t GC_debug_header_size;
234 235 /**
236 * Return the heap block size. Each heap block is devoted to a single size
237 * and kind of object.
238 */
239 GC_API GC_ATTR_CONST size_t GC_CALL GC_get_hblk_size(void);
240 241 typedef void(GC_CALLBACK *GC_walk_hblk_fn)(struct GC_hblk_s *,
242 void * /* `client_data` */);
243 244 /**
245 * Apply `fn` to each allocated heap block. It is the responsibility
246 * of the caller to avoid data race during the function execution
247 * (e.g. by acquiring the allocator lock at least in the reader mode).
248 */
249 GC_API void GC_CALL GC_apply_to_all_blocks(GC_walk_hblk_fn,
250 void * /* `client_data` */)
251 GC_ATTR_NONNULL(1);
252 253 /* Same as `GC_walk_hblk_fn` but with index of the free list. */
254 typedef void(GC_CALLBACK *GC_walk_free_blk_fn)(struct GC_hblk_s *,
255 int /* `index` */,
256 void * /* `client_data` */);
257 258 /**
259 * Apply `fn` to each completely empty heap block. It is the responsibility
260 * of the caller to avoid data race during the function execution (e.g. by
261 * acquiring the allocator lock at least in the reader mode).
262 */
263 GC_API void GC_CALL GC_iterate_free_hblks(GC_walk_free_blk_fn,
264 void * /* `client_data` */)
265 GC_ATTR_NONNULL(1);
266 267 /**
268 * If there are likely to be false references to a block starting at
269 * `h` of the indicated length (`len`), then return the next plausible
270 * starting location for `h` that might avoid these false references.
271 * (In other words: Is the block starting at `h` of size `len` bytes
272 * black-listed? If so, then return the address of the next plausible
273 * `r` such that (`r`,`len`) might not be black-listed. Note that
274 * pointer `r` may not actually be in the heap, we guarantee only that
275 * every smaller value of `r` after `h` is also black-listed.)
276 * Otherwise `NULL` is returned. Assumes the allocator lock is held at
277 * least in the reader mode, but no assertion about it by design.
278 */
279 GC_API struct GC_hblk_s *GC_CALL
280 GC_is_black_listed(struct GC_hblk_s * /* `h` */, size_t /* `len` */);
281 282 /**
283 * Return the number of set mark bits for the heap block where object `p`
284 * is located. Defined only if the library has been compiled without
285 * `NO_DEBUGGING` macro defined.
286 */
287 GC_API unsigned GC_CALL GC_count_set_marks_in_hblk(const void * /* `p` */);
288 289 /*
290 * And some routines to support creation of new "kinds", e.g. with custom
291 * mark procedures, by language runtimes. The `_inner` variants assume
292 * the caller holds the allocator lock.
293 */
294 295 /** Return a new free-list array. */
296 GC_API void **GC_CALL GC_new_free_list(void);
297 GC_API void **GC_CALL GC_new_free_list_inner(void);
298 299 /**
300 * Return a new kind, as specified. The last two parameters must be zero
301 * or one.
302 */
303 GC_API unsigned GC_CALL GC_new_kind(void ** /* `free_list` */,
304 GC_word /* `mark_descriptor_template` */,
305 int /* `add_size_to_descriptor` */,
306 int /* `clear_new_objects` */)
307 GC_ATTR_NONNULL(1);
308 GC_API unsigned GC_CALL GC_new_kind_inner(
309 void ** /* `free_list` */, GC_word /* `mark_descriptor_template` */,
310 int /* `add_size_to_descriptor` */, int /* `clear_new_objects` */)
311 GC_ATTR_NONNULL(1);
312 313 /**
314 * Return a new mark procedure identifier, suitable for use as the first
315 * argument in `GC_MAKE_PROC()`.
316 */
317 GC_API unsigned GC_CALL GC_new_proc(GC_mark_proc);
318 GC_API unsigned GC_CALL GC_new_proc_inner(GC_mark_proc);
319 320 /**
321 * Similar to `GC_init_gcj_malloc()` described in `gc_gcj.h` file but with
322 * the proper types of the arguments and an additional runtime checking.
323 * `GC_GCJ_MARK_DESCR_OFFSET` should be passed to `descr_offset` argument.
324 * Defined only if the library has been compiled with `GC_GCJ_SUPPORT`
325 * macro defined.
326 */
327 GC_API void GC_CALL GC_init_gcj_malloc_mp(unsigned /* `mp_index` */,
328 GC_mark_proc /* `mp` */,
329 size_t /* `descr_offset` */);
330 331 /**
332 * Allocate an object of a given `kind`. By default, there are only
333 * a few kinds: composite (pointerful), atomic, uncollectible, etc.
334 * We claim it is possible for clever client code that understands the
335 * collector internals to add more, e.g. to communicate object layout
336 * information to the collector. Note that in the multi-threaded
337 * contexts, this is usually unsafe for kinds that have the descriptor
338 * in the object itself, since there is otherwise a window in which
339 * the descriptor is not correct. Even in the single-threaded case,
340 * we need to be sure that cleared objects on a free list do not cause
341 * a GC crash if they are accidentally traced.
342 */
343 GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
344 GC_generic_malloc(size_t /* `lb` */, int /* `kind` */);
345 346 /**
347 * Same as `GC_generic_malloc()`, but pointers to past the first heap block
348 * of the resulting object are ignored. We avoid holding the allocator lock
349 * while we clear the memory.
350 */
351 GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
352 GC_generic_malloc_ignore_off_page(size_t /* `lb` */, int /* `kind` */);
353 354 /**
355 * A generalized variant of `GC_malloc_uncollectable()` and
356 * `GC_malloc_atomic_uncollectable()`.
357 */
358 GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
359 GC_generic_malloc_uncollectable(size_t /* `lb` */, int /* `kind` */);
360 361 /**
362 * Same as `GC_generic_malloc()`, but primary for allocating an object
363 * of the same kind as an existing one (`kind` obtained by
364 * `GC_get_kind_and_size()`). Not suitable for `gcj` and typed-`malloc`
365 * kinds.
366 */
367 GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
368 GC_generic_or_special_malloc(size_t /* `size` */, int /* `kind` */);
369 GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
370 GC_debug_generic_or_special_malloc(size_t /* `size` */, int /* `kind` */,
371 GC_EXTRA_PARAMS);
372 373 #ifdef GC_DEBUG
374 # define GC_GENERIC_OR_SPECIAL_MALLOC(sz, k) \
375 GC_debug_generic_or_special_malloc(sz, k, GC_EXTRAS)
376 #else
377 # define GC_GENERIC_OR_SPECIAL_MALLOC(sz, k) \
378 GC_generic_or_special_malloc(sz, k)
379 #endif /* !GC_DEBUG */
380 381 /**
382 * Similar to `GC_size` but returns object kind. The size is returned too
383 * if `psize` is not `NULL`. The object pointer should be non-`NULL`.
384 */
385 GC_API int GC_CALL GC_get_kind_and_size(const void *, size_t * /* `psize` */)
386 GC_ATTR_NONNULL(1);
387 388 /**
389 * A procedure which produces a human-readable description of the
390 * "type" of object `p` into the buffer `out_buf` of length
391 * `GC_TYPE_DESCR_LEN`. This is used by the debug support when
392 * printing objects. These functions should be as robust as possible,
393 * though we do avoid invoking them on objects on the global free list.
394 */
395 typedef void(GC_CALLBACK *GC_describe_type_fn)(void * /* `p` */,
396 char * /* `out_buf` */);
397 #define GC_TYPE_DESCR_LEN 40
398 399 /**
400 * Register a `describe_type` function to be used when printing objects
401 * of a particular `kind`.
402 */
403 GC_API void GC_CALL GC_register_describe_type_fn(int /* `kind` */,
404 GC_describe_type_fn);
405 406 /**
407 * Clear some of the inaccessible part of the stack. Returns its argument,
408 * so it can be used in a tail call position, hence clearing another frame.
409 * The argument may be `NULL`.
410 */
411 GC_API void *GC_CALL GC_clear_stack(void *);
412 413 /**
414 * Set/get the client notifier on collections. The client-supplied procedure
415 * is called at the start of every full collection (called with the allocator
416 * lock held). May be 0. This is a really tricky interface to use correctly.
417 * Unless you really understand the collector internals, the callback should
418 * not, directly or indirectly, make any `GC_` or potentially blocking calls.
419 * In particular, it is not safe to allocate memory using the collector from
420 * within the callback procedure. Both the setter and the getter acquire the
421 * allocator lock (in the reader mode in case of the getter).
422 */
423 typedef void(GC_CALLBACK *GC_start_callback_proc)(void);
424 GC_API void GC_CALL GC_set_start_callback(GC_start_callback_proc);
425 GC_API GC_start_callback_proc GC_CALL GC_get_start_callback(void);
426 427 /**
428 * Slow/general mark bit manipulation. The caller should hold the
429 * allocator lock. The argument should be the real address of an object
430 * (i.e. the address of the debug header if there is one).
431 */
432 GC_API void GC_CALL GC_clear_mark_bit(const void *) GC_ATTR_NONNULL(1);
433 GC_API void GC_CALL GC_set_mark_bit(const void *) GC_ATTR_NONNULL(1);
434 435 /**
436 * Get the mark bit. Returns 1 (true) or 0. The caller should hold the
437 * allocator lock at least in the reader mode. The argument should be
438 * the real address of an object (i.e. the address of the debug header
439 * if there is one).
440 */
441 GC_API int GC_CALL GC_is_marked(const void *) GC_ATTR_NONNULL(1);
442 443 /**
444 * Push everything in the given range onto the mark stack. `bottom` is
445 * the first location to be scanned; `top` is one past the last location
446 * to be scanned. Should only be used if there is no possibility of mark
447 * stack overflow.
448 */
449 GC_API void GC_CALL GC_push_all(void * /* `bottom` */, void * /* `top` */);
450 451 /**
452 * Similar to `GC_push_all` but treats all interior pointers as valid and
453 * scans the entire region immediately (not just schedules it for scanning),
454 * in case the contents change.
455 */
456 GC_API void GC_CALL GC_push_all_eager(void * /* `bottom` */,
457 void * /* `top` */);
458 459 /**
460 * Similar to `GC_push_all` but processes either all or only dirty pages
461 * depending on `all` argument.
462 */
463 GC_API void GC_CALL GC_push_conditional(void * /* `bottom` */,
464 void * /* `top` */,
465 int /* `bool` `all` */);
466 467 GC_API void GC_CALL GC_push_finalizer_structures(void);
468 469 /**
470 * Set/get the client push-other-roots procedure. A client-supplied
471 * procedure should also call the original one. Note that both the setter
472 * and the getter require some external synchronization to avoid data race.
473 */
474 typedef void(GC_CALLBACK *GC_push_other_roots_proc)(void);
475 GC_API void GC_CALL GC_set_push_other_roots(GC_push_other_roots_proc);
476 GC_API GC_push_other_roots_proc GC_CALL GC_get_push_other_roots(void);
477 478 /**
479 * Walk the GC heap visiting all reachable objects. Assume the caller
480 * holds the allocator lock at least in the reader mode. Object base
481 * pointer, object size and client custom data are passed to the callback
482 * (holding the allocator lock in the same mode as the caller does).
483 */
484 typedef void(GC_CALLBACK *GC_reachable_object_proc)(
485 void * /* `obj` */, size_t /* `bytes` */, void * /* `client_data` */);
486 GC_API void GC_CALL GC_enumerate_reachable_objects_inner(
487 GC_reachable_object_proc, void * /* `client_data` */) GC_ATTR_NONNULL(1);
488 489 /**
490 * Is the given address in one of the temporary static root sections?
491 * Acquires the allocator lock in the reader mode. For the debugging
492 * purpose only.
493 */
494 GC_API int GC_CALL GC_is_tmp_root(void *);
495 496 GC_API void GC_CALL GC_print_trace(GC_word /* `gc_no` */);
497 GC_API void GC_CALL GC_print_trace_inner(GC_word /* `gc_no` */);
498 499 /**
500 * Set the client for when mark stack is empty. A client can use this
501 * callback to process (un)marked objects and push additional work onto
502 * the stack. Useful for implementing ephemerons. Both the setter and
503 * the getter acquire the allocator lock (in the reader mode in case of
504 * the getter).
505 */
506 typedef struct GC_ms_entry *(GC_CALLBACK *GC_on_mark_stack_empty_proc)(
507 struct GC_ms_entry * /* `mark_stack_top` */,
508 struct GC_ms_entry * /* `mark_stack_limit` */);
509 GC_API void GC_CALL GC_set_on_mark_stack_empty(GC_on_mark_stack_empty_proc);
510 GC_API GC_on_mark_stack_empty_proc GC_CALL GC_get_on_mark_stack_empty(void);
511 512 #ifdef __cplusplus
513 } /* extern "C" */
514 #endif
515 516 #endif /* GC_MARK_H */
517