/* gc_pmark.h */

   1  /*
   2   * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
   3   * Copyright (c) 2001 by Hewlett-Packard Company. All rights reserved.
   4   * Copyright (c) 2008-2022 Ivan Maidanski
   5   *
   6   * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   7   * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
   8   *
   9   * Permission is hereby granted to use or copy this program
  10   * for any purpose, provided the above notices are retained on all copies.
  11   * Permission to modify the code and to distribute modified code is granted,
  12   * provided the above notices are retained, and a notice that the code was
  13   * modified is included with the above copyright notice.
  14   */
  15  
  16  /*
  17   * Private declarations of the collector marker data structures (like the
  18   * mark stack) and macros.  Needed by the marker and the client-supplied
  19   * mark routines.  Transitively includes `gc_priv.h` file.
  20   */
  21  
  22  #ifndef GC_PMARK_H
  23  #define GC_PMARK_H
  24  
  25  #if defined(HAVE_CONFIG_H) && !defined(GC_PRIVATE_H)
  26  /*
  27   * When `gc_pmark.h` file is included from `gc_priv.h` file, some of
  28   * macros might be undefined in `gcconfig.h` file, so skip `config.h`
  29   * file in this case.
  30   */
  31  #  include "config.h"
  32  #endif
  33  
  34  #ifndef GC_BUILD
  35  #  define GC_BUILD
  36  #endif
  37  
  38  #if (defined(__linux__) || defined(__GLIBC__) || defined(__GNU__)) \
  39      && !defined(_GNU_SOURCE) && defined(GC_PTHREADS)               \
  40      && !defined(GC_NO_PTHREAD_SIGMASK)
  41  #  define _GNU_SOURCE 1
  42  #endif
  43  
  44  #if defined(KEEP_BACK_PTRS) || defined(PRINT_BLACK_LIST)
  45  #  include "dbg_mlc.h"
  46  #endif
  47  
  48  #include "gc/gc_mark.h"
  49  #include "gc_priv.h"
  50  
  51  EXTERN_C_BEGIN
  52  
  53  /*
  54   * The real declarations of the following is in `gc_priv.h` file, so
  55   * that we can avoid scanning `GC_mark_procs` table.
  56   */
  57  
  58  /*
  59   * Mark descriptor stuff that should remain private for now, mostly
  60   * because it is hard to export `CPP_WORDSZ` macro without include
  61   * `gcconfig.h` file.
  62   */
  63  #define BITMAP_BITS (CPP_WORDSZ - GC_DS_TAG_BITS)
  64  #define PROC(descr) \
  65    (GC_mark_procs[((descr) >> GC_DS_TAG_BITS) & (GC_MAX_MARK_PROCS - 1)])
  66  #define ENV(descr) ((descr) >> (GC_DS_TAG_BITS + GC_LOG_MAX_MARK_PROCS))
  67  #define MAX_ENV (((word)1 << (BITMAP_BITS - GC_LOG_MAX_MARK_PROCS)) - 1)
  68  
  69  GC_EXTERN unsigned GC_n_mark_procs;
  70  
  71  /* Number of mark stack entries to discard on overflow. */
  72  #define GC_MARK_STACK_DISCARDS (INITIAL_MARK_STACK_SIZE / 8)
  73  
  74  #ifdef PARALLEL_MARK
  75  /*
  76   * Allow multiple threads to participate in the marking process.
  77   * This works roughly as follows:
  78   *   - The main mark stack never shrinks, but it can grow.
   79   *   - The initiating thread holds the allocator lock and sets
   80   *     `GC_help_wanted`.
  81   *   - Other threads:
  82   *     1. Update `GC_helper_count` (while holding the mark lock).
  83   *     2. Allocate a local mark stack repeatedly:
  84   *        2.1. Steal a global mark stack entry by atomically replacing
  85   *             its descriptor with 0;
  86   *        2.2. Copy it to the local stack;
  87   *        2.3. Mark on the local stack until it is empty, or it may be
  88   *             profitable to copy it back;
  89   *        2.4. If necessary, copy local stack to global one, holding the
  90   *             mark lock;
  91   *        2.5. Stop when the global mark stack is empty.
  92   *     3. Decrement `GC_helper_count` (holding the mark lock).
  93   *
  94   * This is an experiment to see if we can do something along the lines
  95   * of the University of Tokyo SGC in a less intrusive, though probably
  96   * also less performant, way.
  97   */
  98  
  99  /* `GC_mark_stack_top` is protected by the mark lock. */
 100  
 101  /*
 102   * `GC_notify_all_marker()` is used when `GC_help_wanted` is first set,
 103   * when the last helper becomes inactive, when something is added to the
 104   * global mark stack, and just after `GC_mark_no` is incremented.
 105   * This could be split into multiple conditional variables (and probably
 106   * should be) to scale to really large numbers of processors.
 107   */
 108  #endif /* PARALLEL_MARK */
 109  
 110  /*
 111   * Push the object `obj` with corresponding heap block header `hhdr`
 112   * onto the mark stack.  Returns the updated `mark_stack_top` value.
 113   */
 114  GC_INLINE mse *
 115  GC_push_obj(ptr_t obj, const hdr *hhdr, mse *mark_stack_top,
 116              mse *mark_stack_limit)
 117  {
 118    GC_ASSERT(!HBLK_IS_FREE(hhdr));
 119    if (!IS_PTRFREE(hhdr)) {
 120      mark_stack_top = GC_custom_push_proc(hhdr->hb_descr, obj, mark_stack_top,
 121                                           mark_stack_limit);
 122    }
 123    return mark_stack_top;
 124  }
 125  
 126  /*
 127   * Push the contents of `current` onto the mark stack if it is a valid
 128   * pointer to a currently unmarked object.  Mark it.
 129   */
 130  #define PUSH_CONTENTS(current, mark_stack_top, mark_stack_limit, source)   \
 131    do {                                                                     \
 132      hdr *my_hhdr;                                                          \
 133      HC_GET_HDR(current, my_hhdr, source); /*< contains `break` */          \
 134      mark_stack_top = GC_push_contents_hdr(                                 \
 135          current, mark_stack_top, mark_stack_limit, source, my_hhdr, TRUE); \
 136    } while (0)
 137  
 138  /* Set mark bit, exit (using `break` statement) if it is already set. */
 139  #ifdef USE_MARK_BYTES
 140  #  if defined(PARALLEL_MARK) && defined(AO_HAVE_char_store) \
 141        && !defined(BASE_ATOMIC_OPS_EMULATED)
 142  /*
 143   * There is a race here, and we may set the bit twice in the concurrent
 144   * case.  This can result in the object being pushed twice.  But that is
 145   * only a performance issue.
 146   */
 147  #    define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no)                 \
 148        { /*< cannot use `do ... while (0)` here */                  \
 149          volatile unsigned char *mark_byte_addr                     \
 150              = (unsigned char *)(hhdr)->hb_marks + (bit_no);        \
 151          /* Unordered atomic load and store are sufficient here. */ \
 152          if (AO_char_load(mark_byte_addr) != 0)                     \
 153            break; /*< go to the enclosing loop end */               \
 154          AO_char_store(mark_byte_addr, 1);                          \
 155        }
 156  #  else
 157  #    define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no)                 \
 158        { /*< cannot use `do ... while (0)` here */                  \
 159          ptr_t mark_byte_addr = (ptr_t)(hhdr)->hb_marks + (bit_no); \
 160                                                                     \
 161          if (*mark_byte_addr != 0)                                  \
 162            break; /*< go to the enclosing loop end */               \
 163          *mark_byte_addr = 1;                                       \
 164        }
 165  #  endif /* !PARALLEL_MARK */
 166  #else
 167  #  if defined(PARALLEL_MARK) || (defined(THREAD_SANITIZER) && defined(THREADS))
 168  #    ifdef THREAD_SANITIZER
 169  #      define MARK_WORD_READ(addr) AO_load(addr)
 170  #    else
 171  #      define MARK_WORD_READ(addr) (*(addr))
 172  #    endif
 173  /*
 174   * This is used only if we explicitly define `USE_MARK_BITS` macro.
 175   * The following may fail to exit even if the bit was already set.
 176   * For our uses, that is benign.
 177   */
 178  #    define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no)                            \
 179        { /*< cannot use `do ... while (0)` here */                             \
 180          volatile AO_t *mark_word_addr = (hhdr)->hb_marks + divWORDSZ(bit_no); \
 181          word my_bits = (word)1 << modWORDSZ(bit_no);                          \
 182                                                                                \
 183          if ((MARK_WORD_READ(mark_word_addr) & my_bits) != 0)                  \
 184            break; /*< go to the enclosing loop end */                          \
 185          AO_or(mark_word_addr, my_bits);                                       \
 186        }
 187  #  else /* !PARALLEL_MARK */
 188  #    define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no)                   \
 189        { /*< cannot use `do ... while (0)` here */                    \
 190          word *mark_word_addr = (hhdr)->hb_marks + divWORDSZ(bit_no); \
 191          word old = *mark_word_addr;                                  \
 192          word my_bits = (word)1 << modWORDSZ(bit_no);                 \
 193                                                                       \
 194          if ((old & my_bits) != 0)                                    \
 195            break; /*< go to the enclosing loop end */                 \
 196          *(mark_word_addr) = old | my_bits;                           \
 197        }
 198  #  endif
 199  #endif /* !USE_MARK_BYTES */
 200  
 201  #ifdef ENABLE_TRACE
 202  #  define TRACE(source, cmd)                                     \
 203      if (GC_trace_ptr != NULL && (ptr_t)(source) == GC_trace_ptr) \
 204      cmd
 205  #  define TRACE_TARGET(target, cmd)                          \
 206      if (GC_trace_ptr != NULL && GC_is_heap_ptr(GC_trace_ptr) \
 207          && (target) == *(ptr_t *)GC_trace_ptr)               \
 208      cmd
 209  #else
 210  #  define TRACE(source, cmd)
 211  #  define TRACE_TARGET(source, cmd)
 212  #endif
 213  
 214  /*
 215   * If the mark bit corresponding to `current` is not set, set it, and
 216   * push the contents of the object on the mark stack.  `current` points
 217   * to the beginning of the object.  We rely on the fact that the
 218   * preceding header calculation will succeed for a pointer past the
 219   * first page of an object, only if it is in fact a valid pointer
 220   * to the object.  Thus we can omit the otherwise necessary tests here.
 221   */
 222  GC_INLINE mse *
 223  GC_push_contents_hdr(ptr_t current, mse *mark_stack_top, mse *mark_stack_limit,
 224                       ptr_t source, hdr *hhdr, GC_bool do_offset_check)
 225  {
 226    do {
 227      /*
 228       * Displacement in the block, in bytes; always within range.
 229       * Note, in particular, that this value is the displacement from the
 230       * beginning of the heap block, which may itself be in the interior
 231       * of a large object.  If `current` does not point to the first block,
 232       * then we are in the all-interior-pointers mode, and it is safe to
 233       * use any displacement value.
 234       */
 235      size_t displ = HBLKDISPL(current);
 236      ptr_t base = current;
 237  #ifdef MARK_BIT_PER_OBJ
 238      unsigned32 gran_displ; /*< `high_prod` */
 239      unsigned32 inv_sz = hhdr->hb_inv_sz;
 240  
 241  #else
 242      size_t gran_displ = BYTES_TO_GRANULES(displ);
 243      size_t gran_offset = hhdr->hb_map[gran_displ];
 244      size_t byte_offset = displ & (GC_GRANULE_BYTES - 1);
 245  
 246      /* The following always fails for large block references. */
 247      if (UNLIKELY((gran_offset | byte_offset) != 0))
 248  #endif
 249      {
 250  #ifdef MARK_BIT_PER_OBJ
 251        if (UNLIKELY(inv_sz == LARGE_INV_SZ))
 252  #else
 253        if ((hhdr->hb_flags & LARGE_BLOCK) != 0)
 254  #endif
 255        {
 256          /* `gran_offset` is bogus. */
 257          size_t obj_displ;
 258  
 259          base = (ptr_t)hhdr->hb_block;
 260          obj_displ = (size_t)(current - base);
 261          if (obj_displ != displ) {
 262            GC_ASSERT(obj_displ < hhdr->hb_sz);
 263            /*
 264             * Must be in the all-interior-pointers mode, non-first block
 265             * already did validity check on cache miss.
 266             */
 267          } else if (do_offset_check && !GC_valid_offsets[obj_displ]) {
 268            GC_ADD_TO_BLACK_LIST_NORMAL(current, source);
 269            break;
 270          }
 271          GC_ASSERT(hhdr->hb_sz > HBLKSIZE
 272                    || hhdr->hb_block == HBLKPTR(current));
 273          GC_ASSERT(ADDR_GE(current, (ptr_t)hhdr->hb_block));
 274          gran_displ = 0;
 275        } else {
 276  #ifdef MARK_BIT_PER_OBJ
 277          unsigned32 low_prod;
 278  
 279          LONG_MULT(gran_displ, low_prod, (unsigned32)displ, inv_sz);
 280          if ((low_prod >> 16) != 0)
 281  #endif
 282          {
 283            size_t obj_displ;
 284  
 285  #ifdef MARK_BIT_PER_OBJ
 286            /* Accurate enough if `HBLKSIZE` is not greater than 2**15. */
 287            GC_STATIC_ASSERT(HBLKSIZE <= (1 << 15));
 288            obj_displ = (((low_prod >> 16) + 1) * hhdr->hb_sz) >> 16;
 289  #else
 290            obj_displ = GRANULES_TO_BYTES(gran_offset) + byte_offset;
 291  #endif
 292  
 293            if (do_offset_check && !GC_valid_offsets[obj_displ]) {
 294              GC_ADD_TO_BLACK_LIST_NORMAL(current, source);
 295              break;
 296            }
 297  #ifndef MARK_BIT_PER_OBJ
 298            gran_displ -= gran_offset;
 299  #endif
 300            base -= obj_displ;
 301          }
 302        }
 303      }
 304  #ifdef MARK_BIT_PER_OBJ
 305      /*
 306       * May get here for pointer to start of block not at the beginning
 307       * of object.  If so, it is valid, and we are fine.
 308       */
 309      GC_ASSERT(gran_displ <= HBLK_OBJS(hhdr->hb_sz));
 310  #else
 311      GC_ASSERT(hhdr == GC_find_header(base));
 312      GC_ASSERT(gran_displ % BYTES_TO_GRANULES(hhdr->hb_sz) == 0);
 313  #endif
 314      TRACE(source, GC_log_printf("GC #%lu: passed validity tests\n",
 315                                  (unsigned long)GC_gc_no));
 316      SET_MARK_BIT_EXIT_IF_SET(hhdr, gran_displ); /*< contains `break` */
 317      TRACE(source, GC_log_printf("GC #%lu: previously unmarked\n",
 318                                  (unsigned long)GC_gc_no));
 319      TRACE_TARGET(base, GC_log_printf("GC #%lu: marking %p from %p instead\n",
 320                                       (unsigned long)GC_gc_no, (void *)base,
 321                                       (void *)source));
 322      INCR_MARKS(hhdr);
 323      GC_STORE_BACK_PTR(source, base);
 324      mark_stack_top = GC_push_obj(base, hhdr, mark_stack_top, mark_stack_limit);
 325    } while (0);
 326    return mark_stack_top;
 327  }
 328  
 329  #if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
 330  #  define PUSH_ONE_CHECKED_STACK(p, source) \
 331      GC_mark_and_push_stack(p, (ptr_t)(source))
 332  #else
 333  #  define PUSH_ONE_CHECKED_STACK(p, source) GC_mark_and_push_stack(p)
 334  #endif
 335  
 336  /*
 337   * Push a single value onto mark stack.  Mark from the object
 338   * pointed to by `p`.  The argument should be of `ptr_t` type.
 339   * Invoke `FIXUP_POINTER()` before any further processing.
  340   * `p` is considered valid even if it is an interior pointer.
 341   * Previously marked objects are not pushed.  Hence we make progress
 342   * even if the mark stack overflows.
 343   */
 344  #ifdef NEED_FIXUP_POINTER
 345  /* Try both the raw variant and the fixed up one. */
 346  #  define GC_PUSH_ONE_STACK(p, source)                              \
 347      do {                                                            \
 348        ptr_t pp = (p);                                               \
 349                                                                      \
 350        if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, p)           \
 351            && ADDR_LT(p, (ptr_t)GC_greatest_plausible_heap_addr)) {  \
 352          PUSH_ONE_CHECKED_STACK(p, source);                          \
 353        }                                                             \
 354        FIXUP_POINTER(pp);                                            \
 355        if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, pp)          \
 356            && ADDR_LT(pp, (ptr_t)GC_greatest_plausible_heap_addr)) { \
 357          PUSH_ONE_CHECKED_STACK(pp, source);                         \
 358        }                                                             \
 359      } while (0)
 360  #else /* !NEED_FIXUP_POINTER */
 361  #  define GC_PUSH_ONE_STACK(p, source)                             \
 362      do {                                                           \
 363        if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, p)          \
 364            && ADDR_LT(p, (ptr_t)GC_greatest_plausible_heap_addr)) { \
 365          PUSH_ONE_CHECKED_STACK(p, source);                         \
 366        }                                                            \
 367      } while (0)
 368  #endif
 369  
 370  /*
  371   * Same as `GC_PUSH_ONE_STACK`, but with interior pointers recognized
  372   * as for normal heap pointers.
 373   */
 374  #define GC_PUSH_ONE_HEAP(p, source, mark_stack_top)                   \
 375    do {                                                                \
 376      FIXUP_POINTER(p);                                                 \
 377      if (ADDR_LT((ptr_t)GC_least_plausible_heap_addr, p)               \
 378          && ADDR_LT(p, (ptr_t)GC_greatest_plausible_heap_addr))        \
 379        mark_stack_top = GC_mark_and_push(                              \
 380            p, mark_stack_top, GC_mark_stack_limit, (void **)(source)); \
 381    } while (0)
 382  
 383  /*
 384   * Mark objects pointed to by the regions described by mark stack entries
 385   * between `mark_stack` and `mark_stack_top`, inclusive.  Assumes the upper
 386   * limit of a mark stack entry is never `NULL`.  A mark stack entry never
 387   * has zero size.  Return the new value of `mark_stack_top`.
 388   * We try to traverse on the order of a `hblk` of memory before we return.
 389   * Caller is responsible for calling this until the mark stack is empty.
 390   * Note that this is the most performance critical routine in the collector.
 391   * Hence it contains all sorts of ugly hacks to speed things up.
 392   * In particular, we avoid procedure calls on the common path, we take
 393   * advantage of peculiarities of the mark descriptor encoding, we optionally
 394   * maintain a cache for the block address to header mapping, we prefetch
 395   * when an object is "grayed", etc.
 396   */
 397  GC_INNER mse *GC_mark_from(mse *mark_stack_top, mse *mark_stack,
 398                             mse *mark_stack_limit);
 399  
 400  #define MARK_FROM_MARK_STACK()                                       \
 401    GC_mark_stack_top = GC_mark_from(GC_mark_stack_top, GC_mark_stack, \
 402                                     GC_mark_stack + GC_mark_stack_size);
 403  
 404  #define GC_mark_stack_empty() \
 405    ADDR_LT((ptr_t)GC_mark_stack_top, (ptr_t)GC_mark_stack)
 406  
 407  /*
 408   * The current state of marking, as follows.  We say something is dirty
 409   * if it was written since the last time we retrieved dirty bits.
 410   * We say it is grungy if it was marked dirty in the last set of bits
 411   * we retrieved.  Invariant "I": all roots and marked objects `p` are
 412   * either dirty, or point to objects `q` that are either marked or
 413   * a pointer to `q` appears in a range on the mark stack.
 414   */
 415  
 416  /* No marking in progress.  "I" holds.  Mark stack is empty. */
 417  #define MS_NONE 0
 418  
 419  /*
 420   * Rescuing objects are currently being pushed.  "I" holds, except that
 421   * grungy roots may point to unmarked objects, as may marked grungy objects
 422   * above `GC_scan_ptr`.
 423   */
 424  #define MS_PUSH_RESCUERS 1
 425  
 426  /*
 427   * Uncollectible objects are currently being pushed.  "I" holds, except
 428   * that marked uncollectible objects above `GC_scan_ptr` may point to
 429   * unmarked objects.  Roots may point to unmarked objects too.
 430   */
 431  #define MS_PUSH_UNCOLLECTABLE 2
 432  
 433  /* "I" holds, mark stack may be nonempty. */
 434  #define MS_ROOTS_PUSHED 3
 435  
 436  /*
 437   * "I" may not hold, e.g. because of the mark stack overflow.  However,
 438   * marked heap objects below `GC_scan_ptr` point to marked or stacked
 439   * objects.
 440   */
 441  #define MS_PARTIALLY_INVALID 4
 442  
 443  /* "I" may not hold. */
 444  #define MS_INVALID 5
 445  
 446  EXTERN_C_END
 447  
 448  #endif /* GC_PMARK_H */
 449