/* thread_local_alloc.c */

   1  /*
   2   * Copyright (c) 2000-2005 by Hewlett-Packard Company.  All rights reserved.
   3   * Copyright (c) 2008-2022 Ivan Maidanski
   4   *
   5   * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   6   * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
   7   *
   8   * Permission is hereby granted to use or copy this program
   9   * for any purpose, provided the above notices are retained on all copies.
  10   * Permission to modify the code and to distribute modified code is granted,
  11   * provided the above notices are retained, and a notice that the code was
  12   * modified is included with the above copyright notice.
  13   */
  14  
  15  #include "private/gc_priv.h"
  16  
  17  #if defined(THREAD_LOCAL_ALLOC)
  18  
  19  #  if !defined(THREADS) && !defined(CPPCHECK)
  20  #    error Invalid config - THREAD_LOCAL_ALLOC requires GC_THREADS
  21  #  endif
  22  
  23  #  include "private/thread_local_alloc.h"
  24  
  25  #  if defined(USE_COMPILER_TLS)
  26  __thread GC_ATTR_TLS_FAST
  27  #  elif defined(USE_WIN32_COMPILER_TLS)
  28  __declspec(thread) GC_ATTR_TLS_FAST
  29  #  endif
  30      GC_key_t GC_thread_key;
  31  
  32  static GC_bool keys_initialized;
  33  
  34  /*
  35   * Return a single nonempty free list `fl` to the global one pointed to
  36   * by `gfl`.
  37   */
  38  static void
  39  return_single_freelist(void *fl, void **gfl)
  40  {
  41    if (NULL == *gfl) {
  42      *gfl = fl;
  43    } else {
  44      void *q = fl;
  45      void **qptr;
  46  
  47      GC_ASSERT(GC_size(fl) == GC_size(*gfl));
  48      /* Concatenate. */
  49      do {
  50        qptr = &obj_link(q);
  51        q = *qptr;
  52      } while (ADDR(q) >= HBLKSIZE);
  53      GC_ASSERT(NULL == q);
  54      *qptr = *gfl;
  55      *gfl = fl;
  56    }
  57  }
  58  
  59  /*
  60   * Recover the contents of the free-list array `fl` into the global one
  61   * `gfl`.
  62   */
  63  static void
  64  return_freelists(void **fl, void **gfl)
  65  {
  66    int i;
  67  
  68    for (i = 1; i < GC_TINY_FREELISTS; ++i) {
  69      if (ADDR(fl[i]) >= HBLKSIZE) {
  70        return_single_freelist(fl[i], &gfl[i]);
  71      }
  72      /*
  73       * Clear `fl[i]`, since the thread structure may hang around.
  74       * Do it in a way that is likely to trap if we access it.
  75       */
  76      fl[i] = (ptr_t)NUMERIC_TO_VPTR(HBLKSIZE);
  77    }
  78    /* The 0 granule free list really contains 1 granule objects. */
  79    if (ADDR(fl[0]) >= HBLKSIZE
  80  #  ifdef GC_GCJ_SUPPORT
  81        && ADDR(fl[0]) != ERROR_FL
  82  #  endif
  83    ) {
  84      return_single_freelist(fl[0], &gfl[1]);
  85    }
  86  }
  87  
  88  #  ifdef USE_PTHREAD_SPECIFIC
  89  /*
  90   * Re-set the TLS value on thread cleanup to allow thread-local allocations
  91   * to happen in the TLS destructors.  `GC_unregister_my_thread()` (and
  92   * similar routines) will finally set the `GC_thread_key` to `NULL`
  93   * preventing this destructor from being called repeatedly.
  94   */
  95  static void
  96  reset_thread_key(void *v)
  97  {
  98    pthread_setspecific(GC_thread_key, v);
  99  }
 100  #  else
 101  #    define reset_thread_key 0
 102  #  endif
 103  
 104  GC_INNER void
 105  GC_init_thread_local(GC_tlfs p)
 106  {
 107    int kind, j, res;
 108  
 109    GC_ASSERT(I_HOLD_LOCK());
 110    if (UNLIKELY(!keys_initialized)) {
 111  #  ifdef USE_CUSTOM_SPECIFIC
 112      /* Ensure proper alignment of a "pushed" GC symbol. */
 113      GC_ASSERT(ADDR(&GC_thread_key) % ALIGNMENT == 0);
 114  #  endif
 115      res = GC_key_create(&GC_thread_key, reset_thread_key);
 116      if (COVERT_DATAFLOW(res) != 0) {
 117        ABORT("Failed to create key for local allocator");
 118      }
 119      keys_initialized = TRUE;
 120    }
 121    res = GC_setspecific(GC_thread_key, p);
 122    if (COVERT_DATAFLOW(res) != 0) {
 123      ABORT("Failed to set thread specific allocation pointers");
 124    }
 125    for (j = 0; j < GC_TINY_FREELISTS; ++j) {
 126      for (kind = 0; kind < THREAD_FREELISTS_KINDS; ++kind) {
 127        p->_freelists[kind][j] = NUMERIC_TO_VPTR(1);
 128      }
 129  #  ifdef GC_GCJ_SUPPORT
 130      p->gcj_freelists[j] = NUMERIC_TO_VPTR(1);
 131  #  endif
 132    }
 133    /*
 134     * The zero-sized free list is handled like the regular free list, to
 135     * ensure that the explicit deallocation works.  However, an allocation
 136     * of a `gcj` object with the zero size is always an error.
 137     */
 138  #  ifdef GC_GCJ_SUPPORT
 139    p->gcj_freelists[0] = MAKE_CPTR(ERROR_FL);
 140  #  endif
 141  }
 142  
 143  GC_INNER void
 144  GC_destroy_thread_local(GC_tlfs p)
 145  {
 146    int kind;
 147  
 148    GC_ASSERT(I_HOLD_LOCK());
 149    GC_ASSERT(GC_getspecific(GC_thread_key) == p);
 150    /* We currently only do this from the thread itself. */
 151    GC_STATIC_ASSERT(THREAD_FREELISTS_KINDS <= MAXOBJKINDS);
 152    for (kind = 0; kind < THREAD_FREELISTS_KINDS; ++kind) {
 153      if (kind == (int)GC_n_kinds) {
 154        /* The kind is not created. */
 155        break;
 156      }
 157      return_freelists(p->_freelists[kind], GC_obj_kinds[kind].ok_freelist);
 158    }
 159  #  ifdef GC_GCJ_SUPPORT
 160    return_freelists(p->gcj_freelists, (void **)GC_gcjobjfreelist);
 161  #  endif
 162  }
 163  
 164  STATIC void *
 165  GC_get_tlfs(void)
 166  {
 167  #  if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
 168    GC_key_t k = GC_thread_key;
 169  
 170    if (UNLIKELY(0 == k)) {
 171      /*
 172       * We have not yet run `GC_init_parallel()`.  That means we also
 173       * are not locking, so `GC_malloc_kind_global()` is fairly cheap.
 174       */
 175      return NULL;
 176    }
 177    return GC_getspecific(k);
 178  #  else
 179    if (UNLIKELY(!keys_initialized))
 180      return NULL;
 181  
 182    return GC_getspecific(GC_thread_key);
 183  #  endif
 184  }
 185  
/*
 * Allocate `lb` bytes of an object of the given `kind`, preferring the
 * calling thread's local free lists.  Falls back to
 * `GC_malloc_kind_global()` when the kind has no thread-local lists,
 * when the thread-local structure is not yet available, or (inside
 * `GC_FAST_MALLOC_GRANS()`) when the relevant local list cannot satisfy
 * the request.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_kind(size_t lb, int kind)
{
  size_t lg;
  void *tsd;
  void *result;

#  if MAXOBJKINDS > THREAD_FREELISTS_KINDS
  /* Kinds without thread-local free lists go straight to the global path. */
  if (UNLIKELY(kind >= THREAD_FREELISTS_KINDS))
    return GC_malloc_kind_global(lb, kind);
#  endif
  tsd = GC_get_tlfs();
  if (UNLIKELY(NULL == tsd))
    return GC_malloc_kind_global(lb, kind);

  GC_ASSERT(GC_is_initialized);
  GC_ASSERT(GC_is_thread_tsd_valid(tsd));
  lg = ALLOC_REQUEST_GRANS(lb);
#  if defined(CPPCHECK)
#    define MALLOC_KIND_PTRFREE_INIT (void *)1
#  else
#    define MALLOC_KIND_PTRFREE_INIT NULL
#  endif
  /*
   * Fast path: pop from the thread-local list for `lg` granules; the last
   * argument clears the free-list link of the result for pointer-containing
   * kinds (pointer-free objects need no such clearing).
   */
  GC_FAST_MALLOC_GRANS(result, lg, ((GC_tlfs)tsd)->_freelists[kind],
                       DIRECT_GRANULES, kind, GC_malloc_kind_global(lb, kind),
                       (void)(kind == PTRFREE ? MALLOC_KIND_PTRFREE_INIT
                                              : (obj_link(result) = 0)));
#  ifdef LOG_ALLOCS
  GC_log_printf("GC_malloc_kind(%lu, %d) returned %p, recent GC #%lu\n",
                (unsigned long)lb, kind, result, (unsigned long)GC_gc_no);
#  endif
  return result;
}
 219  
 220  #  ifdef GC_GCJ_SUPPORT
 221  
 222  #    include "gc/gc_gcj.h"
 223  
/*
 * Allocate a `gcj`-style object of `lb` bytes whose first word is set to
 * `vtable_ptr`, using the thread-local `gcj` free lists when possible.
 * Falls back to `GC_core_gcj_malloc()` for the incremental-collection
 * mode and whenever the local list cannot satisfy the request.
 */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_gcj_malloc(size_t lb, const void *vtable_ptr)
{
  void *result;
  void **tiny_fl;
  size_t lg;

  /*
   * Unlike the other thread-local allocation calls, we assume that the
   * collector has been explicitly initialized.
   */
  GC_ASSERT(GC_gcjobjfreelist != NULL);
#    if defined(USE_PTHREAD_SPECIFIC) || defined(USE_WIN32_SPECIFIC)
  GC_ASSERT(keys_initialized);
#    else
  GC_ASSERT(GC_thread_key != 0);
#    endif

  /*
   * `gcj`-style allocation without locks is extremely tricky.
   * The fundamental issue is that we may end up marking a free list,
   * which has free-list links instead of "vtable" pointers.
   * That is usually OK, since the next object on the free list will be
   * cleared, and will thus be interpreted as containing a zero descriptor.
   * That is fine if the object has not yet been initialized.  But there
   * are interesting potential races.  In the case of incremental
   * collection, this seems hopeless, since the marker may run
   * asynchronously, and may pick up the pointer to the next free-list
   * entry (which it thinks is a "vtable" pointer), get suspended for
   * a while, and then see an allocated object instead of the "vtable".
   * This may be avoidable with either a handshake with the collector or,
   * probably more easily, by moving the free list links to the second
   * "pointer-sized" word of each object.  The latter is not a universal
   * win, since on architecture like Itanium, nonzero offsets are not
   * necessarily free.  And there may be cache fill order issues.
   * For now, we punt with the incremental collection.  This probably means
   * that the incremental collection should be enabled before we create
   * a second thread.
   */
  if (UNLIKELY(GC_incremental))
    return GC_core_gcj_malloc(lb, vtable_ptr, 0 /* `flags` */);

  /* The TLS value is known to be set up (see the assertions above). */
  tiny_fl = ((GC_tlfs)GC_getspecific(GC_thread_key))->gcj_freelists;
  lg = ALLOC_REQUEST_GRANS(lb);

  /*
   * The provided `default_expr` below forces the initialization of the
   * "vtable" pointer.  This is necessary to ensure some very subtle
   * properties required if a garbage collection is run in the middle of
   * such an allocation.  Here we implicitly also assume atomicity for the
   * free list and method pointer assignments.  We must update the free list
   * before we store the pointer.  Otherwise a collection at this point
   * would see a corrupted free list.  A real memory barrier is not needed,
   * since the action of stopping this thread will cause prior writes
   * to complete.  We assert that any concurrent marker will stop us.
   * Thus it is impossible for a mark procedure to see the allocation of the
   * next object, but to see this object still containing a free-list pointer.
   * Otherwise the marker, by misinterpreting the free-list link as a "vtable"
   * pointer, might find a random "mark descriptor" in the next object.
   */
  GC_FAST_MALLOC_GRANS(
      result, lg, tiny_fl, DIRECT_GRANULES, GC_gcj_kind,
      GC_core_gcj_malloc(lb, vtable_ptr, 0 /* `flags` */), do {
        AO_compiler_barrier();
        *(const void **)result = vtable_ptr;
      } while (0));
  return result;
}
 292  
 293  #  endif /* GC_GCJ_SUPPORT */
 294  
 295  GC_INNER void
 296  GC_mark_thread_local_fls_for(GC_tlfs p)
 297  {
 298    ptr_t q;
 299    int kind, j;
 300  
 301    for (j = 0; j < GC_TINY_FREELISTS; ++j) {
 302      for (kind = 0; kind < THREAD_FREELISTS_KINDS; ++kind) {
 303        /*
 304         * Load the pointer atomically as it might be updated concurrently
 305         * by `GC_FAST_MALLOC_GRANS()`.
 306         */
 307        q = GC_cptr_load((volatile ptr_t *)&p->_freelists[kind][j]);
 308        if (ADDR(q) > HBLKSIZE)
 309          GC_set_fl_marks(q);
 310      }
 311  #  ifdef GC_GCJ_SUPPORT
 312      if (LIKELY(j > 0)) {
 313        q = GC_cptr_load((volatile ptr_t *)&p->gcj_freelists[j]);
 314        if (ADDR(q) > HBLKSIZE)
 315          GC_set_fl_marks(q);
 316      }
 317  #  endif
 318    }
 319  }
 320  
 321  #  if defined(GC_ASSERTIONS)
 322  /* Check that all thread-local free-lists in `p` are completely marked. */
 323  void
 324  GC_check_tls_for(GC_tlfs p)
 325  {
 326    int kind, j;
 327  
 328    for (j = 1; j < GC_TINY_FREELISTS; ++j) {
 329      for (kind = 0; kind < THREAD_FREELISTS_KINDS; ++kind) {
 330        GC_check_fl_marks(&p->_freelists[kind][j]);
 331      }
 332  #    ifdef GC_GCJ_SUPPORT
 333      GC_check_fl_marks(&p->gcj_freelists[j]);
 334  #    endif
 335    }
 336  }
 337  #  endif
 338  
 339  #endif /* THREAD_LOCAL_ALLOC */
 340