pthread_support.c raw

   1  /*
   2   * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
   3   * Copyright (c) 1996 by Silicon Graphics.  All rights reserved.
   4   * Copyright (c) 1998 by Fergus Henderson.  All rights reserved.
   5   * Copyright (c) 2000-2008 by Hewlett-Packard Company.  All rights reserved.
   6   * Copyright (c) 2008-2022 Ivan Maidanski
   7   *
   8   * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   9   * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
  10   *
  11   * Permission is hereby granted to use or copy this program
  12   * for any purpose, provided the above notices are retained on all copies.
  13   * Permission to modify the code and to distribute modified code is granted,
  14   * provided the above notices are retained, and a notice that the code was
  15   * modified is included with the above copyright notice.
  16   */
  17  
  18  #include "private/pthread_support.h"
  19  
  20  /*
  21   * Support code originally for LinuxThreads, the `clone`-based kernel
  22   * thread package for Linux which is included in `libc6`.
  23   *
  24   * This code no doubt makes some assumptions beyond what is guaranteed by
  25   * the `pthreads` standard, though it now does very little of that.
  26   * It now also supports NPTL, and many other POSIX thread implementations.
  27   * We are trying to merge all flavors of `pthreads` support code into this
  28   * file.
  29   */
  30  
  31  #ifdef THREADS
  32  
  33  #  ifdef GC_PTHREADS
  34  #    if defined(DARWIN) \
  35          || (defined(GC_WIN32_THREADS) && defined(EMULATE_PTHREAD_SEMAPHORE))
  36  #      include "private/darwin_semaphore.h"
  37  #    elif !defined(PLATFORM_THREADS) && !defined(SN_TARGET_PSP2)
  38  #      include <semaphore.h>
  39  #    endif
  40  #    include <errno.h>
  41  #  endif /* GC_PTHREADS */
  42  
  43  #  if !defined(GC_WIN32_THREADS)
  44  #    include <sched.h>
  45  #    include <time.h>
  46  #    if !defined(PLATFORM_THREADS) && !defined(SN_TARGET_PSP2)
  47  #      ifndef RTEMS
  48  #        include <sys/mman.h>
  49  #      endif
  50  #      include <fcntl.h>
  51  #      include <sys/stat.h>
  52  #      include <sys/time.h>
  53  #    endif
  54  #    if defined(GC_EXPLICIT_SIGNALS_UNBLOCK) \
  55          || !defined(GC_NO_PTHREAD_SIGMASK)   \
  56          || (defined(GC_PTHREADS_PARAMARK)    \
  57              && !defined(NO_MARKER_SPECIAL_SIGMASK))
  58  #      include <signal.h>
  59  #    endif
  60  #  endif /* !GC_WIN32_THREADS */
  61  
  62  #  ifdef E2K
  63  #    include <alloca.h>
  64  #  endif
  65  
  66  #  if defined(DARWIN) || defined(ANY_BSD)
  67  #    if defined(NETBSD) || defined(OPENBSD)
  68  #      include <sys/param.h>
  69  #    endif
  70  #    include <sys/sysctl.h>
  71  #  elif defined(DGUX)
  72  #    include <sys/_int_psem.h>
  73  #    include <sys/dg_sys_info.h>
  74  /* Note: `sem_t` is `uint` in DG/UX. */
  75  typedef unsigned int sem_t;
  76  #  endif
  77  
  78  #  if defined(GC_PTHREADS) && !defined(PLATFORM_THREADS) \
  79        && !defined(SN_TARGET_PSP2)
  80  /* Undefine macros used to redirect `pthreads` primitives. */
  81  #    undef pthread_create
  82  #    ifndef GC_NO_PTHREAD_SIGMASK
  83  #      undef pthread_sigmask
  84  #    endif
  85  #    ifndef GC_NO_PTHREAD_CANCEL
  86  #      undef pthread_cancel
  87  #    endif
  88  #    ifdef GC_HAVE_PTHREAD_EXIT
  89  #      undef pthread_exit
  90  #    endif
  91  #    undef pthread_join
  92  #    undef pthread_detach
  93  #    if defined(OSF1) && defined(_PTHREAD_USE_MANGLED_NAMES_) \
  94          && !defined(_PTHREAD_USE_PTDNAM_)
  95  /* Restore the original mangled names on Tru64 UNIX. */
  96  #      define pthread_create __pthread_create
  97  #      define pthread_join __pthread_join
  98  #      define pthread_detach __pthread_detach
  99  #      ifndef GC_NO_PTHREAD_CANCEL
 100  #        define pthread_cancel __pthread_cancel
 101  #      endif
 102  #      ifdef GC_HAVE_PTHREAD_EXIT
 103  #        define pthread_exit __pthread_exit
 104  #      endif
 105  #    endif
 106  #  endif /* GC_PTHREADS */
 107  
 108  #  if !defined(GC_WIN32_THREADS) && !defined(PLATFORM_THREADS) \
 109        && !defined(SN_TARGET_PSP2)
 110  /* TODO: Enable `GC_USE_DLOPEN_WRAP` for Cygwin? */
 111  
/*
 * Select how the wrapper (`WRAP_FUNC`) and the underlying (`REAL_FUNC`)
 * `pthreads` primitives are named, depending on the interception mode,
 * and declare the underlying primitives where they are not reachable
 * by their plain names.
 */
#    ifdef GC_USE_LD_WRAP
/*
 * The wrappers get `__wrap_` prefix and the underlying primitives are
 * referred to by `__real_` prefix (presumably both are resolved by the
 * linker symbol-wrapping option; the build flags are not visible here).
 */
#      define WRAP_FUNC(f) __wrap_##f
#      define REAL_FUNC(f) __real_##f
int REAL_FUNC(pthread_create)(pthread_t *,
                              GC_PTHREAD_CREATE_CONST pthread_attr_t *,
                              void *(*start_routine)(void *), void *);
int REAL_FUNC(pthread_join)(pthread_t, void **);
int REAL_FUNC(pthread_detach)(pthread_t);
#      ifndef GC_NO_PTHREAD_SIGMASK
int REAL_FUNC(pthread_sigmask)(int, const sigset_t *, sigset_t *);
#      endif
#      ifndef GC_NO_PTHREAD_CANCEL
int REAL_FUNC(pthread_cancel)(pthread_t);
#      endif
#      ifdef GC_HAVE_PTHREAD_EXIT
void REAL_FUNC(pthread_exit)(void *) GC_PTHREAD_EXIT_ATTRIBUTE;
#      endif
#    elif defined(GC_USE_DLOPEN_WRAP)
#      include <dlfcn.h>
#      define WRAP_FUNC(f) f
#      define REAL_FUNC(f) GC_real_##f
/*
 * We define both `GC_<fn>` and plain `fn` to be the wrapped function.
 * In that way plain calls work, as do calls from files that include
 * `gc.h` file which redefines `fn` to `GC_<fn>`.
 */
/* FIXME: Needs work for `DARWIN` and Tru64 (`OSF1`). */
/*
 * In this mode each `REAL_FUNC(fn)` is a file-local function pointer;
 * the pointers are filled in by `GC_init_real_syms()`.
 */
typedef int (*GC_pthread_create_t)(pthread_t *,
                                   GC_PTHREAD_CREATE_CONST pthread_attr_t *,
                                   void *(*)(void *), void *);
static GC_pthread_create_t REAL_FUNC(pthread_create);
#      ifndef GC_NO_PTHREAD_SIGMASK
typedef int (*GC_pthread_sigmask_t)(int, const sigset_t *, sigset_t *);
static GC_pthread_sigmask_t REAL_FUNC(pthread_sigmask);
#      endif
typedef int (*GC_pthread_join_t)(pthread_t, void **);
static GC_pthread_join_t REAL_FUNC(pthread_join);
typedef int (*GC_pthread_detach_t)(pthread_t);
static GC_pthread_detach_t REAL_FUNC(pthread_detach);
#      ifndef GC_NO_PTHREAD_CANCEL
typedef int (*GC_pthread_cancel_t)(pthread_t);
static GC_pthread_cancel_t REAL_FUNC(pthread_cancel);
#      endif
#      ifdef GC_HAVE_PTHREAD_EXIT
typedef void (*GC_pthread_exit_t)(void *) GC_PTHREAD_EXIT_ATTRIBUTE;
static GC_pthread_exit_t REAL_FUNC(pthread_exit);
#      endif
#    else
/*
 * No interception: the wrappers are the `GC_`-prefixed functions and
 * the underlying primitives are called by their plain names (or by
 * the `__d10_`-prefixed ones on DG/UX).
 */
#      define WRAP_FUNC(f) GC_##f
#      ifdef DGUX
#        define REAL_FUNC(f) __d10_##f
#      else
#        define REAL_FUNC(f) f
#      endif
#    endif /* !GC_USE_LD_WRAP && !GC_USE_DLOPEN_WRAP */
 167  
 168  /*
 169   * Define `GC_` functions as aliases for the plain ones, which will
 170   * be intercepted.  This allows files that include `gc.h` file, and
 171   * hence generate references to the `GC_` symbols, to see the right ones.
 172   */
 173  #    if defined(GC_USE_LD_WRAP) || defined(GC_USE_DLOPEN_WRAP)
 174  
 175  GC_API int
 176  GC_pthread_create(pthread_t *t, GC_PTHREAD_CREATE_CONST pthread_attr_t *a,
 177                    void *(*fn)(void *), void *arg)
 178  {
 179    return pthread_create(t, a, fn, arg);
 180  }
 181  
 182  #      ifndef GC_NO_PTHREAD_SIGMASK
 183  GC_API int
 184  GC_pthread_sigmask(int how, const sigset_t *mask, sigset_t *old)
 185  {
 186    return pthread_sigmask(how, mask, old);
 187  }
 188  #      endif /* !GC_NO_PTHREAD_SIGMASK */
 189  
 190  GC_API int
 191  GC_pthread_join(pthread_t t, void **res)
 192  {
 193    return pthread_join(t, res);
 194  }
 195  
 196  GC_API int
 197  GC_pthread_detach(pthread_t t)
 198  {
 199    return pthread_detach(t);
 200  }
 201  
 202  #      ifndef GC_NO_PTHREAD_CANCEL
 203  GC_API int
 204  GC_pthread_cancel(pthread_t t)
 205  {
 206    return pthread_cancel(t);
 207  }
 208  #      endif /* !GC_NO_PTHREAD_CANCEL */
 209  
 210  #      ifdef GC_HAVE_PTHREAD_EXIT
/*
 * The `GC_`-prefixed alias for thread termination: passes `retval`
 * through to `pthread_exit`.  There is no `return` statement, the
 * function is declared with `GC_PTHREAD_EXIT_ATTRIBUTE`.
 */
GC_API GC_PTHREAD_EXIT_ATTRIBUTE void
GC_pthread_exit(void *retval)
{
  pthread_exit(retval);
}
 216  #      endif
 217  #    endif /* GC_USE_LD_WRAP || GC_USE_DLOPEN_WRAP */
 218  
 219  #    ifdef GC_USE_DLOPEN_WRAP
/*
 * Set to `TRUE` at the end of `GC_init_real_syms()`, i.e. once all the
 * `REAL_FUNC()` pointers have been looked up.
 */
STATIC GC_bool GC_syms_initialized = FALSE;

/*
 * Resolve a symbol with name `n` from the dynamic library (given by
 * handle `h`) and cast it to the given functional type `fn`.
 */
#      define TYPED_DLSYM(fn, h, n) CAST_THRU_UINTPTR(fn, dlsym(h, n))
 227  
 228  STATIC void
 229  GC_init_real_syms(void)
 230  {
 231    void *dl_handle;
 232  
 233    GC_ASSERT(!GC_syms_initialized);
 234  #      ifdef RTLD_NEXT
 235    dl_handle = RTLD_NEXT;
 236  #      else
 237    dl_handle = dlopen("libpthread.so.0", RTLD_LAZY);
 238    if (NULL == dl_handle) {
 239      /* Retry without ".0" suffix. */
 240      dl_handle = dlopen("libpthread.so", RTLD_LAZY);
 241      if (NULL == dl_handle)
 242        ABORT("Couldn't open libpthread");
 243    }
 244  #      endif
 245    REAL_FUNC(pthread_create)
 246        = TYPED_DLSYM(GC_pthread_create_t, dl_handle, "pthread_create");
 247  #      ifdef RTLD_NEXT
 248    if (REAL_FUNC(pthread_create) == 0)
 249      ABORT("pthread_create not found"
 250            " (probably -lgc is specified after -lpthread)");
 251  #      endif
 252  #      ifndef GC_NO_PTHREAD_SIGMASK
 253    REAL_FUNC(pthread_sigmask)
 254        = TYPED_DLSYM(GC_pthread_sigmask_t, dl_handle, "pthread_sigmask");
 255  #      endif
 256    REAL_FUNC(pthread_join)
 257        = TYPED_DLSYM(GC_pthread_join_t, dl_handle, "pthread_join");
 258    REAL_FUNC(pthread_detach)
 259        = TYPED_DLSYM(GC_pthread_detach_t, dl_handle, "pthread_detach");
 260  #      ifndef GC_NO_PTHREAD_CANCEL
 261    REAL_FUNC(pthread_cancel)
 262        = TYPED_DLSYM(GC_pthread_cancel_t, dl_handle, "pthread_cancel");
 263  #      endif
 264  #      ifdef GC_HAVE_PTHREAD_EXIT
 265    REAL_FUNC(pthread_exit)
 266        = TYPED_DLSYM(GC_pthread_exit_t, dl_handle, "pthread_exit");
 267  #      endif
 268    GC_syms_initialized = TRUE;
 269  }
 270  
/*
 * Call `GC_init_real_syms()` unless the symbols are already looked up.
 * The empty "then" branch with the work in the `else` one lets the
 * macro be used as a single statement terminated by a semicolon at
 * the call site.
 */
#      define INIT_REAL_SYMS()             \
        if (LIKELY(GC_syms_initialized)) { \
        } else                             \
          GC_init_real_syms()
 275  #    else
 276  #      define INIT_REAL_SYMS() (void)0
 277  #    endif /* !GC_USE_DLOPEN_WRAP */
 278  
 279  #  else
/*
 * No interception on these targets: the wrappers are `GC_`-prefixed,
 * the underlying primitives are called by their plain names and there
 * are no symbols to look up.
 */
#    define WRAP_FUNC(f) GC_##f
#    define REAL_FUNC(f) f
#    define INIT_REAL_SYMS() (void)0
 283  #  endif /* GC_WIN32_THREADS */
 284  
 285  #  if defined(MPROTECT_VDB) && defined(DARWIN)
 286  GC_INNER int
 287  GC_inner_pthread_create(pthread_t *t,
 288                          GC_PTHREAD_CREATE_CONST pthread_attr_t *a,
 289                          void *(*fn)(void *), void *arg)
 290  {
 291    INIT_REAL_SYMS();
 292    return REAL_FUNC(pthread_create)(t, a, fn, arg);
 293  }
 294  #  endif
 295  
 296  #  ifndef GC_ALWAYS_MULTITHREADED
/*
 * Initially `FALSE`; not defined at all if `GC_ALWAYS_MULTITHREADED`.
 * NOTE(review): presumably turned on once locking becomes necessary -
 * the place where it is set is not in this part of the file.
 */
GC_INNER GC_bool GC_need_to_lock = FALSE;
 298  #  endif
 299  
 300  #  ifdef THREAD_LOCAL_ALLOC
 301  
 302  GC_INNER void
 303  GC_mark_thread_local_free_lists(void)
 304  {
 305    int i;
 306    GC_thread p;
 307  
 308    for (i = 0; i < THREAD_TABLE_SZ; ++i) {
 309      for (p = GC_threads[i]; p != NULL; p = p->tm.next) {
 310        if (!KNOWN_FINISHED(p))
 311          GC_mark_thread_local_fls_for(&p->tlfs);
 312      }
 313    }
 314  }
 315  
 316  #    if defined(GC_ASSERTIONS)
 317  /*
 318   * Check that all thread-local free-lists are completely marked.
 319   * Also check that thread-specific-data structures are marked.
 320   */
 321  void
 322  GC_check_tls(void)
 323  {
 324    int i;
 325    GC_thread p;
 326  
 327    for (i = 0; i < THREAD_TABLE_SZ; ++i) {
 328      for (p = GC_threads[i]; p != NULL; p = p->tm.next) {
 329        if (!KNOWN_FINISHED(p))
 330          GC_check_tls_for(&p->tlfs);
 331      }
 332    }
 333  #      if defined(USE_CUSTOM_SPECIFIC)
 334    if (GC_thread_key != 0)
 335      GC_check_tsd_marks(GC_thread_key);
 336  #      endif
 337  }
 338  #    endif
 339  
 340  #  endif /* THREAD_LOCAL_ALLOC */
 341  
#  ifdef GC_WIN32_THREADS
/*
 * A macro for functions and variables that should be accessible
 * from `win32_threads.c` file but otherwise could be `static`.
 */
#    define GC_INNER_WIN32THREAD GC_INNER
#  else
/* No `win32_threads.c` file to share with: use `STATIC` linkage macro. */
#    define GC_INNER_WIN32THREAD STATIC
#  endif
 351  
 352  #  ifdef PARALLEL_MARK
 353  
 354  #    if defined(GC_WIN32_THREADS) || defined(USE_PROC_FOR_LIBRARIES) \
 355          || (defined(IA64)                                            \
 356              && (defined(HAVE_PTHREAD_ATTR_GET_NP)                    \
 357                  || defined(HAVE_PTHREAD_GETATTR_NP)))
/*
 * The approximate stack pointer value recorded by each marker thread
 * at its start (see `GC_mark_thread()`), indexed by the marker number.
 */
GC_INNER_WIN32THREAD ptr_t GC_marker_sp[MAX_MARKERS - 1] = { 0 };
 359  #    endif /* GC_WIN32_THREADS || USE_PROC_FOR_LIBRARIES */
 360  
 361  #    if defined(IA64) && defined(USE_PROC_FOR_LIBRARIES)
/*
 * The value returned by `GC_save_regs_in_stack()` in each marker thread
 * at its start (see `GC_mark_thread()`), indexed by the marker number.
 */
static ptr_t marker_bsp[MAX_MARKERS - 1] = { 0 };
 363  #    endif
 364  
 365  #    if defined(DARWIN) && !defined(GC_NO_THREADS_DISCOVERY)
 366  static mach_port_t marker_mach_threads[MAX_MARKERS - 1] = { 0 };
 367  
 368  GC_INNER GC_bool
 369  GC_is_mach_marker(thread_act_t thread)
 370  {
 371    int i;
 372    for (i = 0; i < GC_markers_m1; i++) {
 373      if (marker_mach_threads[i] == thread)
 374        return TRUE;
 375    }
 376    return FALSE;
 377  }
 378  #    endif /* DARWIN && !GC_NO_THREADS_DISCOVERY */
 379  
 380  #    ifdef HAVE_PTHREAD_SETNAME_NP_WITH_TID_AND_ARG
 381  /* For NetBSD. */
 382  static void
 383  set_marker_thread_name(unsigned id)
 384  {
 385    int err = pthread_setname_np(pthread_self(), "GC-marker-%zu",
 386                                 NUMERIC_TO_VPTR(id));
 387    if (UNLIKELY(err != 0))
 388      WARN("pthread_setname_np failed, errno= %" WARN_PRIdPTR "\n",
 389           (GC_signed_word)err);
 390  }
 391  
 392  #    elif defined(HAVE_PTHREAD_SETNAME_NP_WITH_TID)     \
 393          || defined(HAVE_PTHREAD_SETNAME_NP_WITHOUT_TID) \
 394          || defined(HAVE_PTHREAD_SET_NAME_NP)
 395  #      ifdef HAVE_PTHREAD_SET_NAME_NP
 396  #        include <pthread_np.h>
 397  #      endif
 398  static void
 399  set_marker_thread_name(unsigned id)
 400  {
 401    /*
 402     * Note: a smaller size of the buffer may result in
 403     * "output may be truncated" compiler warning.
 404     */
 405    char name_buf[10 + 20 + 1];
 406  
 407    GC_snprintf_s_ld_s(name_buf, sizeof(name_buf), "GC-marker-", (long)id, "");
 408  #      ifdef HAVE_PTHREAD_SETNAME_NP_WITHOUT_TID
 409    /* The iOS or OS X case. */
 410    (void)pthread_setname_np(name_buf);
 411  #      elif defined(HAVE_PTHREAD_SET_NAME_NP)
 412    /* The OpenBSD case. */
 413    pthread_set_name_np(pthread_self(), name_buf);
 414  #      else
 415    /* The case of Linux, Solaris, etc. */
 416    GC_ASSERT(strlen(name_buf) < 16);
 417    /* `pthread_setname_np()` may fail for longer names. */
 418    if (UNLIKELY(pthread_setname_np(pthread_self(), name_buf) != 0))
 419      WARN("pthread_setname_np failed\n", 0);
 420  #      endif
 421  }
 422  
 423  #    elif defined(GC_WIN32_THREADS) && !defined(MSWINCE)
 424  /*
 425   * A pointer to `SetThreadDescription()` which is available since Windows 10.
 426   * The function prototype is in the platform `processthreadsapi.h` file.
 427   */
 428  static FARPROC setThreadDescription_fn;
 429  
 430  GC_INNER void
 431  GC_init_win32_thread_naming(HMODULE hK32)
 432  {
 433    if (hK32)
 434      setThreadDescription_fn = GetProcAddress(hK32, "SetThreadDescription");
 435  }
 436  
 437  static void
 438  set_marker_thread_name(unsigned id)
 439  {
 440    WCHAR name_buf[16];
 441    int len = sizeof(L"GC-marker-") / sizeof(WCHAR) - 1;
 442    HRESULT hr;
 443  
 444    if (!setThreadDescription_fn) {
 445      /* `SetThreadDescription()` is missing. */
 446      return;
 447    }
 448  
 449    /* Compose the name manually as `swprintf` may be unavailable. */
 450    BCOPY(L"GC-marker-", name_buf, len * sizeof(WCHAR));
 451    if (id >= 10)
 452      name_buf[len++] = (WCHAR)('0' + (id / 10) % 10);
 453    name_buf[len] = (WCHAR)('0' + id % 10);
 454    name_buf[len + 1] = 0;
 455  
 456    /*
 457     * Invoke `SetThreadDescription()`.  Cast the function pointer to
 458     * `GC_funcptr_uint` first to avoid "incompatible function types"
 459     * compiler warning.
 460     */
 461    hr = (*(HRESULT(WINAPI *)(HANDLE, const WCHAR *))(
 462        GC_funcptr_uint)setThreadDescription_fn)(GetCurrentThread(), name_buf);
 463    if (hr < 0)
 464      WARN("SetThreadDescription failed\n", 0);
 465  }
 466  #    else
/* No thread-naming primitive is available: just evaluate the argument. */
#      define set_marker_thread_name(id) (void)(id)
 468  #    endif
 469  
/*
 * The start routine of a marker (mark helper) thread.  `id` carries the
 * zero-based marker number encoded directly in the pointer value.
 * After recording its per-marker data and notifying the starter, the
 * thread loops forever calling `GC_help_marker()`; the only `return` is
 * the one for the `GC_WORD_MAX` argument value.  The signature depends
 * on the threads API in use.
 */
GC_INNER_WIN32THREAD
#    ifdef GC_PTHREADS_PARAMARK
void *
GC_mark_thread(void *id)
#    elif defined(MSWINCE)
DWORD WINAPI
GC_mark_thread(LPVOID id)
#    else
unsigned __stdcall GC_mark_thread(void *id)
#    endif
{
  word my_mark_no = 0;
  word id_n = (word)(GC_uintptr_t)id;
  IF_CANCEL(int cancel_state;)

  if (id_n == GC_WORD_MAX)
    return 0; /*< to prevent a compiler warning */

  /*
   * Mark threads are not cancellable; they should be invisible to
   * client.
   */
  DISABLE_CANCEL(cancel_state);

  set_marker_thread_name((unsigned)id_n);
  /* Record the per-marker data in the slots indexed by the marker number. */
#    if defined(GC_WIN32_THREADS) || defined(USE_PROC_FOR_LIBRARIES) \
        || (defined(IA64)                                            \
            && (defined(HAVE_PTHREAD_ATTR_GET_NP)                    \
                || defined(HAVE_PTHREAD_GETATTR_NP)))
  GC_marker_sp[id_n] = GC_approx_sp();
#    endif
#    if defined(IA64) && defined(USE_PROC_FOR_LIBRARIES)
  marker_bsp[id_n] = GC_save_regs_in_stack();
#    endif
#    if defined(DARWIN) && !defined(GC_NO_THREADS_DISCOVERY)
  marker_mach_threads[id_n] = mach_thread_self();
#    endif
#    if !defined(GC_PTHREADS_PARAMARK)
  GC_marker_Id[id_n] = thread_id_self();
#    endif

  /* Inform `GC_start_mark_threads` about completion of marker data init. */
  /*
   * NOTE(review): no matching release of the mark lock is visible in
   * this function - presumably `GC_help_marker()` expects the lock to
   * be held on entry; confirm against its definition.
   */
  GC_acquire_mark_lock();
  /* Note: the count variable may have a negative value. */
  if (0 == --GC_fl_builder_count)
    GC_notify_all_builder();

  /*
   * `GC_mark_no` is passed only to allow `GC_help_marker` to
   * terminate promptly.  This is important if it were called from the
   * signal handler or from the allocator lock acquisition code.
   * On Linux, it is not safe to call it from a signal handler, since
   * it uses mutex and condition variables.  Since it is called only
   * here, the argument is unnecessary.
   */
  for (;; ++my_mark_no) {
    /* The subtraction is unsigned, so it is also large if we are behind. */
    if (my_mark_no - GC_mark_no > (word)2) {
      /* Resynchronize if we get far off, e.g. because `GC_mark_no` wrapped. */
      my_mark_no = GC_mark_no;
    }
#    ifdef DEBUG_THREADS
    GC_log_printf("Starting helper for mark number %lu (thread %u)\n",
                  (unsigned long)my_mark_no, (unsigned)id_n);
#    endif
    GC_help_marker(my_mark_no);
  }
}
 537  
/*
 * The number of marker threads that `GC_start_mark_threads_inner()`
 * tries to create; nothing is started if the value is not positive.
 */
GC_INNER_WIN32THREAD int GC_available_markers_m1 = 0;
 539  
 540  #  endif /* PARALLEL_MARK */
 541  
 542  #  ifdef GC_PTHREADS_PARAMARK
 543  
/*
 * The mutex and the condition variable of the parallel marker.
 * NOTE(review): their lock/wait/notify users are not in this part of
 * the file.
 */
#    ifdef GLIBC_2_1_MUTEX_HACK
/*
 * Ugly workaround for a Linux threads bug in the final versions
 * of `glibc` 2.1.  `pthread_mutex_trylock` sets the mutex owner
 * field even when it fails to acquire the mutex.  This causes
 * `pthread_cond_wait` to die.  Should not be needed for `glibc` 2.2.
 * According to the man page, we should use
 * `PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP`, but that is not actually
 * defined.
 */
static pthread_mutex_t mark_mutex
    = { 0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, { 0, 0 } };
#    else
static pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER;
#    endif

#    ifdef CAN_HANDLE_FORK
/* Note: this is initialized by `GC_start_mark_threads_inner()`. */
static pthread_cond_t mark_cv;
#    else
static pthread_cond_t mark_cv = PTHREAD_COND_INITIALIZER;
#    endif
 566  
/*
 * Start the parallel marker threads (`GC_available_markers_m1` of them),
 * unless there is nothing to start or `GC_parallel` is already set.
 * The caller should hold the allocator lock with the cancellation
 * disabled (both are asserted).  The threads are created detached.
 * Unless `NO_MARKER_SPECIAL_SIGMASK` is defined, the signal mask of the
 * calling thread is altered around the threads creation (and restored
 * afterwards).  If the creation of some thread fails then no more
 * threads are created and `GC_markers_m1` is set to the number of the
 * ones created successfully.
 */
GC_INNER void
GC_start_mark_threads_inner(void)
{
  int i;
  pthread_attr_t attr;
#    ifndef NO_MARKER_SPECIAL_SIGMASK
  sigset_t set, oldset;
#    endif

  GC_ASSERT(I_HOLD_LOCK());
  ASSERT_CANCEL_DISABLED();
  if (GC_available_markers_m1 <= 0 || GC_parallel) {
    /* Skip if parallel markers disabled or already started. */
    return;
  }
  GC_wait_for_gc_completion(TRUE);

#    ifdef CAN_HANDLE_FORK
  /*
   * Initialize `mark_cv` (for the first time), or cleanup its value
   * after forking in the child process.  All the marker threads in the
   * parent process were blocked on this variable at process fork, so
   * `pthread_cond_wait()` malfunction (hang) is possible in the child
   * process without such a cleanup.
   */

  /*
   * TODO: This is not portable, it is better to shortly unblock all
   * marker threads in the parent process at `fork`.
   */
  {
    pthread_cond_t mark_cv_local = PTHREAD_COND_INITIALIZER;
    BCOPY(&mark_cv_local, &mark_cv, sizeof(mark_cv));
  }
#    endif

  GC_ASSERT(0 == GC_fl_builder_count);
  INIT_REAL_SYMS(); /*< for `pthread_create` */

  if (pthread_attr_init(&attr) != 0)
    ABORT("pthread_attr_init failed");
  if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
    ABORT("pthread_attr_setdetachstate failed");

#    ifdef DEFAULT_STACK_MAYBE_SMALL
  /*
   * The default stack size is usually too small: increase it.
   * Otherwise marker threads may run out of space.
   */
  {
    size_t old_size;

    if (pthread_attr_getstacksize(&attr, &old_size) != 0)
      ABORT("pthread_attr_getstacksize failed");
    if (old_size < MIN_STACK_SIZE && old_size != 0 /* stack size is known */) {
      if (pthread_attr_setstacksize(&attr, MIN_STACK_SIZE) != 0)
        ABORT("pthread_attr_setstacksize failed");
    }
  }
#    endif /* DEFAULT_STACK_MAYBE_SMALL */

#    ifndef NO_MARKER_SPECIAL_SIGMASK
  /*
   * Apply special signal mask to GC marker threads, and do not drop
   * user-defined signals by the marker threads.
   */
  if (sigfillset(&set) != 0)
    ABORT("sigfillset failed");

#      ifdef SIGNAL_BASED_STOP_WORLD
  /* These are used by GC to stop and restart the world. */
  if (sigdelset(&set, GC_get_suspend_signal()) != 0
      || sigdelset(&set, GC_get_thr_restart_signal()) != 0)
    ABORT("sigdelset failed");
#      endif

  /* The previous mask is saved to `oldset` to be restored below. */
  if (UNLIKELY(REAL_FUNC(pthread_sigmask)(SIG_BLOCK, &set, &oldset) != 0)) {
    WARN("pthread_sigmask set failed, no markers started\n", 0);
    GC_markers_m1 = 0;
    (void)pthread_attr_destroy(&attr);
    return;
  }
#    endif /* !NO_MARKER_SPECIAL_SIGMASK */

  /* To have proper `GC_parallel` value in `GC_help_marker()`. */
  GC_markers_m1 = GC_available_markers_m1;

  /* The marker number is passed to the thread in the pointer value. */
  for (i = 0; i < GC_available_markers_m1; ++i) {
    pthread_t new_thread;

#    ifdef GC_WIN32_THREADS
    GC_marker_last_stack_min[i] = ADDR_LIMIT;
#    endif
    if (UNLIKELY(REAL_FUNC(pthread_create)(&new_thread, &attr, GC_mark_thread,
                                           NUMERIC_TO_VPTR(i))
                 != 0)) {
      WARN("Marker thread %" WARN_PRIdPTR " creation failed\n",
           (GC_signed_word)i);
      /* Do not try to create other marker threads. */
      GC_markers_m1 = i;
      break;
    }
  }

#    ifndef NO_MARKER_SPECIAL_SIGMASK
  /* Restore previous signal mask. */
  if (UNLIKELY(REAL_FUNC(pthread_sigmask)(SIG_SETMASK, &oldset, NULL) != 0)) {
    WARN("pthread_sigmask restore failed\n", 0);
  }
#    endif

  (void)pthread_attr_destroy(&attr);
  GC_wait_for_markers_init();
  GC_COND_LOG_PRINTF("Started %d mark helper threads\n", GC_markers_m1);
}
 682  
 683  #  endif /* GC_PTHREADS_PARAMARK */
 684  
/*
 * The table of the thread descriptors.  A bucket is selected by
 * `THREAD_TABLE_INDEX()` of the thread id; the descriptors of a bucket
 * are chained through `tm.next` field.
 */
GC_INNER GC_thread GC_threads[THREAD_TABLE_SZ] = { 0 };

/*
 * It may not be safe to allocate when we register the first thread.
 * Note that `next` and `status` fields are unused, but there might be
 * some other fields (`crtn`) to be pushed.
 */
static struct GC_StackContext_Rep first_crtn;
static struct GC_Thread_Rep first_thread;

/*
 * A place to retain a pointer to an allocated object while a thread
 * registration is ongoing.  Protected by the allocator lock.
 */
static GC_stack_context_t saved_crtn = NULL;

#  ifdef GC_ASSERTIONS
/*
 * Initially `FALSE`.
 * NOTE(review): presumably set once the threads support is initialized -
 * the place where it is set is not in this part of the file.
 */
GC_INNER GC_bool GC_thr_initialized = FALSE;
#  endif
 704  
/*
 * Push the private thread-related structures which may hold pointers
 * to the collectible heap: the whole `GC_threads` table, `crtn` field
 * of `first_thread`, and `saved_crtn`; nothing of these is pushed in
 * the `GC_win32_dll_threads` mode.  `GC_thread_key` is pushed as well
 * if the custom thread-specific-data implementation is used for the
 * thread-local allocation.  The caller should hold the allocator lock.
 */
GC_INNER void
GC_push_thread_structures(void)
{
  GC_ASSERT(I_HOLD_LOCK());
#  if !defined(GC_NO_THREADS_DISCOVERY) && defined(GC_WIN32_THREADS)
  if (GC_win32_dll_threads) {
    /*
     * Unlike the other threads implementations, the thread table here
     * contains no pointers to the collectible heap (note also that
     * `GC_PTHREADS` is incompatible with `DllMain`-based thread
     * registration).  Thus we have no private structures we need to
     * preserve.
     */
  } else
#  endif
  /* else */ {
    GC_push_all(&GC_threads, (ptr_t)(&GC_threads) + sizeof(GC_threads));
    /* These fields of `first_thread` are not pushed, so should be unused. */
    GC_ASSERT(NULL == first_thread.tm.next);
#  ifdef GC_PTHREADS
    GC_ASSERT(NULL == first_thread.status);
#  endif
    GC_PUSH_ALL_SYM(first_thread.crtn);
    GC_PUSH_ALL_SYM(saved_crtn);
  }
#  if defined(THREAD_LOCAL_ALLOC) && defined(USE_CUSTOM_SPECIFIC)
  GC_PUSH_ALL_SYM(GC_thread_key);
#  endif
}
 733  
 734  #  if defined(MPROTECT_VDB) && defined(GC_WIN32_THREADS)
 735  GC_INNER void
 736  GC_win32_unprotect_thread(GC_thread t)
 737  {
 738    GC_ASSERT(I_HOLD_LOCK());
 739    if (!GC_win32_dll_threads && GC_auto_incremental) {
 740      GC_stack_context_t crtn = t->crtn;
 741  
 742      if (crtn != &first_crtn) {
 743        GC_ASSERT(SMALL_OBJ(GC_size(crtn)));
 744        GC_remove_protection(HBLKPTR(crtn), 1, FALSE);
 745      }
 746      if (t != &first_thread) {
 747        GC_ASSERT(SMALL_OBJ(GC_size(t)));
 748        GC_remove_protection(HBLKPTR(t), 1, FALSE);
 749      }
 750    }
 751  }
 752  #  endif /* MPROTECT_VDB && GC_WIN32_THREADS */
 753  
 754  #  ifdef DEBUG_THREADS
 755  STATIC int
 756  GC_count_threads(void)
 757  {
 758    int i;
 759    int count = 0;
 760  
 761  #    if !defined(GC_NO_THREADS_DISCOVERY) && defined(GC_WIN32_THREADS)
 762    if (GC_win32_dll_threads)
 763      return -1; /*< not implemented */
 764  #    endif
 765    GC_ASSERT(I_HOLD_READER_LOCK());
 766    for (i = 0; i < THREAD_TABLE_SZ; ++i) {
 767      GC_thread p;
 768  
 769      for (p = GC_threads[i]; p != NULL; p = p->tm.next) {
 770        if (!KNOWN_FINISHED(p))
 771          ++count;
 772      }
 773    }
 774    return count;
 775  }
 776  #  endif /* DEBUG_THREADS */
 777  
/*
 * Create a thread descriptor and link it at the head of `GC_threads`
 * bucket selected by `THREAD_TABLE_INDEX(self_id)`.  The very first
 * descriptor is the statically allocated `first_thread` (with
 * `first_crtn` as its stack context); the subsequent ones are obtained
 * by `GC_INTERNAL_MALLOC()`, aborting on an allocation failure.
 * The caller should hold the allocator lock; `id` field of the result
 * is left for the caller to set.
 */
GC_INNER_WIN32THREAD GC_thread
GC_new_thread(thread_id_t self_id)
{
  int hv = THREAD_TABLE_INDEX(self_id);
  GC_thread result;

  GC_ASSERT(I_HOLD_LOCK());
#  ifdef DEBUG_THREADS
  GC_log_printf("Creating thread %p\n", THREAD_ID_TO_VPTR(self_id));
  /* Report (once) if the bucket already holds some other thread id. */
  for (result = GC_threads[hv]; result != NULL; result = result->tm.next)
    if (!THREAD_ID_EQUAL(result->id, self_id)) {
      GC_log_printf("Hash collision at GC_threads[%d]\n", hv);
      break;
    }
#  endif
  /* A null `first_thread.crtn` means the static descriptor is not in use. */
  if (UNLIKELY(NULL == first_thread.crtn)) {
    result = &first_thread;
    first_thread.crtn = &first_crtn;
    GC_ASSERT(NULL == GC_threads[hv]);
#  if defined(CPPCHECK) && defined(THREAD_SANITIZER) \
      && defined(SIGNAL_BASED_STOP_WORLD)
    GC_noop1((unsigned char)first_crtn.dummy[0]);
#  endif
  } else {
    GC_stack_context_t crtn;

    GC_ASSERT(!GC_win32_dll_threads);
    GC_ASSERT(!GC_in_thread_creation);
    GC_in_thread_creation = TRUE; /*< OK to collect from unknown thread */
    crtn = (GC_stack_context_t)GC_INTERNAL_MALLOC(
        sizeof(struct GC_StackContext_Rep), NORMAL);

    /*
     * The current stack is not scanned until the thread is registered,
     * thus `crtn` pointer is to be retained in the global data roots for
     * a while (and pushed explicitly if a collection occurs here).
     */
    GC_ASSERT(NULL == saved_crtn);
    saved_crtn = crtn;
    result
        = (GC_thread)GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL);
    /* No more collections till thread is registered. */
    saved_crtn = NULL;
    GC_in_thread_creation = FALSE;
    /* Both allocation results are checked only here, after the flag reset. */
    if (NULL == crtn || NULL == result)
      ABORT("Failed to allocate memory for thread registering");
    result->crtn = crtn;
  }
  /* The `id` field is not set here. */
#  ifdef USE_TKILL_ON_ANDROID
  result->kernel_id = gettid();
#  endif
  /* Insert the descriptor at the head of the bucket chain. */
  result->tm.next = GC_threads[hv];
  GC_threads[hv] = result;
#  ifdef NACL
  GC_nacl_initialize_gc_thread(result);
#  endif
  GC_ASSERT(0 == result->flags);
  if (LIKELY(result != &first_thread))
    GC_dirty(result);
  return result;
}
 840  
/*
 * Dispose of the thread descriptor `t`.  In the `GC_win32_dll_threads`
 * mode the thread handle is closed and the entry is cleared and marked
 * as not in use (this branch does not assert the lock is held).
 * Otherwise (the allocator lock should be held) `t` is unlinked from
 * its `GC_threads` bucket and, unless it is `first_thread`, freed
 * together with its stack context.  In the latter case `t` is expected
 * to be present in the bucket - the search loop has no end-of-list
 * check.
 */
GC_INNER_WIN32THREAD void
GC_delete_thread(GC_thread t)
{
#  if !defined(GC_NO_THREADS_DISCOVERY) && defined(GC_WIN32_THREADS)
  if (GC_win32_dll_threads) {
    HANDLE handle = t->handle;

    GC_cptr_store_release(&t->handle, NULL);
    CloseHandle(handle);
    /*
     * This is intended to be lock-free.  It is either called synchronously
     * from the thread being deleted, or by the joining thread.  In this
     * branch asynchronous changes to `*t` are possible.  Note that it is
     * not allowed to call `GC_printf` (and the friends) here, see
     * `GC_stop_world()` in `win32_threads.c` file for the information.
     */
    t->crtn->stack_end = NULL;
    t->id = 0;
    /* The thread is not suspended. */
    t->flags = 0;
#    ifdef RETRY_GET_THREAD_CONTEXT
    t->context_sp = NULL;
#    endif
    /* The entry is marked as free last, after all the fields are reset. */
    AO_store_release(&t->tm.in_use, FALSE);
  } else
#  endif
  /* else */ {
    thread_id_t id = t->id;
    int hv = THREAD_TABLE_INDEX(id);
    GC_thread p;
    GC_thread prev = NULL;

    GC_ASSERT(I_HOLD_LOCK());
#  if defined(DEBUG_THREADS) && !defined(MSWINCE) \
      && (!defined(MSWIN32) || defined(CONSOLE_LOG))
    GC_log_printf("Deleting thread %p, n_threads= %d\n", THREAD_ID_TO_VPTR(id),
                  GC_count_threads());
#  endif
#  if defined(GC_WIN32_THREADS) && !defined(MSWINCE)
    CloseHandle(t->handle);
#  endif
    /* Find `t` in its bucket, remembering the preceding entry (if any). */
    for (p = GC_threads[hv]; p != t; p = p->tm.next) {
      prev = p;
    }
    if (NULL == prev) {
      GC_threads[hv] = p->tm.next;
    } else {
      GC_ASSERT(prev != &first_thread);
      prev->tm.next = p->tm.next;
      GC_dirty(prev);
    }
    /* The statically allocated descriptor is never freed. */
    if (LIKELY(p != &first_thread)) {
#  ifdef DARWIN
      mach_port_deallocate(mach_task_self(), p->mach_thread);
#  endif
      GC_ASSERT(p->crtn != &first_crtn);
      GC_INTERNAL_FREE(p->crtn);
      GC_INTERNAL_FREE(p);
    }
  }
}
 902  
 903  GC_INNER GC_thread
 904  GC_lookup_thread(thread_id_t id)
 905  {
 906    GC_thread p;
 907  
 908  #  if !defined(GC_NO_THREADS_DISCOVERY) && defined(GC_WIN32_THREADS)
 909    if (GC_win32_dll_threads)
 910      return GC_win32_dll_lookup_thread(id);
 911  #  endif
 912    for (p = GC_threads[THREAD_TABLE_INDEX(id)]; p != NULL; p = p->tm.next) {
 913      if (LIKELY(THREAD_ID_EQUAL(p->id, id)))
 914        break;
 915    }
 916    return p;
 917  }
 918  
 919  /*
 920   * Same as `GC_self_thread_inner()` but acquires the allocator lock (in
 921   * the reader mode).
 922   */
 923  STATIC GC_thread
 924  GC_self_thread(void)
 925  {
 926    GC_thread p;
 927  
 928    READER_LOCK();
 929    p = GC_self_thread_inner();
 930    READER_UNLOCK();
 931    return p;
 932  }
 933  
 934  #  ifndef GC_NO_FINALIZATION
 935  GC_INNER void
 936  GC_reset_finalizer_nested(void)
 937  {
 938    GC_ASSERT(I_HOLD_LOCK());
 939    GC_self_thread_inner()->crtn->finalizer_nested = 0;
 940  }
 941  
 942  GC_INNER unsigned char *
 943  GC_check_finalizer_nested(void)
 944  {
 945    GC_thread me;
 946    GC_stack_context_t crtn;
 947    unsigned nesting_level;
 948  
 949    GC_ASSERT(I_HOLD_LOCK());
 950    me = GC_self_thread_inner();
 951  #    if defined(INCLUDE_LINUX_THREAD_DESCR) && defined(REDIRECT_MALLOC)
 952    /*
 953     * As noted in `GC_pthread_start`, an allocation may happen in
 954     * `GC_get_stack_base`, causing `GC_notify_or_invoke_finalizers`
 955     * to be called before the thread gets registered.
 956     */
 957    if (UNLIKELY(NULL == me))
 958      return NULL;
 959  #    endif
 960    crtn = me->crtn;
 961    nesting_level = crtn->finalizer_nested;
 962    if (nesting_level) {
 963      /*
 964       * We are inside another `GC_invoke_finalizers()`.  Skip some
 965       * implicitly-called `GC_invoke_finalizers()` depending on the
 966       * nesting (recursion) level.
 967       */
 968      if ((unsigned)(++crtn->finalizer_skipped) < (1U << nesting_level))
 969        return NULL;
 970      crtn->finalizer_skipped = 0;
 971    }
 972    crtn->finalizer_nested = (unsigned char)(nesting_level + 1);
 973    return &crtn->finalizer_nested;
 974  }
 975  #  endif /* !GC_NO_FINALIZATION */
 976  
/*
 * Apply `ADDR_INSIDE()` to `p` and the address range which starts at
 * `obj` and spans `sizeof(obj)` bytes.  `obj` should be an lvalue (its
 * address is taken).
 */
#  define ADDR_INSIDE_OBJ(p, obj) \
    ADDR_INSIDE(p, (ptr_t)(&(obj)), (ptr_t)(&(obj)) + sizeof(obj))
 979  
 980  #  if defined(GC_ASSERTIONS) && defined(THREAD_LOCAL_ALLOC)
 981  /* This is called from thread-local `GC_malloc()`. */
 982  GC_bool
 983  GC_is_thread_tsd_valid(void *tsd)
 984  {
 985    GC_thread me = GC_self_thread();
 986  
 987    return ADDR_INSIDE_OBJ((ptr_t)tsd, me->tlfs);
 988  }
 989  #  endif /* GC_ASSERTIONS && THREAD_LOCAL_ALLOC */
 990  
 991  GC_API int GC_CALL
 992  GC_thread_is_registered(void)
 993  {
 994    /* TODO: Use `GC_get_tlfs()` instead. */
 995    GC_thread me = GC_self_thread();
 996  
 997    return me != NULL && !KNOWN_FINISHED(me);
 998  }
 999  
1000  GC_API void GC_CALL
1001  GC_register_altstack(void *normstack, size_t normstack_size, void *altstack,
1002                       size_t altstack_size)
1003  {
1004  #  ifdef GC_WIN32_THREADS
1005    /* TODO: Implement. */
1006    UNUSED_ARG(normstack);
1007    UNUSED_ARG(normstack_size);
1008    UNUSED_ARG(altstack);
1009    UNUSED_ARG(altstack_size);
1010  #  else
1011    GC_thread me;
1012    GC_stack_context_t crtn;
1013  
1014    READER_LOCK();
1015    me = GC_self_thread_inner();
1016    if (UNLIKELY(NULL == me)) {
1017      /* We are called before `GC_thr_init()`. */
1018      me = &first_thread;
1019    }
1020    crtn = me->crtn;
1021    crtn->normstack = (ptr_t)normstack;
1022    crtn->normstack_size = normstack_size;
1023    crtn->altstack = (ptr_t)altstack;
1024    crtn->altstack_size = altstack_size;
1025    READER_UNLOCK_RELEASE();
1026  #  endif
1027  }
1028  
1029  #  ifdef USE_PROC_FOR_LIBRARIES
1030  GC_INNER GC_bool
1031  GC_segment_is_thread_stack(ptr_t lo, ptr_t hi)
1032  {
1033    int i;
1034    GC_thread p;
1035  
1036    GC_ASSERT(I_HOLD_READER_LOCK());
1037  #    ifdef PARALLEL_MARK
1038    for (i = 0; i < GC_markers_m1; ++i) {
1039      if (ADDR_LT(lo, GC_marker_sp[i]) && ADDR_LT(GC_marker_sp[i], hi))
1040        return TRUE;
1041  #      ifdef IA64
1042      if (ADDR_LT(lo, marker_bsp[i]) && ADDR_LT(marker_bsp[i], hi))
1043        return TRUE;
1044  #      endif
1045    }
1046  #    endif
1047    for (i = 0; i < THREAD_TABLE_SZ; i++) {
1048      for (p = GC_threads[i]; p != NULL; p = p->tm.next) {
1049        ptr_t stack_end = p->crtn->stack_end;
1050  
1051        if (stack_end != NULL) {
1052  #    ifdef STACK_GROWS_UP
1053          if (ADDR_INSIDE(stack_end, lo, hi))
1054            return TRUE;
1055  #    else
1056          if (ADDR_LT(lo, stack_end) && ADDR_GE(hi, stack_end))
1057            return TRUE;
1058  #    endif
1059        }
1060      }
1061    }
1062    return FALSE;
1063  }
1064  #  endif /* USE_PROC_FOR_LIBRARIES */
1065  
1066  #  if (defined(HAVE_PTHREAD_ATTR_GET_NP) || defined(HAVE_PTHREAD_GETATTR_NP)) \
1067        && defined(IA64)
1068  GC_INNER ptr_t
1069  GC_greatest_stack_base_below(ptr_t bound)
1070  {
1071    int i;
1072    GC_thread p;
1073    ptr_t result = NULL;
1074  
1075    GC_ASSERT(I_HOLD_READER_LOCK());
1076  #    ifdef PARALLEL_MARK
1077    for (i = 0; i < GC_markers_m1; ++i) {
1078      if (ADDR_LT(result, GC_marker_sp[i]) && ADDR_LT(GC_marker_sp[i], bound))
1079        result = GC_marker_sp[i];
1080    }
1081  #    endif
1082    for (i = 0; i < THREAD_TABLE_SZ; i++) {
1083      for (p = GC_threads[i]; p != NULL; p = p->tm.next) {
1084        ptr_t stack_end = p->crtn->stack_end;
1085  
1086        if (ADDR_LT(result, stack_end) && ADDR_LT(stack_end, bound))
1087          result = stack_end;
1088      }
1089    }
1090    return result;
1091  }
1092  #  endif /* IA64 */
1093  
1094  #  ifndef STAT_READ
1095  /*
1096   * Note: if `read()` is wrapped, this may need to be redefined to call
1097   * the real one.
1098   */
1099  #    define STAT_READ read
1100  #  endif
1101  
1102  #  ifdef HPUX
1103  #    define GC_get_nprocs() pthread_num_processors_np()
1104  
1105  #  elif defined(AIX) || defined(COSMO) || defined(HAIKU)         \
1106        || defined(HOST_ANDROID) || defined(HURD) || defined(NACL) \
1107        || defined(OSF1) || defined(SOLARIS)
1108  GC_INLINE int
1109  GC_get_nprocs(void)
1110  {
1111    int nprocs = (int)sysconf(_SC_NPROCESSORS_ONLN);
1112    /* Note: ignore any error silently. */
1113    return nprocs > 0 ? nprocs : 1;
1114  }
1115  
1116  #  elif defined(IRIX5)
1117  GC_INLINE int
1118  GC_get_nprocs(void)
1119  {
1120    int nprocs = (int)sysconf(_SC_NPROC_ONLN);
1121    /* Note: ignore any error silently. */
1122    return nprocs > 0 ? nprocs : 1;
1123  }
1124  
1125  #  elif defined(LINUX)
1126  /* Return the number of processors. */
1127  STATIC int
1128  GC_get_nprocs(void)
1129  {
1130    /*
1131     * Should be just `return sysconf(_SC_NPROCESSORS_ONLN)` but that
1132     * appears to be buggy in many cases.  We look for lines "cpu<N>" in
1133     * `/proc/stat` pseudo-file.  No need to read the entire `/proc/stat`
1134     * pseudo-file to get maximum "cpu<N>" such as:
1135     *   - the requested lines are located at the beginning of the file;
1136     *   - the lines with "cpu<N>" where `N` is greater than `MAX_MARKERS`
1137     *     are not needed.
1138     */
1139  #    define PROC_STAT_BUF_SZ ((1 + MAX_MARKERS) * 100)
1140    char stat_buf[PROC_STAT_BUF_SZ + 1]; /*< the size should be enough */
1141    int f;
1142    int result, i, len;
1143  
1144    f = open("/proc/stat", O_RDONLY);
1145    if (f < 0) {
1146      WARN("Could not open /proc/stat\n", 0);
1147      /* Assume an uniprocessor. */
1148      return 1;
1149    }
1150    len = STAT_READ(f, stat_buf, sizeof(stat_buf) - 1);
1151    /* Unlikely that we need to retry because of an incomplete read here. */
1152    if (len < 0) {
1153      WARN("Failed to read /proc/stat, errno= %" WARN_PRIdPTR "\n",
1154           (GC_signed_word)errno);
1155      close(f);
1156      return 1;
1157    }
1158    /* Avoid potential buffer overrun by `atoi()`. */
1159    stat_buf[len] = '\0';
1160  
1161    close(f);
1162  
1163    /*
1164     * Some old kernels only have a single "cpu nnnn ..." entry in
1165     * `/proc/stat` pseudo-file.  We identify those as uniprocessors.
1166     */
1167    result = 1;
1168  
1169    for (i = 0; i < len - 4; ++i) {
1170      if (stat_buf[i] == '\n' && stat_buf[i + 1] == 'c' && stat_buf[i + 2] == 'p'
1171          && stat_buf[i + 3] == 'u') {
1172        int cpu_no = atoi(&stat_buf[i + 4]);
1173        if (cpu_no >= result)
1174          result = cpu_no + 1;
1175      }
1176    }
1177    return result;
1178  }
1179  
1180  #  elif defined(DGUX)
1181  /*
1182   * Return the number of processors, or a non-positive value if the number
1183   * cannot be determined.
1184   */
1185  STATIC int
1186  GC_get_nprocs(void)
1187  {
1188    int numCpus;
1189    struct dg_sys_info_pm_info pm_sysinfo;
1190    int status = 0;
1191  
1192    status = dg_sys_info((long int *)&pm_sysinfo, DG_SYS_INFO_PM_INFO_TYPE,
1193                         DG_SYS_INFO_PM_CURRENT_VERSION);
1194    if (status < 0) {
1195      /* Set -1 for an error. */
1196      numCpus = -1;
1197    } else {
1198      /* Active CPUs. */
1199      numCpus = pm_sysinfo.idle_vp_count;
1200    }
1201    return numCpus;
1202  }
1203  
1204  #  elif defined(ANY_BSD) || defined(DARWIN)
1205  STATIC int
1206  GC_get_nprocs(void)
1207  {
1208    int mib[] = { CTL_HW, HW_NCPU };
1209    int res;
1210    size_t len = sizeof(res);
1211  
1212    sysctl(mib, sizeof(mib) / sizeof(int), &res, &len, NULL, 0);
1213    return res;
1214  }
1215  
1216  #  else
1217  /* E.g., RTEMS. */
1218  /* TODO: Implement. */
1219  #    define GC_get_nprocs() 1
1220  #  endif
1221  
1222  #  if defined(LINUX) && defined(ARM32)
1223  /*
1224   * Some buggy Linux/arm kernels show only non-sleeping CPUs in
1225   * `/proc/stat` pseudo-file (and in `/proc/cpuinfo` pseudo-file), so
1226   * another data system source is tried first.  Returns a non-positive
1227   * value on error.
1228   */
1229  STATIC int
1230  GC_get_nprocs_present(void)
1231  {
1232    char stat_buf[16];
1233    int f;
1234    int len;
1235  
1236    f = open("/sys/devices/system/cpu/present", O_RDONLY);
1237    if (f < 0) {
1238      /* Cannot open the file. */
1239      return -1;
1240    }
1241  
1242    len = STAT_READ(f, stat_buf, sizeof(stat_buf));
1243    close(f);
1244  
1245    /*
1246     * Recognized file format: "0\n" or "0-<max_cpu_num>\n".
1247     * The file might probably contain a comma-separated list
1248     * but we do not need to handle it (just silently ignore).
1249     */
1250    if (len < 2 || stat_buf[0] != '0' || stat_buf[len - 1] != '\n') {
1251      /* A read error or an unrecognized content. */
1252      return 0;
1253    } else if (len == 2) {
1254      /* An uniprocessor. */
1255      return 1;
1256    } else if (stat_buf[1] != '-') {
1257      /* An unrecognized content. */
1258      return 0;
1259    }
1260  
1261    /* Terminate the string. */
1262    stat_buf[len - 1] = '\0';
1263  
1264    /* Skip "0-" and parse `max_cpu_num`. */
1265    return atoi(&stat_buf[2]) + 1;
1266  }
1267  #  endif /* LINUX && ARM32 */
1268  
1269  #  if defined(CAN_HANDLE_FORK) && defined(THREAD_SANITIZER)
1270  #    include "private/gc_pmark.h" /*< for `MS_NONE` */
1271  
1272  /*
1273   * Workaround for TSan which does not notice that the allocator lock
1274   * is acquired in `fork_prepare_proc()`.
1275   */
1276  GC_ATTR_NO_SANITIZE_THREAD
1277  static GC_bool
1278  collection_in_progress(void)
1279  {
1280    return GC_mark_state != MS_NONE;
1281  }
1282  #  else
1283  #    define collection_in_progress() GC_collection_in_progress()
1284  #  endif
1285  
/*
 * If the incremental mode is on and a collection is in progress, keep
 * advancing the collection by `GC_collect_a_little_inner(1)`, releasing
 * the allocator lock temporarily and yielding the processor after each
 * step.  The loop ends once no collection is in progress (or the
 * incremental mode is off); if `wait_for_all` is false, then it also
 * ends as soon as `GC_gc_no` changes.  The allocator lock is expected
 * to be held on entry, and it is held on return.  This is a no-op if
 * `GC_DISABLE_INCREMENTAL` is defined.
 */
GC_INNER void
GC_wait_for_gc_completion(GC_bool wait_for_all)
{
#  if !defined(THREAD_SANITIZER) || !defined(CAN_CALL_ATFORK)
  /*
   * `GC_lock_holder` is accessed with the allocator lock held, so
   * there is no data race actually (unlike what is reported by TSan).
   */
  GC_ASSERT(I_HOLD_LOCK());
#  endif
  ASSERT_CANCEL_DISABLED();
#  ifdef GC_DISABLE_INCREMENTAL
  (void)wait_for_all;
#  else
  if (GC_incremental && collection_in_progress()) {
    /* Remembered to detect the change of `GC_gc_no` in the loop below. */
    word old_gc_no = GC_gc_no;

    /*
     * Make sure that no part of our stack is still on the mark stack,
     * since it is about to be unmapped.
     */
#    ifdef LINT2
    /*
     * Note: do not transform this `if`-`do`-`while` construction into
     * a single while statement because it might cause some static code
     * analyzers to report a false positive (FP) code defect about
     * missing unlock after lock.
     */
#    endif
    do {
      /* `GC_in_thread_creation` is set only around the collection step. */
      GC_ASSERT(!GC_in_thread_creation);
      GC_in_thread_creation = TRUE;
      GC_collect_a_little_inner(1);
      GC_in_thread_creation = FALSE;

      /* Release the allocator lock while yielding the processor. */
      UNLOCK();
#    ifdef GC_WIN32_THREADS
      Sleep(0);
#    else
      sched_yield();
#    endif
      LOCK();
    } while (GC_incremental && collection_in_progress()
             && (wait_for_all || old_gc_no == GC_gc_no));
  }
#  endif
}
1333  
1334  #  if defined(GC_ASSERTIONS) && defined(GC_PTHREADS_PARAMARK)
1335  STATIC unsigned long GC_mark_lock_holder = NO_THREAD;
1336  #  endif
1337  
1338  #  ifdef CAN_HANDLE_FORK
1339  
/*
 * Procedures called before and after a process fork.  The goal here is
 * to make it safe to call `GC_malloc()` in the forked child process.
 * It is unclear whether that is attainable, since the Single UNIX
 * Specification seems to imply that one should only call
 * async-signal-safe functions, and we probably cannot quite guarantee
 * that.  But we give it our best shot.  (That same specification also
 * implies that it is not safe to call the system `malloc` between `fork`
 * and `exec`.  Thus we are doing no worse than it.)
 */
1350  
1351  IF_CANCEL(static int fork_cancel_state;) /*< protected by the allocator lock */
1352  
1353  #    ifdef PARALLEL_MARK
1354  #      ifdef THREAD_SANITIZER
1355  #        if defined(GC_ASSERTIONS) && defined(CAN_CALL_ATFORK)
1356  STATIC void GC_generic_lock(pthread_mutex_t *);
1357  #        endif
1358  GC_ATTR_NO_SANITIZE_THREAD
1359  static void wait_for_reclaim_atfork(void);
1360  #      else
1361  #        define wait_for_reclaim_atfork() GC_wait_for_reclaim()
1362  #      endif
1363  #    endif /* PARALLEL_MARK */
1364  
/*
 * Store `me` (which may be `NULL`) to `GC_threads[hv]`, i.e. make it
 * the head of the hash chain with index `hv`.  This is a separate function
 * to prevent TSan false positive (FP) about the race during items removal
 * from `GC_threads`.  (The race cannot happen since only one thread
 * survives in the child process.)
 */
#    ifdef CAN_CALL_ATFORK
GC_ATTR_NO_SANITIZE_THREAD
#    endif
static void
store_to_threads_table(int hv, GC_thread me)
{
  GC_threads[hv] = me;
}
1378  
/*
 * Remove all entries from the `GC_threads` table, except the one for
 * the current thread.  Also update thread identifiers stored in the
 * table for the current thread.  We need to do this in the child process
 * after a `fork()`, since only the current thread survives in the child
 * process.  The current thread is recognized by comparing the stored
 * `pthreads` identifier with `GC_parent_pthread_self` (which is set by
 * `fork_prepare_proc()`).
 */
STATIC void
GC_remove_all_threads_but_me(void)
{
  int hv;
  GC_thread me = NULL;
#    ifndef GC_WIN32_THREADS
  /* On non-Win32 targets, `pthread_id` below refers to `id` field. */
#      define pthread_id id
#    endif

  for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) {
    GC_thread p, next;

    for (p = GC_threads[hv]; p != NULL; p = next) {
      /* Fetch the link first since `p` may be freed or unlinked below. */
      next = p->tm.next;
      if (THREAD_EQUAL(p->pthread_id, GC_parent_pthread_self) && me == NULL) {
        /* Ignore dead threads with the same id. */
        me = p;
        p->tm.next = NULL;
      } else {
#    ifdef THREAD_LOCAL_ALLOC
        if (!KNOWN_FINISHED(p)) {
          /*
           * Cannot call `GC_destroy_thread_local` here.  The free
           * lists may be in an inconsistent state (as thread `p` may
           * be updating one of the lists by `GC_generic_malloc_many()`
           * or `GC_FAST_MALLOC_GRANS()` when `fork()` is invoked).
           * This should not be a problem because the lost elements
           * of the free lists will be collected during GC.
           */
          GC_remove_specific_after_fork(GC_thread_key, p->pthread_id);
        }
#    endif
        /*
         * TODO: To avoid TSan hang (when updating `GC_bytes_freed`),
         * we just skip explicit freeing of `GC_threads` entries.
         */
#    if !defined(THREAD_SANITIZER) || !defined(CAN_CALL_ATFORK)
        if (p != &first_thread) {
          /* TODO: Should call `mach_port_deallocate`? */
          GC_ASSERT(p->crtn != &first_crtn);
          GC_INTERNAL_FREE(p->crtn);
          GC_INTERNAL_FREE(p);
        }
#    endif
      }
    }
    /*
     * Empty the bucket.  The entry of the current thread is put back
     * to the table after the loop (when its identifiers are updated).
     */
    store_to_threads_table(hv, NULL);
  }

#    if defined(CPPCHECK) || defined(LINT2)
  if (NULL == me)
    ABORT("Current thread is not found after fork");
#    else
  GC_ASSERT(me != NULL);
#    endif
  /*
   * Update `pthreads` id as it is not guaranteed to be the same between
   * this (child) process and the parent one.
   */
  me->pthread_id = pthread_self();
#    ifdef GC_WIN32_THREADS
  /*
   * Update Win32 thread id and handle.  They differ from that in the
   * parent process.
   */
  me->id = thread_id_self();
#      ifndef MSWINCE
  if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
                       GetCurrentProcess(), (HANDLE *)&me->handle,
                       0 /* `dwDesiredAccess` */, FALSE /* `bInheritHandle` */,
                       DUPLICATE_SAME_ACCESS))
    ABORT("DuplicateHandle failed");
#      endif
#    endif
#    ifdef DARWIN
  /*
   * Update thread id after process fork (it is OK to call
   * `GC_destroy_thread_local()` and `GC_free_inner()` before update).
   */
  me->mach_thread = mach_thread_self();
#    endif
#    ifdef USE_TKILL_ON_ANDROID
  me->kernel_id = gettid();
#    endif

  /* Put `me` back to `GC_threads`. */
  store_to_threads_table(THREAD_TABLE_INDEX(me->id), me);

#    ifdef THREAD_LOCAL_ALLOC
#      ifdef USE_CUSTOM_SPECIFIC
  GC_update_specific_after_fork(GC_thread_key);
#      else
  /*
   * Some TLS implementations (e.g., in Cygwin) might be not `fork`-friendly,
   * so we re-assign thread-local pointer to `tlfs` for safety instead of the
   * assertion check (again, it is OK to call `GC_destroy_thread_local()` and
   * `GC_free_inner()` before).
   */
  {
    int res = GC_setspecific(GC_thread_key, &me->tlfs);

    if (COVERT_DATAFLOW(res) != 0)
      ABORT("GC_setspecific failed (in child)");
  }
#      endif
#    endif
#    undef pthread_id
}
1494  
/*
 * Called before a `fork()`.  Acquires the allocator lock, disables
 * the cancellation (saving the state to `fork_cancel_state`), records
 * the `pthreads` identifier of the forking thread, waits for an ongoing
 * collection to finish and, finally, acquires the mark lock (if parallel
 * marking is on) and the dirty lock.  The locks remain held on return;
 * they are released by `fork_parent_proc()` and `fork_child_proc()`.
 */
#    if defined(GC_ASSERTIONS) && defined(CAN_CALL_ATFORK)
/* `GC_lock_holder` is updated safely (no data race actually). */
GC_ATTR_NO_SANITIZE_THREAD
#    endif
static void
fork_prepare_proc(void)
{
#    if defined(GC_EXPLICIT_SIGNALS_UNBLOCK) && defined(CAN_CALL_ATFORK)
  /*
   * The signals might be blocked by `fork()` implementation when the
   * at-fork prepare handler is invoked.
   */
  if (GC_handle_fork == 1)
    GC_unblock_gc_signals();
#    endif

  /*
   * Acquire all relevant locks, so that after releasing the locks the child
   * process will see a consistent state in which monitor invariants hold.
   * Unfortunately, we cannot acquire `libc` locks we might need, and there
   * seems to be no guarantee that `libc` must install a suitable `fork`
   * handler.  Wait for an ongoing collection to finish, since we cannot
   * finish it in the (one remaining thread in) the child process.
   */

  LOCK();
  DISABLE_CANCEL(fork_cancel_state);
  /* Remembered for `GC_remove_all_threads_but_me()` in the child. */
  GC_parent_pthread_self = pthread_self();
  /* The following waits may include cancellation points. */
#    ifdef PARALLEL_MARK
  if (GC_parallel)
    wait_for_reclaim_atfork();
#    endif
  GC_wait_for_gc_completion(TRUE);
#    ifdef PARALLEL_MARK
  if (GC_parallel) {
#      if defined(THREAD_SANITIZER) && defined(GC_ASSERTIONS) \
          && defined(CAN_CALL_ATFORK)
    /*
     * Prevent TSan false positive (FP) about the data race when updating
     * `GC_mark_lock_holder`.
     */
    GC_generic_lock(&mark_mutex);
#      else
    GC_acquire_mark_lock();
#      endif
  }
#    endif
  GC_acquire_dirty_lock();
}
1546  
/*
 * Called in the parent process after a `fork()` (even if the latter
 * has failed).  Releases what `fork_prepare_proc()` has acquired, in
 * the reverse order: the dirty lock, the mark lock (if parallel marking
 * is on), the cancellation state and, finally, the allocator lock.
 */
#    if defined(GC_ASSERTIONS) && defined(CAN_CALL_ATFORK)
GC_ATTR_NO_SANITIZE_THREAD
#    endif
static void
fork_parent_proc(void)
{
  GC_release_dirty_lock();
#    ifdef PARALLEL_MARK
  if (GC_parallel) {
#      if defined(THREAD_SANITIZER) && defined(GC_ASSERTIONS) \
          && defined(CAN_CALL_ATFORK)
    /* To match that in `fork_prepare_proc`. */
    (void)pthread_mutex_unlock(&mark_mutex);
#      else
    GC_release_mark_lock();
#      endif
  }
#    endif
  RESTORE_CANCEL(fork_cancel_state);
#    ifdef GC_ASSERTIONS
  /* Clear the value recorded by `fork_prepare_proc()`. */
  BZERO(&GC_parent_pthread_self, sizeof(pthread_t));
#    endif
  UNLOCK();
}
1575  
/*
 * Called in the child process after a `fork()`.  Releases the locks
 * acquired by `fork_prepare_proc()`, turns off parallel marking, removes
 * all the entries but the one of the current thread from the thread
 * table and, on the targets using `pthreads` locks, reinitializes the
 * mark lock and the allocator lock.
 */
#    if defined(GC_ASSERTIONS) && defined(CAN_CALL_ATFORK)
GC_ATTR_NO_SANITIZE_THREAD
#    endif
static void
fork_child_proc(void)
{
#    ifdef GC_ASSERTIONS
  /*
   * Update `GC_lock_holder` as value of `thread_id_self()` might differ
   * from that of the parent process.
   */
  SET_LOCK_HOLDER();
#    endif
  GC_release_dirty_lock();
#    ifndef GC_DISABLE_INCREMENTAL
  GC_dirty_update_child();
#    endif
#    ifdef PARALLEL_MARK
  if (GC_parallel) {
#      ifdef GC_WIN32_THREADS
    GC_release_mark_lock();
#      else
#        if !defined(GC_ASSERTIONS) \
            || (defined(THREAD_SANITIZER) && defined(CAN_CALL_ATFORK))
    /* Do not change `GC_mark_lock_holder`. */
#        else
    GC_mark_lock_holder = NO_THREAD;
#        endif
    /*
     * The unlock operation may fail on some targets, just ignore
     * the error silently.
     */
    (void)pthread_mutex_unlock(&mark_mutex);
    /*
     * Reinitialize the mark lock.  The reason is the same as for
     * `GC_allocate_ml` below.
     */
    (void)pthread_mutex_destroy(&mark_mutex);
    /* TODO: `GLIBC_2_19_TSX_BUG` has no effect. */
    if (pthread_mutex_init(&mark_mutex, NULL) != 0)
      ABORT("mark_mutex re-init failed in child");
#      endif
    /*
     * Turn off parallel marking in the child process, since we are probably
     * just going to exec, and we would have to restart mark threads.
     */
    GC_parallel = FALSE;
  }
#      ifdef THREAD_SANITIZER
  /* TSan does not support threads creation in the child process. */
  GC_available_markers_m1 = 0;
#      endif
#    endif
  /* Clean up the thread table, so that just our thread is left. */
  GC_remove_all_threads_but_me();
  GC_stackbase_info_update_after_fork();
  RESTORE_CANCEL(fork_cancel_state);
#    ifdef GC_ASSERTIONS
  /* Clear the value recorded by `fork_prepare_proc()`. */
  BZERO(&GC_parent_pthread_self, sizeof(pthread_t));
#    endif
  UNLOCK();
  /*
   * Even though after a `fork()` the child process only inherits the
   * single thread that called the `fork()`, if another thread in the
   * parent process was attempting to lock the mutex while being held
   * in `fork_prepare_proc()`, the mutex will be left in
   * an inconsistent state in the child process after the `UNLOCK()`.
   * This is the case, at least, in Mac OS X and leads to an unusable
   * collector in the child process which will block when attempting
   * to perform any GC operation that acquires the allocator lock.
   */
#    if defined(USE_PTHREAD_LOCKS) && !defined(GC_WIN32_THREADS)
  GC_ASSERT(I_DONT_HOLD_LOCK());
  /*
   * Reinitialize the mutex.  It should be safe since we are running
   * this in the child process which only inherits a single thread.
   * `pthread_mutex_destroy()` and `pthread_rwlock_destroy()` may
   * return `EBUSY`, which makes no sense, but that is the reason for
   * the need of the reinitialization.
   * Note: excluded for Cygwin as does not seem to be needed.
   */
#      ifdef USE_RWLOCK
  (void)pthread_rwlock_destroy(&GC_allocate_ml);
#        ifdef DARWIN
  /* A workaround for `pthread_rwlock_init()` fail with `EBUSY`. */
  {
    pthread_rwlock_t rwlock_local = PTHREAD_RWLOCK_INITIALIZER;
    BCOPY(&rwlock_local, &GC_allocate_ml, sizeof(GC_allocate_ml));
  }
#        else
  if (pthread_rwlock_init(&GC_allocate_ml, NULL) != 0)
    ABORT("pthread_rwlock_init failed (in child)");
#        endif
#      else
  (void)pthread_mutex_destroy(&GC_allocate_ml);
  /*
   * TODO: Probably some targets (e.g. with `GLIBC_2_19_TSX_BUG`) might
   * need the default mutex attribute to be passed instead of `NULL`.
   */
  if (pthread_mutex_init(&GC_allocate_ml, NULL) != 0)
    ABORT("pthread_mutex_init failed (in child)");
#      endif
#    endif
}
1681  
1682  /*
1683   * Routines for `fork()` handling by client (no-op if `pthread_atfork`
1684   * works).
1685   */
1686  
1687  GC_API void GC_CALL
1688  GC_atfork_prepare(void)
1689  {
1690    if (UNLIKELY(!GC_is_initialized))
1691      GC_init();
1692    if (GC_handle_fork <= 0)
1693      fork_prepare_proc();
1694  }
1695  
1696  GC_API void GC_CALL
1697  GC_atfork_parent(void)
1698  {
1699    if (GC_handle_fork <= 0)
1700      fork_parent_proc();
1701  }
1702  
1703  GC_API void GC_CALL
1704  GC_atfork_child(void)
1705  {
1706    if (GC_handle_fork <= 0)
1707      fork_child_proc();
1708  }
1709  
1710  GC_INNER_WIN32THREAD void
1711  GC_setup_atfork(void)
1712  {
1713    if (GC_handle_fork) {
1714  #    ifdef CAN_CALL_ATFORK
1715      if (pthread_atfork(fork_prepare_proc, fork_parent_proc, fork_child_proc)
1716          == 0) {
1717        /* Handlers successfully registered. */
1718        GC_handle_fork = 1;
1719      } else
1720  #    endif
1721      /* else */ {
1722        if (GC_handle_fork != -1)
1723          ABORT("pthread_atfork failed");
1724      }
1725    }
1726  }
1727  
1728  #  endif /* CAN_HANDLE_FORK */
1729  
1730  #  ifdef INCLUDE_LINUX_THREAD_DESCR
1731  __thread int GC_dummy_thread_local;
1732  #  endif
1733  
1734  #  ifdef PARALLEL_MARK
1735  #    ifndef GC_WIN32_THREADS
1736  static void setup_mark_lock(void);
1737  #    endif
1738  
1739  GC_INNER_WIN32THREAD unsigned GC_required_markers_cnt = 0;
1740  
1741  GC_API void GC_CALL
1742  GC_set_markers_count(unsigned markers)
1743  {
1744    GC_required_markers_cnt = markers < MAX_MARKERS ? markers : MAX_MARKERS;
1745  }
1746  #  endif /* PARALLEL_MARK */
1747  
1748  GC_INNER GC_bool GC_in_thread_creation = FALSE;
1749  
1750  GC_INNER_WIN32THREAD void
1751  GC_record_stack_base(GC_stack_context_t crtn, const struct GC_stack_base *sb)
1752  {
1753  #  if !defined(DARWIN) && !defined(GC_WIN32_THREADS)
1754    crtn->stack_ptr = (ptr_t)sb->mem_base;
1755  #  endif
1756    if ((crtn->stack_end = (ptr_t)sb->mem_base) == NULL)
1757      ABORT("Bad stack base in GC_register_my_thread");
1758  #  ifdef E2K
1759    crtn->ps_ofs = (size_t)(GC_uintptr_t)sb->reg_base;
1760  #  elif defined(IA64)
1761    crtn->backing_store_end = (ptr_t)sb->reg_base;
1762  #  elif defined(I386) && defined(GC_WIN32_THREADS)
1763    crtn->initial_stack_base = (ptr_t)sb->mem_base;
1764  #  endif
1765  }
1766  
1767  #  if !defined(GC_NO_THREADS_DISCOVERY) && defined(GC_WIN32_THREADS) \
1768        || !defined(DONT_USE_ATEXIT)
1769  GC_INNER_WIN32THREAD thread_id_t GC_main_thread_id;
1770  #  endif
1771  
1772  #  ifndef DONT_USE_ATEXIT
1773  GC_INNER GC_bool
1774  GC_is_main_thread(void)
1775  {
1776    GC_ASSERT(GC_thr_initialized);
1777    return THREAD_ID_EQUAL(GC_main_thread_id, thread_id_self());
1778  }
1779  #  endif /* !DONT_USE_ATEXIT */
1780  
1781  #  ifndef GC_WIN32_THREADS
1782  
1783  STATIC GC_thread
1784  GC_register_my_thread_inner(const struct GC_stack_base *sb,
1785                              thread_id_t self_id)
1786  {
1787    GC_thread me;
1788  
1789    GC_ASSERT(I_HOLD_LOCK());
1790    me = GC_new_thread(self_id);
1791    me->id = self_id;
1792  #    ifdef DARWIN
1793    me->mach_thread = mach_thread_self();
1794  #    endif
1795    GC_record_stack_base(me->crtn, sb);
1796    return me;
1797  }
1798  
1799  /*
1800   * Number of processors.  We may not have access to all of them, but
1801   * this is as good a guess as any...
1802   */
1803  STATIC int GC_nprocs = 1;
1804  
1805  GC_INNER void
1806  GC_thr_init(void)
1807  {
        /*
         * Initialize the threads support (non-Win32 variant): install the
         * fork handlers (if supported), compute `GC_nprocs` and the number
         * of available markers, prepare the stop-the-world and parallel
         * marking machinery, and register the calling thread as the initial
         * one.  The allocator lock should be held; it is asserted that
         * the initialization has not been performed yet.
         */
1808    GC_ASSERT(I_HOLD_LOCK());
1809    GC_ASSERT(!GC_thr_initialized);
1810    GC_ASSERT(ADDR(&GC_threads) % ALIGNMENT == 0);
1811  #    ifdef GC_ASSERTIONS
1812    GC_thr_initialized = TRUE;
1813  #    endif
1814  #    ifdef CAN_HANDLE_FORK
1815    GC_setup_atfork();
1816  #    endif
1817  
1818  #    ifdef INCLUDE_LINUX_THREAD_DESCR
1819    /*
1820     * Explicitly register the region including the address of a thread-local
1821     * variable.  This should include thread locals for the main thread,
1822     * except for those allocated in response to `dlopen()` calls.
1823     */
1824    {
1825      ptr_t thread_local_addr = (ptr_t)(&GC_dummy_thread_local);
1826      ptr_t main_thread_start, main_thread_end;
1827      if (!GC_enclosing_writable_mapping(thread_local_addr, &main_thread_start,
1828                                         &main_thread_end)) {
1829        ABORT("Failed to find TLS mapping for the primordial thread");
1830      } else {
1831        /* `main_thread_start` and `main_thread_end` are initialized. */
1832        GC_add_roots_inner(main_thread_start, main_thread_end, FALSE);
1833      }
1834    }
1835  #    endif
1836  
1837    /* Set `GC_nprocs` and `GC_available_markers_m1` variables. */
1838    {
1839      const char *nprocs_string = GETENV("GC_NPROCS");
          /*
           * A non-positive value means the count is not given by
           * the environment and has to be detected below.
           */
1840      GC_nprocs = -1;
1841      if (nprocs_string != NULL)
1842        GC_nprocs = atoi(nprocs_string);
1843    }
1844    if (GC_nprocs <= 0
1845  #    if defined(LINUX) && defined(ARM32)
1846        /* Workaround for some Linux/arm kernels. */
1847        && (GC_nprocs = GC_get_nprocs_present()) <= 1
1848  #    endif
1849    ) {
1850      GC_nprocs = GC_get_nprocs();
1851    }
        /* The detection has not produced a usable value either. */
1852    if (GC_nprocs <= 0) {
1853      WARN("GC_get_nprocs() returned %" WARN_PRIdPTR "\n",
1854           (GC_signed_word)GC_nprocs);
1855      /* Assume a dual-core CPU. */
1856      GC_nprocs = 2;
1857  #    ifdef PARALLEL_MARK
1858      /* But use only one marker. */
1859      GC_available_markers_m1 = 0;
1860  #    endif
1861    } else {
1862  #    ifdef PARALLEL_MARK
1863      {
            /*
             * The `GC_MARKERS` environment variable, if set, takes
             * precedence over `GC_required_markers_cnt`.
             */
1864        const char *markers_string = GETENV("GC_MARKERS");
1865        int markers = GC_required_markers_cnt;
1866  
1867        if (markers_string != NULL) {
1868          markers = atoi(markers_string);
1869          if (markers <= 0 || markers > MAX_MARKERS) {
1870            WARN("Too big or invalid number of mark threads: %" WARN_PRIdPTR
1871                 "; using maximum threads\n",
1872                 (GC_signed_word)markers);
1873            markers = MAX_MARKERS;
1874          }
1875        } else if (0 == markers) {
1876          /*
1877           * Unless the client sets the desired number of parallel markers,
1878           * it is determined based on the number of CPU cores.
1879           */
1880          markers = GC_nprocs;
1881  #      if defined(GC_MIN_MARKERS) && !defined(CPPCHECK)
1882          /* This is primarily for targets without `getenv()`. */
1883          if (markers < GC_MIN_MARKERS)
1884            markers = GC_MIN_MARKERS;
1885  #      endif
1886          if (markers > MAX_MARKERS) {
1887            /* Silently limit the amount of markers. */
1888            markers = MAX_MARKERS;
1889          }
1890        }
1891        GC_available_markers_m1 = markers - 1;
1892      }
1893  #    endif
1894    }
1895    GC_COND_LOG_PRINTF("Number of processors: %d\n", GC_nprocs);
1896  
1897  #    if defined(BASE_ATOMIC_OPS_EMULATED) && defined(SIGNAL_BASED_STOP_WORLD)
1898    /*
1899     * Ensure the process is running on just one CPU core.  This is needed
1900     * because the AO primitives emulated with locks cannot be used inside
1901     * signal handlers.
1902     */
1903    {
1904      cpu_set_t mask;
1905      int cpu_set_cnt = 0;
1906      int cpu_lowest_set = 0;
1907  #      ifdef RANDOM_ONE_CPU_CORE
1908      int cpu_highest_set = 0;
1909  #      endif
1910      /* Ensure at least 2 cores. */
1911      int i = GC_nprocs > 1 ? GC_nprocs : 2;
1912  
1913      if (sched_getaffinity(0 /* current process */, sizeof(mask), &mask) == -1)
1914        ABORT_ARG1("sched_getaffinity failed", ": errno= %d", errno);
          /*
           * The indices are scanned downward, thus `cpu_lowest_set` ends up
           * holding the smallest examined index that is present in the mask.
           */
1915      while (i-- > 0)
1916        if (CPU_ISSET(i, &mask)) {
1917  #      ifdef RANDOM_ONE_CPU_CORE
1918          if (i + 1 != cpu_lowest_set)
1919            cpu_highest_set = i;
1920  #      endif
1921          cpu_lowest_set = i;
1922          cpu_set_cnt++;
1923        }
1924      if (0 == cpu_set_cnt)
1925        ABORT("sched_getaffinity returned empty mask");
1926      if (cpu_set_cnt > 1) {
1927  #      ifdef RANDOM_ONE_CPU_CORE
1928        if (cpu_lowest_set < cpu_highest_set) {
1929          /* Pseudo-randomly adjust the bit to set among valid ones. */
1930          cpu_lowest_set
1931              += (unsigned)getpid() % (cpu_highest_set - cpu_lowest_set + 1);
1932        }
1933  #      endif
1934        CPU_ZERO(&mask);
1935        /* Select just one CPU. */
1936        CPU_SET(cpu_lowest_set, &mask);
1937        if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
1938          ABORT_ARG1("sched_setaffinity failed", ": errno= %d", errno);
1939        WARN("CPU affinity mask is set to %p\n", (word)1 << cpu_lowest_set);
1940      }
1941    }
1942  #    endif /* BASE_ATOMIC_OPS_EMULATED */
1943  
1944  #    ifndef DARWIN
1945    GC_stop_init();
1946  #    endif
1947  
1948  #    ifdef PARALLEL_MARK
1949    if (GC_available_markers_m1 <= 0) {
1950      /* Disable parallel marking. */
1951      GC_parallel = FALSE;
1952      GC_COND_LOG_PRINTF("Single marker thread, turning off parallel marking\n");
1953    } else {
1954      setup_mark_lock();
1955    }
1956  #    endif
1957  
1958    /* Add the initial thread, so we can stop it. */
1959    {
1960      struct GC_stack_base sb;
1961      GC_thread me;
1962      thread_id_t self_id = thread_id_self();
1963  
1964      sb.mem_base = GC_stackbottom;
1965      GC_ASSERT(sb.mem_base != NULL);
1966  #    if defined(E2K) || defined(IA64)
1967      sb.reg_base = GC_register_stackbottom;
1968  #    endif
1969      GC_ASSERT(NULL == GC_self_thread_inner());
1970      me = GC_register_my_thread_inner(&sb, self_id);
1971  #    ifndef DONT_USE_ATEXIT
          /* Remembered for `GC_is_main_thread()`. */
1972      GC_main_thread_id = self_id;
1973  #    endif
1974      me->flags = DETACHED;
1975    }
1976  }
1977  
1978  #  endif /* !GC_WIN32_THREADS */
1979  
1980  GC_INNER void
1981  GC_init_parallel(void)
1982  {
1983  #  ifdef THREAD_LOCAL_ALLOC
1984    GC_thread me;
1985  
1986    GC_ASSERT(GC_is_initialized);
1987    LOCK();
1988    me = GC_self_thread_inner();
1989    GC_init_thread_local(&me->tlfs);
1990    UNLOCK();
1991  #  endif
1992  #  if !defined(GC_NO_THREADS_DISCOVERY) && defined(GC_WIN32_THREADS)
1993    if (GC_win32_dll_threads) {
1994      /*
1995       * Cannot intercept thread creation.  Hence we do not know if other
1996       * threads exist.  However, client is not allowed to create other threads
1997       * before collector initialization.  Thus it is OK not to lock before
1998       * this.
1999       */
2000      set_need_to_lock();
2001    }
2002  #  endif
2003  }
2004  
2005  #  if !defined(GC_NO_PTHREAD_SIGMASK) && defined(GC_PTHREADS)
2006  #    define GC_wrap_pthread_sigmask WRAP_FUNC(pthread_sigmask)
2007  GC_API int
2008  GC_wrap_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset)
2009  {
2010  #    ifdef GC_WIN32_THREADS
2011    /*
2012     * `pthreads-win32` library does not support `sigmask`.
2013     * So, nothing is required here...
2014     */
2015  #    else
2016    sigset_t fudged_set;
2017  
2018    INIT_REAL_SYMS();
2019    if (LIKELY(set != NULL) && (how == SIG_BLOCK || how == SIG_SETMASK)) {
2020      int sig_suspend = GC_get_suspend_signal();
2021  
2022      fudged_set = *set;
2023      GC_ASSERT(sig_suspend >= 0);
2024      if (sigdelset(&fudged_set, sig_suspend) != 0)
2025        ABORT("sigdelset failed");
2026      set = &fudged_set;
2027    }
2028  #    endif
2029    return REAL_FUNC(pthread_sigmask)(how, set, oset);
2030  }
2031  #    undef GC_wrap_pthread_sigmask
2032  #  endif /* !GC_NO_PTHREAD_SIGMASK */
2033  
2034  /*
2035   * Wrapper for functions that are likely to block for an appreciable
2036   * length of time.
2037   */
2038  
2039  #  ifdef E2K
2040  /*
2041   * Cannot be defined as a function because the stack-allocated buffer
2042   * (pointed to by `bs_lo`) should be preserved till completion of
2043   * `GC_do_blocking_inner` (or `GC_suspend_self_blocked`).
       *
       * The macro stores `FALSE` to `*pTopOfStackUnset`, records the result
       * of `GC_approx_sp()` as the stack pointer of the context of `me`,
       * calls `GET_PROCEDURE_STACK_LOCAL()` to get the buffer address and
       * size (saved to `backing_store_end` and, as the end address, to
       * `backing_store_ptr` fields), and finally sets `DO_BLOCKING` flag
       * of `me`.  `backing_store_end` is expected to be `NULL` on entry.
       * Note that both macro arguments are evaluated more than once.
2044   */
2045  #    define do_blocking_enter(pTopOfStackUnset, me)                   \
2046        do {                                                            \
2047          ptr_t bs_lo;                                                  \
2048          size_t stack_size;                                            \
2049          GC_stack_context_t crtn = (me)->crtn;                         \
2050                                                                        \
2051          *(pTopOfStackUnset) = FALSE;                                  \
2052          crtn->stack_ptr = GC_approx_sp();                             \
2053          GC_ASSERT(NULL == crtn->backing_store_end);                   \
2054          GET_PROCEDURE_STACK_LOCAL(crtn->ps_ofs, &bs_lo, &stack_size); \
2055          crtn->backing_store_end = bs_lo;                              \
2056          crtn->backing_store_ptr = bs_lo + stack_size;                 \
2057          (me)->flags |= DO_BLOCKING;                                   \
2058        } while (0)
2059  
2060  #  else /* !E2K */
2061  static void
2062  do_blocking_enter(GC_bool *pTopOfStackUnset, GC_thread me)
2063  {
2064  #    if defined(SPARC) || defined(IA64)
2065    ptr_t bs_hi = GC_save_regs_in_stack();
2066    /* TODO: Registers saving already done by `GC_with_callee_saves_pushed`. */
2067  #    endif
2068    GC_stack_context_t crtn = me->crtn;
2069  
2070    GC_ASSERT(I_HOLD_READER_LOCK());
2071    GC_ASSERT((me->flags & DO_BLOCKING) == 0);
2072    *pTopOfStackUnset = FALSE;
2073  #    ifdef SPARC
2074    crtn->stack_ptr = bs_hi;
2075  #    else
2076    crtn->stack_ptr = GC_approx_sp();
2077  #    endif
2078  #    if defined(DARWIN) && !defined(DARWIN_DONT_PARSE_STACK)
2079    if (NULL == crtn->topOfStack) {
2080      /*
2081       * `GC_do_blocking_inner` is not called recursively, so `topOfStack`
2082       * should be computed now.
2083       */
2084      *pTopOfStackUnset = TRUE;
2085      crtn->topOfStack = GC_FindTopOfStack(0);
2086    }
2087  #    endif
2088  #    ifdef IA64
2089    crtn->backing_store_ptr = bs_hi;
2090  #    endif
2091    me->flags |= DO_BLOCKING;
2092    /* Save context here if we want to support precise stack marking. */
2093  }
2094  #  endif /* !E2K */
2095  
2096  static void
2097  do_blocking_leave(GC_thread me, GC_bool topOfStackUnset)
2098  {
2099    GC_ASSERT(I_HOLD_READER_LOCK());
2100    me->flags &= (unsigned char)~DO_BLOCKING;
2101  #  ifdef E2K
2102    {
2103      GC_stack_context_t crtn = me->crtn;
2104  
2105      GC_ASSERT(crtn->backing_store_end != NULL);
2106      crtn->backing_store_ptr = NULL;
2107      crtn->backing_store_end = NULL;
2108    }
2109  #  endif
2110  #  if defined(DARWIN) && !defined(DARWIN_DONT_PARSE_STACK)
2111    if (topOfStackUnset) {
2112      /* Make it unset again. */
2113      me->crtn->topOfStack = NULL;
2114    }
2115  #  else
2116    (void)topOfStackUnset;
2117  #  endif
2118  }
2119  
2120  GC_INNER void
2121  GC_do_blocking_inner(ptr_t data, void *context)
2122  {
2123    GC_thread me;
2124    GC_bool topOfStackUnset;
2125  
2126    UNUSED_ARG(context);
2127    READER_LOCK();
2128    me = GC_self_thread_inner();
2129    do_blocking_enter(&topOfStackUnset, me);
2130    READER_UNLOCK_RELEASE();
2131  
2132    ((struct blocking_data *)data)->client_data /*< result */
2133        = ((struct blocking_data *)data)
2134              ->fn(((struct blocking_data *)data)->client_data);
2135  
2136    /* This will block if the world is stopped. */
2137    READER_LOCK();
2138  
2139  #  ifdef LINT2
2140    {
2141  #    ifdef GC_ASSERTIONS
2142      GC_thread saved_me = me;
2143  #    endif
2144  
2145      /*
2146       * The pointer to the GC thread descriptor should not be changed while
2147       * the thread is registered but a static analysis tool might complain
2148       * that this pointer value (obtained in the first locked section) is
2149       * unreliable in the second locked section.
2150       */
2151      me = GC_self_thread_inner();
2152      GC_ASSERT(me == saved_me);
2153    }
2154  #  endif
2155  #  if defined(GC_ENABLE_SUSPEND_THREAD) && defined(SIGNAL_BASED_STOP_WORLD)
2156    /*
2157     * Note: this code cannot be moved into `do_blocking_leave()` otherwise
2158     * there could be a static analysis tool warning (false positive) about
2159     * unlock without a matching lock.
2160     */
2161    while (UNLIKELY((me->ext_suspend_cnt & 1) != 0)) {
2162      /* Read suspend counter (number) before unlocking. */
2163      size_t suspend_cnt = me->ext_suspend_cnt;
2164  
2165      READER_UNLOCK_RELEASE();
2166      GC_suspend_self_inner(me, suspend_cnt);
2167      READER_LOCK();
2168    }
2169  #  endif
2170    do_blocking_leave(me, topOfStackUnset);
2171    READER_UNLOCK_RELEASE();
2172  }
2173  
2174  #  if defined(GC_ENABLE_SUSPEND_THREAD) && defined(SIGNAL_BASED_STOP_WORLD)
2175  GC_INNER void
2176  GC_suspend_self_blocked(ptr_t thread_me, void *context)
2177  {
2178    GC_thread me = (GC_thread)thread_me;
2179    GC_bool topOfStackUnset;
2180  
2181    UNUSED_ARG(context);
2182  
2183    /*
2184     * The caller holds the allocator lock in the exclusive mode, thus
2185     * we require and restore it to the same mode upon return from the
2186     * function.
2187     */
2188    GC_ASSERT(I_HOLD_LOCK());
2189  
2190    do_blocking_enter(&topOfStackUnset, me);
2191    while ((me->ext_suspend_cnt & 1) != 0) {
2192      size_t suspend_cnt = me->ext_suspend_cnt;
2193  
2194      UNLOCK();
2195      GC_suspend_self_inner(me, suspend_cnt);
2196      LOCK();
2197    }
2198    do_blocking_leave(me, topOfStackUnset);
2199  }
2200  #  endif /* GC_ENABLE_SUSPEND_THREAD */
2201  
2202  GC_API void GC_CALL
2203  GC_set_stackbottom(void *gc_thread_handle, const struct GC_stack_base *sb)
2204  {
2205    GC_thread t = (GC_thread)gc_thread_handle;
2206    GC_stack_context_t crtn;
2207  
2208    GC_ASSERT(sb->mem_base != NULL);
2209    if (UNLIKELY(!GC_is_initialized)) {
2210      GC_ASSERT(NULL == t);
2211      /* Alter the stack bottom of the primordial thread. */
2212      GC_stackbottom = (char *)sb->mem_base;
2213  #  if defined(E2K) || defined(IA64)
2214      GC_register_stackbottom = (ptr_t)sb->reg_base;
2215  #  endif
2216      return;
2217    }
2218  
2219    GC_ASSERT(I_HOLD_READER_LOCK());
2220    if (NULL == t) {
2221      /* The current thread. */
2222      t = GC_self_thread_inner();
2223    }
2224    GC_ASSERT(!KNOWN_FINISHED(t));
2225    crtn = t->crtn;
2226    GC_ASSERT((t->flags & DO_BLOCKING) == 0
2227              && NULL == crtn->traced_stack_sect); /*< for now */
2228  
2229    crtn->stack_end = (ptr_t)sb->mem_base;
2230  #  ifdef E2K
2231    crtn->ps_ofs = (size_t)(GC_uintptr_t)sb->reg_base;
2232  #  elif defined(IA64)
2233    crtn->backing_store_end = (ptr_t)sb->reg_base;
2234  #  endif
2235  #  ifdef GC_WIN32_THREADS
2236    /* Reset the known minimum (hottest address in the stack). */
2237    crtn->last_stack_min = ADDR_LIMIT;
2238  #  endif
2239  }
2240  
2241  GC_API void *GC_CALL
2242  GC_get_my_stackbottom(struct GC_stack_base *sb)
2243  {
2244    GC_thread me;
2245    GC_stack_context_t crtn;
2246  
2247    READER_LOCK();
2248    me = GC_self_thread_inner();
2249    /* The thread is assumed to be registered. */
2250    crtn = me->crtn;
2251    sb->mem_base = crtn->stack_end;
2252  #  ifdef E2K
2253    /* Store the offset in the procedure stack, not address. */
2254    sb->reg_base = NUMERIC_TO_VPTR(crtn->ps_ofs);
2255  #  elif defined(IA64)
2256    sb->reg_base = crtn->backing_store_end;
2257  #  endif
2258    READER_UNLOCK();
2259    return me; /*< `gc_thread_handle` */
2260  }
2261  
2262  GC_ATTR_NOINLINE
2263  GC_API void *GC_CALL
2264  GC_call_with_gc_active(GC_fn_type fn, void *client_data)
2265  {
        /*
         * Call `fn(client_data)` and return its result.  If the current
         * thread is not inside a blocking section (`DO_BLOCKING` flag is
         * not set), then the function is just called (after the stack
         * bottom adjustment, if needed).  Otherwise, a new "stack section"
         * (`stacksect`, residing in this frame) is linked to the stack
         * context and `DO_BLOCKING` flag is cleared for the duration of
         * the call; the previous state is restored afterwards.
         */
2266    struct GC_traced_stack_sect_s stacksect;
2267    GC_thread me;
2268    GC_stack_context_t crtn;
2269    ptr_t stack_end;
2270  #  ifdef E2K
2271    ptr_t saved_bs_ptr, saved_bs_end;
2272    size_t saved_ps_ofs;
2273  #  endif
2274  
2275    /* This will block if the world is stopped. */
2276    READER_LOCK();
2277  
2278    me = GC_self_thread_inner();
2279    crtn = me->crtn;
2280  
2281    /*
2282     * Adjust our stack bottom value (this could happen unless
2283     * `GC_get_stack_base()` was used which returned `GC_SUCCESS`).
2284     */
2285    stack_end = crtn->stack_end; /*< read of a `volatile` field */
2286    GC_ASSERT(stack_end != NULL);
2287    STORE_APPROX_SP_TO(*(volatile ptr_t *)&stacksect.saved_stack_ptr);
2288    if (HOTTER_THAN(stack_end, stacksect.saved_stack_ptr)) {
2289      crtn->stack_end = stacksect.saved_stack_ptr;
2290  #  if defined(I386) && defined(GC_WIN32_THREADS)
2291      crtn->initial_stack_base = stacksect.saved_stack_ptr;
2292  #  endif
2293    }
2294  
2295    if ((me->flags & DO_BLOCKING) == 0) {
2296      /* We are not inside `GC_do_blocking()` - do nothing more. */
2297      READER_UNLOCK_RELEASE();
2298      /* Cast `fn` to a `volatile` type to prevent its call inlining. */
2299      client_data = (*(GC_fn_type volatile *)&fn)(client_data);
2300      /* Prevent treating the above as a tail call. */
2301      GC_noop1(COVERT_DATAFLOW(ADDR(&stacksect)));
2302      return client_data; /*< result */
2303    }
2304  
2305  #  if defined(GC_ENABLE_SUSPEND_THREAD) && defined(SIGNAL_BASED_STOP_WORLD)
2306    while (UNLIKELY((me->ext_suspend_cnt & 1) != 0)) {
2307      size_t suspend_cnt = me->ext_suspend_cnt;
2308  
2309      READER_UNLOCK_RELEASE();
2310      GC_suspend_self_inner(me, suspend_cnt);
2311      READER_LOCK();
2312      GC_ASSERT(me->crtn == crtn);
2313    }
2314  #  endif
2315  
2316    /* Setup new "stack section". */
        /*
         * The stack pointer recorded in the context is saved to
         * the section; it is put back to the context at the end.
         */
2317    stacksect.saved_stack_ptr = crtn->stack_ptr;
2318  #  ifdef E2K
2319    GC_ASSERT(crtn->backing_store_end != NULL);
2320    {
2321      unsigned long long sz_ull;
2322  
2323      GET_PROCEDURE_STACK_SIZE_INNER(&sz_ull);
2324      saved_ps_ofs = crtn->ps_ofs;
2325      GC_ASSERT(saved_ps_ofs <= (size_t)sz_ull);
2326      crtn->ps_ofs = (size_t)sz_ull;
2327    }
2328    saved_bs_end = crtn->backing_store_end;
2329    saved_bs_ptr = crtn->backing_store_ptr;
2330    crtn->backing_store_ptr = NULL;
2331    crtn->backing_store_end = NULL;
2332  #  elif defined(IA64)
2333    /* This is the same as in `GC_call_with_stack_base()`. */
2334    stacksect.backing_store_end = GC_save_regs_in_stack();
2335    /*
2336     * Unnecessarily flushes the register stack, but that probably does
2337     * not hurt.
2338     */
2339    stacksect.saved_backing_store_ptr = crtn->backing_store_ptr;
2340  #  endif
2341    stacksect.prev = crtn->traced_stack_sect;
        /* `DO_BLOCKING` flag is off while `fn` runs; it is set back below. */
2342    me->flags &= (unsigned char)~DO_BLOCKING;
2343    crtn->traced_stack_sect = &stacksect;
2344  
2345    READER_UNLOCK_RELEASE();
2346    client_data = (*(GC_fn_type volatile *)&fn)(client_data);
2347    GC_ASSERT((me->flags & DO_BLOCKING) == 0);
2348  
2349    /* Restore original "stack section". */
2350    READER_LOCK();
2351    GC_ASSERT(me->crtn == crtn);
2352    GC_ASSERT(crtn->traced_stack_sect == &stacksect);
2353  #  ifdef CPPCHECK
2354    GC_noop1_ptr(crtn->traced_stack_sect);
2355  #  endif
2356    crtn->traced_stack_sect = stacksect.prev;
2357  #  ifdef E2K
2358    GC_ASSERT(NULL == crtn->backing_store_end);
2359    crtn->backing_store_end = saved_bs_end;
2360    crtn->backing_store_ptr = saved_bs_ptr;
2361    crtn->ps_ofs = saved_ps_ofs;
2362  #  elif defined(IA64)
2363    crtn->backing_store_ptr = stacksect.saved_backing_store_ptr;
2364  #  endif
2365    me->flags |= DO_BLOCKING;
2366    crtn->stack_ptr = stacksect.saved_stack_ptr;
2367    READER_UNLOCK_RELEASE();
2368    return client_data; /*< result */
2369  }
2370  
2371  STATIC void
2372  GC_unregister_my_thread_inner(GC_thread me)
2373  {
2374    GC_ASSERT(I_HOLD_LOCK());
2375  #  ifdef DEBUG_THREADS
2376    GC_log_printf("Unregistering thread %p, gc_thread= %p, n_threads= %d\n",
2377                  THREAD_ID_TO_VPTR(me->id), (void *)me, GC_count_threads());
2378  #  endif
2379    GC_ASSERT(!KNOWN_FINISHED(me));
2380  #  if defined(THREAD_LOCAL_ALLOC)
2381    GC_destroy_thread_local(&me->tlfs);
2382  #  endif
2383  #  ifdef NACL
2384    GC_nacl_shutdown_gc_thread();
2385  #  endif
2386  #  ifdef GC_PTHREADS
2387  #    if defined(GC_HAVE_PTHREAD_EXIT) || !defined(GC_NO_PTHREAD_CANCEL)
2388    /*
2389     * Handle `DISABLED_GC` flag which is set by the intercepted
2390     * `pthread_cancel()` or `pthread_exit()`.
2391     */
2392    if ((me->flags & DISABLED_GC) != 0) {
2393      GC_dont_gc--;
2394    }
2395  #    endif
2396    if ((me->flags & DETACHED) == 0) {
2397      me->flags |= FINISHED;
2398    } else
2399  #  endif
2400    /* else */ {
2401      GC_delete_thread(me);
2402    }
2403  #  if defined(THREAD_LOCAL_ALLOC)
2404    /*
2405     * It is required to call `GC_remove_specific()` defined in
2406     * `specific.c` file.
2407     */
2408    GC_remove_specific(GC_thread_key);
2409  #  endif
2410  }
2411  
2412  GC_API int GC_CALL
2413  GC_unregister_my_thread(void)
2414  {
2415    GC_thread me;
2416    IF_CANCEL(int cancel_state;)
2417  
2418    /*
2419     * Client should not unregister the thread explicitly
2420     * if it is registered by `DllMain`, except for the main thread.
2421     */
2422  #  if !defined(GC_NO_THREADS_DISCOVERY) && defined(GC_WIN32_THREADS)
2423    GC_ASSERT(!GC_win32_dll_threads
2424              || THREAD_ID_EQUAL(GC_main_thread_id, thread_id_self()));
2425  #  endif
2426  
2427    LOCK();
2428    DISABLE_CANCEL(cancel_state);
2429    /*
2430     * Wait for any collection that may be marking from our stack to complete
2431     * before we remove this thread.
2432     */
2433    GC_wait_for_gc_completion(FALSE);
2434    me = GC_self_thread_inner();
2435    GC_ASSERT(THREAD_ID_EQUAL(me->id, thread_id_self()));
2436    GC_unregister_my_thread_inner(me);
2437    RESTORE_CANCEL(cancel_state);
2438    UNLOCK();
2439    return GC_SUCCESS;
2440  }
2441  
2442  #  if !defined(GC_NO_PTHREAD_CANCEL) && defined(GC_PTHREADS)
2443  /*
2444   * We should deal with the fact that apparently on Solaris and, probably,
2445   * on some Linux we cannot collect while a thread is exiting, since
2446   * signals are not handled properly.  This currently gives rise to deadlocks.
2447   * The only workaround seen is to intercept `pthread_cancel()` and
2448   * `pthread_exit()`, and disable the collections until the thread exit
2449   * handler is called.  That is ugly, because we risk growing the heap
2450   * unnecessarily.  But it seems that we do not really have an option in that
2451   * the process is not in a fully functional state while a thread is exiting.
2452   */
2453  #    define GC_wrap_pthread_cancel WRAP_FUNC(pthread_cancel)
2454  GC_API int
2455  GC_wrap_pthread_cancel(pthread_t thread)
2456  {
2457  #    ifdef CANCEL_SAFE
2458    GC_thread t;
2459  #    endif
2460  
2461    INIT_REAL_SYMS();
2462  #    ifdef CANCEL_SAFE
2463    LOCK();
2464    t = GC_lookup_by_pthread(thread);
2465    /*
2466     * We test `DISABLED_GC` because `pthread_exit` could be called at
2467     * the same time.  (If `t` is `NULL`, then `pthread_cancel()` should
2468     * return `ESRCH`.)
2469     */
2470    if (t != NULL && (t->flags & DISABLED_GC) == 0) {
2471      t->flags |= DISABLED_GC;
2472      GC_dont_gc++;
2473    }
2474    UNLOCK();
2475  #    endif
2476    return REAL_FUNC(pthread_cancel)(thread);
2477  }
2478  #    undef GC_wrap_pthread_cancel
2479  #  endif /* !GC_NO_PTHREAD_CANCEL */
2480  
2481  #  ifdef GC_HAVE_PTHREAD_EXIT
2482  #    define GC_wrap_pthread_exit WRAP_FUNC(pthread_exit)
2483  GC_API GC_PTHREAD_EXIT_ATTRIBUTE void
2484  GC_wrap_pthread_exit(void *retval)
2485  {
2486    GC_thread me;
2487  
2488    INIT_REAL_SYMS();
2489    LOCK();
2490    me = GC_self_thread_inner();
2491    /*
2492     * We test `DISABLED_GC` because someone else could call `pthread_cancel()`
2493     * at the same time.
2494     */
2495    if (me != NULL && (me->flags & DISABLED_GC) == 0) {
2496      me->flags |= DISABLED_GC;
2497      GC_dont_gc++;
2498    }
2499    UNLOCK();
2500  
2501    REAL_FUNC(pthread_exit)(retval);
2502  }
2503  #    undef GC_wrap_pthread_exit
2504  #  endif /* GC_HAVE_PTHREAD_EXIT */
2505  
2506  GC_API void GC_CALL
2507  GC_allow_register_threads(void)
2508  {
2509    /*
2510     * Check the collector is initialized and the current thread is
2511     * registered.
2512     */
2513    GC_ASSERT(GC_self_thread() != NULL);
2514  
2515    /* Initialize symbols while still single-threaded. */
2516    INIT_REAL_SYMS();
2517  
2518    GC_init_lib_bounds();
2519    GC_start_mark_threads();
2520    set_need_to_lock();
2521  }
2522  
2523  #  if defined(PTHREAD_STOP_WORLD_IMPL)            \
2524            && !defined(NO_SIGNALS_UNBLOCK_IN_MAIN) \
2525        || defined(GC_EXPLICIT_SIGNALS_UNBLOCK)
2526  GC_INNER void
2527  GC_unblock_gc_signals(void)
2528  {
2529    sigset_t set;
2530  
2531    /* This is for `pthread_sigmask`. */
2532    INIT_REAL_SYMS();
2533  
2534    sigemptyset(&set);
2535    sigaddset(&set, GC_get_suspend_signal());
2536    sigaddset(&set, GC_get_thr_restart_signal());
2537    if (REAL_FUNC(pthread_sigmask)(SIG_UNBLOCK, &set, NULL) != 0)
2538      ABORT("pthread_sigmask failed");
2539  }
2540  #  endif /* PTHREAD_STOP_WORLD_IMPL || GC_EXPLICIT_SIGNALS_UNBLOCK */
2541  
2542  GC_API int GC_CALL
2543  GC_register_my_thread(const struct GC_stack_base *sb)
2544  {
2545    GC_thread me;
2546  
2547    if (!GC_need_to_lock)
2548      ABORT("Threads explicit registering is not previously enabled");
2549  
2550    /* We lock here, since we want to wait for an ongoing GC. */
2551    LOCK();
2552    me = GC_self_thread_inner();
2553    if (LIKELY(NULL == me)) {
2554      me = GC_register_my_thread_inner(sb, thread_id_self());
2555  #  ifdef GC_PTHREADS
2556  #    ifdef CPPCHECK
2557      GC_noop1(me->flags);
2558  #    endif
2559      /*
2560       * Treat as detached, since we do not need to worry about pointer
2561       * results.
2562       */
2563      me->flags |= DETACHED;
2564  #  else
2565      (void)me;
2566  #  endif
2567    } else {
2568  #  ifdef GC_PTHREADS
2569      if (KNOWN_FINISHED(me)) {
2570        /*
2571         * This code is executed when a thread is registered from the destructor
2572         * of the client thread key.
2573         */
2574  #    ifdef NACL
2575        GC_nacl_initialize_gc_thread(me);
2576  #    endif
2577  #    ifdef DARWIN
2578        /*
2579         * Reinitialize `mach_thread` to avoid `thread_suspend()` fail
2580         * with `MACH_SEND_INVALID_DEST` error.
2581         */
2582        me->mach_thread = mach_thread_self();
2583  #    endif
2584        GC_record_stack_base(me->crtn, sb);
2585        me->flags &= (unsigned char)~FINISHED; /*< but not `DETACHED` */
2586      } else
2587  #  endif
2588      /* else */ {
2589        UNLOCK();
2590        return GC_DUPLICATE;
2591      }
2592    }
2593  
2594  #  ifdef THREAD_LOCAL_ALLOC
2595    GC_init_thread_local(&me->tlfs);
2596  #  endif
2597  #  ifdef GC_EXPLICIT_SIGNALS_UNBLOCK
2598    /*
2599     * Since this could be executed from a thread destructor,
2600     * our signals might already be blocked.
2601     */
2602    GC_unblock_gc_signals();
2603  #  endif
2604  #  if defined(GC_ENABLE_SUSPEND_THREAD) && defined(SIGNAL_BASED_STOP_WORLD)
2605    if (UNLIKELY((me->ext_suspend_cnt & 1) != 0)) {
2606      GC_with_callee_saves_pushed(GC_suspend_self_blocked, (ptr_t)me);
2607    }
2608  #  endif
2609    UNLOCK();
2610    return GC_SUCCESS;
2611  }
2612  
2613  #  if defined(GC_PTHREADS) && !defined(PLATFORM_THREADS) \
2614        && !defined(SN_TARGET_PSP2)
2615  
2616  GC_INNER_PTHRSTART void
2617  GC_thread_exit_proc(void *arg)
2618  {
2619    GC_thread me = (GC_thread)arg;
2620    IF_CANCEL(int cancel_state;)
2621  
2622  #    ifdef DEBUG_THREADS
2623    GC_log_printf("Called GC_thread_exit_proc on %p, gc_thread= %p\n",
2624                  THREAD_ID_TO_VPTR(me->id), (void *)me);
2625  #    endif
2626    LOCK();
2627    DISABLE_CANCEL(cancel_state);
2628    GC_wait_for_gc_completion(FALSE);
2629    GC_unregister_my_thread_inner(me);
2630    RESTORE_CANCEL(cancel_state);
2631    UNLOCK();
2632  }
2633  
2634  #    define GC_wrap_pthread_join WRAP_FUNC(pthread_join)
2635  GC_API int
2636  GC_wrap_pthread_join(pthread_t thread, void **retval)
2637  {
2638    int result;
2639    GC_thread t;
2640  
2641    INIT_REAL_SYMS();
2642  #    ifdef DEBUG_THREADS
2643    GC_log_printf("thread %p is joining thread %p\n",
2644                  PTHREAD_TO_VPTR(pthread_self()), PTHREAD_TO_VPTR(thread));
2645  #    endif
2646  
2647    /* After the join, thread id may have been recycled. */
2648    READER_LOCK();
2649    t = (GC_thread)COVERT_DATAFLOW_P(GC_lookup_by_pthread(thread));
2650    /*
2651     * This is guaranteed to be the intended one, since the thread id
2652     * cannot have been recycled by `pthreads`.
2653     */
2654    READER_UNLOCK();
2655  
2656    result = REAL_FUNC(pthread_join)(thread, retval);
2657  #    ifdef FREEBSD
2658    /*
2659     * On FreeBSD, the wrapped `pthread_join()` sometimes returns (what
2660     * appears to be) a spurious `EINTR` which caused the test and real code
2661     * to fail gratuitously.  Having looked at system `pthreads` library
2662     * source code, I see how such return code value may be generated:
2663     * in one path of the code, `pthread_join()` just returns the `errno`
2664     * setting of the thread being joined - this does not match the POSIX
2665     * specification or the local man pages.  Thus, I have taken the liberty
2666     * to catch this one spurious return value.
2667     */
2668    if (UNLIKELY(result == EINTR))
2669      result = 0;
2670  #    endif
2671  
2672    if (LIKELY(0 == result)) {
2673      LOCK();
2674      /*
2675       * Here the `pthread_id` may have been recycled.  Delete the thread from
2676       *`GC_threads` (unless it has been registered again from the destructor
2677       * of the client thread key).
2678       */
2679      if (KNOWN_FINISHED(t)) {
2680        GC_delete_thread(t);
2681      }
2682      UNLOCK();
2683    }
2684  
2685  #    ifdef DEBUG_THREADS
2686    GC_log_printf("thread %p join with thread %p %s\n",
2687                  PTHREAD_TO_VPTR(pthread_self()), PTHREAD_TO_VPTR(thread),
2688                  result != 0 ? "failed" : "succeeded");
2689  #    endif
2690    return result;
2691  }
2692  #    undef GC_wrap_pthread_join
2693  
2694  #    define GC_wrap_pthread_detach WRAP_FUNC(pthread_detach)
2695  GC_API int
2696  GC_wrap_pthread_detach(pthread_t thread)
2697  {
2698    int result;
2699    GC_thread t;
2700  
2701    INIT_REAL_SYMS();
2702    READER_LOCK();
2703    t = (GC_thread)COVERT_DATAFLOW_P(GC_lookup_by_pthread(thread));
2704    READER_UNLOCK();
2705    result = REAL_FUNC(pthread_detach)(thread);
2706    if (LIKELY(0 == result)) {
2707      LOCK();
2708      /* Here the `pthread_id` may have been recycled. */
2709      if (KNOWN_FINISHED(t)) {
2710        GC_delete_thread(t);
2711      } else {
2712        t->flags |= DETACHED;
2713      }
2714      UNLOCK();
2715    }
2716    return result;
2717  }
2718  #    undef GC_wrap_pthread_detach
2719  
/*
 * The data passed to a newly created thread (see
 * `GC_start_rtn_prepare_thread()` where it is consumed).
 */
struct start_info {
  /* The client thread routine and its argument. */
  void *(*start_routine)(void *);
  void *arg;
  /*
   * Posted by the new thread once it has been registered.
   * Note: a value of 1 means the thread is in our thread table, but
   * the parent process has not noticed it yet.
   */
  sem_t registered;
  /* The initial value of the flags of the thread descriptor. */
  unsigned char flags;
};
2730  
2731  GC_INNER_PTHRSTART GC_thread
2732  GC_start_rtn_prepare_thread(void *(**pstart)(void *), void **pstart_arg,
2733                              struct GC_stack_base *sb, void *arg)
2734  {
2735    struct start_info *psi = (struct start_info *)arg;
2736    thread_id_t self_id = thread_id_self();
2737    GC_thread me;
2738  
2739  #    ifdef DEBUG_THREADS
2740    GC_log_printf("Starting thread %p, sp= %p\n",
2741                  PTHREAD_TO_VPTR(pthread_self()), (void *)GC_approx_sp());
2742  #    endif
2743    /*
2744     * If a collection occurs before the thread is registered, that collection
2745     * will ignore this thread.  That is fine, since it will block trying to
2746     * acquire the allocator lock, and will not yet hold interesting pointers.
2747     */
2748    LOCK();
2749    /*
2750     * We register the thread here instead of in the parent process, so that
2751     * we do not need to hold the allocator lock during `pthread_create()`
2752     * call.
2753     */
2754    me = GC_register_my_thread_inner(sb, self_id);
2755    GC_ASSERT(me != &first_thread);
2756    me->flags = psi->flags;
2757  #    ifdef GC_WIN32_THREADS
2758    GC_win32_cache_self_pthread(self_id);
2759  #    endif
2760  #    ifdef THREAD_LOCAL_ALLOC
2761    GC_init_thread_local(&me->tlfs);
2762  #    endif
2763    UNLOCK();
2764  
2765    *pstart = psi->start_routine;
2766    *pstart_arg = psi->arg;
2767  #    if defined(DEBUG_THREADS) && defined(FUNCPTR_IS_DATAPTR)
2768    GC_log_printf("start_routine= %p\n", CAST_THRU_UINTPTR(void *, *pstart));
2769  #    endif
2770    sem_post(&psi->registered);
2771    /* This was the last action on `*psi`; OK to deallocate. */
2772    return me;
2773  }
2774  
2775  STATIC void *
2776  GC_pthread_start(void *arg)
2777  {
2778  #    ifdef INCLUDE_LINUX_THREAD_DESCR
2779    struct GC_stack_base sb;
2780  
2781  #      ifdef REDIRECT_MALLOC
2782    /*
2783     * `GC_get_stack_base()` may call `pthread_getattr_np()`, which
2784     * can unfortunately call `realloc()`, which may allocate from
2785     * an unregistered thread.  This is unpleasant, since it might
2786     * force heap growth (or, even, heap overflow).
2787     */
2788    GC_disable();
2789  #      endif
2790    if (GC_get_stack_base(&sb) != GC_SUCCESS)
2791      ABORT("Failed to get thread stack base");
2792  #      ifdef REDIRECT_MALLOC
2793    GC_enable();
2794  #      endif
2795    return GC_pthread_start_inner(&sb, arg);
2796  #    else
2797    return GC_call_with_stack_base(GC_pthread_start_inner, arg);
2798  #    endif
2799  }
2800  
2801  #    define GC_wrap_pthread_create WRAP_FUNC(pthread_create)
2802  GC_API int
2803  GC_wrap_pthread_create(pthread_t *new_thread,
2804                         GC_PTHREAD_CREATE_CONST pthread_attr_t *attr,
2805                         void *(*start_routine)(void *), void *arg)
2806  {
2807    int result;
2808    struct start_info si;
2809  
2810    GC_ASSERT(I_DONT_HOLD_LOCK());
2811    INIT_REAL_SYMS();
2812    if (UNLIKELY(!GC_is_initialized))
2813      GC_init();
2814    GC_ASSERT(GC_thr_initialized);
2815  
2816    GC_init_lib_bounds();
2817    if (sem_init(&si.registered, GC_SEM_INIT_PSHARED, 0) == -1)
2818      ABORT("sem_init failed");
2819    si.flags = 0;
2820    si.start_routine = start_routine;
2821    si.arg = arg;
2822  
2823    /*
2824     * We resist the temptation to muck with the stack size here, even if the
2825     * default is unreasonably small.  That is the client's responsibility.
2826     */
2827  #    ifdef GC_ASSERTIONS
2828    {
2829      size_t stack_size = 0;
2830      if (NULL != attr) {
2831        if (pthread_attr_getstacksize(attr, &stack_size) != 0)
2832          ABORT("pthread_attr_getstacksize failed");
2833      }
2834      if (0 == stack_size) {
2835        pthread_attr_t my_attr;
2836  
2837        if (pthread_attr_init(&my_attr) != 0)
2838          ABORT("pthread_attr_init failed");
2839        if (pthread_attr_getstacksize(&my_attr, &stack_size) != 0)
2840          ABORT("pthread_attr_getstacksize failed");
2841        (void)pthread_attr_destroy(&my_attr);
2842      }
2843      /*
2844       * On Solaris 10 and on Win32 with `winpthreads` library, with the
2845       * default `attr` initialization, `stack_size` remains 0; fudge it.
2846       */
2847      if (UNLIKELY(0 == stack_size)) {
2848  #      if !defined(SOLARIS) && !defined(GC_WIN32_PTHREADS)
2849        WARN("Failed to get stack size for assertion checking\n", 0);
2850  #      endif
2851        stack_size = 1000000;
2852      }
2853      GC_ASSERT(stack_size >= 65536);
2854      /*
2855       * Our threads may need to do some work for the GC.
2856       * Ridiculously small threads will not work, and they
2857       * probably would not work anyway.
2858       */
2859    }
2860  #    endif
2861  
2862    if (attr != NULL) {
2863      int detachstate;
2864  
2865      if (pthread_attr_getdetachstate(attr, &detachstate) != 0)
2866        ABORT("pthread_attr_getdetachstate failed");
2867      if (PTHREAD_CREATE_DETACHED == detachstate)
2868        si.flags |= DETACHED;
2869    }
2870  
2871  #    ifdef PARALLEL_MARK
2872    if (!GC_parallel && UNLIKELY(GC_available_markers_m1 > 0))
2873      GC_start_mark_threads();
2874  #    endif
2875  #    ifdef DEBUG_THREADS
2876    GC_log_printf("About to start new thread from thread %p\n",
2877                  PTHREAD_TO_VPTR(pthread_self()));
2878  #    endif
2879    set_need_to_lock();
2880    result = REAL_FUNC(pthread_create)(new_thread, attr, GC_pthread_start, &si);
2881  
2882    /*
2883     * Wait until child has been added to the thread table.
2884     * This also ensures that we hold onto the stack-allocated `si`
2885     * until the child is done with it.
2886     */
2887    if (LIKELY(0 == result)) {
2888      IF_CANCEL(int cancel_state;)
2889  
2890      /* `pthread_create()` is not a cancellation point. */
2891      DISABLE_CANCEL(cancel_state);
2892  
2893      while (sem_wait(&si.registered) == -1) {
2894  #    ifdef HAIKU
2895        /* To workaround some bug in Haiku semaphores. */
2896        if (EACCES == errno)
2897          continue;
2898  #    endif
2899        if (errno != EINTR)
2900          ABORT("sem_wait failed");
2901      }
2902      RESTORE_CANCEL(cancel_state);
2903    }
2904    sem_destroy(&si.registered);
2905    return result;
2906  }
2907  #    undef GC_wrap_pthread_create
2908  
2909  #  endif /* GC_PTHREADS && !PLATFORM_THREADS && !SN_TARGET_PSP2 */
2910  
2911  #  if ((defined(GC_PTHREADS_PARAMARK) || defined(USE_PTHREAD_LOCKS)) \
2912         && !defined(NO_PTHREAD_TRYLOCK))                              \
2913        || defined(USE_SPIN_LOCK)
2914  /*
2915   * Spend a few cycles in a way that cannot introduce contention with
2916   * other threads.
2917   */
2918  #    define GC_PAUSE_SPIN_CYCLES 10
2919  STATIC void
2920  GC_pause(void)
2921  {
2922    int i;
2923  
2924    for (i = 0; i < GC_PAUSE_SPIN_CYCLES; ++i) {
2925      /* Something that is unlikely to be optimized away. */
2926  #    if defined(AO_HAVE_compiler_barrier) && !defined(BASE_ATOMIC_OPS_EMULATED)
2927      AO_compiler_barrier();
2928  #    else
2929      GC_noop1(i);
2930  #    endif
2931    }
2932  }
2933  #  endif /* USE_SPIN_LOCK || !NO_PTHREAD_TRYLOCK */
2934  
2935  #  ifndef SPIN_MAX
/* Maximum number of calls to `GC_pause()` before giving up. */
2937  #    define SPIN_MAX 128
2938  #  endif
2939  
2940  #  if (!defined(USE_SPIN_LOCK) && !defined(NO_PTHREAD_TRYLOCK) \
2941         && defined(USE_PTHREAD_LOCKS))                          \
2942        || defined(GC_PTHREADS_PARAMARK)
2943  /*
2944   * If we do not want to use the below spinlock implementation, either
2945   * because we do not have a `GC_test_and_set` implementation, or because
2946   * we do not want to risk sleeping, we can still try spinning
2947   * on `pthread_mutex_trylock` for a while.  This appears to be very
2948   * beneficial in many cases.
2949   * I suspect that under high contention this is nearly always better
2950   * than the spin lock.  But it is a bit slower on a uniprocessor.
2951   * Hence we still default to the spin lock.  This is also used to
2952   * acquire the mark lock for the parallel marker.
2953   *
2954   * Here we use a strict exponential back-off scheme.  I do not know
2955   * whether that is better or worse than the above.  We eventually
2956   * yield by calling `pthread_mutex_lock()`; it never makes sense to
2957   * explicitly sleep.
2958   */
2959  
2960  #    ifdef LOCK_STATS
2961  /* Note that `LOCK_STATS` requires `AO_HAVE_test_and_set`. */
2962  volatile AO_t GC_spin_count = 0;
2963  volatile AO_t GC_block_count = 0;
2964  volatile AO_t GC_unlocked_count = 0;
2965  #    endif
2966  
2967  STATIC void
2968  GC_generic_lock(pthread_mutex_t *lock)
2969  {
2970  #    ifndef NO_PTHREAD_TRYLOCK
2971    unsigned pause_length = 1;
2972    unsigned i;
2973  
2974    if (LIKELY(0 == pthread_mutex_trylock(lock))) {
2975  #      ifdef LOCK_STATS
2976      (void)AO_fetch_and_add1(&GC_unlocked_count);
2977  #      endif
2978      return;
2979    }
2980    for (; pause_length <= (unsigned)SPIN_MAX; pause_length <<= 1) {
2981      for (i = 0; i < pause_length; ++i) {
2982        GC_pause();
2983      }
2984      switch (pthread_mutex_trylock(lock)) {
2985      case 0:
2986  #      ifdef LOCK_STATS
2987        (void)AO_fetch_and_add1(&GC_spin_count);
2988  #      endif
2989        return;
2990      case EBUSY:
2991        break;
2992      default:
2993        ABORT("Unexpected error from pthread_mutex_trylock");
2994      }
2995    }
2996  #    endif /* !NO_PTHREAD_TRYLOCK */
2997  #    ifdef LOCK_STATS
2998    (void)AO_fetch_and_add1(&GC_block_count);
2999  #    endif
3000    pthread_mutex_lock(lock);
3001  }
3002  #  endif /* !USE_SPIN_LOCK || ... */
3003  
3004  #  if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS)
3005  GC_INNER volatile unsigned char GC_collecting = FALSE;
3006  
3007  #    if defined(AO_HAVE_char_load) && !defined(BASE_ATOMIC_OPS_EMULATED)
3008  #      define is_collecting() ((GC_bool)AO_char_load(&GC_collecting))
3009  #    else
3010  /*
3011   * `GC_collecting` is a hint, a potential data race between `GC_lock()`
3012   * and `ENTER_GC()`/`EXIT_GC()` is OK to ignore.
3013   */
3014  #      define is_collecting() ((GC_bool)GC_collecting)
3015  #    endif
3016  #  endif /* GC_PTHREADS && !GC_WIN32_THREADS */
3017  
3018  #  ifdef GC_ASSERTIONS
3019  GC_INNER unsigned long GC_lock_holder = NO_THREAD;
3020  #  endif
3021  
3022  #  if defined(USE_SPIN_LOCK)
3023  /*
3024   * Reasonably fast spin locks.  Basically the same implementation as
3025   * in STL `alloc.h` file.  This is not really the right way to do this
3026   * but until the POSIX scheduling mess gets straightened out...
3027   */
3028  
/* Spin cycles if we suspect we are running on a uniprocessor. */
3030  #    define low_spin_max 30
3031  
3032  /* Spin cycles for a multiprocessor. */
3033  #    define high_spin_max SPIN_MAX
3034  
3035  static volatile AO_t spin_max = low_spin_max;
3036  
3037  /*
3038   * A potential data race between threads invoking `GC_lock` which reads
3039   * and updates `spin_max` and `last_spins` could be ignored because these
3040   * variables are hints only.
3041   */
3042  static volatile AO_t last_spins = 0;
3043  
3044  GC_INNER void
3045  GC_lock(void)
3046  {
3047    AO_t my_spin_max, my_last_spins_half;
3048    size_t i;
3049  
3050    if (LIKELY(AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_CLEAR))
3051      return;
3052  
3053    my_spin_max = AO_load(&spin_max);
3054    my_last_spins_half = AO_load(&last_spins) / 2;
3055    for (i = 0; i < my_spin_max; i++) {
3056      if (is_collecting() || GC_nprocs == 1)
3057        goto yield;
3058      if (i < my_last_spins_half) {
3059        GC_pause();
3060        continue;
3061      }
3062      if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_CLEAR) {
3063        /*
3064         * Got it, spinning worked!  Thus we are probably not being
3065         * scheduled against the other process with which we were
3066         * contending.  Thus it makes sense to spin longer the next time.
3067         */
3068        AO_store(&last_spins, i);
3069        AO_store(&spin_max, high_spin_max);
3070        return;
3071      }
3072    }
3073    /* We are probably being scheduled against the other process.  Sleep. */
3074    AO_store(&spin_max, low_spin_max);
3075  yield:
3076    for (i = 0;; ++i) {
3077      if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_CLEAR) {
3078        return;
3079      }
3080  
3081      /*
3082       * Under Linux, very short sleeps tend to wait until the current time
3083       * quantum expires.  On old Linux kernels, `nanosleep()` (<= 2 ms) just
3084       * spins.  (Under Linux 2.4, this happens only for real-time processes.)
3085       * We want to minimize both behaviors here.
3086       */
3087  #    define SLEEP_THRESHOLD 12
3088  
3089      if (i < SLEEP_THRESHOLD) {
3090        sched_yield();
3091      } else {
3092        struct timespec ts;
3093  
3094        /*
3095         * Do not wait for more than about 15 ms, even under extreme
3096         * contention.
3097         */
3098        if (i > 24)
3099          i = 24;
3100  
3101        ts.tv_sec = 0;
3102        ts.tv_nsec = (unsigned32)1 << i;
3103        nanosleep(&ts, 0);
3104      }
3105    }
3106  }
3107  
3108  #  elif defined(USE_PTHREAD_LOCKS)
3109  #    ifdef USE_RWLOCK
3110  GC_INNER pthread_rwlock_t GC_allocate_ml = PTHREAD_RWLOCK_INITIALIZER;
3111  #    else
3112  GC_INNER pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
3113  #    endif
3114  
3115  #    ifndef NO_PTHREAD_TRYLOCK
3116  GC_INNER void
3117  GC_lock(void)
3118  {
3119    if (1 == GC_nprocs || is_collecting()) {
3120      pthread_mutex_lock(&GC_allocate_ml);
3121    } else {
3122      GC_generic_lock(&GC_allocate_ml);
3123    }
3124  }
3125  #    elif defined(GC_ASSERTIONS)
3126  GC_INNER void
3127  GC_lock(void)
3128  {
3129  #      ifdef USE_RWLOCK
3130    (void)pthread_rwlock_wrlock(&GC_allocate_ml); /*< exclusive */
3131  #      else
3132    pthread_mutex_lock(&GC_allocate_ml);
3133  #      endif
3134  }
3135  #    endif /* NO_PTHREAD_TRYLOCK && GC_ASSERTIONS */
3136  
3137  #  endif /* !USE_SPIN_LOCK && USE_PTHREAD_LOCKS */
3138  
3139  #  ifdef GC_PTHREADS_PARAMARK
3140  
3141  #    if defined(GC_ASSERTIONS) && defined(GC_WIN32_THREADS) \
3142          && !defined(USE_PTHREAD_LOCKS)
3143  /* Note: result is not guaranteed to be unique. */
3144  #      define NUMERIC_THREAD_ID(id) ((unsigned long)ADDR(PTHREAD_TO_VPTR(id)))
3145  #    endif
3146  
3147  #    ifdef GC_ASSERTIONS
3148  #      define SET_MARK_LOCK_HOLDER \
3149          (void)(GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self()))
3150  #      define UNSET_MARK_LOCK_HOLDER                       \
3151          do {                                               \
3152            GC_ASSERT(GC_mark_lock_holder                    \
3153                      == NUMERIC_THREAD_ID(pthread_self())); \
3154            GC_mark_lock_holder = NO_THREAD;                 \
3155          } while (0)
3156  #    else
3157  #      define SET_MARK_LOCK_HOLDER (void)0
3158  #      define UNSET_MARK_LOCK_HOLDER (void)0
3159  #    endif /* !GC_ASSERTIONS */
3160  
3161  static pthread_cond_t builder_cv = PTHREAD_COND_INITIALIZER;
3162  
3163  #    ifndef GC_WIN32_THREADS
/*
 * Perform the run-time initialization of `mark_mutex`, if needed.
 * This does something only if `GLIBC_2_19_TSX_BUG` is defined and the
 * `glibc` version reported at run time is 2.19 or later.
 */
static void
setup_mark_lock(void)
{
#      ifdef GLIBC_2_19_TSX_BUG
  pthread_mutexattr_t mattr;
  int glibc_minor = -1;
  int glibc_major = GC_parse_version(&glibc_minor, gnu_get_libc_version());

  /* Nothing to do for `glibc` older than 2.19. */
  if (glibc_major < 2 || (2 == glibc_major && glibc_minor < 19))
    return;

  /* TODO: Disable this workaround for `glibc` with fixed TSX. */
  /*
   * Re-initialize the mutex with the explicitly specified normal type;
   * this disables lock elision to work around a bug in `glibc` 2.19+.
   */
  if (pthread_mutexattr_init(&mattr) != 0)
    ABORT("pthread_mutexattr_init failed");
  if (pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_NORMAL) != 0)
    ABORT("pthread_mutexattr_settype failed");
  if (pthread_mutex_init(&mark_mutex, &mattr) != 0)
    ABORT("pthread_mutex_init failed");
  (void)pthread_mutexattr_destroy(&mattr);
#      endif
}
3185  #    endif /* !GC_WIN32_THREADS */
3186  
3187  GC_INNER void
3188  GC_acquire_mark_lock(void)
3189  {
3190  #    if defined(NUMERIC_THREAD_ID_UNIQUE) && !defined(THREAD_SANITIZER)
3191    GC_ASSERT(GC_mark_lock_holder != NUMERIC_THREAD_ID(pthread_self()));
3192  #    endif
3193    GC_generic_lock(&mark_mutex);
3194    SET_MARK_LOCK_HOLDER;
3195  }
3196  
3197  GC_INNER void
3198  GC_release_mark_lock(void)
3199  {
3200    UNSET_MARK_LOCK_HOLDER;
3201    if (pthread_mutex_unlock(&mark_mutex) != 0)
3202      ABORT("pthread_mutex_unlock failed");
3203  }
3204  
3205  /*
3206   * Collector must wait for free-list builders for 2 reasons:
3207   *   - Mark bits may still be getting examined without lock;
3208   *   - Partial free lists referenced only by locals may not be scanned
3209   *     correctly, e.g. if they contain "pointer-free" objects, since the
3210   *     free-list link may be ignored.
3211   */
3212  STATIC void
3213  GC_wait_builder(void)
3214  {
3215    ASSERT_CANCEL_DISABLED();
3216    UNSET_MARK_LOCK_HOLDER;
3217    if (pthread_cond_wait(&builder_cv, &mark_mutex) != 0)
3218      ABORT("pthread_cond_wait failed");
3219    GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
3220    SET_MARK_LOCK_HOLDER;
3221  }
3222  
3223  GC_INNER void
3224  GC_wait_for_reclaim(void)
3225  {
3226    GC_acquire_mark_lock();
3227    while (GC_fl_builder_count > 0) {
3228      GC_wait_builder();
3229    }
3230    GC_release_mark_lock();
3231  }
3232  
3233  #    if defined(CAN_HANDLE_FORK) && defined(THREAD_SANITIZER)
3234  /*
3235   * Identical to `GC_wait_for_reclaim()` but with the `no_sanitize`
3236   * attribute as a workaround for TSan which does not notice that the
3237   * allocator lock is acquired in `fork_prepare_proc()`.
3238   */
3239  GC_ATTR_NO_SANITIZE_THREAD
3240  static void
3241  wait_for_reclaim_atfork(void)
3242  {
3243    GC_acquire_mark_lock();
3244    while (GC_fl_builder_count > 0)
3245      GC_wait_builder();
3246    GC_release_mark_lock();
3247  }
3248  #    endif /* CAN_HANDLE_FORK && THREAD_SANITIZER */
3249  
3250  GC_INNER void
3251  GC_notify_all_builder(void)
3252  {
3253    GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self()));
3254    if (pthread_cond_broadcast(&builder_cv) != 0)
3255      ABORT("pthread_cond_broadcast failed");
3256  }
3257  
3258  GC_INNER void
3259  GC_wait_marker(void)
3260  {
3261    ASSERT_CANCEL_DISABLED();
3262    GC_ASSERT(GC_parallel);
3263    UNSET_MARK_LOCK_HOLDER;
3264    if (pthread_cond_wait(&mark_cv, &mark_mutex) != 0)
3265      ABORT("pthread_cond_wait failed");
3266    GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
3267    SET_MARK_LOCK_HOLDER;
3268  }
3269  
3270  GC_INNER void
3271  GC_notify_all_marker(void)
3272  {
3273    GC_ASSERT(GC_parallel);
3274    if (pthread_cond_broadcast(&mark_cv) != 0)
3275      ABORT("pthread_cond_broadcast failed");
3276  }
3277  
3278  #  endif /* GC_PTHREADS_PARAMARK */
3279  
3280  GC_INNER GC_on_thread_event_proc GC_on_thread_event = 0;
3281  
3282  GC_API void GC_CALL
3283  GC_set_on_thread_event(GC_on_thread_event_proc fn)
3284  {
3285    /* Note: `fn` may be 0 (means no event notifier). */
3286    LOCK();
3287    GC_on_thread_event = fn;
3288    UNLOCK();
3289  }
3290  
3291  GC_API GC_on_thread_event_proc GC_CALL
3292  GC_get_on_thread_event(void)
3293  {
3294    GC_on_thread_event_proc fn;
3295  
3296    READER_LOCK();
3297    fn = GC_on_thread_event;
3298    READER_UNLOCK();
3299    return fn;
3300  }
3301  
3302  #  ifdef STACKPTR_CORRECTOR_AVAILABLE
3303  GC_INNER GC_sp_corrector_proc GC_sp_corrector = 0;
3304  #  endif
3305  
3306  GC_API void GC_CALL
3307  GC_set_sp_corrector(GC_sp_corrector_proc fn)
3308  {
3309  #  ifdef STACKPTR_CORRECTOR_AVAILABLE
3310    LOCK();
3311    GC_sp_corrector = fn;
3312    UNLOCK();
3313  #  else
3314    UNUSED_ARG(fn);
3315  #  endif
3316  }
3317  
3318  GC_API GC_sp_corrector_proc GC_CALL
3319  GC_get_sp_corrector(void)
3320  {
3321  #  ifdef STACKPTR_CORRECTOR_AVAILABLE
3322    GC_sp_corrector_proc fn;
3323  
3324    READER_LOCK();
3325    fn = GC_sp_corrector;
3326    READER_UNLOCK();
3327    return fn;
3328  #  else
3329    return 0; /*< unsupported */
3330  #  endif
3331  }
3332  
3333  #  ifdef PTHREAD_REGISTER_CANCEL_WEAK_STUBS
3334  /* Workaround "undefined reference" linkage errors on some targets. */
3335  EXTERN_C_BEGIN
3336  extern void __pthread_register_cancel(void) __attribute__((__weak__));
3337  extern void __pthread_unregister_cancel(void) __attribute__((__weak__));
3338  EXTERN_C_END
3339  
3340  void
3341  __pthread_register_cancel(void)
3342  {
3343  }
3344  void
3345  __pthread_unregister_cancel(void)
3346  {
3347  }
3348  #  endif
3349  
3350  #  undef do_blocking_enter
3351  
3352  #endif /* THREADS */
3353