win32_threads.c raw

   1  /*
   2   * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
   3   * Copyright (c) 1996 by Silicon Graphics.  All rights reserved.
   4   * Copyright (c) 1998 by Fergus Henderson.  All rights reserved.
   5   * Copyright (c) 2000-2008 by Hewlett-Packard Development Company.
   6   * All rights reserved.
   7   * Copyright (c) 2008-2022 Ivan Maidanski
   8   *
   9   * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
  10   * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
  11   *
  12   * Permission is hereby granted to use or copy this program
  13   * for any purpose, provided the above notices are retained on all copies.
  14   * Permission to modify the code and to distribute modified code is granted,
  15   * provided the above notices are retained, and a notice that the code was
  16   * modified is included with the above copyright notice.
  17   */
  18  
  19  #include "private/pthread_support.h"
  20  
  21  #if defined(GC_WIN32_THREADS)
  22  
  23  /*
       * The allocator lock definition.  The lock object is defined in this
       * file only if `USE_PTHREAD_LOCKS` is not defined; its type is `SRWLOCK`
       * if `USE_RWLOCK` is defined, and `CRITICAL_SECTION` otherwise.
       */
  24  #  ifndef USE_PTHREAD_LOCKS
  25  #    ifdef USE_RWLOCK
  26  GC_INNER SRWLOCK GC_allocate_ml;
  27  #    else
  28  GC_INNER CRITICAL_SECTION GC_allocate_ml;
  29  #    endif
  30  #  endif /* !USE_PTHREAD_LOCKS */
  31  
  32  #  undef CreateThread
  33  #  undef ExitThread
  34  #  undef _beginthreadex
  35  #  undef _endthreadex
  36  
  37  #  if !defined(GC_PTHREADS) && !defined(MSWINCE)
  38  #    include <errno.h>
  39  #    include <process.h> /*< for `_beginthreadex`, `_endthreadex` */
  40  #  endif
  41  
  42  static ptr_t copy_ptr_regs(word *regs, const CONTEXT *pcontext);
  43  
  44  #  ifndef GC_NO_THREADS_DISCOVERY
  45  /*
  46   * This code operates in two distinct modes, depending on the setting
  47   * of `GC_win32_dll_threads`.  If `GC_win32_dll_threads` is set, then all
  48   * threads in the process are implicitly registered with the collector
  49   * by `DllMain()`.  No explicit registration is required, and attempts
  50   * at explicit registration are ignored.  This mode is very different
  51   * from the POSIX operation of the collector.  In this mode access to
  52   * the thread table is lock-free.  Hence there is a static limit on the
  53   * number of threads.
  54   */
  55
  56  /*
  57   * `GC_DISCOVER_TASK_THREADS` should be used if `DllMain`-based thread
  58   * registration is required but it is impossible to call
  59   * `GC_use_threads_discovery()` before other GC routines.
  60   */
  61
  62  #    ifndef GC_DISCOVER_TASK_THREADS
      /*
       * The mode selector described above.  It is off initially and is
       * turned on by `GC_use_threads_discovery()`.
       */
  63  GC_INNER GC_bool GC_win32_dll_threads = FALSE;
  64  #    endif
  65  #  else
  66  /*
  67   * If not `GC_win32_dll_threads` (or the collector is built without `GC_DLL`
  68   * macro defined), things operate in a way that is very similar to POSIX
  69   * platforms, and new threads must be registered with the collector,
  70   * e.g. by using preprocessor-based interception of the thread primitives.
  71   * In this case, we use a real data structure for the thread table.
  72   * Note that there is no equivalent of linker-based call interception,
  73   * since we do not have ELF-like facilities.  The Windows analog appears
  74   * to be "API hooking", which really seems to be a standard way to do minor
  75   * binary rewriting (?).  I would prefer not to have the basic collector
  76   * rely on such facilities, but an optional package that intercepts thread
  77   * calls this way would probably be nice.
  78   */
  79  #    undef MAX_THREADS
  80  /*
       * The threads discovery is not supported in this configuration, thus
       * `dll_thread_table[]` is always empty; keep it as small as possible.
       */
  81  #    define MAX_THREADS 1
  82  #  endif /* GC_NO_THREADS_DISCOVERY */
  83  
  84  /*
  85   * We have two variants of the thread table.  Which one we use depends
  86   * on whether `GC_win32_dll_threads` is set.  Note that before the
  87   * initialization, we do not add any entries to either table, even
  88   * if `DllMain()` is called.  The main thread will be added on the
  89   * collector initialization.
  90   */
  91  
  92  GC_API void GC_CALL
  93  GC_use_threads_discovery(void)
  94  {
  95  #  ifdef GC_NO_THREADS_DISCOVERY
  96    /*
  97     * `GC_use_threads_discovery()` is currently incompatible with
  98     * `pthreads` and WinCE.  It might be possible to get `DllMain`-based
  99     * thread registration to work with Cygwin, but if you try it then
 100     * you are on your own.
 101     */
 102    ABORT("GC DllMain-based thread registration unsupported");
 103  #  else
 104    /* Turn on `GC_win32_dll_threads`. */
 105    GC_ASSERT(!GC_is_initialized);
 106    /*
 107     * Note that `GC_use_threads_discovery()` is expected to be called by
 108     * the client application (not from `DllMain()`) at start-up.
 109     */
 110  #    ifndef GC_DISCOVER_TASK_THREADS
 111    GC_win32_dll_threads = TRUE;
 112  #    endif
 113    GC_init();
 114  #    ifdef CPPCHECK
 115    GC_noop1((word)(GC_funcptr_uint)(&GC_DllMain));
 116  #    endif
 117  #  endif
 118  }
 119  
 120  #  ifndef GC_NO_THREADS_DISCOVERY
 121  /*
 122   * We track thread attachments while the world is supposed to be stopped.
 123   * Unfortunately, we cannot stop them from starting, since blocking in
 124   * `DllMain` seems to cause the world to deadlock.  Thus, we have to
 125   * recover if we notice this in the middle of marking.
      *
      * The flag is set by `GC_register_my_thread_inner()` if a thread gets
      * registered while `GC_please_stop` is on; it is cleared at the
      * beginning of `GC_stop_world()` and is tested-and-cleared by
      * `GC_started_thread_while_stopped()`.
 126   */
 127  STATIC volatile AO_t GC_attached_thread = FALSE;
 128
 129  /*
 130   * We assume that `volatile` implies memory ordering, at least among
 131   * `volatile` variables.  This code should consistently use `libatomic_ops`
 132   * package or gcc atomic intrinsics.
      *
      * The flag is turned on by `GC_stop_world()` and is turned off at the
      * end of `GC_start_world()`.
 133   */
 134  STATIC volatile GC_bool GC_please_stop = FALSE;
 135  #  elif defined(GC_ASSERTIONS)
      /*
       * Without the threads discovery support, the flag is maintained only
       * for the assertions checking.
       */
 136  STATIC GC_bool GC_please_stop = FALSE;
 137  #  endif /* !GC_NO_THREADS_DISCOVERY || GC_ASSERTIONS */
 138  
 #  if defined(WRAP_MARK_SOME) && !defined(GC_PTHREADS)
 /*
  * Tell whether some thread has been attached (registered) since the
  * world was stopped, resetting the corresponding flag.  The result is
  * always `FALSE` unless `GC_win32_dll_threads`.
  */
 GC_INNER GC_bool
 GC_started_thread_while_stopped(void)
 {
 #    ifdef GC_NO_THREADS_DISCOVERY
   return FALSE;
 #    else
   if (!GC_win32_dll_threads)
     return FALSE;

 #      ifdef AO_HAVE_compare_and_swap_release
   /* Atomically turn the flag off if it is on. */
   return AO_compare_and_swap_release(&GC_attached_thread, TRUE,
                                      FALSE /* stored */)
              ? TRUE
              : FALSE;
 #      else
   /* Prior heap reads need to complete earlier. */
   AO_nop_full();

   if (!AO_load(&GC_attached_thread))
     return FALSE;

   AO_store(&GC_attached_thread, FALSE);
   return TRUE;
 #      endif
 #    endif
 }
 #  endif /* WRAP_MARK_SOME */
 163  
 164  /*
 165   * The thread table used if `GC_win32_dll_threads`.  This is a fixed-size
 166   * array.  Since we use runtime conditionals, both variants are always
 167   * defined.
      * `MAX_THREADS` is the number of entries in the table; the default
      * value below is used unless the macro is already defined.
 168   */
 169  #  ifndef MAX_THREADS
 170  #    define MAX_THREADS 512
 171  #  endif
 172
 173  /*
 174   * Things may get quite slow for large numbers of threads,
 175   * since we look them up with the sequential search.
 176   */
 177  static volatile struct GC_Thread_Rep dll_thread_table[MAX_THREADS];
 178  #  ifndef GC_NO_THREADS_DISCOVERY
      /*
       * The stack context entries for `dll_thread_table`: the entry of
       * the thread table with index `i` refers to the element `i` of this
       * array (see `GC_register_my_thread_inner()`).
       */
 179  static struct GC_StackContext_Rep dll_crtn_table[MAX_THREADS];
 180  #  endif
 181
 182  /*
       * Largest index in `dll_thread_table` that was ever used.
       * The value may temporarily be out of bounds, thus the readers
       * should obtain it by `GC_get_max_thread_index()`.
       */
 183  STATIC volatile LONG GC_max_thread_index = 0;
 184  
 /*
  * Add the calling thread to the thread table and return its entry.
  * `sb` is recorded as the stack base of the thread; `self_id` is stored
  * as the identifier of the entry (the entry is treated as invalid by the
  * lookup and stack pushing code until this store).  If
  * `GC_win32_dll_threads`, then a slot of `dll_thread_table` is claimed
  * without acquiring a lock; otherwise the entry is obtained from
  * `GC_new_thread()`.
  */
 185  GC_INNER GC_thread
 186  GC_register_my_thread_inner(const struct GC_stack_base *sb,
 187                              thread_id_t self_id)
 188  {
 189    GC_thread me;
 190
 191  #  ifdef GC_NO_THREADS_DISCOVERY
 192    GC_ASSERT(I_HOLD_LOCK());
 193  #  endif
 194    /*
 195     * The following should be a no-op according to the Win32 documentation.
 196     * There is an empirical evidence that it is not.
 197     */
 198  #  if defined(MPROTECT_VDB) && !defined(CYGWIN32)
 199    if (GC_auto_incremental
 200  #    ifdef GWW_VDB
 201        && !GC_gww_dirty_init()
 202  #    endif
 203    )
 204      GC_set_write_fault_handler();
 205  #  endif
 206
 207  #  ifndef GC_NO_THREADS_DISCOVERY
 208    if (GC_win32_dll_threads) {
 209      int i;
 210      /*
 211       * It appears to be unsafe to acquire a lock here, since this code
 212       * is apparently not preemptible on some systems.  (This is based on
 213       * complaints, not on Microsoft's official documentation, which says
 214       * this should perform "only simple initialization tasks".)
 215       * Hence we make do with a nonblocking synchronization.
 216       * It has been claimed that `DllMain` is really only executed with
 217       * a particular system lock held, and thus careful use of locking
 218       * around code that does not call back into the system libraries might
 219       * be OK.  (But this has not been tested across all Win32 versions.)
 220       */
          /*
           * Claim the first slot which is not in use: the exchange sets
           * the flag to 1 and yields its previous value, so a zero result
           * means the slot has just been acquired by this thread.
           */
 221      for (i = 0;
 222           InterlockedExchange(&dll_thread_table[i].tm.long_in_use, 1) != 0;
 223           i++) {
 224        /*
 225         * Compare-and-swap would make this cleaner, but that is not
 226         * supported before Windows 98 and NT 4.0.  In Windows 2000,
 227         * `InterlockedExchange` is supposed to be replaced by
 228         * `InterlockedExchangePointer`, but that is not really what
 229         * is needed here.
 230         */
 231
 232        /*
 233         * FIXME: We should eventually declare Windows 95 dead and use
 234         * AO primitives here.
 235         */
            /* The last slot is occupied too, so there is no room left. */
 236        if (i == MAX_THREADS - 1)
 237          ABORT("Too many threads");
 238      }
 239      /*
 240       * Update `GC_max_thread_index` if necessary.  The following is safe,
 241       * and unlike `CompareExchange`-based solutions seems to work on all
 242       * Windows 95 and later platforms.  Note that `GC_max_thread_index`
 243       * may be temporarily out of bounds, so readers have to compensate.
 244       */
 245      while (i > GC_max_thread_index) {
 246        InterlockedIncrement((LONG *)&GC_max_thread_index);
 247        /* Cast away `volatile` for older versions of Win32 headers. */
 248      }
 249      if (UNLIKELY(GC_max_thread_index >= MAX_THREADS)) {
 250        /*
 251         * We overshot due to simultaneous increments.
 252         * Setting it to `MAX_THREADS - 1` is always safe.
 253         */
 254        GC_max_thread_index = MAX_THREADS - 1;
 255      }
 256      me = (GC_thread)(dll_thread_table + i);
          /* The thread entry uses the stack context entry of the same index. */
 257      me->crtn = &dll_crtn_table[i];
 258    } else
 259  #  endif
 260    /* else */ {
 261      /* Not using `DllMain`. */
 262      me = GC_new_thread(self_id);
 263    }
 264  #  ifdef GC_PTHREADS
 265    me->pthread_id = pthread_self();
 266  #  endif
 267  #  ifndef MSWINCE
 268    /* `GetCurrentThread()` returns a pseudohandle (a constant value). */
 269    if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
 270                         GetCurrentProcess(), (HANDLE *)&me->handle,
 271                         0 /* `dwDesiredAccess` */, FALSE /* `bInheritHandle` */,
 272                         DUPLICATE_SAME_ACCESS)) {
 273      ABORT_ARG1("DuplicateHandle failed", ": errcode= 0x%X",
 274                 (unsigned)GetLastError());
 275    }
 276  #  endif
 277  #  if defined(WOW64_THREAD_CONTEXT_WORKAROUND) && defined(MSWINRT_FLAVOR)
 278    /*
 279     * Lookup TIB value via a call to `NtCurrentTeb()` on thread registration
 280     * rather than calling `GetThreadSelectorEntry()` which is not available
 281     * on UWP.
 282     */
 283    me->tib = (GC_NT_TIB *)NtCurrentTeb();
 284  #  endif
 285    me->crtn->last_stack_min = ADDR_LIMIT;
 286    GC_record_stack_base(me->crtn, sb);
 287    /*
 288     * Up until this point, `GC_push_all_stacks` considers this thread
 289     * invalid.  And, up until this point, the entry is viewed by
 290     * `GC_win32_dll_lookup_thread` as reserved but invalid.
 291     */
 292    ((volatile struct GC_Thread_Rep *)me)->id = self_id;
 293  #  ifndef GC_NO_THREADS_DISCOVERY
 294    if (GC_win32_dll_threads) {
          /*
           * The thread has been attached while the world is supposed to be
           * stopped; leave a note for `GC_started_thread_while_stopped()`.
           */
 295      if (GC_please_stop) {
 296        AO_store(&GC_attached_thread, TRUE);
 297        AO_nop_full(); /*< later updates must become visible after this */
 298      }
 299      /*
 300       * We would like to wait here, but cannot, since waiting in `DllMain()`
 301       * provokes deadlocks.  Thus we force marking to be restarted instead.
 302       */
 303    } else
 304  #  endif
 305    /* else */ {
 306      /*
 307       * `GC_please_stop` is `FALSE`, otherwise both we and the thread-stopping
 308       * code would be holding the allocator lock.
 309       */
 310      GC_ASSERT(!GC_please_stop);
 311    }
 312    return me;
 313  }
 314  
 315  /*
 316   * `GC_max_thread_index` may temporarily be larger than `MAX_THREADS`.
 317   * To avoid subscript errors, we check it on access.
 318   */
 319  GC_INLINE LONG
 320  GC_get_max_thread_index(void)
 321  {
 322    LONG my_max = GC_max_thread_index;
 323    if (UNLIKELY(my_max >= MAX_THREADS))
 324      return MAX_THREADS - 1;
 325    return my_max;
 326  }
 327  
 328  #  ifndef GC_NO_THREADS_DISCOVERY
 329  GC_INNER GC_thread
 330  GC_win32_dll_lookup_thread(thread_id_t id)
 331  {
 332    int i;
 333    LONG my_max = GC_get_max_thread_index();
 334  
 335    GC_ASSERT(GC_win32_dll_threads);
 336    for (i = 0; i <= my_max; i++) {
 337      if (AO_load_acquire(&dll_thread_table[i].tm.in_use)
 338          && dll_thread_table[i].id == id) {
 339        /*
 340         * Must still be in use, since nobody else can store our
 341         * thread `id`.
 342         */
 343        break;
 344      }
 345    }
 346    return i <= my_max ? (GC_thread)(dll_thread_table + i) : NULL;
 347  }
 348  #  endif /* !GC_NO_THREADS_DISCOVERY */
 349  
 350  #  ifdef GC_PTHREADS
 351  /*
 352   * A quick-and-dirty cache of the mapping between `pthread_t` and
 353   * Win32 thread id.
      * The cache is a table of `PTHREAD_MAP_SIZE` elements indexed by
      * a hash of the numeric value of `pthread_t`.  Storing a value
      * silently replaces the one previously stored at the same index,
      * so a value fetched from the cache is just a hint which should be
      * verified (as `GC_lookup_by_pthread()` does).
 354   */
 355  #    define PTHREAD_MAP_SIZE 512
 356  thread_id_t GC_pthread_map_cache[PTHREAD_MAP_SIZE] = { 0 };
 357  /*
       * It appears `pthread_t` is really a pointer type...
       * The numeric value is shifted right by 5 bits before it is reduced
       * modulo the table size.
       */
 358  #    define PTHREAD_MAP_INDEX(pthread_id) \
 359        ((NUMERIC_THREAD_ID(pthread_id) >> 5) % PTHREAD_MAP_SIZE)
      /* Store `win32_id` to the cache element selected by `pthread_id`. */
 360  #    define SET_PTHREAD_MAP_CACHE(pthread_id, win32_id) \
 361        (void)(GC_pthread_map_cache[PTHREAD_MAP_INDEX(pthread_id)] = (win32_id))
      /* Fetch the value of the cache element selected by `pthread_id`. */
 362  #    define GET_PTHREAD_MAP_CACHE(pthread_id) \
 363        GC_pthread_map_cache[PTHREAD_MAP_INDEX(pthread_id)]
 364  
 365  GC_INNER void
 366  GC_win32_cache_self_pthread(thread_id_t self_id)
 367  {
 368    pthread_t self = pthread_self();
 369  
 370    GC_ASSERT(I_HOLD_LOCK());
 371    SET_PTHREAD_MAP_CACHE(self, self_id);
 372  }
 373  
 374  GC_INNER GC_thread
 375  GC_lookup_by_pthread(pthread_t thread)
 376  {
 377    /*
 378     * TODO: Search in `dll_thread_table` instead when `DllMain`-based thread
 379     * registration is made compatible with `pthreads` (and turned on).
 380     */
 381    thread_id_t id;
 382    GC_thread p;
 383    int hv;
 384  
 385    GC_ASSERT(I_HOLD_READER_LOCK());
 386    id = GET_PTHREAD_MAP_CACHE(thread);
 387    /* We first try the cache. */
 388    for (p = GC_threads[THREAD_TABLE_INDEX(id)]; p != NULL; p = p->tm.next) {
 389      if (LIKELY(THREAD_EQUAL(p->pthread_id, thread)))
 390        return p;
 391    }
 392  
 393    /* If that fails, we use a very slow approach. */
 394    for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) {
 395      for (p = GC_threads[hv]; p != NULL; p = p->tm.next) {
 396        if (THREAD_EQUAL(p->pthread_id, thread))
 397          return p;
 398      }
 399    }
 400    return NULL;
 401  }
 402  #  endif /* GC_PTHREADS */
 403  
 404  #  ifdef WOW64_THREAD_CONTEXT_WORKAROUND
      /* Define the flags by ourselves if the headers do not provide them. */
 405  #    ifndef CONTEXT_EXCEPTION_ACTIVE
 406  #      define CONTEXT_EXCEPTION_ACTIVE 0x08000000
 407  #      define CONTEXT_EXCEPTION_REQUEST 0x40000000
 408  #      define CONTEXT_EXCEPTION_REPORTING 0x80000000
 409  #    endif
 410  /* Is 32-bit code running on Win64? */
 411  static GC_bool isWow64;
      /*
       * `GET_THREAD_CONTEXT_FLAGS` is the value to store to `ContextFlags`
       * before calling `GetThreadContext()`.  The integer and control
       * registers are requested always.  Under WoW64, the exception
       * reporting and the segment registers are requested in addition.
       */
 412  #    define GET_THREAD_CONTEXT_FLAGS                                \
 413        (isWow64 ? CONTEXT_INTEGER | CONTEXT_CONTROL                  \
 414                       | CONTEXT_EXCEPTION_REQUEST | CONTEXT_SEGMENTS \
 415                 : CONTEXT_INTEGER | CONTEXT_CONTROL)
 416  #  elif defined(I386) || defined(XMM_CANT_STORE_PTRS)
 417  #    define GET_THREAD_CONTEXT_FLAGS (CONTEXT_INTEGER | CONTEXT_CONTROL)
 418  #  else
      /* The floating-point part of the context is requested as well. */
 419  #    define GET_THREAD_CONTEXT_FLAGS \
 420        (CONTEXT_INTEGER | CONTEXT_CONTROL | CONTEXT_FLOATING_POINT)
 421  #  endif /* !WOW64_THREAD_CONTEXT_WORKAROUND && !I386 */
 422  
 423  /*
       * Suspend the given thread, if it is still active.
       * The allocator lock should be held by the caller.  Nothing is done
       * if the thread handle is `NULL` (if the threads discovery is
       * supported).  If the thread has already exited, then, instead of
       * suspending, its stack is excluded from pushing (the `pthreads`
       * case) or its entry is passed to `GC_delete_thread()`.
       * There are three variants of the suspension code:
       *   - WinCE: `SuspendThread()` is retried (with a sleep) until success;
       *   - `RETRY_GET_THREAD_CONTEXT`: the thread is suspended and its
       *     context is captured to `t->context_regs` and `t->context_sp`;
       *     if the context cannot be obtained, the thread is resumed and
       *     the attempt is repeated (up to `MAX_SUSPEND_THREAD_RETRIES`);
       *   - otherwise: a single `SuspendThread()` call.
       * On success, the thread is marked with `IS_SUSPENDED` and the client
       * thread event callback (if any) is notified.  The dirty lock is held
       * around the suspension and is released before every return.
       */
 424  STATIC void
 425  GC_suspend(GC_thread t)
 426  {
 427  #  ifndef MSWINCE
 428    DWORD exitCode;
 429  #    ifdef RETRY_GET_THREAD_CONTEXT
 430    int retry_cnt;
 431  #      define MAX_SUSPEND_THREAD_RETRIES (1000 * 1000)
 432  #    endif
 433  #  endif
 434
 435    GC_ASSERT(I_HOLD_LOCK());
 436  #  ifndef GC_NO_THREADS_DISCOVERY
        /* There is nothing to suspend if the entry has no thread handle. */
 437    if (NULL == GC_cptr_load_acquire(&t->handle))
 438      return;
 439  #  endif
 440  #  if defined(DEBUG_THREADS) && !defined(MSWINCE) \
 441        && (!defined(MSWIN32) || defined(CONSOLE_LOG))
 442    GC_log_printf("Suspending 0x%x\n", (int)t->id);
 443  #  endif
 444    GC_win32_unprotect_thread(t);
 445    GC_acquire_dirty_lock();
 446
 447  #  ifdef MSWINCE
 448    /* `SuspendThread()` will fail if thread is running kernel code. */
 449    while (SuspendThread(THREAD_HANDLE(t)) == (DWORD)-1) {
          /* Do not hold the dirty lock while sleeping. */
 450      GC_release_dirty_lock();
 451      Sleep(10); /*< in millis */
 452      GC_acquire_dirty_lock();
 453    }
 454  #  elif defined(RETRY_GET_THREAD_CONTEXT)
 455    for (retry_cnt = 0;;) {
 456      /*
 457       * Apparently the Windows 95 `GetOpenFileName()` creates a thread
 458       * that does not properly get cleaned up, and `SuspendThread()` on
 459       * its descriptor may provoke a crash.  This reduces the probability
 460       * of that event, though it still appears there is a race here.
 461       */
 462      if (GetExitCodeThread(t->handle, &exitCode) && exitCode != STILL_ACTIVE) {
 463        GC_release_dirty_lock();
 464  #    ifdef GC_PTHREADS
 465        /* Prevent stack from being pushed. */
 466        t->crtn->stack_end = NULL;
 467  #    else
 468        /*
 469         * This breaks `pthread_join` on Cygwin, which is guaranteed to
 470         * only see user threads.
 471         */
 472        GC_delete_thread(t);
 473  #    endif
 474        return;
 475      }
 476
 477      if (SuspendThread(t->handle) != (DWORD)-1) {
 478        CONTEXT context;
 479
 480        /*
 481         * Calls to `GetThreadContext()` may fail.  Work around this by
 482         * putting access in suspend/resume loop to advance thread past
 483         * problematic areas where suspend fails.  Capture the `context`
 484         * in per-thread structure at the suspend time rather than at
 485         * retrieving the `context` during the push logic.
 486         */
 487        context.ContextFlags = GET_THREAD_CONTEXT_FLAGS;
 488        if (GetThreadContext(t->handle, &context)) {
 489          /*
 490           * TODO: WoW64 extra workaround: if `CONTEXT_EXCEPTION_ACTIVE`
 491           * then `Sleep(1)` and retry.
 492           */
 493          t->context_sp = copy_ptr_regs(t->context_regs, &context);
 494          /* Success; the context pointer registers are saved. */
 495          break;
 496        }
 497
 498        /* Resume the thread, try to suspend it in a better location. */
 499        if (ResumeThread(t->handle) == (DWORD)-1) {
 500  #    ifndef GC_NO_THREADS_DISCOVERY
 501          if (NULL == GC_cptr_load_acquire(&t->handle)) {
 502            /*
 503             * It might be the scenario like this:
 504             *   1. `GC_suspend` calls `SuspendThread` on a valid handle;
 505             *   2. Within the `SuspendThread` call a context switch
 506             *      occurs to `DllMain` (before the thread has actually
 507             *      been suspended);
 508             *   3. `DllMain` sets `t->handle` to `NULL`, but does not
 509             *      yet close the handle;
 510             *   4. A context switch occurs returning to `SuspendThread`
 511             *      which completes on the handle that was originally
 512             *      passed into it;
 513             *   5. Then `ResumeThread` attempts to run on `t->handle`
 514             *      which is now `NULL`.
 515             */
 516            GC_release_dirty_lock();
 517            /*
 518             * FIXME: The thread seems to be suspended forever (causing
 519             * a resource leak).
 520             */
 521            WARN("ResumeThread failed (async CloseHandle by DllMain)\n", 0);
 522            return;
 523          }
 524  #    endif
 525          ABORT("ResumeThread failed in suspend loop");
 526        }
 527      } else {
 528  #    ifndef GC_NO_THREADS_DISCOVERY
 529        if (NULL == GC_cptr_load_acquire(&t->handle)) {
 530          /* The thread handle is closed asynchronously by `GC_DllMain`. */
 531          GC_release_dirty_lock();
 532          return;
 533        }
 534  #    endif
 535      }
          /*
           * Starting from the third failed attempt, yield the processor
           * (with the dirty lock released) before trying again.
           */
 536      if (retry_cnt > 1) {
 537        GC_release_dirty_lock();
 538        Sleep(0); /*< yield */
 539  #    ifndef GC_NO_THREADS_DISCOVERY
            /* The dirty lock is not held at this point. */
 540        if (NULL == GC_cptr_load_acquire(&t->handle))
 541          return;
 542  #    endif
 543        GC_acquire_dirty_lock();
 544      }
 545      if (++retry_cnt >= MAX_SUSPEND_THREAD_RETRIES) {
 546        /* Something must be wrong. */
 547        ABORT("SuspendThread loop failed");
 548      }
 549    }
 550  #  else
        /* Do not try to suspend a thread which has already exited. */
 551    if (GetExitCodeThread(t->handle, &exitCode) && exitCode != STILL_ACTIVE) {
 552      GC_release_dirty_lock();
 553  #    ifdef GC_PTHREADS
 554      /* Prevent stack from being pushed. */
 555      t->crtn->stack_end = NULL;
 556  #    else
 557      GC_delete_thread(t);
 558  #    endif
 559      return;
 560    }
 561    if (SuspendThread(t->handle) == (DWORD)-1) {
 562  #    ifndef GC_NO_THREADS_DISCOVERY
          /* The failure is tolerated if the handle has been reset to `NULL`. */
 563      if (NULL == GC_cptr_load_acquire(&t->handle)) {
 564        GC_release_dirty_lock();
 565        return;
 566      }
 567  #    endif
 568      ABORT("SuspendThread failed");
 569    }
 570  #  endif
        /* Let `GC_start_world()` know that the thread should be resumed. */
 571    t->flags |= IS_SUSPENDED;
 572    GC_release_dirty_lock();
 573    if (GC_on_thread_event)
 574      GC_on_thread_event(GC_EVENT_THREAD_SUSPENDED, THREAD_HANDLE(t));
 575  }
 576  
 577  #  if defined(GC_ASSERTIONS) \
 578        && ((defined(MSWIN32) && !defined(CONSOLE_LOG)) || defined(MSWINCE))
      /*
       * The flag is on while `GC_stop_world()` is inside the critical
       * section guarded by `GC_write_cs`, i.e. while calling `GC_printf()`
       * (and friends) is not allowed.  Maintained only for the assertions
       * checking.
       */
 579  GC_INNER GC_bool GC_write_disabled = FALSE;
 580  #  endif
 581  
 582  GC_INNER void
 583  GC_stop_world(void)
 584  {
 585    thread_id_t self_id = GetCurrentThreadId();
 586  
 587    GC_ASSERT(I_HOLD_LOCK());
 588    GC_ASSERT(GC_thr_initialized);
 589  
 590    /* This code is the same as in `pthread_stop_world.c` file. */
 591  #  ifdef PARALLEL_MARK
 592    if (GC_parallel) {
 593      GC_acquire_mark_lock();
 594      /* We should have previously waited for the count to become zero. */
 595      GC_ASSERT(0 == GC_fl_builder_count);
 596    }
 597  #  endif /* PARALLEL_MARK */
 598  
 599  #  if !defined(GC_NO_THREADS_DISCOVERY) || defined(GC_ASSERTIONS)
 600    GC_please_stop = TRUE;
 601  #  endif
 602  #  if (defined(MSWIN32) && !defined(CONSOLE_LOG)) || defined(MSWINCE)
 603    GC_ASSERT(!GC_write_disabled);
 604    EnterCriticalSection(&GC_write_cs);
 605    /*
 606     * It is not allowed to call `GC_printf()` (and friends) here down to
 607     * `LeaveCriticalSection` (same applies recursively to `GC_suspend`,
 608     * `GC_delete_thread`, `GC_get_max_thread_index`, `GC_size` and
 609     * `GC_remove_protection`).
 610     */
 611  #    ifdef GC_ASSERTIONS
 612    GC_write_disabled = TRUE;
 613  #    endif
 614  #  endif
 615  #  ifndef GC_NO_THREADS_DISCOVERY
 616    if (GC_win32_dll_threads) {
 617      int i;
 618      int my_max;
 619  
 620      /*
 621       * Any threads being created during this loop will end up setting
 622       * `GC_attached_thread` when they start.  This will force marking
 623       * to restart.  This is not ideal, but hopefully correct.
 624       */
 625      AO_store(&GC_attached_thread, FALSE);
 626      my_max = (int)GC_get_max_thread_index();
 627      for (i = 0; i <= my_max; i++) {
 628        GC_thread p = (GC_thread)(dll_thread_table + i);
 629  
 630        if (p->crtn->stack_end != NULL && (p->flags & DO_BLOCKING) == 0
 631            && p->id != self_id) {
 632          GC_suspend(p);
 633        }
 634      }
 635    } else
 636  #  endif
 637    /* else */ {
 638      GC_thread p;
 639      int i;
 640  
 641      for (i = 0; i < THREAD_TABLE_SZ; i++) {
 642        for (p = GC_threads[i]; p != NULL; p = p->tm.next)
 643          if (p->crtn->stack_end != NULL && p->id != self_id
 644              && (p->flags & (FINISHED | DO_BLOCKING)) == 0)
 645            GC_suspend(p);
 646      }
 647    }
 648  #  if (defined(MSWIN32) && !defined(CONSOLE_LOG)) || defined(MSWINCE)
 649  #    ifdef GC_ASSERTIONS
 650    GC_write_disabled = FALSE;
 651  #    endif
 652    LeaveCriticalSection(&GC_write_cs);
 653  #  endif
 654  #  ifdef PARALLEL_MARK
 655    if (GC_parallel)
 656      GC_release_mark_lock();
 657  #  endif
 658  }
 659  
 660  GC_INNER void
 661  GC_start_world(void)
 662  {
 663  #  ifdef GC_ASSERTIONS
 664    thread_id_t self_id = GetCurrentThreadId();
 665  #  endif
 666  
 667    GC_ASSERT(I_HOLD_LOCK());
 668  #  ifndef GC_NO_THREADS_DISCOVERY
 669    if (GC_win32_dll_threads) {
 670      LONG my_max = GC_get_max_thread_index();
 671      int i;
 672  
 673      for (i = 0; i <= my_max; i++) {
 674        GC_thread p = (GC_thread)(dll_thread_table + i);
 675  
 676        if ((p->flags & IS_SUSPENDED) != 0) {
 677  #    ifdef DEBUG_THREADS
 678          GC_log_printf("Resuming 0x%x\n", (int)p->id);
 679  #    endif
 680          GC_ASSERT(p->id != self_id);
 681          GC_ASSERT(*(ptr_t *)CAST_AWAY_VOLATILE_PVOID(&p->crtn->stack_end)
 682                    != NULL);
 683          if (ResumeThread(p->handle) == (DWORD)-1) {
 684            if (NULL == GC_cptr_load_acquire(&p->handle)) {
 685              /* FIXME: See the same issue in `GC_suspend`. */
 686              WARN("ResumeThread failed (async CloseHandle by DllMain)\n", 0);
 687            } else {
 688              ABORT("ResumeThread failed");
 689            }
 690          }
 691          p->flags &= (unsigned char)~IS_SUSPENDED;
 692          if (GC_on_thread_event)
 693            GC_on_thread_event(GC_EVENT_THREAD_UNSUSPENDED, p->handle);
 694        } else {
 695          /* The thread is unregistered or not suspended. */
 696        }
 697      }
 698    } else
 699  #  endif
 700    /* else */ {
 701      GC_thread p;
 702      int i;
 703  
 704      for (i = 0; i < THREAD_TABLE_SZ; i++) {
 705        for (p = GC_threads[i]; p != NULL; p = p->tm.next) {
 706          if ((p->flags & IS_SUSPENDED) != 0) {
 707  #  ifdef DEBUG_THREADS
 708            GC_log_printf("Resuming 0x%x\n", (int)p->id);
 709  #  endif
 710            GC_ASSERT(p->id != self_id && *(ptr_t *)&p->crtn->stack_end != NULL);
 711            if (ResumeThread(THREAD_HANDLE(p)) == (DWORD)-1)
 712              ABORT("ResumeThread failed");
 713            GC_win32_unprotect_thread(p);
 714            p->flags &= (unsigned char)~IS_SUSPENDED;
 715            if (GC_on_thread_event)
 716              GC_on_thread_event(GC_EVENT_THREAD_UNSUSPENDED, THREAD_HANDLE(p));
 717          } else {
 718  #  ifdef DEBUG_THREADS
 719            GC_log_printf("Not resuming thread 0x%x as it is not suspended\n",
 720                          (int)p->id);
 721  #  endif
 722          }
 723        }
 724      }
 725    }
 726  #  if !defined(GC_NO_THREADS_DISCOVERY) || defined(GC_ASSERTIONS)
 727    GC_please_stop = FALSE;
 728  #  endif
 729  }
 730  
 731  #  ifdef MSWINCE
 732  /*
 733   * The `VirtualQuery()` calls below will not work properly on some old
 734   * WinCE versions, but since each stack is restricted to an aligned
 735   * 64 KiB region of virtual memory we can just take the next lowest
 736   * multiple of 64 KiB.  The result of this macro must not be used as
 737   * its argument later and must not be used as the lower bound for `sp`
 738   * check (since the stack may be bigger than 64 KiB).
      * The macro evaluates to `s` minus one with the lowest 16 bits
      * cleared, thus a value of `s` which is itself a multiple of 64 KiB
      * maps to the previous multiple.
 739   */
 740  #    define GC_wince_evaluate_stack_min(s) \
 741        (ptr_t)(((word)(s) - (word)1) & ~(word)0xFFFF)
 742  #  elif defined(GC_ASSERTIONS)
      /*
       * On the targets other than WinCE, the name is just the constant
       * `FALSE`, provided only if the assertions checking is on.
       * NOTE(review): the WinCE definition and the users of this name are
       * not in this part of the file - confirm.
       */
 743  #    define GC_dont_query_stack_min FALSE
 744  #  endif
 745  
 746  /*
 747   * A cache holding the results of the recent `VirtualQuery()` call.
 748   * Protected by the allocator lock.
      * `last_address` is the address passed to the most recent call and
      * `last_info` is the information stored by that call.  Both are
      * updated by `GC_get_stack_min()` and `may_be_in_stack()`.
 749   */
 750  static ptr_t last_address = 0;
 751  static MEMORY_BASIC_INFORMATION last_info;
 752  
 753  /*
 754   * Probe stack memory region (starting at `s`) to find out its lowest
 755   * address (i.e. stack top).  `s` must be a mapped address inside the
 756   * region, *not* the first unmapped address.
 757   */
 758  STATIC ptr_t
 759  GC_get_stack_min(ptr_t s)
 760  {
 761    ptr_t bottom;
 762  
 763    GC_ASSERT(I_HOLD_LOCK());
 764    if (s != last_address) {
 765      VirtualQuery(s, &last_info, sizeof(last_info));
 766      last_address = s;
 767    }
 768    do {
 769      bottom = (ptr_t)last_info.BaseAddress;
 770      VirtualQuery(bottom - 1, &last_info, sizeof(last_info));
 771      last_address = bottom - 1;
 772    } while ((last_info.Protect & PAGE_READWRITE)
 773             && !(last_info.Protect & PAGE_GUARD));
 774    return bottom;
 775  }
 776  
 777  /*
 778   * Return `TRUE` if the page at `s` has protections appropriate for
 779   * a stack page.
 780   */
 781  static GC_bool
 782  may_be_in_stack(ptr_t s)
 783  {
 784    GC_ASSERT(I_HOLD_LOCK());
 785    if (s != last_address) {
 786      VirtualQuery(s, &last_info, sizeof(last_info));
 787      last_address = s;
 788    }
 789    return (last_info.Protect & PAGE_READWRITE) != 0
 790           && (last_info.Protect & PAGE_GUARD) == 0;
 791  }
 792  
 793  /*
 794   * Copy all registers that might point into the heap.  Frame pointer
 795   * registers are included in case client code was compiled with the
 796   * "omit frame pointer" optimization.  The context register values are
 797   * stored to `regs` argument which is expected to be of `PUSHED_REGS_COUNT`
 798   * length exactly.  Returns the context stack pointer value.
       *
       * The list of the copied registers is selected at compile time for
       * the target architecture; the number of the stored words is checked
       * against `PUSHED_REGS_COUNT` by the assertion at the end.
 799   */
 800  static ptr_t
 801  copy_ptr_regs(word *regs, const CONTEXT *pcontext)
 802  {
 803    ptr_t sp;
 804    int cnt = 0;
        /*
         * Local helpers: `context` lets the fields be read as `context.Reg`;
         * `PUSH1` appends one field of the context to `regs` (advancing
         * `cnt`); `PUSH2`, `PUSH4` append several fields in the given order;
         * `PUSH8_LH` appends the `Low` and `High` parts of four fields.
         */
 805  #  define context (*pcontext)
 806  #  define PUSH1(reg) (regs[cnt++] = (word)pcontext->reg)
 807  #  define PUSH2(r1, r2) (PUSH1(r1), PUSH1(r2))
 808  #  define PUSH4(r1, r2, r3, r4) (PUSH2(r1, r2), PUSH2(r3, r4))
 809  #  define PUSH8_LH(r1, r2, r3, r4)            \
 810      (PUSH4(r1.Low, r1.High, r2.Low, r2.High), \
 811       PUSH4(r3.Low, r3.High, r4.Low, r4.High))
 812  #  if defined(I386)
 813  #    ifdef WOW64_THREAD_CONTEXT_WORKAROUND
 814    /*
 815     * Notes: these should be the first "pushed" registers, exactly in this
 816     * order, see the WoW64 logic in `GC_push_stack_for()`; these registers
 817     * do not contain pointers.
 818     */
 819    PUSH2(ContextFlags, SegFs);
 820  #    endif
 821    PUSH4(Edi, Esi, Ebx, Edx), PUSH2(Ecx, Eax), PUSH1(Ebp);
 822    sp = (ptr_t)context.Esp;
 823  #  elif defined(X86_64)
 824    PUSH4(Rax, Rcx, Rdx, Rbx);
 825    PUSH2(Rbp, Rsi);
 826    PUSH1(Rdi);
 827    PUSH4(R8, R9, R10, R11);
 828    PUSH4(R12, R13, R14, R15);
 829  #    ifndef XMM_CANT_STORE_PTRS
        /* Each of the `Xmm` registers is stored as two words (low, high). */
 830    PUSH8_LH(Xmm0, Xmm1, Xmm2, Xmm3);
 831    PUSH8_LH(Xmm4, Xmm5, Xmm6, Xmm7);
 832    PUSH8_LH(Xmm8, Xmm9, Xmm10, Xmm11);
 833    PUSH8_LH(Xmm12, Xmm13, Xmm14, Xmm15);
 834  #    endif
 835    sp = (ptr_t)context.Rsp;
 836  #  elif defined(ARM32)
 837    PUSH4(R0, R1, R2, R3), PUSH4(R4, R5, R6, R7), PUSH4(R8, R9, R10, R11);
 838    PUSH1(R12);
 839    sp = (ptr_t)context.Sp;
 840  #  elif defined(AARCH64)
 841    PUSH4(X0, X1, X2, X3), PUSH4(X4, X5, X6, X7), PUSH4(X8, X9, X10, X11);
 842    PUSH4(X12, X13, X14, X15), PUSH4(X16, X17, X18, X19),
 843        PUSH4(X20, X21, X22, X23);
 844    PUSH4(X24, X25, X26, X27), PUSH1(X28);
 845    PUSH1(Lr);
 846    sp = (ptr_t)context.Sp;
 847  #  elif defined(SHx)
 848    PUSH4(R0, R1, R2, R3), PUSH4(R4, R5, R6, R7), PUSH4(R8, R9, R10, R11);
 849    PUSH2(R12, R13), PUSH1(R14);
        /* Here `R15` is the register taken as the stack pointer. */
 850    sp = (ptr_t)context.R15;
 851  #  elif defined(MIPS)
 852    PUSH4(IntAt, IntV0, IntV1, IntA0), PUSH4(IntA1, IntA2, IntA3, IntT0);
 853    PUSH4(IntT1, IntT2, IntT3, IntT4), PUSH4(IntT5, IntT6, IntT7, IntS0);
 854    PUSH4(IntS1, IntS2, IntS3, IntS4), PUSH4(IntS5, IntS6, IntS7, IntT8);
 855    PUSH4(IntT9, IntK0, IntK1, IntS8);
 856    sp = (ptr_t)context.IntSp;
 857  #  elif defined(PPC)
 858    PUSH4(Gpr0, Gpr3, Gpr4, Gpr5), PUSH4(Gpr6, Gpr7, Gpr8, Gpr9);
 859    PUSH4(Gpr10, Gpr11, Gpr12, Gpr14), PUSH4(Gpr15, Gpr16, Gpr17, Gpr18);
 860    PUSH4(Gpr19, Gpr20, Gpr21, Gpr22), PUSH4(Gpr23, Gpr24, Gpr25, Gpr26);
 861    PUSH4(Gpr27, Gpr28, Gpr29, Gpr30), PUSH1(Gpr31);
        /* Here `Gpr1` is the register taken as the stack pointer. */
 862    sp = (ptr_t)context.Gpr1;
 863  #  elif defined(ALPHA)
 864    PUSH4(IntV0, IntT0, IntT1, IntT2), PUSH4(IntT3, IntT4, IntT5, IntT6);
 865    PUSH4(IntT7, IntS0, IntS1, IntS2), PUSH4(IntS3, IntS4, IntS5, IntFp);
 866    PUSH4(IntA0, IntA1, IntA2, IntA3), PUSH4(IntA4, IntA5, IntT8, IntT9);
 867    PUSH4(IntT10, IntT11, IntT12, IntAt);
 868    sp = (ptr_t)context.IntSp;
 869  #  elif defined(CPPCHECK)
 870    GC_noop1_ptr(regs);
 871    sp = (ptr_t)(word)cnt; /*< to workaround "cnt not used" false positive */
 872  #  else
 873  #    error Architecture is not supported
 874  #  endif
        /* The helper macros are local to this function. */
 875  #  undef context
 876  #  undef PUSH1
 877  #  undef PUSH2
 878  #  undef PUSH4
 879  #  undef PUSH8_LH
 880    GC_ASSERT(cnt == PUSHED_REGS_COUNT);
 881    return sp;
 882  }
 883  
      /*
       * Push the stack of the given thread for marking.  For a thread which
       * is neither the calling one nor marked as `DO_BLOCKING`, the register
       * values obtained from the thread context (or the ones cached at the
       * thread suspension) are pushed as well.  Sets `*pfound_me` to `TRUE`
       * if the thread identifier equals `self_id`.  Returns the distance
       * between the stack end and the stack pointer of the thread, or zero
       * if the thread is skipped (no stack end is recorded, or the thread
       * context cannot be obtained and there is nothing to fall back to).
       * The allocator lock should be held by the caller.
       */
 884  STATIC word
 885  GC_push_stack_for(GC_thread thread, thread_id_t self_id, GC_bool *pfound_me)
 886  {
 887    GC_bool is_self = FALSE;
 888    ptr_t sp, stack_min;
 889    GC_stack_context_t crtn = thread->crtn;
 890    ptr_t stack_end = crtn->stack_end;
 891    struct GC_traced_stack_sect_s *traced_stack_sect = crtn->traced_stack_sect;
 892  
 893    GC_ASSERT(I_HOLD_LOCK());
 894    if (UNLIKELY(NULL == stack_end))
 895      return 0;
 896  
 897    if (thread->id == self_id) {
 898      GC_ASSERT((thread->flags & DO_BLOCKING) == 0);
 899      sp = GC_approx_sp();
 900      is_self = TRUE;
 901      *pfound_me = TRUE;
 902    } else if ((thread->flags & DO_BLOCKING) != 0) {
 903      /* Use saved `sp` value for blocked threads. */
 904      sp = crtn->stack_ptr;
 905    } else {
 906  #  ifdef RETRY_GET_THREAD_CONTEXT
 907      /*
 908       * We cache context when suspending the thread since it may require
 909       * looping.
 910       */
 911      word *regs = thread->context_regs;
 912  
 913      if ((thread->flags & IS_SUSPENDED) != 0) {
 914        sp = thread->context_sp;
 915      } else
 916  #  else
 917      word regs[PUSHED_REGS_COUNT];
 918  #  endif
 919  
 920      /* else */ {
 921        CONTEXT context;
 922  
 923        /* For unblocked threads call `GetThreadContext()`. */
 924        context.ContextFlags = GET_THREAD_CONTEXT_FLAGS;
 925        if (GetThreadContext(THREAD_HANDLE(thread), &context)) {
 926          sp = copy_ptr_regs(regs, &context);
 927        } else {
 928  #  ifndef GC_NO_THREADS_DISCOVERY
 929          if (NULL == GC_cptr_load_acquire(&thread->handle))
 930            return 0;
 931  #  endif
 932  #  ifdef RETRY_GET_THREAD_CONTEXT
 933          /* At least, try to use the stale context if saved. */
 934          sp = thread->context_sp;
 935          if (NULL == sp) {
 936            /*
 937             * Skip the current thread; anyway its stack will be pushed
 938             * when the world is stopped.
 939             */
 940            return 0;
 941          }
 942  #  else
 943          /* This is to avoid "might be uninitialized" compiler warning. */
 944          *(volatile ptr_t *)&sp = NULL;
 945          ABORT("GetThreadContext failed");
 946  #  endif
 947        }
 948      }
 949  #  ifdef THREAD_LOCAL_ALLOC
 950      GC_ASSERT((thread->flags & IS_SUSPENDED) != 0 || !GC_world_stopped);
 951  #  endif
 952  
 953  #  ifndef WOW64_THREAD_CONTEXT_WORKAROUND
 954      GC_push_many_regs(regs, PUSHED_REGS_COUNT);
 955  #  else
 956      GC_push_many_regs(regs + 2, PUSHED_REGS_COUNT - 2);
 957      /* Skip `ContextFlags` and `SegFs`. */
 958  
 959      /* WoW64 workaround. */
 960      if (isWow64) {
 961        DWORD ContextFlags = (DWORD)regs[0];
 962  
 963        if ((ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0
 964            && (ContextFlags
 965                & (CONTEXT_EXCEPTION_ACTIVE
 966                   /* `| CONTEXT_SERVICE_ACTIVE` */))
 967                   != 0) {
 968          GC_NT_TIB *tib;
 969  
 970  #    ifdef MSWINRT_FLAVOR
 971          tib = thread->tib;
 972  #    else
 973          WORD SegFs = (WORD)regs[1];
 974          LDT_ENTRY selector;
 975  
 976          if (!GetThreadSelectorEntry(THREAD_HANDLE(thread), SegFs, &selector))
 977            ABORT("GetThreadSelectorEntry failed");
              /* Compose the TIB address from the parts of the selector base. */
 978          tib = (GC_NT_TIB *)(selector.BaseLow
 979                              | (selector.HighWord.Bits.BaseMid << 16)
 980                              | (selector.HighWord.Bits.BaseHi << 24));
 981  #    endif
 982  #    ifdef DEBUG_THREADS
 983          GC_log_printf("TIB stack limit/base: %p .. %p\n",
 984                        (void *)tib->StackLimit, (void *)tib->StackBase);
 985  #    endif
 986          GC_ASSERT(!HOTTER_THAN((ptr_t)tib->StackBase, stack_end));
 987          if (stack_end != crtn->initial_stack_base
 988              /* We are in a coroutine (old-style way of the support). */
 989              && (ADDR(stack_end) <= (word)tib->StackLimit
 990                  || (word)tib->StackBase < ADDR(stack_end))) {
 991            /* The coroutine stack is not within TIB stack. */
 992            WARN("GetThreadContext might return stale register values"
 993                 " including ESP= %p\n",
 994                 sp);
 995            /*
 996             * TODO: Because of WoW64 bug, there is no guarantee that `sp`
 997             * really points to the stack top but, for now, we do our best
 998             * as the TIB stack limit/base cannot be used while we are
 999             * inside a coroutine.
1000             */
1001          } else {
1002            /*
1003             * `GetThreadContext()` might return stale register values,
1004             * so we scan the entire stack region (down to the stack limit).
1005             * There is no 100% guarantee that all the registers are pushed
1006             * but we do our best (the proper solution would be to fix it
1007             * inside Windows).
1008             */
1009            sp = (ptr_t)tib->StackLimit;
1010          }
1011        } /* else */
1012  #    ifdef DEBUG_THREADS
1013        else {
1014          static GC_bool logged;
1015          if (!logged && (ContextFlags & CONTEXT_EXCEPTION_REPORTING) == 0) {
1016            GC_log_printf("CONTEXT_EXCEPTION_REQUEST not supported\n");
1017            logged = TRUE;
1018          }
1019        }
1020  #    endif
1021      }
1022  #  endif /* WOW64_THREAD_CONTEXT_WORKAROUND */
1023    }
1024  #  if defined(STACKPTR_CORRECTOR_AVAILABLE) && defined(GC_PTHREADS)
1025    if (GC_sp_corrector != 0)
1026      GC_sp_corrector((void **)&sp, PTHREAD_TO_VPTR(thread->pthread_id));
1027  #  endif
1028  
1029    /*
1030     * Set `stack_min` to the lowest address in the thread stack, or to
1031     * an address in the thread stack not bigger than `sp`, taking advantage
1032     * of the old value to avoid slow traversals of large stacks.
1033     */
1034    if (crtn->last_stack_min == ADDR_LIMIT) {
1035  #  ifdef MSWINCE
1036      if (GC_dont_query_stack_min) {
1037        stack_min = GC_wince_evaluate_stack_min(
1038            traced_stack_sect != NULL ? (ptr_t)traced_stack_sect : stack_end);
1039        /* Keep `last_stack_min` value unmodified. */
1040      } else
1041  #  endif
1042      /* else */ {
1043        stack_min = GC_get_stack_min(
1044            traced_stack_sect != NULL ? (ptr_t)traced_stack_sect : stack_end);
1045        GC_win32_unprotect_thread(thread);
1046        crtn->last_stack_min = stack_min;
1047      }
1048    } else {
1049      /*
1050       * First, adjust the latest known minimum stack address if we are
1051       * inside `GC_call_with_gc_active`.
1052       */
1053      if (traced_stack_sect != NULL
1054          && ADDR_LT((ptr_t)traced_stack_sect, crtn->last_stack_min)) {
1055        GC_win32_unprotect_thread(thread);
1056        crtn->last_stack_min = (ptr_t)traced_stack_sect;
1057      }
1058  
1059      if (ADDR_INSIDE(sp, crtn->last_stack_min, stack_end)) {
1060        stack_min = sp;
1061      } else {
1062        /* In the current thread it is always safe to use `sp` value. */
1063        if (may_be_in_stack(is_self && ADDR_LT(sp, crtn->last_stack_min)
1064                                ? sp
1065                                : crtn->last_stack_min)) {
1066          stack_min = (ptr_t)last_info.BaseAddress;
1067          /* Do not probe rest of the stack if `sp` is correct. */
1068          if (!ADDR_INSIDE(sp, stack_min, stack_end))
1069            stack_min = GC_get_stack_min(crtn->last_stack_min);
1070        } else {
1071          /* Stack shrunk?  Is this possible? */
1072          stack_min = GC_get_stack_min(stack_end);
1073        }
1074        GC_win32_unprotect_thread(thread);
1075        crtn->last_stack_min = stack_min;
1076      }
1077    }
1078  
1079    GC_ASSERT(GC_dont_query_stack_min || stack_min == GC_get_stack_min(stack_end)
1080              || (ADDR_GE(sp, stack_min) && ADDR_LT(stack_min, stack_end)
1081                  && ADDR_LT(GC_get_stack_min(stack_end), stack_min)));
1082  
1083    if (ADDR_INSIDE(sp, stack_min, stack_end)) {
1084  #  ifdef DEBUG_THREADS
1085      GC_log_printf("Pushing stack for 0x%x from sp %p to %p from 0x%x\n",
1086                    (int)thread->id, (void *)sp, (void *)stack_end,
1087                    (int)self_id);
1088  #  endif
1089      GC_push_all_stack_sections(sp, stack_end, traced_stack_sect);
1090    } else {
1091      /*
1092       * If not the current thread, then it is possible for `sp` to point to
1093       * the guarded (untouched yet) page just below the current `stack_min`
1094       * of the thread.
1095       */
1096      if (is_self || ADDR_GE(sp, stack_end)
1097          || ADDR_LT(sp + GC_page_size, stack_min))
1098        WARN("Thread stack pointer %p out of range, pushing everything\n", sp);
1099  #  ifdef DEBUG_THREADS
1100      GC_log_printf("Pushing stack for 0x%x from (min) %p to %p from 0x%x\n",
1101                    (int)thread->id, (void *)stack_min, (void *)stack_end,
1102                    (int)self_id);
1103  #  endif
1104      /* Push everything - ignore "traced stack section" data. */
1105      GC_push_all_stack(stack_min, stack_end);
1106    }
1107    /* Note: stack grows down. */
1108    return stack_end - sp;
1109  }
1110  
1111  GC_INNER void
1112  GC_push_all_stacks(void)
1113  {
1114    thread_id_t self_id = GetCurrentThreadId();
1115    GC_bool found_me = FALSE;
1116  #  ifndef SMALL_CONFIG
1117    unsigned nthreads = 0;
1118  #  endif
1119    word total_size = 0;
1120  
1121    GC_ASSERT(I_HOLD_LOCK());
1122    GC_ASSERT(GC_thr_initialized);
1123  #  ifndef GC_NO_THREADS_DISCOVERY
1124    if (GC_win32_dll_threads) {
1125      int i;
1126      LONG my_max = GC_get_max_thread_index();
1127  
1128      for (i = 0; i <= my_max; i++) {
1129        GC_thread p = (GC_thread)(dll_thread_table + i);
1130  
1131        if (p->tm.in_use) {
1132  #    ifndef SMALL_CONFIG
1133          ++nthreads;
1134  #    endif
1135          total_size += GC_push_stack_for(p, self_id, &found_me);
1136        }
1137      }
1138    } else
1139  #  endif
1140    /* else */ {
1141      int i;
1142      for (i = 0; i < THREAD_TABLE_SZ; i++) {
1143        GC_thread p;
1144  
1145        for (p = GC_threads[i]; p != NULL; p = p->tm.next) {
1146          GC_ASSERT(THREAD_TABLE_INDEX(p->id) == i);
1147          if (!KNOWN_FINISHED(p)) {
1148  #  ifndef SMALL_CONFIG
1149            ++nthreads;
1150  #  endif
1151            total_size += GC_push_stack_for(p, self_id, &found_me);
1152          }
1153        }
1154      }
1155    }
1156  #  ifndef SMALL_CONFIG
1157    GC_VERBOSE_LOG_PRINTF(
1158        "Pushed %d thread stacks%s\n", nthreads,
1159        GC_win32_dll_threads ? " based on DllMain thread tracking" : "");
1160  #  endif
1161    if (!found_me && !GC_in_thread_creation)
1162      ABORT("Collecting from unknown thread");
1163    GC_total_stacksize = total_size;
1164  }
1165  
1166  #  ifdef PARALLEL_MARK
      /*
       * The cached lowest known stack address of each mark helper thread.
       * An element is set to `ADDR_LIMIT` in `GC_start_mark_threads_inner()`
       * before the relevant helper thread is created, and it is updated by
       * `GC_get_next_stack()`.
       */
1167  GC_INNER ptr_t GC_marker_last_stack_min[MAX_MARKERS - 1] = { 0 };
1168  #  endif
1169  
      /*
       * Find the thread stack with the least stack end address which is
       * greater than `start`.  Unless in the `GC_win32_dll_threads` mode, the
       * `GC_marker_sp[]` values of the mark helper threads are considered too
       * (if the parallel marking is supported).  On return, `*phi` is set to
       * the found address and `*plo` is set to the lowest address of the
       * found stack.  Both are set to `ADDR_LIMIT` if nothing is found;
       * `*plo` alone is set to `ADDR_LIMIT` if `limit` is below the found
       * address and the memory at `limit` is not a stack.  Otherwise the
       * cached `last_stack_min` value of the found stack is updated (except
       * for the `GC_dont_query_stack_min` case).  The allocator lock should
       * be held by the caller.
       */
1170  GC_INNER void
1171  GC_get_next_stack(ptr_t start, ptr_t limit, ptr_t *plo, ptr_t *phi)
1172  {
1173    int i;
1174    /* Least in-range stack base. */
1175    ptr_t current_min = ADDR_LIMIT;
1176    /*
1177     * Address of `last_stack_min` field for thread corresponding to
1178     * `current_min`.
1179     */
1180    ptr_t *plast_stack_min = NULL;
1181    /*
1182     * Either `NULL` or points to the thread's hash table entry
1183     * containing `*plast_stack_min`.
1184     */
1185    GC_thread thread = NULL;
1186  
1187    GC_ASSERT(I_HOLD_LOCK());
1188    /* First set `current_min`, ignoring `limit`. */
1189    if (GC_win32_dll_threads) {
1190      LONG my_max = GC_get_max_thread_index();
1191  
1192      for (i = 0; i <= my_max; i++) {
1193        ptr_t stack_end = (ptr_t)dll_thread_table[i].crtn->stack_end;
1194  
1195        if (ADDR_LT(start, stack_end) && ADDR_LT(stack_end, current_min)) {
1196          /* Update address of `last_stack_min`. */
1197          plast_stack_min = &dll_thread_table[i].crtn->last_stack_min;
1198          current_min = stack_end;
1199  #  ifdef CPPCHECK
1200          /* To avoid a warning that thread is always null. */
1201          thread = (GC_thread)&dll_thread_table[i];
1202  #  endif
1203        }
1204      }
1205    } else {
1206      for (i = 0; i < THREAD_TABLE_SZ; i++) {
1207        GC_thread p;
1208  
1209        for (p = GC_threads[i]; p != NULL; p = p->tm.next) {
1210          GC_stack_context_t crtn = p->crtn;
1211          /* Note: the following is read of a `volatile` field. */
1212          ptr_t stack_end = crtn->stack_end;
1213  
1214          if (ADDR_LT(start, stack_end) && ADDR_LT(stack_end, current_min)) {
1215            /* Update value of `*plast_stack_min`. */
1216            plast_stack_min = &crtn->last_stack_min;
1217            /* Remember current thread to unprotect. */
1218            thread = p;
1219            current_min = stack_end;
1220          }
1221        }
1222      }
1223  #  ifdef PARALLEL_MARK
          /* Consider the mark helper threads as well. */
1224      for (i = 0; i < GC_markers_m1; ++i) {
1225        ptr_t s = GC_marker_sp[i];
1226  
1227  #    ifdef IA64
1228        /* FIXME: Not implemented. */
1229  #    endif
1230        if (ADDR_LT(start, s) && ADDR_LT(s, current_min)) {
1231          GC_ASSERT(GC_marker_last_stack_min[i] != NULL);
1232          plast_stack_min = &GC_marker_last_stack_min[i];
1233          current_min = s;
1234          /* Not a thread's hash table entry. */
1235          thread = NULL;
1236        }
1237      }
1238  #  endif
1239    }
1240  
1241    *phi = current_min;
1242    if (current_min == ADDR_LIMIT) {
          /* No stack is found above `start`. */
1243      *plo = ADDR_LIMIT;
1244      return;
1245    }
1246  
1247    GC_ASSERT(ADDR_LT(start, current_min) && plast_stack_min != NULL);
1248  #  ifdef MSWINCE
1249    if (GC_dont_query_stack_min) {
1250      *plo = GC_wince_evaluate_stack_min(current_min);
1251      /* Keep `last_stack_min` value unmodified. */
1252      return;
1253    }
1254  #  endif
1255  
1256    if (ADDR_LT(limit, current_min) && !may_be_in_stack(limit)) {
1257      /*
1258       * Skip the rest since the memory region at `limit` address is not
1259       * a stack (so the lowest address of the found stack would be above
1260       * the `limit` value anyway).
1261       */
1262      *plo = ADDR_LIMIT;
1263      return;
1264    }
1265  
1266    /*
1267     * Get the minimum address of the found stack by probing its memory
1268     * region starting from the recent known minimum (if set).
1269     */
1270    if (*plast_stack_min == ADDR_LIMIT || !may_be_in_stack(*plast_stack_min)) {
1271      /* Unsafe to start from `last_stack_min` value. */
1272      *plo = GC_get_stack_min(current_min);
1273    } else {
1274      /* Use the recent value to optimize search for minimum address. */
1275      *plo = GC_get_stack_min(*plast_stack_min);
1276    }
1277  
1278    /* Remember current `stack_min` value. */
1279    if (thread != NULL)
1280      GC_win32_unprotect_thread(thread);
1281    *plast_stack_min = *plo;
1282  }
1283  
1284  #  if defined(PARALLEL_MARK) && !defined(GC_PTHREADS_PARAMARK)
1285  
1286  #    ifndef MARK_THREAD_STACK_SIZE
1287  /* The default size of the marker's thread stack. */
1288  #      define MARK_THREAD_STACK_SIZE 0
1289  #    endif
1290  
1291  /* Events with manual reset (one for each mark helper). */
1292  STATIC HANDLE GC_marker_cv[MAX_MARKERS - 1] = { 0 };
1293  
      /*
       * The thread identifiers of the mark helpers.  These are compared
       * with the identifier of the current thread in `GC_wait_marker()` and
       * `GC_notify_all_marker()` to select the relevant event.
       */
1294  GC_INNER thread_id_t GC_marker_Id[MAX_MARKERS - 1] = { 0 };
1295  
1296  /*
1297   * `mark_mutex_event`, `builder_cv`, `mark_cv` are initialized in
1298   * `GC_thr_init()`.
1299   */
1300  
1301  /* Note: this event should be with auto-reset. */
      /* It is waited for in `GC_acquire_mark_lock()` by the blocked threads. */
1302  static HANDLE mark_mutex_event = (HANDLE)0;
1303  
1304  /* Note: these events are with manual reset. */
1305  static HANDLE builder_cv = (HANDLE)0;
1306  static HANDLE mark_cv = (HANDLE)0;
1307  
      /*
       * Start the mark helper threads.  Does nothing if no helpers are
       * available (`GC_available_markers_m1` is not positive) or if
       * `GC_parallel` is set already.  An event object is created for every
       * helper before the first helper thread is created.  If creation of
       * some helper thread fails, then a warning is issued, no further
       * threads are created, `GC_markers_m1` is decreased accordingly and
       * the unused event objects are closed; if no helper thread is created
       * at all, then `mark_cv`, `builder_cv` and `mark_mutex_event` handles
       * are closed too.  The allocator lock should be held by the caller.
       */
1308  GC_INNER void
1309  GC_start_mark_threads_inner(void)
1310  {
1311    int i;
1312  
1313    GC_ASSERT(I_HOLD_LOCK());
1314    ASSERT_CANCEL_DISABLED();
1315    if (GC_available_markers_m1 <= 0 || GC_parallel)
1316      return;
1317    GC_wait_for_gc_completion(TRUE);
1318  
1319    GC_ASSERT(0 == GC_fl_builder_count);
1320    /*
1321     * Initialize `GC_marker_cv[]` fully before starting the first helper
1322     * thread.
1323     */
1324    GC_markers_m1 = GC_available_markers_m1;
1325    for (i = 0; i < GC_markers_m1; ++i) {
1326      if ((GC_marker_cv[i]
1327           = CreateEvent(NULL /* `attrs` */, TRUE /* `isManualReset` */,
1328                         FALSE /* `initialState` */, NULL /* `name` (A/W) */))
1329          == (HANDLE)0)
1330        ABORT("CreateEvent failed");
1331    }
1332  
        /* Note: on a failure, `i` is the number of the created threads. */
1333    for (i = 0; i < GC_markers_m1; ++i) {
1334  #    if defined(MSWINCE) || defined(MSWIN_XBOX1)
1335      HANDLE handle;
1336      DWORD thread_id;
1337  
1338      GC_marker_last_stack_min[i] = ADDR_LIMIT;
1339      /* There is no `_beginthreadex()` in WinCE. */
1340      handle = CreateThread(NULL /* `lpsa` */,
1341                            MARK_THREAD_STACK_SIZE /* ignored */, GC_mark_thread,
1342                            NUMERIC_TO_VPTR(i), 0 /* `fdwCreate` */, &thread_id);
1343      if (UNLIKELY(NULL == handle)) {
1344        WARN("Marker thread %" WARN_PRIdPTR " creation failed\n",
1345             (GC_signed_word)i);
1346        /*
1347         * The most probable failure reason is "not enough memory".
1348         * Do not try to create other marker threads.
1349         */
1350        break;
1351      }
1352      /* It is safe to detach the thread. */
1353      CloseHandle(handle);
1354  #    else
1355      GC_uintptr_t handle;
1356      unsigned thread_id;
1357  
1358      GC_marker_last_stack_min[i] = ADDR_LIMIT;
1359      handle = _beginthreadex(NULL /* `security_attr` */, MARK_THREAD_STACK_SIZE,
1360                              GC_mark_thread, NUMERIC_TO_VPTR(i),
1361                              0 /* `flags` */, &thread_id);
          /* Both zero and all-ones values are treated as a failure. */
1362      if (UNLIKELY(!handle || handle == ~(GC_uintptr_t)0)) {
1363        WARN("Marker thread %" WARN_PRIdPTR " creation failed\n",
1364             (GC_signed_word)i);
1365        /* Do not try to create other marker threads. */
1366        break;
1367      } else {
1368        /* We may detach the thread (if `handle` is of `HANDLE` type). */
1369        /* `CloseHandle((HANDLE)handle);` */
1370      }
1371  #    endif
1372    }
1373  
1374    /* Adjust `GC_markers_m1` (and free unused resources) if failed. */
1375    while (GC_markers_m1 > i) {
1376      GC_markers_m1--;
1377      CloseHandle(GC_marker_cv[GC_markers_m1]);
1378    }
1379    GC_wait_for_markers_init();
1380    GC_COND_LOG_PRINTF("Started %d mark helper threads\n", GC_markers_m1);
1381    if (UNLIKELY(0 == i)) {
          /* No helper thread exists, so these events are released as well. */
1382      CloseHandle(mark_cv);
1383      CloseHandle(builder_cv);
1384      CloseHandle(mark_mutex_event);
1385    }
1386  }
1387  
1388  #    ifdef GC_ASSERTIONS
      /*
       * The identifier of the thread which holds the mark lock currently,
       * or `NO_THREAD` if the lock is not held.  It is maintained only if
       * the assertions checking is enabled.
       */
1389  STATIC unsigned long GC_mark_lock_holder = NO_THREAD;
1390  #      define SET_MARK_LOCK_HOLDER \
1391          (void)(GC_mark_lock_holder = GetCurrentThreadId())
1392  #      define UNSET_MARK_LOCK_HOLDER                              \
1393          do {                                                      \
1394            GC_ASSERT(GC_mark_lock_holder == GetCurrentThreadId()); \
1395            GC_mark_lock_holder = NO_THREAD;                        \
1396          } while (0)
1397  #    else
      /* The mark lock holder is not tracked, so these do nothing. */
1398  #      define SET_MARK_LOCK_HOLDER (void)0
1399  #      define UNSET_MARK_LOCK_HOLDER (void)0
1400  #    endif /* !GC_ASSERTIONS */
1401  
1402  /*
1403   * Allowed values for `GC_mark_mutex_state`.
1404   * `MARK_MUTEX_LOCKED` means "locked but no other waiters";
1405   * `MARK_MUTEX_WAITERS_EXIST` means "locked and waiters may exist".
       * Note: `GC_release_mark_lock()` relies on the latter being the only
       * negative one.
1406   */
1407  #    define MARK_MUTEX_UNLOCKED 0
1408  #    define MARK_MUTEX_LOCKED 1
1409  #    define MARK_MUTEX_WAITERS_EXIST (-1)
1410  
1411  /* The mutex state.  Accessed using `InterlockedExchange()`. */
1412  STATIC /* `volatile` */ LONG GC_mark_mutex_state = MARK_MUTEX_UNLOCKED;
1413  
1414  #    ifdef LOCK_STATS
      /*
       * The statistic counters updated by `GC_acquire_mark_lock()`:
       * `GC_block_count` is incremented if the lock has been found held,
       * `GC_unlocked_count` is incremented otherwise.
       */
1415  volatile AO_t GC_block_count = 0;
1416  volatile AO_t GC_unlocked_count = 0;
1417  #    endif
1418  
1419  GC_INNER void
1420  GC_acquire_mark_lock(void)
1421  {
1422    GC_ASSERT(GC_mark_lock_holder != GetCurrentThreadId());
1423    if (UNLIKELY(InterlockedExchange(&GC_mark_mutex_state, MARK_MUTEX_LOCKED)
1424                 != 0)) {
1425  #    ifdef LOCK_STATS
1426      (void)AO_fetch_and_add1(&GC_block_count);
1427  #    endif
1428      /* Repeatedly reset the state and wait until we acquire the mark lock. */
1429      while (InterlockedExchange(&GC_mark_mutex_state, MARK_MUTEX_WAITERS_EXIST)
1430             != 0) {
1431        if (WaitForSingleObject(mark_mutex_event, INFINITE) == WAIT_FAILED)
1432          ABORT("WaitForSingleObject failed");
1433      }
1434    }
1435  #    ifdef LOCK_STATS
1436    else {
1437      (void)AO_fetch_and_add1(&GC_unlocked_count);
1438    }
1439  #    endif
1440  
1441    GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
1442    SET_MARK_LOCK_HOLDER;
1443  }
1444  
1445  GC_INNER void
1446  GC_release_mark_lock(void)
1447  {
1448    UNSET_MARK_LOCK_HOLDER;
1449    if (UNLIKELY(InterlockedExchange(&GC_mark_mutex_state, MARK_MUTEX_UNLOCKED)
1450                 < 0)) {
1451      /* Wake a waiter. */
1452      if (!SetEvent(mark_mutex_event))
1453        ABORT("SetEvent failed");
1454    }
1455  }
1456  
1457  /*
1458   * In `GC_wait_for_reclaim()`/`GC_notify_all_builder()` we emulate
1459   * `pthread_cond_wait()`/`pthread_cond_broadcast()` primitives with
1460   * Win32 API event object (working in the "manual reset" mode).
1461   * This works here because `GC_notify_all_builder()` is always called
1462   * holding the mark lock and the checked condition
1463   * (`GC_fl_builder_count` is zero) is the only one for which
1464   * broadcasting on `builder_cv` is performed.
1465   */
1466  
1467  GC_INNER void
1468  GC_wait_for_reclaim(void)
1469  {
1470    GC_ASSERT(builder_cv != 0);
1471    for (;;) {
1472      GC_acquire_mark_lock();
1473      if (0 == GC_fl_builder_count)
1474        break;
1475      if (!ResetEvent(builder_cv))
1476        ABORT("ResetEvent failed");
1477      GC_release_mark_lock();
1478      if (WaitForSingleObject(builder_cv, INFINITE) == WAIT_FAILED)
1479        ABORT("WaitForSingleObject failed");
1480    }
1481    GC_release_mark_lock();
1482  }
1483  
1484  GC_INNER void
1485  GC_notify_all_builder(void)
1486  {
1487    GC_ASSERT(GC_mark_lock_holder == GetCurrentThreadId());
1488    GC_ASSERT(builder_cv != 0);
1489    GC_ASSERT(0 == GC_fl_builder_count);
1490    if (!SetEvent(builder_cv))
1491      ABORT("SetEvent failed");
1492  }
1493  
1494  /* `mark_cv` is used (for waiting) by a non-helper thread. */
1495  
1496  GC_INNER void
1497  GC_wait_marker(void)
1498  {
1499    HANDLE event = mark_cv;
1500    thread_id_t self_id = GetCurrentThreadId();
1501    int i = GC_markers_m1;
1502  
1503    while (i-- > 0) {
1504      if (GC_marker_Id[i] == self_id) {
1505        event = GC_marker_cv[i];
1506        break;
1507      }
1508    }
1509  
1510    if (!ResetEvent(event))
1511      ABORT("ResetEvent failed");
1512    GC_release_mark_lock();
1513    if (WaitForSingleObject(event, INFINITE) == WAIT_FAILED)
1514      ABORT("WaitForSingleObject failed");
1515    GC_acquire_mark_lock();
1516  }
1517  
1518  GC_INNER void
1519  GC_notify_all_marker(void)
1520  {
1521    thread_id_t self_id = GetCurrentThreadId();
1522    int i = GC_markers_m1;
1523  
1524    while (i-- > 0) {
1525      /* Notify every marker ignoring self (for efficiency). */
1526      if (!SetEvent(GC_marker_Id[i] != self_id ? GC_marker_cv[i] : mark_cv))
1527        ABORT("SetEvent failed");
1528    }
1529  }
1530  
1531  #  endif /* PARALLEL_MARK && !GC_PTHREADS_PARAMARK */
1532  
1533  /*
1534   * We have no `DllMain` to take care of new threads.  Thus, we must properly
1535   * intercept thread creation.
1536   */
1537  
      /*
       * The client thread start routine and its argument.  An object of this
       * type is filled in by `GC_CreateThread()` or `GC_beginthreadex()`, and
       * it is read and then freed by `GC_win32_start_inner()` in the created
       * thread.
       */
1538  struct win32_start_info {
1539    LPTHREAD_START_ROUTINE start_routine;
1540    LPVOID arg;
1541  };
1542  
1543  STATIC void *GC_CALLBACK
1544  GC_win32_start_inner(struct GC_stack_base *sb, void *arg)
1545  {
1546    void *ret;
1547    LPTHREAD_START_ROUTINE start_routine
1548        = ((struct win32_start_info *)arg)->start_routine;
1549    LPVOID start_arg = ((struct win32_start_info *)arg)->arg;
1550  
1551    GC_ASSERT(!GC_win32_dll_threads);
1552    /* This waits for an in-progress garbage collection. */
1553    GC_register_my_thread(sb);
1554  #  ifdef DEBUG_THREADS
1555    GC_log_printf("thread 0x%lx starting...\n", (long)GetCurrentThreadId());
1556  #  endif
1557    GC_free(arg);
1558  
1559    /*
1560     * Clear the thread entry even if we exit with an exception.
1561     * This is probably pointless, since an uncaught exception is
1562     * supposed to result in the process being killed.
1563     */
1564  #  ifndef NO_SEH_AVAILABLE
1565    ret = NULL; /*< to avoid "might be uninitialized" compiler warning */
1566    __try
1567  #  endif
1568    {
1569      ret = NUMERIC_TO_VPTR(start_routine(start_arg));
1570    }
1571  #  ifndef NO_SEH_AVAILABLE
1572    __finally
1573  #  endif
1574    {
1575      (void)GC_unregister_my_thread();
1576    }
1577  
1578  #  ifdef DEBUG_THREADS
1579    GC_log_printf("thread 0x%lx returned from start routine\n",
1580                  (long)GetCurrentThreadId());
1581  #  endif
1582  #  if defined(CPPCHECK)
1583    GC_noop1_ptr(sb);
1584  #  endif
1585    return ret;
1586  }
1587  
1588  STATIC DWORD WINAPI
1589  GC_win32_start(LPVOID arg)
1590  {
1591    return (DWORD)(GC_uintptr_t)GC_call_with_stack_base(GC_win32_start_inner,
1592                                                        arg);
1593  }
1594  
1595  GC_API HANDLE WINAPI
1596  GC_CreateThread(LPSECURITY_ATTRIBUTES lpThreadAttributes,
1597                  GC_WIN32_SIZE_T dwStackSize,
1598                  LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpParameter,
1599                  DWORD dwCreationFlags, LPDWORD lpThreadId)
1600  {
1601    /*
1602     * Make sure the collector is initialized (i.e. main thread is attached,
1603     * TLS is initialized).  This is redundant when `GC_win32_dll_threads`
1604     * is set by `GC_use_threads_discovery()`.
1605     */
1606    if (UNLIKELY(!GC_is_initialized))
1607      GC_init();
1608    GC_ASSERT(GC_thr_initialized);
1609  
1610  #  ifdef DEBUG_THREADS
1611    GC_log_printf("About to create a thread from 0x%lx\n",
1612                  (long)GetCurrentThreadId());
1613  #  endif
1614    if (GC_win32_dll_threads) {
1615      return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
1616                          lpParameter, dwCreationFlags, lpThreadId);
1617    } else {
1618      /* Note: this is handed off to and deallocated by child thread. */
1619      struct win32_start_info *psi
1620          = (struct win32_start_info *)GC_malloc_uncollectable(
1621              sizeof(struct win32_start_info));
1622      HANDLE thread_h;
1623  
1624      if (UNLIKELY(NULL == psi)) {
1625        SetLastError(ERROR_NOT_ENOUGH_MEMORY);
1626        return NULL;
1627      }
1628  
1629      /* Set up the thread arguments. */
1630      psi->start_routine = lpStartAddress;
1631      psi->arg = lpParameter;
1632      GC_dirty(psi);
1633      REACHABLE_AFTER_DIRTY(lpParameter);
1634  
1635  #  ifdef PARALLEL_MARK
1636      if (!GC_parallel && UNLIKELY(GC_available_markers_m1 > 0))
1637        GC_start_mark_threads();
1638  #  endif
1639      set_need_to_lock();
1640      thread_h = CreateThread(lpThreadAttributes, dwStackSize, GC_win32_start,
1641                              psi, dwCreationFlags, lpThreadId);
1642      if (UNLIKELY(0 == thread_h))
1643        GC_free(psi);
1644      return thread_h;
1645    }
1646  }
1647  
1648  GC_API DECLSPEC_NORETURN void WINAPI
1649  GC_ExitThread(DWORD dwExitCode)
1650  {
1651    if (!GC_win32_dll_threads)
1652      (void)GC_unregister_my_thread();
1653    ExitThread(dwExitCode);
1654  }
1655  
1656  #  if defined(MSWIN32) && !defined(NO_CRT)
1657  GC_API GC_uintptr_t GC_CALL
1658  GC_beginthreadex(void *security, unsigned stack_size,
1659                   unsigned(__stdcall *start_address)(void *), void *arglist,
1660                   unsigned initflag, unsigned *thrdaddr)
1661  {
1662    if (UNLIKELY(!GC_is_initialized))
1663      GC_init();
1664    GC_ASSERT(GC_thr_initialized);
1665  #    ifdef DEBUG_THREADS
1666    GC_log_printf("About to create a thread from 0x%lx\n",
1667                  (long)GetCurrentThreadId());
1668  #    endif
1669  
1670    if (GC_win32_dll_threads) {
1671      return _beginthreadex(security, stack_size, start_address, arglist,
1672                            initflag, thrdaddr);
1673    } else {
1674      GC_uintptr_t thread_h;
1675      /* Note: this is handed off to and deallocated by child thread. */
1676      struct win32_start_info *psi
1677          = (struct win32_start_info *)GC_malloc_uncollectable(
1678              sizeof(struct win32_start_info));
1679  
1680      if (UNLIKELY(NULL == psi)) {
1681        /*
1682         * MSDN docs say `_beginthreadex()` returns 0 on error and sets
1683         * `errno` to either `EAGAIN` (too many threads) or `EINVAL` (the
1684         * argument is invalid or the stack size is incorrect), so we set
1685         * `errno` to `EAGAIN` on "not enough memory".
1686         */
1687        errno = EAGAIN;
1688        return 0;
1689      }
1690  
1691      /* Set up the thread arguments. */
1692      psi->start_routine = (LPTHREAD_START_ROUTINE)start_address;
1693      psi->arg = arglist;
1694      GC_dirty(psi);
1695      REACHABLE_AFTER_DIRTY(arglist);
1696  
1697  #    ifdef PARALLEL_MARK
1698      if (!GC_parallel && UNLIKELY(GC_available_markers_m1 > 0))
1699        GC_start_mark_threads();
1700  #    endif
1701      set_need_to_lock();
1702      thread_h = _beginthreadex(security, stack_size,
1703                                (unsigned(__stdcall *)(void *))GC_win32_start,
1704                                psi, initflag, thrdaddr);
1705      if (UNLIKELY(0 == thread_h))
1706        GC_free(psi);
1707      return thread_h;
1708    }
1709  }
1710  
1711  GC_API void GC_CALL
1712  GC_endthreadex(unsigned retval)
1713  {
1714    if (!GC_win32_dll_threads)
1715      (void)GC_unregister_my_thread();
1716    _endthreadex(retval);
1717  }
1718  #  endif /* MSWIN32 && !NO_CRT */
1719  
1720  #  ifdef GC_WINMAIN_REDIRECT
1721  /* This might be useful on WinCE.  Should not be used with `GC_DLL`. */
1722  
1723  #    if defined(MSWINCE) && defined(UNDER_CE)
1724  #      define WINMAIN_LPTSTR LPWSTR
1725  #    else
1726  #      define WINMAIN_LPTSTR LPSTR
1727  #    endif
1728  
1729  /* This is defined in `gc.h` file. */
1730  #    undef WinMain
1731  
1732  /* Defined outside the collector by an application. */
1733  int WINAPI GC_WinMain(HINSTANCE, HINSTANCE, WINMAIN_LPTSTR, int);
1734  
1735  typedef struct {
1736    HINSTANCE hInstance;
1737    HINSTANCE hPrevInstance;
1738    WINMAIN_LPTSTR lpCmdLine;
1739    int nShowCmd;
1740  } main_thread_args;
1741  
1742  static DWORD WINAPI
1743  main_thread_start(LPVOID arg)
1744  {
1745    main_thread_args *main_args = (main_thread_args *)arg;
1746    return (DWORD)GC_WinMain(main_args->hInstance, main_args->hPrevInstance,
1747                             main_args->lpCmdLine, main_args->nShowCmd);
1748  }
1749  
1750  STATIC void *GC_CALLBACK
1751  GC_waitForSingleObjectInfinite(void *handle)
1752  {
1753    return NUMERIC_TO_VPTR(WaitForSingleObject((HANDLE)handle, INFINITE));
1754  }
1755  
1756  #    ifndef WINMAIN_THREAD_STACK_SIZE
1757  /* The default size of the `WinMain`'s thread stack. */
1758  #      define WINMAIN_THREAD_STACK_SIZE 0
1759  #    endif
1760  
1761  int WINAPI
1762  WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, WINMAIN_LPTSTR lpCmdLine,
1763          int nShowCmd)
1764  {
1765    DWORD exit_code = 1;
1766  
1767    main_thread_args args = { hInstance, hPrevInstance, lpCmdLine, nShowCmd };
1768    HANDLE thread_h;
1769    DWORD thread_id;
1770  
1771    /* Initialize everything. */
1772    GC_INIT();
1773  
1774    /* Start the main thread. */
1775    thread_h = GC_CreateThread(
1776        NULL /* `lpsa` */, WINMAIN_THREAD_STACK_SIZE /* ignored on WinCE */,
1777        main_thread_start, &args, 0 /* `fdwCreate` */, &thread_id);
1778    if (NULL == thread_h)
1779      ABORT("GC_CreateThread(main_thread) failed");
1780  
1781    if ((DWORD)(GC_uintptr_t)GC_do_blocking(GC_waitForSingleObjectInfinite,
1782                                            (void *)thread_h)
1783        == WAIT_FAILED)
1784      ABORT("WaitForSingleObject(main_thread) failed");
1785    GetExitCodeThread(thread_h, &exit_code);
1786    CloseHandle(thread_h);
1787  
1788  #    ifdef MSWINCE
1789    GC_deinit();
1790  #    endif
1791    return (int)exit_code;
1792  }
1793  
1794  #  endif /* GC_WINMAIN_REDIRECT */
1795  
1796  #  ifdef WOW64_THREAD_CONTEXT_WORKAROUND
1797  #    ifdef MSWINRT_FLAVOR
1798  /* Available on WinRT but we have to declare it manually. */
1799  __declspec(dllimport) HMODULE WINAPI GetModuleHandleW(LPCWSTR);
1800  #    endif
1801  
1802  static GC_bool
1803  is_wow64_process(HMODULE hK32)
1804  {
1805    BOOL is_wow64;
1806  #    ifdef MSWINRT_FLAVOR
1807    /* Try to use `IsWow64Process2()` as it handles different WoW64 cases. */
1808    HMODULE hWow64 = GetModuleHandleW(L"api-ms-win-core-wow64-l1-1-1.dll");
1809  
1810    UNUSED_ARG(hK32);
1811    if (hWow64) {
1812      FARPROC pfn2 = GetProcAddress(hWow64, "IsWow64Process2");
1813      USHORT process_machine, native_machine;
1814  
1815      if (pfn2
1816          && (*(BOOL(WINAPI *)(HANDLE, USHORT *, USHORT *))(GC_funcptr_uint)
1817                  pfn2)(GetCurrentProcess(), &process_machine, &native_machine))
1818        return process_machine != native_machine;
1819    }
1820    if (IsWow64Process(GetCurrentProcess(), &is_wow64))
1821      return (GC_bool)is_wow64;
1822  #    else
1823    if (hK32) {
1824      FARPROC pfn = GetProcAddress(hK32, "IsWow64Process");
1825  
1826      if (pfn
1827          && (*(BOOL(WINAPI *)(HANDLE, BOOL *))(GC_funcptr_uint)pfn)(
1828              GetCurrentProcess(), &is_wow64))
1829        return (GC_bool)is_wow64;
1830    }
1831  #    endif
1832    /* `IsWow64Process()` failed. */
1833    return FALSE;
1834  }
1835  #  endif /* WOW64_THREAD_CONTEXT_WORKAROUND */
1836  
1837  GC_INNER void
1838  GC_thr_init(void)
1839  {
1840    struct GC_stack_base sb;
1841    thread_id_t self_id = GetCurrentThreadId();
1842  #  if (!defined(HAVE_PTHREAD_SETNAME_NP_WITH_TID) && !defined(MSWINCE) \
1843         && defined(PARALLEL_MARK))                                      \
1844        || defined(WOW64_THREAD_CONTEXT_WORKAROUND)
1845    HMODULE hK32;
1846  #    if defined(MSWINRT_FLAVOR) && defined(FUNCPTR_IS_DATAPTR)
1847    MEMORY_BASIC_INFORMATION memInfo;
1848  
1849    if (VirtualQuery(CAST_THRU_UINTPTR(void *, GetProcAddress), &memInfo,
1850                     sizeof(memInfo))
1851        != sizeof(memInfo))
1852      ABORT("Weird VirtualQuery result");
1853    hK32 = (HMODULE)memInfo.AllocationBase;
1854  #    else
1855    hK32 = GetModuleHandle(TEXT("kernel32.dll"));
1856  #    endif
1857  #  endif
1858  
1859    GC_ASSERT(I_HOLD_LOCK());
1860    GC_ASSERT(!GC_thr_initialized);
1861    GC_ASSERT(ADDR(&GC_threads) % ALIGNMENT == 0);
1862  #  ifdef GC_ASSERTIONS
1863    GC_thr_initialized = TRUE;
1864  #  endif
1865  #  if !defined(DONT_USE_ATEXIT) || !defined(GC_NO_THREADS_DISCOVERY)
1866    GC_main_thread_id = self_id;
1867  #  endif
1868  #  ifdef CAN_HANDLE_FORK
1869    GC_setup_atfork();
1870  #  endif
1871  #  ifdef WOW64_THREAD_CONTEXT_WORKAROUND
1872    /* Set `isWow64` flag. */
1873    isWow64 = is_wow64_process(hK32);
1874  #  endif
1875    /* Add the initial thread, so we can stop it. */
1876    sb.mem_base = GC_stackbottom;
1877    GC_ASSERT(sb.mem_base != NULL);
1878  #  ifdef IA64
1879    sb.reg_base = GC_register_stackbottom;
1880  #  endif
1881  
1882  #  if defined(PARALLEL_MARK)
1883    {
1884      const char *markers_string = GETENV("GC_MARKERS");
1885      int markers = GC_required_markers_cnt;
1886  
1887      if (markers_string != NULL) {
1888        markers = atoi(markers_string);
1889        if (markers <= 0 || markers > MAX_MARKERS) {
1890          WARN("Too big or invalid number of mark threads: %" WARN_PRIdPTR
1891               "; using maximum threads\n",
1892               (GC_signed_word)markers);
1893          markers = MAX_MARKERS;
1894        }
1895      } else if (0 == markers) {
1896        /*
1897         * Unless the client sets the desired number of parallel markers,
1898         * it is determined based on the number of CPU cores.
1899         */
1900  #    ifdef MSWINCE
1901        /*
1902         * There is no `GetProcessAffinityMask()` in WinCE.
1903         * `GC_sysinfo` is already initialized.
1904         */
1905        markers = (int)GC_sysinfo.dwNumberOfProcessors;
1906  #    else
1907  #      ifdef _WIN64
1908        DWORD_PTR procMask = 0;
1909        DWORD_PTR sysMask;
1910  #      else
1911        DWORD procMask = 0;
1912        DWORD sysMask;
1913  #      endif
1914        int ncpu = 0;
1915        if (
1916  #      ifdef __cplusplus
1917            GetProcessAffinityMask(GetCurrentProcess(), &procMask, &sysMask)
1918  #      else
1919            /*
1920             * Cast the mask arguments to `void *` for compatibility with
1921             * some old SDKs.
1922             */
1923            GetProcessAffinityMask(GetCurrentProcess(), (void *)&procMask,
1924                                   (void *)&sysMask)
1925  #      endif
1926            && procMask) {
1927          do {
1928            ncpu++;
1929          } while ((procMask &= procMask - 1) != 0);
1930        }
1931        markers = ncpu;
1932  #    endif
1933  #    if defined(GC_MIN_MARKERS) && !defined(CPPCHECK)
1934        /* This is primarily for testing on systems without `getenv()`. */
1935        if (markers < GC_MIN_MARKERS)
1936          markers = GC_MIN_MARKERS;
1937  #    endif
1938        if (markers > MAX_MARKERS) {
1939          /* Silently limit the amount of markers. */
1940          markers = MAX_MARKERS;
1941        }
1942      }
1943      GC_available_markers_m1 = markers - 1;
1944    }
1945  
1946    /* Check whether parallel mode could be enabled. */
1947    if (GC_win32_dll_threads || GC_available_markers_m1 <= 0) {
1948      /* Disable parallel marking. */
1949      GC_parallel = FALSE;
1950      GC_COND_LOG_PRINTF("Single marker thread, turning off parallel marking\n");
1951    } else {
1952  #    ifndef GC_PTHREADS_PARAMARK
1953      /* Initialize Win32 event objects for parallel marking. */
1954      mark_mutex_event
1955          = CreateEvent(NULL /* `attrs` */, FALSE /* `isManualReset` */,
1956                        FALSE /* `initialState` */, NULL /* `name` */);
1957      builder_cv = CreateEvent(NULL /* `attrs` */, TRUE /* `isManualReset` */,
1958                               FALSE /* `initialState` */, NULL /* `name` */);
1959      mark_cv = CreateEvent(NULL /* `attrs` */, TRUE /* `isManualReset` */,
1960                            FALSE /* `initialState` */, NULL /* `name` */);
1961      if (mark_mutex_event == (HANDLE)0 || builder_cv == (HANDLE)0
1962          || mark_cv == (HANDLE)0)
1963        ABORT("CreateEvent failed");
1964  #    endif
1965  #    if !defined(HAVE_PTHREAD_SETNAME_NP_WITH_TID) && !defined(MSWINCE)
1966      GC_init_win32_thread_naming(hK32);
1967  #    endif
1968    }
1969  #  endif /* PARALLEL_MARK */
1970  
1971    GC_register_my_thread_inner(&sb, self_id);
1972  }
1973  
1974  #  ifndef GC_NO_THREADS_DISCOVERY
1975  /*
1976   * We avoid acquiring locks here, since this does not seem to be preemptible.
1977   * This may run with an uninitialized collector, in which case we do not
1978   * do much.  This implies that no threads other than the main one should be
1979   * created with an uninitialized collector.  (The alternative of initializing
1980   * the collector here seems dangerous, since `DllMain` is limited in what it
1981   * can do.)
1982   */
1983  
1984  #    ifdef GC_INSIDE_DLL
1985  /* Export only if needed by client. */
1986  GC_API
1987  #    else
1988  #      define GC_DllMain DllMain
1989  #    endif
1990  BOOL WINAPI
1991  GC_DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved)
1992  {
1993    thread_id_t self_id;
1994  
1995    UNUSED_ARG(inst);
1996    UNUSED_ARG(reserved);
1997    /*
1998     * Note that `GC_use_threads_discovery` should be called by the client
1999     * application at start-up to activate automatic thread registration
2000     * (it is the default collector behavior); to always have automatic
2001     * thread registration turned on, the collector should be compiled with
2002     * `-D GC_DISCOVER_TASK_THREADS` option.
2003     */
2004    if (!GC_win32_dll_threads && GC_is_initialized)
2005      return TRUE;
2006  
2007    switch (reason) {
2008    case DLL_THREAD_ATTACH:
2009      /* This is invoked for threads other than main one. */
2010  #    ifdef PARALLEL_MARK
2011      /* Do not register marker threads. */
2012      if (GC_parallel) {
2013        /*
2014         * We could reach here only if the collector is not initialized.
2015         * Because `GC_thr_init()` sets `GC_parallel` to `FALSE`.
2016         */
2017        break;
2018      }
2019  #    endif
2020      /* FALLTHRU */
2021    case DLL_PROCESS_ATTACH:
2022      /* This may run with the collector uninitialized. */
2023      self_id = GetCurrentThreadId();
2024      if (GC_is_initialized && GC_main_thread_id != self_id) {
2025        struct GC_stack_base sb;
2026        /* Do not lock here. */
2027  #    ifdef GC_ASSERTIONS
2028        int sb_result =
2029  #    endif
2030            GC_get_stack_base(&sb);
2031        GC_ASSERT(sb_result == GC_SUCCESS);
2032        GC_register_my_thread_inner(&sb, self_id);
2033      } else {
2034        /* We already did it during `GC_thr_init()`, called by `GC_init()`. */
2035      }
2036      break;
2037  
2038    case DLL_THREAD_DETACH:
2039      /* We are hopefully running in the context of the exiting thread. */
2040      if (GC_win32_dll_threads) {
2041        GC_thread t = GC_win32_dll_lookup_thread(GetCurrentThreadId());
2042  
2043        if (LIKELY(t != NULL))
2044          GC_delete_thread(t);
2045      }
2046      break;
2047  
2048    case DLL_PROCESS_DETACH:
2049      if (GC_win32_dll_threads) {
2050        int i;
2051        int my_max = (int)GC_get_max_thread_index();
2052  
2053        for (i = 0; i <= my_max; ++i) {
2054          if (AO_load(&dll_thread_table[i].tm.in_use))
2055            GC_delete_thread((GC_thread)&dll_thread_table[i]);
2056        }
2057        GC_deinit();
2058      }
2059      break;
2060    }
2061    return TRUE;
2062  }
2063  #  endif /* !GC_NO_THREADS_DISCOVERY */
2064  
2065  #  ifndef GC_NO_THREAD_REDIRECTS
2066  /* Restore thread calls redirection. */
2067  #    define CreateThread GC_CreateThread
2068  #    define ExitThread GC_ExitThread
2069  #    undef _beginthreadex
2070  #    define _beginthreadex GC_beginthreadex
2071  #    undef _endthreadex
2072  #    define _endthreadex GC_endthreadex
2073  #  endif /* !GC_NO_THREAD_REDIRECTS */
2074  
2075  #endif /* GC_WIN32_THREADS */
2076