1 /*
2 * Copyright (c) 2000-2005 by Hewlett-Packard Company. All rights reserved.
3 * Copyright (c) 2008-2022 Ivan Maidanski
4 *
5 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
6 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
7 *
8 * Permission is hereby granted to use or copy this program
9 * for any purpose, provided the above notices are retained on all copies.
10 * Permission to modify the code and to distribute modified code is granted,
11 * provided the above notices are retained, and a notice that the code was
12 * modified is included with the above copyright notice.
13 */
14 15 #include "private/gc_priv.h"
16 17 #if defined(THREAD_LOCAL_ALLOC)
18 19 # if !defined(THREADS) && !defined(CPPCHECK)
20 # error Invalid config - THREAD_LOCAL_ALLOC requires GC_THREADS
21 # endif
22 23 # include "private/thread_local_alloc.h"
24 25 # if defined(USE_COMPILER_TLS)
26 __thread GC_ATTR_TLS_FAST
27 # elif defined(USE_WIN32_COMPILER_TLS)
28 __declspec(thread) GC_ATTR_TLS_FAST
29 # endif
30 GC_key_t GC_thread_key;
31 32 static GC_bool keys_initialized;
33 34 /*
35 * Return a single nonempty free list `fl` to the global one pointed to
36 * by `gfl`.
37 */
38 static void
39 return_single_freelist(void *fl, void **gfl)
40 {
41 if (NULL == *gfl) {
42 *gfl = fl;
43 } else {
44 void *q = fl;
45 void **qptr;
46 47 GC_ASSERT(GC_size(fl) == GC_size(*gfl));
48 /* Concatenate. */
49 do {
50 qptr = &obj_link(q);
51 q = *qptr;
52 } while (ADDR(q) >= HBLKSIZE);
53 GC_ASSERT(NULL == q);
54 *qptr = *gfl;
55 *gfl = fl;
56 }
57 }
58 59 /*
60 * Recover the contents of the free-list array `fl` into the global one
61 * `gfl`.
62 */
static void
return_freelists(void **fl, void **gfl)
{
  int i;

  for (i = 1; i < GC_TINY_FREELISTS; ++i) {
    /* Values below HBLKSIZE are sentinels, not object pointers; only */
    /* hand back lists that actually hold objects.                    */
    if (ADDR(fl[i]) >= HBLKSIZE) {
      return_single_freelist(fl[i], &gfl[i]);
    }
    /*
     * Clear `fl[i]`, since the thread structure may hang around.
     * Do it in a way that is likely to trap if we access it.
     */
    fl[i] = (ptr_t)NUMERIC_TO_VPTR(HBLKSIZE);
  }
  /* The 0 granule free list really contains 1 granule objects, so it */
  /* is returned to `gfl[1]` (for gcj, slot 0 holds ERROR_FL instead  */
  /* of a list and must not be returned).                             */
  if (ADDR(fl[0]) >= HBLKSIZE
#     ifdef GC_GCJ_SUPPORT
        && ADDR(fl[0]) != ERROR_FL
#     endif
  ) {
    return_single_freelist(fl[0], &gfl[1]);
  }
}
87 88 # ifdef USE_PTHREAD_SPECIFIC
89 /*
90 * Re-set the TLS value on thread cleanup to allow thread-local allocations
91 * to happen in the TLS destructors. `GC_unregister_my_thread()` (and
92 * similar routines) will finally set the `GC_thread_key` to `NULL`
93 * preventing this destructor from being called repeatedly.
94 */
/* TLS destructor: see the explanation above for why the value is */
/* re-installed rather than left cleared.                         */
static void
reset_thread_key(void *v)
{
  pthread_setspecific(GC_thread_key, v);
}
100 # else
101 # define reset_thread_key 0
102 # endif
/* Set up the thread-local free lists in `p` for the calling thread */
/* and register `p` as this thread's TLS value.  Must be called     */
/* with the allocator lock held.                                    */
GC_INNER void
GC_init_thread_local(GC_tlfs p)
{
  int kind, j, res;

  GC_ASSERT(I_HOLD_LOCK());
  if (UNLIKELY(!keys_initialized)) {
    /* First thread: create the TLS key (done exactly once; we are */
    /* protected by the allocator lock).                           */
#   ifdef USE_CUSTOM_SPECIFIC
      /* Ensure proper alignment of a "pushed" GC symbol. */
      GC_ASSERT(ADDR(&GC_thread_key) % ALIGNMENT == 0);
#   endif
    res = GC_key_create(&GC_thread_key, reset_thread_key);
    if (COVERT_DATAFLOW(res) != 0) {
      ABORT("Failed to create key for local allocator");
    }
    keys_initialized = TRUE;
  }
  res = GC_setspecific(GC_thread_key, p);
  if (COVERT_DATAFLOW(res) != 0) {
    ABORT("Failed to set thread specific allocation pointers");
  }
  /* Initialize every local free list to the small sentinel value 1;  */
  /* values below HBLKSIZE are never treated as object pointers (see  */
  /* return_freelists).  NOTE(review): presumably the fast allocator  */
  /* also uses such small values as allocation counters -- confirm in */
  /* `thread_local_alloc.h`.                                          */
  for (j = 0; j < GC_TINY_FREELISTS; ++j) {
    for (kind = 0; kind < THREAD_FREELISTS_KINDS; ++kind) {
      p->_freelists[kind][j] = NUMERIC_TO_VPTR(1);
    }
#   ifdef GC_GCJ_SUPPORT
      p->gcj_freelists[j] = NUMERIC_TO_VPTR(1);
#   endif
  }
  /*
   * The zero-sized free list is handled like the regular free list, to
   * ensure that the explicit deallocation works.  However, an allocation
   * of a `gcj` object with the zero size is always an error.
   */
# ifdef GC_GCJ_SUPPORT
    p->gcj_freelists[0] = MAKE_CPTR(ERROR_FL);
# endif
}
/* Return all of `p`'s thread-local free lists to the corresponding */
/* global ones.  Must be called with the allocator lock held.       */
GC_INNER void
GC_destroy_thread_local(GC_tlfs p)
{
  int kind;

  GC_ASSERT(I_HOLD_LOCK());
  GC_ASSERT(GC_getspecific(GC_thread_key) == p);
  /* We currently only do this from the thread itself. */
  GC_STATIC_ASSERT(THREAD_FREELISTS_KINDS <= MAXOBJKINDS);
  for (kind = 0; kind < THREAD_FREELISTS_KINDS; ++kind) {
    if (kind == (int)GC_n_kinds) {
      /* The kind is not created; there is nothing to return for it */
      /* or any higher-numbered kind.                               */
      break;
    }
    return_freelists(p->_freelists[kind], GC_obj_kinds[kind].ok_freelist);
  }
# ifdef GC_GCJ_SUPPORT
    return_freelists(p->gcj_freelists, (void **)GC_gcjobjfreelist);
# endif
}
/* Return the calling thread's thread-local free-list structure, or   */
/* NULL if thread-local allocation is not yet usable (the caller then */
/* falls back to `GC_malloc_kind_global()`).                          */
STATIC void *
GC_get_tlfs(void)
{
# if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
    /* Read the key once; zero means the key has not been created yet. */
    GC_key_t k = GC_thread_key;

    if (UNLIKELY(0 == k)) {
      /*
       * We have not yet run `GC_init_parallel()`.  That means we also
       * are not locking, so `GC_malloc_kind_global()` is fairly cheap.
       */
      return NULL;
    }
    return GC_getspecific(k);
# else
    /* NOTE(review): here a zero key value is presumably a legitimate  */
    /* one, hence the explicit flag test instead -- confirm.           */
    if (UNLIKELY(!keys_initialized))
      return NULL;

    return GC_getspecific(GC_thread_key);
# endif
}
/* Allocate `lb` bytes of object kind `kind`, preferring the calling  */
/* thread's local free lists; fall back to the global allocator when  */
/* thread-local allocation is unavailable or inapplicable.            */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_malloc_kind(size_t lb, int kind)
{
  size_t lg;
  void *tsd;
  void *result;

  /* Kinds with no per-thread free lists go straight to the global */
  /* allocator.                                                    */
# if MAXOBJKINDS > THREAD_FREELISTS_KINDS
    if (UNLIKELY(kind >= THREAD_FREELISTS_KINDS))
      return GC_malloc_kind_global(lb, kind);
# endif
  tsd = GC_get_tlfs();
  if (UNLIKELY(NULL == tsd))
    return GC_malloc_kind_global(lb, kind);

  GC_ASSERT(GC_is_initialized);
  GC_ASSERT(GC_is_thread_tsd_valid(tsd));
  lg = ALLOC_REQUEST_GRANS(lb);
# if defined(CPPCHECK)
#   define MALLOC_KIND_PTRFREE_INIT (void *)1
# else
#   define MALLOC_KIND_PTRFREE_INIT NULL
# endif
  /* On the fast path, clear the free-list link of the object unless */
  /* the kind is pointer-free (in which case no clearing is needed). */
  GC_FAST_MALLOC_GRANS(result, lg, ((GC_tlfs)tsd)->_freelists[kind],
                       DIRECT_GRANULES, kind, GC_malloc_kind_global(lb, kind),
                       (void)(kind == PTRFREE ? MALLOC_KIND_PTRFREE_INIT
                                              : (obj_link(result) = 0)));
# ifdef LOG_ALLOCS
    GC_log_printf("GC_malloc_kind(%lu, %d) returned %p, recent GC #%lu\n",
                  (unsigned long)lb, kind, result, (unsigned long)GC_gc_no);
# endif
  return result;
}
219 220 # ifdef GC_GCJ_SUPPORT
221 222 # include "gc/gc_gcj.h"
/* Allocate a gcj-style object of `lb` bytes whose first word is set */
/* to `vtable_ptr`, using the thread-local gcj free lists when safe. */
GC_API GC_ATTR_MALLOC void *GC_CALL
GC_gcj_malloc(size_t lb, const void *vtable_ptr)
{
  void *result;
  void **tiny_fl;
  size_t lg;

  /*
   * Unlike the other thread-local allocation calls, we assume that the
   * collector has been explicitly initialized.
   */
  GC_ASSERT(GC_gcjobjfreelist != NULL);
# if defined(USE_PTHREAD_SPECIFIC) || defined(USE_WIN32_SPECIFIC)
    GC_ASSERT(keys_initialized);
# else
    GC_ASSERT(GC_thread_key != 0);
# endif

  /*
   * `gcj`-style allocation without locks is extremely tricky.
   * The fundamental issue is that we may end up marking a free list,
   * which has free-list links instead of "vtable" pointers.
   * That is usually OK, since the next object on the free list will be
   * cleared, and will thus be interpreted as containing a zero descriptor.
   * That is fine if the object has not yet been initialized.  But there
   * are interesting potential races.  In the case of incremental
   * collection, this seems hopeless, since the marker may run
   * asynchronously, and may pick up the pointer to the next free-list
   * entry (which it thinks is a "vtable" pointer), get suspended for
   * a while, and then see an allocated object instead of the "vtable".
   * This may be avoidable with either a handshake with the collector or,
   * probably more easily, by moving the free list links to the second
   * "pointer-sized" word of each object.  The latter is not a universal
   * win, since on architectures like Itanium, nonzero offsets are not
   * necessarily free.  And there may be cache fill order issues.
   * For now, we punt with the incremental collection.  This probably means
   * that the incremental collection should be enabled before we create
   * a second thread.
   */
  if (UNLIKELY(GC_incremental))
    return GC_core_gcj_malloc(lb, vtable_ptr, 0 /* `flags` */);

  tiny_fl = ((GC_tlfs)GC_getspecific(GC_thread_key))->gcj_freelists;
  lg = ALLOC_REQUEST_GRANS(lb);

  /*
   * The provided `default_expr` below forces the initialization of the
   * "vtable" pointer.  This is necessary to ensure some very subtle
   * properties required if a garbage collection is run in the middle of
   * such an allocation.  Here we implicitly also assume atomicity for the
   * free list and method pointer assignments.  We must update the free list
   * before we store the pointer.  Otherwise a collection at this point
   * would see a corrupted free list.  A real memory barrier is not needed,
   * since the action of stopping this thread will cause prior writes
   * to complete.  We assert that any concurrent marker will stop us.
   * Thus it is impossible for a mark procedure to see the allocation of the
   * next object, but to see this object still containing a free-list pointer.
   * Otherwise the marker, by misinterpreting the free-list link as a "vtable"
   * pointer, might find a random "mark descriptor" in the next object.
   */
  GC_FAST_MALLOC_GRANS(
      result, lg, tiny_fl, DIRECT_GRANULES, GC_gcj_kind,
      GC_core_gcj_malloc(lb, vtable_ptr, 0 /* `flags` */), do {
        /* Only a compiler barrier: ordering w.r.t. the collector is */
        /* guaranteed by thread stopping (see the comment above).    */
        AO_compiler_barrier();
        *(const void **)result = vtable_ptr;
      } while (0));
  return result;
}
292 293 # endif /* GC_GCJ_SUPPORT */
/* Set mark bits for every object on each nonempty thread-local free */
/* list of `p`, so that free-listed objects are not reclaimed.       */
GC_INNER void
GC_mark_thread_local_fls_for(GC_tlfs p)
{
  ptr_t q;
  int kind, j;

  for (j = 0; j < GC_TINY_FREELISTS; ++j) {
    for (kind = 0; kind < THREAD_FREELISTS_KINDS; ++kind) {
      /*
       * Load the pointer atomically as it might be updated concurrently
       * by `GC_FAST_MALLOC_GRANS()`.
       */
      q = GC_cptr_load((volatile ptr_t *)&p->_freelists[kind][j]);
      /* Values up to HBLKSIZE are sentinels, not list heads. */
      if (ADDR(q) > HBLKSIZE)
        GC_set_fl_marks(q);
    }
#   ifdef GC_GCJ_SUPPORT
      /* Skip the 0-granule gcj slot: it holds ERROR_FL, not a list */
      /* (see GC_init_thread_local).                                */
      if (LIKELY(j > 0)) {
        q = GC_cptr_load((volatile ptr_t *)&p->gcj_freelists[j]);
        if (ADDR(q) > HBLKSIZE)
          GC_set_fl_marks(q);
      }
#   endif
  }
}
320 321 # if defined(GC_ASSERTIONS)
322 /* Check that all thread-local free-lists in `p` are completely marked. */
323 void
324 GC_check_tls_for(GC_tlfs p)
325 {
326 int kind, j;
327 328 for (j = 1; j < GC_TINY_FREELISTS; ++j) {
329 for (kind = 0; kind < THREAD_FREELISTS_KINDS; ++kind) {
330 GC_check_fl_marks(&p->_freelists[kind][j]);
331 }
332 # ifdef GC_GCJ_SUPPORT
333 GC_check_fl_marks(&p->gcj_freelists[j]);
334 # endif
335 }
336 }
337 # endif
338 339 #endif /* THREAD_LOCAL_ALLOC */
340