Skip to content

Commit acf3bcc

Browse files
authored
gh-112532: Use separate mimalloc heaps for GC objects (gh-113263)
* gh-112532: Use separate mimalloc heaps for GC objects

  In `--disable-gil` builds, we now use four separate heaps in anticipation of using mimalloc to find GC objects when the GIL is disabled. To support this, we also make a few changes to mimalloc:

  * `mi_heap_t` and `mi_tld_t` initialization is split from allocation. This allows us to have a `mi_tld_t` per-`PyThreadState`, which is important to keep interpreter isolation, since the same OS thread may run in multiple interpreters (using different PyThreadStates).

  * Heap abandoning (`mi_heap_collect_ex`) can now be called from a different thread than the one that created the heap. This is necessary because we may clear and delete the containing PyThreadStates from a different thread during finalization and after fork().

* Use enum instead of defines and guard mimalloc includes.

  * The enum typedef will be convenient for future PRs that use the type.
  * Guarding the mimalloc includes allows us to unconditionally include pycore_mimalloc.h from other header files that rely on things like `struct _mimalloc_thread_state`.

* Only define `_mimalloc_thread_state` in Py_GIL_DISABLED builds
1 parent 8f5b998 commit acf3bcc

File tree

9 files changed

+161
-23
lines changed

9 files changed

+161
-23
lines changed

Include/internal/mimalloc/mimalloc/internal.h

+2
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
8585
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
8686
void _mi_thread_done(mi_heap_t* heap);
8787
void _mi_thread_data_collect(void);
88+
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
8889

8990
// os.c
9091
void _mi_os_init(void); // called from process init
@@ -170,6 +171,7 @@ size_t _mi_bin_size(uint8_t bin); // for stats
170171
uint8_t _mi_bin(size_t size); // for stats
171172

172173
// "heap.c"
174+
void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id);
173175
void _mi_heap_destroy_pages(mi_heap_t* heap);
174176
void _mi_heap_collect_abandon(mi_heap_t* heap);
175177
void _mi_heap_set_default_direct(mi_heap_t* heap);

Include/internal/pycore_mimalloc.h

+26
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,37 @@
99
# error "pycore_mimalloc.h must be included before mimalloc.h"
1010
#endif
1111

12+
typedef enum {
13+
_Py_MIMALLOC_HEAP_MEM = 0, // PyMem_Malloc() and friends
14+
_Py_MIMALLOC_HEAP_OBJECT = 1, // non-GC objects
15+
_Py_MIMALLOC_HEAP_GC = 2, // GC objects without pre-header
16+
_Py_MIMALLOC_HEAP_GC_PRE = 3, // GC objects with pre-header
17+
_Py_MIMALLOC_HEAP_COUNT
18+
} _Py_mimalloc_heap_id;
19+
1220
#include "pycore_pymem.h"
21+
22+
#ifdef WITH_MIMALLOC
1323
#define MI_DEBUG_UNINIT PYMEM_CLEANBYTE
1424
#define MI_DEBUG_FREED PYMEM_DEADBYTE
1525
#define MI_DEBUG_PADDING PYMEM_FORBIDDENBYTE
26+
#ifdef Py_DEBUG
27+
# define MI_DEBUG 1
28+
#else
29+
# define MI_DEBUG 0
30+
#endif
1631

1732
#include "mimalloc.h"
33+
#include "mimalloc/types.h"
34+
#include "mimalloc/internal.h"
35+
#endif
36+
37+
#ifdef Py_GIL_DISABLED
38+
struct _mimalloc_thread_state {
39+
mi_heap_t *current_object_heap;
40+
mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT];
41+
mi_tld_t tld;
42+
};
43+
#endif
1844

1945
#endif // Py_INTERNAL_MIMALLOC_H

Include/internal/pycore_pystate.h

+1
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ extern PyThreadState * _PyThreadState_New(
187187
int whence);
188188
extern void _PyThreadState_Bind(PyThreadState *tstate);
189189
extern void _PyThreadState_DeleteExcept(PyThreadState *tstate);
190+
extern void _PyThreadState_ClearMimallocHeaps(PyThreadState *tstate);
190191

191192
// Export for '_testinternalcapi' shared extension
192193
PyAPI_FUNC(PyObject*) _PyThreadState_GetDict(PyThreadState *tstate);

Include/internal/pycore_tstate.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ extern "C" {
88
# error "this header requires Py_BUILD_CORE define"
99
#endif
1010

11+
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
12+
1113

1214
// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
1315
// PyThreadState fields are exposed as part of the C API, although most fields
@@ -16,7 +18,10 @@ typedef struct _PyThreadStateImpl {
1618
// semi-public fields are in PyThreadState.
1719
PyThreadState base;
1820

19-
// TODO: add private fields here
21+
#ifdef Py_GIL_DISABLED
22+
struct _mimalloc_thread_state mimalloc;
23+
#endif
24+
2025
} _PyThreadStateImpl;
2126

2227

Objects/mimalloc/heap.c

+21-8
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,17 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
123123
const bool force = collect >= MI_FORCE;
124124
_mi_deferred_free(heap, force);
125125

126+
// gh-112532: we may be called from a thread that is not the owner of the heap
127+
bool is_main_thread = _mi_is_main_thread() && heap->thread_id == _mi_thread_id();
128+
126129
// note: never reclaim on collect but leave it to threads that need storage to reclaim
127130
const bool force_main =
128131
#ifdef NDEBUG
129132
collect == MI_FORCE
130133
#else
131134
collect >= MI_FORCE
132135
#endif
133-
&& _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim;
136+
&& is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim;
134137

135138
if (force_main) {
136139
// the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
@@ -164,7 +167,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
164167
}
165168

166169
// collect regions on program-exit (or shared library unload)
167-
if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
170+
if (force && is_main_thread && mi_heap_is_backing(heap)) {
168171
_mi_thread_data_collect(); // collect thread data cache
169172
_mi_arena_collect(true /* force purge */, &heap->tld->stats);
170173
}
@@ -206,18 +209,28 @@ mi_heap_t* mi_heap_get_backing(void) {
206209
return bheap;
207210
}
208211

209-
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
210-
mi_heap_t* bheap = mi_heap_get_backing();
211-
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
212-
if (heap == NULL) return NULL;
212+
void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id)
213+
{
213214
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
214-
heap->tld = bheap->tld;
215+
heap->tld = tld;
215216
heap->thread_id = _mi_thread_id();
216217
heap->arena_id = arena_id;
217-
_mi_random_split(&bheap->random, &heap->random);
218+
if (heap == tld->heap_backing) {
219+
_mi_random_init(&heap->random);
220+
}
221+
else {
222+
_mi_random_split(&tld->heap_backing->random, &heap->random);
223+
}
218224
heap->cookie = _mi_heap_random_next(heap) | 1;
219225
heap->keys[0] = _mi_heap_random_next(heap);
220226
heap->keys[1] = _mi_heap_random_next(heap);
227+
}
228+
229+
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
230+
mi_heap_t* bheap = mi_heap_get_backing();
231+
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
232+
if (heap == NULL) return NULL;
233+
_mi_heap_init_ex(heap, bheap->tld, arena_id);
221234
heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe
222235
// push on the thread local heaps list
223236
heap->next = heap->tld->heaps;

Objects/mimalloc/init.c

+10-14
Original file line numberDiff line numberDiff line change
@@ -297,24 +297,20 @@ static bool _mi_heap_init(void) {
297297
mi_thread_data_t* td = mi_thread_data_zalloc();
298298
if (td == NULL) return false;
299299

300-
mi_tld_t* tld = &td->tld;
301-
mi_heap_t* heap = &td->heap;
300+
_mi_tld_init(&td->tld, &td->heap);
301+
_mi_heap_init_ex(&td->heap, &td->tld, _mi_arena_id_none());
302+
_mi_heap_set_default_direct(&td->heap);
303+
}
304+
return false;
305+
}
306+
307+
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
302308
_mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
303-
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap));
304-
heap->thread_id = _mi_thread_id();
305-
_mi_random_init(&heap->random);
306-
heap->cookie = _mi_heap_random_next(heap) | 1;
307-
heap->keys[0] = _mi_heap_random_next(heap);
308-
heap->keys[1] = _mi_heap_random_next(heap);
309-
heap->tld = tld;
310-
tld->heap_backing = heap;
311-
tld->heaps = heap;
312309
tld->segments.stats = &tld->stats;
313310
tld->segments.os = &tld->os;
314311
tld->os.stats = &tld->stats;
315-
_mi_heap_set_default_direct(heap);
316-
}
317-
return false;
312+
tld->heap_backing = bheap;
313+
tld->heaps = bheap;
318314
}
319315

320316
// Free the thread local default heap (called from `mi_thread_done`)

Objects/obmalloc.c

+36
Original file line numberDiff line numberDiff line change
@@ -88,19 +88,37 @@ _PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
8888
void *
8989
_PyMem_MiMalloc(void *ctx, size_t size)
9090
{
91+
#ifdef Py_GIL_DISABLED
92+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
93+
mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
94+
return mi_heap_malloc(heap, size);
95+
#else
9196
return mi_malloc(size);
97+
#endif
9298
}
9399

94100
void *
95101
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
96102
{
103+
#ifdef Py_GIL_DISABLED
104+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
105+
mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
106+
return mi_heap_calloc(heap, nelem, elsize);
107+
#else
97108
return mi_calloc(nelem, elsize);
109+
#endif
98110
}
99111

100112
void *
101113
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
102114
{
115+
#ifdef Py_GIL_DISABLED
116+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
117+
mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
118+
return mi_heap_realloc(heap, ptr, size);
119+
#else
103120
return mi_realloc(ptr, size);
121+
#endif
104122
}
105123

106124
void
@@ -112,20 +130,38 @@ _PyMem_MiFree(void *ctx, void *ptr)
112130
void *
113131
_PyObject_MiMalloc(void *ctx, size_t nbytes)
114132
{
133+
#ifdef Py_GIL_DISABLED
134+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
135+
mi_heap_t *heap = tstate->mimalloc.current_object_heap;
136+
return mi_heap_malloc(heap, nbytes);
137+
#else
115138
return mi_malloc(nbytes);
139+
#endif
116140
}
117141

118142
void *
119143
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
120144
{
145+
#ifdef Py_GIL_DISABLED
146+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
147+
mi_heap_t *heap = tstate->mimalloc.current_object_heap;
148+
return mi_heap_calloc(heap, nelem, elsize);
149+
#else
121150
return mi_calloc(nelem, elsize);
151+
#endif
122152
}
123153

124154

125155
void *
126156
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
127157
{
158+
#ifdef Py_GIL_DISABLED
159+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
160+
mi_heap_t *heap = tstate->mimalloc.current_object_heap;
161+
return mi_heap_realloc(heap, ptr, nbytes);
162+
#else
128163
return mi_realloc(ptr, nbytes);
164+
#endif
129165
}
130166

131167
void

Python/pylifecycle.c

+4
Original file line numberDiff line numberDiff line change
@@ -1794,6 +1794,10 @@ finalize_interp_clear(PyThreadState *tstate)
17941794
}
17951795

17961796
finalize_interp_types(tstate->interp);
1797+
1798+
/* finalize_interp_types may allocate Python objects so we may need to
1799+
abandon mimalloc segments again */
1800+
_PyThreadState_ClearMimallocHeaps(tstate);
17971801
}
17981802

17991803

Python/pystate.c

+55
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ tstate_is_bound(PyThreadState *tstate)
236236
static void bind_gilstate_tstate(PyThreadState *);
237237
static void unbind_gilstate_tstate(PyThreadState *);
238238

239+
static void tstate_mimalloc_bind(PyThreadState *);
240+
239241
static void
240242
bind_tstate(PyThreadState *tstate)
241243
{
@@ -256,6 +258,9 @@ bind_tstate(PyThreadState *tstate)
256258
tstate->native_thread_id = PyThread_get_thread_native_id();
257259
#endif
258260

261+
// mimalloc state needs to be initialized from the active thread.
262+
tstate_mimalloc_bind(tstate);
263+
259264
tstate->_status.bound = 1;
260265
}
261266

@@ -1533,6 +1538,8 @@ PyThreadState_Clear(PyThreadState *tstate)
15331538
tstate->on_delete(tstate->on_delete_data);
15341539
}
15351540

1541+
_PyThreadState_ClearMimallocHeaps(tstate);
1542+
15361543
tstate->_status.cleared = 1;
15371544

15381545
// XXX Call _PyThreadStateSwap(runtime, NULL) here if "current".
@@ -2509,3 +2516,51 @@ _PyThreadState_MustExit(PyThreadState *tstate)
25092516
}
25102517
return 1;
25112518
}
2519+
2520+
/********************/
2521+
/* mimalloc support */
2522+
/********************/
2523+
2524+
static void
2525+
tstate_mimalloc_bind(PyThreadState *tstate)
2526+
{
2527+
#ifdef Py_GIL_DISABLED
2528+
struct _mimalloc_thread_state *mts = &((_PyThreadStateImpl*)tstate)->mimalloc;
2529+
2530+
// Initialize the mimalloc thread state. This must be called from the
2531+
// same thread that will use the thread state. The "mem" heap doubles as
2532+
// the "backing" heap.
2533+
mi_tld_t *tld = &mts->tld;
2534+
_mi_tld_init(tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]);
2535+
2536+
// Initialize each heap
2537+
for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
2538+
_mi_heap_init_ex(&mts->heaps[i], tld, _mi_arena_id_none());
2539+
}
2540+
2541+
// By default, object allocations use _Py_MIMALLOC_HEAP_OBJECT.
2542+
// _PyObject_GC_New() and similar functions temporarily override this to
2543+
// use one of the GC heaps.
2544+
mts->current_object_heap = &mts->heaps[_Py_MIMALLOC_HEAP_OBJECT];
2545+
#endif
2546+
}
2547+
2548+
void
2549+
_PyThreadState_ClearMimallocHeaps(PyThreadState *tstate)
2550+
{
2551+
#ifdef Py_GIL_DISABLED
2552+
if (!tstate->_status.bound) {
2553+
// The mimalloc heaps are only initialized when the thread is bound.
2554+
return;
2555+
}
2556+
2557+
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
2558+
for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
2559+
// Abandon all segments in use by this thread. This pushes them to
2560+
// a shared pool to later be reclaimed by other threads. It's important
2561+
// to do this before the thread state is destroyed so that objects
2562+
// remain visible to the GC.
2563+
_mi_heap_collect_abandon(&tstate_impl->mimalloc.heaps[i]);
2564+
}
2565+
#endif
2566+
}

0 commit comments

Comments
 (0)