Skip to content

Commit 572e191

Browse files
committed
pythongh-112532: Use separate mimalloc heaps for GC objects
In `--disable-gil` builds, we now use four separate heaps in anticipation of using mimalloc to find GC objects when the GIL is disabled. To support this, we also make a few changes to mimalloc:

* Heap and mi_tld_t initialization is split from allocation. This allows us to have a per-PyThreadState mi_tld_t, which is important to keep interpreter isolation, since the same OS thread may run in multiple interpreters (using different PyThreadStates.)

* The pool of abandoned segments is refactored into its own struct. This allows us to use different pools for different interpreters so that we can preserve interpreter isolation.

* Heap abandoning (mi_heap_collect_ex) can now be called from a different thread than the one that created the heap. This is necessary because we may clear and delete the containing PyThreadStates from a different thread during finalization and after fork().
1 parent 498a096 commit 572e191

File tree

11 files changed

+263
-99
lines changed

11 files changed

+263
-99
lines changed

Include/internal/mimalloc/mimalloc/internal.h

+6-18
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,6 @@ terms of the MIT license. A copy of the license can be found in the file
2323
#define mi_trace_message(...)
2424
#endif
2525

26-
#define MI_CACHE_LINE 64
27-
#if defined(_MSC_VER)
28-
#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
29-
#pragma warning(disable:26812) // unscoped enum warning
30-
#define mi_decl_noinline __declspec(noinline)
31-
#define mi_decl_thread __declspec(thread)
32-
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
33-
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
34-
#define mi_decl_noinline __attribute__((noinline))
35-
#define mi_decl_thread __thread
36-
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
37-
#else
38-
#define mi_decl_noinline
39-
#define mi_decl_thread __thread // hope for the best :-)
40-
#define mi_decl_cache_align
41-
#endif
42-
4326
#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
4427
#define __wasi__
4528
#endif
@@ -85,6 +68,8 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
8568
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
8669
void _mi_thread_done(mi_heap_t* heap);
8770
void _mi_thread_data_collect(void);
71+
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
72+
8873

8974
// os.c
9075
void _mi_os_init(void); // called from process init
@@ -130,6 +115,7 @@ void _mi_segment_map_allocated_at(const mi_segment_t* segment);
130115
void _mi_segment_map_freed_at(const mi_segment_t* segment);
131116

132117
// "segment.c"
118+
extern mi_abandoned_pool_t _mi_abandoned_default; // global abandoned pool
133119
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
134120
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
135121
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
@@ -144,7 +130,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, m
144130

145131
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
146132
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
147-
void _mi_abandoned_await_readers(void);
133+
void _mi_abandoned_await_readers(mi_abandoned_pool_t *pool);
148134
void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
149135

150136
// "page.c"
@@ -170,8 +156,10 @@ size_t _mi_bin_size(uint8_t bin); // for stats
170156
uint8_t _mi_bin(size_t size); // for stats
171157

172158
// "heap.c"
159+
void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id);
173160
void _mi_heap_destroy_pages(mi_heap_t* heap);
174161
void _mi_heap_collect_abandon(mi_heap_t* heap);
162+
void _mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from);
175163
void _mi_heap_set_default_direct(mi_heap_t* heap);
176164
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
177165
void _mi_heap_unsafe_destroy_all(void);

Include/internal/mimalloc/mimalloc/types.h

+40
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,23 @@ terms of the MIT license. A copy of the license can be found in the file
3333
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
3434
#endif
3535

36+
#define MI_CACHE_LINE 64
37+
#if defined(_MSC_VER)
38+
#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
39+
#pragma warning(disable:26812) // unscoped enum warning
40+
#define mi_decl_noinline __declspec(noinline)
41+
#define mi_decl_thread __declspec(thread)
42+
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
43+
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
44+
#define mi_decl_noinline __attribute__((noinline))
45+
#define mi_decl_thread __thread
46+
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
47+
#else
48+
#define mi_decl_noinline
49+
#define mi_decl_thread __thread // hope for the best :-)
50+
#define mi_decl_cache_align
51+
#endif
52+
3653
// ------------------------------------------------------
3754
// Variants
3855
// ------------------------------------------------------
@@ -445,6 +462,28 @@ typedef struct mi_segment_s {
445462
mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment
446463
} mi_segment_t;
447464

465+
typedef uintptr_t mi_tagged_segment_t;
466+
467+
// Segments unowned by any thread are put in a shared pool
468+
typedef struct mi_abandoned_pool_s {
469+
// This is a list of visited abandoned pages that were full at the time.
470+
// this list migrates to `abandoned` when that becomes NULL. The use of
471+
// this list reduces contention and the rate at which segments are visited.
472+
mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL
473+
474+
// The abandoned page list (tagged as it supports pop)
475+
mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL
476+
477+
// Maintain these for debug purposes (these counts may be a bit off)
478+
mi_decl_cache_align _Atomic(size_t) abandoned_count;
479+
mi_decl_cache_align _Atomic(size_t) abandoned_visited_count;
480+
481+
// We also maintain a count of current readers of the abandoned list
482+
// in order to prevent resetting/decommitting segment memory if it might
483+
// still be read.
484+
mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0
485+
} mi_abandoned_pool_t;
486+
448487

449488
// ------------------------------------------------------
450489
// Heaps
@@ -654,6 +693,7 @@ typedef struct mi_segments_tld_s {
654693
size_t peak_size; // peak size of all segments
655694
mi_stats_t* stats; // points to tld stats
656695
mi_os_tld_t* os; // points to os stats
696+
mi_abandoned_pool_t* abandoned; // pool of abandoned segments
657697
} mi_segments_tld_t;
658698

659699
// Thread local data

Include/internal/pycore_interp.h

+5
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ extern "C" {
2727
#include "pycore_import.h" // struct _import_state
2828
#include "pycore_instruments.h" // _PY_MONITORING_EVENTS
2929
#include "pycore_list.h" // struct _Py_list_state
30+
#include "pycore_mimalloc.h" // struct _mimalloc_interp_state
3031
#include "pycore_object_state.h" // struct _py_object_state
3132
#include "pycore_obmalloc.h" // struct _obmalloc_state
3233
#include "pycore_tstate.h" // _PyThreadStateImpl
@@ -166,6 +167,10 @@ struct _is {
166167
struct _warnings_runtime_state warnings;
167168
struct atexit_state atexit;
168169

170+
#if defined(Py_GIL_DISABLED)
171+
struct _mimalloc_interp_state mimalloc;
172+
#endif
173+
169174
struct _obmalloc_state obmalloc;
170175

171176
PyObject *audit_hooks;

Include/internal/pycore_mimalloc.h

+33
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,44 @@
99
# error "pycore_mimalloc.h must be included before mimalloc.h"
1010
#endif
1111

12+
#define _Py_MIMALLOC_HEAP_MEM 0 // PyMem_Malloc() and friends
13+
#define _Py_MIMALLOC_HEAP_OBJECT 1 // non-GC objects
14+
#define _Py_MIMALLOC_HEAP_GC 2 // GC objects without pre-header
15+
#define _Py_MIMALLOC_HEAP_GC_PRE 3 // GC objects with pre-header
16+
#define _Py_MIMALLOC_HEAP_COUNT 4
17+
1218
#include "pycore_pymem.h"
1319
#define MI_DEBUG_UNINIT PYMEM_CLEANBYTE
1420
#define MI_DEBUG_FREED PYMEM_DEADBYTE
1521
#define MI_DEBUG_PADDING PYMEM_FORBIDDENBYTE
22+
#ifdef Py_DEBUG
23+
# define MI_DEBUG 1
24+
#else
25+
# define MI_DEBUG 0
26+
#endif
1627

1728
#include "mimalloc.h"
29+
#include "mimalloc/types.h"
30+
#include "mimalloc/internal.h"
31+
32+
struct _mimalloc_interp_state {
33+
#ifdef Py_GIL_DISABLED
34+
// When exiting, threads place any segments with live blocks in this
35+
// shared pool for other threads to claim and reuse.
36+
mi_abandoned_pool_t abandoned_pool;
37+
#else
38+
char _unused; // empty structs are not allowed
39+
#endif
40+
};
41+
42+
struct _mimalloc_thread_state {
43+
#ifdef Py_GIL_DISABLED
44+
mi_heap_t *current_object_heap;
45+
mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT];
46+
mi_tld_t tld;
47+
#else
48+
char _unused; // empty structs are not allowed
49+
#endif
50+
};
1851

1952
#endif // Py_INTERNAL_MIMALLOC_H

Include/internal/pycore_tstate.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ extern "C" {
88
# error "this header requires Py_BUILD_CORE define"
99
#endif
1010

11+
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
12+
1113

1214
// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
1315
// PyThreadState fields are exposed as part of the C API, although most fields
@@ -16,7 +18,10 @@ typedef struct _PyThreadStateImpl {
1618
// semi-public fields are in PyThreadState.
1719
PyThreadState base;
1820

19-
// TODO: add private fields here
21+
#ifdef Py_GIL_DISABLED
22+
struct _mimalloc_thread_state mimalloc;
23+
#endif
24+
2025
} _PyThreadStateImpl;
2126

2227

Lib/test/test_import/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
from test.support import os_helper
2727
from test.support import (
2828
STDLIB_DIR, swap_attr, swap_item, cpython_only, is_emscripten,
29-
is_wasi, run_in_subinterp, run_in_subinterp_with_config, Py_TRACE_REFS)
29+
is_wasi, run_in_subinterp, run_in_subinterp_with_config, Py_TRACE_REFS,
30+
Py_GIL_DISABLED)
3031
from test.support.import_helper import (
3132
forget, make_legacy_pyc, unlink, unload, ready_to_import,
3233
DirsOnSysPath, CleanImport)
@@ -2018,6 +2019,7 @@ def parse(cls, text):
20182019
return self
20192020

20202021

2022+
@unittest.skipIf(Py_GIL_DISABLED, "test deallocates objects from a different interpreter")
20212023
@requires_singlephase_init
20222024
class SinglephaseInitTests(unittest.TestCase):
20232025

Objects/mimalloc/heap.c

+18-8
Original file line numberDiff line numberDiff line change
@@ -206,18 +206,28 @@ mi_heap_t* mi_heap_get_backing(void) {
206206
return bheap;
207207
}
208208

209-
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
210-
mi_heap_t* bheap = mi_heap_get_backing();
211-
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
212-
if (heap == NULL) return NULL;
209+
void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id)
210+
{
213211
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
214-
heap->tld = bheap->tld;
212+
heap->tld = tld;
215213
heap->thread_id = _mi_thread_id();
216214
heap->arena_id = arena_id;
217-
_mi_random_split(&bheap->random, &heap->random);
215+
if (heap == tld->heap_backing) {
216+
_mi_random_init(&heap->random);
217+
}
218+
else {
219+
_mi_random_split(&tld->heap_backing->random, &heap->random);
220+
}
218221
heap->cookie = _mi_heap_random_next(heap) | 1;
219222
heap->keys[0] = _mi_heap_random_next(heap);
220223
heap->keys[1] = _mi_heap_random_next(heap);
224+
}
225+
226+
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
227+
mi_heap_t* bheap = mi_heap_get_backing();
228+
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
229+
if (heap == NULL) return NULL;
230+
_mi_heap_init_ex(heap, bheap->tld, arena_id);
221231
heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe
222232
// push on the thread local heaps list
223233
heap->next = heap->tld->heaps;
@@ -383,7 +393,7 @@ void _mi_heap_unsafe_destroy_all(void) {
383393
----------------------------------------------------------- */
384394

385395
// Transfer the pages from one heap to the other
386-
static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
396+
void _mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
387397
mi_assert_internal(heap!=NULL);
388398
if (from==NULL || from->page_count == 0) return;
389399

@@ -426,7 +436,7 @@ void mi_heap_delete(mi_heap_t* heap)
426436

427437
if (!mi_heap_is_backing(heap)) {
428438
// transfer still used pages to the backing heap
429-
mi_heap_absorb(heap->tld->heap_backing, heap);
439+
_mi_heap_absorb(heap->tld->heap_backing, heap);
430440
}
431441
else {
432442
// the backing heap abandons its pages

Objects/mimalloc/init.c

+12-15
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ extern mi_heap_t _mi_heap_main;
148148
static mi_tld_t tld_main = {
149149
0, false,
150150
&_mi_heap_main, & _mi_heap_main,
151-
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
151+
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os, &_mi_abandoned_default }, // segments
152152
{ 0, &tld_main.stats }, // os
153153
{ MI_STATS_NULL } // stats
154154
};
@@ -297,24 +297,21 @@ static bool _mi_heap_init(void) {
297297
mi_thread_data_t* td = mi_thread_data_zalloc();
298298
if (td == NULL) return false;
299299

300-
mi_tld_t* tld = &td->tld;
301-
mi_heap_t* heap = &td->heap;
300+
_mi_tld_init(&td->tld, &td->heap);
301+
_mi_heap_init_ex(&td->heap, &td->tld, _mi_arena_id_none());
302+
_mi_heap_set_default_direct(&td->heap);
303+
}
304+
return false;
305+
}
306+
307+
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
302308
_mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
303-
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap));
304-
heap->thread_id = _mi_thread_id();
305-
_mi_random_init(&heap->random);
306-
heap->cookie = _mi_heap_random_next(heap) | 1;
307-
heap->keys[0] = _mi_heap_random_next(heap);
308-
heap->keys[1] = _mi_heap_random_next(heap);
309-
heap->tld = tld;
310-
tld->heap_backing = heap;
311-
tld->heaps = heap;
312309
tld->segments.stats = &tld->stats;
313310
tld->segments.os = &tld->os;
311+
tld->segments.abandoned = &_mi_abandoned_default;
314312
tld->os.stats = &tld->stats;
315-
_mi_heap_set_default_direct(heap);
316-
}
317-
return false;
313+
tld->heap_backing = bheap;
314+
tld->heaps = bheap;
318315
}
319316

320317
// Free the thread local default heap (called from `mi_thread_done`)

0 commit comments

Comments
 (0)