Skip to content

Commit fe6f15f

Browse files
colesbury authored and aisk committed on Feb 11, 2024
pythongh-112532: Improve mimalloc page visiting (python#114133)
This adds support for visiting abandoned pages in mimalloc and improves the performance of the page visiting code. Abandoned pages contain memory blocks from threads that have exited. At some point, they may be later reclaimed by other threads. We still need to visit those pages in the free-threaded GC because they contain live objects. This also reduces the overhead of visiting mimalloc pages: * Special cases for full, empty, and pages containing only a single block. * Fix free_map to use one bit instead of one byte per block. * Use fast integer division by a constant algorithm when computing block offset from block size and index.
1 parent d1e6fe4 commit fe6f15f

File tree

3 files changed

+134
-34
lines changed

3 files changed

+134
-34
lines changed
 

‎Include/internal/mimalloc/mimalloc/internal.h

+4
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t*
120120
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
121121
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
122122
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
123+
bool _mi_abandoned_pool_visit_blocks(mi_abandoned_pool_t* pool, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
124+
123125

124126
#if MI_HUGE_PAGE_ABANDON
125127
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
@@ -161,6 +163,8 @@ void _mi_heap_collect_abandon(mi_heap_t* heap);
161163
void _mi_heap_set_default_direct(mi_heap_t* heap);
162164
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
163165
void _mi_heap_unsafe_destroy_all(void);
166+
void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page);
167+
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_block_visit_fun* visitor, void* arg);
164168

165169
// "stats.c"
166170
void _mi_stats_done(mi_stats_t* stats);

‎Objects/mimalloc/heap.c

+80-34
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
2626
// Visit all pages in a heap; returns `false` if break was called.
2727
static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2)
2828
{
29-
if (heap==NULL || heap->page_count==0) return 0;
29+
if (heap==NULL || heap->page_count==0) return true;
3030

3131
// visit all pages
3232
#if MI_DEBUG>1
@@ -521,11 +521,20 @@ typedef struct mi_heap_area_ex_s {
521521
mi_page_t* page;
522522
} mi_heap_area_ex_t;
523523

524-
static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) {
525-
mi_assert(xarea != NULL);
526-
if (xarea==NULL) return true;
527-
const mi_heap_area_t* area = &xarea->area;
528-
mi_page_t* page = xarea->page;
524+
// Precompute a (magic, shift) pair for fast division by `divisor`
// ("magic number" division, Granlund-Montgomery style); consumed by
// mi_fast_divide(). Requires 0 < divisor <= UINT32_MAX.
static void mi_fast_divisor(size_t divisor, size_t* magic, size_t* shift) {
525+
mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
526+
// shift = bit length of (divisor - 1), i.e. ceil(log2(divisor))
*shift = MI_INTPTR_BITS - mi_clz(divisor - 1);
527+
// magic = floor(2^32 * (2^shift - divisor) / divisor) + 1
*magic = (size_t)(((1ULL << 32) * ((1ULL << *shift) - divisor)) / divisor + 1);
528+
}
529+
530+
// Compute n / divisor using the (magic, shift) pair produced by
// mi_fast_divisor(). Only valid for n <= UINT32_MAX.
static size_t mi_fast_divide(size_t n, size_t magic, size_t shift) {
531+
mi_assert_internal(n <= UINT32_MAX);
532+
// high 32 bits of (n * magic), plus n, then shifted: yields exact quotient
return ((((uint64_t) n * magic) >> 32) + n) >> shift;
533+
}
534+
535+
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_block_visit_fun* visitor, void* arg) {
536+
mi_assert(area != NULL);
537+
if (area==NULL) return true;
529538
mi_assert(page != NULL);
530539
if (page == NULL) return true;
531540

@@ -537,17 +546,39 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
537546
const size_t ubsize = mi_page_usable_block_size(page); // without padding
538547
size_t psize;
539548
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
549+
mi_heap_t* heap = mi_page_heap(page);
540550

541551
if (page->capacity == 1) {
542552
// optimize page with one block
543553
mi_assert_internal(page->used == 1 && page->free == NULL);
544-
return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
554+
return visitor(heap, area, pstart, ubsize, arg);
555+
}
556+
557+
if (page->used == page->capacity) {
558+
// optimize full pages
559+
uint8_t* block = pstart;
560+
for (size_t i = 0; i < page->capacity; i++) {
561+
if (!visitor(heap, area, block, ubsize, arg)) return false;
562+
block += bsize;
563+
}
564+
return true;
545565
}
546566

547567
// create a bitmap of free blocks.
548568
#define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*))
549-
uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
550-
memset(free_map, 0, sizeof(free_map));
569+
uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS];
570+
size_t bmapsize = (page->capacity + MI_INTPTR_BITS - 1) / MI_INTPTR_BITS;
571+
memset(free_map, 0, bmapsize * sizeof(uintptr_t));
572+
573+
if (page->capacity % MI_INTPTR_BITS != 0) {
574+
size_t shift = (page->capacity % MI_INTPTR_BITS);
575+
uintptr_t mask = (UINTPTR_MAX << shift);
576+
free_map[bmapsize-1] = mask;
577+
}
578+
579+
// fast repeated division by the block size
580+
size_t magic, shift;
581+
mi_fast_divisor(bsize, &magic, &shift);
551582

552583
#if MI_DEBUG>1
553584
size_t free_count = 0;
@@ -559,10 +590,11 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
559590
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
560591
size_t offset = (uint8_t*)block - pstart;
561592
mi_assert_internal(offset % bsize == 0);
562-
size_t blockidx = offset / bsize; // Todo: avoid division?
563-
mi_assert_internal( blockidx < MI_MAX_BLOCKS);
564-
size_t bitidx = (blockidx / sizeof(uintptr_t));
565-
size_t bit = blockidx - (bitidx * sizeof(uintptr_t));
593+
size_t blockidx = mi_fast_divide(offset, magic, shift);
594+
mi_assert_internal(blockidx == offset / bsize);
595+
mi_assert_internal(blockidx < MI_MAX_BLOCKS);
596+
size_t bitidx = (blockidx / MI_INTPTR_BITS);
597+
size_t bit = blockidx - (bitidx * MI_INTPTR_BITS);
566598
free_map[bitidx] |= ((uintptr_t)1 << bit);
567599
}
568600
mi_assert_internal(page->capacity == (free_count + page->used));
@@ -571,19 +603,29 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
571603
#if MI_DEBUG>1
572604
size_t used_count = 0;
573605
#endif
574-
for (size_t i = 0; i < page->capacity; i++) {
575-
size_t bitidx = (i / sizeof(uintptr_t));
576-
size_t bit = i - (bitidx * sizeof(uintptr_t));
577-
uintptr_t m = free_map[bitidx];
578-
if (bit == 0 && m == UINTPTR_MAX) {
579-
i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
606+
uint8_t* block = pstart;
607+
for (size_t i = 0; i < bmapsize; i++) {
608+
if (free_map[i] == 0) {
609+
// every block is in use
610+
for (size_t j = 0; j < MI_INTPTR_BITS; j++) {
611+
#if MI_DEBUG>1
612+
used_count++;
613+
#endif
614+
if (!visitor(heap, area, block, ubsize, arg)) return false;
615+
block += bsize;
616+
}
580617
}
581-
else if ((m & ((uintptr_t)1 << bit)) == 0) {
582-
#if MI_DEBUG>1
583-
used_count++;
584-
#endif
585-
uint8_t* block = pstart + (i * bsize);
586-
if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
618+
else {
619+
uintptr_t m = ~free_map[i];
620+
while (m) {
621+
#if MI_DEBUG>1
622+
used_count++;
623+
#endif
624+
size_t bitidx = mi_ctz(m);
625+
if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false;
626+
m &= m - 1;
627+
}
628+
block += bsize * MI_INTPTR_BITS;
587629
}
588630
}
589631
mi_assert_internal(page->used == used_count);
@@ -592,21 +634,24 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
592634

593635
typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);
594636

637+
// Initialize a mi_heap_area_t (the public view of a page) from `page`.
// Shared by the heap visitor here and the abandoned-segment visitor in segment.c.
void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
638+
const size_t bsize = mi_page_block_size(page);       // full block size (with padding)
639+
const size_t ubsize = mi_page_usable_block_size(page); // usable block size (without padding)
640+
area->reserved = page->reserved * bsize;
641+
area->committed = page->capacity * bsize;
642+
area->blocks = _mi_page_start(_mi_page_segment(page), page, NULL);
643+
area->used = page->used; // number of blocks in use (#553)
644+
area->block_size = ubsize;
645+
area->full_block_size = bsize;
646+
}
595647

596648
static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
597649
MI_UNUSED(heap);
598650
MI_UNUSED(pq);
599651
mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
600652
mi_heap_area_ex_t xarea;
601-
const size_t bsize = mi_page_block_size(page);
602-
const size_t ubsize = mi_page_usable_block_size(page);
603653
xarea.page = page;
604-
xarea.area.reserved = page->reserved * bsize;
605-
xarea.area.committed = page->capacity * bsize;
606-
xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL);
607-
xarea.area.used = page->used; // number of blocks in use (#553)
608-
xarea.area.block_size = ubsize;
609-
xarea.area.full_block_size = bsize;
654+
_mi_heap_area_init(&xarea.area, page);
610655
return fun(heap, &xarea, arg);
611656
}
612657

@@ -627,7 +672,7 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t*
627672
mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg;
628673
if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false;
629674
if (args->visit_blocks) {
630-
return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg);
675+
return _mi_heap_area_visit_blocks(&xarea->area, xarea->page, args->visitor, args->arg);
631676
}
632677
else {
633678
return true;
@@ -637,5 +682,6 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t*
637682
// Visit all blocks in a heap
// Returns false if the visitor requested an early stop.
638683
bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
639684
mi_visit_blocks_args_t args = { visit_blocks, visitor, arg };
685+
// NOTE(review): presumably processes (part of) the heap's delayed-free list so
// the per-page free lists are accurate before visiting -- confirm against the
// definition of _mi_heap_delayed_free_partial.
_mi_heap_delayed_free_partial((mi_heap_t *)heap);
640686
return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args);
641687
}

‎Objects/mimalloc/segment.c

+50
Original file line numberDiff line numberDiff line change
@@ -1614,3 +1614,53 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
16141614
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
16151615
return page;
16161616
}
1617+
1618+
/* -----------------------------------------------------------
1619+
Visit blocks in abandoned segments
1620+
----------------------------------------------------------- */
1621+
1622+
// Visit a single (possibly abandoned) page: report the area itself first
// (block == NULL), then optionally each live block. Returns false if the
// visitor requested a stop. The visitor's heap argument is NULL since an
// abandoned page has no owning heap. `segment` is not referenced in the body.
static bool mi_segment_visit_page(mi_segment_t* segment, mi_page_t* page, bool visit_blocks, mi_block_visit_fun* visitor, void* arg)
1623+
{
1624+
mi_heap_area_t area;
1625+
_mi_heap_area_init(&area, page);
1626+
// area callback: block == NULL, size == usable block size
if (!visitor(NULL, &area, NULL, area.block_size, arg)) return false;
1627+
if (visit_blocks) {
1628+
return _mi_heap_area_visit_blocks(&area, page, visitor, arg);
1629+
}
1630+
else {
1631+
return true;
1632+
}
1633+
}
1634+
1635+
// Visit every used page in `segment` whose tag matches `page_tag` by walking
// the segment's slices. Returns false if the visitor requested a stop.
static bool mi_segment_visit_pages(mi_segment_t* segment, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
1636+
const mi_slice_t* end;
1637+
mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
1638+
while (slice < end) {
1639+
if (mi_slice_is_used(slice)) {
1640+
mi_page_t* const page = mi_slice_to_page(slice);
1641+
// only visit pages with the requested tag (e.g. GC-tracked objects)
if (page->tag == page_tag) {
1642+
if (!mi_segment_visit_page(segment, page, visit_blocks, visitor, arg)) return false;
1643+
}
1644+
}
1645+
// advance by the slice's span to reach the next slice entry
slice = slice + slice->slice_count;
1646+
}
1647+
return true;
1648+
}
1649+
1650+
// Visit all blocks in abandoned segments
// (pages whose owning thread has exited but whose memory is still live).
1651+
bool _mi_abandoned_pool_visit_blocks(mi_abandoned_pool_t* pool, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
1652+
// Note: this is not safe if any other thread is abandoning or claiming segments from the pool
1653+
// walk the primary abandoned list ...
mi_segment_t* segment = mi_tagged_segment_ptr(pool->abandoned);
1654+
while (segment != NULL) {
1655+
if (!mi_segment_visit_pages(segment, page_tag, visit_blocks, visitor, arg)) return false;
1656+
segment = segment->abandoned_next;
1657+
}
1658+
1659+
// ... then the "visited" list (NOTE(review): presumably segments already
// scanned once for reclamation -- confirm against the abandon/reclaim code)
segment = pool->abandoned_visited;
1660+
while (segment != NULL) {
1661+
if (!mi_segment_visit_pages(segment, page_tag, visit_blocks, visitor, arg)) return false;
1662+
segment = segment->abandoned_next;
1663+
}
1664+
1665+
return true;
1666+
}

0 commit comments

Comments
 (0)