From 531eb76795d45b369d9c8b1284b6c839174eea05 Mon Sep 17 00:00:00 2001
From: Dan Cross
Date: Tue, 23 Apr 2024 19:41:06 +0000
Subject: [PATCH] allocator: Implement QuickFit

Implement Weinstock's QuickFit allocator, which lets us free in Theon.

Signed-off-by: Dan Cross
---
 theon/src/allocator.rs | 450 ++++++++++++++++++++++++++++++++++++++---
 theon/src/main.rs      |  14 +-
 2 files changed, 429 insertions(+), 35 deletions(-)

diff --git a/theon/src/allocator.rs b/theon/src/allocator.rs
index caae87d..a5cd11c 100644
--- a/theon/src/allocator.rs
+++ b/theon/src/allocator.rs
@@ -5,8 +5,11 @@
 // license that can be found in the LICENSE file or at
 // https://opensource.org/licenses/MIT.
 
+use alloc::alloc::{AllocError, Allocator, Layout};
 use core::cell::UnsafeCell;
+use core::ptr::NonNull;
 use core::sync::atomic::{AtomicUsize, Ordering};
+use core::{mem, ptr};
 
 /// The allocator works in terms of an owned region
 /// of memory. We call this a Heap.
@@ -15,19 +18,42 @@ pub(crate) trait Heap {
     fn len(&self) -> usize;
 }
 
-/// A SliceHeap is a heap created by destructuring
-/// the elements of a mutable slice.
-pub(crate) struct SliceHeap {
+/// A Block is a heap defined by a base pointer and a length.
+/// It is an analogue of a mutable slice.
+///
+/// At some point, it may make sense to replace this with a
+/// slice pointer, but too many of the interfaces there are not
+/// (yet) stable.
+pub(crate) struct Block {
     heap: *mut u8,
     len: usize,
 }
 
-impl SliceHeap {
-    pub fn new(arena: &mut [u8]) -> SliceHeap {
-        SliceHeap { heap: arena.as_mut_ptr(), len: arena.len() }
+impl Block {
+    /// Creates a new block from raw parts, analogous to
+    /// `core::slice::from_raw_parts`.
+    ///
+    /// # Safety
+    /// The caller must ensure that the pointer and length given
+    /// are appropriate for the construction of a new block.
+    pub unsafe fn new_from_raw_parts(heap: *mut u8, len: usize) -> Block {
+        Block { heap, len }
+    }
+
+    /// Splits a block into two sub-blocks.
+    pub fn split_at_mut(mut self, offset: usize) -> Option<(Block, Block)> {
+        let len = self.len();
+        if offset > len {
+            return None;
+        }
+        let ptr = self.as_mut_ptr();
+        let a = unsafe { Block::new_from_raw_parts(ptr, offset) };
+        let b = unsafe { Block::new_from_raw_parts(ptr.wrapping_add(offset), len - offset) };
+        Some((a, b))
     }
 }
-impl Heap for SliceHeap {
+
+impl Heap for Block {
     fn as_mut_ptr(&mut self) -> *mut u8 {
         self.heap
     }
@@ -47,36 +73,397 @@ pub(crate) struct BumpAlloc<T: Heap> {
 }
 
 impl<T: Heap> BumpAlloc<T> {
+    /// Creates a new bump allocator, taking ownership of the
+    /// provided arena.
     pub(crate) const fn new(arena: T) -> BumpAlloc<T> {
         BumpAlloc { arena: UnsafeCell::new(arena), cursor: AtomicUsize::new(0) }
     }
 
     /// Allocates the given number of bytes with the given
-    /// alignment. Returns `None` if the allocation cannot
-    /// be satisfied, otherwise returns `Some` of a mutable
-    /// slice referring to the allocated memory.
-    pub(crate) fn alloc_bytes(&self, align: usize, size: usize) -> Option<&mut [u8]> {
+    /// alignment. Returns `None` if the allocation cannot be
+    /// satisfied, otherwise returns `Some` of a pair of blocks:
+    /// the first contains the prefix before the (aligned) block
+    /// and the second is the requested block.
+    pub(crate) fn try_alloc(&self, align: usize, size: usize) -> Option<(Block, Block)> {
         let heap = unsafe { &mut *self.arena.get() };
         let base = heap.as_mut_ptr();
-        let mut offset = 0;
+        let mut first = ptr::null_mut();
+        let mut adjust = 0;
         self.cursor
             .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| {
-                let ptr = base.wrapping_add(current);
-                let adjust = ptr.align_offset(align);
-                offset = current.checked_add(adjust).expect("alignment overflow");
+                first = base.wrapping_add(current);
+                adjust = first.align_offset(align);
+                let offset = current.checked_add(adjust).expect("alignment overflow");
                 let next = offset.checked_add(size).expect("size overflow");
                 (next <= heap.len()).then_some(next)
             })
             .ok()?;
-        let ptr = base.wrapping_add(offset);
-        Some(unsafe { core::slice::from_raw_parts_mut(ptr, size) })
+        let prefix = unsafe { Block::new_from_raw_parts(first, adjust) };
+        let ptr = first.wrapping_add(adjust);
+        let block = unsafe { Block::new_from_raw_parts(ptr, size) };
+        Some((prefix, block))
+    }
+}
+
+/// BumpAlloc implements the allocator interface, and is
+/// suitable for e.g. page allocators and so forth. Dealloc is
+/// unimplemented and will panic.
+unsafe impl<T: Heap> Allocator for BumpAlloc<T> {
+    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        let (_, mut block) = self.try_alloc(layout.align(), layout.size()).ok_or(AllocError)?;
+        let ptr = NonNull::new(block.as_mut_ptr()).ok_or(AllocError)?;
+        Ok(NonNull::slice_from_raw_parts(ptr, block.len()))
+    }
+
+    unsafe fn deallocate(&self, _ptr: NonNull<u8>, _layout: Layout) {
+        unimplemented!();
+    }
+}
+
+const ALLOC_UNIT_SHIFT: usize = 6;
+const ALLOC_UNIT_SIZE: usize = 1 << ALLOC_UNIT_SHIFT;
+const MIN_ALLOC_SIZE: usize = ALLOC_UNIT_SIZE;
+const MAX_QUICK_SHIFT: usize = 14;
+const MAX_QUICK_SIZE: usize = 1 << MAX_QUICK_SHIFT;
+
+const NUM_QLISTS: usize = MAX_QUICK_SHIFT - ALLOC_UNIT_SHIFT + 1;
+const NUM_HASH_BUCKETS: usize = 31; // Prime.
+
+/// A linked block header, containing size, alignment, and
+/// address information for the block. This is used both for
+/// linking unallocated blocks into one of the free lists and
+/// for keeping track of blocks allocated from the `misc`
+/// list.
+///
+/// For irregularly sized allocations, the header keeps track of
+/// the block's layout data, its virtual address, and a link
+/// pointer. Such a header is either not in any list, if newly
+/// allocated and not yet freed, or always in exactly one of two
+/// lists: the free list, or a hash chain of allocated blocks.
+/// We need some way to preserve the allocation size after the
+/// initial allocation from the tail, and because misc blocks
+/// can be reused in a first-fit manner, we cannot rely on a
+/// `Layout` to recover the size of the block, so we must store
+/// it somewhere. By allocating a tag outside of the buffer,
+/// which we look up in a hash table as needed, we can maintain
+/// this information without adding additional complexity to
+/// allocation.
+///
+/// For blocks on one of the quick lists, the size, address and
+/// alignment fields are redundant, but convenient.
+///
+/// We use the link pointer to point to the next entry in the
+/// list in all cases.
+#[derive(Debug)]
+#[repr(C, align(64))]
+struct Header {
+    next: Option<NonNull<Header>>,
+    addr: *mut u8,
+    size: usize,
+    align: usize,
+}
+
+impl Header {
+    /// Returns a new header for a block of the given size at
+    /// the given address.
+    pub fn new(addr: *mut u8, size: usize, align: usize, next: Option<NonNull<Header>>) -> Header {
+        Header { next, addr, size, align }
+    }
+}
+
+/// # QuickFit allocator for small objects.
+///
+/// This is an implementation of the QuickFit[Wei88] allocator
+/// for small objects, suitable for managing small heaps in
+/// memory-constrained environments, such as boot loaders and
+/// standalone debuggers.
+///
+/// [Wei88] Charles B. Weinstock and William A. Wulf. 1988.
+/// Quick Fit: An Efficient Algorithm for Heap Storage
+/// Allocation. ACM SIGPLAN Notices 23, 10 (Oct. 1988),
+/// 141-148. https://doi.org/10.1145/51607.51619
+#[repr(C)]
+pub struct QuickFit<T: Heap> {
+    tail: BumpAlloc<T>,
+    qlists: [Option<NonNull<Header>>; NUM_QLISTS],
+    misc: Option<NonNull<Header>>,
+    allocated_misc: [Option<NonNull<Header>>; NUM_HASH_BUCKETS],
+}
+
+impl<T: Heap> QuickFit<T> {
+    /// Constructs a QuickFit from the given `tail`.
+    pub const fn new(tail: BumpAlloc<T>) -> QuickFit<T> {
+        let qlists = [None; NUM_QLISTS];
+        let misc = None;
+        let allocated_misc = [None; NUM_HASH_BUCKETS];
+        QuickFit { tail, qlists, misc, allocated_misc }
+    }
+
+    /// Allocates a block of memory of the requested size and
+    /// alignment. Returns a pointer to such a block, or null
+    /// if the block cannot be allocated.
+    pub fn malloc(&mut self, layout: Layout) -> *mut u8 {
+        let (size, align) = Self::adjust(layout);
+        let p = self.alloc_quick(size, align);
+        p.or_else(|| self.alloc_tail(size, align)).unwrap_or(ptr::null_mut())
+    }
+
+    /// Adjusts the given layout so that blocks allocated from
+    /// one of the quick lists are appropriately sized and
+    /// aligned. Otherwise, returns the original size and
+    /// alignment.
+    fn adjust(layout: Layout) -> (usize, usize) {
+        let size = layout.size();
+        let align = layout.align();
+        if size > MAX_QUICK_SIZE {
+            return (size, align);
+        }
+        let size = usize::max(MIN_ALLOC_SIZE, size.next_power_of_two());
+        let align = usize::max(layout.align(), size);
+        (size, align)
+    }
+
+    /// Attempts to allocate from an existing list: for requests
+    /// that can be satisfied from one of the quick lists, try
+    /// and do so; otherwise, attempt an allocation from the
+    /// misc list.
+    fn alloc_quick(&mut self, size: usize, align: usize) -> Option<*mut u8> {
+        if size <= MAX_QUICK_SIZE && align == size {
+            let k: usize = size.ilog2() as usize - ALLOC_UNIT_SHIFT;
+            let (node, list) = Self::head(self.qlists[k].take());
+            self.qlists[k] = list;
+            node.map(|header| unsafe { header.as_ref() }.addr)
+        } else {
+            self.alloc_misc(size, align)
+        }
+    }
+
+    /// Allocates a block from the misc list. A simple
+    /// first-fit allocator.
+    fn alloc_misc(&mut self, size: usize, align: usize) -> Option<*mut u8> {
+        let (node, list) =
+            Self::unlink(self.misc.take(), |node| size <= node.size && align <= node.align);
+        self.misc = list;
+        node.map(|mut header| {
+            let header = unsafe { header.as_mut() };
+            let k = Self::hash(header.addr);
+            header.next = self.allocated_misc[k].take();
+            self.allocated_misc[k] = NonNull::new(header);
+            header.addr
+        })
+    }
+
+    /// Allocates an aligned block of size `size` from `tail`.
+    /// If `tail` is not already aligned to the given alignment,
+    /// then we try to free blocks larger than or equal in size
+    /// to the minimum allocation unit into the quick lists
+    /// until it is.
+    fn alloc_tail(&mut self, size: usize, align: usize) -> Option<*mut u8> {
+        let (prefix, mut block) = { self.tail.try_alloc(align, size)? };
+        self.free_prefix(prefix);
+        Some(block.as_mut_ptr())
+    }
+
+    /// Frees a prefix that came from a tail allocation.
+    /// This attempts to store blocks onto the quick lists.
+    fn free_prefix(&mut self, prefix: Block) {
+        let mut prefix = Self::align_prefix(prefix);
+        while let Some(rest) = self.try_free_prefix(prefix) {
+            prefix = rest;
+        }
+    }
+
+    /// Aligns the prefix to the minimum allocation size.
+    fn align_prefix(mut prefix: Block) -> Block {
+        let ptr = prefix.as_mut_ptr();
+        let len = prefix.len();
+        let offset = ptr.align_offset(MIN_ALLOC_SIZE);
+        assert!(offset <= len);
+        unsafe { Block::new_from_raw_parts(ptr.wrapping_add(offset), len - offset) }
+    }
+
+    /// Tries to free the largest section of the prefix that it
+    /// can, returning the remainder if it did so. Otherwise,
+    /// returns None.
+    fn try_free_prefix(&mut self, mut prefix: Block) -> Option<Block> {
+        let ptr: *mut u8 = prefix.as_mut_ptr();
+        for k in (0..NUM_QLISTS).rev() {
+            let size = 1 << (k + ALLOC_UNIT_SHIFT);
+            if prefix.len() >= size && ptr.align_offset(size) == 0 {
+                let (_, rest) = prefix.split_at_mut(size)?;
+                self.free(ptr, Layout::from_size_align(size, size).unwrap());
+                return (rest.len() >= MIN_ALLOC_SIZE).then_some(rest);
+            }
+        }
+        None
+    }
+
+    /// Attempts to reallocate the given block to a new size.
+    ///
+    /// This has a small optimization for the most common case,
+    /// where a block is being realloc'd to grow as data is
+    /// accumulated: it's subtle, but if the original block was
+    /// allocated from one of the quick lists, and the new size
+    /// can be accommodated by the existing allocation, simply
+    /// return the existing block pointer. Otherwise, allocate
+    /// a new block, copy, and free the old block.
+    ///
+    /// Note that the case of a reduction in size might result
+    /// in a new allocation. This is because we rely on the
+    /// accuracy of the `Layout` to find the correct quick list
+    /// to store the block onto on free. If we reduced below
+    /// the size of the current block, we would lose the layout
+    /// information and leak memory on free. But this is very
+    /// uncommon.
+    ///
+    /// We make no effort to optimize the case of a `realloc` in
+    /// a `misc` block, as a) it is relatively uncommon to do so
+    /// and b) there may not be a buffer tag for such a block
+    /// yet (one isn't allocated until the block is freed), and
+    /// the implementation would need to be more complex as a
+    /// result.
+    fn realloc(&mut self, block: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
+        let new_layout = Layout::from_size_align(new_size, layout.align()).expect("layout");
+        let (size, align) = Self::adjust(new_layout);
+        if size == layout.size() && align == layout.align() {
+            return block;
+        }
+        let np = self.malloc(new_layout);
+        if !np.is_null() {
+            unsafe {
+                ptr::copy(block, np, usize::min(layout.size(), new_size));
+            }
+            self.free(block, layout)
+        }
+        np
+    }
+
+    /// Frees a block of memory characterized by the `layout`
+    /// argument. If the block can be freed to one of the
+    /// quick lists, it is; otherwise, it is treated as a misc
+    /// block and freed there.
+    pub fn free(&mut self, block: *mut u8, layout: Layout) {
+        let (size, align) = Self::adjust(layout);
+        assert!(!block.is_null());
+        if size <= MAX_QUICK_SIZE && align == size {
+            let k: usize = size.ilog2() as usize - ALLOC_UNIT_SHIFT;
+            let header = Header::new(block, size, align, self.qlists[k].take());
+            let p = block as *mut Header;
+            assert_eq!(p.align_offset(mem::align_of::<Header>()), 0);
+            unsafe {
+                ptr::write(p, header);
+            }
+            self.qlists[k] = NonNull::new(p);
+        } else {
+            self.free_misc(block, size, align);
+        }
+    }
+
+    /// Frees a block to the misc list. This looks up the given
+    /// address in the hash of allocated misc blocks to find its
+    /// header.
+    ///
+    /// If the block header is not found in the hash table, we
+    /// assume that the block was allocated from the tail and
+    /// this is the first time it's been freed, so we allocate a
+    /// header for it and link that into the misc list.
+    ///
+    /// If we cannot allocate a header in the usual way, we take
+    /// it from the block to be freed, which is guaranteed to be
+    /// large enough since anything smaller would have been
+    /// satisfied from one of the quick lists, and thus freed
+    /// through that path.
+    fn free_misc(&mut self, mut block: *mut u8, mut size: usize, mut align: usize) {
+        let mut header = self
+            .unlink_allocated_misc(block)
+            .or_else(|| {
+                let hblock = self.malloc(Layout::new::<Header>()).cast::<Header>();
+                let hblock = hblock
+                    .is_null()
+                    .then(|| {
+                        let offset = block.align_offset(MIN_ALLOC_SIZE);
+                        let hblock = block.wrapping_add(offset);
+                        block = block.wrapping_add(MIN_ALLOC_SIZE);
+                        size -= offset + MIN_ALLOC_SIZE;
+                        align = MIN_ALLOC_SIZE;
+                        hblock.cast()
+                    })
+                    .unwrap_or(hblock);
+                let header = Header::new(block, size, align, None);
+                unsafe {
+                    ptr::write(hblock, header);
+                }
+                NonNull::new(hblock)
+            })
+            .expect("header");
+        let header = unsafe { header.as_mut() };
+        header.next = self.misc.take();
+        self.misc = NonNull::new(header);
+    }
+
+    /// Unlinks the header for the given address from the hash
+    /// table for allocated misc blocks and returns it, if such
+    /// a header exists.
+    fn unlink_allocated_misc(&mut self, block: *mut u8) -> Option<NonNull<Header>> {
+        let k = Self::hash(block);
+        let list = self.allocated_misc[k].take();
+        let (node, list) = Self::unlink(list, |node| node.addr == block);
+        self.allocated_misc[k] = list;
+        node
+    }
+
+    /// Unlinks the first node matching the given predicate from
+    /// the given list, if it exists, returning the node (or
+    /// `None`) along with the resulting list head, which may be
+    /// `None` if the list is empty.
+    fn unlink<F>(
+        mut list: Option<NonNull<Header>>,
+        predicate: F,
+    ) -> (Option<NonNull<Header>>, Option<NonNull<Header>>)
+    where
+        F: Fn(&Header) -> bool,
+    {
+        let mut prev: Option<NonNull<Header>> = None;
+        while let Some(mut node) = list {
+            let node = unsafe { node.as_mut() };
+            if predicate(node) {
+                let next = node.next.take();
+                if let Some(mut prev) = prev {
+                    let prev = unsafe { prev.as_mut() };
+                    prev.next = next;
+                } else {
+                    list = next;
+                }
+                return (NonNull::new(node), list);
+            }
+            prev = NonNull::new(node);
+            list = node.next;
+        }
+        (None, list)
+    }
+
+    /// Splits the list into its first element and tail and
+    /// returns both.
+    fn head(list: Option<NonNull<Header>>) -> (Option<NonNull<Header>>, Option<NonNull<Header>>) {
+        Self::unlink(list, |_| true)
+    }
+
+    /// Hashes a pointer value. This is the bit mixing algorithm
+    /// from Murmur3.
+    fn hash(ptr: *mut u8) -> usize {
+        let mut k = ptr.addr();
+        k ^= k >> 33;
+        k = k.wrapping_mul(0xff51afd7ed558ccd);
+        k ^= k >> 33;
+        k = k.wrapping_mul(0xc4ceb9fe1a85ec53);
+        (k >> 33) % NUM_HASH_BUCKETS
+    }
 }
 
 mod global {
-    use super::{BumpAlloc, Heap};
+    use super::{BumpAlloc, Heap, QuickFit};
     use alloc::alloc::{GlobalAlloc, Layout};
-    use core::ptr;
+    use core::cell::UnsafeCell;
 
     const GLOBAL_HEAP_SIZE: usize = 4 * 1024 * 1024;
 
@@ -94,15 +481,28 @@ mod global {
         }
     }
 
-    unsafe impl GlobalAlloc for BumpAlloc<GlobalHeap> {
+    /// GlobalQuickAlloc is a wrapper around a QuickFit over a
+    /// GlobalHeap that uses interior mutability to implement
+    /// the GlobalAlloc trait.
+    struct GlobalQuickAlloc(UnsafeCell<QuickFit<GlobalHeap>>);
+
+    unsafe impl GlobalAlloc for GlobalQuickAlloc {
         unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
-            self.alloc_bytes(layout.align(), layout.size())
-                .map_or(ptr::null_mut(), |p| p.as_mut_ptr())
+            let quick = unsafe { &mut *self.0.get() };
+            quick.malloc(layout)
+        }
+        unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+            let quick = unsafe { &mut *self.0.get() };
+            quick.free(ptr, layout);
+        }
+        unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
+            let quick = unsafe { &mut *self.0.get() };
+            quick.realloc(ptr, layout, new_size)
         }
-        unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) {}
     }
 
     #[global_allocator]
-    static mut BUMP_ALLOCATOR: BumpAlloc<GlobalHeap> =
-        BumpAlloc::new(GlobalHeap([0u8; GLOBAL_HEAP_SIZE]));
+    static mut GLOBAL_ALLOCATOR: GlobalQuickAlloc = GlobalQuickAlloc(UnsafeCell::new(
+        QuickFit::new(BumpAlloc::new(GlobalHeap([0u8; GLOBAL_HEAP_SIZE]))),
+    ));
 }
diff --git a/theon/src/main.rs b/theon/src/main.rs
index 545bc37..3475d29 100644
--- a/theon/src/main.rs
+++ b/theon/src/main.rs
@@ -7,7 +7,6 @@
 
 #![feature(allocator_api)]
 #![feature(exposed_provenance)]
-#![feature(inline_const)]
 #![feature(naked_functions)]
 #![feature(ptr_sub_ptr)]
 #![feature(strict_provenance)]
@@ -246,18 +245,13 @@ fn load(name: &str, typ: BinaryType, bytes: &[u8], region: Range) -> Result
     }
     let base = theon::vaddr(region.start).cast_mut();
     let len = unsafe { theon::vaddr(region.end).sub_ptr(theon::vaddr(region.start)) };
-    let heap = unsafe { core::slice::from_raw_parts_mut(base, len) };
-    let heap = allocator::SliceHeap::new(heap);
+    let heap = unsafe { allocator::Block::new_from_raw_parts(base, len) };
     let bump = allocator::BumpAlloc::new(heap);
     let allocate = || {
-        use alloc::alloc::GlobalAlloc;
-
+        use alloc::alloc::Allocator;
         let layout = alloc::alloc::Layout::new::<Page4K>();
-        let mem = unsafe { bump.alloc(layout) };
-        if mem.is_null() {
-            return Err("failed to allocate page");
-        }
-        let page = unsafe { &mut *Page4K::proto_ptr().with_addr(mem.addr()).cast_mut() };
+        let mem = bump.allocate(layout).map_err(|_| "failed to allocate page")?;
+        let page = unsafe { &mut *Page4K::proto_ptr().with_addr(mem.addr().into()).cast_mut() };
         Ok(page)
     };
     let root = allocate().expect("allocated root page for binary");
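
Reviewer's note (not part of the patch): the sketch below is a minimal, illustrative way to exercise QuickFit directly over a scratch arena, assuming the module is reachable as crate::allocator; the function name, the 64 KiB buffer, and the asserted reuse behavior are assumptions for illustration, not code from this change. It shows how a request is rounded up to a power-of-two quick-list class (64 bytes to 16 KiB) and how a freed block is reused by the next same-class allocation.

    // Illustrative sketch only; not part of this patch.
    use core::alloc::Layout;

    use crate::allocator::{Block, BumpAlloc, QuickFit};

    fn quickfit_smoke_test() {
        // Hypothetical scratch arena; any suitably sized region works.
        let mut arena = [0u8; 64 * 1024];
        let heap = unsafe { Block::new_from_raw_parts(arena.as_mut_ptr(), arena.len()) };
        let mut quick = QuickFit::new(BumpAlloc::new(heap));

        // A 100-byte request is rounded up to the 128-byte quick list:
        // size classes are powers of two from 64 bytes to 16 KiB.
        let layout = Layout::from_size_align(100, 8).unwrap();
        let p = quick.malloc(layout);
        assert!(!p.is_null());

        // Freeing pushes the block onto its quick list, so the next
        // allocation of the same class pops that block and returns
        // the same address.
        quick.free(p, layout);
        let q = quick.malloc(layout);
        assert_eq!(p, q);
        quick.free(q, layout);
    }

The same pattern is what the GlobalQuickAlloc wrapper above does behind the GlobalAlloc trait, just with the statically allocated GlobalHeap as the tail arena.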