From fe01ddd5e3f178b971ed102dd5fdd93cee5d87b9 Mon Sep 17 00:00:00 2001 From: marc0246 <40955683+marc0246@users.noreply.github.com> Date: Sat, 5 Nov 2022 08:50:46 +0100 Subject: [PATCH] `CpuBufferPool` revamp (#2076) * `CpuBufferPool` revamp * Fix oopsie * Fix docs --- .../{buffer-pool.rs => buffer-allocator.rs} | 33 +- examples/src/bin/indirect.rs | 27 +- examples/src/bin/teapot/main.rs | 15 +- vulkano/src/buffer/allocator.rs | 566 +++++++++++ vulkano/src/buffer/cpu_pool.rs | 931 ------------------ vulkano/src/buffer/mod.rs | 51 +- vulkano/src/memory/allocator/mod.rs | 10 + vulkano/src/memory/allocator/suballocator.rs | 56 +- 8 files changed, 672 insertions(+), 1017 deletions(-) rename examples/src/bin/{buffer-pool.rs => buffer-allocator.rs} (93%) create mode 100644 vulkano/src/buffer/allocator.rs delete mode 100644 vulkano/src/buffer/cpu_pool.rs diff --git a/examples/src/bin/buffer-pool.rs b/examples/src/bin/buffer-allocator.rs similarity index 93% rename from examples/src/bin/buffer-pool.rs rename to examples/src/bin/buffer-allocator.rs index 40f0137ca3..9d9784d918 100644 --- a/examples/src/bin/buffer-pool.rs +++ b/examples/src/bin/buffer-allocator.rs @@ -7,17 +7,7 @@ // notice may not be copied, modified, or distributed except // according to those terms. -// BufferPool Example -// -// Modified triangle example to show BufferPool -// Using a pool allows multiple buffers to be "in-flight" simultaneously -// and is suited to highly dynamic, similar sized chunks of data -// -// NOTE:(jdnewman85) ATM (5/4/2020) CpuBufferPool.next() and .chunk() have identical documentation -// I was unable to get next() to work. The compiler complained that the resulting buffer -// didn't implement VertexSource. Similar issues have been reported. -// See: https://github.com/vulkano-rs/vulkano/issues/1221 -// Finally, I have not profiled CpuBufferPool against CpuAccessibleBuffer +// Modified triangle example to show `CpuBufferAllocator`. use bytemuck::{Pod, Zeroable}; use std::{ @@ -25,7 +15,10 @@ use std::{ time::{SystemTime, UNIX_EPOCH}, }; use vulkano::{ - buffer::CpuBufferPool, + buffer::{ + allocator::{CpuBufferAllocator, CpuBufferAllocatorCreateInfo}, + BufferUsage, + }, command_buffer::{ allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage, RenderPassBeginInfo, SubpassContents, @@ -171,8 +164,16 @@ fn main() { let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); - // Vertex Buffer Pool - let buffer_pool: CpuBufferPool = CpuBufferPool::vertex_buffer(memory_allocator); + // Using a buffer allocator allows multiple buffers to be "in-flight" simultaneously and is + // suited to highly dynamic data like vertex, index and uniform buffers. + let buffer_allocator = CpuBufferAllocator::new( + memory_allocator, + CpuBufferAllocatorCreateInfo { + // We want to use the allocated subbuffers as vertex buffers. + buffer_usage: BufferUsage::VERTEX_BUFFER, + ..Default::default() + }, + ); mod vs { vulkano_shaders::shader! { @@ -335,8 +336,8 @@ fn main() { ]; let num_vertices = data.len() as u32; - // Allocate a new chunk from buffer_pool - let buffer = buffer_pool.from_iter(data.to_vec()).unwrap(); + // Allocate a new subbuffer using the buffer allocator. + let buffer = buffer_allocator.from_iter(data.iter().copied()).unwrap(); let mut builder = AutoCommandBufferBuilder::primary( &command_buffer_allocator, queue.queue_family_index(), diff --git a/examples/src/bin/indirect.rs b/examples/src/bin/indirect.rs index cda178c44e..c76981d8fd 100644 --- a/examples/src/bin/indirect.rs +++ b/examples/src/bin/indirect.rs @@ -27,7 +27,10 @@ use bytemuck::{Pod, Zeroable}; use std::sync::Arc; use vulkano::{ - buffer::{BufferUsage, CpuBufferPool}, + buffer::{ + allocator::{CpuBufferAllocator, CpuBufferAllocatorCreateInfo}, + BufferUsage, + }, command_buffer::{ allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage, DrawIndirectCommand, RenderPassBeginInfo, SubpassContents, @@ -42,7 +45,7 @@ use vulkano::{ image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, - memory::allocator::{MemoryUsage, StandardMemoryAllocator}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -256,17 +259,21 @@ fn main() { let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); - // Each frame we generate a new set of vertices and each frame we need a new DrawIndirectCommand struct to - // set the number of vertices to draw - let indirect_args_pool: CpuBufferPool = CpuBufferPool::new( + // Each frame we generate a new set of vertices and each frame we need a new + // DrawIndirectCommand struct to set the number of vertices to draw. + let indirect_args_pool = CpuBufferAllocator::new( memory_allocator.clone(), - BufferUsage::INDIRECT_BUFFER | BufferUsage::STORAGE_BUFFER, - MemoryUsage::Upload, + CpuBufferAllocatorCreateInfo { + buffer_usage: BufferUsage::INDIRECT_BUFFER | BufferUsage::STORAGE_BUFFER, + ..Default::default() + }, ); - let vertex_pool: CpuBufferPool = CpuBufferPool::new( + let vertex_pool = CpuBufferAllocator::new( memory_allocator, - BufferUsage::STORAGE_BUFFER | BufferUsage::VERTEX_BUFFER, - MemoryUsage::Upload, + CpuBufferAllocatorCreateInfo { + buffer_usage: BufferUsage::STORAGE_BUFFER | BufferUsage::VERTEX_BUFFER, + ..Default::default() + }, ); let compute_pipeline = ComputePipeline::new( diff --git a/examples/src/bin/teapot/main.rs b/examples/src/bin/teapot/main.rs index a82976669c..961459c274 100644 --- a/examples/src/bin/teapot/main.rs +++ b/examples/src/bin/teapot/main.rs @@ -11,7 +11,10 @@ use cgmath::{Matrix3, Matrix4, Point3, Rad, Vector3}; use examples::{Normal, Vertex, INDICES, NORMALS, VERTICES}; use std::{sync::Arc, time::Instant}; use vulkano::{ - buffer::{BufferUsage, CpuAccessibleBuffer, CpuBufferPool, TypedBufferAccess}, + buffer::{ + allocator::{CpuBufferAllocator, CpuBufferAllocatorCreateInfo}, + BufferUsage, CpuAccessibleBuffer, TypedBufferAccess, + }, command_buffer::{ allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage, RenderPassBeginInfo, SubpassContents, @@ -26,7 +29,7 @@ use vulkano::{ format::Format, image::{view::ImageView, AttachmentImage, ImageAccess, ImageUsage, SwapchainImage}, instance::{Instance, InstanceCreateInfo}, - memory::allocator::{MemoryUsage, StandardMemoryAllocator}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ depth_stencil::DepthStencilState, @@ -180,10 +183,12 @@ fn main() { ) .unwrap(); - let uniform_buffer = CpuBufferPool::::new( + let uniform_buffer = CpuBufferAllocator::new( memory_allocator.clone(), - BufferUsage::UNIFORM_BUFFER, - MemoryUsage::Upload, + CpuBufferAllocatorCreateInfo { + buffer_usage: BufferUsage::UNIFORM_BUFFER, + ..Default::default() + }, ); let vs = vs::load(device.clone()).unwrap(); diff --git a/vulkano/src/buffer/allocator.rs b/vulkano/src/buffer/allocator.rs new file mode 100644 index 0000000000..a42523587c --- /dev/null +++ b/vulkano/src/buffer/allocator.rs @@ -0,0 +1,566 @@ +// Copyright (c) 2017 The vulkano developers +// Licensed under the Apache License, Version 2.0 +// or the MIT +// license , +// at your option. All files in the project carrying such +// notice may not be copied, modified, or distributed except +// according to those terms. + +//! Efficiently suballocates buffers into smaller subbuffers. + +use super::{ + sys::{Buffer, BufferCreateInfo, RawBuffer}, + BufferAccess, BufferAccessObject, BufferContents, BufferError, BufferInner, BufferUsage, + TypedBufferAccess, +}; +use crate::{ + buffer::sys::BufferMemory, + device::{Device, DeviceOwned}, + memory::{ + allocator::{ + align_up, AllocationCreateInfo, AllocationCreationError, AllocationType, + MemoryAllocatePreference, MemoryAllocator, MemoryUsage, StandardMemoryAllocator, + }, + DedicatedAllocation, + }, + DeviceSize, +}; +use crossbeam_queue::ArrayQueue; +use std::{ + cell::UnsafeCell, + marker::PhantomData, + mem::{align_of, size_of, ManuallyDrop}, + num::NonZeroU64, + ptr, + sync::Arc, +}; + +const MAX_ARENAS: usize = 32; + +// TODO: Add `CpuSubbuffer::read` to read the content of a subbuffer. +// But that's hard to do because we must prevent `increase_gpu_lock` from working while a +// a buffer is locked. + +/// Efficiently suballocates buffers into smaller subbuffers. +/// +/// This allocator is especially suitable when you want to upload or download some data regularly +/// (for example, at each frame for a video game). +/// +/// # Algorithm +/// +/// The allocator keeps a pool of *arenas*. An arena is simply a buffer in which *arena allocation* +/// takes place, also known as *bump allocation* or *linear allocation*. Every time you allocate, +/// one of these arenas is suballocated. If there is no arena that is currently available, one will +/// be allocated. After all subbuffers allocated from an arena are dropped, the arena is +/// automatically returned to the arena pool. If you try to allocate a subbuffer larger than the +/// current size of an arena, the arenas are automatically resized. +/// +/// No memory is allocated when the allocator is created, be it on the Vulkan or Rust side. That +/// only happens once you allocate a subbuffer. +/// +/// # Usage +/// +/// Ideally, one arena should be able to fit all data you need to update per frame, so that each +/// arena is submitted and freed once per frame. This way, the arena pool would also contain as +/// many arenas as there are frames in flight on the thread. Otherwise, if your arenas are not able +/// to fit everything each frame, what will likely happen is that each subbuffer will be +/// allocated from an individual arena. This can impact efficiency both in terms of memory usage +/// (because each arena has the same size, even if some of the subbuffers are way smaller) as well +/// as performance, because the data could end up more physically separated in memory, which means +/// the GPU would need to hop from place to place a lot more during a frame. +/// +/// Ideally the result is something roughly like this: +/// +/// ```plain +/// +---------------------------------------------------------------------------------------------+ +/// | Memory Block | +/// |-----+------+-----------------------+---------+-----------------------+------+---------+-----| +/// | | | Frame 1 Arena | | Frame 2 Arena | | | | +/// | ••• | Tex. |-------+-------+-------| Attach. |-------+-------+-------| Tex. | Attach. | ••• | +/// | | | Vert. | Indx. | Unif. | | Vert. | Indx. | Unif. | | | | +/// +-----+------+-------+-------+-------+---------+-------+-------+-------+------+---------+-----+ +/// ``` +/// +/// # Examples +/// +/// ``` +/// use vulkano::buffer::allocator::CpuBufferAllocator; +/// use vulkano::command_buffer::{ +/// AutoCommandBufferBuilder, CommandBufferUsage, PrimaryCommandBufferAbstract, +/// }; +/// use vulkano::sync::GpuFuture; +/// # let queue: std::sync::Arc = return; +/// # let memory_allocator: std::sync::Arc = return; +/// # let command_buffer_allocator: vulkano::command_buffer::allocator::StandardCommandBufferAllocator = return; +/// +/// // Create the buffer allocator. +/// let buffer_allocator = CpuBufferAllocator::new(memory_allocator.clone(), Default::default()); +/// +/// for n in 0..25u32 { +/// // Each loop allocates a new subbuffer and stores `data` in it. +/// let data: [f32; 4] = [1.0, 0.5, n as f32 / 24.0, 0.0]; +/// let subbuffer = buffer_allocator.from_data(data).unwrap(); +/// +/// // You can then use `subbuffer` as if it was an entirely separate buffer. +/// AutoCommandBufferBuilder::primary( +/// &command_buffer_allocator, +/// queue.queue_family_index(), +/// CommandBufferUsage::OneTimeSubmit, +/// ) +/// .unwrap() +/// // For the sake of the example we just call `update_buffer` on the buffer, even though +/// // it is pointless to do that. +/// .update_buffer(&[0.2, 0.3, 0.4, 0.5], subbuffer.clone(), 0) +/// .unwrap() +/// .build().unwrap() +/// .execute(queue.clone()) +/// .unwrap() +/// .then_signal_fence_and_flush() +/// .unwrap(); +/// } +/// ``` +#[derive(Debug)] +pub struct CpuBufferAllocator> { + state: UnsafeCell>, +} + +impl CpuBufferAllocator +where + A: MemoryAllocator, +{ + /// Creates a new `CpuBufferAllocator`. + /// + /// # Panics + /// + /// - Panics if `create_info.memory_usage` is [`MemoryUsage::GpuOnly`]. + pub fn new(memory_allocator: A, create_info: CpuBufferAllocatorCreateInfo) -> Self { + let CpuBufferAllocatorCreateInfo { + arena_size, + buffer_usage, + memory_usage, + _ne: _, + } = create_info; + + assert!(memory_usage != MemoryUsage::GpuOnly); + + let properties = memory_allocator.device().physical_device().properties(); + let buffer_alignment = [ + buffer_usage + .contains(BufferUsage::UNIFORM_BUFFER) + .then_some(properties.min_uniform_buffer_offset_alignment), + buffer_usage + .contains(BufferUsage::STORAGE_BUFFER) + .then_some(properties.min_storage_buffer_offset_alignment), + ] + .into_iter() + .flatten() + .max() + .unwrap_or(1); + + CpuBufferAllocator { + state: UnsafeCell::new(CpuBufferAllocatorState { + memory_allocator, + buffer_usage, + memory_usage, + buffer_alignment, + arena_size, + arena: None, + free_start: 0, + reserve: None, + }), + } + } + + /// Returns the current size of the arenas. + pub fn arena_size(&self) -> DeviceSize { + unsafe { &*self.state.get() }.arena_size + } + + /// Sets the arena size to the provided `size`. + /// + /// The next time you allocate a subbuffer, a new arena will be allocated with the new size, + /// and all subsequently allocated arenas will also share the new size. + pub fn set_arena_size(&self, size: DeviceSize) { + let state = unsafe { &mut *self.state.get() }; + state.arena_size = size; + state.arena = None; + state.reserve = None; + } + + /// Ensures that the size of the current arena is at least `size`. + /// + /// If `size` is greater than the current arena size, then a new arena will be allocated with + /// the new size, and all subsequently allocated arenas will also share the new size. Otherwise + /// this has no effect. + pub fn reserve(&self, size: DeviceSize) -> Result<(), AllocationCreationError> { + if size > self.arena_size() { + let state = unsafe { &mut *self.state.get() }; + state.arena_size = size; + state.reserve = None; + state.arena = Some(state.next_arena()?); + } + + Ok(()) + } + + /// Allocates a subbuffer and writes `data` in it. + /// + /// # Panics + /// + /// - Panics if `T` has zero size. + /// - Panics if `T` has an alignment greater than `64`. + pub fn from_data(&self, data: T) -> Result>, AllocationCreationError> + where + T: BufferContents, + { + assert!(size_of::() > 0); + assert!(align_of::() <= 64); + + let state = unsafe { &mut *self.state.get() }; + + let size = size_of::() as DeviceSize; + let offset = state.allocate(size, align_of::() as DeviceSize)?; + let arena = state.arena.as_ref().unwrap().clone(); + let allocation = match arena.inner.memory() { + BufferMemory::Normal(a) => a, + BufferMemory::Sparse => unreachable!(), + }; + + unsafe { + let bytes = allocation.write(offset..offset + size).unwrap(); + let mapping = T::from_bytes_mut(bytes).unwrap(); + + ptr::write(mapping, data); + + if let Some(atom_size) = allocation.atom_size() { + let size = align_up(size, atom_size.get()); + let end = DeviceSize::min(offset + size, allocation.size()); + allocation.flush_range(offset..end).unwrap(); + } + } + + Ok(Arc::new(CpuSubbuffer { + id: CpuSubbuffer::::next_id(), + offset, + size, + arena, + _marker: PhantomData, + })) + } + + /// Allocates a subbuffer and writes all elements of `iter` in it. + /// + /// # Panics + /// + /// - Panics if `T` has zero size. + /// - Panics if `T` has an alignment greater than `64`. + pub fn from_iter( + &self, + iter: I, + ) -> Result>, AllocationCreationError> + where + [T]: BufferContents, + I: IntoIterator, + I::IntoIter: ExactSizeIterator, + { + assert!(size_of::() > 0); + assert!(align_of::() <= 64); + + let iter = iter.into_iter(); + let state = unsafe { &mut *self.state.get() }; + + let size = (size_of::() * iter.len()) as DeviceSize; + let offset = state.allocate(size, align_of::() as DeviceSize)?; + let arena = state.arena.as_ref().unwrap().clone(); + let allocation = match arena.inner.memory() { + BufferMemory::Normal(a) => a, + BufferMemory::Sparse => unreachable!(), + }; + + unsafe { + let bytes = allocation.write(offset..offset + size).unwrap(); + let mapping = <[T]>::from_bytes_mut(bytes).unwrap(); + + for (o, i) in mapping.iter_mut().zip(iter) { + ptr::write(o, i); + } + + if let Some(atom_size) = allocation.atom_size() { + let size = align_up(size, atom_size.get()); + let end = DeviceSize::min(offset + size, allocation.size()); + allocation.flush_range(offset..end).unwrap(); + } + } + + Ok(Arc::new(CpuSubbuffer { + id: CpuSubbuffer::::next_id(), + offset, + size, + arena, + _marker: PhantomData, + })) + } +} + +#[derive(Debug)] +struct CpuBufferAllocatorState { + memory_allocator: A, + buffer_usage: BufferUsage, + memory_usage: MemoryUsage, + // The alignment required for the subbuffers. + buffer_alignment: DeviceSize, + // The current size of the arenas. + arena_size: DeviceSize, + // Contains the buffer that is currently being suballocated. + arena: Option>, + // Offset pointing to the start of free memory within the arena. + free_start: DeviceSize, + // When an `Arena` is dropped, it returns itself here for reuse. + reserve: Option>>>, +} + +impl CpuBufferAllocatorState +where + A: MemoryAllocator, +{ + fn allocate( + &mut self, + size: DeviceSize, + alignment: DeviceSize, + ) -> Result { + let alignment = DeviceSize::max(alignment, self.buffer_alignment); + + loop { + if self.arena.is_none() { + // If the requested size is larger than the arenas, we need to resize them. + if self.arena_size < size { + self.arena_size = size * 2; + // We need to drop our reference to the old pool to make sure the arenas are + // dropped once no longer in use, and replace it with a new pool that will not + // be polluted with the outdates arenas. + self.reserve = None; + } + self.arena = Some(self.next_arena()?); + self.free_start = 0; + } + + let arena = self.arena.as_ref().unwrap(); + let allocation = match arena.inner.memory() { + BufferMemory::Normal(a) => a, + BufferMemory::Sparse => unreachable!(), + }; + let arena_offset = allocation.offset(); + let atom_size = allocation.atom_size().map(NonZeroU64::get).unwrap_or(1); + + let alignment = DeviceSize::max(alignment, atom_size); + let offset = align_up(arena_offset + self.free_start, alignment); + + if offset + size <= arena_offset + self.arena_size { + let offset = offset - arena_offset; + self.free_start = offset + size; + + return Ok(offset); + } + + // We reached the end of the arena, grab the next one. + self.arena = None; + } + } + + fn next_arena(&mut self) -> Result, AllocationCreationError> { + if self.reserve.is_none() { + self.reserve = Some(Arc::new(ArrayQueue::new(MAX_ARENAS))); + } + let reserve = self.reserve.as_ref().unwrap(); + + reserve + .pop() + .map(Ok) + .unwrap_or_else(|| self.create_arena()) + .map(|inner| { + Arc::new(Arena { + inner: ManuallyDrop::new(inner), + reserve: reserve.clone(), + }) + }) + } + + fn create_arena(&self) -> Result, AllocationCreationError> { + let raw_buffer = RawBuffer::new( + self.memory_allocator.device().clone(), + BufferCreateInfo { + size: self.arena_size, + usage: self.buffer_usage, + ..Default::default() + }, + ) + .map_err(|err| match err { + BufferError::AllocError(err) => err, + // We don't use sparse-binding, therefore the other errors can't happen. + _ => unreachable!(), + })?; + let mut requirements = *raw_buffer.memory_requirements(); + requirements.alignment = DeviceSize::max(requirements.alignment, self.buffer_alignment); + let create_info = AllocationCreateInfo { + requirements, + allocation_type: AllocationType::Linear, + usage: self.memory_usage, + allocate_preference: MemoryAllocatePreference::Unknown, + dedicated_allocation: Some(DedicatedAllocation::Buffer(&raw_buffer)), + ..Default::default() + }; + + match unsafe { self.memory_allocator.allocate_unchecked(create_info) } { + Ok(mut alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + alloc.shrink(self.arena_size); + let inner = Arc::new( + unsafe { raw_buffer.bind_memory_unchecked(alloc) } + .map_err(|(err, _, _)| err)?, + ); + + Ok(inner) + } + Err(err) => Err(err), + } + } +} + +#[derive(Debug)] +struct Arena { + inner: ManuallyDrop>, + // Where we return the arena in our `Drop` impl. + reserve: Arc>>, +} + +impl Drop for Arena { + fn drop(&mut self) { + let inner = unsafe { ManuallyDrop::take(&mut self.inner) }; + let _ = self.reserve.push(inner); + } +} + +/// Parameters to create a new [`CpuBufferAllocator`]. +pub struct CpuBufferAllocatorCreateInfo { + /// Initial size of an arena in bytes. + /// + /// Ideally this should fit all the data you need to update per frame. So for example, if you + /// need to allocate buffers of size 1K, 2K and 5K each frame, then this should be 8K. If your + /// data is dynamically-sized then try to make an educated guess or simply leave the default. + /// + /// The default value is `0`. + pub arena_size: DeviceSize, + + /// The buffer usage that all allocated buffers should have. + /// + /// The default value is [`BufferUsage::TRANSFER_SRC`]. + pub buffer_usage: BufferUsage, + + /// The memory usage that all buffers should be allocated with. + /// + /// Must not be [`MemoryUsage::GpuOnly`]. + /// + /// The default value is [`MemoryUsage::Upload`]. + pub memory_usage: MemoryUsage, + + pub _ne: crate::NonExhaustive, +} + +impl Default for CpuBufferAllocatorCreateInfo { + #[inline] + fn default() -> Self { + CpuBufferAllocatorCreateInfo { + arena_size: 0, + buffer_usage: BufferUsage::TRANSFER_SRC, + memory_usage: MemoryUsage::Upload, + _ne: crate::NonExhaustive(()), + } + } +} + +/// A subbuffer allocated using a [`CpuBufferAllocator`]. +#[derive(Debug)] +pub struct CpuSubbuffer { + id: NonZeroU64, + // Offset within the arena. + offset: DeviceSize, + // Size of the subbuffer. + size: DeviceSize, + // We need to keep a reference to the arena so it won't be reset. + arena: Arc, + _marker: PhantomData>, +} + +unsafe impl BufferAccess for CpuSubbuffer +where + T: BufferContents + ?Sized, +{ + fn inner(&self) -> BufferInner<'_> { + BufferInner { + buffer: &self.arena.inner, + offset: self.offset, + } + } + + fn size(&self) -> DeviceSize { + self.size + } +} + +impl BufferAccessObject for Arc> +where + T: BufferContents + ?Sized, +{ + fn as_buffer_access_object(&self) -> Arc { + self.clone() + } +} + +unsafe impl TypedBufferAccess for CpuSubbuffer +where + T: BufferContents + ?Sized, +{ + type Content = T; +} + +unsafe impl DeviceOwned for CpuSubbuffer +where + T: ?Sized, +{ + fn device(&self) -> &Arc { + self.arena.inner.device() + } +} + +crate::impl_id_counter!(CpuSubbuffer); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn reserve() { + let (device, _) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); + + let buffer_allocator = CpuBufferAllocator::new(memory_allocator, Default::default()); + assert_eq!(buffer_allocator.arena_size(), 0); + + buffer_allocator.reserve(83).unwrap(); + assert_eq!(buffer_allocator.arena_size(), 83); + } + + #[test] + fn capacity_increase() { + let (device, _) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); + + let buffer_allocator = CpuBufferAllocator::new(memory_allocator, Default::default()); + assert_eq!(buffer_allocator.arena_size(), 0); + + buffer_allocator.from_data(12u32).unwrap(); + assert_eq!(buffer_allocator.arena_size(), 8); + } +} diff --git a/vulkano/src/buffer/cpu_pool.rs b/vulkano/src/buffer/cpu_pool.rs deleted file mode 100644 index 7dda5d1d43..0000000000 --- a/vulkano/src/buffer/cpu_pool.rs +++ /dev/null @@ -1,931 +0,0 @@ -// Copyright (c) 2017 The vulkano developers -// Licensed under the Apache License, Version 2.0 -// or the MIT -// license , -// at your option. All files in the project carrying such -// notice may not be copied, modified, or distributed except -// according to those terms. - -use super::{ - sys::{Buffer, BufferCreateInfo, RawBuffer}, - BufferAccess, BufferAccessObject, BufferContents, BufferError, BufferInner, BufferUsage, - TypedBufferAccess, -}; -use crate::{ - buffer::sys::BufferMemory, - device::{Device, DeviceOwned}, - memory::{ - allocator::{ - AllocationCreateInfo, AllocationCreationError, AllocationType, - MemoryAllocatePreference, MemoryAllocator, MemoryUsage, StandardMemoryAllocator, - }, - DedicatedAllocation, - }, - DeviceSize, VulkanError, -}; -use std::{ - hash::{Hash, Hasher}, - marker::PhantomData, - mem::size_of, - ptr, - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, Mutex, MutexGuard, - }, -}; - -// TODO: Add `CpuBufferPoolSubbuffer::read` to read the content of a subbuffer. -// But that's hard to do because we must prevent `increase_gpu_lock` from working while a -// a buffer is locked. - -/// Ring buffer from which "sub-buffers" can be individually allocated. -/// -/// This buffer is especially suitable when you want to upload or download some data regularly -/// (for example, at each frame for a video game). -/// -/// # Usage -/// -/// A `CpuBufferPool` is similar to a ring buffer. You start by creating an empty pool, then you -/// grab elements from the pool and use them, and if the pool is full it will automatically grow -/// in size. -/// -/// Contrary to a `Vec`, elements automatically free themselves when they are dropped (ie. usually -/// when you call `cleanup_finished()` on a future, or when you drop that future). -/// -/// # Arc-like -/// -/// The `CpuBufferPool` struct internally contains an `Arc`. You can clone the `CpuBufferPool` for -/// a cheap cost, and all the clones will share the same underlying buffer. -/// -/// # Example -/// -/// ``` -/// use vulkano::buffer::CpuBufferPool; -/// use vulkano::command_buffer::AutoCommandBufferBuilder; -/// use vulkano::command_buffer::CommandBufferUsage; -/// use vulkano::command_buffer::PrimaryCommandBufferAbstract; -/// use vulkano::sync::GpuFuture; -/// # let queue: std::sync::Arc = return; -/// # let memory_allocator: std::sync::Arc = return; -/// # let command_buffer_allocator: vulkano::command_buffer::allocator::StandardCommandBufferAllocator = return; -/// -/// // Create the ring buffer. -/// let buffer = CpuBufferPool::upload(memory_allocator); -/// -/// for n in 0 .. 25u32 { -/// // Each loop grabs a new entry from that ring buffer and stores ` data` in it. -/// let data: [f32; 4] = [1.0, 0.5, n as f32 / 24.0, 0.0]; -/// let sub_buffer = buffer.from_data(data).unwrap(); -/// -/// // You can then use `sub_buffer` as if it was an entirely separate buffer. -/// AutoCommandBufferBuilder::primary( -/// &command_buffer_allocator, -/// queue.queue_family_index(), -/// CommandBufferUsage::OneTimeSubmit, -/// ) -/// .unwrap() -/// // For the sake of the example we just call `update_buffer` on the buffer, even though -/// // it is pointless to do that. -/// .update_buffer(&[0.2, 0.3, 0.4, 0.5], sub_buffer.clone(), 0) -/// .unwrap() -/// .build().unwrap() -/// .execute(queue.clone()) -/// .unwrap() -/// .then_signal_fence_and_flush() -/// .unwrap(); -/// } -/// ``` -pub struct CpuBufferPool -where - [T]: BufferContents, - A: MemoryAllocator + ?Sized, -{ - // The memory pool to use for allocations. - allocator: Arc, - - // Current buffer from which elements are grabbed. - current_buffer: Mutex>>, - - // Buffer usage. - buffer_usage: BufferUsage, - - memory_usage: MemoryUsage, - - // Necessary to make it compile. - marker: PhantomData>, -} - -// One buffer of the pool. -#[derive(Debug)] -struct ActualBuffer { - inner: Arc, - - // List of the chunks that are reserved. - chunks_in_use: Mutex>, - - // The index of the chunk that should be available next for the ring buffer. - next_index: AtomicU64, - - // Number of elements in the buffer. - capacity: DeviceSize, -} - -// Access pattern of one subbuffer. -#[derive(Debug)] -struct ActualBufferChunk { - // First element number within the actual buffer. - index: DeviceSize, - - // Number of occupied elements within the actual buffer. - len: DeviceSize, - - // Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer. - num_cpu_accesses: usize, -} - -/// A subbuffer allocated from a `CpuBufferPool`. -/// -/// When this object is destroyed, the subbuffer is automatically reclaimed by the pool. -pub struct CpuBufferPoolChunk -where - [T]: BufferContents, -{ - buffer: Arc, - - // Index of the subbuffer within `buffer`. In number of elements. - index: DeviceSize, - - // Number of bytes to add to `index * mem::size_of::()` to obtain the start of the data in - // the buffer. Necessary for alignment purposes. - align_offset: DeviceSize, - - // Size of the subbuffer in number of elements, as requested by the user. - // If this is 0, then no entry was added to `chunks_in_use`. - requested_len: DeviceSize, - - // Necessary to make it compile. - marker: PhantomData>, -} - -/// A subbuffer allocated from a `CpuBufferPool`. -/// -/// When this object is destroyed, the subbuffer is automatically reclaimed by the pool. -pub struct CpuBufferPoolSubbuffer -where - [T]: BufferContents, -{ - // This struct is just a wrapper around `CpuBufferPoolChunk`. - chunk: CpuBufferPoolChunk, -} - -impl CpuBufferPool -where - [T]: BufferContents, - A: MemoryAllocator + ?Sized, -{ - /// Builds a `CpuBufferPool`. - /// - /// # Panics - /// - /// - Panics if `T` has zero size. - /// - Panics if `memory_usage` is [`MemoryUsage::GpuOnly`]. - pub fn new( - allocator: Arc, - buffer_usage: BufferUsage, - memory_usage: MemoryUsage, - ) -> CpuBufferPool { - assert!(size_of::() > 0); - assert!(memory_usage != MemoryUsage::GpuOnly); - - CpuBufferPool { - allocator, - current_buffer: Mutex::new(None), - buffer_usage, - memory_usage, - marker: PhantomData, - } - } - - /// Builds a `CpuBufferPool` meant for simple uploads. - /// - /// Shortcut for a pool that can only be used as transfer source and with exclusive queue - /// family accesses. - /// - /// # Panics - /// - /// - Panics if `T` has zero size. - pub fn upload(allocator: Arc) -> CpuBufferPool { - CpuBufferPool::new(allocator, BufferUsage::TRANSFER_SRC, MemoryUsage::Upload) - } - - /// Builds a `CpuBufferPool` meant for simple downloads. - /// - /// Shortcut for a pool that can only be used as transfer destination and with exclusive queue - /// family accesses. - /// - /// # Panics - /// - /// - Panics if `T` has zero size. - pub fn download(allocator: Arc) -> CpuBufferPool { - CpuBufferPool::new(allocator, BufferUsage::TRANSFER_DST, MemoryUsage::Download) - } - - /// Builds a `CpuBufferPool` meant for usage as a uniform buffer. - /// - /// Shortcut for a pool that can only be used as uniform buffer and with exclusive queue - /// family accesses. - /// - /// # Panics - /// - /// - Panics if `T` has zero size. - pub fn uniform_buffer(allocator: Arc) -> CpuBufferPool { - CpuBufferPool::new(allocator, BufferUsage::UNIFORM_BUFFER, MemoryUsage::Upload) - } - - /// Builds a `CpuBufferPool` meant for usage as a vertex buffer. - /// - /// Shortcut for a pool that can only be used as vertex buffer and with exclusive queue - /// family accesses. - /// - /// # Panics - /// - /// - Panics if `T` has zero size. - pub fn vertex_buffer(allocator: Arc) -> CpuBufferPool { - CpuBufferPool::new(allocator, BufferUsage::VERTEX_BUFFER, MemoryUsage::Upload) - } - - /// Builds a `CpuBufferPool` meant for usage as a indirect buffer. - /// - /// Shortcut for a pool that can only be used as indirect buffer and with exclusive queue - /// family accesses. - /// - /// # Panics - /// - /// - Panics if `T` has zero size. - pub fn indirect_buffer(allocator: Arc) -> CpuBufferPool { - CpuBufferPool::new(allocator, BufferUsage::INDIRECT_BUFFER, MemoryUsage::Upload) - } -} - -impl CpuBufferPool -where - [T]: BufferContents, - A: MemoryAllocator + ?Sized, -{ - /// Returns the current capacity of the pool, in number of elements. - pub fn capacity(&self) -> DeviceSize { - match *self.current_buffer.lock().unwrap() { - None => 0, - Some(ref buf) => buf.capacity, - } - } - - /// Makes sure that the capacity is at least `capacity`. Allocates memory if it is not the - /// case. - /// - /// Since this can involve a memory allocation, an `OomError` can happen. - pub fn reserve(&self, capacity: DeviceSize) -> Result<(), AllocationCreationError> { - if capacity == 0 { - return Ok(()); - } - - let mut cur_buf = self.current_buffer.lock().unwrap(); - - // Check current capacity. - match *cur_buf { - Some(ref buf) if buf.capacity >= capacity => { - return Ok(()); - } - _ => (), - }; - - self.reset_buf(&mut cur_buf, capacity) - } - - /// Grants access to a new subbuffer and puts `data` in it. - /// - /// If no subbuffer is available (because they are still in use by the GPU), a new buffer will - /// automatically be allocated. - /// - /// > **Note**: You can think of it like a `Vec`. If you insert an element and the `Vec` is not - /// > large enough, a new chunk of memory is automatically allocated. - pub fn from_data( - &self, - data: T, - ) -> Result>, AllocationCreationError> { - Ok(Arc::new(CpuBufferPoolSubbuffer { - chunk: self.chunk_impl([data].into_iter())?, - })) - } - - /// Grants access to a new subbuffer and puts all elements of `iter` in it. - /// - /// If no subbuffer is available (because they are still in use by the GPU), a new buffer will - /// automatically be allocated. - /// - /// > **Note**: You can think of it like a `Vec`. If you insert elements and the `Vec` is not - /// > large enough, a new chunk of memory is automatically allocated. - /// - /// # Panic - /// - /// Panics if the length of the iterator didn't match the actual number of elements. - pub fn from_iter( - &self, - iter: I, - ) -> Result>, AllocationCreationError> - where - I: IntoIterator, - I::IntoIter: ExactSizeIterator, - { - self.chunk_impl(iter.into_iter()).map(Arc::new) - } - - fn chunk_impl( - &self, - data: impl ExactSizeIterator, - ) -> Result, AllocationCreationError> { - let mut mutex = self.current_buffer.lock().unwrap(); - - let data = match self.try_next_impl(&mut mutex, data) { - Ok(n) => return Ok(n), - Err(d) => d, - }; - - let next_capacity = match *mutex { - Some(ref b) if (data.len() as DeviceSize) < b.capacity => 2 * b.capacity, - _ => 2 * data.len().max(1) as DeviceSize, - }; - - self.reset_buf(&mut mutex, next_capacity)?; - - match self.try_next_impl(&mut mutex, data) { - Ok(n) => Ok(n), - Err(_) => unreachable!(), - } - } - - /// Grants access to a new subbuffer and puts `data` in it. - /// - /// Returns `None` if no subbuffer is available. - /// - /// A `CpuBufferPool` is always empty the first time you use it, so you shouldn't use - /// `try_next` the first time you use it. - pub fn try_next(&self, data: T) -> Option>> { - let mut mutex = self.current_buffer.lock().unwrap(); - self.try_next_impl(&mut mutex, [data]) - .map(|c| Arc::new(CpuBufferPoolSubbuffer { chunk: c })) - .ok() - } - - // Creates a new buffer and sets it as current. The capacity is in number of elements. - // - // `cur_buf_mutex` must be an active lock of `self.current_buffer`. - fn reset_buf( - &self, - cur_buf_mutex: &mut MutexGuard<'_, Option>>, - capacity: DeviceSize, - ) -> Result<(), AllocationCreationError> { - let size = match (size_of::() as DeviceSize).checked_mul(capacity) { - Some(s) => s, - None => { - return Err(AllocationCreationError::VulkanError( - VulkanError::OutOfDeviceMemory, - )) - } - }; - - let raw_buffer = RawBuffer::new( - self.device().clone(), - BufferCreateInfo { - size, - usage: self.buffer_usage, - ..Default::default() - }, - ) - .map_err(|err| match err { - BufferError::AllocError(err) => err, - // We don't use sparse-binding, therefore the other errors can't happen. - _ => unreachable!(), - })?; - let requirements = *raw_buffer.memory_requirements(); - let create_info = AllocationCreateInfo { - requirements, - allocation_type: AllocationType::Linear, - usage: self.memory_usage, - allocate_preference: MemoryAllocatePreference::Unknown, - dedicated_allocation: Some(DedicatedAllocation::Buffer(&raw_buffer)), - ..Default::default() - }; - - match unsafe { self.allocator.allocate_unchecked(create_info) } { - Ok(mut alloc) => { - debug_assert!(alloc.offset() % requirements.alignment == 0); - debug_assert!(alloc.size() == requirements.size); - alloc.shrink(size); - let inner = unsafe { - Arc::new( - raw_buffer - .bind_memory_unchecked(alloc) - .map_err(|(err, _, _)| err)?, - ) - }; - - **cur_buf_mutex = Some(Arc::new(ActualBuffer { - inner, - chunks_in_use: Mutex::new(vec![]), - next_index: AtomicU64::new(0), - capacity, - })); - - Ok(()) - } - Err(err) => Err(err), - } - } - - // Tries to lock a subbuffer from the current buffer. - // - // `cur_buf_mutex` must be an active lock of `self.current_buffer`. - // - // Returns `data` wrapped inside an `Err` if there is no slot available in the current buffer. - // - // # Panic - // - // Panics if the length of the iterator didn't match the actual number of element. - fn try_next_impl( - &self, - cur_buf_mutex: &mut MutexGuard<'_, Option>>, - data: I, - ) -> Result, I::IntoIter> - where - I: IntoIterator, - I::IntoIter: ExactSizeIterator, - { - let mut data = data.into_iter(); - - // Grab the current buffer. Return `Err` if the pool wasn't "initialized" yet. - let current_buffer = match cur_buf_mutex.clone() { - Some(b) => b, - None => return Err(data), - }; - - let mut chunks_in_use = current_buffer.chunks_in_use.lock().unwrap(); - debug_assert!(!chunks_in_use.iter().any(|c| c.len == 0)); - - // Number of elements requested by the user. - let requested_len = data.len() as DeviceSize; - - // We special case when 0 elements are requested. Polluting the list of allocated chunks - // with chunks of length 0 means that we will have troubles deallocating. - if requested_len == 0 { - assert!( - data.next().is_none(), - "Expected iterator passed to CpuBufferPool::chunk to be empty" - ); - return Ok(CpuBufferPoolChunk { - // TODO: remove .clone() once non-lexical borrows land - buffer: current_buffer.clone(), - index: 0, - align_offset: 0, - requested_len: 0, - marker: PhantomData, - }); - } - - // Find a suitable offset and len, or returns if none available. - let (index, occupied_len, align_offset) = { - let (tentative_index, tentative_len, tentative_align_offset) = { - // Since the only place that touches `next_index` is this code, and since we - // own a mutex lock to the buffer, it means that `next_index` can't be accessed - // concurrently. - // TODO: ^ eventually should be put inside the mutex - let idx = current_buffer.next_index.load(Ordering::SeqCst); - - // Find the required alignment in bytes. - let align_uniform = if self.buffer_usage.intersects(BufferUsage::UNIFORM_BUFFER) { - self.device() - .physical_device() - .properties() - .min_uniform_buffer_offset_alignment - } else { - 1 - }; - let align_storage = if self.buffer_usage.intersects(BufferUsage::STORAGE_BUFFER) { - self.device() - .physical_device() - .properties() - .min_storage_buffer_offset_alignment - } else { - 1 - }; - let align_bytes = align_uniform.max(align_storage); - - let tentative_align_offset = (align_bytes - - ((idx * size_of::() as DeviceSize) % align_bytes)) - % align_bytes; - let additional_len = if tentative_align_offset == 0 { - 0 - } else { - 1 + (tentative_align_offset - 1) / size_of::() as DeviceSize - }; - - (idx, requested_len + additional_len, tentative_align_offset) - }; - - // Find out whether any chunk in use overlaps this range. - if tentative_index + tentative_len <= current_buffer.capacity - && !chunks_in_use.iter().any(|c| { - (c.index >= tentative_index && c.index < tentative_index + tentative_len) - || (c.index <= tentative_index && c.index + c.len > tentative_index) - }) - { - (tentative_index, tentative_len, tentative_align_offset) - } else { - // Impossible to allocate at `tentative_index`. Let's try 0 instead. - if requested_len <= current_buffer.capacity - && !chunks_in_use.iter().any(|c| c.index < requested_len) - { - (0, requested_len, 0) - } else { - // Buffer is full. Return. - return Err(data); - } - } - }; - - // Write `data` in the memory. - unsafe { - let range = (index * size_of::() as DeviceSize + align_offset) - ..((index + requested_len) * size_of::() as DeviceSize + align_offset); - - let allocation = match current_buffer.inner.memory() { - BufferMemory::Normal(a) => a, - BufferMemory::Sparse => unreachable!(), - }; - - let bytes = allocation.write(range.clone()).unwrap(); - let mapping = <[T]>::from_bytes_mut(bytes).unwrap(); - - let mut written = 0; - for (o, i) in mapping.iter_mut().zip(data) { - ptr::write(o, i); - written += 1; - } - - allocation.flush_range(range).unwrap(); - - assert_eq!( - written, requested_len, - "Iterator passed to CpuBufferPool::chunk has a mismatch between reported \ - length and actual number of elements" - ); - } - - // Mark the chunk as in use. - current_buffer - .next_index - .store(index + occupied_len, Ordering::SeqCst); - chunks_in_use.push(ActualBufferChunk { - index, - len: occupied_len, - num_cpu_accesses: 1, - }); - - Ok(CpuBufferPoolChunk { - // TODO: remove .clone() once non-lexical borrows land - buffer: current_buffer.clone(), - index, - align_offset, - requested_len, - marker: PhantomData, - }) - } -} - -// Can't automatically derive `Clone`, otherwise the compiler adds a `T: Clone` requirement. -impl Clone for CpuBufferPool -where - [T]: BufferContents, - A: MemoryAllocator + ?Sized, -{ - fn clone(&self) -> Self { - let buf = self.current_buffer.lock().unwrap(); - - CpuBufferPool { - allocator: self.allocator.clone(), - current_buffer: Mutex::new(buf.clone()), - buffer_usage: self.buffer_usage, - memory_usage: self.memory_usage, - marker: PhantomData, - } - } -} - -unsafe impl DeviceOwned for CpuBufferPool -where - [T]: BufferContents, - A: MemoryAllocator + ?Sized, -{ - fn device(&self) -> &Arc { - self.allocator.device() - } -} - -impl Clone for CpuBufferPoolChunk -where - [T]: BufferContents, -{ - fn clone(&self) -> CpuBufferPoolChunk { - let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap(); - let chunk = chunks_in_use_lock - .iter_mut() - .find(|c| c.index == self.index) - .unwrap(); - - debug_assert!(chunk.num_cpu_accesses >= 1); - chunk.num_cpu_accesses = chunk - .num_cpu_accesses - .checked_add(1) - .expect("Overflow in CPU accesses"); - - CpuBufferPoolChunk { - buffer: self.buffer.clone(), - index: self.index, - align_offset: self.align_offset, - requested_len: self.requested_len, - marker: PhantomData, - } - } -} - -unsafe impl BufferAccess for CpuBufferPoolChunk -where - T: Send + Sync, - [T]: BufferContents, -{ - fn inner(&self) -> BufferInner<'_> { - BufferInner { - buffer: &self.buffer.inner, - offset: self.index * size_of::() as DeviceSize + self.align_offset, - } - } - - fn size(&self) -> DeviceSize { - self.requested_len * size_of::() as DeviceSize - } -} - -impl BufferAccessObject for Arc> -where - T: Send + Sync, - [T]: BufferContents, -{ - fn as_buffer_access_object(&self) -> Arc { - self.clone() - } -} - -impl Drop for CpuBufferPoolChunk -where - [T]: BufferContents, -{ - fn drop(&mut self) { - // If `requested_len` is 0, then no entry was added in the chunks. - if self.requested_len == 0 { - return; - } - - let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap(); - let chunk_num = chunks_in_use_lock - .iter_mut() - .position(|c| c.index == self.index) - .unwrap(); - - if chunks_in_use_lock[chunk_num].num_cpu_accesses >= 2 { - chunks_in_use_lock[chunk_num].num_cpu_accesses -= 1; - } else { - chunks_in_use_lock.remove(chunk_num); - } - } -} - -unsafe impl TypedBufferAccess for CpuBufferPoolChunk -where - T: Send + Sync, - [T]: BufferContents, -{ - type Content = [T]; -} - -unsafe impl DeviceOwned for CpuBufferPoolChunk -where - [T]: BufferContents, -{ - fn device(&self) -> &Arc { - self.buffer.inner.device() - } -} - -impl PartialEq for CpuBufferPoolChunk -where - T: Send + Sync, - [T]: BufferContents, -{ - fn eq(&self, other: &Self) -> bool { - self.inner() == other.inner() && self.size() == other.size() - } -} - -impl Eq for CpuBufferPoolChunk -where - T: Send + Sync, - [T]: BufferContents, -{ -} - -impl Hash for CpuBufferPoolChunk -where - T: Send + Sync, - [T]: BufferContents, -{ - fn hash(&self, state: &mut H) { - self.inner().hash(state); - self.size().hash(state); - } -} - -impl Clone for CpuBufferPoolSubbuffer -where - [T]: BufferContents, -{ - fn clone(&self) -> CpuBufferPoolSubbuffer { - CpuBufferPoolSubbuffer { - chunk: self.chunk.clone(), - } - } -} - -unsafe impl BufferAccess for CpuBufferPoolSubbuffer -where - T: Send + Sync, - [T]: BufferContents, -{ - fn inner(&self) -> BufferInner<'_> { - self.chunk.inner() - } - - fn size(&self) -> DeviceSize { - self.chunk.size() - } -} - -impl BufferAccessObject for Arc> -where - T: Send + Sync, - [T]: BufferContents, -{ - fn as_buffer_access_object(&self) -> Arc { - self.clone() - } -} - -unsafe impl TypedBufferAccess for CpuBufferPoolSubbuffer -where - T: BufferContents, - [T]: BufferContents, -{ - type Content = T; -} - -unsafe impl DeviceOwned for CpuBufferPoolSubbuffer -where - [T]: BufferContents, -{ - fn device(&self) -> &Arc { - self.chunk.buffer.inner.device() - } -} - -impl PartialEq for CpuBufferPoolSubbuffer -where - T: Send + Sync, - [T]: BufferContents, -{ - fn eq(&self, other: &Self) -> bool { - self.inner() == other.inner() && self.size() == other.size() - } -} - -impl Eq for CpuBufferPoolSubbuffer -where - T: Send + Sync, - [T]: BufferContents, -{ -} - -impl Hash for CpuBufferPoolSubbuffer -where - T: Send + Sync, - [T]: BufferContents, -{ - fn hash(&self, state: &mut H) { - self.inner().hash(state); - self.size().hash(state); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::mem; - - #[test] - fn basic_create() { - let (device, _) = gfx_dev_and_queue!(); - let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - let _ = CpuBufferPool::::upload(memory_allocator); - } - - #[test] - fn reserve() { - let (device, _) = gfx_dev_and_queue!(); - let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - - let pool = CpuBufferPool::::upload(memory_allocator); - assert_eq!(pool.capacity(), 0); - - pool.reserve(83).unwrap(); - assert_eq!(pool.capacity(), 83); - } - - #[test] - fn capacity_increase() { - let (device, _) = gfx_dev_and_queue!(); - let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - - let pool = CpuBufferPool::upload(memory_allocator); - assert_eq!(pool.capacity(), 0); - - pool.from_data(12).unwrap(); - let first_cap = pool.capacity(); - assert!(first_cap >= 1); - - for _ in 0..first_cap + 5 { - mem::forget(pool.from_data(12).unwrap()); - } - - assert!(pool.capacity() > first_cap); - } - - #[test] - fn reuse_subbuffers() { - let (device, _) = gfx_dev_and_queue!(); - let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - - let pool = CpuBufferPool::upload(memory_allocator); - assert_eq!(pool.capacity(), 0); - - let mut capacity = None; - for _ in 0..64 { - pool.from_data(12).unwrap(); - - let new_cap = pool.capacity(); - assert!(new_cap >= 1); - match capacity { - None => capacity = Some(new_cap), - Some(c) => assert_eq!(c, new_cap), - } - } - } - - #[test] - fn chunk_loopback() { - let (device, _) = gfx_dev_and_queue!(); - let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - - let pool = CpuBufferPool::::upload(memory_allocator); - pool.reserve(5).unwrap(); - - let a = pool.from_iter(vec![0, 0]).unwrap(); - let b = pool.from_iter(vec![0, 0]).unwrap(); - assert_eq!(b.index, 2); - drop(a); - - let c = pool.from_iter(vec![0, 0]).unwrap(); - assert_eq!(c.index, 0); - - assert_eq!(pool.capacity(), 5); - } - - #[test] - fn chunk_0_elems_doesnt_pollute() { - let (device, _) = gfx_dev_and_queue!(); - let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - - let pool = CpuBufferPool::::upload(memory_allocator); - - let _ = pool.from_iter(vec![]).unwrap(); - let _ = pool.from_iter(vec![0, 0]).unwrap(); - } -} diff --git a/vulkano/src/buffer/mod.rs b/vulkano/src/buffer/mod.rs index 76fc3600b5..63490141ef 100644 --- a/vulkano/src/buffer/mod.rs +++ b/vulkano/src/buffer/mod.rs @@ -14,11 +14,12 @@ //! between a Vulkan buffer and a regular buffer is that the content of a Vulkan buffer is //! accessible from the GPU. //! -//! Vulkano does not perform any specific marshalling of buffer data. The representation of the buffer in -//! memory is identical between the CPU and GPU. Because the Rust compiler is allowed to reorder struct -//! fields at will by default when using `#[repr(Rust)]`, it is advised to mark each struct requiring -//! imput assembly as `#[repr(C)]`. This forces Rust to follow the standard C procedure. Each element is -//! laid out in memory in the order of declaration and aligned to a multiple of their alignment. +//! Vulkano does not perform any specific marshalling of buffer data. The representation of the +//! buffer in memory is identical between the CPU and GPU. Because the Rust compiler is allowed to +//! reorder struct fields at will by default when using `#[repr(Rust)]`, it is advised to mark each +//! struct requiring imput assembly as `#[repr(C)]`. This forces Rust to follow the standard C +//! procedure. Each element is laid out in memory in the order of declaration and aligned to a +//! multiple of their alignment. //! //! # Various kinds of buffers //! @@ -28,26 +29,24 @@ //! Instead you are encouraged to use one of the high-level wrappers that vulkano provides. Which //! wrapper to use depends on the way you are going to use the buffer: //! -//! - A [`DeviceLocalBuffer`](crate::buffer::device_local::DeviceLocalBuffer) designates a buffer -//! usually located in video memory and whose content can't be directly accessed by your -//! application. Accessing this buffer from the GPU is generally faster compared to accessing a -//! CPU-accessible buffer. -//! - A [`CpuBufferPool`](crate::buffer::cpu_pool::CpuBufferPool) is a ring buffer that can be used to -//! transfer data between the CPU and the GPU at a high rate. -//! - A [`CpuAccessibleBuffer`](crate::buffer::cpu_access::CpuAccessibleBuffer) is a simple buffer that -//! can be used to prototype. +//! - A [`DeviceLocalBuffer`] designates a buffer usually located in video memory and whose content +//! can't be directly accessed by your application. Accessing this buffer from the GPU is +//! generally faster compared to accessing a CPU-accessible buffer. +//! - A [`CpuBufferAllocator`] can be used to transfer data between the CPU and the GPU at a high +//! rate. +//! - A [`CpuAccessibleBuffer`] is a simple buffer that can be used to prototype. //! -//! Here is a quick way to choose which buffer to use. Do you often need to read or write -//! the content of the buffer? If so, use a `CpuBufferPool`. Otherwise, do you need to have access +//! Here is a quick way to choose which buffer to use. Do you often need to read or write the +//! content of the buffer? If so, use a `CpuBufferAllocator`. Otherwise, do you need to have access //! to the buffer on the CPU? Then use `CpuAccessibleBuffer`. Otherwise, use a `DeviceLocalBuffer`. //! -//! Another example: if a buffer is under constant access by the GPU but you need to -//! read its content on the CPU from time to time, it may be a good idea to use a -//! `DeviceLocalBuffer` as the main buffer and a `CpuBufferPool` for when you need to read it. -//! Then whenever you need to read the main buffer, ask the GPU to copy from the device-local -//! buffer to the CPU buffer pool, and read the CPU buffer pool instead. +//! Another example: if a buffer is under constant access by the GPU but you need to read its +//! content on the CPU from time to time, it may be a good idea to use a `DeviceLocalBuffer` as the +//! main buffer and a `CpuAccessibleBuffer` for when you need to read it. Then whenever you need to +//! read the main buffer, ask the GPU to copy from the device-local buffer to the CPU-accessible +//! buffer, and read the CPU-accessible buffer instead. //! -//! # Buffers usage +//! # Buffer usage //! //! When you create a buffer object, you have to specify its *usage*. In other words, you have to //! specify the way it is going to be used. Trying to use a buffer in a way that wasn't specified @@ -64,18 +63,18 @@ //! //! - As a uniform buffer. Uniform buffers are read-only. //! - As a storage buffer. Storage buffers can be read and written. -//! - As a uniform texel buffer. Contrary to a uniform buffer, the data is interpreted by the -//! GPU and can be for example normalized. +//! - As a uniform texel buffer. Contrary to a uniform buffer, the data is interpreted by the GPU +//! and can be for example normalized. //! - As a storage texel buffer. Additionally, some data formats can be modified with atomic //! operations. //! //! Using uniform/storage texel buffers requires creating a *buffer view*. See the `view` module //! for how to create a buffer view. //! +//! [`CpuBufferAllocator`]: allocator::CpuBufferAllocator pub use self::{ cpu_access::CpuAccessibleBuffer, - cpu_pool::CpuBufferPool, device_local::DeviceLocalBuffer, slice::BufferSlice, sys::BufferError, @@ -95,8 +94,8 @@ use bytemuck::{ }; use std::mem::size_of; +pub mod allocator; pub mod cpu_access; -pub mod cpu_pool; pub mod device_local; pub mod sys; pub mod view; @@ -164,7 +163,7 @@ pub unsafe trait BufferContents: Send + Sync + 'static { /// Converts an immutable reference to `Self` to an immutable byte slice. fn as_bytes(&self) -> &[u8]; - /// Converts a mutable reference to `Self` to an mutable byte slice. + /// Converts a mutable reference to `Self` to a mutable byte slice. fn as_bytes_mut(&mut self) -> &mut [u8]; /// Converts an immutable byte slice into an immutable reference to `Self`. diff --git a/vulkano/src/memory/allocator/mod.rs b/vulkano/src/memory/allocator/mod.rs index f060dd3bea..21c32e2d51 100644 --- a/vulkano/src/memory/allocator/mod.rs +++ b/vulkano/src/memory/allocator/mod.rs @@ -1624,6 +1624,16 @@ impl From for GenericMemoryAllocatorCreationError { } } +pub(crate) fn align_up(val: DeviceSize, alignment: DeviceSize) -> DeviceSize { + align_down(val + alignment - 1, alignment) +} + +pub(crate) fn align_down(val: DeviceSize, alignment: DeviceSize) -> DeviceSize { + debug_assert!(alignment.is_power_of_two()); + + val & !(alignment - 1) +} + mod array_vec { use std::ops::{Deref, DerefMut}; diff --git a/vulkano/src/memory/allocator/suballocator.rs b/vulkano/src/memory/allocator/suballocator.rs index f9d0eba77e..9daab0e7d6 100644 --- a/vulkano/src/memory/allocator/suballocator.rs +++ b/vulkano/src/memory/allocator/suballocator.rs @@ -14,7 +14,9 @@ //! [the parent module]: super use self::host::SlotId; -use super::{array_vec::ArrayVec, AllocationCreateInfo, AllocationCreationError}; +use super::{ + align_down, align_up, array_vec::ArrayVec, AllocationCreateInfo, AllocationCreationError, +}; use crate::{ device::{Device, DeviceOwned}, image::ImageTiling, @@ -205,6 +207,10 @@ impl MemoryAlloc { }) } + pub(crate) fn atom_size(&self) -> Option { + self.atom_size + } + /// Invalidates the host (CPU) cache for a range of the allocation. /// /// You must call this method before the memory is read by the host, if the device previously @@ -239,8 +245,7 @@ impl MemoryAlloc { .result() .map_err(VulkanError::from)?; } else { - // FIXME: - // self.debug_validate_memory_range(&range); + self.debug_validate_memory_range(&range); } Ok(()) @@ -280,8 +285,7 @@ impl MemoryAlloc { .result() .map_err(VulkanError::from)?; } else { - // FIXME: - // self.debug_validate_memory_range(&range); + self.debug_validate_memory_range(&range); } Ok(()) @@ -330,18 +334,22 @@ impl MemoryAlloc { /// This exists because even if no cache control is required, the parameters should still be /// valid, otherwise you might have bugs in your code forever just because your memory happens /// to be host-coherent. - #[allow(dead_code)] fn debug_validate_memory_range(&self, range: &Range) { debug_assert!(!range.is_empty() && range.end <= self.size); - debug_assert!({ - let atom_size = self - .device() - .physical_device() - .properties() - .non_coherent_atom_size; - - range.start % atom_size == 0 && (range.end % atom_size == 0 || range.end == self.size) - }); + debug_assert!( + { + let atom_size = self + .device() + .physical_device() + .properties() + .non_coherent_atom_size; + + range.start % atom_size == 0 + && (range.end % atom_size == 0 || range.end == self.size) + }, + "attempted to invalidate or flush a memory range that is not aligned to the \ + non-coherent atom size", + ); } /// Returns the underlying block of [`DeviceMemory`]. @@ -925,17 +933,17 @@ impl Display for SuballocationCreationError { /// }); /// ``` /// -/// For use in allocating buffers for [`CpuBufferPool`]: +/// For use in allocating arenas for [`CpuBufferAllocator`]: /// /// ``` /// use std::sync::Arc; -/// use vulkano::buffer::CpuBufferPool; +/// use vulkano::buffer::allocator::CpuBufferAllocator; /// use vulkano::memory::allocator::StandardMemoryAllocator; /// # let device: std::sync::Arc = return; /// /// // We need to wrap the allocator in an `Arc` so that we can share ownership of it. /// let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); -/// let buffer_pool = CpuBufferPool::::upload(memory_allocator.clone()); +/// let buffer_allocator = CpuBufferAllocator::new(memory_allocator.clone(), Default::default()); /// /// // You can continue using `memory_allocator` for other things. /// ``` @@ -978,7 +986,7 @@ impl Display for SuballocationCreationError { /// [alignment requirements]: super#alignment /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator /// [`StandardMemoryAllocator`]: super::StandardMemoryAllocator -/// [`CpuBufferPool`]: crate::buffer::CpuBufferPool +/// [`CpuBufferAllocator`]: crate::buffer::allocator::CpuBufferAllocator #[derive(Debug)] pub struct FreeListAllocator { region: MemoryAlloc, @@ -2430,16 +2438,6 @@ impl Display for BumpAllocatorResetError { } } -fn align_up(val: DeviceSize, alignment: DeviceSize) -> DeviceSize { - align_down(val + alignment - 1, alignment) -} - -fn align_down(val: DeviceSize, alignment: DeviceSize) -> DeviceSize { - debug_assert!(alignment.is_power_of_two()); - - val & !(alignment - 1) -} - /// Checks if resouces A and B share a page. /// /// > **Note**: Assumes `a_offset + a_size > 0` and `a_offset + a_size <= b_offset`.