|
17 | 17 |
|
18 | 18 | use std::mem::size_of;
|
19 | 19 |
|
| 20 | +use arrow::alloc::ALIGNMENT; |
20 | 21 | use arrow::array::{
|
21 | 22 | make_view, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ByteView,
|
22 | 23 | GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray, StringViewArray,
|
@@ -124,8 +125,18 @@ pub struct StringArrayBuilder {
|
124 | 125 |
|
125 | 126 | impl StringArrayBuilder {
|
126 | 127 | pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
|
127 |
| - let mut offsets_buffer = |
128 |
| - MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i32>()); |
| 128 | + // MutableBuffer::with_capacity will round up to multiples of `ALIGNMENT` |
| 129 | + let capacity = std::cmp::max( |
| 130 | + item_capacity.saturating_add(1).saturating_mul(ALIGNMENT), |
| 131 | + data_capacity, |
| 132 | + ); |
| 133 | + // rust core's Layout::from_size_align is capped at isize::Max |
| 134 | + let capacity = std::cmp::min( |
| 135 | + capacity, |
| 136 | + isize::MAX as usize - (isize::MAX as usize % ALIGNMENT), |
| 137 | + ); |
| 138 | + |
| 139 | + let mut offsets_buffer = MutableBuffer::with_capacity(capacity); |
129 | 140 | // SAFETY: the first offset value is definitely not going to exceed the bounds.
|
130 | 141 | unsafe { offsets_buffer.push_unchecked(0_i32) };
|
131 | 142 | Self {
|
@@ -289,8 +300,18 @@ pub struct LargeStringArrayBuilder {
|
289 | 300 |
|
290 | 301 | impl LargeStringArrayBuilder {
|
291 | 302 | pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
|
292 |
| - let mut offsets_buffer = |
293 |
| - MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i64>()); |
| 303 | + // MutableBuffer::with_capacity will round up to multiples of `ALIGNMENT` |
| 304 | + let capacity = std::cmp::max( |
| 305 | + item_capacity.saturating_add(1).saturating_mul(ALIGNMENT), |
| 306 | + data_capacity, |
| 307 | + ); |
| 308 | + // rust core's Layout::from_size_align is capped at isize::Max |
| 309 | + let capacity = std::cmp::min( |
| 310 | + capacity, |
| 311 | + isize::MAX as usize - (isize::MAX as usize % ALIGNMENT), |
| 312 | + ); |
| 313 | + |
| 314 | + let mut offsets_buffer = MutableBuffer::with_capacity(capacity); |
294 | 315 | // SAFETY: the first offset value is definitely not going to exceed the bounds.
|
295 | 316 | unsafe { offsets_buffer.push_unchecked(0_i64) };
|
296 | 317 | Self {
|
@@ -455,11 +476,50 @@ impl ColumnarValueRef<'_> {
|
455 | 476 |
|
456 | 477 | #[cfg(test)]
|
457 | 478 | mod tests {
|
| 479 | + use std::alloc::{GlobalAlloc, Layout, System}; |
| 480 | + |
458 | 481 | use super::*;
|
459 | 482 |
|
| 483 | + #[derive(Default, Copy, Clone)] |
| 484 | + pub struct MockAllocator; |
| 485 | + |
| 486 | + unsafe impl GlobalAlloc for MockAllocator { |
| 487 | + /// only need to check it's valid |
| 488 | + #[inline] |
| 489 | + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { |
| 490 | + if layout.size() >= isize::MAX as usize - (isize::MAX as usize % ALIGNMENT) { |
| 491 | + // system allocator will abort in such a way that is not detectable in tests |
| 492 | + // instead, throw a panic we can detect |
| 493 | + panic!("successfully tried to allocate for max size possible"); |
| 494 | + } else { |
| 495 | + System.alloc(layout) |
| 496 | + } |
| 497 | + } |
| 498 | + |
| 499 | + #[inline] |
| 500 | + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { |
| 501 | + System.dealloc(ptr, layout) |
| 502 | + } |
| 503 | + } |
| 504 | + |
| 505 | + #[global_allocator] |
| 506 | + static ALLOCATOR: MockAllocator = MockAllocator; |
| 507 | + |
460 | 508 | #[test]
|
461 |
| - fn test_overflow_string_array_builders() { |
| 509 | + #[should_panic(expected = "successfully tried to allocate for max size possible")] |
| 510 | + fn test_overflow_string_array_builder() { |
| 511 | + // should successfully handle overflow |
| 512 | + // while providing a number that passes upstream checks |
| 513 | + // and will then panic (by trying to allocate too much) |
462 | 514 | let _builder = StringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
|
| 515 | + } |
| 516 | + |
| 517 | + #[test] |
| 518 | + #[should_panic(expected = "successfully tried to allocate for max size possible")] |
| 519 | + fn test_overflow_large_string_array_builder() { |
| 520 | + // should successfully handle overflow |
| 521 | + // while providing a number that passes upstream checks |
| 522 | + // and will then panic (by trying to allocate too much) |
463 | 523 | let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
|
464 | 524 | }
|
465 | 525 | }
|
0 commit comments