Skip to content

Commit dc76ea2

Browse files
committed
fix(13796): handle overflows with proper max capacity number which is valid for MutableBuffer
1 parent 4269766 commit dc76ea2

File tree

1 file changed

+65
-5
lines changed

1 file changed

+65
-5
lines changed

datafusion/functions/src/strings.rs

+65-5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
use std::mem::size_of;
1919

20+
use arrow::alloc::ALIGNMENT;
2021
use arrow::array::{
2122
make_view, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ByteView,
2223
GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray, StringViewArray,
@@ -124,8 +125,18 @@ pub struct StringArrayBuilder {
124125

125126
impl StringArrayBuilder {
126127
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
127-
let mut offsets_buffer =
128-
MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i32>());
128+
// MutableBuffer::with_capacity will round up to multiples of `ALIGNMENT`
129+
let capacity = std::cmp::max(
130+
item_capacity.saturating_add(1).saturating_mul(ALIGNMENT),
131+
data_capacity,
132+
);
133+
// rust core's Layout::from_size_align is capped at isize::Max
134+
let capacity = std::cmp::min(
135+
capacity,
136+
isize::MAX as usize - (isize::MAX as usize % ALIGNMENT),
137+
);
138+
139+
let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
129140
// SAFETY: the first offset value is definitely not going to exceed the bounds.
130141
unsafe { offsets_buffer.push_unchecked(0_i32) };
131142
Self {
@@ -289,8 +300,18 @@ pub struct LargeStringArrayBuilder {
289300

290301
impl LargeStringArrayBuilder {
291302
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
292-
let mut offsets_buffer =
293-
MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i64>());
303+
// MutableBuffer::with_capacity will round up to multiples of `ALIGNMENT`
304+
let capacity = std::cmp::max(
305+
item_capacity.saturating_add(1).saturating_mul(ALIGNMENT),
306+
data_capacity,
307+
);
308+
// rust core's Layout::from_size_align is capped at isize::Max
309+
let capacity = std::cmp::min(
310+
capacity,
311+
isize::MAX as usize - (isize::MAX as usize % ALIGNMENT),
312+
);
313+
314+
let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
294315
// SAFETY: the first offset value is definitely not going to exceed the bounds.
295316
unsafe { offsets_buffer.push_unchecked(0_i64) };
296317
Self {
@@ -455,11 +476,50 @@ impl ColumnarValueRef<'_> {
455476

456477
#[cfg(test)]
457478
mod tests {
479+
use std::alloc::{GlobalAlloc, Layout, System};
480+
458481
use super::*;
459482

483+
#[derive(Default, Copy, Clone)]
484+
pub struct MockAllocator;
485+
486+
unsafe impl GlobalAlloc for MockAllocator {
487+
/// only need to check it's valid
488+
#[inline]
489+
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
490+
if layout.size() >= isize::MAX as usize - (isize::MAX as usize % ALIGNMENT) {
491+
// system allocator will abort in such a way that is not detectable in tests
492+
// instead, throw a panic we can detect
493+
panic!("successfully tried to allocate for max size possible");
494+
} else {
495+
System.alloc(layout)
496+
}
497+
}
498+
499+
#[inline]
500+
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
501+
System.dealloc(ptr, layout)
502+
}
503+
}
504+
505+
#[global_allocator]
506+
static ALLOCATOR: MockAllocator = MockAllocator;
507+
460508
#[test]
461-
fn test_overflow_string_array_builders() {
509+
#[should_panic(expected = "successfully tried to allocate for max size possible")]
510+
fn test_overflow_string_array_builder() {
511+
// should successfully handle overflow
512+
// while providing a number that passes upstream checks
513+
// and will then panic (by trying to allocate too much)
462514
let _builder = StringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
515+
}
516+
517+
#[test]
518+
#[should_panic(expected = "successfully tried to allocate for max size possible")]
519+
fn test_overflow_large_string_array_builder() {
520+
// should successfully handle overflow
521+
// while providing a number that passes upstream checks
522+
// and will then panic (by trying to allocate too much)
463523
let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
464524
}
465525
}

0 commit comments

Comments
 (0)