Skip to content

GH-45664: [C++] Allow LargeString,LargeBinary,FixedSizeBinary,StringView and BinaryView for RecordBatch::MakeStatisticsArray() #46031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 changes: 2 additions & 0 deletions cpp/src/arrow/array/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ struct ARROW_EXPORT ArrayStatistics {
case Type::FIXED_SIZE_BINARY:
case Type::LARGE_STRING:
case Type::LARGE_BINARY:
case Type::BINARY_VIEW:
case Type::STRING_VIEW:
return array_type;
default:
return utf8();
Expand Down
23 changes: 20 additions & 3 deletions cpp/src/arrow/record_batch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@
#include "arrow/array/builder_nested.h"
#include "arrow/array/builder_union.h"
#include "arrow/array/concatenate.h"
#include "arrow/array/statistics.h"
#include "arrow/array/validate.h"
#include "arrow/c/abi.h"
#include "arrow/pretty_print.h"
#include "arrow/status.h"
#include "arrow/table.h"
#include "arrow/tensor.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/iterator.h"
#include "arrow/util/logging_internal.h"
#include "arrow/util/vector.h"
Expand Down Expand Up @@ -556,6 +558,21 @@ Status EnumerateStatistics(const RecordBatch& record_batch, OnStatistics on_stat
}
return Status::OK();
}
// Type visitor that appends a std::string value to a type-erased builder.
//
// The overload chosen depends on the concrete data type handed to Visit():
// types accepted by enable_if_has_string_view take the templated overload,
// every other type falls back to the overload that reports an error.
struct StringBuilderVisitor {
  // Fallback for any type the templated overload does not accept: nothing is
  // appended and an Invalid status naming the offending type is returned.
  Status Visit(const DataType& type, ArrayBuilder*, const std::string&) {
    return Status::Invalid("Only string types are supported and the current type is ",
                           type.ToString());
  }

  // Appends `value` through the builder type that TypeTraits associates with T.
  // `raw_builder` is downcast without a runtime check, so the caller must pass
  // the builder that matches the visited type.
  template <typename T>
  enable_if_has_string_view<T, Status> Visit(const T&, ArrayBuilder* raw_builder,
                                             const std::string& value) {
    using ConcreteBuilder = typename TypeTraits<T>::BuilderType;
    auto* typed_builder = static_cast<ConcreteBuilder*>(raw_builder);
    return typed_builder->Append(value);
  }
};
} // namespace

Result<std::shared_ptr<Array>> RecordBatch::MakeStatisticsArray(
Expand All @@ -580,7 +597,7 @@ Result<std::shared_ptr<Array>> RecordBatch::MakeStatisticsArray(
RETURN_NOT_OK(EnumerateStatistics(*this, [&](const EnumeratedStatistics& statistics) {
int8_t i = 0;
for (const auto& field : values_types) {
if (field->type()->id() == statistics.type->id()) {
if (field->type()->Equals(statistics.type)) {
break;
}
i++;
Expand Down Expand Up @@ -680,8 +697,8 @@ Result<std::shared_ptr<Array>> RecordBatch::MakeStatisticsArray(
return static_cast<DoubleBuilder*>(builder)->Append(value);
}
Status operator()(const std::string& value) {
return static_cast<StringBuilder*>(builder)->Append(
value.data(), static_cast<int32_t>(value.size()));
StringBuilderVisitor visitor;
return VisitTypeInline(*builder->type(), &visitor, builder, value);
}
} visitor;
visitor.builder = values_builders[values_type_index].get();
Expand Down
Loading
Loading