Skip to content

Commit 4291dad

Browse files
AlenkaF and zanmato1984 authored
GH-45744: [C++] Remove deprecated GetNextSegment (#45745)
### Rationale for this change

`GetNextSegment` has been deprecated since 18.0.0 and can now be removed.

### What changes are included in this PR?

`GetNextSegment` and its related code are removed from compute/row/grouper.cc and compute/row/grouper.h.

### Are these changes tested?

The existing tests should pass.

### Are there any user-facing changes?

`GetNextSegment` is removed in favour of `GetSegments`.

* GitHub Issue: #45744

Lead-authored-by: AlenkaF <frim.alenka@gmail.com>
Co-authored-by: Alenka Frim <AlenkaF@users.noreply.github.com>
Co-authored-by: Rossi Sun <zanmato1984@gmail.com>
Signed-off-by: Rossi Sun <zanmato1984@gmail.com>
1 parent 2316ce8 commit 4291dad

File tree

2 files changed

+5
-145
lines changed

2 files changed

+5
-145
lines changed

cpp/src/arrow/compute/row/grouper.cc

+5 -140
Original file line numberDiff line numberDiff line change
@@ -55,18 +55,14 @@ using group_id_t = std::remove_const<decltype(kNoGroupId)>::type;
5555
using GroupIdType = CTypeTraits<group_id_t>::ArrowType;
5656
auto g_group_id_type = std::make_shared<GroupIdType>();
5757

58-
template <typename Value>
59-
ARROW_DEPRECATED("Deprecated in 18.0.0 along with GetSegments.")
60-
Status CheckForGetNextSegment(const std::vector<Value>& values, int64_t length,
61-
int64_t offset, const std::vector<TypeHolder>& key_types) {
62-
DCHECK_GE(offset, 0);
63-
DCHECK_LE(offset, length);
64-
if (values.size() != key_types.size()) {
58+
Status CheckForGetSegments(const ExecSpan& batch,
59+
const std::vector<TypeHolder>& key_types) {
60+
if (batch.values.size() != key_types.size()) {
6561
return Status::Invalid("expected batch size ", key_types.size(), " but got ",
66-
values.size());
62+
batch.values.size());
6763
}
6864
for (size_t i = 0; i < key_types.size(); i++) {
69-
const auto& value = values[i];
65+
const auto& value = batch.values[i];
7066
const auto& key_type = key_types[i];
7167
if (*value.type() != *key_type.type) {
7268
return Status::Invalid("expected batch value ", i, " of type ", *key_type.type,
@@ -76,25 +72,6 @@ Status CheckForGetNextSegment(const std::vector<Value>& values, int64_t length,
7672
return Status::OK();
7773
}
7874

79-
template <typename Batch>
80-
ARROW_DEPRECATED("Deprecated in 18.0.0 along with GetSegments.")
81-
enable_if_t<std::is_same<Batch, ExecSpan>::value || std::is_same<Batch, ExecBatch>::value,
82-
Status> CheckForGetNextSegment(const Batch& batch, int64_t offset,
83-
const std::vector<TypeHolder>& key_types) {
84-
ARROW_SUPPRESS_DEPRECATION_WARNING
85-
return CheckForGetNextSegment(batch.values, batch.length, offset, key_types);
86-
ARROW_UNSUPPRESS_DEPRECATION_WARNING
87-
}
88-
89-
Status CheckForGetSegments(const ExecSpan& batch,
90-
const std::vector<TypeHolder>& key_types) {
91-
// TODO: Move the implementation of CheckForGetNextSegment here once we remove the
92-
// deprecated functions.
93-
ARROW_SUPPRESS_DEPRECATION_WARNING
94-
return CheckForGetNextSegment(batch, 0, key_types);
95-
ARROW_UNSUPPRESS_DEPRECATION_WARNING
96-
}
97-
9875
struct BaseRowSegmenter : public RowSegmenter {
9976
explicit BaseRowSegmenter(const std::vector<TypeHolder>& key_types)
10077
: key_types_(key_types) {}
@@ -110,7 +87,6 @@ Segment MakeSegment(int64_t batch_length, int64_t offset, int64_t length, bool e
11087

11188
using ExtendFunc = std::function<bool(const void*)>;
11289
constexpr bool kDefaultExtends = true; // by default, the first segment extends
113-
constexpr bool kEmptyExtends = true; // an empty segment extends too
11490

11591
struct NoKeysSegmenter : public BaseRowSegmenter {
11692
static std::unique_ptr<RowSegmenter> Make() {
@@ -121,14 +97,6 @@ struct NoKeysSegmenter : public BaseRowSegmenter {
12197

12298
Status Reset() override { return Status::OK(); }
12399

124-
ARROW_DEPRECATED("Deprecated in 18.0.0. Use GetSegments instead.")
125-
Result<Segment> GetNextSegment(const ExecSpan& batch, int64_t offset) override {
126-
ARROW_SUPPRESS_DEPRECATION_WARNING
127-
ARROW_RETURN_NOT_OK(CheckForGetNextSegment(batch, offset, {}));
128-
return MakeSegment(batch.length, offset, batch.length - offset, kDefaultExtends);
129-
ARROW_UNSUPPRESS_DEPRECATION_WARNING
130-
}
131-
132100
Result<std::vector<Segment>> GetSegments(const ExecSpan& batch) override {
133101
RETURN_NOT_OK(CheckForGetSegments(batch, {}));
134102

@@ -156,64 +124,6 @@ struct SimpleKeySegmenter : public BaseRowSegmenter {
156124
return Status::OK();
157125
}
158126

159-
// Checks whether the given grouping data extends the current segment, i.e., is equal to
160-
// previously seen grouping data, which is updated with each invocation.
161-
ARROW_DEPRECATED("Deprecated in 18.0.0 along with GetSegments.")
162-
bool ExtendDeprecated(const void* data) {
163-
bool extends = !extend_was_called_
164-
? kDefaultExtends
165-
: 0 == memcmp(save_key_data_.data(), data, save_key_data_.size());
166-
extend_was_called_ = true;
167-
memcpy(save_key_data_.data(), data, save_key_data_.size());
168-
return extends;
169-
}
170-
171-
ARROW_DEPRECATED("Deprecated in 18.0.0 along with GetSegments.")
172-
Result<Segment> GetNextSegmentDeprecated(const Scalar& scalar, int64_t offset,
173-
int64_t length) {
174-
ARROW_SUPPRESS_DEPRECATION_WARNING
175-
DCHECK(is_fixed_width(*scalar.type));
176-
DCHECK(scalar.is_valid);
177-
auto data = checked_cast<const PrimitiveScalarBase&>(scalar).data();
178-
bool extends = length > 0 ? ExtendDeprecated(data) : kEmptyExtends;
179-
return MakeSegment(length, offset, length, extends);
180-
ARROW_UNSUPPRESS_DEPRECATION_WARNING
181-
}
182-
183-
ARROW_DEPRECATED("Deprecated in 18.0.0 along with GetSegments.")
184-
Result<Segment> GetNextSegmentDeprecated(const DataType& array_type,
185-
const uint8_t* array_bytes, int64_t offset,
186-
int64_t length) {
187-
ARROW_SUPPRESS_DEPRECATION_WARNING
188-
DCHECK(is_fixed_width(array_type));
189-
DCHECK_LE(offset, length);
190-
int64_t byte_width = array_type.byte_width();
191-
int64_t match_length = GetMatchLength(array_bytes + offset * byte_width, byte_width,
192-
array_bytes, offset, length);
193-
bool extends =
194-
length > 0 ? ExtendDeprecated(array_bytes + offset * byte_width) : kEmptyExtends;
195-
return MakeSegment(length, offset, match_length, extends);
196-
ARROW_UNSUPPRESS_DEPRECATION_WARNING
197-
}
198-
199-
Result<Segment> GetNextSegment(const ExecSpan& batch, int64_t offset) override {
200-
ARROW_SUPPRESS_DEPRECATION_WARNING
201-
ARROW_RETURN_NOT_OK(CheckForGetNextSegment(batch, offset, {key_type_}));
202-
if (offset == batch.length) {
203-
return MakeSegment(batch.length, offset, 0, kEmptyExtends);
204-
}
205-
const auto& value = batch.values[0];
206-
if (value.is_scalar()) {
207-
return GetNextSegmentDeprecated(*value.scalar, offset, batch.length);
208-
}
209-
ARROW_DCHECK(value.is_array());
210-
const auto& array = value.array;
211-
DCHECK_EQ(array.GetNullCount(), 0);
212-
return GetNextSegmentDeprecated(*array.type, GetValuesAsBytes(array), offset,
213-
batch.length);
214-
ARROW_UNSUPPRESS_DEPRECATION_WARNING
215-
}
216-
217127
Result<std::vector<Segment>> GetSegments(const ExecSpan& batch) override {
218128
RETURN_NOT_OK(CheckForGetSegments(batch, {key_type_}));
219129

@@ -314,51 +224,6 @@ struct AnyKeysSegmenter : public BaseRowSegmenter {
314224
return Status::OK();
315225
}
316226

317-
ARROW_DEPRECATED("Deprecated in 18.0.0 along with GetSegments.")
318-
bool Extend(const void* data) {
319-
auto group_id = *static_cast<const group_id_t*>(data);
320-
bool extends =
321-
save_group_id_ == kNoGroupId ? kDefaultExtends : save_group_id_ == group_id;
322-
save_group_id_ = group_id;
323-
return extends;
324-
}
325-
326-
ARROW_DEPRECATED("Deprecated in 18.0.0. Use GetSegments instead.")
327-
Result<Segment> GetNextSegment(const ExecSpan& batch, int64_t offset) override {
328-
ARROW_SUPPRESS_DEPRECATION_WARNING
329-
ARROW_RETURN_NOT_OK(CheckForGetNextSegment(batch, offset, key_types_));
330-
if (offset == batch.length) {
331-
return MakeSegment(batch.length, offset, 0, kEmptyExtends);
332-
}
333-
// the group id must be computed prior to resetting the grouper, since it is compared
334-
// to save_group_id_, and after resetting the grouper produces incomparable group ids
335-
ARROW_ASSIGN_OR_RAISE(auto group_id, MapGroupIdAt(batch, offset));
336-
ExtendFunc bound_extend = [this, group_id](const void* data) {
337-
bool extends = Extend(&group_id);
338-
save_group_id_ = *static_cast<const group_id_t*>(data);
339-
return extends;
340-
};
341-
// resetting drops grouper's group-ids, freeing-up memory for the next segment
342-
ARROW_RETURN_NOT_OK(grouper_->Reset());
343-
344-
ARROW_ASSIGN_OR_RAISE(auto datum, grouper_->Consume(batch, offset));
345-
DCHECK(datum.is_array());
346-
// `data` is an array whose index-0 corresponds to index `offset` of `batch`
347-
const std::shared_ptr<ArrayData>& data = datum.array();
348-
DCHECK_EQ(data->length, batch.length - offset);
349-
DCHECK_EQ(data->GetNullCount(), 0);
350-
DCHECK_EQ(data->type->id(), GroupIdType::type_id);
351-
const group_id_t* values = data->GetValues<group_id_t>(1);
352-
int64_t cursor;
353-
for (cursor = 1; cursor < data->length; cursor++) {
354-
if (values[0] != values[cursor]) break;
355-
}
356-
int64_t length = cursor;
357-
bool extends = length > 0 ? bound_extend(values) : kEmptyExtends;
358-
return MakeSegment(batch.length, offset, length, extends);
359-
ARROW_UNSUPPRESS_DEPRECATION_WARNING
360-
}
361-
362227
Result<std::vector<Segment>> GetSegments(const ExecSpan& batch) override {
363228
RETURN_NOT_OK(CheckForGetSegments(batch, {key_types_}));
364229

cpp/src/arrow/compute/row/grouper.h

-5
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,6 @@ class ARROW_EXPORT RowSegmenter {
9696
/// independently, then `Reset` should be invoked before processing the next batch.
9797
virtual Status Reset() = 0;
9898

99-
/// \brief Get the next segment for the given batch starting from the given offset
100-
/// DEPRECATED: Due to its inefficiency, use GetSegments instead.
101-
ARROW_DEPRECATED("Deprecated in 18.0.0. Use GetSegments instead.")
102-
virtual Result<Segment> GetNextSegment(const ExecSpan& batch, int64_t offset) = 0;
103-
10499
/// \brief Get all segments for the given batch
105100
virtual Result<std::vector<Segment>> GetSegments(const ExecSpan& batch) = 0;
106101
};

0 commit comments

Comments
 (0)