Skip to content

Commit

Permalink
Enforce utf-8 validity in string() conversion from bytes.
Browse files Browse the repository at this point in the history
Relies on SIMD-optimized library https://github.com/cyb70289/utf8.

PiperOrigin-RevId: 353736473
  • Loading branch information
kyessenov committed Jan 29, 2021
1 parent bca699e commit e53c05c
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 11 deletions.
2 changes: 0 additions & 2 deletions conformance/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,6 @@ cc_binary(
# uncommented when the spec changes to truncation rather than rounding.
"--skip_test=conversions/int/double_nearest,double_nearest_neg,double_half_away_neg,double_half_away_pos",
"--skip_test=conversions/uint/double_nearest,double_nearest_int,double_half_away",
# TODO(issues/82): Unexpected behavior when converting invalid bytes to string.
"--skip_test=conversions/string/bytes_invalid",
# TODO(issues/96): Well-known type conversion support.
"--skip_test=proto2/literal_wellknown",
"--skip_test=proto3/literal_wellknown",
Expand Down
1 change: 1 addition & 0 deletions eval/public/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ cc_library(
":cel_function_adapter",
":cel_function_registry",
":cel_options",
"//base:unilib",
"//eval/public/containers:container_backed_list_impl",
"@com_google_absl//absl/numeric:int128",
"@com_google_absl//absl/status",
Expand Down
18 changes: 9 additions & 9 deletions eval/public/builtin_func_registrar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "eval/public/cel_options.h"
#include "eval/public/containers/container_backed_list_impl.h"
#include "re2/re2.h"
#include "base/unilib.h"

namespace google {
namespace api {
Expand Down Expand Up @@ -1145,16 +1146,15 @@ absl::Status RegisterStringConversionFunctions(
return absl::OkStatus();
}

// TODO(issues/82): ensure the bytes conversion to string handles UTF-8
// properly, and avoids unncessary allocations.
// bytes -> string
auto status = FunctionAdapter<CelValue::StringHolder, CelValue::BytesHolder>::
CreateAndRegister(
auto status =
FunctionAdapter<CelValue, CelValue::BytesHolder>::CreateAndRegister(
builtin::kString, false,
[](Arena* arena,
CelValue::BytesHolder value) -> CelValue::StringHolder {
return CelValue::StringHolder(
Arena::Create<std::string>(arena, std::string(value.value())));
[](Arena* arena, CelValue::BytesHolder value) -> CelValue {
if (UniLib::IsStructurallyValid(value.value())) {
return CelValue::CreateStringView(value.value());
}
return CreateErrorValue(arena, "invalid UTF-8 bytes value",
absl::StatusCode::kInvalidArgument);
},
registry);
if (!status.ok()) return status;
Expand Down
9 changes: 9 additions & 0 deletions eval/public/builtin_func_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1686,6 +1686,15 @@ TEST_F(BuiltinsTest, BytesToString) {
EXPECT_EQ(result_value.StringOrDie().value(), "abcd");
}

TEST_F(BuiltinsTest, BytesToStringInvalid) {
std::string input = "\xFF";
std::vector<CelValue> args = {CelValue::CreateBytes(&input)};
CelValue result_value;
ASSERT_NO_FATAL_FAILURE(
PerformRun(builtin::kString, {}, args, &result_value));
ASSERT_TRUE(result_value.IsError());
}

TEST_F(BuiltinsTest, StringToString) {
std::string input = "abcd";
std::vector<CelValue> args = {CelValue::CreateString(&input)};
Expand Down
3 changes: 3 additions & 0 deletions eval/public/cel_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ class CelValue {

static CelValue CreateString(StringHolder holder) { return CelValue(holder); }

// Returns a string value from a string_view. Warning: the caller is
// responsible for the lifecycle of the backing string. Prefer CreateString
// instead.
static CelValue CreateStringView(absl::string_view value) {
return CelValue(StringHolder(value));
}
Expand Down
3 changes: 3 additions & 0 deletions testutil/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ cc_library(

cc_library(
name = "test_data_io",
testonly = True,
srcs = [
"test_data_io.cc",
],
Expand Down Expand Up @@ -76,6 +77,7 @@ cc_library(
# third_party/cel/spec/testdata/unique_values.textpb
cc_binary(
name = "test_data_gen",
testonly = True,
srcs = [
"test_data_gen.cc",
],
Expand Down Expand Up @@ -110,6 +112,7 @@ cc_test(

cc_library(
name = "util",
testonly = True,
hdrs = [
"util.h",
],
Expand Down

0 comments on commit e53c05c

Please # to comment.