From 72dc811d54445ed8c759667fb98568ff39f20d33 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Tue, 27 Jan 2026 09:51:29 -0800 Subject: [PATCH 1/2] GH-49034 [C++][Gandiva] Fix binary_string to not trigger error for null strings. --- cpp/src/gandiva/function_registry_string.cc | 2 +- cpp/src/gandiva/precompiled/string_ops.cc | 10 +++++----- .../gandiva/precompiled/string_ops_test.cc | 19 +++++++++++++++---- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index 2bc6936d77b..d523ab95270 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -432,7 +432,7 @@ std::vector GetStringFunctionRegistry() { NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("binary_string", {}, DataTypeVector{utf8()}, binary(), - kResultNullIfNull, "binary_string", NativeFunction::kNeedsContext), + kResultNullIfNull, "binary_string", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("left", {}, DataTypeVector{utf8(), int32()}, utf8(), kResultNullIfNull, "left_utf8_int32", NativeFunction::kNeedsContext), diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc index 0b31c769c99..7450018a556 100644 --- a/cpp/src/gandiva/precompiled/string_ops.cc +++ b/cpp/src/gandiva/precompiled/string_ops.cc @@ -2252,6 +2252,11 @@ const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text FORCE_INLINE const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_len, gdv_int32* out_len) { + if (text_len == 0) { + *out_len = 0; + return ""; + } + gdv_binary ret = reinterpret_cast(gdv_fn_context_arena_malloc(context, text_len)); @@ -2261,11 +2266,6 @@ const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_le return ""; } - if (text_len == 0) { - *out_len = 0; - return ""; - } - // converting hex encoded string to normal string int j = 0; for (int i = 0; i < text_len; i++, j++) { diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc index 9d0a4d71afe..982cc122cbd 100644 --- a/cpp/src/gandiva/precompiled/string_ops_test.cc +++ b/cpp/src/gandiva/precompiled/string_ops_test.cc @@ -1883,10 +1883,6 @@ TEST(TestStringOps, TestBinaryString) { std::string output = std::string(out_str, out_len); EXPECT_EQ(output, "TestString"); - out_str = binary_string(ctx_ptr, "", 0, &out_len); - output = std::string(out_str, out_len); - EXPECT_EQ(output, ""); - out_str = binary_string(ctx_ptr, "T", 1, &out_len); output = std::string(out_str, out_len); EXPECT_EQ(output, "T"); @@ -1912,6 +1908,21 @@ TEST(TestStringOps, TestBinaryString) { EXPECT_EQ(output, "OM"); } +TEST(TestStringOps, TestBinaryStringNull) { + //This test is only valid if it is the first to trigger a memory allocation in the context. + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + std::string output; + + out_str = binary_string(ctx_ptr, "", 0, &out_len); + ASSERT_FALSE(ctx.has_error()); + output = std::string(out_str, out_len); + EXPECT_EQ(output, ""); +} + TEST(TestStringOps, TestSplitPart) { gandiva::ExecutionContext ctx; uint64_t ctx_ptr = reinterpret_cast(&ctx); From 6b3bfd857d51d2a642d67142e064d0eeb755ffe1 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Tue, 27 Jan 2026 14:53:50 -0800 Subject: [PATCH 2/2] Lint fixes --- cpp/src/gandiva/function_registry_string.cc | 3 ++- cpp/src/gandiva/precompiled/string_ops_test.cc | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index d523ab95270..be57ce4f476 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -432,7 +432,8 @@ std::vector GetStringFunctionRegistry() { NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("binary_string", {}, DataTypeVector{utf8()}, binary(), - kResultNullIfNull, "binary_string", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + kResultNullIfNull, "binary_string", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("left", {}, DataTypeVector{utf8(), int32()}, utf8(), kResultNullIfNull, "left_utf8_int32", NativeFunction::kNeedsContext), diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc index 982cc122cbd..ca2b2b57856 100644 --- a/cpp/src/gandiva/precompiled/string_ops_test.cc +++ b/cpp/src/gandiva/precompiled/string_ops_test.cc @@ -1909,7 +1909,8 @@ TEST(TestStringOps, TestBinaryString) { } TEST(TestStringOps, TestBinaryStringNull) { - //This test is only valid if it is the first to trigger a memory allocation in the context. + // This test is only valid if it is the first to trigger a memory allocation in the + // context. gandiva::ExecutionContext ctx; uint64_t ctx_ptr = reinterpret_cast(&ctx); gdv_int32 out_len = 0;