From 49945c2c91dc219fdf45290ffe227bb0a2e97c0a Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 9 Feb 2026 15:41:02 -0700 Subject: [PATCH 1/3] test: Add SQL file tests for left and right expressions Enhance left.sql with additional edge case coverage (column + literal for len=0, len=-1, len>length) and unicode tests. Add right.sql with comprehensive coverage including null propagation with non-positive length, substring equivalence, unicode, and mixed null/non-null values. Co-Authored-By: Claude Opus 4.6 --- .../sql-tests/expressions/string/left.sql | 26 +++++ .../sql-tests/expressions/string/right.sql | 95 +++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 spark/src/test/resources/sql-tests/expressions/string/right.sql diff --git a/spark/src/test/resources/sql-tests/expressions/string/left.sql b/spark/src/test/resources/sql-tests/expressions/string/left.sql index 4605622e8b..256e952f12 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/left.sql +++ b/spark/src/test/resources/sql-tests/expressions/string/left.sql @@ -30,6 +30,16 @@ SELECT left(s, n) FROM test_str_left query SELECT left(s, 3) FROM test_str_left +-- column + literal: edge cases +query +SELECT left(s, 0) FROM test_str_left + +query +SELECT left(s, -1) FROM test_str_left + +query +SELECT left(s, 10) FROM test_str_left + -- literal + column query expect_fallback(Substring pos and len must be literals) SELECT left('hello', n) FROM test_str_left @@ -37,3 +47,19 @@ SELECT left('hello', n) FROM test_str_left -- literal + literal query ignore(https://github.com/apache/datafusion-comet/issues/3337) SELECT left('hello', 3), left('hello', 0), left('hello', -1), left('', 3), left(NULL, 3) + +-- unicode +statement +CREATE TABLE test_str_left_unicode(s string) USING parquet + +statement +INSERT INTO test_str_left_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL) + +query +SELECT s, left(s, 2) FROM test_str_left_unicode + +query +SELECT s, left(s, 4) FROM test_str_left_unicode + +query +SELECT s, left(s, 0) FROM test_str_left_unicode diff --git a/spark/src/test/resources/sql-tests/expressions/string/right.sql b/spark/src/test/resources/sql-tests/expressions/string/right.sql new file mode 100644 index 0000000000..6157c9c160 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/string/right.sql @@ -0,0 +1,95 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- Note: Right is a RuntimeReplaceable expression. Spark replaces it with +-- If(IsNull(str), null, If(len <= 0, "", Substring(str, -len, len))) +-- before Comet sees it. CometRight handles the serde, but the optimizer +-- may replace it first. We use spark_answer_only to verify correctness. + +-- ConfigMatrix: parquet.enable.dictionary=false,true + +statement +CREATE TABLE test_str_right(s string, n int) USING parquet + +statement +INSERT INTO test_str_right VALUES ('hello', 3), ('hello', 0), ('hello', -1), ('hello', 10), ('', 3), (NULL, 3), ('hello', NULL) + +-- both columns: len must be literal, falls back +query spark_answer_only +SELECT right(s, n) FROM test_str_right + +-- column + literal: basic +query spark_answer_only +SELECT right(s, 3) FROM test_str_right + +-- column + literal: edge cases +query spark_answer_only +SELECT right(s, 0) FROM test_str_right + +query spark_answer_only +SELECT right(s, -1) FROM test_str_right + +query spark_answer_only +SELECT right(s, 10) FROM test_str_right + +-- literal + column: falls back +query spark_answer_only +SELECT right('hello', n) FROM test_str_right + +-- literal + literal +query spark_answer_only +SELECT right('hello', 3), right('hello', 0), right('hello', -1), right('', 3), right(NULL, 3) + +-- null propagation with len <= 0 (critical: NULL str with non-positive len must return NULL, not empty string) +query spark_answer_only +SELECT right(CAST(NULL AS STRING), 0), right(CAST(NULL AS STRING), -1), right(CAST(NULL AS STRING), 2) + +-- mixed null and non-null values with len <= 0 +statement +CREATE TABLE test_str_right_nulls(s string) USING parquet + +statement +INSERT INTO test_str_right_nulls VALUES ('hello'), (NULL), (''), ('world') + +query spark_answer_only +SELECT s, right(s, 0) FROM test_str_right_nulls + +query spark_answer_only +SELECT s, right(s, -1) FROM test_str_right_nulls + +query spark_answer_only +SELECT s, right(s, 2) FROM test_str_right_nulls + +-- equivalence with substring +query spark_answer_only +SELECT s, right(s, 3), substring(s, -3, 3) FROM test_str_right_nulls + +-- unicode +statement +CREATE TABLE test_str_right_unicode(s string) USING parquet + +statement +INSERT INTO test_str_right_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL) + +query spark_answer_only +SELECT s, right(s, 2) FROM test_str_right_unicode + +query spark_answer_only +SELECT s, right(s, 4) FROM test_str_right_unicode + +query spark_answer_only +SELECT s, right(s, 0) FROM test_str_right_unicode From b9fb95cc5eb2a411fdec32ee59e02d2cc5d629d6 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 9 Feb 2026 15:45:23 -0700 Subject: [PATCH 2/3] Remove LEFT/RIGHT Scala tests now covered by SQL file tests Co-Authored-By: Claude Opus 4.6 --- .../apache/comet/CometExpressionSuite.scala | 117 ------------------ 1 file changed, 117 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 206ac17260..8af177e0bb 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -523,123 +523,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } - test("LEFT function") { - withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") { - checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM tbl") - checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 4) FROM tbl") - checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM tbl") - checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, -1) FROM tbl") - checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 100) FROM tbl") - checkSparkAnswerAndOperator("SELECT LEFT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1") - } - } - - test("LEFT function with unicode") { - val data = Seq("café", "hello世界", "😀emoji", "తెలుగు") - withParquetTable(data.zipWithIndex, "unicode_tbl") { - checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM unicode_tbl") - checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM unicode_tbl") - checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM unicode_tbl") - } - } - - test("LEFT function equivalence with SUBSTRING") { - withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") { - val df = spark.sql(""" - SELECT _1, - LEFT(_1, 3) as left_result, - SUBSTRING(_1, 1, 3) as substring_result - FROM equiv_tbl - """) - checkAnswer( - df.filter( - "left_result != substring_result OR " + - "(left_result IS NULL AND substring_result IS NOT NULL) OR " + - "(left_result IS NOT NULL AND substring_result IS NULL)"), - Seq.empty) - } - } - - test("LEFT function with dictionary") { - val data = (0 until 1000) - .map(_ % 5) - .map(i => s"value$i") - withParquetTable(data.zipWithIndex, "dict_tbl") { - checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM dict_tbl") - } - } - - test("RIGHT function") { - withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") { - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM tbl") - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 4) FROM tbl") - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM tbl") - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM tbl") - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 100) FROM tbl") - checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1") - } - } - - test("RIGHT function with unicode") { - val data = Seq("café", "hello世界", "😀emoji", "తెలుగు") - withParquetTable(data.zipWithIndex, "unicode_tbl") { - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM unicode_tbl") - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM unicode_tbl") - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM unicode_tbl") - } - } - - test("RIGHT function equivalence with SUBSTRING negative pos") { - withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") { - val df = spark.sql(""" - SELECT _1, - RIGHT(_1, 3) as right_result, - SUBSTRING(_1, -3, 3) as substring_result - FROM equiv_tbl - """) - checkAnswer( - df.filter( - "right_result != substring_result OR " + - "(right_result IS NULL AND substring_result IS NOT NULL) OR " + - "(right_result IS NOT NULL AND substring_result IS NULL)"), - Seq.empty) - } - } - - test("RIGHT function with dictionary") { - val data = (0 until 1000) - .map(_ % 5) - .map(i => s"value$i") - withParquetTable(data.zipWithIndex, "dict_tbl") { - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM dict_tbl") - } - } - - test("RIGHT function NULL handling") { - // Test NULL propagation with len = 0 (critical edge case) - withParquetTable((0 until 5).map(i => (s"test$i", i)), "null_tbl") { - checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 0) FROM null_tbl LIMIT 1") - checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -1) FROM null_tbl LIMIT 1") - checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -5) FROM null_tbl LIMIT 1") - } - - // Test non-NULL strings with len <= 0 (should return empty string) - withParquetTable((0 until 5).map(i => (s"test$i", i)), "edge_tbl") { - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM edge_tbl") - checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM edge_tbl") - } - - // Test mixed NULL and non-NULL values with a table - val table = "right_null_edge" - withTable(table) { - sql(s"create table $table(str string) using parquet") - sql(s"insert into $table values('hello'), (NULL), (''), ('world')") - checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 0) FROM $table") - checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, -1) FROM $table") - checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 2) FROM $table") - } - } - test("hour, minute, second") { Seq(true, false).foreach { dictionaryEnabled => withTempDir { dir => From 43c65a0b871271659a21dfd8f95464d27400aa4f Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 11 Feb 2026 08:14:46 -0700 Subject: [PATCH 3/3] address feedback --- spark/src/test/resources/sql-tests/expressions/string/left.sql | 1 + spark/src/test/resources/sql-tests/expressions/string/right.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/spark/src/test/resources/sql-tests/expressions/string/left.sql b/spark/src/test/resources/sql-tests/expressions/string/left.sql index 256e952f12..31372f0a42 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/left.sql +++ b/spark/src/test/resources/sql-tests/expressions/string/left.sql @@ -38,6 +38,7 @@ query SELECT left(s, -1) FROM test_str_left query +-- n exceeds length of 'hello' (5 chars) SELECT left(s, 10) FROM test_str_left -- literal + column diff --git a/spark/src/test/resources/sql-tests/expressions/string/right.sql b/spark/src/test/resources/sql-tests/expressions/string/right.sql index 6157c9c160..4fb9763bcc 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/right.sql +++ b/spark/src/test/resources/sql-tests/expressions/string/right.sql @@ -44,6 +44,7 @@ query spark_answer_only SELECT right(s, -1) FROM test_str_right query spark_answer_only +-- n exceeds length of 'hello' (5 chars) SELECT right(s, 10) FROM test_str_right -- literal + column: falls back