diff --git a/spark/src/test/resources/sql-tests/expressions/string/left.sql b/spark/src/test/resources/sql-tests/expressions/string/left.sql
index 4605622e8b..31372f0a42 100644
--- a/spark/src/test/resources/sql-tests/expressions/string/left.sql
+++ b/spark/src/test/resources/sql-tests/expressions/string/left.sql
@@ -30,6 +30,17 @@ SELECT left(s, n) FROM test_str_left
 query
 SELECT left(s, 3) FROM test_str_left
 
+-- column + literal: edge cases
+query
+SELECT left(s, 0) FROM test_str_left
+
+query
+SELECT left(s, -1) FROM test_str_left
+
+query
+-- n exceeds length of 'hello' (5 chars)
+SELECT left(s, 10) FROM test_str_left
+
 -- literal + column
 query expect_fallback(Substring pos and len must be literals)
 SELECT left('hello', n) FROM test_str_left
@@ -37,3 +48,19 @@ SELECT left('hello', n) FROM test_str_left
 -- literal + literal
 query ignore(https://github.com/apache/datafusion-comet/issues/3337)
 SELECT left('hello', 3), left('hello', 0), left('hello', -1), left('', 3), left(NULL, 3)
+
+-- unicode
+statement
+CREATE TABLE test_str_left_unicode(s string) USING parquet
+
+statement
+INSERT INTO test_str_left_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL)
+
+query
+SELECT s, left(s, 2) FROM test_str_left_unicode
+
+query
+SELECT s, left(s, 4) FROM test_str_left_unicode
+
+query
+SELECT s, left(s, 0) FROM test_str_left_unicode
diff --git a/spark/src/test/resources/sql-tests/expressions/string/right.sql b/spark/src/test/resources/sql-tests/expressions/string/right.sql
new file mode 100644
index 0000000000..4fb9763bcc
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/string/right.sql
@@ -0,0 +1,96 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- Note: Right is a RuntimeReplaceable expression. The optimizer may rewrite it to the
+-- equivalent of If(IsNull(str), null, If(len <= 0, "", Substring(str, -len, len)))
+-- before Comet sees it. CometRight handles the serde when the unreplaced expression
+-- reaches Comet, so we use spark_answer_only to verify correctness in either case.
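+-- A few worked examples of those semantics (illustrative only, per standard Spark
+-- behavior; the queries below are the actual tests):
+--   right('hello', 3)  -> 'llo'   (same result as substring('hello', -3, 3))
+--   right('hello', 0)  -> ''      (non-positive len on a non-NULL string yields '')
+--   right('hello', -1) -> ''
+--   right(NULL, 0)     -> NULL    (NULL input stays NULL, even when len <= 0)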
+
+-- ConfigMatrix: parquet.enable.dictionary=false,true
+
+statement
+CREATE TABLE test_str_right(s string, n int) USING parquet
+
+statement
+INSERT INTO test_str_right VALUES ('hello', 3), ('hello', 0), ('hello', -1), ('hello', 10), ('', 3), (NULL, 3), ('hello', NULL)
+
+-- both columns: len must be literal, falls back
+query spark_answer_only
+SELECT right(s, n) FROM test_str_right
+
+-- column + literal: basic
+query spark_answer_only
+SELECT right(s, 3) FROM test_str_right
+
+-- column + literal: edge cases
+query spark_answer_only
+SELECT right(s, 0) FROM test_str_right
+
+query spark_answer_only
+SELECT right(s, -1) FROM test_str_right
+
+query spark_answer_only
+-- n exceeds length of 'hello' (5 chars)
+SELECT right(s, 10) FROM test_str_right
+
+-- literal + column: falls back
+query spark_answer_only
+SELECT right('hello', n) FROM test_str_right
+
+-- literal + literal
+query spark_answer_only
+SELECT right('hello', 3), right('hello', 0), right('hello', -1), right('', 3), right(NULL, 3)
+
+-- null propagation with len <= 0 (critical: NULL str with non-positive len must return NULL, not empty string)
+query spark_answer_only
+SELECT right(CAST(NULL AS STRING), 0), right(CAST(NULL AS STRING), -1), right(CAST(NULL AS STRING), 2)
+
+-- mixed null and non-null values with len <= 0
+statement
+CREATE TABLE test_str_right_nulls(s string) USING parquet
+
+statement
+INSERT INTO test_str_right_nulls VALUES ('hello'), (NULL), (''), ('world')
+
+query spark_answer_only
+SELECT s, right(s, 0) FROM test_str_right_nulls
+
+query spark_answer_only
+SELECT s, right(s, -1) FROM test_str_right_nulls
+
+query spark_answer_only
+SELECT s, right(s, 2) FROM test_str_right_nulls
+
+-- equivalence with substring
+query spark_answer_only
+SELECT s, right(s, 3), substring(s, -3, 3) FROM test_str_right_nulls
+
+-- unicode
+statement
+CREATE TABLE test_str_right_unicode(s string) USING parquet
+
+statement
+INSERT INTO test_str_right_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL)
+
+query spark_answer_only
+SELECT s, right(s, 2) FROM test_str_right_unicode
+
+query spark_answer_only
+SELECT s, right(s, 4) FROM test_str_right_unicode
+
+query spark_answer_only
+SELECT s, right(s, 0) FROM test_str_right_unicode
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index 206ac17260..8af177e0bb 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -523,123 +523,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
-  test("LEFT function") {
-    withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") {
-      checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 4) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, -1) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 100) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT LEFT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1")
-    }
-  }
-
-  test("LEFT function with unicode") {
-    val data = Seq("café", "hello世界", "😀emoji", "తెలుగు")
-    withParquetTable(data.zipWithIndex, "unicode_tbl") {
-      checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM unicode_tbl")
-      checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM unicode_tbl")
-      checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM unicode_tbl")
-    }
-  }
-
-  test("LEFT function equivalence with SUBSTRING") {
-    withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") {
-      val df = spark.sql("""
-        SELECT _1,
-               LEFT(_1, 3) as left_result,
-               SUBSTRING(_1, 1, 3) as substring_result
-        FROM equiv_tbl
-      """)
-      checkAnswer(
-        df.filter(
-          "left_result != substring_result OR " +
-            "(left_result IS NULL AND substring_result IS NOT NULL) OR " +
-            "(left_result IS NOT NULL AND substring_result IS NULL)"),
-        Seq.empty)
-    }
-  }
-
-  test("LEFT function with dictionary") {
-    val data = (0 until 1000)
-      .map(_ % 5)
-      .map(i => s"value$i")
-    withParquetTable(data.zipWithIndex, "dict_tbl") {
-      checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM dict_tbl")
-    }
-  }
-
-  test("RIGHT function") {
-    withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") {
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 4) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 100) FROM tbl")
-      checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1")
-    }
-  }
-
-  test("RIGHT function with unicode") {
-    val data = Seq("café", "hello世界", "😀emoji", "తెలుగు")
-    withParquetTable(data.zipWithIndex, "unicode_tbl") {
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM unicode_tbl")
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM unicode_tbl")
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM unicode_tbl")
-    }
-  }
-
-  test("RIGHT function equivalence with SUBSTRING negative pos") {
-    withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") {
-      val df = spark.sql("""
-        SELECT _1,
-               RIGHT(_1, 3) as right_result,
-               SUBSTRING(_1, -3, 3) as substring_result
-        FROM equiv_tbl
-      """)
-      checkAnswer(
-        df.filter(
-          "right_result != substring_result OR " +
-            "(right_result IS NULL AND substring_result IS NOT NULL) OR " +
-            "(right_result IS NOT NULL AND substring_result IS NULL)"),
-        Seq.empty)
-    }
-  }
-
-  test("RIGHT function with dictionary") {
-    val data = (0 until 1000)
-      .map(_ % 5)
-      .map(i => s"value$i")
-    withParquetTable(data.zipWithIndex, "dict_tbl") {
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM dict_tbl")
-    }
-  }
-
-  test("RIGHT function NULL handling") {
-    // Test NULL propagation with len = 0 (critical edge case)
-    withParquetTable((0 until 5).map(i => (s"test$i", i)), "null_tbl") {
-      checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 0) FROM null_tbl LIMIT 1")
-      checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -1) FROM null_tbl LIMIT 1")
-      checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -5) FROM null_tbl LIMIT 1")
-    }
-
-    // Test non-NULL strings with len <= 0 (should return empty string)
-    withParquetTable((0 until 5).map(i => (s"test$i", i)), "edge_tbl") {
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM edge_tbl")
-      checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM edge_tbl")
-    }
-
-    // Test mixed NULL and non-NULL values with a table
-    val table = "right_null_edge"
-    withTable(table) {
-      sql(s"create table $table(str string) using parquet")
-      sql(s"insert into $table values('hello'), (NULL), (''), ('world')")
-      checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 0) FROM $table")
-      checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, -1) FROM $table")
-      checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 2) FROM $table")
-    }
-  }
-
   test("hour, minute, second") {
     Seq(true, false).foreach { dictionaryEnabled =>
       withTempDir { dir =>
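For reference, the behaviors that right.sql and the removed Scala tests both emphasize, NULL propagation with non-positive length and equivalence with substring, can be reproduced outside the patch with plain Spark. A minimal sketch, not part of the change, assuming only a local SparkSession and Spark's public SQL API (object and app names here are illustrative):

import org.apache.spark.sql.SparkSession

// Minimal sketch: checks the RIGHT edge cases that right.sql and the removed
// Scala tests both exercise, using plain Spark (no Comet involved), so the
// output reflects the answers that spark_answer_only compares against.
object RightEdgeCasesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("right-edge-cases").getOrCreate()

    // NULL input stays NULL even when len <= 0 (it must not collapse to '').
    spark.sql("SELECT right(CAST(NULL AS STRING), 0), right(CAST(NULL AS STRING), -1)").show()

    // For len > 0, right(s, n) matches substring(s, -n, n); len <= 0 on a
    // non-NULL string yields the empty string.
    spark
      .sql("SELECT right('hello', 3), substring('hello', -3, 3), right('hello', 0), right('hello', -1)")
      .show()

    spark.stop()
  }
}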