Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions spark/src/test/resources/sql-tests/expressions/string/left.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,36 @@ SELECT left(s, n) FROM test_str_left
-- column + literal: basic case
query
SELECT left(s, 3) FROM test_str_left

-- column + literal: edge cases
-- len = 0 is expected to yield the empty string for non-NULL input
query
SELECT left(s, 0) FROM test_str_left

-- negative len is expected to behave like len = 0
query
SELECT left(s, -1) FROM test_str_left

-- len = 10 exceeds the length of the stored values (presumably short test
-- strings — confirm against the fixture above this hunk), so the whole
-- string should be returned unchanged
query
SELECT left(s, 10) FROM test_str_left
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would probably be nice to mention why 10 is an edge case — I assume it is related to the length of s?


-- literal + column
-- a column-valued len cannot be handled natively (Substring requires a
-- literal len), so the harness expects a fallback to Spark
query expect_fallback(Substring pos and len must be literals)
SELECT left('hello', n) FROM test_str_left

-- literal + literal
-- all-literal arguments; currently skipped pending the linked issue
query ignore(https://github.com/apache/datafusion-comet/issues/3337)
SELECT left('hello', 3), left('hello', 0), left('hello', -1), left('', 3), left(NULL, 3)

-- unicode
-- multi-byte characters: left() must count characters, not bytes
statement
CREATE TABLE test_str_left_unicode(s string) USING parquet

statement
INSERT INTO test_str_left_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL)

-- len values 2, 4 and 0 cover a partial prefix, a character-boundary
-- split, and the empty result respectively
query
SELECT s, left(s, 2) FROM test_str_left_unicode

query
SELECT s, left(s, 4) FROM test_str_left_unicode

query
SELECT s, left(s, 0) FROM test_str_left_unicode
95 changes: 95 additions & 0 deletions spark/src/test/resources/sql-tests/expressions/string/right.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- Note: Right is a RuntimeReplaceable expression. Spark replaces it with
-- If(IsNull(str), null, If(len <= 0, "", Substring(str, -len, len)))
-- before Comet sees it. CometRight handles the serde, but the optimizer
-- may replace it first. We use spark_answer_only to verify correctness.

-- ConfigMatrix: parquet.enable.dictionary=false,true

-- fixture: covers basic, zero, negative, over-length and NULL len values,
-- plus empty-string and NULL string inputs
statement
CREATE TABLE test_str_right(s string, n int) USING parquet

statement
INSERT INTO test_str_right VALUES ('hello', 3), ('hello', 0), ('hello', -1), ('hello', 10), ('', 3), (NULL, 3), ('hello', NULL)

-- both columns: len must be literal, falls back
query spark_answer_only
SELECT right(s, n) FROM test_str_right

-- column + literal: basic
query spark_answer_only
SELECT right(s, 3) FROM test_str_right

-- column + literal: edge cases
-- len = 0: empty string for non-NULL s; NULL stays NULL per the rewrite above
query spark_answer_only
SELECT right(s, 0) FROM test_str_right

-- negative len behaves like len = 0 (the rewrite tests len <= 0)
query spark_answer_only
SELECT right(s, -1) FROM test_str_right

-- len (10) longer than 'hello' (5 chars): the whole string is returned
query spark_answer_only
SELECT right(s, 10) FROM test_str_right

-- literal + column: falls back
query spark_answer_only
SELECT right('hello', n) FROM test_str_right

-- literal + literal
query spark_answer_only
SELECT right('hello', 3), right('hello', 0), right('hello', -1), right('', 3), right(NULL, 3)

-- null propagation with len <= 0 (critical: NULL str with non-positive len must return NULL, not empty string)
query spark_answer_only
SELECT right(CAST(NULL AS STRING), 0), right(CAST(NULL AS STRING), -1), right(CAST(NULL AS STRING), 2)

-- mixed null and non-null values with len <= 0
statement
CREATE TABLE test_str_right_nulls(s string) USING parquet

statement
INSERT INTO test_str_right_nulls VALUES ('hello'), (NULL), (''), ('world')

query spark_answer_only
SELECT s, right(s, 0) FROM test_str_right_nulls

query spark_answer_only
SELECT s, right(s, -1) FROM test_str_right_nulls

query spark_answer_only
SELECT s, right(s, 2) FROM test_str_right_nulls

-- equivalence with substring
-- right(s, 3) should agree with substring(s, -3, 3) per the rewrite above
query spark_answer_only
SELECT s, right(s, 3), substring(s, -3, 3) FROM test_str_right_nulls

-- unicode
-- multi-byte characters: right() must count characters, not bytes
statement
CREATE TABLE test_str_right_unicode(s string) USING parquet

statement
INSERT INTO test_str_right_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL)

query spark_answer_only
SELECT s, right(s, 2) FROM test_str_right_unicode

query spark_answer_only
SELECT s, right(s, 4) FROM test_str_right_unicode

query spark_answer_only
SELECT s, right(s, 0) FROM test_str_right_unicode
117 changes: 0 additions & 117 deletions spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -523,123 +523,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
}

// LEFT on a string column with literal len values: basic (2, 4), zero,
// negative, over-length (100), and a NULL string input.
// checkSparkAnswerAndOperator presumably also asserts the Comet operator
// stays in the plan — helper is defined elsewhere in this suite.
test("LEFT function") {
withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") {
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM tbl")
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 4) FROM tbl")
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM tbl")
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, -1) FROM tbl")
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 100) FROM tbl")
checkSparkAnswerAndOperator("SELECT LEFT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1")
}
}

// LEFT over multi-byte strings (accented latin, CJK, emoji, Telugu):
// len must be counted in characters, not bytes.
test("LEFT function with unicode") {
val data = Seq("café", "hello世界", "😀emoji", "తెలుగు")
withParquetTable(data.zipWithIndex, "unicode_tbl") {
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM unicode_tbl")
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM unicode_tbl")
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM unicode_tbl")
}
}

// LEFT(s, 3) must agree with SUBSTRING(s, 1, 3) on every row, including
// NULL-vs-non-NULL disagreement; the filter selects mismatching rows,
// which must come back empty.
test("LEFT function equivalence with SUBSTRING") {
withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") {
val df = spark.sql("""
SELECT _1,
LEFT(_1, 3) as left_result,
SUBSTRING(_1, 1, 3) as substring_result
FROM equiv_tbl
""")
checkAnswer(
df.filter(
// != is NULL-blind, so the two IS NULL clauses catch one-sided NULLs
"left_result != substring_result OR " +
"(left_result IS NULL AND substring_result IS NOT NULL) OR " +
"(left_result IS NOT NULL AND substring_result IS NULL)"),
Seq.empty)
}
}

// Only 5 distinct values over 1000 rows, so the Parquet writer is likely
// to dictionary-encode the string column — exercises the dictionary path.
test("LEFT function with dictionary") {
val data = (0 until 1000)
.map(_ % 5)
.map(i => s"value$i")
withParquetTable(data.zipWithIndex, "dict_tbl") {
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM dict_tbl")
}
}

// RIGHT on a string column with literal len values: basic (2, 4), zero,
// negative, over-length (100), and a NULL string input — mirrors the
// "LEFT function" test above.
test("RIGHT function") {
withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") {
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM tbl")
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 4) FROM tbl")
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM tbl")
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM tbl")
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 100) FROM tbl")
checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1")
}
}

// RIGHT over multi-byte strings (accented latin, CJK, emoji, Telugu):
// len must be counted in characters, not bytes.
test("RIGHT function with unicode") {
val data = Seq("café", "hello世界", "😀emoji", "తెలుగు")
withParquetTable(data.zipWithIndex, "unicode_tbl") {
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM unicode_tbl")
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM unicode_tbl")
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM unicode_tbl")
}
}

// RIGHT(s, 3) must agree with SUBSTRING(s, -3, 3) on every row, including
// NULL-vs-non-NULL disagreement; the filter selects mismatching rows,
// which must come back empty.
test("RIGHT function equivalence with SUBSTRING negative pos") {
withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") {
val df = spark.sql("""
SELECT _1,
RIGHT(_1, 3) as right_result,
SUBSTRING(_1, -3, 3) as substring_result
FROM equiv_tbl
""")
checkAnswer(
df.filter(
// != is NULL-blind, so the two IS NULL clauses catch one-sided NULLs
"right_result != substring_result OR " +
"(right_result IS NULL AND substring_result IS NOT NULL) OR " +
"(right_result IS NOT NULL AND substring_result IS NULL)"),
Seq.empty)
}
}

// Only 5 distinct values over 1000 rows, so the Parquet writer is likely
// to dictionary-encode the string column — exercises the dictionary path.
test("RIGHT function with dictionary") {
val data = (0 until 1000)
.map(_ % 5)
.map(i => s"value$i")
withParquetTable(data.zipWithIndex, "dict_tbl") {
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM dict_tbl")
}
}

// NULL/empty edge cases for RIGHT: a NULL input must stay NULL even when
// len <= 0, while a non-NULL input with len <= 0 must yield the empty
// string — distinguishing the two is the critical case here.
test("RIGHT function NULL handling") {
// Test NULL propagation with len = 0 (critical edge case)
withParquetTable((0 until 5).map(i => (s"test$i", i)), "null_tbl") {
checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 0) FROM null_tbl LIMIT 1")
checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -1) FROM null_tbl LIMIT 1")
checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -5) FROM null_tbl LIMIT 1")
}

// Test non-NULL strings with len <= 0 (should return empty string)
withParquetTable((0 until 5).map(i => (s"test$i", i)), "edge_tbl") {
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM edge_tbl")
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM edge_tbl")
}

// Test mixed NULL and non-NULL values with a table
// (includes an empty string, which must not be confused with NULL)
val table = "right_null_edge"
withTable(table) {
sql(s"create table $table(str string) using parquet")
sql(s"insert into $table values('hello'), (NULL), (''), ('world')")
checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 0) FROM $table")
checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, -1) FROM $table")
checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 2) FROM $table")
}
}

test("hour, minute, second") {
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
Expand Down
Loading