Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions native/spark-expr/src/conversion_funcs/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,16 @@ pub(crate) fn is_df_cast_from_decimal_spark_compatible(to_type: &DataType) -> bo
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::Float32
| DataType::Float64
| DataType::Float32 // DataFusion divides i128 by 10^scale in f64, then narrows to
| DataType::Float64 // f32 if needed; empirically matches Spark's BigDecimal.doubleValue
// / floatValue for all tested values
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _)
| DataType::Utf8 // note that there can be formatting differences
)
// Note: Boolean is intentionally absent. Decimal-to-boolean uses a dedicated
// spark_cast_decimal_to_boolean function (in cast.rs) that checks the raw i128
// value, bypassing the DataFusion cast kernel entirely.
}

macro_rules! cast_float_to_timestamp_impl {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_cast_decimal(d10 decimal(10,2), d5 decimal(5,0)) USING parquet

-- Rows cover both signs, zero, the smallest representable non-zero fraction,
-- the min/max values for each column's precision/scale, and NULL.
statement
INSERT INTO test_cast_decimal VALUES
(123.45, 123),
(-67.89, -67),
(0.00, 0),
(0.01, 1),
(-0.01, -1),
(99999999.99, 99999),
(-99999999.99, -99999),
(NULL, NULL)

-- decimal(10,2) column to FLOAT
query
SELECT cast(d10 as float) FROM test_cast_decimal
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we also cast to int/longs?


-- decimal(10,2) column to DOUBLE
query
SELECT cast(d10 as double) FROM test_cast_decimal

-- decimal(10,2) column to INT (fractional part is dropped)
query
SELECT cast(d10 as int) FROM test_cast_decimal

-- decimal(10,2) column to LONG (fractional part is dropped)
query
SELECT cast(d10 as long) FROM test_cast_decimal

-- decimal(10,2) column to BOOLEAN
query
SELECT cast(d10 as boolean) FROM test_cast_decimal

-- decimal(5,0) column to FLOAT
query
SELECT cast(d5 as float) FROM test_cast_decimal

-- decimal(5,0) column to DOUBLE
query
SELECT cast(d5 as double) FROM test_cast_decimal

-- decimal(5,0) column to INT
query
SELECT cast(d5 as int) FROM test_cast_decimal

-- decimal(5,0) column to LONG
query
SELECT cast(d5 as long) FROM test_cast_decimal

-- decimal(5,0) column to BOOLEAN
query
SELECT cast(d5 as boolean) FROM test_cast_decimal

-- decimal(38,18) table: covers boundary values that exercise the i128 code path
statement
CREATE TABLE test_cast_decimal_high_precision(d38 decimal(38,18)) USING parquet

-- Values sit at the extremes of decimal(38,18), at the Long min/max boundary,
-- plus +/-1, exact zero, and NULL.
statement
INSERT INTO test_cast_decimal_high_precision VALUES
(CAST('99999999999999999999.999999999999999999' AS decimal(38,18))),
(CAST('-99999999999999999999.999999999999999999' AS decimal(38,18))),
(CAST('9223372036854775807.000000000000000000' AS decimal(38,18))),
(CAST('-9223372036854775808.000000000000000000' AS decimal(38,18))),
(CAST('1.000000000000000000' AS decimal(38,18))),
(CAST('-1.000000000000000000' AS decimal(38,18))),
(CAST('0.000000000000000000' AS decimal(38,18))),
(NULL)

-- decimal(38,18) column to FLOAT
query
SELECT cast(d38 as float) FROM test_cast_decimal_high_precision

-- decimal(38,18) column to DOUBLE
query
SELECT cast(d38 as double) FROM test_cast_decimal_high_precision

-- decimal(38,18) column to INT
query
SELECT cast(d38 as int) FROM test_cast_decimal_high_precision

-- decimal(38,18) column to LONG
query
SELECT cast(d38 as long) FROM test_cast_decimal_high_precision

-- decimal(38,18) column to BOOLEAN
query
SELECT cast(d38 as boolean) FROM test_cast_decimal_high_precision

-- Additional precision/scale combinations: decimal(15,5) has a fractional part and
-- can overflow INT; decimal(20,0) has no fractional part and can overflow LONG.
statement
CREATE TABLE test_cast_decimal_extra(
d15_5 decimal(15,5),
d20_0 decimal(20,0)
) USING parquet

statement
INSERT INTO test_cast_decimal_extra VALUES
(2147483648.12345, 9223372036854775808), -- d15_5 overflows INT; d20_0 overflows LONG
(-2147483649.12345, -9223372036854775809),
(123.45678, 2147483648), -- fractional truncation; d20_0 overflows INT only
(0.00001, 1),
(-0.00001, -1),
(0.00000, 0),
(NULL, NULL)

-- decimal(15,5) to INT (exercises fractional truncation and int overflow)
query
SELECT cast(d15_5 as int) FROM test_cast_decimal_extra

-- decimal(15,5) to LONG
query
SELECT cast(d15_5 as long) FROM test_cast_decimal_extra

-- decimal(15,5) to BOOLEAN
query
SELECT cast(d15_5 as boolean) FROM test_cast_decimal_extra

-- decimal(20,0) to INT
query
SELECT cast(d20_0 as int) FROM test_cast_decimal_extra

-- decimal(20,0) to LONG (exercises long overflow)
query
SELECT cast(d20_0 as long) FROM test_cast_decimal_extra

-- decimal(20,0) to BOOLEAN
query
SELECT cast(d20_0 as boolean) FROM test_cast_decimal_extra

-- literal casts: decimal(10,2) to float
query
SELECT cast(cast(1.50 as decimal(10,2)) as float),
cast(cast(0.00 as decimal(10,2)) as float),
cast(cast(-1.50 as decimal(10,2)) as float),
cast(cast(NULL as decimal(10,2)) as float)

-- literal casts: decimal(5,0) to float
query
SELECT cast(cast(123 as decimal(5,0)) as float),
cast(cast(0 as decimal(5,0)) as float),
cast(cast(-123 as decimal(5,0)) as float),
cast(cast(NULL as decimal(5,0)) as float)

-- literal casts: decimal(10,2) to boolean (non-zero -> true, zero -> false)
query
SELECT cast(cast(1.50 as decimal(10,2)) as boolean),
cast(cast(0.00 as decimal(10,2)) as boolean),
cast(cast(NULL as decimal(10,2)) as boolean)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we could improve variability by adding more scale / precision options


-- literal casts: decimal(5,0) to boolean (non-zero -> true, zero -> false, NULL -> NULL)
query
SELECT cast(cast(1 as decimal(5,0)) as boolean),
cast(cast(0 as decimal(5,0)) as boolean),
cast(cast(NULL as decimal(5,0)) as boolean)
100 changes: 99 additions & 1 deletion spark/src/test/scala/org/apache/comet/CometCastSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,34 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
castTest(generateDecimalsPrecision10Scale2(), DataTypes.DoubleType)
}

// CAST from DecimalType(15,5): fractional truncation for int/long; int overflow possible

test("cast DecimalType(15,5) to IntegerType") {
  // Generator supplies fractional values plus values just outside the Int range,
  // so this exercises both truncation and int-overflow handling.
  castTest(generateDecimalsPrecision15Scale5(), DataTypes.IntegerType)
}

test("cast DecimalType(15,5) to LongType") {
  // All generated values fit in a Long; exercises fractional truncation only.
  castTest(generateDecimalsPrecision15Scale5(), DataTypes.LongType)
}

test("cast DecimalType(15,5) to BooleanType") {
  // Generator includes tiny non-zero values (must map to true) and exact zero (false).
  castTest(generateDecimalsPrecision15Scale5(), DataTypes.BooleanType)
}

// CAST from DecimalType(20,0): large integers with no fractional part; long overflow possible

test("cast DecimalType(20,0) to IntegerType") {
  // Generator includes values beyond both Int and Long range; exercises int overflow.
  castTest(generateDecimalsPrecision20Scale0(), DataTypes.IntegerType)
}

test("cast DecimalType(20,0) to LongType") {
  // Generator includes Long.MaxValue + 1 and Long.MinValue - 1; exercises long overflow.
  castTest(generateDecimalsPrecision20Scale0(), DataTypes.LongType)
}

test("cast DecimalType(20,0) to BooleanType") {
  // Any non-zero integer value maps to true; only exact zero maps to false.
  castTest(generateDecimalsPrecision20Scale0(), DataTypes.BooleanType)
}

test("cast DecimalType(38,18) to ByteType") {
castTest(generateDecimalsPrecision38Scale18(), DataTypes.ByteType)
}
Expand All @@ -637,6 +665,41 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
DataTypes.LongType)
}

test("cast DecimalType(38,18) to FloatType") {
  // Full generated data set.
  castTest(generateDecimalsPrecision38Scale18(), DataTypes.FloatType)
  // Small fractions exercise the i128 / 10^scale precision path.
  val smallFractions = Seq(
    "0.000000000000000001",
    "-0.000000000000000001",
    "1.500000000000000000",
    "123456789.123456789").map(BigDecimal(_))
  castTest(generateDecimalsPrecision38Scale18(smallFractions), DataTypes.FloatType)
}

test("cast DecimalType(38,18) to DoubleType") {
  // Full generated data set.
  castTest(generateDecimalsPrecision38Scale18(), DataTypes.DoubleType)
  // Small fractions exercise the i128 / 10^scale precision path.
  val smallFractions = Seq(
    "0.000000000000000001",
    "-0.000000000000000001",
    "1.500000000000000000",
    "123456789.123456789").map(BigDecimal(_))
  castTest(generateDecimalsPrecision38Scale18(smallFractions), DataTypes.DoubleType)
}

test("cast DecimalType(38,18) to BooleanType") {
  // Full generated data set.
  castTest(generateDecimalsPrecision38Scale18(), DataTypes.BooleanType)
  // Tiny non-zero values must map to true; only exact zero is false.
  val tinyNonZero =
    Seq("0.000000000000000001", "-0.000000000000000001").map(BigDecimal(_))
  castTest(generateDecimalsPrecision38Scale18(tinyNonZero), DataTypes.BooleanType)
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might also want to add some edge cases here to gain more confidence


test("cast DecimalType(10,2) to StringType") {
castTest(generateDecimalsPrecision10Scale2(), DataTypes.StringType)
}
Expand Down Expand Up @@ -1395,7 +1458,13 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
FloatType,
DoubleType,
DecimalType(10, 2),
DecimalType(38, 18),
// DecimalType(38, 18) is excluded here: random data exposes a ~1 ULP difference between
// DataFusion's (i128 as f64) / 10^scale path and Spark's BigDecimal.doubleValue() for
// float/double casts; and extreme boundary values that would avoid the ULP issue overflow
// byte/short/int in ANSI mode, causing non-deterministic exception-message differences
// between Spark's row-at-a-time and Comet's vectorized execution. The individual scalar
// tests (cast DecimalType(38,18) to FloatType / DoubleType / BooleanType / etc.) already
// cover this type fully.
DateType,
TimestampType,
BinaryType)
Expand Down Expand Up @@ -1591,6 +1660,35 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
withNulls(values).toDF("b").withColumn("a", col("b").cast(DecimalType(10, 2))).drop("b")
}

/**
 * Builds a single-column DataFrame ("a") of DecimalType(15, 5) values (with nulls mixed in)
 * chosen to exercise int overflow, fractional truncation, and boolean-cast edge cases.
 */
private def generateDecimalsPrecision15Scale5(): DataFrame = {
  // Literals are parsed from strings so the full 5-digit fractional part is exact.
  val values = Seq(
    "2147483648.12345", // just above Int.MaxValue (2147483647) — overflows IntegerType
    "-2147483649.12345", // just below Int.MinValue — overflows IntegerType
    "123.45678", // fits in both int and long; exercises fractional truncation
    "-123.45678",
    "0.00001", // tiny non-zero — boolean cast must yield true
    "-0.00001",
    "0.00000").map(BigDecimal(_))
  withNulls(values).toDF("b").withColumn("a", col("b").cast(DecimalType(15, 5))).drop("b")
}

/**
 * Builds a single-column DataFrame ("a") of DecimalType(20, 0) values (with nulls mixed in):
 * large whole numbers straddling the Int and Long boundaries, plus +/-1 and zero.
 */
private def generateDecimalsPrecision20Scale0(): DataFrame = {
  val values = Seq(
    "9223372036854775808", // just above Long.MaxValue (9223372036854775807) — overflows LongType
    "-9223372036854775809", // just below Long.MinValue — overflows LongType
    "2147483648", // overflows IntegerType, fits in LongType
    "-2147483649",
    "1",
    "-1",
    "0").map(BigDecimal(_))
  withNulls(values).toDF("b").withColumn("a", col("b").cast(DecimalType(20, 0))).drop("b")
}

private def generateDecimalsPrecision38Scale18(): DataFrame = {
val values = Seq(
BigDecimal("-99999999999999999999.999999999999"),
Expand Down
Loading