From 17aa2f66c66358fbb49a98a72cf61bb77f01b0a6 Mon Sep 17 00:00:00 2001 From: StandingMan Date: Wed, 24 Dec 2025 13:18:32 +0800 Subject: [PATCH] fix: follow IEEE 754 totalOrder for float and double Signed-off-by: StandingMan --- crates/iceberg/src/spec/values/datum.rs | 36 ++++++------------------- crates/iceberg/src/spec/values/tests.rs | 25 +++++++++++++++++ 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/crates/iceberg/src/spec/values/datum.rs b/crates/iceberg/src/spec/values/datum.rs index cb60fb94e9..88209ae95c 100644 --- a/crates/iceberg/src/spec/values/datum.rs +++ b/crates/iceberg/src/spec/values/datum.rs @@ -166,36 +166,16 @@ impl<'de> Deserialize<'de> for Datum { // Compare following iceberg float ordering rules: // -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN -fn iceberg_float_cmp(a: T, b: T) -> Option { - if a.is_nan() && b.is_nan() { - return match (a.is_sign_negative(), b.is_sign_negative()) { - (true, false) => Some(Ordering::Less), - (false, true) => Some(Ordering::Greater), - _ => Some(Ordering::Equal), - }; - } - - if a.is_nan() { - return Some(if a.is_sign_negative() { - Ordering::Less - } else { - Ordering::Greater - }); - } - - if b.is_nan() { - return Some(if b.is_sign_negative() { - Ordering::Greater - } else { - Ordering::Less - }); - } +fn iceberg_float_cmp_f32(a: OrderedFloat, b: OrderedFloat) -> Option { + Some(a.total_cmp(&b)) +} - a.partial_cmp(&b) +fn iceberg_float_cmp_f64(a: OrderedFloat, b: OrderedFloat) -> Option { + Some(a.total_cmp(&b)) } impl PartialOrd for Datum { - fn partial_cmp(&self, other: &Self) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { match (&self.literal, &other.literal, &self.r#type, &other.r#type) { // generate the arm with same type and same literal ( @@ -221,13 +201,13 @@ impl PartialOrd for Datum { PrimitiveLiteral::Float(other_val), PrimitiveType::Float, PrimitiveType::Float, - ) => iceberg_float_cmp(*val, *other_val), + ) => iceberg_float_cmp_f32(*val, *other_val), ( PrimitiveLiteral::Double(val), PrimitiveLiteral::Double(other_val), PrimitiveType::Double, PrimitiveType::Double, - ) => iceberg_float_cmp(*val, *other_val), + ) => iceberg_float_cmp_f64(*val, *other_val), ( PrimitiveLiteral::Int(val), PrimitiveLiteral::Int(other_val), diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index 73343a9a1a..bb10701d87 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ b/crates/iceberg/src/spec/values/tests.rs @@ -1293,6 +1293,31 @@ fn test_iceberg_float_order() { assert_eq!(double_sorted, double_expected); } +#[test] +fn test_negative_zero_less_than_positive_zero() { + { + let neg_zero = Datum::float(-0.0); + let pos_zero = Datum::float(0.0); + + assert_eq!( + neg_zero.partial_cmp(&pos_zero), + Some(std::cmp::Ordering::Less), + "IEEE 754 totalOrder requires -0.0 < +0.0 on F32" + ); + } + + { + let neg_zero = Datum::double(-0.0); + let pos_zero = Datum::double(0.0); + + assert_eq!( + neg_zero.partial_cmp(&pos_zero), + Some(std::cmp::Ordering::Less), + "IEEE 754 totalOrder requires -0.0 < +0.0 on F64" + ); + } +} + /// Test Date deserialization from JSON as number (days since epoch). /// /// This reproduces the scenario from Iceberg Java's TestAddFilesProcedure where: