From 8ac5ab2d02aa27e4a79f45e019eff9d3419135e0 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Sun, 2 Nov 2025 00:03:45 +0800 Subject: [PATCH 1/5] feat: add SortOrder::Make and SortOrder::IsBoundToSchema Signed-off-by: Junwang Zhao --- src/iceberg/sort_order.cc | 70 ++++++++++++++++- src/iceberg/sort_order.h | 25 ++++++ src/iceberg/test/schema_field_test.cc | 2 +- src/iceberg/test/schema_test.cc | 1 - src/iceberg/test/sort_order_test.cc | 106 ++++++++++++++++++++++++++ src/iceberg/transform.cc | 79 +++++++++++++++++++ src/iceberg/transform.h | 4 + 7 files changed, 284 insertions(+), 3 deletions(-) diff --git a/src/iceberg/sort_order.cc b/src/iceberg/sort_order.cc index 48a306611..2c1354ae3 100644 --- a/src/iceberg/sort_order.cc +++ b/src/iceberg/sort_order.cc @@ -20,9 +20,15 @@ #include "iceberg/sort_order.h" #include +#include #include +#include "iceberg/result.h" +#include "iceberg/schema.h" +#include "iceberg/sort_field.h" +#include "iceberg/transform.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep +#include "iceberg/util/macros.h" namespace iceberg { @@ -31,7 +37,7 @@ SortOrder::SortOrder(int32_t order_id, std::vector fields) const std::shared_ptr& SortOrder::Unsorted() { static const std::shared_ptr unsorted = - std::make_shared(/*order_id=*/0, std::vector{}); + std::make_shared(kUnsortedOrderId, std::vector{}); return unsorted; } @@ -80,4 +86,66 @@ bool SortOrder::Equals(const SortOrder& other) const { return order_id_ == other.order_id_ && fields_ == other.fields_; } +bool SortOrder::IsBoundToSchema(const Schema& schema) const { + for (const auto& field : fields_) { + auto schema_field = schema.FindFieldById(field.source_id()); + if (!schema_field.has_value() || schema_field.value() == std::nullopt) { + return false; + } + + const auto& source_type = schema_field.value().value().get().type(); + if (!source_type->is_primitive()) { + return false; + } + + auto result = field.transform()->ResultType(source_type); + if (!result) { + return false; + } + } + return true; +} + +Result> SortOrder::Make(const Schema& schema, int32_t sort_id, + std::vector fields) { + if (!fields.empty() && sort_id == kUnsortedOrderId) [[unlikely]] { + return InvalidArgument("{} is reserved for unsorted sort order", kUnsortedOrderId); + } + + if (fields.empty() && sort_id != kUnsortedOrderId) [[unlikely]] { + return InvalidArgument("Sort order must have at least one sort field"); + } + + for (const auto& field : fields) { + ICEBERG_ASSIGN_OR_RAISE(auto schema_field, schema.FindFieldById(field.source_id())); + if (schema_field == std::nullopt) { + return InvalidArgument("Cannot find source column for sort field: {}", field); + } + + const auto& source_type = schema_field.value().get().type(); + + if (!source_type->is_primitive()) { + return InvalidArgument("Cannot sort by non-primitive source field: {}", + *source_type); + } + + ICEBERG_RETURN_UNEXPECTED(field.transform()->ResultType(source_type)); + } + + return std::make_unique(sort_id, std::move(fields)); +} + +Result> SortOrder::Make(int32_t sort_id, + std::vector fields) { + if (!fields.empty() && sort_id == kUnsortedOrderId) [[unlikely]] { + return InvalidArgument("{} is reserved for unsorted sort order", kUnsortedOrderId); + } + + if (fields.empty() && sort_id != kUnsortedOrderId) [[unlikely]] { + return InvalidArgument("Sort order must have at least one sort field"); + } + + return std::make_unique(sort_id, std::move(fields)); +} + } // namespace iceberg diff --git a/src/iceberg/sort_order.h b/src/iceberg/sort_order.h index e245a74a2..4e4beb38e 100644 --- a/src/iceberg/sort_order.h +++ b/src/iceberg/sort_order.h @@ -20,11 +20,13 @@ #pragma once #include +#include #include #include #include "iceberg/iceberg_export.h" #include "iceberg/sort_field.h" +#include "iceberg/type_fwd.h" #include "iceberg/util/formattable.h" namespace iceberg { @@ -36,6 +38,7 @@ namespace iceberg { /// applied to the data. class ICEBERG_EXPORT SortOrder : public util::Formattable { public: + static constexpr int32_t kUnsortedOrderId = 0; static constexpr int32_t kInitialSortOrderId = 1; SortOrder(int32_t order_id, std::vector fields); @@ -69,6 +72,28 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable { return lhs.Equals(rhs); } + /// \brief Checks whether the sort order is bound to the given schema. + /// \param schema The schema to check against. + /// \return true if the sort order is valid for the given schema. + bool IsBoundToSchema(const Schema& schema) const; + + /// \brief Create a SortOrder. + /// \param schema The schema to bind the sort order to. + /// \param sort_id The sort order id. + /// \param fields The sort fields. + /// \return A Result containing the SortOrder or an error. + static Result> Make(const Schema& schema, int32_t sort_id, + std::vector fields); + + /// \brief Create a SortOrder without binding to a schema. + /// \param sort_id The sort order id. + /// \param fields The sort fields. + /// \return A Result containing the SortOrder or an error. + /// \note This method does not check whether the sort fields are valid for any schema. + /// Use IsBoundToSchema to check if the sort order is valid for a given schema. + static Result> Make(int32_t sort_id, + std::vector fields); + private: /// \brief Compare two sort orders for equality. bool Equals(const SortOrder& other) const; diff --git a/src/iceberg/test/schema_field_test.cc b/src/iceberg/test/schema_field_test.cc index bc0dbbdf3..1078aadf2 100644 --- a/src/iceberg/test/schema_field_test.cc +++ b/src/iceberg/test/schema_field_test.cc @@ -63,7 +63,7 @@ TEST(SchemaFieldTest, Equality) { iceberg::SchemaField field1(1, "foo", iceberg::int32(), false); iceberg::SchemaField field2(2, "foo", iceberg::int32(), false); iceberg::SchemaField field3(1, "bar", iceberg::int32(), false); - iceberg::SchemaField field4(1, "foo", std::make_shared(), false); + iceberg::SchemaField field4(1, "foo", iceberg::int64(), false); iceberg::SchemaField field5(1, "foo", iceberg::int32(), true); iceberg::SchemaField field6(1, "foo", iceberg::int32(), false); diff --git a/src/iceberg/test/schema_test.cc b/src/iceberg/test/schema_test.cc index d99e9b3c3..89a8d54b5 100644 --- a/src/iceberg/test/schema_test.cc +++ b/src/iceberg/test/schema_test.cc @@ -25,7 +25,6 @@ #include #include -#include "gtest/gtest.h" #include "iceberg/result.h" #include "iceberg/schema_field.h" #include "iceberg/test/matchers.h" diff --git a/src/iceberg/test/sort_order_test.cc b/src/iceberg/test/sort_order_test.cc index bb407afae..54fe1dab7 100644 --- a/src/iceberg/test/sort_order_test.cc +++ b/src/iceberg/test/sort_order_test.cc @@ -26,11 +26,40 @@ #include "iceberg/schema.h" #include "iceberg/sort_field.h" +#include "iceberg/test/matchers.h" #include "iceberg/transform.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep namespace iceberg { +class SortOrderMakeTest : public ::testing::Test { + protected: + void SetUp() override { + field1_ = std::make_unique(1, "x", int32(), true); + field2_ = std::make_unique(2, "y", string(), true); + field3_ = std::make_unique(3, "time", timestamp(), true); + + schema_ = std::make_unique( + std::vector{*field1_, *field2_, *field3_}, 1); + + sort_field1_ = std::make_unique( + 1, Transform::Identity(), SortDirection::kAscending, NullOrder::kFirst); + sort_field2_ = std::make_unique( + 2, Transform::Bucket(10), SortDirection::kDescending, NullOrder::kLast); + sort_field3_ = std::make_unique( + 3, Transform::Day(), SortDirection::kAscending, NullOrder::kFirst); + } + + std::unique_ptr schema_; + std::unique_ptr field1_; + std::unique_ptr field2_; + std::unique_ptr field3_; + + std::unique_ptr sort_field1_; + std::unique_ptr sort_field2_; + std::unique_ptr sort_field3_; +}; + TEST(SortOrderTest, Basics) { { SchemaField field1(5, "ts", iceberg::timestamp(), true); @@ -148,4 +177,81 @@ TEST(SortOrderTest, Satisfies) { EXPECT_FALSE(sort_order2.Satisfies(sort_order4)); } +TEST_F(SortOrderMakeTest, MakeValidSortOrder) { + ICEBERG_UNWRAP_OR_FAIL( + auto sort_order, + SortOrder::Make(*schema_, 1, std::vector{*sort_field1_, *sort_field2_})); + ASSERT_NE(sort_order, nullptr); + + EXPECT_TRUE(sort_order->is_sorted()); + ASSERT_EQ(sort_order->fields().size(), 2); + EXPECT_EQ(sort_order->fields()[0], *sort_field1_); + EXPECT_EQ(sort_order->fields()[1], *sort_field2_); +} + +TEST_F(SortOrderMakeTest, MakeInvalidSortOrderEmptyFields) { + auto sort_order = SortOrder::Make(*schema_, 1, std::vector{}); + EXPECT_THAT(sort_order, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(sort_order, + HasErrorMessage("Sort order must have at least one sort field")); +} + +TEST_F(SortOrderMakeTest, MakeInvalidSortOrderUnsortedId) { + auto sort_order = SortOrder::Make(*schema_, SortOrder::kUnsortedOrderId, + std::vector{*sort_field1_}); + EXPECT_THAT(sort_order, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(sort_order, + HasErrorMessage(std::format("{} is reserved for unsorted sort order", + SortOrder::kUnsortedOrderId))); +} + +TEST_F(SortOrderMakeTest, MakeValidUnsortedSortOrder) { + ICEBERG_UNWRAP_OR_FAIL(auto sort_order, SortOrder::Make(SortOrder::kUnsortedOrderId, + std::vector{})); + ASSERT_NE(sort_order, nullptr); + + EXPECT_TRUE(sort_order->is_unsorted()); + EXPECT_EQ(sort_order->fields().size(), 0); +} + +TEST_F(SortOrderMakeTest, MakeInvalidSortOrderNonPrimitiveField) { + auto struct_field = std::make_unique( + 4, "struct_field", + std::make_shared(std::vector{ + SchemaField::MakeRequired(41, "inner_field", iceberg::int32()), + }), + true); + + Schema schema_with_struct( + std::vector{*field1_, *field2_, *field3_, *struct_field}, 1); + + SortField sort_field_invalid(4, Transform::Identity(), SortDirection::kAscending, + NullOrder::kFirst); + + auto sort_order = SortOrder::Make( + schema_with_struct, 1, std::vector{*sort_field1_, sort_field_invalid}); + EXPECT_THAT(sort_order, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(sort_order, HasErrorMessage("Cannot sort by non-primitive source field")); +} + +TEST_F(SortOrderMakeTest, MakeInvalidSortOrderFieldNotInSchema) { + SortField sort_field_invalid(999, Transform::Identity(), SortDirection::kAscending, + NullOrder::kFirst); + + auto sort_order = SortOrder::Make( + *schema_, 1, std::vector{*sort_field1_, sort_field_invalid}); + EXPECT_THAT(sort_order, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(sort_order, HasErrorMessage("Cannot find source column for sort field:")); +} + +TEST_F(SortOrderMakeTest, MakeUnboundSortOrder) { + SortField sort_field_invalid(999, Transform::Identity(), SortDirection::kAscending, + NullOrder::kFirst); + + auto sort_order = + SortOrder::Make(1, std::vector{*sort_field1_, sort_field_invalid}); + ASSERT_THAT(sort_order, IsOk()); + ASSERT_EQ(sort_order.value()->IsBoundToSchema(*schema_), false); +} + } // namespace iceberg diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc index 4724cc18e..a569cd451 100644 --- a/src/iceberg/transform.cc +++ b/src/iceberg/transform.cc @@ -23,6 +23,7 @@ #include #include +#include "iceberg/result.h" #include "iceberg/transform_function.h" #include "iceberg/type.h" @@ -125,6 +126,84 @@ Result> Transform::Bind( } } +Result> Transform::ResultType( + const std::shared_ptr& source_type) const { + switch (transform_type_) { + case TransformType::kIdentity: + if (!source_type->is_primitive()) [[unlikely]] { + return InvalidArgument("{} is not a valid input type of identity transform", + source_type->ToString()); + } + return source_type; + case TransformType::kVoid: + return source_type; + case TransformType::kUnknown: + return string(); + case TransformType::kBucket: + switch (source_type->type_id()) { + case TypeId::kInt: + case TypeId::kLong: + case TypeId::kDecimal: + case TypeId::kDate: + case TypeId::kTime: + case TypeId::kTimestamp: + case TypeId::kTimestampTz: + case TypeId::kString: + case TypeId::kUuid: + case TypeId::kFixed: + case TypeId::kBinary: + return int32(); + default: + return InvalidArgument("{} is not a valid input type of bucket transform", + source_type->ToString()); + } + case TransformType::kTruncate: + switch (source_type->type_id()) { + case TypeId::kInt: + case TypeId::kLong: + case TypeId::kString: + case TypeId::kBinary: + case TypeId::kDecimal: + return source_type; + default: + return InvalidArgument("{} is not a valid input type of truncate transform", + source_type->ToString()); + } + case TransformType::kYear: + case TransformType::kMonth: + switch (source_type->type_id()) { + case TypeId::kDate: + case TypeId::kTimestamp: + case TypeId::kTimestampTz: + return int32(); + default: + return InvalidArgument("{} is not a valid input type of {} transform", + source_type->ToString(), this->ToString()); + } + case TransformType::kDay: + switch (source_type->type_id()) { + case TypeId::kDate: + case TypeId::kTimestamp: + case TypeId::kTimestampTz: + return date(); + default: + return InvalidArgument("{} is not a valid input type of day transform", + source_type->ToString()); + } + case TransformType::kHour: + switch (source_type->type_id()) { + case TypeId::kTimestamp: + case TypeId::kTimestampTz: + return int32(); + default: + return InvalidArgument("{} is not a valid input type of hour transform", + source_type->ToString()); + } + default: + std::unreachable(); + } +} + bool Transform::PreservesOrder() const { switch (transform_type_) { case TransformType::kUnknown: diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h index 21483e0c4..4c0fb1923 100644 --- a/src/iceberg/transform.h +++ b/src/iceberg/transform.h @@ -150,6 +150,10 @@ class ICEBERG_EXPORT Transform : public util::Formattable { Result> Bind( const std::shared_ptr& source_type) const; + /// \brief Returns the Type produced by this transform given a source type. + Result> ResultType( + const std::shared_ptr& source_type) const; + /// \brief Whether the transform preserves the order of values (is monotonic). bool PreservesOrder() const; From b167aed3ad9cb12f9869fc09a699f6e513090196 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Sat, 8 Nov 2025 15:23:04 +0800 Subject: [PATCH 2/5] fix: review comments --- src/iceberg/sort_order.cc | 24 ++++------ src/iceberg/sort_order.h | 8 ++-- src/iceberg/test/sort_order_test.cc | 7 ++- src/iceberg/test/transform_test.cc | 73 +++++++++++++++++++++++++++++ src/iceberg/transform.cc | 51 +++++++++----------- src/iceberg/transform.h | 7 +-- 6 files changed, 116 insertions(+), 54 deletions(-) diff --git a/src/iceberg/sort_order.cc b/src/iceberg/sort_order.cc index 2c1354ae3..8f0890354 100644 --- a/src/iceberg/sort_order.cc +++ b/src/iceberg/sort_order.cc @@ -86,24 +86,21 @@ bool SortOrder::Equals(const SortOrder& other) const { return order_id_ == other.order_id_ && fields_ == other.fields_; } -bool SortOrder::IsBoundToSchema(const Schema& schema) const { +Status SortOrder::Validate(const Schema& schema) const { for (const auto& field : fields_) { auto schema_field = schema.FindFieldById(field.source_id()); if (!schema_field.has_value() || schema_field.value() == std::nullopt) { - return false; + return InvalidArgument("Cannot find schema field for sort field: {}", field); } const auto& source_type = schema_field.value().value().get().type(); - if (!source_type->is_primitive()) { - return false; - } - auto result = field.transform()->ResultType(source_type); - if (!result) { - return false; + if (!field.transform()->CanTransform(*source_type)) { + return InvalidArgument("Cannot transform schema field type {} with transform {}", + source_type->ToString(), field.transform()->ToString()); } } - return true; + return {}; } Result> SortOrder::Make(const Schema& schema, int32_t sort_id, @@ -123,13 +120,10 @@ Result> SortOrder::Make(const Schema& schema, int32_t } const auto& source_type = schema_field.value().get().type(); - - if (!source_type->is_primitive()) { - return InvalidArgument("Cannot sort by non-primitive source field: {}", - *source_type); + if (field.transform()->CanTransform(*source_type) == false) { + return InvalidArgument("Cannot transform schema field type {} with transform {}", + source_type->ToString(), field.transform()->ToString()); } - - ICEBERG_RETURN_UNEXPECTED(field.transform()->ResultType(source_type)); } return std::make_unique(sort_id, std::move(fields)); diff --git a/src/iceberg/sort_order.h b/src/iceberg/sort_order.h index 4e4beb38e..b442c3e58 100644 --- a/src/iceberg/sort_order.h +++ b/src/iceberg/sort_order.h @@ -72,10 +72,10 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable { return lhs.Equals(rhs); } - /// \brief Checks whether the sort order is bound to the given schema. - /// \param schema The schema to check against. - /// \return true if the sort order is valid for the given schema. - bool IsBoundToSchema(const Schema& schema) const; + /// \brief Validates the sort order against a schema. + /// \param schema The schema to validate against. + /// \return Error status if the sort order is not bound to the schema. + Status Validate(const Schema& schema) const; /// \brief Create a SortOrder. /// \param schema The schema to bind the sort order to. diff --git a/src/iceberg/test/sort_order_test.cc b/src/iceberg/test/sort_order_test.cc index 54fe1dab7..bbeb6a780 100644 --- a/src/iceberg/test/sort_order_test.cc +++ b/src/iceberg/test/sort_order_test.cc @@ -231,7 +231,7 @@ TEST_F(SortOrderMakeTest, MakeInvalidSortOrderNonPrimitiveField) { auto sort_order = SortOrder::Make( schema_with_struct, 1, std::vector{*sort_field1_, sort_field_invalid}); EXPECT_THAT(sort_order, IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(sort_order, HasErrorMessage("Cannot sort by non-primitive source field")); + EXPECT_THAT(sort_order, HasErrorMessage("Cannot transform schema field type")); } TEST_F(SortOrderMakeTest, MakeInvalidSortOrderFieldNotInSchema) { @@ -251,7 +251,10 @@ TEST_F(SortOrderMakeTest, MakeUnboundSortOrder) { auto sort_order = SortOrder::Make(1, std::vector{*sort_field1_, sort_field_invalid}); ASSERT_THAT(sort_order, IsOk()); - ASSERT_EQ(sort_order.value()->IsBoundToSchema(*schema_), false); + auto validate_status = sort_order.value()->Validate(*schema_); + EXPECT_THAT(validate_status, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(validate_status, + HasErrorMessage("Cannot find source column for sort field:")); } } // namespace iceberg diff --git a/src/iceberg/test/transform_test.cc b/src/iceberg/test/transform_test.cc index e4352af8a..529297b27 100644 --- a/src/iceberg/test/transform_test.cc +++ b/src/iceberg/test/transform_test.cc @@ -27,6 +27,7 @@ #include #include "iceberg/expression/literal.h" +#include "iceberg/schema_field.h" #include "iceberg/test/matchers.h" #include "iceberg/test/temporal_test_helper.h" #include "iceberg/type.h" @@ -841,4 +842,76 @@ TEST(TransformSatisfiesOrderOfTest, SatisfiesOrderOf) { } } +TEST(TransformCanTransformTest, CanTransform) { + struct Case { + std::string transform_str; + std::shared_ptr source_type; + bool expected; + }; + + const std::vector cases = { + // Identity can transform all primitive types + {.transform_str = "identity", .source_type = int32(), .expected = true}, + {.transform_str = "identity", .source_type = string(), .expected = true}, + {.transform_str = "identity", .source_type = boolean(), .expected = true}, + {.transform_str = "identity", + .source_type = list(SchemaField(123, "element", int32(), false)), + .expected = false}, + + // Void can transform any type + {.transform_str = "void", .source_type = iceberg::int32(), .expected = true}, + {.transform_str = "void", + .source_type = iceberg::map(SchemaField(123, "key", iceberg::string(), false), + SchemaField(124, "value", iceberg::int32(), true)), + .expected = true}, + + // Bucket can transform specific types + {.transform_str = "bucket[16]", .source_type = iceberg::int32(), .expected = true}, + {.transform_str = "bucket[16]", .source_type = iceberg::string(), .expected = true}, + {.transform_str = "bucket[16]", + .source_type = iceberg::float32(), + .expected = false}, + + // Truncate can transform specific types + {.transform_str = "truncate[32]", + .source_type = iceberg::int32(), + .expected = true}, + {.transform_str = "truncate[32]", + .source_type = iceberg::string(), + .expected = true}, + {.transform_str = "truncate[32]", + .source_type = iceberg::boolean(), + .expected = false}, + + // Year can transform date and timestamp types + {.transform_str = "year", .source_type = iceberg::date(), .expected = true}, + {.transform_str = "year", .source_type = iceberg::timestamp(), .expected = true}, + {.transform_str = "year", .source_type = iceberg::string(), .expected = false}, + + // Month can transform date and timestamp types + {.transform_str = "month", .source_type = iceberg::date(), .expected = true}, + {.transform_str = "month", .source_type = iceberg::timestamp(), .expected = true}, + {.transform_str = "month", .source_type = iceberg::binary(), .expected = false}, + + // Day can transform date and timestamp types + {.transform_str = "day", .source_type = iceberg::date(), .expected = true}, + {.transform_str = "day", .source_type = iceberg::timestamp(), .expected = true}, + {.transform_str = "day", .source_type = iceberg::uuid(), .expected = false}, + + // Hour can transform timestamp types + {.transform_str = "hour", .source_type = iceberg::timestamp(), .expected = true}, + {.transform_str = "hour", .source_type = iceberg::timestamp_tz(), .expected = true}, + {.transform_str = "hour", .source_type = iceberg::int32(), .expected = false}, + }; + + for (const auto& c : cases) { + auto transform = TransformFromString(c.transform_str); + ASSERT_TRUE(transform.has_value()) << "Failed to parse: " << c.transform_str; + + EXPECT_EQ(transform.value()->CanTransform(*c.source_type), c.expected) + << "Unexpected result for transform: " << c.transform_str + << " and source type: " << c.source_type->ToString(); + } +} + } // namespace iceberg diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc index a569cd451..1f1477284 100644 --- a/src/iceberg/transform.cc +++ b/src/iceberg/transform.cc @@ -126,21 +126,18 @@ Result> Transform::Bind( } } -Result> Transform::ResultType( - const std::shared_ptr& source_type) const { +bool Transform::CanTransform(const Type& source_type) const { switch (transform_type_) { case TransformType::kIdentity: - if (!source_type->is_primitive()) [[unlikely]] { - return InvalidArgument("{} is not a valid input type of identity transform", - source_type->ToString()); + if (!source_type.is_primitive()) [[unlikely]] { + return false; } - return source_type; + return true; case TransformType::kVoid: - return source_type; case TransformType::kUnknown: - return string(); + return true; case TransformType::kBucket: - switch (source_type->type_id()) { + switch (source_type.type_id()) { case TypeId::kInt: case TypeId::kLong: case TypeId::kDecimal: @@ -152,56 +149,50 @@ Result> Transform::ResultType( case TypeId::kUuid: case TypeId::kFixed: case TypeId::kBinary: - return int32(); + return true; default: - return InvalidArgument("{} is not a valid input type of bucket transform", - source_type->ToString()); + return false; } case TransformType::kTruncate: - switch (source_type->type_id()) { + switch (source_type.type_id()) { case TypeId::kInt: case TypeId::kLong: case TypeId::kString: case TypeId::kBinary: case TypeId::kDecimal: - return source_type; + return true; default: - return InvalidArgument("{} is not a valid input type of truncate transform", - source_type->ToString()); + return false; } case TransformType::kYear: case TransformType::kMonth: - switch (source_type->type_id()) { + switch (source_type.type_id()) { case TypeId::kDate: case TypeId::kTimestamp: case TypeId::kTimestampTz: - return int32(); + return true; default: - return InvalidArgument("{} is not a valid input type of {} transform", - source_type->ToString(), this->ToString()); + return false; } case TransformType::kDay: - switch (source_type->type_id()) { + switch (source_type.type_id()) { case TypeId::kDate: case TypeId::kTimestamp: case TypeId::kTimestampTz: - return date(); + return true; default: - return InvalidArgument("{} is not a valid input type of day transform", - source_type->ToString()); + return false; } case TransformType::kHour: - switch (source_type->type_id()) { + switch (source_type.type_id()) { case TypeId::kTimestamp: case TypeId::kTimestampTz: - return int32(); + return true; default: - return InvalidArgument("{} is not a valid input type of hour transform", - source_type->ToString()); + return false; } - default: - std::unreachable(); } + std::unreachable(); } bool Transform::PreservesOrder() const { diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h index 4c0fb1923..4f608c467 100644 --- a/src/iceberg/transform.h +++ b/src/iceberg/transform.h @@ -150,9 +150,10 @@ class ICEBERG_EXPORT Transform : public util::Formattable { Result> Bind( const std::shared_ptr& source_type) const; - /// \brief Returns the Type produced by this transform given a source type. - Result> ResultType( - const std::shared_ptr& source_type) const; + /// \brief Checks whether this function can be applied to the given Type. + /// \param source_type The source type to check. + /// \return true if this transform can be applied to the type, false otherwise + bool CanTransform(const Type& source_type) const; /// \brief Whether the transform preserves the order of values (is monotonic). bool PreservesOrder() const; From 03eb40b95823c1d59805184be2e584b656a928c4 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Sat, 8 Nov 2025 15:29:01 +0800 Subject: [PATCH 3/5] fix: remove a unnecessary header --- src/iceberg/transform.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc index 1f1477284..2e39cb06d 100644 --- a/src/iceberg/transform.cc +++ b/src/iceberg/transform.cc @@ -23,7 +23,6 @@ #include #include -#include "iceberg/result.h" #include "iceberg/transform_function.h" #include "iceberg/type.h" From 5bf6c2b462a6a845b3ccbd00944aadc4d3a72488 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Sat, 8 Nov 2025 15:54:07 +0800 Subject: [PATCH 4/5] fix: wrong error message --- src/iceberg/sort_order.cc | 2 +- src/iceberg/test/sort_order_test.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/iceberg/sort_order.cc b/src/iceberg/sort_order.cc index 8f0890354..91a84aade 100644 --- a/src/iceberg/sort_order.cc +++ b/src/iceberg/sort_order.cc @@ -116,7 +116,7 @@ Result> SortOrder::Make(const Schema& schema, int32_t for (const auto& field : fields) { ICEBERG_ASSIGN_OR_RAISE(auto schema_field, schema.FindFieldById(field.source_id())); if (schema_field == std::nullopt) { - return InvalidArgument("Cannot find source column for sort field: {}", field); + return InvalidArgument("Cannot find schema field for sort field: {}", field); } const auto& source_type = schema_field.value().get().type(); diff --git a/src/iceberg/test/sort_order_test.cc b/src/iceberg/test/sort_order_test.cc index bbeb6a780..f585e25b5 100644 --- a/src/iceberg/test/sort_order_test.cc +++ b/src/iceberg/test/sort_order_test.cc @@ -241,7 +241,7 @@ TEST_F(SortOrderMakeTest, MakeInvalidSortOrderFieldNotInSchema) { auto sort_order = SortOrder::Make( *schema_, 1, std::vector{*sort_field1_, sort_field_invalid}); EXPECT_THAT(sort_order, IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(sort_order, HasErrorMessage("Cannot find source column for sort field:")); + EXPECT_THAT(sort_order, HasErrorMessage("Cannot find schema field for sort field")); } TEST_F(SortOrderMakeTest, MakeUnboundSortOrder) { @@ -254,7 +254,7 @@ TEST_F(SortOrderMakeTest, MakeUnboundSortOrder) { auto validate_status = sort_order.value()->Validate(*schema_); EXPECT_THAT(validate_status, IsError(ErrorKind::kInvalidArgument)); EXPECT_THAT(validate_status, - HasErrorMessage("Cannot find source column for sort field:")); + HasErrorMessage("Cannot find schema field for sort field")); } } // namespace iceberg From ce5774bc388aa3101e1b09fdbcd3e1bc5e9363d2 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Sat, 8 Nov 2025 20:43:31 +0800 Subject: [PATCH 5/5] fix: make error message consistent with Java impl --- src/iceberg/sort_order.cc | 8 ++++---- src/iceberg/sort_order.h | 2 +- src/iceberg/test/sort_order_test.cc | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/iceberg/sort_order.cc b/src/iceberg/sort_order.cc index 91a84aade..9e42b15dd 100644 --- a/src/iceberg/sort_order.cc +++ b/src/iceberg/sort_order.cc @@ -90,13 +90,13 @@ Status SortOrder::Validate(const Schema& schema) const { for (const auto& field : fields_) { auto schema_field = schema.FindFieldById(field.source_id()); if (!schema_field.has_value() || schema_field.value() == std::nullopt) { - return InvalidArgument("Cannot find schema field for sort field: {}", field); + return InvalidArgument("Cannot find source column for sort field: {}", field); } const auto& source_type = schema_field.value().value().get().type(); if (!field.transform()->CanTransform(*source_type)) { - return InvalidArgument("Cannot transform schema field type {} with transform {}", + return InvalidArgument("Invalid source type {} for transform {}", source_type->ToString(), field.transform()->ToString()); } } @@ -116,12 +116,12 @@ Result> SortOrder::Make(const Schema& schema, int32_t for (const auto& field : fields) { ICEBERG_ASSIGN_OR_RAISE(auto schema_field, schema.FindFieldById(field.source_id())); if (schema_field == std::nullopt) { - return InvalidArgument("Cannot find schema field for sort field: {}", field); + return InvalidArgument("Cannot find source column for sort field: {}", field); } const auto& source_type = schema_field.value().get().type(); if (field.transform()->CanTransform(*source_type) == false) { - return InvalidArgument("Cannot transform schema field type {} with transform {}", + return InvalidArgument("Invalid source type {} for transform {}", source_type->ToString(), field.transform()->ToString()); } } diff --git a/src/iceberg/sort_order.h b/src/iceberg/sort_order.h index b442c3e58..4a0b11a92 100644 --- a/src/iceberg/sort_order.h +++ b/src/iceberg/sort_order.h @@ -74,7 +74,7 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable { /// \brief Validates the sort order against a schema. /// \param schema The schema to validate against. - /// \return Error status if the sort order is not bound to the schema. + /// \return Error status if the sort order has any invalid transform. Status Validate(const Schema& schema) const; /// \brief Create a SortOrder. diff --git a/src/iceberg/test/sort_order_test.cc b/src/iceberg/test/sort_order_test.cc index f585e25b5..86bd52567 100644 --- a/src/iceberg/test/sort_order_test.cc +++ b/src/iceberg/test/sort_order_test.cc @@ -231,7 +231,7 @@ TEST_F(SortOrderMakeTest, MakeInvalidSortOrderNonPrimitiveField) { auto sort_order = SortOrder::Make( schema_with_struct, 1, std::vector{*sort_field1_, sort_field_invalid}); EXPECT_THAT(sort_order, IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(sort_order, HasErrorMessage("Cannot transform schema field type")); + EXPECT_THAT(sort_order, HasErrorMessage("Invalid source type")); } TEST_F(SortOrderMakeTest, MakeInvalidSortOrderFieldNotInSchema) { @@ -241,7 +241,7 @@ TEST_F(SortOrderMakeTest, MakeInvalidSortOrderFieldNotInSchema) { auto sort_order = SortOrder::Make( *schema_, 1, std::vector{*sort_field1_, sort_field_invalid}); EXPECT_THAT(sort_order, IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(sort_order, HasErrorMessage("Cannot find schema field for sort field")); + EXPECT_THAT(sort_order, HasErrorMessage("Cannot find source column for sort field")); } TEST_F(SortOrderMakeTest, MakeUnboundSortOrder) { @@ -254,7 +254,7 @@ TEST_F(SortOrderMakeTest, MakeUnboundSortOrder) { auto validate_status = sort_order.value()->Validate(*schema_); EXPECT_THAT(validate_status, IsError(ErrorKind::kInvalidArgument)); EXPECT_THAT(validate_status, - HasErrorMessage("Cannot find schema field for sort field")); + HasErrorMessage("Cannot find source column for sort field")); } } // namespace iceberg