4747 TimestampType ,
4848 TimestamptzType ,
4949 TimeType ,
50+ UnknownType ,
5051 UUIDType ,
5152)
5253
@@ -165,6 +166,28 @@ def test_partition_spec_to_path() -> None:
165166 assert spec .partition_to_path (record , schema ) == "my%23str%25bucket=my%2Bstr/other+str%2Bbucket=%28+%29/my%21int%3Abucket=10"
166167
167168
def test_partition_spec_to_path_dropped_source_id() -> None:
    """Render a partition path when one partition field references a source id absent from the schema.

    The schema only declares field ids 1-3, while the third partition field
    uses ``source_id=4``. The path is still expected to contain all three
    partition fields.
    """
    columns = (
        NestedField(field_id=1, name="str", field_type=StringType(), required=False),
        NestedField(field_id=2, name="other_str", field_type=StringType(), required=False),
        NestedField(field_id=3, name="int", field_type=IntegerType(), required=True),
    )
    table_schema = Schema(*columns)

    partition_fields = (
        PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=19), name="my#str%bucket"),
        PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="other str+bucket"),
        # This partition field deliberately points at a source id (4) that the schema does not contain.
        PartitionField(source_id=4, field_id=1002, transform=BucketTransform(num_buckets=25), name="my!int:bucket"),
    )
    partition_spec = PartitionSpec(*partition_fields, spec_id=3)

    partition_values = Record("my+str", "( )", 10)

    # Partition field names and values are both URL encoded, and spaces become plus signs, mirroring the Java
    # implementation: https://github.com/apache/iceberg/blob/ca3db931b0f024f0412084751ac85dd4ef2da7e7/api/src/main/java/org/apache/iceberg/PartitionSpec.java#L198-L204
    expected_path = "my%23str%25bucket=my%2Bstr/other+str%2Bbucket=%28+%29/my%21int%3Abucket=10"
    rendered_path = partition_spec.partition_to_path(partition_values, table_schema)

    assert rendered_path == expected_path
189+
190+
168191def test_partition_type (table_schema_simple : Schema ) -> None :
169192 spec = PartitionSpec (
170193 PartitionField (source_id = 1 , field_id = 1000 , transform = TruncateTransform (width = 19 ), name = "str_truncate" ),
@@ -178,6 +201,19 @@ def test_partition_type(table_schema_simple: Schema) -> None:
178201 )
179202
180203
def test_partition_type_missing_source_field(table_schema_simple: Schema) -> None:
    """Check the partition struct type when a partition field's source column cannot be resolved.

    The second partition field is expected to surface as ``UnknownType`` in
    the resulting struct, while the first keeps its ``StringType``.
    """
    truncate_field = PartitionField(
        source_id=1, field_id=1000, transform=TruncateTransform(width=19), name="str_truncate"
    )
    # NOTE(review): source_id=10 is presumably not present in the table_schema_simple fixture — confirm.
    unresolved_field = PartitionField(
        source_id=10, field_id=1001, transform=BucketTransform(num_buckets=25), name="int_bucket"
    )
    partition_spec = PartitionSpec(truncate_field, unresolved_field, spec_id=3)

    actual_type = partition_spec.partition_type(table_schema_simple)
    expected_type = StructType(
        NestedField(field_id=1000, name="str_truncate", field_type=StringType(), required=False),
        NestedField(field_id=1001, name="int_bucket", field_type=UnknownType(), required=False),
    )

    assert actual_type == expected_type
215+
216+
181217@pytest .mark .parametrize (
182218 "source_type, value" ,
183219 [
0 commit comments