|
25 | 25 | from pyiceberg.schema import ( |
26 | 26 | Accessor, |
27 | 27 | Schema, |
| 28 | + _check_schema_compatible, |
28 | 29 | build_position_accessors, |
29 | 30 | index_by_id, |
30 | 31 | index_by_name, |
@@ -1687,3 +1688,107 @@ def test_arrow_schema() -> None: |
1687 | 1688 | ) |
1688 | 1689 |
|
1689 | 1690 | assert base_schema.as_arrow() == expected_schema |
| 1691 | + |
| 1692 | + |
| 1693 | +def test_check_schema_compatible_optional_map_field_missing() -> None: |
| 1694 | + """Test that optional map field missing from provided schema is compatible (issue #2684).""" |
| 1695 | + requested_schema = Schema( |
| 1696 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1697 | + NestedField( |
| 1698 | + field_id=2, |
| 1699 | + name="data", |
| 1700 | + field_type=MapType(key_id=3, key_type=StringType(), value_id=4, value_type=StringType()), |
| 1701 | + required=False, # Optional map field |
| 1702 | + ), |
| 1703 | + ) |
| 1704 | + # Provided schema is missing the optional map field |
| 1705 | + provided_schema = Schema( |
| 1706 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1707 | + ) |
| 1708 | + # Should not raise - optional field can be missing |
| 1709 | + _check_schema_compatible(requested_schema, provided_schema) |
| 1710 | + |
| 1711 | + |
| 1712 | +def test_check_schema_compatible_required_map_field_missing() -> None: |
| 1713 | + """Test that required map field missing from provided schema raises error.""" |
| 1714 | + requested_schema = Schema( |
| 1715 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1716 | + NestedField( |
| 1717 | + field_id=2, |
| 1718 | + name="data", |
| 1719 | + field_type=MapType(key_id=3, key_type=StringType(), value_id=4, value_type=StringType()), |
| 1720 | + required=True, # Required map field |
| 1721 | + ), |
| 1722 | + ) |
| 1723 | + # Provided schema is missing the required map field |
| 1724 | + provided_schema = Schema( |
| 1725 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1726 | + ) |
| 1727 | + # Should raise - required field cannot be missing |
| 1728 | + with pytest.raises(ValueError, match="Mismatch in fields"): |
| 1729 | + _check_schema_compatible(requested_schema, provided_schema) |
| 1730 | + |
| 1731 | + |
| 1732 | +def test_check_schema_compatible_optional_list_field_missing() -> None: |
| 1733 | + """Test that optional list field missing from provided schema is compatible.""" |
| 1734 | + requested_schema = Schema( |
| 1735 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1736 | + NestedField( |
| 1737 | + field_id=2, |
| 1738 | + name="items", |
| 1739 | + field_type=ListType(element_id=3, element_type=StringType(), element_required=True), |
| 1740 | + required=False, # Optional list field |
| 1741 | + ), |
| 1742 | + ) |
| 1743 | + # Provided schema is missing the optional list field |
| 1744 | + provided_schema = Schema( |
| 1745 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1746 | + ) |
| 1747 | + # Should not raise - optional field can be missing |
| 1748 | + _check_schema_compatible(requested_schema, provided_schema) |
| 1749 | + |
| 1750 | + |
| 1751 | +def test_check_schema_compatible_optional_struct_field_missing() -> None: |
| 1752 | + """Test that optional struct field missing from provided schema is compatible.""" |
| 1753 | + requested_schema = Schema( |
| 1754 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1755 | + NestedField( |
| 1756 | + field_id=2, |
| 1757 | + name="details", |
| 1758 | + field_type=StructType( |
| 1759 | + NestedField(field_id=3, name="name", field_type=StringType(), required=True), |
| 1760 | + NestedField(field_id=4, name="count", field_type=IntegerType(), required=True), |
| 1761 | + ), |
| 1762 | + required=False, # Optional struct field |
| 1763 | + ), |
| 1764 | + ) |
| 1765 | + # Provided schema is missing the optional struct field |
| 1766 | + provided_schema = Schema( |
| 1767 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1768 | + ) |
| 1769 | + # Should not raise - optional field can be missing |
| 1770 | + _check_schema_compatible(requested_schema, provided_schema) |
| 1771 | + |
| 1772 | + |
| 1773 | +def test_check_schema_compatible_optional_map_field_present() -> None: |
| 1774 | + """Test that optional map field present in provided schema is compatible.""" |
| 1775 | + requested_schema = Schema( |
| 1776 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1777 | + NestedField( |
| 1778 | + field_id=2, |
| 1779 | + name="data", |
| 1780 | + field_type=MapType(key_id=3, key_type=StringType(), value_id=4, value_type=StringType()), |
| 1781 | + required=False, |
| 1782 | + ), |
| 1783 | + ) |
| 1784 | + provided_schema = Schema( |
| 1785 | + NestedField(field_id=1, name="id", field_type=LongType(), required=True), |
| 1786 | + NestedField( |
| 1787 | + field_id=2, |
| 1788 | + name="data", |
| 1789 | + field_type=MapType(key_id=3, key_type=StringType(), value_id=4, value_type=StringType()), |
| 1790 | + required=False, |
| 1791 | + ), |
| 1792 | + ) |
| 1793 | + # Should not raise - schemas match |
| 1794 | + _check_schema_compatible(requested_schema, provided_schema) |
0 commit comments