diff --git a/test/collection/test_batch.py b/test/collection/test_batch.py index 420452a26..0a2cda954 100644 --- a/test/collection/test_batch.py +++ b/test/collection/test_batch.py @@ -1,6 +1,10 @@ import uuid +import pytest + +from weaviate.collections.batch.grpc_batch import _validate_props from weaviate.collections.classes.batch import MAX_STORED_RESULTS, BatchObjectReturn +from weaviate.exceptions import WeaviateInsertInvalidPropertyError def test_batch_object_return_add() -> None: @@ -30,3 +34,22 @@ def test_batch_object_return_add() -> None: idx + len(rhs_uuids): v for idx, v in enumerate(lhs_uuids[len(rhs_uuids) : MAX_STORED_RESULTS] + rhs_uuids) } + + +def test_validate_props_raises_for_top_level_id() -> None: + with pytest.raises(WeaviateInsertInvalidPropertyError): + _validate_props({"id": "abc123"}) + + +def test_validate_props_allows_nested_id() -> None: + _validate_props({"id": "abc123"}, nested=True) + + +def test_validate_props_raises_for_top_level_vector() -> None: + with pytest.raises(WeaviateInsertInvalidPropertyError): + _validate_props({"vector": [0.1, 0.2]}) + + +def test_validate_props_raises_for_nested_vector() -> None: + with pytest.raises(WeaviateInsertInvalidPropertyError): + _validate_props({"vector": [0.1, 0.2]}, nested=True) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 7287c2ffd..b188a0bad 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -22,6 +22,7 @@ VectorDistances, ) from weaviate.collections.classes.config_vectors import _VectorConfigCreate +from weaviate.exceptions import WeaviateInsertInvalidPropertyError DEFAULTS = { "vectorConfig": { @@ -1380,7 +1381,16 @@ def make_expected_props() -> List[dict]: assert out["properties"] == make_expected_props() -@pytest.mark.parametrize("name", ["id", "vector"]) +def test_config_with_invalid_property_id(): + with pytest.raises(WeaviateInsertInvalidPropertyError): + _CollectionConfigCreate( + name="test", + description="test", + properties=[Property(name="id", data_type=DataType.TEXT)], + ) + + +@pytest.mark.parametrize("name", ["vector"]) def test_config_with_invalid_property(name: str): with pytest.raises(ValidationError): _CollectionConfigCreate( @@ -2682,3 +2692,15 @@ def test_config_with_vectors(vector_config: List[_VectorConfigCreate], expected: def test_object_ttl_config_to_dict(ttl_config: _ObjectTTLConfig, expected: dict) -> None: """Test that _ObjectTTLConfig.to_dict() properly converts timedelta to seconds.""" assert ttl_config.to_dict() == expected + + +def test_nested_property_with_id_name_is_allowed() -> None: + """A nested property named 'id' must not raise — only top-level 'id' is reserved.""" + prop = Property( + name="nested_property", + data_type=DataType.OBJECT, + nested_properties=[ + Property(name="id", data_type=DataType.TEXT), + ], + ) + assert prop.nestedProperties[0].name == "id" diff --git a/weaviate/collections/batch/grpc_batch.py b/weaviate/collections/batch/grpc_batch.py index 7384dcb49..ecf084232 100644 --- a/weaviate/collections/batch/grpc_batch.py +++ b/weaviate/collections/batch/grpc_batch.py @@ -213,9 +213,9 @@ def stream( return connection.grpc_batch_stream(requests=requests) def __translate_properties_from_python_to_grpc( - self, data: Dict[str, Any], refs: ReferenceInputs + self, data: Dict[str, Any], refs: ReferenceInputs, *, nested: bool = False ) -> batch_pb2.BatchObject.Properties: - _validate_props(data) + _validate_props(data, nested=nested) multi_target: List[batch_pb2.BatchObject.MultiTargetRefProps] = [] single_target: List[batch_pb2.BatchObject.SingleTargetRefProps] = [] @@ -252,7 +252,7 @@ def __translate_properties_from_python_to_grpc( for key, entry in data.items(): if isinstance(entry, dict): - parsed = self.__translate_properties_from_python_to_grpc(entry, {}) + parsed = self.__translate_properties_from_python_to_grpc(entry, {}, nested=True) object_properties.append( base_pb2.ObjectProperties( prop_name=key, @@ -286,7 +286,11 @@ def __translate_properties_from_python_to_grpc( empty_list_props=parsed.empty_list_props, ) for v in entry - if (parsed := self.__translate_properties_from_python_to_grpc(v, {})) + if ( + parsed := self.__translate_properties_from_python_to_grpc( + v, {}, nested=True + ) + ) ], prop_name=key, ) @@ -333,8 +337,10 @@ def __translate_properties_from_python_to_grpc( ) -def _validate_props(props: Dict[str, Any]) -> None: - if "id" in props or "vector" in props: +def _validate_props(props: Dict[str, Any], nested: bool = False) -> None: + if not nested and "id" in props: + raise WeaviateInsertInvalidPropertyError(props) + if "vector" in props: raise WeaviateInsertInvalidPropertyError(props) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index df79252b6..4bd95f814 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -74,7 +74,7 @@ _Vectors, _VectorsUpdate, ) -from weaviate.exceptions import WeaviateInvalidInputError +from weaviate.exceptions import WeaviateInvalidInputError, WeaviateInsertInvalidPropertyError from weaviate.str_enum import BaseEnum from weaviate.util import _capitalize_first_letter from weaviate.warnings import _Warnings @@ -2069,7 +2069,7 @@ class Property(_ConfigCreateModel): @field_validator("name") def _check_name(cls, v: str) -> str: - if v in ["id", "vector"]: + if v == "vector": raise ValueError(f"Property name '{v}' is reserved and cannot be used") return v @@ -2196,6 +2196,18 @@ class _CollectionConfigCreate(_ConfigCreateModel): def model_post_init(self, __context: Any) -> None: self.name = _capitalize_first_letter(self.name) + @field_validator("properties", mode="after") + @classmethod + def _check_top_level_property_names( + cls, v: Optional[Sequence["Property"]] + ) -> Optional[Sequence["Property"]]: + if v is None: + return v + for prop in v: + if prop.name == "id": + raise WeaviateInsertInvalidPropertyError({"name": prop.name}) + return v + @field_validator("vectorizerConfig", "vectorConfig", mode="after") @classmethod def validate_vector_names(