From b9187cba1b01e59b5c2be7bfa2fa133283a7c697 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 13 May 2026 13:57:39 -0700 Subject: [PATCH 1/4] Adding embedding policy changes for EGS --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 1 + sdk/cosmos/azure-cosmos/README.md | 31 ++++++++ .../azure/cosmos/aio/_database.py | 30 ++++++-- .../azure-cosmos/azure/cosmos/database.py | 30 ++++++-- .../azure-cosmos/tests/test_vector_policy.py | 70 +++++++++++++++++++ .../tests/test_vector_policy_async.py | 69 ++++++++++++++++++ 6 files changed, 219 insertions(+), 12 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 187cd853cb27..c170cbe30ba3 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -3,6 +3,7 @@ ### 4.16.0b3 (Unreleased) #### Features Added +* Documented support for the optional `embeddingSource` field on entries in `vector_embedding_policy.vectorEmbeddings`, including the `sourcePaths`, `deploymentName`, `modelName`, `endpoint`, and `authType` (`ApiKey` or `Entra`) properties used to have the service generate vector embeddings from the specified item paths. #### Breaking Changes diff --git a/sdk/cosmos/azure-cosmos/README.md b/sdk/cosmos/azure-cosmos/README.md index a0f0d07c1d08..d3e2bbed5db3 100644 --- a/sdk/cosmos/azure-cosmos/README.md +++ b/sdk/cosmos/azure-cosmos/README.md @@ -733,6 +733,37 @@ vector_embedding_policy = { } ``` +A vector embedding may optionally include an `embeddingSource` describing how the embedding is generated by the +service. The source specifies the item paths whose values are embedded (`sourcePaths`), the embedding model +deployment (`deploymentName`, `modelName`), the embedding service `endpoint`, and the `authType` used to call +that endpoint (one of `ApiKey` or `Entra`). A vector embedding policy that includes an embedding source looks like +this: +```python +vector_embedding_policy = { + "vectorEmbeddings": [ + { + "path": "/embedding", + "dataType": "float32", + "dimensions": 1536, + "distanceFunction": "cosine", + "embeddingSource": { + "sourcePaths": [ + "/journal_title", + "/title", + "/toc_abstract", + "/abstract", + "/full_text" + ], + "deploymentName": "text-embedding-3-small", + "modelName": "text-embedding-3-small", + "endpoint": "", + "authType": "ApiKey" + } + } + ] +} +``` + Separately, vector indexes have been added to the already existing indexing_policy and only require two fields per index: the path to the relevant field to be used, and the type of index from the possible options - flat, quantizedFlat, or diskANN. A sample indexing policy with vector indexes would look like this: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_database.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_database.py index 37c13438ae75..8407cab5a445 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_database.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_database.py @@ -206,7 +206,10 @@ async def create_container( note that analytical storage can only be enabled on Synapse Link enabled accounts. :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying data type, and - is generated for a particular distance function. + is generated for a particular distance function. Each vector embedding may also include an optional + ``embeddingSource`` describing how the embedding is produced; the source object supports + ``sourcePaths`` (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, + ``endpoint`` (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -283,7 +286,10 @@ async def create_container( # pylint: disable=too-many-statements note that analytical storage can only be enabled on Synapse Link enabled accounts. :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying data type, and - is generated for a particular distance function. + is generated for a particular distance function. Each vector embedding may also include an optional + ``embeddingSource`` describing how the embedding is produced; the source object supports ``sourcePaths`` + (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, ``endpoint`` + (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -347,7 +353,10 @@ async def create_container( # pylint:disable=docstring-should-be-keyword, too-ma note that analytical storage can only be enabled on Synapse Link enabled accounts. :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying data type, and - is generated for a particular distance function. + is generated for a particular distance function. Each vector embedding may also include an optional + ``embeddingSource`` describing how the embedding is produced; the source object supports ``sourcePaths`` + (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, ``endpoint`` + (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -497,7 +506,10 @@ async def create_container_if_not_exists( note that analytical storage can only be enabled on Synapse Link enabled accounts. :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying - data type, and is generated for a particular distance function. + data type, and is generated for a particular distance function. Each vector embedding may also include an + optional ``embeddingSource`` describing how the embedding is produced; the source object supports + ``sourcePaths`` (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, + ``endpoint`` (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -558,7 +570,10 @@ async def create_container_if_not_exists( note that analytical storage can only be enabled on Synapse Link enabled accounts. :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying - data type, and is generated for a particular distance function. + data type, and is generated for a particular distance function. Each vector embedding may also include an + optional ``embeddingSource`` describing how the embedding is produced; the source object supports + ``sourcePaths`` (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, + ``endpoint`` (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -606,7 +621,10 @@ async def create_container_if_not_exists( # pylint:disable=docstring-should-be-k note that analytical storage can only be enabled on Synapse Link enabled accounts. :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying - data type, and is generated for a particular distance function. + data type, and is generated for a particular distance function. Each vector embedding may also include an + optional ``embeddingSource`` describing how the embedding is produced; the source object supports + ``sourcePaths`` (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, + ``endpoint`` (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/database.py b/sdk/cosmos/azure-cosmos/azure/cosmos/database.py index 22ae16a04457..71fc1c33c419 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/database.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/database.py @@ -201,7 +201,10 @@ def create_container( # pylint:disable=docstring-missing-param `here: https://learn.microsoft.com/azure/cosmos-db/nosql/query/computed-properties?tabs=dotnet` :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying - data type, and is generated for a particular distance function. + data type, and is generated for a particular distance function. Each vector embedding may also include an + optional ``embeddingSource`` describing how the embedding is produced; the source object supports + ``sourcePaths`` (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, + ``endpoint`` (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -274,7 +277,10 @@ def create_container( # pylint:disable=docstring-missing-param `here: https://learn.microsoft.com/azure/cosmos-db/nosql/query/computed-properties?tabs=dotnet` :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying - data type, and is generated for a particular distance function. + data type, and is generated for a particular distance function. Each vector embedding may also include an + optional ``embeddingSource`` describing how the embedding is produced; the source object supports + ``sourcePaths`` (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, + ``endpoint`` (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -333,7 +339,10 @@ def create_container( # pylint:disable=docstring-missing-param, too-many-statem `here: https://learn.microsoft.com/azure/cosmos-db/nosql/query/computed-properties?tabs=dotnet` :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying - data type, and is generated for a particular distance function. + data type, and is generated for a particular distance function. Each vector embedding may also include an + optional ``embeddingSource`` describing how the embedding is produced; the source object supports + ``sourcePaths`` (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, + ``endpoint`` (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -485,7 +494,10 @@ def create_container_if_not_exists( # pylint:disable=docstring-missing-param `here: https://learn.microsoft.com/azure/cosmos-db/nosql/query/computed-properties?tabs=dotnet` :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying data type, and - is generated for a particular distance function. + is generated for a particular distance function. Each vector embedding may also include an optional + ``embeddingSource`` describing how the embedding is produced; the source object supports ``sourcePaths`` + (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, ``endpoint`` + (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -544,7 +556,10 @@ def create_container_if_not_exists( # pylint:disable=docstring-missing-param `here: https://learn.microsoft.com/azure/cosmos-db/nosql/query/computed-properties?tabs=dotnet` :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying data type, and - is generated for a particular distance function. + is generated for a particular distance function. Each vector embedding may also include an optional + ``embeddingSource`` describing how the embedding is produced; the source object supports ``sourcePaths`` + (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, ``endpoint`` + (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. @@ -589,7 +604,10 @@ def create_container_if_not_exists( # pylint:disable=docstring-missing-param, d `here: https://learn.microsoft.com/azure/cosmos-db/nosql/query/computed-properties?tabs=dotnet` :keyword dict[str, Any] vector_embedding_policy: The vector embedding policy for the container. Each vector embedding possesses a predetermined number of dimensions, is associated with an underlying data type, and - is generated for a particular distance function. + is generated for a particular distance function. Each vector embedding may also include an optional + ``embeddingSource`` describing how the embedding is produced; the source object supports ``sourcePaths`` + (list of item paths whose values are embedded), ``deploymentName``, ``modelName``, ``endpoint`` + (embedding service endpoint), and ``authType`` (one of ``ApiKey`` or ``Entra``). :keyword dict[str, Any] change_feed_policy: The change feed policy to apply 'retentionDuration' to the container. :keyword dict[str, Any] full_text_policy: **provisional** The full text policy for the container. diff --git a/sdk/cosmos/azure-cosmos/tests/test_vector_policy.py b/sdk/cosmos/azure-cosmos/tests/test_vector_policy.py index d4d37fd44034..8fd93d548169 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_vector_policy.py +++ b/sdk/cosmos/azure-cosmos/tests/test_vector_policy.py @@ -580,6 +580,76 @@ def test_fail_create_vector_embedding_policy(self): assert e.status_code == 400 assert "The Vector Embedding Policy has an invalid DistanceFunction:handMeasured" in e.http_error_message + def test_create_vector_embedding_with_embedding_source(self): + # Verify a vector embedding policy that includes the optional ``embeddingSource`` round-trips + # correctly for both ``ApiKey`` and ``Entra`` auth types. + for auth_type in ["ApiKey", "Entra"]: + vector_embedding_policy = { + "vectorEmbeddings": [ + { + "path": "/embedding", + "dataType": "float32", + "dimensions": 1536, + "distanceFunction": "cosine", + "embeddingSource": { + "sourcePaths": [ + "/journal_title", + "/title", + "/toc_abstract", + "/abstract", + "/full_text", + ], + "deploymentName": "text-embedding-3-small", + "modelName": "text-embedding-3-small", + "endpoint": "https://embedding-south-central.cognitiveservices.azure.com/", + "authType": auth_type, + }, + } + ] + } + container_id = "vector_embedding_source_container_" + auth_type.lower() + "_" + str(uuid.uuid4()) + created_container = self.test_db.create_container( + id=container_id, + partition_key=PartitionKey(path="/id"), + vector_embedding_policy=vector_embedding_policy, + ) + properties = created_container.read() + assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy + self.test_db.delete_container(container_id) + + def test_fail_create_vector_embedding_source_invalid_auth_type(self): + # An ``embeddingSource`` with an unsupported ``authType`` should be rejected by the service + # (only ``ApiKey`` and ``Entra`` are valid). + vector_embedding_policy = { + "vectorEmbeddings": [ + { + "path": "/embedding", + "dataType": "float32", + "dimensions": 1536, + "distanceFunction": "cosine", + "embeddingSource": { + "sourcePaths": ["/title"], + "deploymentName": "text-embedding-3-small", + "modelName": "text-embedding-3-small", + "endpoint": "https://embedding-south-central.cognitiveservices.azure.com/", + "authType": "NotAnAuthType", + }, + } + ] + } + + try: + self.test_db.create_container( + id="vector_embedding_source_bad_auth_" + str(uuid.uuid4()), + partition_key=PartitionKey(path="/id"), + vector_embedding_policy=vector_embedding_policy, + ) + pytest.fail("Container creation should have failed for invalid embeddingSource authType.") + except exceptions.CosmosHttpResponseError as e: + assert e.status_code == 400 + assert "The auth type provided in the embedding source of vector policy is invalid." \ + in e.http_error_message + if __name__ == '__main__': unittest.main() diff --git a/sdk/cosmos/azure-cosmos/tests/test_vector_policy_async.py b/sdk/cosmos/azure-cosmos/tests/test_vector_policy_async.py index dd0c779982db..f70e511b61ba 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_vector_policy_async.py +++ b/sdk/cosmos/azure-cosmos/tests/test_vector_policy_async.py @@ -492,6 +492,75 @@ async def test_fail_create_vector_embedding_policy_async(self): assert e.status_code == 400 assert "The Vector Embedding Policy has an invalid DistanceFunction:handMeasured" in e.http_error_message + async def test_create_vector_embedding_with_embedding_source_async(self): + # Verify a vector embedding policy that includes the optional ``embeddingSource`` round-trips + # correctly for both ``ApiKey`` and ``Entra`` auth types. + for auth_type in ["ApiKey", "Entra"]: + vector_embedding_policy = { + "vectorEmbeddings": [ + { + "path": "/embedding", + "dataType": "float32", + "dimensions": 1536, + "distanceFunction": "cosine", + "embeddingSource": { + "sourcePaths": [ + "/journal_title", + "/title", + "/toc_abstract", + "/abstract", + "/full_text", + ], + "deploymentName": "text-embedding-3-small", + "modelName": "text-embedding-3-small", + "endpoint": "https://embedding-south-central.cognitiveservices.azure.com/", + "authType": auth_type, + }, + } + ] + } + container_id = "vector_embedding_source_container_" + auth_type.lower() + "_" + str(uuid.uuid4()) + created_container = await self.test_db.create_container( + id=container_id, + partition_key=PartitionKey(path="/id"), + vector_embedding_policy=vector_embedding_policy, + ) + properties = await created_container.read() + assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy + await self.test_db.delete_container(container_id) + + async def test_fail_create_vector_embedding_source_invalid_auth_type_async(self): + # An ``embeddingSource`` with an unsupported ``authType`` should be rejected by the service + # (only ``ApiKey`` and ``Entra`` are valid). + vector_embedding_policy = { + "vectorEmbeddings": [ + { + "path": "/embedding", + "dataType": "float32", + "dimensions": 1536, + "distanceFunction": "cosine", + "embeddingSource": { + "sourcePaths": ["/title"], + "deploymentName": "text-embedding-3-small", + "modelName": "text-embedding-3-small", + "endpoint": "https://embedding-south-central.cognitiveservices.azure.com/", + "authType": "NotAnAuthType", + }, + } + ] + } + try: + await self.test_db.create_container( + id="vector_embedding_source_bad_auth_" + str(uuid.uuid4()), + partition_key=PartitionKey(path="/id"), + vector_embedding_policy=vector_embedding_policy, + ) + pytest.fail("Container creation should have failed for invalid embeddingSource authType.") + except exceptions.CosmosHttpResponseError as e: + assert e.status_code == 400 + assert "The auth type provided in the embedding source of vector policy is invalid." \ + in e.http_error_message + if __name__ == '__main__': unittest.main() From c66ad121d006d33a1a24d95aa25aee72de5e18fd Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 13 May 2026 14:02:11 -0700 Subject: [PATCH 2/4] Updating changelog --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index c170cbe30ba3..63cb8813dd3b 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -3,7 +3,7 @@ ### 4.16.0b3 (Unreleased) #### Features Added -* Documented support for the optional `embeddingSource` field on entries in `vector_embedding_policy.vectorEmbeddings`, including the `sourcePaths`, `deploymentName`, `modelName`, `endpoint`, and `authType` (`ApiKey` or `Entra`) properties used to have the service generate vector embeddings from the specified item paths. +* Documented support for the optional `embeddingSource` field on entries in `vector_embedding_policy.vectorEmbeddings`, properties used to have the service generate vector embeddings from the specified item paths. See [46870](https://github.com/Azure/azure-sdk-for-python/pull/46870) #### Breaking Changes From f15862ffa32940632c3c544dc1aee62f4489aa0a Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 13 May 2026 14:06:47 -0700 Subject: [PATCH 3/4] Updating changelog --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 63cb8813dd3b..358a5a7ce1a3 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -3,7 +3,7 @@ ### 4.16.0b3 (Unreleased) #### Features Added -* Documented support for the optional `embeddingSource` field on entries in `vector_embedding_policy.vectorEmbeddings`, properties used to have the service generate vector embeddings from the specified item paths. See [46870](https://github.com/Azure/azure-sdk-for-python/pull/46870) +* Documented support for the optional `embeddingSource` field on entries in `vector_embedding_policy.vectorEmbeddings`, which can be used to have the service generate vector embeddings from the specified item paths. See [46870](https://github.com/Azure/azure-sdk-for-python/pull/46870) #### Breaking Changes From 9df623a6be69d7a050349963b40d8780593c5f47 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 13 May 2026 15:44:06 -0700 Subject: [PATCH 4/4] Resolving comments --- .../azure-cosmos/tests/test_vector_policy.py | 168 ++++++++++-------- .../tests/test_vector_policy_async.py | 155 ++++++++-------- 2 files changed, 178 insertions(+), 145 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/tests/test_vector_policy.py b/sdk/cosmos/azure-cosmos/tests/test_vector_policy.py index 8fd93d548169..7e34fc8b0b2b 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_vector_policy.py +++ b/sdk/cosmos/azure-cosmos/tests/test_vector_policy.py @@ -88,13 +88,16 @@ def test_create_valid_vector_embedding_policy(self): "dimensions": 256, "distanceFunction": "euclidean" }]} + container_id = 'vector_container_' + data_type created_container = self.test_db.create_container( - id='vector_container_' + data_type, + id=container_id, partition_key=PartitionKey(path="/id"), vector_embedding_policy=vector_embedding_policy) - properties = created_container.read() - assert properties["vectorEmbeddingPolicy"]["vectorEmbeddings"][0]["dataType"] == data_type - self.test_db.delete_container('vector_container_' + data_type) + try: + properties = created_container.read() + assert properties["vectorEmbeddingPolicy"]["vectorEmbeddings"][0]["dataType"] == data_type + finally: + self.test_db.delete_container(container_id) @unittest.skip def test_create_valid_vector_indexing_policy(self): @@ -107,9 +110,11 @@ def test_create_valid_vector_indexing_policy(self): partition_key=PartitionKey(path="/id"), vector_embedding_policy=vector_embedding_policy, indexing_policy=indexing_policy) - properties = created_container.read() - assert properties['indexingPolicy']['vectorIndexes'] == indexing_policy['vectorIndexes'] - self.test_db.delete_container(created_container.id) + try: + properties = created_container.read() + assert properties['indexingPolicy']['vectorIndexes'] == indexing_policy['vectorIndexes'] + finally: + self.test_db.delete_container(created_container.id) def test_create_vector_embedding_container(self): indexing_policy = { @@ -148,10 +153,12 @@ def test_create_vector_embedding_container(self): vector_embedding_policy=vector_embedding_policy, indexing_policy=indexing_policy ) - properties = created_container.read() - assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy - assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] - self.test_db.delete_container(container_id) + try: + properties = created_container.read() + assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy + assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] + finally: + self.test_db.delete_container(container_id) # Pass a vector indexing policy with hierarchical vectorIndexShardKey value indexing_policy = { @@ -165,10 +172,12 @@ def test_create_vector_embedding_container(self): indexing_policy=indexing_policy, vector_embedding_policy=vector_embedding_policy ) - properties = created_container.read() - assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy - assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] - self.test_db.delete_container(container_id) + try: + properties = created_container.read() + assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy + assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] + finally: + self.test_db.delete_container(container_id) def test_replace_vector_indexing_policy(self): # Replace should work so long as the new indexing policy doesn't change the vector indexes, and as long as @@ -240,15 +249,18 @@ def test_replace_vector_indexing_policy(self): "indexingSearchListSize": 100 }] } - self.test_db.replace_container( - created_container, - PartitionKey(path="/id"), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=new_indexing_policy) - properties = created_container.read() - assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy - assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] - self.test_db.delete_container(container_id) + + try: + self.test_db.replace_container( + created_container, + PartitionKey(path="/id"), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=new_indexing_policy) + properties = created_container.read() + assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy + assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] + finally: + self.test_db.delete_container(container_id) def test_fail_create_vector_indexing_policy(self): vector_embedding_policy = { @@ -453,53 +465,55 @@ def test_fail_replace_vector_indexing_policy(self): indexing_policy=indexing_policy, vector_embedding_policy=vector_embedding_policy ) - # don't provide vector embedding policy - try: - self.test_db.replace_container( - created_container, - PartitionKey(path="/id"), - indexing_policy=indexing_policy) - pytest.fail("Container replace should have failed for missing embedding policy.") - except exceptions.CosmosHttpResponseError as e: - assert e.status_code == 400 - assert ("The Vector Indexing Policy's path::/vector1 not matching in Embedding's path." - in e.http_error_message) - # using a new indexing policy - new_indexing_policy = { - "vectorIndexes": [ - {"path": "/vector1", "type": "quantizedFlat"}] - } - try: - self.test_db.replace_container( - created_container, - PartitionKey(path="/id"), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=new_indexing_policy) - pytest.fail("Container replace should have failed for new indexing policy.") - except exceptions.CosmosHttpResponseError as e: - assert e.status_code == 400 - assert ("Paths in existing vector indexing policy cannot be modified in Collection Replace" - in e.http_error_message) - # using a new vector embedding policy - new_embedding_policy = { - "vectorEmbeddings": [ - { - "path": "/vector1", - "dataType": "float32", - "dimensions": 384, - "distanceFunction": "euclidean"}]} try: - self.test_db.replace_container( - created_container, - PartitionKey(path="/id"), - vector_embedding_policy=new_embedding_policy, - indexing_policy=indexing_policy) - pytest.fail("Container replace should have failed for new embedding policy.") - except exceptions.CosmosHttpResponseError as e: - assert e.status_code == 400 - assert ("Paths in existing embedding policy cannot be modified in Collection Replace" - in e.http_error_message) - self.test_db.delete_container(container_id) + # don't provide vector embedding policy + try: + self.test_db.replace_container( + created_container, + PartitionKey(path="/id"), + indexing_policy=indexing_policy) + pytest.fail("Container replace should have failed for missing embedding policy.") + except exceptions.CosmosHttpResponseError as e: + assert e.status_code == 400 + assert ("The Vector Indexing Policy's path::/vector1 not matching in Embedding's path." + in e.http_error_message) + # using a new indexing policy + new_indexing_policy = { + "vectorIndexes": [ + {"path": "/vector1", "type": "quantizedFlat"}] + } + try: + self.test_db.replace_container( + created_container, + PartitionKey(path="/id"), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=new_indexing_policy) + pytest.fail("Container replace should have failed for new indexing policy.") + except exceptions.CosmosHttpResponseError as e: + assert e.status_code == 400 + assert ("Paths in existing vector indexing policy cannot be modified in Collection Replace" + in e.http_error_message) + # using a new vector embedding policy + new_embedding_policy = { + "vectorEmbeddings": [ + { + "path": "/vector1", + "dataType": "float32", + "dimensions": 384, + "distanceFunction": "euclidean"}]} + try: + self.test_db.replace_container( + created_container, + PartitionKey(path="/id"), + vector_embedding_policy=new_embedding_policy, + indexing_policy=indexing_policy) + pytest.fail("Container replace should have failed for new embedding policy.") + except exceptions.CosmosHttpResponseError as e: + assert e.status_code == 400 + assert ("Paths in existing embedding policy cannot be modified in Collection Replace" + in e.http_error_message) + finally: + self.test_db.delete_container(container_id) def test_fail_create_vector_embedding_policy(self): # Using invalid data type @@ -580,6 +594,7 @@ def test_fail_create_vector_embedding_policy(self): assert e.status_code == 400 assert "The Vector Embedding Policy has an invalid DistanceFunction:handMeasured" in e.http_error_message + @unittest.skip("Skipped until the embedding generation service is in preview.") def test_create_vector_embedding_with_embedding_source(self): # Verify a vector embedding policy that includes the optional ``embeddingSource`` round-trips # correctly for both ``ApiKey`` and ``Entra`` auth types. @@ -601,7 +616,7 @@ def test_create_vector_embedding_with_embedding_source(self): ], "deploymentName": "text-embedding-3-small", "modelName": "text-embedding-3-small", - "endpoint": "https://embedding-south-central.cognitiveservices.azure.com/", + "endpoint": "https://example.com", "authType": auth_type, }, } @@ -613,10 +628,13 @@ def test_create_vector_embedding_with_embedding_source(self): partition_key=PartitionKey(path="/id"), vector_embedding_policy=vector_embedding_policy, ) - properties = created_container.read() - assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy - self.test_db.delete_container(container_id) + try: + properties = created_container.read() + assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy + finally: + self.test_db.delete_container(container_id) + @unittest.skip("Skipped until the embedding generation service is in preview.") def test_fail_create_vector_embedding_source_invalid_auth_type(self): # An ``embeddingSource`` with an unsupported ``authType`` should be rejected by the service # (only ``ApiKey`` and ``Entra`` are valid). @@ -631,7 +649,7 @@ def test_fail_create_vector_embedding_source_invalid_auth_type(self): "sourcePaths": ["/title"], "deploymentName": "text-embedding-3-small", "modelName": "text-embedding-3-small", - "endpoint": "https://embedding-south-central.cognitiveservices.azure.com/", + "endpoint": "https://example.com", "authType": "NotAnAuthType", }, } diff --git a/sdk/cosmos/azure-cosmos/tests/test_vector_policy_async.py b/sdk/cosmos/azure-cosmos/tests/test_vector_policy_async.py index f70e511b61ba..abcc06fb7da0 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_vector_policy_async.py +++ b/sdk/cosmos/azure-cosmos/tests/test_vector_policy_async.py @@ -57,9 +57,11 @@ async def test_create_valid_vector_indexing_policy_async(self): partition_key=PartitionKey(path="/id"), vector_embedding_policy=vector_embedding_policy, indexing_policy=indexing_policy) - properties = await created_container.read() - assert properties['indexingPolicy']['vectorIndexes'] == indexing_policy['vectorIndexes'] - await self.test_db.delete_container(created_container.id) + try: + properties = await created_container.read() + assert properties['indexingPolicy']['vectorIndexes'] == indexing_policy['vectorIndexes'] + finally: + await self.test_db.delete_container(created_container.id) async def test_create_valid_vector_embedding_policy_async(self): # Using valid data types @@ -78,9 +80,11 @@ async def test_create_valid_vector_embedding_policy_async(self): id=container_id, partition_key=PartitionKey(path="/id"), vector_embedding_policy=vector_embedding_policy) - properties = await created_container.read() - assert properties["vectorEmbeddingPolicy"]["vectorEmbeddings"][0]["dataType"] == data_type - await self.test_db.delete_container(container_id) + try: + properties = await created_container.read() + assert properties["vectorEmbeddingPolicy"]["vectorEmbeddings"][0]["dataType"] == data_type + finally: + await self.test_db.delete_container(container_id) async def test_create_vector_embedding_container_async(self): indexing_policy = { @@ -121,10 +125,12 @@ async def test_create_vector_embedding_container_async(self): vector_embedding_policy=vector_embedding_policy, indexing_policy=indexing_policy ) - properties = await created_container.read() - assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy - assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] - await self.test_db.delete_container(container_id) + try: + properties = await created_container.read() + assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy + assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] + finally: + await self.test_db.delete_container(container_id) async def test_replace_vector_indexing_policy_async(self): # Replace should work so long as the new indexing policy doesn't change the vector indexes, and as long as @@ -196,15 +202,18 @@ async def test_replace_vector_indexing_policy_async(self): "indexingSearchListSize": 100 }] } - await self.test_db.replace_container( - created_container, - PartitionKey(path="/id"), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=new_indexing_policy) - properties = await created_container.read() - assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy - assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] - await self.test_db.delete_container(container_id) + + try: + await self.test_db.replace_container( + created_container, + PartitionKey(path="/id"), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=new_indexing_policy) + properties = await created_container.read() + assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy + assert properties["indexingPolicy"]["vectorIndexes"] == indexing_policy["vectorIndexes"] + finally: + await self.test_db.delete_container(container_id) async def test_fail_create_vector_indexing_policy_async(self): vector_embedding_policy = { @@ -365,53 +374,55 @@ async def test_fail_replace_vector_indexing_policy_async(self): indexing_policy=indexing_policy, vector_embedding_policy=vector_embedding_policy ) - # don't provide vector embedding policy - try: - await self.test_db.replace_container( - created_container, - PartitionKey(path="/id"), - indexing_policy=indexing_policy) - pytest.fail("Container replace should have failed for missing embedding policy.") - except exceptions.CosmosHttpResponseError as e: - assert e.status_code == 400 - assert ("The Vector Indexing Policy's path::/vector1 not matching in Embedding's path." - in e.http_error_message) - # using a new indexing policy - new_indexing_policy = { - "vectorIndexes": [ - {"path": "/vector1", "type": "quantizedFlat"}] - } - try: - await self.test_db.replace_container( - created_container, - PartitionKey(path="/id"), - vector_embedding_policy=vector_embedding_policy, - indexing_policy=new_indexing_policy) - pytest.fail("Container replace should have failed for new indexing policy.") - except exceptions.CosmosHttpResponseError as e: - assert e.status_code == 400 - assert ("Paths in existing vector indexing policy cannot be modified in Collection Replace" - in e.http_error_message) - # using a new vector embedding policy - new_embedding_policy = { - "vectorEmbeddings": [ - { - "path": "/vector1", - "dataType": "float32", - "dimensions": 384, - "distanceFunction": "euclidean"}]} try: - await self.test_db.replace_container( - created_container, - PartitionKey(path="/id"), - vector_embedding_policy=new_embedding_policy, - indexing_policy=indexing_policy) - pytest.fail("Container replace should have failed for new embedding policy.") - except exceptions.CosmosHttpResponseError as e: - assert e.status_code == 400 - assert ("Paths in existing embedding policy cannot be modified in Collection Replace" - in e.http_error_message) - await self.test_db.delete_container(container_id) + # don't provide vector embedding policy + try: + await self.test_db.replace_container( + created_container, + PartitionKey(path="/id"), + indexing_policy=indexing_policy) + pytest.fail("Container replace should have failed for missing embedding policy.") + except exceptions.CosmosHttpResponseError as e: + assert e.status_code == 400 + assert ("The Vector Indexing Policy's path::/vector1 not matching in Embedding's path." + in e.http_error_message) + # using a new indexing policy + new_indexing_policy = { + "vectorIndexes": [ + {"path": "/vector1", "type": "quantizedFlat"}] + } + try: + await self.test_db.replace_container( + created_container, + PartitionKey(path="/id"), + vector_embedding_policy=vector_embedding_policy, + indexing_policy=new_indexing_policy) + pytest.fail("Container replace should have failed for new indexing policy.") + except exceptions.CosmosHttpResponseError as e: + assert e.status_code == 400 + assert ("Paths in existing vector indexing policy cannot be modified in Collection Replace" + in e.http_error_message) + # using a new vector embedding policy + new_embedding_policy = { + "vectorEmbeddings": [ + { + "path": "/vector1", + "dataType": "float32", + "dimensions": 384, + "distanceFunction": "euclidean"}]} + try: + await self.test_db.replace_container( + created_container, + PartitionKey(path="/id"), + vector_embedding_policy=new_embedding_policy, + indexing_policy=indexing_policy) + pytest.fail("Container replace should have failed for new embedding policy.") + except exceptions.CosmosHttpResponseError as e: + assert e.status_code == 400 + assert ("Paths in existing embedding policy cannot be modified in Collection Replace" + in e.http_error_message) + finally: + await self.test_db.delete_container(container_id) async def test_fail_create_vector_embedding_policy_async(self): # Using invalid data type @@ -492,6 +503,7 @@ async def test_fail_create_vector_embedding_policy_async(self): assert e.status_code == 400 assert "The Vector Embedding Policy has an invalid DistanceFunction:handMeasured" in e.http_error_message + @unittest.skip("Skipped until the embedding generation service is in preview.") async def test_create_vector_embedding_with_embedding_source_async(self): # Verify a vector embedding policy that includes the optional ``embeddingSource`` round-trips # correctly for both ``ApiKey`` and ``Entra`` auth types. @@ -513,7 +525,7 @@ async def test_create_vector_embedding_with_embedding_source_async(self): ], "deploymentName": "text-embedding-3-small", "modelName": "text-embedding-3-small", - "endpoint": "https://embedding-south-central.cognitiveservices.azure.com/", + "endpoint": "https://example.com", "authType": auth_type, }, } @@ -525,10 +537,13 @@ async def test_create_vector_embedding_with_embedding_source_async(self): partition_key=PartitionKey(path="/id"), vector_embedding_policy=vector_embedding_policy, ) - properties = await created_container.read() - assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy - await self.test_db.delete_container(container_id) + try: + properties = await created_container.read() + assert properties["vectorEmbeddingPolicy"] == vector_embedding_policy + finally: + await self.test_db.delete_container(container_id) + @unittest.skip("Skipped until the embedding generation service is in preview.") async def test_fail_create_vector_embedding_source_invalid_auth_type_async(self): # An ``embeddingSource`` with an unsupported ``authType`` should be rejected by the service # (only ``ApiKey`` and ``Entra`` are valid). @@ -543,7 +558,7 @@ async def test_fail_create_vector_embedding_source_invalid_auth_type_async(self) "sourcePaths": ["/title"], "deploymentName": "text-embedding-3-small", "modelName": "text-embedding-3-small", - "endpoint": "https://embedding-south-central.cognitiveservices.azure.com/", + "endpoint": "https://example.com", "authType": "NotAnAuthType", }, }