diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 187cd853cb27..ef68074f22f7 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -8,6 +8,7 @@ #### Bugs Fixed * Fixed bug where `CosmosClient` construction with AAD credentials would crash at startup if the semantic reranking inference endpoint environment variable was not set, even when semantic reranking was not being used. The inference service is now lazily initialized on first use. See [PR 46243](https://github.com/Azure/azure-sdk-for-python/pull/46243) +* Fixed bug where the global endpoint manager would fall back to synthesized public regional endpoints (e.g., `https://{account}-{region}.documents.azure.com`) even when `enable_endpoint_discovery=False` was specified. This caused intermittent `403 Forbidden` ("Request originated from IP ... through public internet") errors for private-endpoint-only Cosmos DB accounts, because the synthesized regional FQDNs are not always present in the customer's `privatelink.documents.azure.com` private DNS zone. With this fix, `enable_endpoint_discovery=False` strictly pins the client to the URL supplied at construction time. See [Issue 46219](https://github.com/Azure/azure-sdk-for-python/issues/46219). #### Other Changes * Reduced per-client memory overhead when partition-level circuit breaker (PPCB) is enabled by sharing the partition key range routing map cache across CosmosClient instances connected to the same endpoint, and stripping unused fields from cached partition key ranges using compact PKRange namedtuples. 
See [PR 46297](https://github.com/Azure/azure-sdk-for-python/pull/46297) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_endpoint_manager.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_endpoint_manager.py index bb3687de3332..a5a20960d2d0 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_global_endpoint_manager.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_global_endpoint_manager.py @@ -203,9 +203,18 @@ def runner(): def _GetDatabaseAccount(self, **kwargs) -> DatabaseAccount: """Gets the database account. - First tries by using the default endpoint, and if that doesn't work, - use the endpoints for the preferred locations in the order they are - specified, to get the database account. + First tries by using the default endpoint. If that doesn't work and endpoint + discovery is enabled, falls back to trying the endpoints for the preferred + locations in the order they are specified, to get the database account. + + When endpoint discovery is disabled (`enable_endpoint_discovery=False`), + the fallback is skipped and the original exception is re-raised. This + guarantees the SDK only contacts the URL the caller supplied -- important + for private-endpoint deployments where synthesized regional hostnames + (e.g., ``https://{account}-{region}.documents.azure.com``) are not in + the customer's private DNS zone and would otherwise resolve to public + IPs and be rejected by the account's firewall. + :returns: A `DatabaseAccount` instance representing the Cosmos DB Database Account and the endpoint that was used for the request. :rtype: ~azure.cosmos.DatabaseAccount @@ -223,6 +232,11 @@ def _GetDatabaseAccount(self, **kwargs) -> DatabaseAccount: except (exceptions.CosmosHttpResponseError, AzureError) as e: if isinstance(e, exceptions.CosmosHttpResponseError): e.endpoint = self.DefaultEndpoint + # Honor the user's request to disable endpoint discovery: do not try + # synthesized regional/locational endpoints. 
Re-raise so the caller can + # decide how to handle the failure against the supplied endpoint. + if not self.client.connection_policy.EnableEndpointDiscovery: + raise for location_name in self.PreferredLocations: locational_endpoint = LocationCache.GetLocationalEndpoint(self.DefaultEndpoint, location_name) try: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client.py index fa109d594c31..a8927f6b2bf3 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client.py @@ -172,7 +172,10 @@ class CosmosClient: # pylint: disable=client-accepts-api-version-keyword the operation is not guaranteed to be idempotent. This should only be enabled if the application can tolerate such risks or has logic to safely detect and handle duplicate operations. :keyword bool enable_endpoint_discovery: Enable endpoint discovery for - geo-replicated database accounts. (Default: True) + geo-replicated database accounts. (Default: True) When set to False, the + client only contacts the URL supplied at construction time and will not + fall back to synthesized regional endpoints; useful for private-endpoint + deployments. :keyword list[str] preferred_locations: The preferred locations for geo-replicated database accounts. :keyword list[str] excluded_locations: The excluded locations to be skipped from preferred locations. The locations in this list are specified as the names of the azure Cosmos locations like, 'West US', 'East US' and so on. 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_endpoint_manager_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_endpoint_manager_async.py index f3e6bfce4e8c..503629521df2 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_endpoint_manager_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_global_endpoint_manager_async.py @@ -218,9 +218,18 @@ async def _endpoints_health_check(self, **kwargs): async def _GetDatabaseAccount(self, **kwargs) -> DatabaseAccount: """Gets the database account. - First tries by using the default endpoint, and if that doesn't work, - use the endpoints for the preferred locations in the order they are - specified, to get the database account. + First tries by using the default endpoint. If that doesn't work and endpoint + discovery is enabled, falls back to trying the endpoints for the preferred + locations in the order they are specified, to get the database account. + + When endpoint discovery is disabled (`enable_endpoint_discovery=False`), + the fallback is skipped and the original exception is re-raised. This + guarantees the SDK only contacts the URL the caller supplied -- important + for private-endpoint deployments where synthesized regional hostnames + (e.g., ``https://{account}-{region}.documents.azure.com``) are not in + the customer's private DNS zone and would otherwise resolve to public + IPs and be rejected by the account's firewall. + :returns: A `DatabaseAccount` instance representing the Cosmos DB Database Account and the endpoint that was used for the request. :rtype: ~azure.cosmos.DatabaseAccount @@ -238,6 +247,11 @@ async def _GetDatabaseAccount(self, **kwargs) -> DatabaseAccount: except (exceptions.CosmosHttpResponseError, AzureError) as e: if isinstance(e, exceptions.CosmosHttpResponseError): e.endpoint = self.DefaultEndpoint + # Honor the user's request to disable endpoint discovery: do not try + # synthesized regional/locational endpoints. 
Re-raise so the caller can + # decide how to handle the failure against the supplied endpoint. + if not self.client.connection_policy.EnableEndpointDiscovery: + raise for location_name in self.PreferredLocations: locational_endpoint = LocationCache.GetLocationalEndpoint(self.DefaultEndpoint, location_name) try: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/cosmos_client.py b/sdk/cosmos/azure-cosmos/azure/cosmos/cosmos_client.py index 360bdca53a63..c95d069ce249 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/cosmos_client.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/cosmos_client.py @@ -194,7 +194,10 @@ class CosmosClient: # pylint: disable=client-accepts-api-version-keyword even if the operation is not guaranteed to be idempotent. This should only be enabled if the application can tolerate such risks or has logic to safely detect and handle duplicate operations. :keyword bool enable_endpoint_discovery: Enable endpoint discovery for - geo-replicated database accounts. (Default: True) + geo-replicated database accounts. (Default: True) When set to False, the + client only contacts the URL supplied at construction time and will not + fall back to synthesized regional endpoints; useful for private-endpoint + deployments. :keyword list[str] preferred_locations: The preferred locations for geo-replicated database accounts. :keyword list[str] excluded_locations: The excluded locations to be skipped from preferred locations. The locations in this list are specified as the names of the azure Cosmos locations like, 'West US', 'East US' and so on. 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py b/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py index 5e912dce7cc9..a38dd66cd4f3 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/documents.py @@ -302,6 +302,12 @@ class ConnectionPolicy: # pylint: disable=too-many-instance-attributes automatically discover the current write and read locations and direct the requests to the correct location taking into consideration of the user's preference(if provided) as PreferredLocations. + When EnableEndpointDiscovery is false, the client will only send + requests to the endpoint supplied at construction time. No + synthesized regional/locational endpoints will be tried, even on + transient failures of the initial database account read. Set this to + false when the account is reachable only via a private endpoint and + regional FQDNs are not present in the private DNS zone. :ivar PreferredLocations: Gets or sets the preferred locations for geo-replicated database accounts. When EnableEndpointDiscovery is true and PreferredLocations is diff --git a/sdk/cosmos/azure-cosmos/tests/test_endpoint_discovery_disabled.py b/sdk/cosmos/azure-cosmos/tests/test_endpoint_discovery_disabled.py new file mode 100644 index 000000000000..c4549fe064db --- /dev/null +++ b/sdk/cosmos/azure-cosmos/tests/test_endpoint_discovery_disabled.py @@ -0,0 +1,106 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. + +"""Unit tests for the ``enable_endpoint_discovery=False`` contract on the +global endpoint manager. 
+ +These tests guard the fix for +https://github.com/Azure/azure-sdk-for-python/issues/46219 where the +synchronous and asynchronous ``_GlobalEndpointManager._GetDatabaseAccount`` +methods would fall back to synthesized public regional endpoints +(``https://{account}-{region}.documents.azure.com``) even when the caller +explicitly disabled endpoint discovery -- causing intermittent +``403 Forbidden`` errors against private-endpoint-only accounts whose +private DNS zone did not include the regional FQDN. +""" + +import unittest +import unittest.mock + +import pytest + +from azure.cosmos import documents, exceptions +from azure.cosmos._global_endpoint_manager import _GlobalEndpointManager +from azure.cosmos.http_constants import StatusCodes + + +_DEFAULT_ENDPOINT = "https://contoso.documents.azure.com:443/" + + +class _FakeClient: + """Minimal client surface used by ``_GlobalEndpointManager``.""" + + def __init__(self, connection_policy: documents.ConnectionPolicy): + self.connection_policy = connection_policy + self.url_connection = _DEFAULT_ENDPOINT + + +def _make_manager(*, enable_endpoint_discovery: bool, preferred_locations): + policy = documents.ConnectionPolicy() + policy.EnableEndpointDiscovery = enable_endpoint_discovery + policy.PreferredLocations = list(preferred_locations) + return _GlobalEndpointManager(_FakeClient(policy)) + + +def _raise_503(_endpoint, **_kwargs): + raise exceptions.CosmosHttpResponseError( + status_code=StatusCodes.SERVICE_UNAVAILABLE, + message="Service Unavailable", + ) + + +@pytest.mark.cosmosEmulator +class TestEndpointDiscoveryDisabled(unittest.TestCase): + """Synchronous ``_GlobalEndpointManager._GetDatabaseAccount`` contract.""" + + def test_disabled_does_not_try_locational_endpoints(self): + mgr = _make_manager( + enable_endpoint_discovery=False, + preferred_locations=["North Europe", "West US"], + ) + + stub = unittest.mock.Mock(side_effect=_raise_503) + with unittest.mock.patch.object(mgr, "_GetDatabaseAccountStub", stub): + with 
self.assertRaises(exceptions.CosmosHttpResponseError): + mgr._GetDatabaseAccount() + + # The stub must have been called exactly once -- against the user's URL. + self.assertEqual(stub.call_count, 1) + called_endpoint = stub.call_args.args[0] + self.assertEqual(called_endpoint, _DEFAULT_ENDPOINT) + + def test_disabled_does_not_synthesize_when_no_preferred_locations(self): + mgr = _make_manager( + enable_endpoint_discovery=False, + preferred_locations=[], + ) + + stub = unittest.mock.Mock(side_effect=_raise_503) + with unittest.mock.patch.object(mgr, "_GetDatabaseAccountStub", stub): + with self.assertRaises(exceptions.CosmosHttpResponseError): + mgr._GetDatabaseAccount() + + self.assertEqual(stub.call_count, 1) + + def test_enabled_still_falls_back_to_preferred_locations(self): + """Regression check -- behavior with discovery on must be unchanged.""" + mgr = _make_manager( + enable_endpoint_discovery=True, + preferred_locations=["North Europe", "West US"], + ) + + stub = unittest.mock.Mock(side_effect=_raise_503) + with unittest.mock.patch.object(mgr, "_GetDatabaseAccountStub", stub): + with self.assertRaises(exceptions.CosmosHttpResponseError): + mgr._GetDatabaseAccount() + + # 1 attempt on the default endpoint plus one per preferred location. + self.assertEqual(stub.call_count, 1 + 2) + called_endpoints = [c.args[0] for c in stub.call_args_list] + self.assertEqual(called_endpoints[0], _DEFAULT_ENDPOINT) + self.assertIn("contoso-northeurope", called_endpoints[1]) + self.assertIn("contoso-westus", called_endpoints[2]) + + +if __name__ == "__main__": + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/tests/test_endpoint_discovery_disabled_async.py b/sdk/cosmos/azure-cosmos/tests/test_endpoint_discovery_disabled_async.py new file mode 100644 index 000000000000..d88e82ed3c98 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/tests/test_endpoint_discovery_disabled_async.py @@ -0,0 +1,90 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. 
All rights reserved. + +"""Async unit tests for the ``enable_endpoint_discovery=False`` contract. + +Guards the async portion of the fix for +https://github.com/Azure/azure-sdk-for-python/issues/46219. +""" + +import unittest +import unittest.mock + +import pytest + +from azure.cosmos import documents, exceptions +from azure.cosmos.aio._global_endpoint_manager_async import _GlobalEndpointManager +from azure.cosmos.http_constants import StatusCodes + + +_DEFAULT_ENDPOINT = "https://contoso.documents.azure.com:443/" + + +class _FakeAsyncClient: + def __init__(self, connection_policy: documents.ConnectionPolicy): + self.connection_policy = connection_policy + self.url_connection = _DEFAULT_ENDPOINT + + +def _make_manager(*, enable_endpoint_discovery: bool, preferred_locations): + policy = documents.ConnectionPolicy() + policy.EnableEndpointDiscovery = enable_endpoint_discovery + policy.PreferredLocations = list(preferred_locations) + return _GlobalEndpointManager(_FakeAsyncClient(policy)) + + +async def _raise_503(_endpoint, **_kwargs): + raise exceptions.CosmosHttpResponseError( + status_code=StatusCodes.SERVICE_UNAVAILABLE, + message="Service Unavailable", + ) + + +@pytest.mark.cosmosEmulator +@pytest.mark.asyncio +class TestEndpointDiscoveryDisabledAsync: + + async def test_disabled_does_not_try_locational_endpoints(self): + mgr = _make_manager( + enable_endpoint_discovery=False, + preferred_locations=["North Europe", "West US"], + ) + + stub = unittest.mock.AsyncMock(side_effect=_raise_503) + with unittest.mock.patch.object(mgr, "_GetDatabaseAccountStub", stub): + with pytest.raises(exceptions.CosmosHttpResponseError): + await mgr._GetDatabaseAccount() + + assert stub.call_count == 1 + assert stub.call_args.args[0] == _DEFAULT_ENDPOINT + + async def test_disabled_does_not_synthesize_when_no_preferred_locations(self): + mgr = _make_manager( + enable_endpoint_discovery=False, + preferred_locations=[], + ) + + stub = unittest.mock.AsyncMock(side_effect=_raise_503) + 
with unittest.mock.patch.object(mgr, "_GetDatabaseAccountStub", stub): + with pytest.raises(exceptions.CosmosHttpResponseError): + await mgr._GetDatabaseAccount() + + assert stub.call_count == 1 + + async def test_enabled_still_falls_back_to_preferred_locations(self): + """Regression check -- behavior with discovery on must be unchanged.""" + mgr = _make_manager( + enable_endpoint_discovery=True, + preferred_locations=["North Europe", "West US"], + ) + + stub = unittest.mock.AsyncMock(side_effect=_raise_503) + with unittest.mock.patch.object(mgr, "_GetDatabaseAccountStub", stub): + with pytest.raises(exceptions.CosmosHttpResponseError): + await mgr._GetDatabaseAccount() + + assert stub.call_count == 1 + 2 + called_endpoints = [c.args[0] for c in stub.call_args_list] + assert called_endpoints[0] == _DEFAULT_ENDPOINT + assert "contoso-northeurope" in called_endpoints[1] + assert "contoso-westus" in called_endpoints[2]