Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#### Bugs Fixed
* Fixed bug where `CosmosClient` construction with AAD credentials would crash at startup if the semantic reranking inference endpoint environment variable was not set, even when semantic reranking was not being used. The inference service is now lazily initialized on first use. See [PR 46243](https://github.com/Azure/azure-sdk-for-python/pull/46243)
* Fixed bug where the global endpoint manager would fall back to synthesized public regional endpoints (e.g., `https://<account>-<region>.documents.azure.com`) even when `enable_endpoint_discovery=False` was specified. This caused intermittent `403 Forbidden` ("Request originated from IP ... through public internet") errors for private-endpoint-only Cosmos DB accounts, because the synthesized regional FQDNs are not always present in the customer's `privatelink.documents.azure.com` private DNS zone. With this fix, `enable_endpoint_discovery=False` strictly pins the client to the URL supplied at construction time. See [Issue 46219](https://github.com/Azure/azure-sdk-for-python/issues/46219).

#### Other Changes
* Reduced per-client memory overhead when partition-level circuit breaker (PPCB) is enabled by sharing the partition key range routing map cache across CosmosClient instances connected to the same endpoint, and stripping unused fields from cached partition key ranges using compact PKRange namedtuples. See [PR 46297](https://github.com/Azure/azure-sdk-for-python/pull/46297)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,18 @@ def runner():
def _GetDatabaseAccount(self, **kwargs) -> DatabaseAccount:
"""Gets the database account.

First tries by using the default endpoint, and if that doesn't work,
use the endpoints for the preferred locations in the order they are
specified, to get the database account.
First tries by using the default endpoint. If that doesn't work and endpoint
discovery is enabled, falls back to trying the endpoints for the preferred
locations in the order they are specified, to get the database account.

When endpoint discovery is disabled (`enable_endpoint_discovery=False`),
the fallback is skipped and the original exception is re-raised. This
guarantees the SDK only contacts the URL the caller supplied -- important
for private-endpoint deployments where synthesized regional hostnames
(e.g., ``https://<account>-<region>.documents.azure.com``) are not in
the customer's private DNS zone and would otherwise resolve to public
IPs and be rejected by the account's firewall.

:returns: A `DatabaseAccount` instance representing the Cosmos DB Database Account
and the endpoint that was used for the request.
:rtype: ~azure.cosmos.DatabaseAccount
Expand All @@ -223,6 +232,11 @@ def _GetDatabaseAccount(self, **kwargs) -> DatabaseAccount:
except (exceptions.CosmosHttpResponseError, AzureError) as e:
if isinstance(e, exceptions.CosmosHttpResponseError):
e.endpoint = self.DefaultEndpoint
# Honor the user's request to disable endpoint discovery: do not try
# synthesized regional/locational endpoints. Re-raise so the caller can
# decide how to handle the failure against the supplied endpoint.
if not self.client.connection_policy.EnableEndpointDiscovery:
raise
for location_name in self.PreferredLocations:
locational_endpoint = LocationCache.GetLocationalEndpoint(self.DefaultEndpoint, location_name)
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@ class CosmosClient: # pylint: disable=client-accepts-api-version-keyword
the operation is not guaranteed to be idempotent. This should only be enabled if the application can
tolerate such risks or has logic to safely detect and handle duplicate operations.
:keyword bool enable_endpoint_discovery: Enable endpoint discovery for
geo-replicated database accounts. (Default: True)
geo-replicated database accounts. (Default: True) When set to False, the
client only contacts the URL supplied at construction time and will not
fall back to synthesized regional endpoints; useful for private-endpoint
deployments.
:keyword list[str] preferred_locations: The preferred locations for geo-replicated database accounts.
:keyword list[str] excluded_locations: The excluded locations to be skipped from preferred locations. The locations
in this list are specified as the names of the azure Cosmos locations like, 'West US', 'East US' and so on.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,18 @@ async def _endpoints_health_check(self, **kwargs):
async def _GetDatabaseAccount(self, **kwargs) -> DatabaseAccount:
"""Gets the database account.

First tries by using the default endpoint, and if that doesn't work,
use the endpoints for the preferred locations in the order they are
specified, to get the database account.
First tries by using the default endpoint. If that doesn't work and endpoint
discovery is enabled, falls back to trying the endpoints for the preferred
locations in the order they are specified, to get the database account.

When endpoint discovery is disabled (`enable_endpoint_discovery=False`),
the fallback is skipped and the original exception is re-raised. This
guarantees the SDK only contacts the URL the caller supplied -- important
for private-endpoint deployments where synthesized regional hostnames
(e.g., ``https://<account>-<region>.documents.azure.com``) are not in
the customer's private DNS zone and would otherwise resolve to public
IPs and be rejected by the account's firewall.

:returns: A `DatabaseAccount` instance representing the Cosmos DB Database Account
and the endpoint that was used for the request.
:rtype: ~azure.cosmos.DatabaseAccount
Expand All @@ -238,6 +247,11 @@ async def _GetDatabaseAccount(self, **kwargs) -> DatabaseAccount:
except (exceptions.CosmosHttpResponseError, AzureError) as e:
if isinstance(e, exceptions.CosmosHttpResponseError):
e.endpoint = self.DefaultEndpoint
# Honor the user's request to disable endpoint discovery: do not try
# synthesized regional/locational endpoints. Re-raise so the caller can
# decide how to handle the failure against the supplied endpoint.
if not self.client.connection_policy.EnableEndpointDiscovery:
raise
for location_name in self.PreferredLocations:
locational_endpoint = LocationCache.GetLocationalEndpoint(self.DefaultEndpoint, location_name)
try:
Expand Down
5 changes: 4 additions & 1 deletion sdk/cosmos/azure-cosmos/azure/cosmos/cosmos_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,10 @@ class CosmosClient: # pylint: disable=client-accepts-api-version-keyword
even if the operation is not guaranteed to be idempotent. This should only be enabled if the application can
tolerate such risks or has logic to safely detect and handle duplicate operations.
:keyword bool enable_endpoint_discovery: Enable endpoint discovery for
geo-replicated database accounts. (Default: True)
geo-replicated database accounts. (Default: True) When set to False, the
client only contacts the URL supplied at construction time and will not
fall back to synthesized regional endpoints; useful for private-endpoint
deployments.
:keyword list[str] preferred_locations: The preferred locations for geo-replicated database accounts.
:keyword list[str] excluded_locations: The excluded locations to be skipped from preferred locations. The locations
in this list are specified as the names of the azure Cosmos locations like, 'West US', 'East US' and so on.
Expand Down
6 changes: 6 additions & 0 deletions sdk/cosmos/azure-cosmos/azure/cosmos/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,12 @@ class ConnectionPolicy: # pylint: disable=too-many-instance-attributes
automatically discover the current write and read locations and direct
the requests to the correct location, taking into consideration the
user's preference (if provided) as PreferredLocations.
When EnableEndpointDiscovery is false, the client will only send
requests to the endpoint supplied at construction time. No
synthesized regional/locational endpoints will be tried, even on
transient failures of the initial database account read. Set this to
false when the account is reachable only via a private endpoint and
regional FQDNs are not present in the private DNS zone.
:ivar PreferredLocations:
Gets or sets the preferred locations for geo-replicated database
accounts. When EnableEndpointDiscovery is true and PreferredLocations is
Expand Down
106 changes: 106 additions & 0 deletions sdk/cosmos/azure-cosmos/tests/test_endpoint_discovery_disabled.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# The MIT License (MIT)
# Copyright (c) Microsoft Corporation. All rights reserved.

"""Unit tests for the ``enable_endpoint_discovery=False`` contract on the
global endpoint manager.

These tests guard the fix for
https://github.com/Azure/azure-sdk-for-python/issues/46219 where the
synchronous and asynchronous ``_GlobalEndpointManager._GetDatabaseAccount``
methods would fall back to synthesized public regional endpoints
(``https://<account>-<region>.documents.azure.com``) even when the caller
explicitly disabled endpoint discovery -- causing intermittent
``403 Forbidden`` errors against private-endpoint-only accounts whose
private DNS zone did not include the regional FQDN.
"""

import unittest
import unittest.mock

import pytest

from azure.cosmos import documents, exceptions
from azure.cosmos._global_endpoint_manager import _GlobalEndpointManager
from azure.cosmos.http_constants import StatusCodes


# Account URL exposed by the fake client as ``url_connection``. The tests
# assert that the first database-account read is issued against exactly this
# value.
_DEFAULT_ENDPOINT = "https://contoso.documents.azure.com:443/"


class _FakeClient:
    """Stand-in for the real client handed to ``_GlobalEndpointManager``.

    It carries only two attributes: the connection policy under test and the
    account URL (always ``_DEFAULT_ENDPOINT``).
    """

    def __init__(self, connection_policy: documents.ConnectionPolicy):
        # The two attributes are independent of each other.
        self.url_connection = _DEFAULT_ENDPOINT
        self.connection_policy = connection_policy


def _make_manager(*, enable_endpoint_discovery: bool, preferred_locations):
    """Build a ``_GlobalEndpointManager`` wired to a ``_FakeClient``.

    :param bool enable_endpoint_discovery: Value stored on the policy's
        ``EnableEndpointDiscovery`` attribute.
    :param preferred_locations: Iterable of location names; a fresh list copy
        is stored on the policy's ``PreferredLocations`` attribute.
    :returns: The manager constructed around the fake client.
    """
    connection_policy = documents.ConnectionPolicy()
    connection_policy.EnableEndpointDiscovery = enable_endpoint_discovery
    connection_policy.PreferredLocations = [*preferred_locations]
    fake_client = _FakeClient(connection_policy)
    return _GlobalEndpointManager(fake_client)


def _raise_503(_endpoint, **_kwargs):
    """Mock side effect that always fails with a "Service Unavailable" error.

    The endpoint and keyword arguments are accepted so the signature matches
    how the patched stub is invoked, but they are ignored.

    :raises ~azure.cosmos.exceptions.CosmosHttpResponseError: On every call.
    """
    service_unavailable = exceptions.CosmosHttpResponseError(
        status_code=StatusCodes.SERVICE_UNAVAILABLE,
        message="Service Unavailable",
    )
    raise service_unavailable


@pytest.mark.cosmosEmulator
class TestEndpointDiscoveryDisabled(unittest.TestCase):
    """Checks the synchronous ``_GetDatabaseAccount`` against the discovery flag."""

    def _read_account_with_failing_stub(self, manager):
        """Call ``_GetDatabaseAccount`` while every stub invocation fails.

        ``_GetDatabaseAccountStub`` is patched on ``manager`` with a mock whose
        side effect is ``_raise_503``. The call is required to raise
        ``CosmosHttpResponseError``.

        :param manager: The endpoint manager under test.
        :returns: The mock, so the caller can inspect the attempted endpoints.
        """
        failing_stub = unittest.mock.Mock(side_effect=_raise_503)
        patcher = unittest.mock.patch.object(manager, "_GetDatabaseAccountStub", failing_stub)
        with patcher, self.assertRaises(exceptions.CosmosHttpResponseError):
            manager._GetDatabaseAccount()
        return failing_stub

    def test_disabled_does_not_try_locational_endpoints(self):
        manager = _make_manager(
            enable_endpoint_discovery=False,
            preferred_locations=["North Europe", "West US"],
        )

        failing_stub = self._read_account_with_failing_stub(manager)

        # Exactly one attempt is allowed, and it must target the user's URL.
        self.assertEqual(failing_stub.call_count, 1)
        attempted_endpoint = failing_stub.call_args.args[0]
        self.assertEqual(attempted_endpoint, _DEFAULT_ENDPOINT)

    def test_disabled_does_not_synthesize_when_no_preferred_locations(self):
        manager = _make_manager(
            enable_endpoint_discovery=False,
            preferred_locations=[],
        )

        failing_stub = self._read_account_with_failing_stub(manager)

        self.assertEqual(failing_stub.call_count, 1)

    def test_enabled_still_falls_back_to_preferred_locations(self):
        """Regression check -- with discovery on, the fallback must still happen."""
        manager = _make_manager(
            enable_endpoint_discovery=True,
            preferred_locations=["North Europe", "West US"],
        )

        failing_stub = self._read_account_with_failing_stub(manager)

        # One attempt on the default endpoint, then one per preferred location.
        self.assertEqual(failing_stub.call_count, 1 + 2)
        attempted = [invocation.args[0] for invocation in failing_stub.call_args_list]
        default_url, north_europe_url, west_us_url = attempted
        self.assertEqual(default_url, _DEFAULT_ENDPOINT)
        self.assertIn("contoso-northeurope", north_europe_url)
        self.assertIn("contoso-westus", west_us_url)


# Allow running this module directly through the standard unittest runner.
if __name__ == "__main__":
    unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# The MIT License (MIT)
# Copyright (c) Microsoft Corporation. All rights reserved.

"""Async unit tests for the ``enable_endpoint_discovery=False`` contract.

Guards the async portion of the fix for
https://github.com/Azure/azure-sdk-for-python/issues/46219.
"""

import unittest
import unittest.mock

import pytest

from azure.cosmos import documents, exceptions
from azure.cosmos.aio._global_endpoint_manager_async import _GlobalEndpointManager
from azure.cosmos.http_constants import StatusCodes


# Account URL exposed by the fake async client as ``url_connection``. The
# tests assert that the first database-account read is issued against exactly
# this value.
_DEFAULT_ENDPOINT = "https://contoso.documents.azure.com:443/"


class _FakeAsyncClient:
    """Stand-in for the async client handed to ``_GlobalEndpointManager``.

    It carries only two attributes: the connection policy under test and the
    account URL (always ``_DEFAULT_ENDPOINT``).
    """

    def __init__(self, connection_policy: documents.ConnectionPolicy):
        # The two attributes are independent of each other.
        self.url_connection = _DEFAULT_ENDPOINT
        self.connection_policy = connection_policy


def _make_manager(*, enable_endpoint_discovery: bool, preferred_locations):
    """Build an async ``_GlobalEndpointManager`` wired to a ``_FakeAsyncClient``.

    :param bool enable_endpoint_discovery: Value stored on the policy's
        ``EnableEndpointDiscovery`` attribute.
    :param preferred_locations: Iterable of location names; a fresh list copy
        is stored on the policy's ``PreferredLocations`` attribute.
    :returns: The manager constructed around the fake client.
    """
    connection_policy = documents.ConnectionPolicy()
    connection_policy.EnableEndpointDiscovery = enable_endpoint_discovery
    connection_policy.PreferredLocations = [*preferred_locations]
    fake_client = _FakeAsyncClient(connection_policy)
    return _GlobalEndpointManager(fake_client)


async def _raise_503(_endpoint, **_kwargs):
    """Async mock side effect that always fails with "Service Unavailable".

    The endpoint and keyword arguments are accepted so the signature matches
    how the patched stub is invoked, but they are ignored.

    :raises ~azure.cosmos.exceptions.CosmosHttpResponseError: On every call.
    """
    service_unavailable = exceptions.CosmosHttpResponseError(
        status_code=StatusCodes.SERVICE_UNAVAILABLE,
        message="Service Unavailable",
    )
    raise service_unavailable


@pytest.mark.cosmosEmulator
@pytest.mark.asyncio
class TestEndpointDiscoveryDisabledAsync:
    """Checks the asynchronous ``_GetDatabaseAccount`` against the discovery flag."""

    async def _read_account_with_failing_stub(self, manager):
        """Await ``_GetDatabaseAccount`` while every stub invocation fails.

        ``_GetDatabaseAccountStub`` is patched on ``manager`` with an
        ``AsyncMock`` whose side effect is ``_raise_503``. The call is required
        to raise ``CosmosHttpResponseError``.

        :param manager: The endpoint manager under test.
        :returns: The mock, so the caller can inspect the attempted endpoints.
        """
        failing_stub = unittest.mock.AsyncMock(side_effect=_raise_503)
        patcher = unittest.mock.patch.object(manager, "_GetDatabaseAccountStub", failing_stub)
        with patcher, pytest.raises(exceptions.CosmosHttpResponseError):
            await manager._GetDatabaseAccount()
        return failing_stub

    async def test_disabled_does_not_try_locational_endpoints(self):
        manager = _make_manager(
            enable_endpoint_discovery=False,
            preferred_locations=["North Europe", "West US"],
        )

        failing_stub = await self._read_account_with_failing_stub(manager)

        # Exactly one attempt is allowed, and it must target the user's URL.
        assert failing_stub.call_count == 1
        attempted_endpoint = failing_stub.call_args.args[0]
        assert attempted_endpoint == _DEFAULT_ENDPOINT

    async def test_disabled_does_not_synthesize_when_no_preferred_locations(self):
        manager = _make_manager(
            enable_endpoint_discovery=False,
            preferred_locations=[],
        )

        failing_stub = await self._read_account_with_failing_stub(manager)

        assert failing_stub.call_count == 1

    async def test_enabled_still_falls_back_to_preferred_locations(self):
        """Regression check -- with discovery on, the fallback must still happen."""
        manager = _make_manager(
            enable_endpoint_discovery=True,
            preferred_locations=["North Europe", "West US"],
        )

        failing_stub = await self._read_account_with_failing_stub(manager)

        # One attempt on the default endpoint, then one per preferred location.
        assert failing_stub.call_count == 1 + 2
        attempted = [invocation.args[0] for invocation in failing_stub.call_args_list]
        default_url, north_europe_url, west_us_url = attempted
        assert default_url == _DEFAULT_ENDPOINT
        assert "contoso-northeurope" in north_europe_url
        assert "contoso-westus" in west_us_url
Loading