From 003796fdbea7c2ec05feed0112e7b5042eb9da61 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 19 Feb 2026 13:34:25 -0500 Subject: [PATCH 1/3] PYTHON-5731 - Server selection deprioritization only for overload errors on replica sets --- pymongo/asynchronous/mongo_client.py | 6 +- pymongo/synchronous/mongo_client.py | 6 +- test/asynchronous/test_retryable_reads.py | 72 +++++++++++++++++++++++ test/test_retryable_reads.py | 72 +++++++++++++++++++++++ 4 files changed, 154 insertions(+), 2 deletions(-) diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index 4f3c43f23c..7fa0983908 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -2825,7 +2825,11 @@ async def run(self) -> T: if self._last_error is None: self._last_error = exc - if self._server is not None: + if ( + self._server is not None + and self._client.topology_description.topology_type_name == "Sharded" + or exc.has_error_label("SystemOverloadedError") + ): self._deprioritized_servers.append(self._server) def _is_not_eligible_for_retry(self) -> bool: diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index cd0d19141f..badbeac09d 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -2815,7 +2815,11 @@ def run(self) -> T: if self._last_error is None: self._last_error = exc - if self._server is not None: + if ( + self._server is not None + and self._client.topology_description.topology_type_name == "Sharded" + or exc.has_error_label("SystemOverloadedError") + ): self._deprioritized_servers.append(self._server) def _is_not_eligible_for_retry(self) -> bool: diff --git a/test/asynchronous/test_retryable_reads.py b/test/asynchronous/test_retryable_reads.py index 47ac91b0f5..85b2a1f7f8 100644 --- a/test/asynchronous/test_retryable_reads.py +++ b/test/asynchronous/test_retryable_reads.py @@ -261,6 +261,78 @@ async def test_retryable_reads_are_retried_on_the_same_implicit_session(self): self.assertEqual(command_docs[0]["lsid"], command_docs[1]["lsid"]) self.assertIsNot(command_docs[0], command_docs[1]) + @async_client_context.require_replica_set + @async_client_context.require_failCommand_fail_point + async def test_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available( + self + ): + listener = OvertCommandListener() + + # Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. + client = await self.async_rs_or_single_client( + event_listeners=[listener], retryReads=True, readPreference="primaryPreferred" + ) + + # Configure a fail point with the RetryableError and SystemOverloadedError error labels. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 6, + }, + } + await async_set_fail_point(client, command_args) + + # Reset the command event monitor to clear the fail point command from its stored events. + listener.reset() + + # Execute a `find` command with `client`. + await client.t.t.find_one({}) + + # Assert that one failed command event and one successful command event occurred. + self.assertEqual(len(listener.failed_events), 1) + self.assertEqual(len(listener.succeeded_events), 1) + + # Assert that both events occurred on different servers. + assert listener.failed_events[0].connection_id != listener.succeeded_events[0].connection_id + + @async_client_context.require_replica_set + @async_client_context.require_failCommand_fail_point + async def test_retryable_reads_error_are_retried_on_same_replicaset_server(self): + listener = OvertCommandListener() + + # Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. + client = await self.async_rs_or_single_client( + event_listeners=[listener], retryReads=True, readPreference="primaryPreferred" + ) + + # Configure a fail point with the RetryableError error label. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find"], + "errorLabels": ["RetryableError"], + "errorCode": 6, + }, + } + await async_set_fail_point(client, command_args) + + # Reset the command event monitor to clear the fail point command from its stored events. + listener.reset() + + # Execute a `find` command with `client`. + await client.t.t.find_one({}) + + # Assert that one failed command event and one successful command event occurred. + self.assertEqual(len(listener.failed_events), 1) + self.assertEqual(len(listener.succeeded_events), 1) + + # Assert that both events occurred the same server. + assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id + if __name__ == "__main__": unittest.main() diff --git a/test/test_retryable_reads.py b/test/test_retryable_reads.py index c9f72ae547..43073f6e40 100644 --- a/test/test_retryable_reads.py +++ b/test/test_retryable_reads.py @@ -259,6 +259,78 @@ def test_retryable_reads_are_retried_on_the_same_implicit_session(self): self.assertEqual(command_docs[0]["lsid"], command_docs[1]["lsid"]) self.assertIsNot(command_docs[0], command_docs[1]) + @client_context.require_replica_set + @client_context.require_failCommand_fail_point + def test_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available( + self + ): + listener = OvertCommandListener() + + # Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. + client = self.rs_or_single_client( + event_listeners=[listener], retryReads=True, readPreference="primaryPreferred" + ) + + # Configure a fail point with the RetryableError and SystemOverloadedError error labels. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 6, + }, + } + set_fail_point(client, command_args) + + # Reset the command event monitor to clear the fail point command from its stored events. + listener.reset() + + # Execute a `find` command with `client`. + client.t.t.find_one({}) + + # Assert that one failed command event and one successful command event occurred. + self.assertEqual(len(listener.failed_events), 1) + self.assertEqual(len(listener.succeeded_events), 1) + + # Assert that both events occurred on different servers. + assert listener.failed_events[0].connection_id != listener.succeeded_events[0].connection_id + + @client_context.require_replica_set + @client_context.require_failCommand_fail_point + def test_retryable_reads_error_are_retried_on_same_replicaset_server(self): + listener = OvertCommandListener() + + # Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. + client = self.rs_or_single_client( + event_listeners=[listener], retryReads=True, readPreference="primaryPreferred" + ) + + # Configure a fail point with the RetryableError error label. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find"], + "errorLabels": ["RetryableError"], + "errorCode": 6, + }, + } + set_fail_point(client, command_args) + + # Reset the command event monitor to clear the fail point command from its stored events. + listener.reset() + + # Execute a `find` command with `client`. + client.t.t.find_one({}) + + # Assert that one failed command event and one successful command event occurred. + self.assertEqual(len(listener.failed_events), 1) + self.assertEqual(len(listener.succeeded_events), 1) + + # Assert that both events occurred the same server. + assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id + if __name__ == "__main__": unittest.main() From 77464f09c7bfbf8cc98761506ee462ee150f48d2 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 19 Feb 2026 15:23:02 -0500 Subject: [PATCH 2/3] Format tests --- test/asynchronous/test_retryable_reads.py | 30 ++++++++++++----------- test/test_retryable_reads.py | 30 ++++++++++++----------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/test/asynchronous/test_retryable_reads.py b/test/asynchronous/test_retryable_reads.py index 85b2a1f7f8..cc756c8356 100644 --- a/test/asynchronous/test_retryable_reads.py +++ b/test/asynchronous/test_retryable_reads.py @@ -263,17 +263,17 @@ async def test_retryable_reads_are_retried_on_the_same_implicit_session(self): @async_client_context.require_replica_set @async_client_context.require_failCommand_fail_point - async def test_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available( + async def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available( self ): listener = OvertCommandListener() - # Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. + # 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. client = await self.async_rs_or_single_client( event_listeners=[listener], retryReads=True, readPreference="primaryPreferred" ) - # Configure a fail point with the RetryableError and SystemOverloadedError error labels. + # 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels. command_args = { "configureFailPoint": "failCommand", "mode": {"times": 1}, @@ -285,30 +285,32 @@ async def test_retryable_reads_caused_by_overload_errors_are_retried_on_a_differ } await async_set_fail_point(client, command_args) - # Reset the command event monitor to clear the fail point command from its stored events. + # 3. Reset the command event monitor to clear the fail point command from its stored events. listener.reset() - # Execute a `find` command with `client`. + # 4. Execute a `find` command with `client`. await client.t.t.find_one({}) - # Assert that one failed command event and one successful command event occurred. + # 5. Assert that one failed command event and one successful command event occurred. self.assertEqual(len(listener.failed_events), 1) self.assertEqual(len(listener.succeeded_events), 1) - # Assert that both events occurred on different servers. + # 6. Assert that both events occurred on different servers. assert listener.failed_events[0].connection_id != listener.succeeded_events[0].connection_id @async_client_context.require_replica_set @async_client_context.require_failCommand_fail_point - async def test_retryable_reads_error_are_retried_on_same_replicaset_server(self): + async def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server( + self + ): listener = OvertCommandListener() - # Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. + # 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. client = await self.async_rs_or_single_client( event_listeners=[listener], retryReads=True, readPreference="primaryPreferred" ) - # Configure a fail point with the RetryableError error label. + # 2. Configure a fail point with the RetryableError error label. command_args = { "configureFailPoint": "failCommand", "mode": {"times": 1}, @@ -320,17 +322,17 @@ async def test_retryable_reads_error_are_retried_on_same_replicaset_server(self) } await async_set_fail_point(client, command_args) - # Reset the command event monitor to clear the fail point command from its stored events. + # 3. Reset the command event monitor to clear the fail point command from its stored events. listener.reset() - # Execute a `find` command with `client`. + # 4. Execute a `find` command with `client`. await client.t.t.find_one({}) - # Assert that one failed command event and one successful command event occurred. + # 5. Assert that one failed command event and one successful command event occurred. self.assertEqual(len(listener.failed_events), 1) self.assertEqual(len(listener.succeeded_events), 1) - # Assert that both events occurred the same server. + # 6. Assert that both events occurred the same server. assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id diff --git a/test/test_retryable_reads.py b/test/test_retryable_reads.py index 43073f6e40..4ef560f7f2 100644 --- a/test/test_retryable_reads.py +++ b/test/test_retryable_reads.py @@ -261,17 +261,17 @@ def test_retryable_reads_are_retried_on_the_same_implicit_session(self): @client_context.require_replica_set @client_context.require_failCommand_fail_point - def test_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available( + def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available( self ): listener = OvertCommandListener() - # Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. + # 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. client = self.rs_or_single_client( event_listeners=[listener], retryReads=True, readPreference="primaryPreferred" ) - # Configure a fail point with the RetryableError and SystemOverloadedError error labels. + # 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels. command_args = { "configureFailPoint": "failCommand", "mode": {"times": 1}, @@ -283,30 +283,32 @@ def test_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_re } set_fail_point(client, command_args) - # Reset the command event monitor to clear the fail point command from its stored events. + # 3. Reset the command event monitor to clear the fail point command from its stored events. listener.reset() - # Execute a `find` command with `client`. + # 4. Execute a `find` command with `client`. client.t.t.find_one({}) - # Assert that one failed command event and one successful command event occurred. + # 5. Assert that one failed command event and one successful command event occurred. self.assertEqual(len(listener.failed_events), 1) self.assertEqual(len(listener.succeeded_events), 1) - # Assert that both events occurred on different servers. + # 6. Assert that both events occurred on different servers. assert listener.failed_events[0].connection_id != listener.succeeded_events[0].connection_id @client_context.require_replica_set @client_context.require_failCommand_fail_point - def test_retryable_reads_error_are_retried_on_same_replicaset_server(self): + def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server( + self + ): listener = OvertCommandListener() - # Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. + # 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled. client = self.rs_or_single_client( event_listeners=[listener], retryReads=True, readPreference="primaryPreferred" ) - # Configure a fail point with the RetryableError error label. + # 2. Configure a fail point with the RetryableError error label. command_args = { "configureFailPoint": "failCommand", "mode": {"times": 1}, @@ -318,17 +320,17 @@ def test_retryable_reads_error_are_retried_on_same_replicaset_server(self): } set_fail_point(client, command_args) - # Reset the command event monitor to clear the fail point command from its stored events. + # 3. Reset the command event monitor to clear the fail point command from its stored events. listener.reset() - # Execute a `find` command with `client`. + # 4. Execute a `find` command with `client`. client.t.t.find_one({}) - # Assert that one failed command event and one successful command event occurred. + # 5. Assert that one failed command event and one successful command event occurred. self.assertEqual(len(listener.failed_events), 1) self.assertEqual(len(listener.succeeded_events), 1) - # Assert that both events occurred the same server. + # 6. Assert that both events occurred the same server. assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id From 875d8e3460e0db07e3eb5c8ba4783c746c56cf45 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Fri, 20 Feb 2026 10:29:13 -0500 Subject: [PATCH 3/3] Require 4.4+ --- test/asynchronous/test_retryable_reads.py | 2 ++ test/test_retryable_reads.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/test/asynchronous/test_retryable_reads.py b/test/asynchronous/test_retryable_reads.py index cc756c8356..ec188b1080 100644 --- a/test/asynchronous/test_retryable_reads.py +++ b/test/asynchronous/test_retryable_reads.py @@ -263,6 +263,7 @@ async def test_retryable_reads_are_retried_on_the_same_implicit_session(self): @async_client_context.require_replica_set @async_client_context.require_failCommand_fail_point + @async_client_context.require_version_min(4, 4, 0) async def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available( self ): @@ -300,6 +301,7 @@ async def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_ @async_client_context.require_replica_set @async_client_context.require_failCommand_fail_point + @async_client_context.require_version_min(4, 4, 0) async def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server( self ): diff --git a/test/test_retryable_reads.py b/test/test_retryable_reads.py index 4ef560f7f2..8ee1cb3a29 100644 --- a/test/test_retryable_reads.py +++ b/test/test_retryable_reads.py @@ -261,6 +261,7 @@ def test_retryable_reads_are_retried_on_the_same_implicit_session(self): @client_context.require_replica_set @client_context.require_failCommand_fail_point + @client_context.require_version_min(4, 4, 0) def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available( self ): @@ -298,6 +299,7 @@ def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_differ @client_context.require_replica_set @client_context.require_failCommand_fail_point + @client_context.require_version_min(4, 4, 0) def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server( self ):