diff --git a/distributed/tests/test_utils_comm.py b/distributed/tests/test_utils_comm.py index 36f5fca77e..0cf4235c5f 100644 --- a/distributed/tests/test_utils_comm.py +++ b/distributed/tests/test_utils_comm.py @@ -229,6 +229,53 @@ async def f(): assert sleep_calls == [0.0, 1.0, 3.0, 6.0, 6.0] +def test_retry_truncates_large_coro_repr(cleanup): + """Test that retry truncates excessively large string representations of coro.""" + + class MyEx(Exception): + pass + + class LargeReprCallable: + def __repr__(self): + return "x" * 500 + + async def __call__(self): + raise MyEx("fail") + + log_messages = [] + + async def f(): + return await retry( + LargeReprCallable(), + retry_on_exceptions=(MyEx,), + count=1, + delay_min=0, + delay_max=0, + jitter_fraction=0, + ) + + import logging + + handler = logging.Handler() + handler.emit = lambda record: log_messages.append(record.getMessage()) + + logger = logging.getLogger("distributed.utils_comm") + logger.addHandler(handler) + try: + with pytest.raises(MyEx): + asyncio_run(f(), loop_factory=get_loop_factory()) + finally: + logger.removeHandler(handler) + + assert len(log_messages) == 1 + # reprlib truncates the 500-char repr to maxother (200) chars + assert len(log_messages[0]) < 500 + # reprlib uses "..." to indicate truncation + assert "..." in log_messages[0] + # Verify the full 500-char repr is NOT present + assert "x" * 500 not in log_messages[0] + + def test_unpack_remotedata(): def assert_eq(keys1: set[TaskRef], keys2: set[TaskRef]) -> None: if len(keys1) != len(keys2): diff --git a/distributed/utils_comm.py b/distributed/utils_comm.py index 92679b13ad..043ab04caf 100644 --- a/distributed/utils_comm.py +++ b/distributed/utils_comm.py @@ -3,6 +3,7 @@ import asyncio import logging import random +import reprlib from collections import defaultdict from collections.abc import Callable, Collection, Coroutine, Mapping from functools import partial @@ -384,7 +385,10 @@ async def retry( try: return await coro() except retry_on_exceptions as ex: - operation = operation or str(coro) + if not operation: + aRepr = reprlib.Repr() + aRepr.maxother = 200 + operation = aRepr.repr(coro) logger.info( f"Retrying {operation} after exception in attempt {i_try}/{count}: {ex}" )