Skip to content

Commit 5584cc6

Browse files
fix(tests): use condition-based waiting for child process tests
ROOT CAUSE:
Flaky test failures on Windows Python 3.11 CI were caused by arbitrary timeout-based waiting (sleep 0.5s + 0.3s) instead of waiting for the actual condition (child process writing to file).

CHANGES:
- Added _wait_for_file_to_exist() helper with condition-based polling
- Replaced arbitrary sleep calls with condition-based waiting in:
  - test_basic_child_process_cleanup
  - test_nested_process_tree
  - test_early_parent_exit

IMPACT:
Eliminates race conditions in process startup timing. Tests now wait for the actual condition (file exists and is non-empty) rather than guessing how long process startup takes.

FILES MODIFIED:
- tests/client/test_stdio.py
1 parent 80d1386 commit 5584cc6

File tree

1 file changed

+51
-29
lines changed

1 file changed

+51
-29
lines changed

tests/client/test_stdio.py

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,38 @@
3030
tee = shutil.which("tee")
3131

3232

33+
async def _wait_for_file_to_exist(file_path: str, timeout: float = 5.0) -> None:
    """Wait for a file to exist and be non-empty.

    Uses condition-based waiting instead of arbitrary sleep to eliminate
    race conditions in tests that verify child processes have started
    writing to marker files.

    The file is always checked at least once, even when ``timeout`` is zero
    or negative, and the error raised on timeout describes the state seen by
    the final check rather than a separate, later look at the file.

    Args:
        file_path: Path to the file to wait for
        timeout: Maximum time to wait in seconds

    Raises:
        TimeoutError: If file doesn't exist or remains empty after timeout
    """
    poll_interval = 0.01  # Poll every 10ms
    # Monotonic clock: a wall-clock adjustment during the test run must not
    # shorten or lengthen the wait.
    deadline = time.monotonic() + timeout

    while True:
        # Ask for the size directly instead of exists() followed by getsize():
        # the file may vanish between the two calls. Any OSError is treated as
        # "not there (yet)", matching what os.path.exists() would report.
        try:
            size = os.path.getsize(file_path)
        except OSError:
            size = None

        if size:
            return

        if time.monotonic() >= deadline:
            break

        await anyio.sleep(poll_interval)

    # `size` holds the result of the last check, so the message cannot
    # contradict the reason for the failure.
    if size is None:
        raise TimeoutError(f"File {file_path} does not exist after {timeout}s")
    raise TimeoutError(f"File {file_path} exists but is empty after {timeout}s (size={size})")
63+
64+
3365
@pytest.mark.anyio
3466
@pytest.mark.skipif(tee is None, reason="could not find tee command")
3567
async def test_stdio_context_manager_exiting():
@@ -296,19 +328,15 @@ async def test_basic_child_process_cleanup(self):
296328
# Start the parent process
297329
proc = await _create_platform_compatible_process(sys.executable, ["-c", parent_script])
298330

299-
# Wait for processes to start
300-
await anyio.sleep(0.5)
301-
302-
# Verify parent started
331+
# Wait for parent to start (condition-based)
332+
with anyio.fail_after(5.0):
333+
await _wait_for_file_to_exist(parent_marker)
303334
assert os.path.exists(parent_marker), "Parent process didn't start"
304335

305-
# Verify child is writing
306-
if os.path.exists(marker_file): # pragma: no branch
307-
initial_size = os.path.getsize(marker_file)
308-
await anyio.sleep(0.3)
309-
size_after_wait = os.path.getsize(marker_file)
310-
assert size_after_wait > initial_size, "Child process should be writing"
311-
print(f"Child is writing (file grew from {initial_size} to {size_after_wait} bytes)")
336+
# Wait for child to start writing (condition-based instead of arbitrary 0.5s + 0.3s)
337+
with anyio.fail_after(5.0):
338+
await _wait_for_file_to_exist(marker_file)
339+
print(f"Child is writing (file size: {os.path.getsize(marker_file)} bytes)")
312340

313341
# Terminate using our function
314342
print("Terminating process and children...")
@@ -398,16 +426,15 @@ async def test_nested_process_tree(self):
398426
# Start the parent process
399427
proc = await _create_platform_compatible_process(sys.executable, ["-c", parent_script])
400428

401-
# Let all processes start
402-
await anyio.sleep(1.0)
403-
404-
# Verify all are writing
429+
# Wait for all processes to start (condition-based)
405430
for file_path, name in [(parent_file, "parent"), (child_file, "child"), (grandchild_file, "grandchild")]:
406-
if os.path.exists(file_path): # pragma: no branch
407-
initial_size = os.path.getsize(file_path)
408-
await anyio.sleep(0.3)
409-
new_size = os.path.getsize(file_path)
410-
assert new_size > initial_size, f"{name} process should be writing"
431+
with anyio.fail_after(5.0):
432+
await _wait_for_file_to_exist(file_path)
433+
434+
parent_size = os.path.getsize(parent_file)
435+
child_size = os.path.getsize(child_file)
436+
grandchild_size = os.path.getsize(grandchild_file)
437+
print(f"parent={parent_size}, child={child_size}, grandchild={grandchild_size}")
411438

412439
# Terminate the whole tree
413440
await _terminate_process_tree(proc)
@@ -477,15 +504,10 @@ def handle_term(sig, frame):
477504
# Start the parent process
478505
proc = await _create_platform_compatible_process(sys.executable, ["-c", parent_script])
479506

480-
# Let child start writing
481-
await anyio.sleep(0.5)
482-
483-
# Verify child is writing
484-
if os.path.exists(marker_file): # pragma: no branch
485-
size1 = os.path.getsize(marker_file)
486-
await anyio.sleep(0.3)
487-
size2 = os.path.getsize(marker_file)
488-
assert size2 > size1, "Child should be writing"
507+
# Wait for child to start writing (condition-based)
508+
with anyio.fail_after(5.0):
509+
await _wait_for_file_to_exist(marker_file)
510+
print(f"Child is writing (file size: {os.path.getsize(marker_file)} bytes)")
489511

490512
# Terminate - this will kill the process group even if parent exits first
491513
await _terminate_process_tree(proc)

0 commit comments

Comments
 (0)