This script:
1. Runs the full test suite initially (with normal cleanup)
2. Identifies failed tests and retries them with configurable strategy
-3. Manages test namespaces intelligently (cleanup on success, keep failed for debugging)
+3. Manages test namespaces intelligently (cleanup on success, keep failed for debugging by default)
4. Provides detailed logging and comprehensive reporting

Usage: ./scripts/auto-retry-tests.py --parallel 4 --attempts-serial 3 --attempts-parallel 2 --venv ./venv

import json
import os
import re
+import shutil
import statistics
import subprocess
import sys
@@ -32,8 +33,7 @@ class TestConstants:
    """Constants used throughout the test runner."""

    MIN_RUNTIME_THRESHOLD = 30.0  # Filter out quick failures (seconds)
-    MAX_RUNTIME_HISTORY = 10  # Keep only recent runs
-    MAX_ERROR_LINES_TO_CHECK = 50  # Lines to scan for errors in logs
+    MAX_RUNTIME_HISTORY = 50  # Keep only recent runs
    MAX_TEST_NAME_LENGTH = 100  # Maximum test name length for filenames
    HASH_SUFFIX_LENGTH = 8  # Length of MD5 hash suffix
    DEFAULT_PARALLEL_WORKERS = 2  # Default number of parallel workers
@@ -231,7 +231,7 @@ def build_configuration_dict(self, args) -> dict:
            "parallel": args.parallel,
            "attempts_parallel": args.attempts_parallel,
            "attempts_serial": args.attempts_serial,
-            "keep_failed_namespaces": args.keep_failed_namespaces,
+            "delete_failed_namespaces": args.delete_failed_namespaces,
            "venv": args.venv,
            "extra_args": args.extra_args,
            "output_dir": str(self.output_dir),
@@ -378,7 +378,7 @@ def create_safe_log_filename(
        return f"{safe_test_name}_attempt_{attempt}_{attempt_type}.txt"

    def build_test_command(
-        self, test_name: str = None, skip_delete: bool = False
+        self, test_name: str = None, skip_delete: bool = False, work_dir: str = None
    ) -> List[str]:
        """Build the command arguments for running tests."""
        command_args = ["scripts/run-tests"]
@@ -393,6 +393,13 @@ def build_test_command(
        if test_name:
            command_args.extend(["--test", test_name])

+        # Add a unique work directory to prevent parallel interference.
+        # beku deletes the work directory at the start of a test, so if one test is already
+        # running when another starts, the new run would delete (and recreate) the shared
+        # work directory, which leads to failures.
+        if work_dir:
+            command_args.extend(["--work-dir", work_dir])
+
        # Add any extra arguments passed through
        if self.args.extra_args:
            command_args.extend(self.args.extra_args)
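For illustration, here is roughly what the assembled command looks like when a work directory is passed in. The test name and directory below are made-up placeholders; only `scripts/run-tests`, `--test`, and `--work-dir` come from the code above:

```python
# Hypothetical values mirroring build_test_command's output shape.
command_args = ["scripts/run-tests"]
command_args.extend(["--test", "smoke/example-test"])
command_args.extend(["--work-dir", "tests/_work_smoke_example-test_1_parallel_20250101_120000_000"])

print(" ".join(command_args))
# scripts/run-tests --test smoke/example-test --work-dir tests/_work_smoke_example-test_1_parallel_20250101_120000_000
```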
@@ -484,7 +491,20 @@ def run_single_test_suite(
        attempt_type: str = "initial",
    ) -> TestResult:
        """Run a single test or the full test suite."""
-        command_args = self.build_test_command(test_name, skip_delete)
+        # Create unique work directory to prevent parallel test interference
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[
+            :-3
+        ]  # microseconds to milliseconds
+        if test_name:
+            # Create a unique work directory based on test name and timestamp
+            safe_test_name = test_name.replace("/", "_").replace(",", "_")
+            work_dir = (
+                f"tests/_work_{safe_test_name}_{attempt}_{attempt_type}_{timestamp}"
+            )
+        else:
+            work_dir = f"tests/_work_full_suite_{attempt}_{attempt_type}_{timestamp}"
+
+        command_args = self.build_test_command(test_name, skip_delete, work_dir)

        # Set up log file
        if test_name:
@@ -519,6 +539,12 @@ def run_single_test_suite(
        start_time = time.time()

        with open(log_file, "w") as file_handle:
+            # Log the exact command being executed
+            file_handle.write(f"Command: {' '.join(command_args)}\n")
+            file_handle.write(f"Working directory: {os.getcwd()}\n")
+            file_handle.write("=" * 80 + "\n\n")
+            file_handle.flush()
+
            result = subprocess.run(
                command_args,
                stdout=file_handle,
@@ -569,10 +595,18 @@ def run_single_test_suite(
            print(f" 📊 Average: {self.format_duration(avg_runtime)}")

        # If this was a skip_delete attempt and test passed, clean up the namespace
+        # We only want to keep failed namespaces
        if test_name and skip_delete and success and namespace:
            self.delete_test_namespace(namespace)
            print(f" 🧹 Test passed, cleaning up namespace: {namespace}")

+        # Clean up the unique work directory after test completion
+        if work_dir and Path(work_dir).exists():
+            try:
+                shutil.rmtree(work_dir)
+            except Exception as exception:
+                print(f" ⚠️ Failed to clean up work directory {work_dir}: {exception}")
+
        return test_result
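Taken together, the changes in this method give every run its own throwaway work directory and remove it again afterwards. A condensed sketch of that lifecycle, assuming the naming and cleanup mirror the diff (the helper names here are invented for the sketch):

```python
import shutil
from datetime import datetime
from pathlib import Path


def make_work_dir(test_name=None, attempt=1, attempt_type="initial"):
    """Build a unique per-run work directory path (hypothetical helper)."""
    # Millisecond-resolution timestamp keeps concurrent runs from colliding.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
    if test_name:
        safe_test_name = test_name.replace("/", "_").replace(",", "_")
        return f"tests/_work_{safe_test_name}_{attempt}_{attempt_type}_{timestamp}"
    return f"tests/_work_full_suite_{attempt}_{attempt_type}_{timestamp}"


def cleanup_work_dir(work_dir):
    """Best-effort removal; a cleanup failure must not fail the test run itself."""
    if work_dir and Path(work_dir).exists():
        try:
            shutil.rmtree(work_dir)
        except Exception as exception:
            print(f" ⚠️ Failed to clean up work directory {work_dir}: {exception}")
```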
@@ -727,7 +761,9 @@ def generate_comprehensive_report(self, runner, start_time: datetime) -> str:
        report.append(f"Parallel: {runner.args.parallel}")
        report.append(f"Parallel retry attempts: {runner.args.attempts_parallel}")
        report.append(f"Serial retry attempts: {runner.args.attempts_serial}")
-        report.append(f"Keep failed namespaces: {runner.args.keep_failed_namespaces}")
+        report.append(
+            f"Delete failed namespaces: {runner.args.delete_failed_namespaces}"
+        )
        report.append(f"Virtualenv: {runner.args.venv or 'None'}")
        report.append("")
@@ -882,7 +918,7 @@ def retry_tests_in_parallel(
            # Determine if this is the last attempt and no serial tests follow
            is_last_attempt = attempt == max_attempts
            use_skip_delete = (
-                self.args.keep_failed_namespaces
+                not self.args.delete_failed_namespaces
                and not serial_tests_follow
                and is_last_attempt
            )
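In other words, a final parallel attempt only skips namespace deletion when the user has not opted into deleting failed namespaces and no serial retries will run afterwards. A small illustration of that condition (variable names follow the code above; the values are made up):

```python
# Illustrative values for one final parallel attempt.
delete_failed_namespaces = False  # new default: keep failed namespaces
serial_tests_follow = False
is_last_attempt = True

use_skip_delete = (
    not delete_failed_namespaces
    and not serial_tests_follow
    and is_last_attempt
)
assert use_skip_delete is True
```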
@@ -892,10 +928,6 @@ def retry_tests_in_parallel(
            print(
                f"Retrying {len(tests_to_retry)} tests in parallel (max {max_parallel} at once)..."
            )
-            if use_skip_delete:
-                print(
-                    " Using skip-delete for this final parallel attempt (no serial tests follow)"
-                )

            # Execute tests in parallel
            with ThreadPoolExecutor(max_workers=max_parallel) as executor:
@@ -952,10 +984,12 @@ def retry_test_serially(
        for attempt in range(1, max_attempts + 1):
            # Only use skip-delete on the last attempt
            is_last_attempt = attempt == max_attempts
+            use_skip_delete = not self.args.delete_failed_namespaces and is_last_attempt
+
            result = self.test_executor.run_single_test_suite(
                self.output_dir,
                test_name=test_name,
-                skip_delete=self.args.keep_failed_namespaces and is_last_attempt,
+                skip_delete=use_skip_delete,
                attempt=attempt,
                attempt_type="serial",
            )
@@ -992,23 +1026,15 @@ def create_test_summary(
        else:
            final_status = "failed"

-        # Find the last namespace for failed tests
+        # Find the last namespace for failed tests (only if keeping failed namespaces)
        final_namespace = None
-        if final_status == "failed":
+        if final_status == "failed" and not self.args.delete_failed_namespaces:
            # Keep the last failed attempt's namespace
            for result in reversed(retry_results):
                if result.namespace:
                    final_namespace = result.namespace
                    break

-        # Clean up namespaces for successful tests
-        if final_status in ["passed", "flaky"]:
-            # Delete all namespaces for this test
-            all_results = [initial_result] + retry_results
-            for result in all_results:
-                if result.namespace and result.namespace != final_namespace:
-                    self.test_executor.delete_test_namespace(result.namespace)
-
        summary = TestSummary(
            test_name=test_name,
            initial_result=initial_result,
@@ -1078,8 +1104,14 @@ def _run_initial_test_suite(self) -> bool:
        )

        if not failed_tests:
-            print(" No failed tests found in output (this might be a parsing issue)")
-            self.report_generator.generate_and_save_final_report(self, self.start_time)
+            print(" No failed tests found in output but run-tests exited with code 1")
+            print(
+                " This indicates an infrastructure or setup issue that prevents tests from running"
+            )
+            print(
+                " Check the log file for connection errors, missing dependencies, or cluster issues"
+            )
+            print(f" Log file: {initial_result.log_file}")
            return False

        print(f" Found {len(failed_tests)} failed tests:")
@@ -1281,9 +1313,9 @@ def main():

    # Namespace management arguments
    parser.add_argument(
-        "--keep-failed-namespaces",
+        "--delete-failed-namespaces",
        action="store_true",
-        help="Keep namespaces of failed tests for debugging (only the last one is kept)",
+        help="Delete namespaces of failed tests (default: keep them for debugging)",
    )

    # Output arguments
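Because the flag's polarity is inverted, the default behavior changes: failed test namespaces are now kept unless the caller explicitly asks for deletion. A minimal sketch of the new semantics (only the flag itself is taken from the code above; the surrounding parser setup is assumed):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--delete-failed-namespaces",
    action="store_true",
    help="Delete namespaces of failed tests (default: keep them for debugging)",
)

# Default run: failed namespaces are kept for debugging.
assert parser.parse_args([]).delete_failed_namespaces is False
# Opt-in deletion:
assert parser.parse_args(["--delete-failed-namespaces"]).delete_failed_namespaces is True
```

A run that opts into deletion would then simply append the flag to the invocation shown in the Usage line, e.g. `./scripts/auto-retry-tests.py --parallel 4 --attempts-serial 3 --attempts-parallel 2 --venv ./venv --delete-failed-namespaces`.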