diff --git a/util/compare_test_results.py b/util/compare_test_results.py
index d5739deaed5..0f586d5f1fa 100644
--- a/util/compare_test_results.py
+++ b/util/compare_test_results.py
@@ -50,14 +50,14 @@ def identify_test_changes(current_flat, reference_flat):
         reference_flat (dict): Flattened dictionary of reference test results
 
     Returns:
-        tuple: Four lists containing regressions, fixes, newly_skipped, and newly_passing tests
+        tuple: Five lists containing regressions, fixes, newly_skipped, newly_passing, and newly_failing tests
     """
     # Find regressions (tests that were passing but now failing)
     regressions = []
     for test_path, status in current_flat.items():
         if status in ("FAIL", "ERROR"):
             if test_path in reference_flat:
-                if reference_flat[test_path] in ("PASS", "SKIP"):
+                if reference_flat[test_path] == "PASS":
                     regressions.append(test_path)
 
     # Find fixes (tests that were failing but now passing)
@@ -88,7 +88,17 @@ def identify_test_changes(current_flat, reference_flat):
         ):
             newly_passing.append(test_path)
 
-    return regressions, fixes, newly_skipped, newly_passing
+    # Find newly failing tests (were skipped, now failing)
+    newly_failing = []
+    for test_path, status in current_flat.items():
+        if (
+            status in ("FAIL", "ERROR")
+            and test_path in reference_flat
+            and reference_flat[test_path] == "SKIP"
+        ):
+            newly_failing.append(test_path)
+
+    return regressions, fixes, newly_skipped, newly_passing, newly_failing
 
 
 def main():
@@ -135,8 +145,8 @@ def main():
     reference_flat = flatten_test_results(reference_results)
 
     # Identify different categories of test changes
-    regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
-        current_flat, reference_flat
+    regressions, fixes, newly_skipped, newly_passing, newly_failing = (
+        identify_test_changes(current_flat, reference_flat)
     )
 
     # Filter out intermittent issues from regressions
@@ -147,6 +157,10 @@ def main():
     real_fixes = [f for f in fixes if f not in ignore_list]
     intermittent_fixes = [f for f in fixes if f in ignore_list]
 
+    # Filter out intermittent issues from newly failing
+    real_newly_failing = [n for n in newly_failing if n not in ignore_list]
+    intermittent_newly_failing = [n for n in newly_failing if n in ignore_list]
+
     # Print summary stats
     print(f"Total tests in current run: {len(current_flat)}")
     print(f"Total tests in reference: {len(reference_flat)}")
@@ -156,6 +170,8 @@ def main():
     print(f"Intermittent fixes: {len(intermittent_fixes)}")
     print(f"Newly skipped tests: {len(newly_skipped)}")
     print(f"Newly passing tests (previously skipped): {len(newly_passing)}")
+    print(f"Newly failing tests (previously skipped): {len(real_newly_failing)}")
+    print(f"Intermittent newly failing: {len(intermittent_newly_failing)}")
 
     output_lines = []
 
@@ -206,6 +222,21 @@ def main():
             print(f"::notice ::{msg}", file=sys.stderr)
             output_lines.append(msg)
 
+    # Report newly failing tests (were skipped, now failing)
+    if real_newly_failing:
+        print("\nNEWLY FAILING TESTS (previously skipped):", file=sys.stderr)
+        for test in sorted(real_newly_failing):
+            msg = f"Note: The gnu test {test} was skipped on 'main' but is now failing."
+            print(f"::warning ::{msg}", file=sys.stderr)
+            output_lines.append(msg)
+
+    if intermittent_newly_failing:
+        print("\nINTERMITTENT NEWLY FAILING (ignored):", file=sys.stderr)
+        for test in sorted(intermittent_newly_failing):
+            msg = f"Skip an intermittent issue {test} (was skipped on 'main', now failing)"
+            print(f"::notice ::{msg}", file=sys.stderr)
+            output_lines.append(msg)
+
     if args.output and output_lines:
         with open(args.output, "w") as f:
             for line in output_lines:
diff --git a/util/test_compare_test_results.py b/util/test_compare_test_results.py
index c3ab4d833a8..f10557c96f0 100644
--- a/util/test_compare_test_results.py
+++ b/util/test_compare_test_results.py
@@ -129,11 +129,11 @@ def test_regressions(self):
         }
         reference = {
             "tests/ls/test1": "PASS",
-            "tests/ls/test2": "SKIP",
+            "tests/ls/test2": "PASS",
             "tests/cp/test3": "PASS",
             "tests/cp/test4": "FAIL",
         }
-        regressions, _, _, _ = identify_test_changes(current, reference)
+        regressions, _, _, _, _ = identify_test_changes(current, reference)
         self.assertEqual(sorted(regressions), ["tests/ls/test1", "tests/ls/test2"])
 
     def test_fixes(self):
@@ -150,7 +150,7 @@ def test_fixes(self):
             "tests/cp/test3": "PASS",
             "tests/cp/test4": "FAIL",
         }
-        _, fixes, _, _ = identify_test_changes(current, reference)
+        _, fixes, _, _, _ = identify_test_changes(current, reference)
         self.assertEqual(sorted(fixes), ["tests/ls/test1", "tests/ls/test2"])
 
     def test_newly_skipped(self):
@@ -165,7 +165,7 @@ def test_newly_skipped(self):
             "tests/ls/test2": "FAIL",
             "tests/cp/test3": "PASS",
         }
-        _, _, newly_skipped, _ = identify_test_changes(current, reference)
+        _, _, newly_skipped, _, _ = identify_test_changes(current, reference)
        self.assertEqual(newly_skipped, ["tests/ls/test1"])
 
     def test_newly_passing(self):
@@ -180,7 +180,7 @@ def test_newly_passing(self):
             "tests/ls/test2": "FAIL",
             "tests/cp/test3": "SKIP",
         }
-        _, _, _, newly_passing = identify_test_changes(current, reference)
+        _, _, _, newly_passing, _ = identify_test_changes(current, reference)
         self.assertEqual(newly_passing, ["tests/ls/test1"])
 
     def test_all_categories(self):
@@ -191,6 +191,7 @@ def test_all_categories(self):
             "tests/cp/test3": "SKIP",  # Newly skipped
             "tests/cp/test4": "PASS",  # Newly passing
             "tests/rm/test5": "PASS",  # No change
+            "tests/rm/test6": "FAIL",  # Newly failing
         }
         reference = {
             "tests/ls/test1": "PASS",  # Regression
@@ -198,14 +199,16 @@ def test_all_categories(self):
             "tests/cp/test3": "PASS",  # Newly skipped
             "tests/cp/test4": "SKIP",  # Newly passing
             "tests/rm/test5": "PASS",  # No change
+            "tests/rm/test6": "SKIP",  # Newly failing
         }
-        regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
-            current, reference
+        regressions, fixes, newly_skipped, newly_passing, newly_failing = (
+            identify_test_changes(current, reference)
         )
         self.assertEqual(regressions, ["tests/ls/test1"])
         self.assertEqual(fixes, ["tests/ls/test2"])
         self.assertEqual(newly_skipped, ["tests/cp/test3"])
         self.assertEqual(newly_passing, ["tests/cp/test4"])
+        self.assertEqual(newly_failing, ["tests/rm/test6"])
 
     def test_new_and_removed_tests(self):
         """Test handling of tests that are only in one of the datasets."""
@@ -219,13 +222,43 @@ def test_new_and_removed_tests(self):
             "tests/ls/test2": "PASS",
             "tests/rm/old_test": "FAIL",
         }
-        regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
-            current, reference
+        regressions, fixes, newly_skipped, newly_passing, newly_failing = (
+            identify_test_changes(current, reference)
         )
         self.assertEqual(regressions, ["tests/ls/test2"])
         self.assertEqual(fixes, [])
         self.assertEqual(newly_skipped, [])
         self.assertEqual(newly_passing, [])
+        self.assertEqual(newly_failing, [])
+
+    def test_newly_failing(self):
+        """Test identifying newly failing tests (SKIP -> FAIL)."""
+        current = {
+            "tests/ls/test1": "FAIL",
+            "tests/ls/test2": "ERROR",
+            "tests/cp/test3": "PASS",
+        }
+        reference = {
+            "tests/ls/test1": "SKIP",
+            "tests/ls/test2": "SKIP",
+            "tests/cp/test3": "SKIP",
+        }
+        _, _, _, _, newly_failing = identify_test_changes(current, reference)
+        self.assertEqual(sorted(newly_failing), ["tests/ls/test1", "tests/ls/test2"])
+
+    def test_skip_to_fail_not_regression(self):
+        """Test that SKIP -> FAIL is not counted as a regression."""
+        current = {
+            "tests/ls/test1": "FAIL",
+            "tests/ls/test2": "FAIL",
+        }
+        reference = {
+            "tests/ls/test1": "SKIP",
+            "tests/ls/test2": "PASS",
+        }
+        regressions, _, _, _, newly_failing = identify_test_changes(current, reference)
+        self.assertEqual(regressions, ["tests/ls/test2"])
+        self.assertEqual(newly_failing, ["tests/ls/test1"])
 
 
 class TestMainFunction(unittest.TestCase):
@@ -285,7 +318,7 @@ def test_main_exit_code_with_real_regressions(self):
         current_flat = flatten_test_results(self.current_data)
         reference_flat = flatten_test_results(self.reference_data)
 
-        regressions, _, _, _ = identify_test_changes(current_flat, reference_flat)
+        regressions, _, _, _, _ = identify_test_changes(current_flat, reference_flat)
 
         self.assertIn("tests/ls/test2", regressions)
 
@@ -320,7 +353,7 @@ def test_filter_intermittent_fixes(self):
         current_flat = flatten_test_results(self.current_data)
         reference_flat = flatten_test_results(self.reference_data)
 
-        _, fixes, _, _ = identify_test_changes(current_flat, reference_flat)
+        _, fixes, _, _, _ = identify_test_changes(current_flat, reference_flat)
 
         # tests/cp/test1 and tests/cp/test2 should be fixed but tests/cp/test1 is in ignore list
         self.assertIn("tests/cp/test1", fixes)
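
For context (not part of the patch): a minimal sketch of the new five-tuple contract, assuming the module is importable as compare_test_results (hypothetical; in CI the script runs standalone) and using made-up test paths. It mirrors the SKIP -> FAIL split exercised by test_skip_to_fail_not_regression above.

    # Hedged sketch, not part of the diff: the import path and data are assumptions.
    from compare_test_results import identify_test_changes

    current = {"tests/ls/t1": "FAIL", "tests/cp/t2": "FAIL"}
    reference = {"tests/ls/t1": "SKIP", "tests/cp/t2": "PASS"}

    regressions, fixes, newly_skipped, newly_passing, newly_failing = (
        identify_test_changes(current, reference)
    )
    assert regressions == ["tests/cp/t2"]  # PASS -> FAIL is still a regression
    assert newly_failing == ["tests/ls/t1"]  # SKIP -> FAIL is its own category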