From 16117306bf3af08e19904bf1b945f9cda3a6e1ef Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 4 May 2026 13:17:28 +0200 Subject: [PATCH 1/2] Fix update_status race by snapshotting under the lock instead. --- src/ParallelTestRunner.jl | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/ParallelTestRunner.jl b/src/ParallelTestRunner.jl index 53cfbaa..0e1f734 100644 --- a/src/ParallelTestRunner.jl +++ b/src/ParallelTestRunner.jl @@ -1003,16 +1003,19 @@ function runtests(mod::Module, args::ParsedArgs; end function update_status() - # only draw if we have something to show - @lock(running_tests, isempty(running_tests[])) && return - completed = @lock results length(results[]) + # take consistent snapshots once, so the rest of this function operates on + # frozen data rather than racing with workers that mutate these collections + running_snapshot = @lock running_tests copy(running_tests[]) + isempty(running_snapshot) && return + results_snapshot = @lock results copy(results[]) + completed = length(results_snapshot) total = @lock tests length(tests[]) # line 1: empty line line1 = "" # line 2: running tests - test_list = @lock running_tests sort(collect(keys(running_tests[])), by = x -> running_tests[][x]) + test_list = sort(collect(keys(running_snapshot)), by = x -> running_snapshot[x]) status_parts = map(test_list) do test "$test" end @@ -1027,23 +1030,23 @@ function runtests(mod::Module, args::ParsedArgs; line3 = "Progress: $completed/$total tests completed" if completed > 0 # estimate per-test time (slightly pessimistic) - durations_done = @lock results [end_time - start_time for (_, _,_, start_time, end_time) in results[]] + durations_done = [end_time - start_time for (_, _,_, start_time, end_time) in results_snapshot] μ = mean(durations_done) σ = length(durations_done) > 1 ? std(durations_done) : 0.0 est_per_test = μ + 0.5σ est_remaining = 0.0 ## currently-running - for (test, start_time) in @lock(running_tests, running_tests[]) + for (test, start_time) in running_snapshot elapsed = time() - start_time duration = get(historical_durations, test, est_per_test) est_remaining += max(0.0, duration - elapsed) end ## yet-to-run - for test in @lock(tests, tests[]) - @lock(running_tests, haskey(running_tests[], test)) && continue + @lock tests for test in tests[] + haskey(running_snapshot, test) && continue # Test is in any completed test - @lock(results, any(r -> test == r.test, results[])) && continue + any(r -> test == r.test, results_snapshot) && continue est_remaining += get(historical_durations, test, est_per_test) end @@ -1126,7 +1129,10 @@ function runtests(mod::Module, args::ParsedArgs; end isa(ex, InterruptException) || rethrow() finally - if @lock running_tests @lock results @lock tests isempty(running_tests[]) && length(results[]) >= length(tests[]) + n_running = @lock running_tests length(running_tests[]) + n_results = @lock results length(results[]) + n_tests = @lock tests length(tests[]) + if n_running == 0 && n_results >= n_tests # XXX: only erase the status if we completed successfully. # in other cases we'll have printed "caught interrupt" clear_status() From f2aa539fb2d656a072b05cd28cd2930ec5de383d Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 4 May 2026 13:19:22 +0200 Subject: [PATCH 2/2] Drop unnecessary Lockable wrapping of `tests` It's read-only anyway. --- src/ParallelTestRunner.jl | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/ParallelTestRunner.jl b/src/ParallelTestRunner.jl index 0e1f734..fad30c3 100644 --- a/src/ParallelTestRunner.jl +++ b/src/ParallelTestRunner.jl @@ -932,15 +932,15 @@ function runtests(mod::Module, args::ParsedArgs; filter_tests!(testsuite, args) # determine test order - tests = Lockable(collect(keys(testsuite))) - @lock tests Random.shuffle!(tests[]) + tests = collect(keys(testsuite)) + Random.shuffle!(tests) historical_durations = load_test_history(mod) - @lock tests sort!(tests[], by = x -> -get(historical_durations, x, Inf)) + sort!(tests, by = x -> -get(historical_durations, x, Inf)) # determine parallelism jobs = something(args.jobs, default_njobs()) - jobs = @lock tests clamp(jobs, 1, length(tests[])) - @lock tests println(stdout, "Running $(length(tests[])) tests using $jobs parallel jobs. If this is too many concurrent jobs, specify the `--jobs=N` argument to the tests, or set the `JULIA_CPU_THREADS` environment variable.") + jobs = clamp(jobs, 1, length(tests)) + println(stdout, "Running $(length(tests)) tests using $jobs parallel jobs. If this is too many concurrent jobs, specify the `--jobs=N` argument to the tests, or set the `JULIA_CPU_THREADS` environment variable.") !isnothing(args.verbose) && println(stdout, "Available memory: $(Base.format_bytes(available_memory()))") sem = Base.Semaphore(max(1, jobs)) worker_pool = Channel{Union{Nothing, PTRWorker}}(jobs) @@ -974,10 +974,10 @@ function runtests(mod::Module, args::ParsedArgs; # pretty print information about gc and mem usage testgroupheader = "Test" workerheader = "(Worker)" - name_align = @lock tests maximum( + name_align = maximum( [ textwidth(testgroupheader) + textwidth(" ") + textwidth(workerheader); - map(x -> textwidth(x) + 5, tests[]) + map(x -> textwidth(x) + 5, tests) ] ) @@ -1009,7 +1009,7 @@ function runtests(mod::Module, args::ParsedArgs; isempty(running_snapshot) && return results_snapshot = @lock results copy(results[]) completed = length(results_snapshot) - total = @lock tests length(tests[]) + total = length(tests) # line 1: empty line line1 = "" @@ -1043,7 +1043,7 @@ function runtests(mod::Module, args::ParsedArgs; est_remaining += max(0.0, duration - elapsed) end ## yet-to-run - @lock tests for test in tests[] + for test in tests haskey(running_snapshot, test) && continue # Test is in any completed test any(r -> test == r.test, results_snapshot) && continue @@ -1131,8 +1131,7 @@ function runtests(mod::Module, args::ParsedArgs; finally n_running = @lock running_tests length(running_tests[]) n_results = @lock results length(results[]) - n_tests = @lock tests length(tests[]) - if n_running == 0 && n_results >= n_tests + if n_running == 0 && n_results >= length(tests) # XXX: only erase the status if we completed successfully. # in other cases we'll have printed "caught interrupt" clear_status() @@ -1144,9 +1143,9 @@ function runtests(mod::Module, args::ParsedArgs; # execution # - tests_to_start = @lock tests Threads.Atomic{Int}(length(tests[])) + tests_to_start = Threads.Atomic{Int}(length(tests)) try - @sync for test in @lock(tests, tests[]) + @sync for test in tests push!(worker_tasks, Threads.@spawn begin local p = nothing acquired = false @@ -1366,7 +1365,7 @@ function runtests(mod::Module, args::ParsedArgs; end # mark remaining or running tests as interrupted - for test in @lock(tests, tests[]) + for test in tests (test in completed_tests) && continue testset = create_testset(test) Test.record(testset, Test.Error(:test_interrupted, test, nothing, Base.ExceptionStack(NamedTuple[(;exception = "skipped", backtrace = [])]), LineNumberNode(1)))