From 66919592ff3eee1c64dffe85c7818d612e63119c Mon Sep 17 00:00:00 2001 From: Ben Hearsum Date: Thu, 14 Aug 2025 09:57:16 -0400 Subject: [PATCH] fix: race condition in parallel kind processing This race condition happens because `all_tasks` is updated in a callback, which runs asynchronously, but `kinds` and `edges` are updated in the main loop. This leads to a potential mismatch when the logic at the start of `submit_ready_kinds` runs: if `all_tasks` is updated but the other two are not, the data we feed into `load_tasks` will be incorrect. --- src/taskgraph/generator.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/taskgraph/generator.py b/src/taskgraph/generator.py index d4a79f7de..d2f7e8a65 100644 --- a/src/taskgraph/generator.py +++ b/src/taskgraph/generator.py @@ -267,12 +267,6 @@ def _load_tasks(self, kinds, kind_graph, parameters): futures = set() edges = set(kind_graph.edges) - def add_new_tasks(future): - for task in future.result(): - if task.label in all_tasks: - raise Exception("duplicate tasks with label " + task.label) - all_tasks[task.label] = task - with ProcessPoolExecutor() as executor: def submit_ready_kinds(): @@ -303,7 +297,6 @@ def submit_ready_kinds(): }, self._write_artifacts, ) - future.add_done_callback(add_new_tasks) futures.add(future) futures_to_kind[future] = name @@ -317,6 +310,11 @@ def submit_ready_kinds(): kind = futures_to_kind.pop(future) futures.remove(future) + for task in future.result(): + if task.label in all_tasks: + raise Exception("duplicate tasks with label " + task.label) + all_tasks[task.label] = task + # Update state for next batch of futures. del kinds[kind] edges = {e for e in edges if e[1] != kind}