diff --git a/src/taskgraph/graph.py b/src/taskgraph/graph.py index 9151a5d1c..573275087 100644 --- a/src/taskgraph/graph.py +++ b/src/taskgraph/graph.py @@ -97,14 +97,18 @@ def _visit(self, reverse): indegree[dependent] -= 1 if indegree[dependent] == 0: queue.append(dependent) + loopy_nodes = {node for node, degree in indegree.items() if degree > 0} + if loopy_nodes: + raise Exception( + f"Dependency loop detected involving the following nodes: {loopy_nodes}" + ) def visit_postorder(self): """ Generate a sequence of nodes in postorder, such that every node is visited *after* any nodes it links to. - Behavior is undefined (read: it will hang) if the graph contains a - cycle. + Raises an exception if the graph contains a cycle. """ return self._visit(False) diff --git a/src/taskgraph/transforms/cached_tasks.py b/src/taskgraph/transforms/cached_tasks.py index 798187c4a..d6668d06d 100644 --- a/src/taskgraph/transforms/cached_tasks.py +++ b/src/taskgraph/transforms/cached_tasks.py @@ -3,9 +3,8 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. -from collections import deque - import taskgraph +from taskgraph.graph import Graph from taskgraph.transforms.base import TransformSequence from taskgraph.util.cached_tasks import add_optimization @@ -15,25 +14,19 @@ def order_tasks(config, tasks): """Iterate image tasks in an order where parent tasks come first.""" kind_prefix = config.kind + "-" - - pending = deque(tasks) - task_labels = {task["label"] for task in pending} - emitted = set() - while True: - try: - task = pending.popleft() - except IndexError: - break - parents = { - task - for task in task.get("dependencies", {}).values() - if task.startswith(kind_prefix) - } - if parents and not emitted.issuperset(parents & task_labels): - pending.append(task) - continue - emitted.add(task["label"]) - yield task + pending = {task["label"]: task for task in tasks} + nodes = set(pending) + edges = { + (label, dep, "") + for label in pending + for dep in pending[label].get("dependencies", {}).values() + if dep.startswith(kind_prefix) + } + graph = Graph(nodes, edges) + + for label in graph.visit_postorder(): + yield pending.pop(label) + assert not pending def format_task_digest(cached_task): diff --git a/test/test_graph.py b/test/test_graph.py index 667d185f7..f1d683cc4 100644 --- a/test/test_graph.py +++ b/test/test_graph.py @@ -5,6 +5,8 @@ import unittest +import pytest + from taskgraph.graph import Graph @@ -63,6 +65,11 @@ class TestGraph(unittest.TestCase): }, ) + loopy = Graph( + {"A", "B", "C"}, + {tuple(x) for x in "ABL BCL CAL".split()}, # codespell:ignore + ) + def test_transitive_closure_empty(self): "transitive closure of an empty set is an empty graph" g = Graph({"a", "b", "c"}, {("a", "b", "L"), ("a", "c", "L")}) @@ -123,6 +130,10 @@ def test_transitive_closure_linear(self): "transitive closure of a linear graph includes all nodes in the line" self.assertEqual(self.linear.transitive_closure({"1"}), self.linear) + def test_transitive_closure_loopy(self): + "transitive closure of a loop is the whole loop" + self.assertEqual(self.loopy.transitive_closure({"A"}), self.loopy) + def test_visit_postorder_empty(self): "postorder visit of an empty graph is empty" self.assertEqual(list(Graph(set(), set()).visit_postorder()), []) @@ -154,6 +165,11 @@ def test_visit_postorder_disjoint(self): "postorder visit of a disjoint graph satisfies invariant" self.assert_postorder(self.disjoint.visit_postorder(), self.disjoint.nodes) + def test_visit_postorder_loopy(self): + with pytest.raises(Exception) as excinfo: + list(self.loopy.visit_postorder()) + assert "Dependency loop detected" in str(excinfo.value) + def assert_preorder(self, seq, all_nodes): seen = set() for e in seq: @@ -179,6 +195,11 @@ def test_visit_preorder_disjoint(self): "preorder visit of a disjoint graph satisfies invariant" self.assert_preorder(self.disjoint.visit_preorder(), self.disjoint.nodes) + def test_visit_preorder_loopy(self): + with pytest.raises(Exception) as excinfo: + list(self.loopy.visit_preorder()) + assert "Dependency loop detected" in str(excinfo.value) + def test_links_dict(self): "link dict for a graph with multiple edges is correct" self.assertEqual( diff --git a/test/test_transforms_cached_tasks.py b/test/test_transforms_cached_tasks.py index cc4451a42..c69112a5b 100644 --- a/test/test_transforms_cached_tasks.py +++ b/test/test_transforms_cached_tasks.py @@ -55,6 +55,7 @@ def assert_cache_basic(tasks): def assert_cache_with_dependency(tasks): handle_exception(tasks) assert len(tasks) == 2 + tasks = sorted(tasks, key=lambda t: t["label"]) assert tasks[1] == { "attributes": { "cached_task": { @@ -65,7 +66,7 @@ def assert_cache_with_dependency(tasks): }, "dependencies": {"edge": "dep-cached"}, "description": "description", - "label": "cached-task", + "label": "cached-with-dep", "optimization": { "index-search": [ "test-domain.cache.level-3.cached-task.v2.cache-foo.hash.db201e53944fccbb16736c8153a14de39748c0d290de84bd976c11ddcc413089", @@ -92,15 +93,16 @@ def assert_cache_with_non_cached_dependency(e): def assert_chain_of_trust_influences_digest(tasks): assert len(tasks) == 3 - # The first two tasks are chain-of-trust unspecified, and chain-of-trust: False + tasks = sorted(tasks, key=lambda t: t["label"]) # cot-false, cot-true, no-cot + # The first and third tasks are chain-of-trust: false, and chain-of-trust unspecified # which should result in the same digest. digest_0 = tasks[0]["attributes"]["cached_task"]["digest"] - digest_1 = tasks[1]["attributes"]["cached_task"]["digest"] - assert digest_0 == digest_1 - - # The third task is chain-of-trust: True, and should have a different digest digest_2 = tasks[2]["attributes"]["cached_task"]["digest"] - assert digest_0 != digest_2 + assert digest_0 == digest_2 + + # The second task is chain-of-trust: True, and should have a different digest + digest_1 = tasks[1]["attributes"]["cached_task"]["digest"] + assert digest_0 != digest_1 @pytest.mark.parametrize( @@ -141,7 +143,8 @@ def assert_chain_of_trust_influences_digest(tasks): "type": "cached-task.v2", "name": "cache-foo", "digest-data": ["abc"], - } + }, + "label": "cached-no-dep", }, # This task has same digest-data, but a dependency on a cached task. { @@ -151,6 +154,7 @@ def assert_chain_of_trust_influences_digest(tasks): "digest-data": ["abc"], }, "dependencies": {"edge": "dep-cached"}, + "label": "cached-with-dep", }, ], # kind config @@ -198,6 +202,7 @@ def assert_chain_of_trust_influences_digest(tasks): "name": "cache-foo", "digest-data": ["abc"], }, + "label": "no-cot", # no explicit chain of trust configuration; should be the # same as when it is set to False }, @@ -207,6 +212,7 @@ def assert_chain_of_trust_influences_digest(tasks): "name": "cache-foo", "digest-data": ["abc"], }, + "label": "cot-false", "worker": { "chain-of-trust": False, }, @@ -217,6 +223,7 @@ def assert_chain_of_trust_influences_digest(tasks): "name": "cache-foo", "digest-data": ["abc"], }, + "label": "cot-true", "worker": {"chain-of-trust": True}, }, ],