diff --git a/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py b/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py index 718f34a50..ac1bee702 100644 --- a/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py +++ b/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py @@ -32,8 +32,12 @@ def fake_loader(kind, path, config, parameters, loaded_tasks, write_artifacts): "i": i, "metadata": {"name": f"{kind}-t-{i}"}, "deadline": "soon", + "workerType": "linux", + "provisionerId": "prov", }, "dependencies": dependencies, + "if-dependencies": [], + "soft-dependencies": [], } if "task-defaults" in config: task = merge(config["task-defaults"], task) @@ -250,6 +254,7 @@ def make_task( task_id=None, dependencies=None, if_dependencies=None, + soft_dependencies=None, attributes=None, ): task_def = task_def or { @@ -258,7 +263,9 @@ def make_task( } task = Task( attributes=attributes or {}, + dependencies=dependencies or {}, if_dependencies=if_dependencies or [], + soft_dependencies=soft_dependencies or [], kind=kind, label=label, task=task_def, @@ -266,7 +273,13 @@ def make_task( task.optimization = optimization task.task_id = task_id if dependencies is not None: - task.task["dependencies"] = sorted(dependencies) + # The dependencies dict is converted from a dict to a list during the + # optimization phase. This is pretty confusing and means this utility + # might break assumptions about the format of 'dependencies'. 
+ if isinstance(dependencies, dict): + task.task["dependencies"] = sorted(dependencies.values()) + else: + task.task["dependencies"] = sorted(dependencies) return task diff --git a/src/taskgraph/optimize/base.py b/src/taskgraph/optimize/base.py index c291d4a8f..ba07ca276 100644 --- a/src/taskgraph/optimize/base.py +++ b/src/taskgraph/optimize/base.py @@ -437,6 +437,7 @@ def get_subgraph( ) deps = task.task.setdefault("dependencies", []) deps.extend(sorted(named_task_dependencies.values())) + task.dependencies.update(named_task_dependencies) tasks_by_taskid[task.task_id] = task # resolve edges to taskIds diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py index 6702ddc10..795a75744 100644 --- a/src/taskgraph/transforms/task.py +++ b/src/taskgraph/transforms/task.py @@ -11,7 +11,6 @@ import functools import hashlib import os -import re import time from copy import deepcopy from dataclasses import dataclass @@ -20,7 +19,6 @@ from voluptuous import All, Any, Extra, NotIn, Optional, Required -from taskgraph import MAX_DEPENDENCIES from taskgraph.transforms.base import TransformSequence from taskgraph.util.hash import hash_path from taskgraph.util.keyed_by import evaluate_keyed_by @@ -44,7 +42,7 @@ @functools.cache -def _run_task_suffix(): +def run_task_suffix(): """String to append to cache names under control of run-task.""" return hash_path(RUN_TASK)[0:20] @@ -715,7 +713,7 @@ def build_docker_worker_payload(config, task, task_def): cache_version = "v3" if run_task: - suffix = f"{cache_version}-{_run_task_suffix()}" + suffix = f"{cache_version}-{run_task_suffix()}" if out_of_tree_image: name_hash = hashlib.sha256( @@ -763,8 +761,6 @@ def build_docker_worker_payload(config, task, task_def): if capabilities: payload["capabilities"] = capabilities - check_caches_are_volumes(task) - @payload_builder( "generic-worker", @@ -1457,135 +1453,3 @@ def chain_of_trust(config, tasks): "task-reference": "" } yield task - - -@transforms.add -def 
check_task_identifiers(config, tasks): - """Ensures that all tasks have well defined identifiers: - ``^[a-zA-Z0-9_-]{1,38}$`` - """ - e = re.compile("^[a-zA-Z0-9_-]{1,38}$") - for task in tasks: - for attrib in ("workerType", "provisionerId"): - if not e.match(task["task"][attrib]): - raise Exception( - "task {}.{} is not a valid identifier: {}".format( - task["label"], attrib, task["task"][attrib] - ) - ) - yield task - - -@transforms.add -def check_task_dependencies(config, tasks): - """Ensures that tasks don't have more than 100 dependencies.""" - for task in tasks: - number_of_dependencies = ( - len(task["dependencies"]) - + len(task["if-dependencies"]) - + len(task["soft-dependencies"]) - ) - if number_of_dependencies > MAX_DEPENDENCIES: - raise Exception( - "task {}/{} has too many dependencies ({} > {})".format( - config.kind, - task["label"], - number_of_dependencies, - MAX_DEPENDENCIES, - ) - ) - yield task - - -def check_caches_are_volumes(task): - """Ensures that all cache paths are defined as volumes. - - Caches and volumes are the only filesystem locations whose content - isn't defined by the Docker image itself. Some caches are optional - depending on the task environment. We want paths that are potentially - caches to have as similar behavior regardless of whether a cache is - used. To help enforce this, we require that all paths used as caches - to be declared as Docker volumes. This check won't catch all offenders. - But it is better than nothing. 
- """ - volumes = set(task["worker"]["volumes"]) - paths = {c["mount-point"] for c in task["worker"].get("caches", [])} - missing = paths - volumes - - if not missing: - return - - raise Exception( - "task {} (image {}) has caches that are not declared as " - "Docker volumes: {} " - "(have you added them as VOLUMEs in the Dockerfile?)".format( - task["label"], task["worker"]["docker-image"], ", ".join(sorted(missing)) - ) - ) - - -@transforms.add -def check_run_task_caches(config, tasks): - """Audit for caches requiring run-task. - - run-task manages caches in certain ways. If a cache managed by run-task - is used by a non run-task task, it could cause problems. So we audit for - that and make sure certain cache names are exclusive to run-task. - - IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY - CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS - THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT. - """ - re_reserved_caches = re.compile( - """^ - (checkouts|tooltool-cache) - """, - re.VERBOSE, - ) - - cache_prefix = "{trust_domain}-level-{level}-".format( - trust_domain=config.graph_config["trust-domain"], - level=config.params["level"], - ) - - suffix = _run_task_suffix() - - for task in tasks: - payload = task["task"].get("payload", {}) - command = payload.get("command") or [""] - - main_command = command[0] if isinstance(command[0], str) else "" - run_task = main_command.endswith("run-task") - - for cache in payload.get("cache", {}).get( - "task-reference", payload.get("cache", {}) - ): - if not cache.startswith(cache_prefix): - raise Exception( - "{} is using a cache ({}) which is not appropriate " - "for its trust-domain and level. 
It should start with {}.".format( - task["label"], cache, cache_prefix - ) - ) - - cache = cache[len(cache_prefix) :] - - if not re_reserved_caches.match(cache): - continue - - if not run_task: - raise Exception( - f"{task['label']} is using a cache ({cache}) reserved for run-task " - "change the task to use run-task or use a different " - "cache name" - ) - - if suffix not in cache: - raise Exception( - f"{task['label']} is using a cache ({cache}) reserved for run-task " - "but the cache name is not dependent on the contents " - "of run-task; change the cache name to conform to the " - "naming requirements" - ) - - yield task diff --git a/src/taskgraph/util/verify.py b/src/taskgraph/util/verify.py index b4070ae8b..594388019 100644 --- a/src/taskgraph/util/verify.py +++ b/src/taskgraph/util/verify.py @@ -4,15 +4,18 @@ import logging +import re import sys import warnings from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import Callable, Union +from taskgraph import MAX_DEPENDENCIES from taskgraph.config import GraphConfig from taskgraph.parameters import Parameters from taskgraph.taskgraph import TaskGraph +from taskgraph.transforms.task import run_task_suffix from taskgraph.util.attributes import match_run_on_projects from taskgraph.util.treeherder import join_symbol @@ -301,6 +304,133 @@ def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameter scratch_pad[key] = task.label +RE_RESERVED_CACHES = re.compile(r"^(checkouts|tooltool-cache)", re.VERBOSE) + + +@verifications.add("full_task_graph") +def verify_run_task_caches(task, taskgraph, scratch_pad, graph_config, parameters): + """Audit for caches requiring run-task. + + run-task manages caches in certain ways. If a cache managed by run-task + is used by a non run-task task, it could cause problems. So we audit for + that and make sure certain cache names are exclusive to run-task. 
+
+    IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
+    CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
+    THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.
+    """
+    if task is None:
+        return
+
+    cache_prefix = "{trust_domain}-level-{level}-".format(
+        trust_domain=graph_config["trust-domain"],
+        level=parameters["level"],
+    )
+
+    suffix = run_task_suffix()
+
+    payload = task.task.get("payload", {})
+    command = payload.get("command") or [""]
+
+    main_command = command[0] if isinstance(command[0], str) else ""
+    run_task = main_command.endswith("run-task")
+
+    for cache in payload.get("cache", {}).get(
+        "task-reference", payload.get("cache", {})
+    ):
+        if not cache.startswith(cache_prefix):
+            raise Exception(
+                f"{task.label} is using a cache ({cache}) which is not appropriate "
+                f"for its trust-domain and level. It should start with {cache_prefix}."
+            )
+
+        cache = cache[len(cache_prefix) :]
+
+        if not RE_RESERVED_CACHES.match(cache):
+            continue
+
+        if not run_task:
+            raise Exception(
+                f"{task.label} is using a cache ({cache}) reserved for run-task "
+                "change the task to use run-task or use a different "
+                "cache name"
+            )
+
+        if suffix not in cache:
+            raise Exception(
+                f"{task.label} is using a cache ({cache}) reserved for run-task "
+                "but the cache name is not dependent on the contents "
+                "of run-task; change the cache name to conform to the "
+                "naming requirements"
+            )
+
+
+@verifications.add("full_task_graph")
+def verify_task_identifiers(task, taskgraph, scratch_pad, graph_config, parameters):
+    """Ensures that all tasks have well defined identifiers:
+    ``^[a-zA-Z0-9_-]{1,38}$``
+    """
+    if task is None:
+        return
+
+    e = re.compile("^[a-zA-Z0-9_-]{1,38}$")
+    for attrib in ("workerType", "provisionerId"):
+        if not e.match(task.task[attrib]):
+            raise Exception(
+                f"task {task.label}.{attrib} is not a valid identifier: {task.task[attrib]}"
+            )
+
+
+@verifications.add("full_task_graph")
+def 
verify_task_dependencies(task, taskgraph, scratch_pad, graph_config, parameters): + """Ensures that tasks don't have more than 100 dependencies.""" + if task is None: + return + + number_of_dependencies = ( + len(task.dependencies) + len(task.if_dependencies) + len(task.soft_dependencies) + ) + if number_of_dependencies > MAX_DEPENDENCIES: + raise Exception( + f"task {task.label} has too many dependencies ({number_of_dependencies} > {MAX_DEPENDENCIES})" + ) + + +@verifications.add("full_task_graph") +def verify_caches_are_volumes(task, taskgraph, scratch_pad, graph_config, parameters): + """Ensures that all cache paths are defined as volumes. + + Caches and volumes are the only filesystem locations whose content + isn't defined by the Docker image itself. Some caches are optional + depending on the task environment. We want paths that are potentially + caches to have as similar behavior regardless of whether a cache is + used. To help enforce this, we require that all paths used as caches + to be declared as Docker volumes. This check won't catch all offenders. + But it is better than nothing. 
+ """ + if task is None: + return + + taskdef = task.task + if taskdef.get("worker", {}).get("implementation") != "docker-worker": + return + + volumes = set(taskdef["worker"]["volumes"]) + paths = {c["mount-point"] for c in taskdef["worker"].get("caches", [])} + missing = paths - volumes + + if missing: + raise Exception( + "task {} (image {}) has caches that are not declared as " + "Docker volumes: {} " + "(have you added them as VOLUMEs in the Dockerfile?)".format( + task.label, + taskdef["worker"]["docker-image"], + ", ".join(sorted(missing)), + ) + ) + + @verifications.add("optimized_task_graph") def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters): """ diff --git a/test/test_optimize.py b/test/test_optimize.py index af9924f7e..bfc2e9709 100644 --- a/test/test_optimize.py +++ b/test/test_optimize.py @@ -410,8 +410,10 @@ def test_replace_tasks( {}, make_opt_graph( make_task("t1", task_id="tid1", dependencies={}), - make_task("t2", task_id="tid2", dependencies={"tid1"}), - make_task("t3", task_id="tid3", dependencies={"tid1", "tid2"}), + make_task("t2", task_id="tid2", dependencies={"dep": "tid1"}), + make_task( + "t3", task_id="tid3", dependencies={"dep": "tid2", "dep2": "tid1"} + ), ("tid3", "tid2", "dep"), ("tid3", "tid1", "dep2"), ("tid2", "tid1", "dep"), @@ -438,7 +440,11 @@ def test_replace_tasks( "label_to_taskid": {"t1": "e1", "t2": "e2"}, }, # expectations - make_opt_graph(make_task("t3", task_id="tid1", dependencies={"e1", "e2"})), + make_opt_graph( + make_task( + "t3", task_id="tid1", dependencies={"dep": "e2", "dep2": "e1"} + ) + ), {"t1": "e1", "t2": "e2", "t3": "tid1"}, id="replaced", ), diff --git a/test/test_transforms_task.py b/test/test_transforms_task.py index 8568d6d5e..3e049ba72 100644 --- a/test/test_transforms_task.py +++ b/test/test_transforms_task.py @@ -2,7 +2,6 @@ Tests for the 'task' transforms. 
""" -from contextlib import nullcontext as does_not_raise from copy import deepcopy from pathlib import Path from pprint import pprint @@ -724,86 +723,6 @@ def test_treeherder_defaults(run_transform, graph_config, kind, task_def, expect assert task_dict["task"].get("extra", {}).get("treeherder", {}) == expected_th -@pytest.mark.parametrize( - "test_task, expectation", - ( - ( - { - "label": "task1", - "dependencies": ["dependency"] * 2, - "soft-dependencies": ["dependency"] * 1, - "if-dependencies": ["dependency"] * 4, - }, - does_not_raise(), - ), - ( - { - "label": "task2", - "dependencies": ["dependency"] * 97, - "soft-dependencies": ["dependency"] * 1, - "if-dependencies": ["dependency"] * 1, - }, - does_not_raise(), - ), - ( - { - "label": "task3", - "dependencies": ["dependency"] * 9998, - "soft-dependencies": ["dependency"] * 1, - "if-dependencies": ["dependency"] * 1, - }, - pytest.raises(Exception), - ), - ( - { - "label": "task3", - "dependencies": ["dependency"] * 9999, - "soft-dependencies": ["dependency"], - "if-dependencies": ["dependency"], - }, - pytest.raises(Exception), - ), - ), -) -def test_check_task_dependencies(graph_config, test_task, expectation): - params = FakeParameters( - { - "base_repository": "git@github.com://github.com/mozilla/example.git", - "build_date": 0, - "build_number": 1, - "head_repository": "git@github.com://github.com/mozilla/example.git", - "head_rev": "abcdef", - "head_ref": "default", - "level": "1", - "moz_build_date": 0, - "next_version": "1.0.1", - "owner": "some-owner", - "project": "some-project", - "pushlog_id": 1, - "repository_type": "git", - "target_tasks_method": "test_method", - "tasks_for": "github-pull-request", - "try_mode": None, - "version": "1.0.0", - }, - ) - - transform_config = TransformConfig( - "check_task_dependencies", - str(here), - {}, - params, - {}, - graph_config, - write_artifacts=False, - ) - - with expectation: - assert ( - len(list(task.check_task_dependencies(transform_config, 
[test_task]))) == 1 - ) - - @pytest.mark.parametrize( "deadline_after, test_task", ( diff --git a/test/test_util_verify.py b/test/test_util_verify.py index fc1c4714b..3218daf09 100644 --- a/test/test_util_verify.py +++ b/test/test_util_verify.py @@ -2,12 +2,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. +from contextlib import nullcontext as does_not_raise from functools import partial from itertools import chain import pytest from pytest_taskgraph import make_graph, make_task +from taskgraph import MAX_DEPENDENCIES from taskgraph.task import Task from taskgraph.util.treeherder import split_symbol from taskgraph.util.verify import ( @@ -134,7 +136,7 @@ def make_task_treeherder(label, symbol, platform="linux/opt"): @pytest.mark.parametrize( - "name,graph,exception", + "name,graph,expectation", ( pytest.param( "verify_task_graph_symbol", @@ -145,7 +147,7 @@ def make_task_treeherder(label, symbol, platform="linux/opt"): make_task_treeherder("d", "M(2)", "linux/opt"), make_task_treeherder("e", "1", "linux/opt"), ), - None, + does_not_raise(), id="task_graph_symbol: valid", ), pytest.param( @@ -154,7 +156,7 @@ def make_task_treeherder(label, symbol, platform="linux/opt"): make_task_treeherder("a", "M(1)"), make_task_treeherder("b", "M(1)"), ), - Exception, + pytest.raises(Exception), id="task_graph_symbol: conflicting symbol", ), pytest.param( @@ -169,7 +171,7 @@ def make_task_treeherder(label, symbol, platform="linux/opt"): }, ), ), - Exception, + pytest.raises(Exception), id="task_graph_symbol: too many collections", ), pytest.param( @@ -182,7 +184,7 @@ def make_task_treeherder(label, symbol, platform="linux/opt"): }, ), ), - None, + does_not_raise(), id="routes_notfication_filter: valid", ), pytest.param( @@ -195,7 +197,7 @@ def make_task_treeherder(label, symbol, platform="linux/opt"): }, ), ), - Exception, + pytest.raises(Exception), id="routes_notfication_filter: invalid", ), 
pytest.param( @@ -206,7 +208,7 @@ def make_task_treeherder(label, symbol, platform="linux/opt"): task_def={"routes": ["notify.email.default@email.address.on-any"]}, ), ), - DeprecationWarning, + pytest.raises(DeprecationWarning), id="routes_notfication_filter: deprecated", ), pytest.param( @@ -221,16 +223,52 @@ def make_task_treeherder(label, symbol, platform="linux/opt"): }, ), ), - Exception, + pytest.raises(Exception), id="verify_index_route: invalid slash in route", ), + pytest.param( + "verify_task_dependencies", + make_graph( + make_task( + "task2", + dependencies=["dependency"] * (MAX_DEPENDENCIES - 3), + soft_dependencies=["dependency"] * 1, + if_dependencies=["dependency"] * 1, + ) + ), + does_not_raise(), + id="dependencies under limit", + ), + pytest.param( + "verify_task_dependencies", + make_graph( + make_task( + "task3", + dependencies=["dependency"] * (MAX_DEPENDENCIES - 2), + soft_dependencies=["dependency"] * 1, + if_dependencies=["dependency"] * 1, + ) + ), + does_not_raise(), + id="dependencies at limit", + ), + pytest.param( + "verify_task_dependencies", + make_graph( + make_task( + "task3", + dependencies=["dependency"] * (MAX_DEPENDENCIES - 1), + soft_dependencies=["dependency"] * 1, + if_dependencies=["dependency"] * 1, + ) + ), + pytest.raises(Exception), + id="dependencies over limit", + ), ), ) @pytest.mark.filterwarnings("error") -def test_verification(run_verification, name, graph, exception): +def test_verification(run_verification, name, graph, expectation): func = partial(run_verification, name, graph=graph) - if exception: - with pytest.raises(exception): - func() - else: - func() # no exceptions + with expectation: + func()