15 changes: 14 additions & 1 deletion packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py
@@ -32,8 +32,12 @@ def fake_loader(kind, path, config, parameters, loaded_tasks, write_artifacts):
"i": i,
"metadata": {"name": f"{kind}-t-{i}"},
"deadline": "soon",
"workerType": "linux",
"provisionerId": "prov",
},
"dependencies": dependencies,
"if-dependencies": [],
"soft-dependencies": [],
}
if "task-defaults" in config:
task = merge(config["task-defaults"], task)
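
# (Presumably the new workerType/provisionerId fields and the empty
# dependency lists above let fixture-generated tasks pass the identifier
# and dependency checks added in src/taskgraph/util/verify.py below.)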
@@ -250,6 +254,7 @@ def make_task(
task_id=None,
dependencies=None,
if_dependencies=None,
soft_dependencies=None,
attributes=None,
):
task_def = task_def or {
@@ -258,15 +263,23 @@
}
task = Task(
attributes=attributes or {},
dependencies=dependencies or {},
if_dependencies=if_dependencies or [],
soft_dependencies=soft_dependencies or [],
kind=kind,
label=label,
task=task_def,
)
task.optimization = optimization
task.task_id = task_id
if dependencies is not None:
task.task["dependencies"] = sorted(dependencies)
# Task dependencies are stored as a dict (name -> label) until the
# optimization phase converts them to a flat list. This is pretty
# confusing, so accept both formats here rather than break assumptions
# about the shape of 'dependencies'.
if isinstance(dependencies, dict):
task.task["dependencies"] = sorted(dependencies.values())
else:
task.task["dependencies"] = sorted(dependencies)
return task
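
# Usage sketch (labels fabricated; assumes `label` is make_task's first
# positional argument and the remaining arguments default sensibly):
#
#   t = make_task("test-1", dependencies={"build": "build-linux"})
#   t.task["dependencies"]  # ["build-linux"] -- the dict's sorted values
#
#   t = make_task("test-2", dependencies=["b-mac", "b-linux"])
#   t.task["dependencies"]  # ["b-linux", "b-mac"] -- the list, sorted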


1 change: 1 addition & 0 deletions src/taskgraph/optimize/base.py
@@ -437,6 +437,7 @@ def get_subgraph(
)
deps = task.task.setdefault("dependencies", [])
deps.extend(sorted(named_task_dependencies.values()))
task.dependencies.update(named_task_dependencies)
tasks_by_taskid[task.task_id] = task
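
# The added task.dependencies.update(...) keeps the Task object's
# name -> taskId mapping in step with the flat list written into the
# task definition; previously only the definition was updated here.
# The two representations, with a fabricated taskId for illustration:
#   task.dependencies         == {"docker-image": "Qy5FLbjSRBSWXx2EZz_dw"}
#   task.task["dependencies"] == ["Qy5FLbjSRBSWXx2EZz_dw"]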

# resolve edges to taskIds
140 changes: 2 additions & 138 deletions src/taskgraph/transforms/task.py
@@ -11,7 +11,6 @@
import functools
import hashlib
import os
import re
import time
from copy import deepcopy
from dataclasses import dataclass
@@ -20,7 +19,6 @@

from voluptuous import All, Any, Extra, NotIn, Optional, Required

from taskgraph import MAX_DEPENDENCIES
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.hash import hash_path
from taskgraph.util.keyed_by import evaluate_keyed_by
@@ -44,7 +42,7 @@


@functools.cache
def _run_task_suffix():
def run_task_suffix():
"""String to append to cache names under control of run-task."""
return hash_path(RUN_TASK)[0:20]

@@ -715,7 +713,7 @@ def build_docker_worker_payload(config, task, task_def):
cache_version = "v3"

if run_task:
suffix = f"{cache_version}-{_run_task_suffix()}"
suffix = f"{cache_version}-{run_task_suffix()}"

if out_of_tree_image:
name_hash = hashlib.sha256(
@@ -763,8 +761,6 @@ def build_docker_worker_payload(config, task, task_def):
if capabilities:
payload["capabilities"] = capabilities

check_caches_are_volumes(task)


@payload_builder(
"generic-worker",
@@ -1457,135 +1453,3 @@ def chain_of_trust(config, tasks):
"task-reference": "<docker-image>"
}
yield task


@transforms.add
def check_task_identifiers(config, tasks):
"""Ensures that all tasks have well defined identifiers:
``^[a-zA-Z0-9_-]{1,38}$``
"""
e = re.compile("^[a-zA-Z0-9_-]{1,38}$")
for task in tasks:
for attrib in ("workerType", "provisionerId"):
if not e.match(task["task"][attrib]):
raise Exception(
"task {}.{} is not a valid identifier: {}".format(
task["label"], attrib, task["task"][attrib]
)
)
yield task


@transforms.add
def check_task_dependencies(config, tasks):
"""Ensures that tasks don't have more than 100 dependencies."""
for task in tasks:
number_of_dependencies = (
len(task["dependencies"])
+ len(task["if-dependencies"])
+ len(task["soft-dependencies"])
)
if number_of_dependencies > MAX_DEPENDENCIES:
raise Exception(
"task {}/{} has too many dependencies ({} > {})".format(
config.kind,
task["label"],
number_of_dependencies,
MAX_DEPENDENCIES,
)
)
yield task


def check_caches_are_volumes(task):
"""Ensures that all cache paths are defined as volumes.

Caches and volumes are the only filesystem locations whose content
isn't defined by the Docker image itself. Some caches are optional
depending on the task environment. We want paths that are potentially
caches to have as similar behavior regardless of whether a cache is
used. To help enforce this, we require that all paths used as caches
to be declared as Docker volumes. This check won't catch all offenders.
But it is better than nothing.
"""
volumes = set(task["worker"]["volumes"])
paths = {c["mount-point"] for c in task["worker"].get("caches", [])}
missing = paths - volumes

if not missing:
return

raise Exception(
"task {} (image {}) has caches that are not declared as "
"Docker volumes: {} "
"(have you added them as VOLUMEs in the Dockerfile?)".format(
task["label"], task["worker"]["docker-image"], ", ".join(sorted(missing))
)
)


@transforms.add
def check_run_task_caches(config, tasks):
"""Audit for caches requiring run-task.

run-task manages caches in certain ways. If a cache managed by run-task
is used by a non run-task task, it could cause problems. So we audit for
that and make sure certain cache names are exclusive to run-task.

IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.
"""
re_reserved_caches = re.compile(
"""^
(checkouts|tooltool-cache)
""",
re.VERBOSE,
)

cache_prefix = "{trust_domain}-level-{level}-".format(
trust_domain=config.graph_config["trust-domain"],
level=config.params["level"],
)

suffix = _run_task_suffix()

for task in tasks:
payload = task["task"].get("payload", {})
command = payload.get("command") or [""]

main_command = command[0] if isinstance(command[0], str) else ""
run_task = main_command.endswith("run-task")

for cache in payload.get("cache", {}).get(
"task-reference", payload.get("cache", {})
):
if not cache.startswith(cache_prefix):
raise Exception(
"{} is using a cache ({}) which is not appropriate "
"for its trust-domain and level. It should start with {}.".format(
task["label"], cache, cache_prefix
)
)

cache = cache[len(cache_prefix) :]

if not re_reserved_caches.match(cache):
continue

if not run_task:
raise Exception(
f"{task['label']} is using a cache ({cache}) reserved for run-task "
"change the task to use run-task or use a different "
"cache name"
)

if suffix not in cache:
raise Exception(
f"{task['label']} is using a cache ({cache}) reserved for run-task "
"but the cache name is not dependent on the contents "
"of run-task; change the cache name to conform to the "
"naming requirements"
)

yield task
130 changes: 130 additions & 0 deletions src/taskgraph/util/verify.py
@@ -4,15 +4,18 @@


import logging
import re
import sys
import warnings
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Callable, Union

from taskgraph import MAX_DEPENDENCIES
from taskgraph.config import GraphConfig
from taskgraph.parameters import Parameters
from taskgraph.taskgraph import TaskGraph
from taskgraph.transforms.task import run_task_suffix
from taskgraph.util.attributes import match_run_on_projects
from taskgraph.util.treeherder import join_symbol

@@ -301,6 +304,133 @@ def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameters):
scratch_pad[key] = task.label
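
# A sketch of the verification protocol the new checks below follow
# (inferred from verify_toolchain_alias above and the `task is None`
# guards below): functions registered with @verifications.add are called
# once per task, plus once with task=None for whole-graph work, and share
# scratch_pad across calls. Illustrative example only, not part of this
# change:
#
#   @verifications.add("full_task_graph")
#   def verify_unique_symbols(task, taskgraph, scratch_pad, graph_config, parameters):
#       if task is None:
#           return
#       symbol = task.task.get("extra", {}).get("treeherder", {}).get("symbol")
#       if not symbol:
#           return
#       if symbol in scratch_pad:
#           raise Exception(f"{task.label} reuses treeherder symbol {symbol}")
#       scratch_pad[symbol] = task.label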


RE_RESERVED_CACHES = re.compile(r"^(checkouts|tooltool-cache)", re.VERBOSE)


@verifications.add("full_task_graph")
def verify_run_task_caches(task, taskgraph, scratch_pad, graph_config, parameters):
"""Audit for caches requiring run-task.

run-task manages caches in certain ways. If a cache managed by run-task
is used by a non run-task task, it could cause problems. So we audit for
that and make sure certain cache names are exclusive to run-task.

IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.
"""
if task is None:
return

cache_prefix = "{trust_domain}-level-{level}-".format(
trust_domain=graph_config["trust-domain"],
level=parameters["level"],
)

suffix = run_task_suffix()

payload = task.task.get("payload", {})
command = payload.get("command") or [""]

main_command = command[0] if isinstance(command[0], str) else ""
run_task = main_command.endswith("run-task")

for cache in payload.get("cache", {}).get(
"task-reference", payload.get("cache", {})
):
if not cache.startswith(cache_prefix):
raise Exception(
f"{task.label} is using a cache ({cache}) which is not appropriate "
f"for its trust-domain and level. It should start with {cache_prefix}."
)

cache = cache[len(cache_prefix) :]

if not RE_RESERVED_CACHES.match(cache):
continue

if not run_task:
raise Exception(
f"{task['label']} is using a cache ({cache}) reserved for run-task "
"change the task to use run-task or use a different "
"cache name"
)

if suffix not in cache:
raise Exception(
f"{task['label']} is using a cache ({cache}) reserved for run-task "
"but the cache name is not dependent on the contents "
"of run-task; change the cache name to conform to the "
"naming requirements"
)
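
# Taken together, the rules above mean a conforming run-task cache name
# is "<trust-domain>-level-<level>-<reserved-base>-...-<run-task hash>".
# Sketch with a fabricated trust-domain and level ("v3" mirrors
# cache_version in the docker-worker payload builder):
#
#   cache_name = f"mozilla-level-3-checkouts-v3-{run_task_suffix()}"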


@verifications.add("full_task_graph")
def verify_task_identifiers(task, taskgraph, scratch_pad, graph_config, parameters):
"""Ensures that all tasks have well defined identifiers:
``^[a-zA-Z0-9_-]{1,38}$``
"""
if task is None:
return

e = re.compile("^[a-zA-Z0-9_-]{1,38}$")
for attrib in ("workerType", "provisionerId"):
if not e.match(task.task[attrib]):
raise Exception(
f"task {task.label}.{attrib} is not a valid identifier: {task.task[attrib]}"
)
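
# For instance (values invented for illustration):
#
#   e.match("b-linux_64")   # matches: letters, digits, '_' and '-'
#   e.match("my worker")    # None: whitespace is not allowed
#   e.match("x" * 39)       # None: over the 38-character limit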


@verifications.add("full_task_graph")
def verify_task_dependencies(task, taskgraph, scratch_pad, graph_config, parameters):
"""Ensures that tasks don't have more than 100 dependencies."""
if task is None:
return

number_of_dependencies = (
len(task.dependencies) + len(task.if_dependencies) + len(task.soft_dependencies)
)
if number_of_dependencies > MAX_DEPENDENCIES:
raise Exception(
f"task {task.label} has too many dependencies ({number_of_dependencies} > {MAX_DEPENDENCIES})"
)
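
# Note the limit counts all three lists together: e.g. 60 dependencies,
# 30 soft-dependencies and 15 if-dependencies total 105, which would be
# rejected with MAX_DEPENDENCIES at its documented value of 100.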


@verifications.add("full_task_graph")
def verify_caches_are_volumes(task, taskgraph, scratch_pad, graph_config, parameters):
"""Ensures that all cache paths are defined as volumes.

Caches and volumes are the only filesystem locations whose content
isn't defined by the Docker image itself. Some caches are optional
depending on the task environment. We want paths that are potentially
caches to behave as similarly as possible regardless of whether a cache
is used. To help enforce this, we require that all paths used as caches
be declared as Docker volumes. This check won't catch all offenders,
but it is better than nothing.
"""
if task is None:
return

taskdef = task.task
if taskdef.get("worker", {}).get("implementation") != "docker-worker":
return

volumes = set(taskdef["worker"]["volumes"])
paths = {c["mount-point"] for c in taskdef["worker"].get("caches", [])}
missing = paths - volumes

if missing:
raise Exception(
"task {} (image {}) has caches that are not declared as "
"Docker volumes: {} "
"(have you added them as VOLUMEs in the Dockerfile?)".format(
task.label,
taskdef["worker"]["docker-image"],
", ".join(sorted(missing)),
)
)
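
# A conforming definition pairs every cache mount point with a declared
# volume; minimal sketch (the image name and any other cache fields are
# illustrative -- only "mount-point" is read here):
#
#   taskdef = {
#       "worker": {
#           "implementation": "docker-worker",
#           "docker-image": "debian12-build",
#           "volumes": ["/builds/worker/checkouts"],
#           "caches": [{"mount-point": "/builds/worker/checkouts"}],
#       }
#   }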


@verifications.add("optimized_task_graph")
def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters):
"""