15 changes: 14 additions & 1 deletion packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py
@@ -32,8 +32,12 @@ def fake_loader(kind, path, config, parameters, loaded_tasks, write_artifacts):
"i": i,
"metadata": {"name": f"{kind}-t-{i}"},
"deadline": "soon",
"workerType": "linux",
"provisionerId": "prov",
},
"dependencies": dependencies,
"if-dependencies": [],
"soft-dependencies": [],
}
if "task-defaults" in config:
task = merge(config["task-defaults"], task)
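
# (Presumably the new workerType/provisionerId fields and the empty
# dependency lists above let fixture-generated tasks pass the identifier
# and dependency checks added in src/taskgraph/util/verify.py below.)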
@@ -250,6 +254,7 @@ def make_task(
task_id=None,
dependencies=None,
if_dependencies=None,
soft_dependencies=None,
attributes=None,
):
task_def = task_def or {
@@ -258,15 +263,23 @@
}
task = Task(
attributes=attributes or {},
dependencies=dependencies or {},
if_dependencies=if_dependencies or [],
soft_dependencies=soft_dependencies or [],
kind=kind,
label=label,
task=task_def,
)
task.optimization = optimization
task.task_id = task_id
if dependencies is not None:
task.task["dependencies"] = sorted(dependencies)
# Task dependencies are stored as a dict (name -> label) until the
# optimization phase converts them to a flat list. This is pretty
# confusing, so accept both formats here rather than break assumptions
# about the shape of 'dependencies'.
if isinstance(dependencies, dict):
task.task["dependencies"] = sorted(dependencies.values())
else:
task.task["dependencies"] = sorted(dependencies)
return task
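
# Usage sketch (labels fabricated; assumes `label` is make_task's first
# positional argument and the remaining arguments default sensibly):
#
#   t = make_task("test-1", dependencies={"build": "build-linux"})
#   t.task["dependencies"]  # ["build-linux"] -- the dict's sorted values
#
#   t = make_task("test-2", dependencies=["b-mac", "b-linux"])
#   t.task["dependencies"]  # ["b-linux", "b-mac"] -- the list, sorted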


1 change: 1 addition & 0 deletions src/taskgraph/optimize/base.py
@@ -437,6 +437,7 @@ def get_subgraph(
)
deps = task.task.setdefault("dependencies", [])
deps.extend(sorted(named_task_dependencies.values()))
task.dependencies.update(named_task_dependencies)
tasks_by_taskid[task.task_id] = task
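
# The added task.dependencies.update(...) keeps the Task object's
# name -> taskId mapping in step with the flat list written into the
# task definition; previously only the definition was updated here.
# The two representations, with a fabricated taskId for illustration:
#   task.dependencies         == {"docker-image": "Qy5FLbjSRBSWXx2EZz_dw"}
#   task.task["dependencies"] == ["Qy5FLbjSRBSWXx2EZz_dw"]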

# resolve edges to taskIds
140 changes: 2 additions & 138 deletions src/taskgraph/transforms/task.py
@@ -11,7 +11,6 @@
import functools
import hashlib
import os
import re
import time
from copy import deepcopy
from dataclasses import dataclass
@@ -20,7 +19,6 @@

from voluptuous import All, Any, Extra, NotIn, Optional, Required

from taskgraph import MAX_DEPENDENCIES
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.hash import hash_path
from taskgraph.util.keyed_by import evaluate_keyed_by
@@ -44,7 +42,7 @@


@functools.cache
def _run_task_suffix():
def run_task_suffix():
"""String to append to cache names under control of run-task."""
return hash_path(RUN_TASK)[0:20]

@@ -715,7 +713,7 @@ def build_docker_worker_payload(config, task, task_def):
cache_version = "v3"

if run_task:
suffix = f"{cache_version}-{_run_task_suffix()}"
suffix = f"{cache_version}-{run_task_suffix()}"

if out_of_tree_image:
name_hash = hashlib.sha256(
@@ -763,8 +761,6 @@ def build_docker_worker_payload(config, task, task_def):
if capabilities:
payload["capabilities"] = capabilities

check_caches_are_volumes(task)


@payload_builder(
"generic-worker",
@@ -1457,135 +1453,3 @@ def chain_of_trust(config, tasks):
"task-reference": "<docker-image>"
}
yield task


@transforms.add
def check_task_identifiers(config, tasks):
"""Ensures that all tasks have well defined identifiers:
``^[a-zA-Z0-9_-]{1,38}$``
"""
e = re.compile("^[a-zA-Z0-9_-]{1,38}$")
for task in tasks:
for attrib in ("workerType", "provisionerId"):
if not e.match(task["task"][attrib]):
raise Exception(
"task {}.{} is not a valid identifier: {}".format(
task["label"], attrib, task["task"][attrib]
)
)
yield task


@transforms.add
def check_task_dependencies(config, tasks):
"""Ensures that tasks don't have more than 100 dependencies."""
for task in tasks:
number_of_dependencies = (
len(task["dependencies"])
+ len(task["if-dependencies"])
+ len(task["soft-dependencies"])
)
if number_of_dependencies > MAX_DEPENDENCIES:
raise Exception(
"task {}/{} has too many dependencies ({} > {})".format(
config.kind,
task["label"],
number_of_dependencies,
MAX_DEPENDENCIES,
)
)
yield task


def check_caches_are_volumes(task):
"""Ensures that all cache paths are defined as volumes.

Caches and volumes are the only filesystem locations whose content
isn't defined by the Docker image itself. Some caches are optional
depending on the task environment. We want paths that are potentially
caches to have as similar behavior regardless of whether a cache is
used. To help enforce this, we require that all paths used as caches
to be declared as Docker volumes. This check won't catch all offenders.
But it is better than nothing.
"""
volumes = set(task["worker"]["volumes"])
paths = {c["mount-point"] for c in task["worker"].get("caches", [])}
missing = paths - volumes

if not missing:
return

raise Exception(
"task {} (image {}) has caches that are not declared as "
"Docker volumes: {} "
"(have you added them as VOLUMEs in the Dockerfile?)".format(
task["label"], task["worker"]["docker-image"], ", ".join(sorted(missing))
)
)


@transforms.add
def check_run_task_caches(config, tasks):
"""Audit for caches requiring run-task.

run-task manages caches in certain ways. If a cache managed by run-task
is used by a non run-task task, it could cause problems. So we audit for
that and make sure certain cache names are exclusive to run-task.

IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.
"""
re_reserved_caches = re.compile(
"""^
(checkouts|tooltool-cache)
""",
re.VERBOSE,
)

cache_prefix = "{trust_domain}-level-{level}-".format(
trust_domain=config.graph_config["trust-domain"],
level=config.params["level"],
)

suffix = _run_task_suffix()

for task in tasks:
payload = task["task"].get("payload", {})
command = payload.get("command") or [""]

main_command = command[0] if isinstance(command[0], str) else ""
run_task = main_command.endswith("run-task")

for cache in payload.get("cache", {}).get(
"task-reference", payload.get("cache", {})
):
if not cache.startswith(cache_prefix):
raise Exception(
"{} is using a cache ({}) which is not appropriate "
"for its trust-domain and level. It should start with {}.".format(
task["label"], cache, cache_prefix
)
)

cache = cache[len(cache_prefix) :]

if not re_reserved_caches.match(cache):
continue

if not run_task:
raise Exception(
f"{task['label']} is using a cache ({cache}) reserved for run-task "
"change the task to use run-task or use a different "
"cache name"
)

if suffix not in cache:
raise Exception(
f"{task['label']} is using a cache ({cache}) reserved for run-task "
"but the cache name is not dependent on the contents "
"of run-task; change the cache name to conform to the "
"naming requirements"
)

yield task
130 changes: 130 additions & 0 deletions src/taskgraph/util/verify.py
@@ -4,15 +4,18 @@


import logging
import re
import sys
import warnings
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Callable, Union

from taskgraph import MAX_DEPENDENCIES
from taskgraph.config import GraphConfig
from taskgraph.parameters import Parameters
from taskgraph.taskgraph import TaskGraph
from taskgraph.transforms.task import run_task_suffix
from taskgraph.util.attributes import match_run_on_projects
from taskgraph.util.treeherder import join_symbol

@@ -301,6 +304,133 @@ def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameters):
scratch_pad[key] = task.label
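
# A sketch of the verification protocol the new checks below follow
# (inferred from verify_toolchain_alias above and the `task is None`
# guards below): functions registered with @verifications.add are called
# once per task, plus once with task=None for whole-graph work, and share
# scratch_pad across calls. Illustrative example only, not part of this
# change:
#
#   @verifications.add("full_task_graph")
#   def verify_unique_symbols(task, taskgraph, scratch_pad, graph_config, parameters):
#       if task is None:
#           return
#       symbol = task.task.get("extra", {}).get("treeherder", {}).get("symbol")
#       if not symbol:
#           return
#       if symbol in scratch_pad:
#           raise Exception(f"{task.label} reuses treeherder symbol {symbol}")
#       scratch_pad[symbol] = task.label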


RE_RESERVED_CACHES = re.compile(r"^(checkouts|tooltool-cache)", re.VERBOSE)


@verifications.add("full_task_graph")
def verify_run_task_caches(task, taskgraph, scratch_pad, graph_config, parameters):
"""Audit for caches requiring run-task.

run-task manages caches in certain ways. If a cache managed by run-task
is used by a non run-task task, it could cause problems. So we audit for
that and make sure certain cache names are exclusive to run-task.

IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.
"""
if task is None:
return

cache_prefix = "{trust_domain}-level-{level}-".format(
trust_domain=graph_config["trust-domain"],
level=parameters["level"],
)

suffix = run_task_suffix()

payload = task.task.get("payload", {})
command = payload.get("command") or [""]

main_command = command[0] if isinstance(command[0], str) else ""
run_task = main_command.endswith("run-task")

for cache in payload.get("cache", {}).get(
"task-reference", payload.get("cache", {})
):
if not cache.startswith(cache_prefix):
raise Exception(
f"{task.label} is using a cache ({cache}) which is not appropriate "
f"for its trust-domain and level. It should start with {cache_prefix}."
)

cache = cache[len(cache_prefix) :]

if not RE_RESERVED_CACHES.match(cache):
continue

if not run_task:
raise Exception(
f"{task['label']} is using a cache ({cache}) reserved for run-task "
"change the task to use run-task or use a different "
"cache name"
)

if suffix not in cache:
raise Exception(
f"{task['label']} is using a cache ({cache}) reserved for run-task "
"but the cache name is not dependent on the contents "
"of run-task; change the cache name to conform to the "
"naming requirements"
)
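
# Taken together, the rules above mean a conforming run-task cache name
# is "<trust-domain>-level-<level>-<reserved-base>-...-<run-task hash>".
# Sketch with a fabricated trust-domain and level ("v3" mirrors
# cache_version in the docker-worker payload builder):
#
#   cache_name = f"mozilla-level-3-checkouts-v3-{run_task_suffix()}"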


@verifications.add("full_task_graph")
def verify_task_identifiers(task, taskgraph, scratch_pad, graph_config, parameters):
"""Ensures that all tasks have well defined identifiers:
``^[a-zA-Z0-9_-]{1,38}$``
"""
if task is None:
return

e = re.compile("^[a-zA-Z0-9_-]{1,38}$")
for attrib in ("workerType", "provisionerId"):
if not e.match(task.task[attrib]):
raise Exception(
f"task {task.label}.{attrib} is not a valid identifier: {task.task[attrib]}"
)
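
# For instance (values invented for illustration):
#
#   e.match("b-linux_64")   # matches: letters, digits, '_' and '-'
#   e.match("my worker")    # None: whitespace is not allowed
#   e.match("x" * 39)       # None: over the 38-character limit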


@verifications.add("full_task_graph")
def verify_task_dependencies(task, taskgraph, scratch_pad, graph_config, parameters):
"""Ensures that tasks don't have more than 100 dependencies."""
if task is None:
return

number_of_dependencies = (
len(task.dependencies) + len(task.if_dependencies) + len(task.soft_dependencies)
)
if number_of_dependencies > MAX_DEPENDENCIES:
raise Exception(
f"task {task.label} has too many dependencies ({number_of_dependencies} > {MAX_DEPENDENCIES})"
)
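
# Note the limit counts all three lists together: e.g. 60 dependencies,
# 30 soft-dependencies and 15 if-dependencies total 105, which would be
# rejected with MAX_DEPENDENCIES at its documented value of 100.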


@verifications.add("full_task_graph")
def verify_caches_are_volumes(task, taskgraph, scratch_pad, graph_config, parameters):
"""Ensures that all cache paths are defined as volumes.

Caches and volumes are the only filesystem locations whose content
isn't defined by the Docker image itself. Some caches are optional
depending on the task environment. We want paths that are potentially
caches to behave as similarly as possible regardless of whether a cache
is used. To help enforce this, we require that all paths used as caches
be declared as Docker volumes. This check won't catch all offenders,
but it is better than nothing.
"""
if task is None:
return

taskdef = task.task
if taskdef.get("worker", {}).get("implementation") != "docker-worker":
return

volumes = set(taskdef["worker"]["volumes"])
paths = {c["mount-point"] for c in taskdef["worker"].get("caches", [])}
missing = paths - volumes

if missing:
raise Exception(
"task {} (image {}) has caches that are not declared as "
"Docker volumes: {} "
"(have you added them as VOLUMEs in the Dockerfile?)".format(
task.label,
taskdef["worker"]["docker-image"],
", ".join(sorted(missing)),
)
)
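
# A conforming definition pairs every cache mount point with a declared
# volume; minimal sketch (the image name and any other cache fields are
# illustrative -- only "mount-point" is read here):
#
#   taskdef = {
#       "worker": {
#           "implementation": "docker-worker",
#           "docker-image": "debian12-build",
#           "volumes": ["/builds/worker/checkouts"],
#           "caches": [{"mount-point": "/builds/worker/checkouts"}],
#       }
#   }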


@verifications.add("optimized_task_graph")
def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters):
"""