From 9d549647efbb3c39098f754e75aa042645d00941 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Mon, 30 Dec 2024 16:27:11 -0500 Subject: [PATCH 1/6] feat: add caches for common package managers in run transforms --- src/taskgraph/transforms/run/common.py | 39 +++++++++++++--- src/taskgraph/transforms/run/run_task.py | 6 ++- test/test_transforms_run.py | 4 +- test/test_transforms_run_run_task.py | 59 ++++++++++++++++++++++-- 4 files changed, 95 insertions(+), 13 deletions(-) diff --git a/src/taskgraph/transforms/run/common.py b/src/taskgraph/transforms/run/common.py index 30bf5ce4a..7a92fbd80 100644 --- a/src/taskgraph/transforms/run/common.py +++ b/src/taskgraph/transforms/run/common.py @@ -13,6 +13,13 @@ from taskgraph.util import path from taskgraph.util.taskcluster import get_artifact_prefix +CACHES = { + "cargo": {"env": "CARGO_HOME"}, + "npm": {"env": "npm_config_cache"}, + "pip": {"env": "PIP_CACHE_DIR"}, + "uv": {"env": "UV_CACHE_DIR"}, +} + def get_vcsdir_name(os): if os == "windows": @@ -32,10 +39,13 @@ def add_cache(task, taskdesc, name, mount_point, skip_untrusted=False): skip_untrusted (bool): Whether cache is used in untrusted environments (default: False). Only applies to docker-worker. """ - if not task["run"].get("use-caches", True): + worker = task["worker"] + if worker["implementation"] not in ("docker-worker", "generic-worker"): + # caches support not implemented return - worker = task["worker"] + if not task["run"].get("use-caches", True): + return if worker["implementation"] == "docker-worker": taskdesc["worker"].setdefault("caches", []).append( @@ -55,10 +65,6 @@ def add_cache(task, taskdesc, name, mount_point, skip_untrusted=False): } ) - else: - # Caches not implemented - pass - def add_artifacts(config, task, taskdesc, path): taskdesc["worker"].setdefault("artifacts", []).append( @@ -166,3 +172,24 @@ def support_vcs_checkout(config, task, taskdesc, repo_configs, sparse=False): taskdesc["worker"]["taskcluster-proxy"] = True return vcsdir + + +def support_caches(task, taskdesc): + """Add caches for common tools.""" + worker = task["worker"] + workdir = task["run"].get("workdir") + base_cache_dir = ".task-cache" + if worker["implementation"] == "docker-worker": + workdir = workdir or "/builds/worker" + base_cache_dir = path.join(workdir, base_cache_dir) + + for name, config in CACHES.items(): + cache_dir = f"{base_cache_dir}/{name}" + + if config.get("env"): + env = taskdesc["worker"].setdefault("env", {}) + # If cache_dir is already absolute, the `.join` call returns it as + # is. In that case, {task_workdir} will get interpolated by + # run-task. + env[config["env"]] = path.join("{task_workdir}", cache_dir) + add_cache(task, taskdesc, name, cache_dir) diff --git a/src/taskgraph/transforms/run/run_task.py b/src/taskgraph/transforms/run/run_task.py index 124242ae2..54f47012b 100644 --- a/src/taskgraph/transforms/run/run_task.py +++ b/src/taskgraph/transforms/run/run_task.py @@ -11,7 +11,10 @@ from voluptuous import Any, Optional, Required from taskgraph.transforms.run import run_task_using -from taskgraph.transforms.run.common import support_vcs_checkout +from taskgraph.transforms.run.common import ( + support_caches, + support_vcs_checkout, +) from taskgraph.transforms.task import taskref_or_string from taskgraph.util import path, taskcluster from taskgraph.util.schema import Schema @@ -103,6 +106,7 @@ def common_setup(config, task, taskdesc, command): if "cwd" in run: command.extend(("--task-cwd", run["cwd"])) + support_caches(task, taskdesc) taskdesc["worker"].setdefault("env", {})["MOZ_SCM_LEVEL"] = config.params["level"] diff --git a/test/test_transforms_run.py b/test/test_transforms_run.py index ee36e9ee2..c892c52d7 100644 --- a/test/test_transforms_run.py +++ b/test/test_transforms_run.py @@ -86,7 +86,7 @@ def inner(task): def test_caches_docker_worker(run_caches): - assert run_caches({"worker-type": "t-linux"}) == [ + assert run_caches({"run": {"use-caches": True}, "worker-type": "t-linux"}) == [ { "mount-point": "/cache1", "name": "cache1", @@ -103,7 +103,7 @@ def test_caches_docker_worker(run_caches): def test_caches_generic_worker(run_caches): - assert run_caches({"worker-type": "t-win"}) == [ + assert run_caches({"run": {"use-caches": True}, "worker-type": "t-win"}) == [ {"cache-name": "cache1", "directory": "/cache1"}, {"cache-name": "cache2", "directory": "/cache2"}, ] diff --git a/test/test_transforms_run_run_task.py b/test/test_transforms_run_run_task.py index bce45e111..a76dfc98a 100644 --- a/test/test_transforms_run_run_task.py +++ b/test/test_transforms_run_run_task.py @@ -57,7 +57,31 @@ def assert_docker_worker(task): "name": "checkouts", "skip-untrusted": False, "type": "persistent", - } + }, + { + "mount-point": "/builds/worker/.task-cache/cargo", + "name": "cargo", + "skip-untrusted": False, + "type": "persistent", + }, + { + "mount-point": "/builds/worker/.task-cache/npm", + "name": "npm", + "skip-untrusted": False, + "type": "persistent", + }, + { + "mount-point": "/builds/worker/.task-cache/pip", + "name": "pip", + "skip-untrusted": False, + "type": "persistent", + }, + { + "mount-point": "/builds/worker/.task-cache/uv", + "name": "uv", + "skip-untrusted": False, + "type": "persistent", + }, ], "command": [ "/usr/local/bin/run-task", @@ -68,6 +92,7 @@ def assert_docker_worker(task): "echo hello world", ], "env": { + "CARGO_HOME": "/builds/worker/.task-cache/cargo", "CI_BASE_REPOSITORY": "http://hg.example.com", "CI_HEAD_REF": "default", "CI_HEAD_REPOSITORY": "http://hg.example.com", @@ -75,8 +100,11 @@ def assert_docker_worker(task): "CI_REPOSITORY_TYPE": "hg", "HG_STORE_PATH": "/builds/worker/checkouts/hg-store", "MOZ_SCM_LEVEL": "1", + "PIP_CACHE_DIR": "/builds/worker/.task-cache/pip", "REPOSITORIES": '{"ci": "Taskgraph"}', + "UV_CACHE_DIR": "/builds/worker/.task-cache/uv", "VCS_PATH": "/builds/worker/checkouts/vcs", + "npm_config_cache": "/builds/worker/.task-cache/npm", }, "implementation": "docker-worker", "os": "linux", @@ -103,6 +131,7 @@ def assert_generic_worker(task): 'world"' ], "env": { + "CARGO_HOME": "{task_workdir}/.task-cache/cargo", "CI_BASE_REPOSITORY": "http://hg.example.com", "CI_HEAD_REF": "default", "CI_HEAD_REPOSITORY": "http://hg.example.com", @@ -110,15 +139,37 @@ def assert_generic_worker(task): "CI_REPOSITORY_TYPE": "hg", "HG_STORE_PATH": "y:/hg-shared", "MOZ_SCM_LEVEL": "1", + "PIP_CACHE_DIR": "{task_workdir}/.task-cache/pip", "REPOSITORIES": '{"ci": "Taskgraph"}', + "UV_CACHE_DIR": "{task_workdir}/.task-cache/uv", "VCS_PATH": "{task_workdir}/build/src", + "npm_config_cache": "{task_workdir}/.task-cache/npm", }, "implementation": "generic-worker", "mounts": [ - {"cache-name": "checkouts", "directory": "build"}, + { + "cache-name": "checkouts", + "directory": "build", + }, + { + "cache-name": "cargo", + "directory": ".task-cache/cargo", + }, + { + "cache-name": "npm", + "directory": ".task-cache/npm", + }, + { + "cache-name": "pip", + "directory": ".task-cache/pip", + }, + { + "cache-name": "uv", + "directory": ".task-cache/uv", + }, { "content": { - "url": "https://tc-tests.localhost/api/queue/v1/task//artifacts/public/run-task" # noqa + "url": "https://tc-tests.localhost/api/queue/v1/task//artifacts/public/run-task" }, "file": "./run-task", }, @@ -172,7 +223,7 @@ def assert_run_task_command_generic_worker(task): "task", ( pytest.param( - {}, + {"worker": {"os": "linux"}}, id="docker_worker", ), pytest.param( From eed5c39da125073b0bd46b1ee44c1e6f03c34501 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Mon, 30 Dec 2024 16:27:11 -0500 Subject: [PATCH 2/6] refactor: make 'checkouts' cache use the 'support_caches' mechanism --- src/taskgraph/transforms/run/common.py | 112 ++++++++++++++--------- src/taskgraph/transforms/run/run_task.py | 2 +- test/test_transforms_run_run_task.py | 16 ++-- 3 files changed, 79 insertions(+), 51 deletions(-) diff --git a/src/taskgraph/transforms/run/common.py b/src/taskgraph/transforms/run/common.py index 7a92fbd80..354a23c1d 100644 --- a/src/taskgraph/transforms/run/common.py +++ b/src/taskgraph/transforms/run/common.py @@ -9,17 +9,12 @@ import hashlib import json +from typing import Any, Dict +from taskgraph.transforms.base import TransformConfig from taskgraph.util import path from taskgraph.util.taskcluster import get_artifact_prefix -CACHES = { - "cargo": {"env": "CARGO_HOME"}, - "npm": {"env": "npm_config_cache"}, - "pip": {"env": "PIP_CACHE_DIR"}, - "uv": {"env": "UV_CACHE_DIR"}, -} - def get_vcsdir_name(os): if os == "windows": @@ -28,6 +23,54 @@ def get_vcsdir_name(os): return "vcs" +def get_checkout_dir(task: Dict[str, Any]) -> str: + worker = task["worker"] + if worker["os"] == "windows": + return "build" + elif worker["implementation"] == "docker-worker": + return f"{task['run']['workdir']}/checkouts" + else: + return "checkouts" + + +def get_checkout_cache_name(config: TransformConfig, task: Dict[str, Any]) -> str: + repo_configs = config.repo_configs + cache_name = "checkouts" + + # Robust checkout does not clean up subrepositories, so ensure that tasks + # that checkout different sets of paths have separate caches. + # See https://bugzilla.mozilla.org/show_bug.cgi?id=1631610 + if len(repo_configs) > 1: + checkout_paths = { + "\t".join([repo_config.path, repo_config.prefix]) + for repo_config in sorted( + repo_configs.values(), key=lambda repo_config: repo_config.path + ) + } + checkout_paths_str = "\n".join(checkout_paths).encode("utf-8") + digest = hashlib.sha256(checkout_paths_str).hexdigest() + cache_name += f"-repos-{digest}" + + # Sparse checkouts need their own cache because they can interfere + # with clients that aren't sparse aware. + if task["run"]["sparse-profile"]: + cache_name += "-sparse" + + return cache_name + + +CACHES = { + "cargo": {"env": "CARGO_HOME"}, + "checkout": { + "cache_dir": get_checkout_dir, + "cache_name": get_checkout_cache_name, + }, + "npm": {"env": "npm_config_cache"}, + "pip": {"env": "PIP_CACHE_DIR"}, + "uv": {"env": "UV_CACHE_DIR"}, +} + + def add_cache(task, taskdesc, name, mount_point, skip_untrusted=False): """Adds a cache based on the worker's implementation. @@ -98,46 +141,19 @@ def support_vcs_checkout(config, task, taskdesc, repo_configs, sparse=False): reserved for ``run-task`` tasks. """ worker = task["worker"] - is_mac = worker["os"] == "macosx" + assert worker["os"] in ("linux", "macosx", "windows") is_win = worker["os"] == "windows" - is_linux = worker["os"] == "linux" is_docker = worker["implementation"] == "docker-worker" - assert is_mac or is_win or is_linux + checkoutdir = get_checkout_dir(task) if is_win: - checkoutdir = "build" hgstore = "y:/hg-shared" elif is_docker: - checkoutdir = "{workdir}/checkouts".format(**task["run"]) hgstore = f"{checkoutdir}/hg-store" else: - checkoutdir = "checkouts" hgstore = f"{checkoutdir}/hg-shared" vcsdir = f"{checkoutdir}/{get_vcsdir_name(worker['os'])}" - cache_name = "checkouts" - - # Robust checkout does not clean up subrepositories, so ensure that tasks - # that checkout different sets of paths have separate caches. - # See https://bugzilla.mozilla.org/show_bug.cgi?id=1631610 - if len(repo_configs) > 1: - checkout_paths = { - "\t".join([repo_config.path, repo_config.prefix]) - for repo_config in sorted( - repo_configs.values(), key=lambda repo_config: repo_config.path - ) - } - checkout_paths_str = "\n".join(checkout_paths).encode("utf-8") - digest = hashlib.sha256(checkout_paths_str).hexdigest() - cache_name += f"-repos-{digest}" - - # Sparse checkouts need their own cache because they can interfere - # with clients that aren't sparse aware. - if sparse: - cache_name += "-sparse" - - add_cache(task, taskdesc, cache_name, checkoutdir) - env = taskdesc["worker"].setdefault("env", {}) env.update( { @@ -174,7 +190,9 @@ def support_vcs_checkout(config, task, taskdesc, repo_configs, sparse=False): return vcsdir -def support_caches(task, taskdesc): +def support_caches( + config: TransformConfig, task: Dict[str, Any], taskdesc: Dict[str, Any] +): """Add caches for common tools.""" worker = task["worker"] workdir = task["run"].get("workdir") @@ -183,13 +201,23 @@ def support_caches(task, taskdesc): workdir = workdir or "/builds/worker" base_cache_dir = path.join(workdir, base_cache_dir) - for name, config in CACHES.items(): - cache_dir = f"{base_cache_dir}/{name}" + for name, cache_cfg in CACHES.items(): + if "cache_dir" in cache_cfg: + assert callable(cache_cfg["cache_dir"]) + cache_dir = cache_cfg["cache_dir"](task) + else: + cache_dir = f"{base_cache_dir}/{name}" + + if "cache_name" in cache_cfg: + assert callable(cache_cfg["cache_name"]) + cache_name = cache_cfg["cache_name"](config, task) + else: + cache_name = name - if config.get("env"): + if cache_cfg.get("env"): env = taskdesc["worker"].setdefault("env", {}) # If cache_dir is already absolute, the `.join` call returns it as # is. In that case, {task_workdir} will get interpolated by # run-task. - env[config["env"]] = path.join("{task_workdir}", cache_dir) - add_cache(task, taskdesc, name, cache_dir) + env[cache_cfg["env"]] = path.join("{task_workdir}", cache_dir) + add_cache(task, taskdesc, cache_name, cache_dir) diff --git a/src/taskgraph/transforms/run/run_task.py b/src/taskgraph/transforms/run/run_task.py index 54f47012b..d449fdbd4 100644 --- a/src/taskgraph/transforms/run/run_task.py +++ b/src/taskgraph/transforms/run/run_task.py @@ -106,7 +106,7 @@ def common_setup(config, task, taskdesc, command): if "cwd" in run: command.extend(("--task-cwd", run["cwd"])) - support_caches(task, taskdesc) + support_caches(config, task, taskdesc) taskdesc["worker"].setdefault("env", {})["MOZ_SCM_LEVEL"] = config.params["level"] diff --git a/test/test_transforms_run_run_task.py b/test/test_transforms_run_run_task.py index a76dfc98a..a6b4c7768 100644 --- a/test/test_transforms_run_run_task.py +++ b/test/test_transforms_run_run_task.py @@ -53,14 +53,14 @@ def assert_docker_worker(task): "worker": { "caches": [ { - "mount-point": "/builds/worker/checkouts", - "name": "checkouts", + "mount-point": "/builds/worker/.task-cache/cargo", + "name": "cargo", "skip-untrusted": False, "type": "persistent", }, { - "mount-point": "/builds/worker/.task-cache/cargo", - "name": "cargo", + "mount-point": "/builds/worker/checkouts", + "name": "checkouts", "skip-untrusted": False, "type": "persistent", }, @@ -147,14 +147,14 @@ def assert_generic_worker(task): }, "implementation": "generic-worker", "mounts": [ - { - "cache-name": "checkouts", - "directory": "build", - }, { "cache-name": "cargo", "directory": ".task-cache/cargo", }, + { + "cache-name": "checkouts", + "directory": "build", + }, { "cache-name": "npm", "directory": ".task-cache/npm", From 06f23d002230bd49a572291d42ebcb4f95acf4e8 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Tue, 31 Dec 2024 11:29:04 -0500 Subject: [PATCH 3/6] feat: support list of caches in 'use-caches' key This allows tasks to opt into specific caches rather than all or nothing. --- src/taskgraph/transforms/run/common.py | 31 +++++-- src/taskgraph/transforms/run/run_task.py | 8 +- test/test_transforms_run.py | 61 -------------- test/test_transforms_run_run_task.py | 100 ++++++++++++++++++----- 4 files changed, 112 insertions(+), 88 deletions(-) diff --git a/src/taskgraph/transforms/run/common.py b/src/taskgraph/transforms/run/common.py index 354a23c1d..32cb32dec 100644 --- a/src/taskgraph/transforms/run/common.py +++ b/src/taskgraph/transforms/run/common.py @@ -9,7 +9,7 @@ import hashlib import json -from typing import Any, Dict +from typing import Any, Dict, List, Union from taskgraph.transforms.base import TransformConfig from taskgraph.util import path @@ -87,9 +87,6 @@ def add_cache(task, taskdesc, name, mount_point, skip_untrusted=False): # caches support not implemented return - if not task["run"].get("use-caches", True): - return - if worker["implementation"] == "docker-worker": taskdesc["worker"].setdefault("caches", []).append( { @@ -190,18 +187,42 @@ def support_vcs_checkout(config, task, taskdesc, repo_configs, sparse=False): return vcsdir +def should_use_cache( + name: str, + use_caches: Union[bool, List[str]], + has_checkout: bool, +) -> bool: + # Never enable the checkout cache if there's no clone. This allows + # 'checkout' to be specified as a default cache without impacting + # irrelevant tasks. + if name == "checkout" and not has_checkout: + return False + + if isinstance(use_caches, bool): + return use_caches + + return name in use_caches + + def support_caches( config: TransformConfig, task: Dict[str, Any], taskdesc: Dict[str, Any] ): """Add caches for common tools.""" + run = task["run"] worker = task["worker"] - workdir = task["run"].get("workdir") + workdir = run.get("workdir") base_cache_dir = ".task-cache" if worker["implementation"] == "docker-worker": workdir = workdir or "/builds/worker" base_cache_dir = path.join(workdir, base_cache_dir) + # Default to all caches enabled. + use_caches = run.get("use-caches", True) + for name, cache_cfg in CACHES.items(): + if not should_use_cache(name, use_caches, run["checkout"]): + continue + if "cache_dir" in cache_cfg: assert callable(cache_cfg["cache_dir"]) cache_dir = cache_cfg["cache_dir"](task) diff --git a/src/taskgraph/transforms/run/run_task.py b/src/taskgraph/transforms/run/run_task.py index d449fdbd4..ef1ae15ea 100644 --- a/src/taskgraph/transforms/run/run_task.py +++ b/src/taskgraph/transforms/run/run_task.py @@ -12,6 +12,7 @@ from taskgraph.transforms.run import run_task_using from taskgraph.transforms.run.common import ( + CACHES, support_caches, support_vcs_checkout, ) @@ -31,8 +32,11 @@ # tend to hide their caches. This cache is never added for level-1 tasks. # TODO Once bug 1526028 is fixed, this and 'use-caches' should be merged. Required("cache-dotcache"): bool, - # Whether or not to use caches. - Optional("use-caches"): bool, + # Which caches to use. May take a boolean in which case either all + # (True) or no (False) caches will be used. Alternatively, it can + # accept a list of caches to enable. Defaults to only the checkout cache + # enabled. + Optional("use-caches", "caches"): Any(bool, list(CACHES.keys())), # if true (the default), perform a checkout on the worker Required("checkout"): Any(bool, {str: dict}), Optional( diff --git a/test/test_transforms_run.py b/test/test_transforms_run.py index c892c52d7..5599f5a34 100644 --- a/test/test_transforms_run.py +++ b/test/test_transforms_run.py @@ -18,9 +18,6 @@ from taskgraph.task import Task from taskgraph.transforms import run from taskgraph.transforms.run import run_task # noqa: F401 -from taskgraph.transforms.run.common import add_cache -from taskgraph.transforms.task import payload_builders -from taskgraph.util.schema import Schema, validate_schema from taskgraph.util.templates import merge here = os.path.abspath(os.path.dirname(__file__)) @@ -58,64 +55,6 @@ def inner(task_input): return inner -@pytest.fixture -def run_caches(transform): - def inner(task): - config, task, taskdesc, impl = transform(task) - add_cache(task, taskdesc, "cache1", "/cache1") - add_cache(task, taskdesc, "cache2", "/cache2", skip_untrusted=True) - - if impl not in ("docker-worker", "generic-worker"): - pytest.xfail(f"caches not implemented for '{impl}'") - - key = "caches" if impl == "docker-worker" else "mounts" - - result = taskdesc["worker"].get(key) - - if result: - print("Dumping for copy/paste:") - pprint(result, indent=2) - - # Create a new schema object with just the part relevant to caches. - partial_schema = Schema(payload_builders[impl].schema.schema[key]) - validate_schema(partial_schema, taskdesc["worker"][key], "validation error") - - return result - - return inner - - -def test_caches_docker_worker(run_caches): - assert run_caches({"run": {"use-caches": True}, "worker-type": "t-linux"}) == [ - { - "mount-point": "/cache1", - "name": "cache1", - "skip-untrusted": False, - "type": "persistent", - }, - { - "mount-point": "/cache2", - "name": "cache2", - "skip-untrusted": True, - "type": "persistent", - }, - ] - - -def test_caches_generic_worker(run_caches): - assert run_caches({"run": {"use-caches": True}, "worker-type": "t-win"}) == [ - {"cache-name": "cache1", "directory": "/cache1"}, - {"cache-name": "cache2", "directory": "/cache2"}, - ] - - -@pytest.mark.parametrize("worker_type", ("t-linux", "t-win")) -def test_caches_disabled(run_caches, worker_type): - assert ( - run_caches({"run": {"use-caches": False}, "worker-type": worker_type}) is None - ) - - def test_rewrite_when_to_optimization(run_transform, make_transform_config): config = make_transform_config() diff --git a/test/test_transforms_run_run_task.py b/test/test_transforms_run_run_task.py index a6b4c7768..6f896ea30 100644 --- a/test/test_transforms_run_run_task.py +++ b/test/test_transforms_run_run_task.py @@ -8,6 +8,9 @@ import pytest from taskgraph.transforms.run import make_task_description +from taskgraph.transforms.run.common import CACHES +from taskgraph.transforms.task import payload_builders +from taskgraph.util.schema import Schema, validate_schema from taskgraph.util.templates import merge here = os.path.abspath(os.path.dirname(__file__)) @@ -53,10 +56,10 @@ def assert_docker_worker(task): "worker": { "caches": [ { - "mount-point": "/builds/worker/.task-cache/cargo", - "name": "cargo", - "skip-untrusted": False, - "type": "persistent", + 'mount-point': '/builds/worker/.task-cache/cargo', + 'name': 'cargo', + 'skip-untrusted': False, + 'type': 'persistent', }, { "mount-point": "/builds/worker/checkouts", @@ -65,22 +68,22 @@ def assert_docker_worker(task): "type": "persistent", }, { - "mount-point": "/builds/worker/.task-cache/npm", - "name": "npm", - "skip-untrusted": False, - "type": "persistent", + 'mount-point': '/builds/worker/.task-cache/npm', + 'name': 'npm', + 'skip-untrusted': False, + 'type': 'persistent', }, { - "mount-point": "/builds/worker/.task-cache/pip", - "name": "pip", - "skip-untrusted": False, - "type": "persistent", + 'mount-point': '/builds/worker/.task-cache/pip', + 'name': 'pip', + 'skip-untrusted': False, + 'type': 'persistent', }, { - "mount-point": "/builds/worker/.task-cache/uv", - "name": "uv", - "skip-untrusted": False, - "type": "persistent", + 'mount-point': '/builds/worker/.task-cache/uv', + 'name': 'uv', + 'skip-untrusted': False, + 'type': 'persistent', }, ], "command": [ @@ -151,10 +154,7 @@ def assert_generic_worker(task): "cache-name": "cargo", "directory": ".task-cache/cargo", }, - { - "cache-name": "checkouts", - "directory": "build", - }, + {"cache-name": "checkouts", "directory": "build"}, { "cache-name": "npm", "directory": ".task-cache/npm", @@ -269,3 +269,63 @@ def test_run_task(monkeypatch, request, run_task_using, task): param_id = request.node.callspec.id assert_func = globals()[f"assert_{param_id}"] assert_func(taskdesc) + + +@pytest.fixture +def run_caches(run_task_using): + def inner(task): + task.setdefault("run", {}).setdefault("using", "run-task") + impl = task.setdefault("worker", {}).setdefault( + "implementation", "generic-worker" + ) + result = run_task_using(task) + + key = "mounts" if impl == "generic-worker" else "caches" + + caches = result["worker"][key] + caches = [c for c in caches if "cache-name" in c] + print("Dumping for copy/paste:") + pprint(caches, indent=2) + + # Create a new schema object with just the part relevant to caches. + partial_schema = Schema(payload_builders[impl].schema.schema[key]) + validate_schema(partial_schema, caches, "validation error") + + return caches + + return inner + + +def test_caches_enabled(run_caches): + task = { + "run": { + "use-caches": True, + } + } + caches = run_caches(task) + assert len(caches) == len(CACHES) + cache_names = {c["cache-name"] for c in caches} + for name, cfg in CACHES.items(): + if "cache_name" in cfg: + continue + assert name in cache_names + + +def test_caches_disabled(run_caches): + task = { + "run": { + "use-caches": False, + } + } + assert run_caches(task) == [] + + +def test_caches_explicit(run_caches): + task = { + "run": { + "use-caches": ["cargo"], + } + } + assert run_caches(task) == [ + {"cache-name": "cargo", "directory": ".task-cache/cargo"} + ] From a6a3ac5444a43d3c843a0a21a49649e62031a721 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Tue, 31 Dec 2024 11:29:04 -0500 Subject: [PATCH 4/6] feat: support setting repo wide default values for cache selection --- .../src/pytest_taskgraph/fixtures/gen.py | 10 ++- src/taskgraph/config.py | 15 ++++- src/taskgraph/transforms/run/common.py | 61 +++---------------- src/taskgraph/transforms/run/run_task.py | 2 +- src/taskgraph/util/caches.py | 57 +++++++++++++++++ test/test_transforms_run.py | 4 +- test/test_transforms_run_run_task.py | 18 ++++-- 7 files changed, 103 insertions(+), 64 deletions(-) create mode 100644 src/taskgraph/util/caches.py diff --git a/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py b/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py index f11f459f1..038a5ca22 100644 --- a/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py +++ b/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py @@ -204,7 +204,11 @@ def inner( if extra_params: parameters.update(extra_params) if extra_graph_config: - graph_config._config.update(extra_graph_config) + # We need this intermediate variable because `GraphConfig` is + # frozen and we can't set attributes on it. + new_graph_config = merge(graph_config._config, extra_graph_config) + graph_config._config.update(new_graph_config) + return TransformConfig( "test", str(here), @@ -220,12 +224,12 @@ def inner( @pytest.fixture def run_transform(make_transform_config): - def inner(func, tasks, config=None): + def inner(func, tasks, config=None, **kwargs): if not isinstance(tasks, list): tasks = [tasks] if not config: - config = make_transform_config() + config = make_transform_config(**kwargs) return list(func(config, tasks)) return inner diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index 76b882b6b..ceef30261 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -12,6 +12,7 @@ from voluptuous import All, Any, Extra, Length, Optional, Required from .util import path +from .util.caches import CACHES from .util.python_path import find_object from .util.schema import Schema, optionally_keyed_by, validate_schema from .util.yaml import load_yaml @@ -74,6 +75,16 @@ "index-path-regexes", description="Regular expressions matching index paths to be summarized.", ): [str], + Optional( + "run", + description="Configuration related to the 'run' transforms.", + ): { + Optional( + "use-caches", + description="List of caches to enable, or a boolean to " + "enable/disable all of them.", + ): Any(bool, list(CACHES.keys())), + }, Required("repositories"): All( { str: { @@ -106,8 +117,8 @@ def __getitem__(self, name): def __contains__(self, name): return name in self._config - def get(self, name): - return self._config.get(name) + def get(self, name, default=None): + return self._config.get(name, default) def register(self): """ diff --git a/src/taskgraph/transforms/run/common.py b/src/taskgraph/transforms/run/common.py index 32cb32dec..1749039b8 100644 --- a/src/taskgraph/transforms/run/common.py +++ b/src/taskgraph/transforms/run/common.py @@ -7,12 +7,12 @@ consistency. """ -import hashlib import json from typing import Any, Dict, List, Union from taskgraph.transforms.base import TransformConfig from taskgraph.util import path +from taskgraph.util.caches import CACHES, get_checkout_dir from taskgraph.util.taskcluster import get_artifact_prefix @@ -23,54 +23,6 @@ def get_vcsdir_name(os): return "vcs" -def get_checkout_dir(task: Dict[str, Any]) -> str: - worker = task["worker"] - if worker["os"] == "windows": - return "build" - elif worker["implementation"] == "docker-worker": - return f"{task['run']['workdir']}/checkouts" - else: - return "checkouts" - - -def get_checkout_cache_name(config: TransformConfig, task: Dict[str, Any]) -> str: - repo_configs = config.repo_configs - cache_name = "checkouts" - - # Robust checkout does not clean up subrepositories, so ensure that tasks - # that checkout different sets of paths have separate caches. - # See https://bugzilla.mozilla.org/show_bug.cgi?id=1631610 - if len(repo_configs) > 1: - checkout_paths = { - "\t".join([repo_config.path, repo_config.prefix]) - for repo_config in sorted( - repo_configs.values(), key=lambda repo_config: repo_config.path - ) - } - checkout_paths_str = "\n".join(checkout_paths).encode("utf-8") - digest = hashlib.sha256(checkout_paths_str).hexdigest() - cache_name += f"-repos-{digest}" - - # Sparse checkouts need their own cache because they can interfere - # with clients that aren't sparse aware. - if task["run"]["sparse-profile"]: - cache_name += "-sparse" - - return cache_name - - -CACHES = { - "cargo": {"env": "CARGO_HOME"}, - "checkout": { - "cache_dir": get_checkout_dir, - "cache_name": get_checkout_cache_name, - }, - "npm": {"env": "npm_config_cache"}, - "pip": {"env": "PIP_CACHE_DIR"}, - "uv": {"env": "UV_CACHE_DIR"}, -} - - def add_cache(task, taskdesc, name, mount_point, skip_untrusted=False): """Adds a cache based on the worker's implementation. @@ -216,8 +168,15 @@ def support_caches( workdir = workdir or "/builds/worker" base_cache_dir = path.join(workdir, base_cache_dir) - # Default to all caches enabled. - use_caches = run.get("use-caches", True) + use_caches = run.get("use-caches") + if use_caches is None: + # Use project default values for filtering caches, default to + # checkout cache if no selection is specified. + use_caches = ( + config.graph_config.get("taskgraph", {}) + .get("run", {}) + .get("use-caches", True) + ) for name, cache_cfg in CACHES.items(): if not should_use_cache(name, use_caches, run["checkout"]): diff --git a/src/taskgraph/transforms/run/run_task.py b/src/taskgraph/transforms/run/run_task.py index ef1ae15ea..732a609f6 100644 --- a/src/taskgraph/transforms/run/run_task.py +++ b/src/taskgraph/transforms/run/run_task.py @@ -12,12 +12,12 @@ from taskgraph.transforms.run import run_task_using from taskgraph.transforms.run.common import ( - CACHES, support_caches, support_vcs_checkout, ) from taskgraph.transforms.task import taskref_or_string from taskgraph.util import path, taskcluster +from taskgraph.util.caches import CACHES from taskgraph.util.schema import Schema EXEC_COMMANDS = { diff --git a/src/taskgraph/util/caches.py b/src/taskgraph/util/caches.py new file mode 100644 index 000000000..0d57a29d9 --- /dev/null +++ b/src/taskgraph/util/caches.py @@ -0,0 +1,57 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import hashlib +from typing import TYPE_CHECKING, Any, Dict + +if TYPE_CHECKING: + from taskgraph.transforms.base import TransformConfig + + +def get_checkout_dir(task: Dict[str, Any]) -> str: + worker = task["worker"] + if worker["os"] == "windows": + return "build" + elif worker["implementation"] == "docker-worker": + return f"{task['run']['workdir']}/checkouts" + else: + return "checkouts" + + +def get_checkout_cache_name(config: "TransformConfig", task: Dict[str, Any]) -> str: + repo_configs = config.repo_configs + cache_name = "checkouts" + + # Robust checkout does not clean up subrepositories, so ensure that tasks + # that checkout different sets of paths have separate caches. + # See https://bugzilla.mozilla.org/show_bug.cgi?id=1631610 + if len(repo_configs) > 1: + checkout_paths = { + "\t".join([repo_config.path, repo_config.prefix]) + for repo_config in sorted( + repo_configs.values(), key=lambda repo_config: repo_config.path + ) + } + checkout_paths_str = "\n".join(checkout_paths).encode("utf-8") + digest = hashlib.sha256(checkout_paths_str).hexdigest() + cache_name += f"-repos-{digest}" + + # Sparse checkouts need their own cache because they can interfere + # with clients that aren't sparse aware. + if task["run"]["sparse-profile"]: + cache_name += "-sparse" + + return cache_name + + +CACHES = { + "cargo": {"env": "CARGO_HOME"}, + "checkout": { + "cache_dir": get_checkout_dir, + "cache_name": get_checkout_cache_name, + }, + "npm": {"env": "npm_config_cache"}, + "pip": {"env": "PIP_CACHE_DIR"}, + "uv": {"env": "UV_CACHE_DIR"}, +} diff --git a/test/test_transforms_run.py b/test/test_transforms_run.py index 5599f5a34..748c457a3 100644 --- a/test/test_transforms_run.py +++ b/test/test_transforms_run.py @@ -43,13 +43,13 @@ def transform(monkeypatch, run_transform): # Needed by 'generic_worker_run_task' monkeypatch.setenv("TASK_ID", "fakeid") - def inner(task_input): + def inner(task_input, **kwargs): defaults = deepcopy(TASK_DEFAULTS) task = merge(defaults, task_input) with patch("taskgraph.transforms.run.configure_taskdesc_for_run") as m: # This forces the generator to be evaluated - run_transform(run.transforms, task) + run_transform(run.transforms, task, **kwargs) return m.call_args[0] return inner diff --git a/test/test_transforms_run_run_task.py b/test/test_transforms_run_run_task.py index 6f896ea30..444c37d81 100644 --- a/test/test_transforms_run_run_task.py +++ b/test/test_transforms_run_run_task.py @@ -8,8 +8,8 @@ import pytest from taskgraph.transforms.run import make_task_description -from taskgraph.transforms.run.common import CACHES from taskgraph.transforms.task import payload_builders +from taskgraph.util.caches import CACHES from taskgraph.util.schema import Schema, validate_schema from taskgraph.util.templates import merge @@ -36,9 +36,9 @@ def run_task_using(mocker, run_transform): "taskcluster/scripts/toolchain/run.ps1", ] - def inner(task): + def inner(task, **kwargs): task = merge(TASK_DEFAULTS, task) - return run_transform(make_task_description, task)[0] + return run_transform(make_task_description, task, **kwargs)[0] return inner @@ -273,12 +273,12 @@ def test_run_task(monkeypatch, request, run_task_using, task): @pytest.fixture def run_caches(run_task_using): - def inner(task): + def inner(task, **kwargs): task.setdefault("run", {}).setdefault("using", "run-task") impl = task.setdefault("worker", {}).setdefault( "implementation", "generic-worker" ) - result = run_task_using(task) + result = run_task_using(task, **kwargs) key = "mounts" if impl == "generic-worker" else "caches" @@ -329,3 +329,11 @@ def test_caches_explicit(run_caches): assert run_caches(task) == [ {"cache-name": "cargo", "directory": ".task-cache/cargo"} ] + + +def test_caches_project_explicit(run_caches): + caches = run_caches( + {}, + extra_graph_config={"taskgraph": {"run": {"use-caches": ["cargo"]}}}, + ) + assert caches == [{"cache-name": "cargo", "directory": ".task-cache/cargo"}] From e77d38ee62ab6efbc5568ed6f0064c1c626438dd Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Mon, 30 Dec 2024 16:27:11 -0500 Subject: [PATCH 5/6] refactor!: remove 'cache-dotcache' key --- docs/reference/migrations.rst | 19 ++++++++++++++++++- src/taskgraph/transforms/run/run_task.py | 22 ---------------------- taskcluster/docker/python/Dockerfile | 2 +- taskcluster/kinds/check/kind.yml | 2 +- taskcluster/kinds/codecov/kind.yml | 1 + taskcluster/kinds/doc/kind.yml | 2 +- taskcluster/kinds/test/kind.yml | 2 +- 7 files changed, 23 insertions(+), 27 deletions(-) diff --git a/docs/reference/migrations.rst b/docs/reference/migrations.rst index f19caee75..af877f5b1 100644 --- a/docs/reference/migrations.rst +++ b/docs/reference/migrations.rst @@ -3,6 +3,23 @@ Migration Guide This page can help when migrating Taskgraph across major versions. +12.x -> 13.x +------------ + +* Remove all ``run.cache-dotcache`` keys. If it was set to ``true``, replace it + with: + + .. code-block:: yaml + + run: + use-caches: [checkout, ] + + Where caches can be any of ``cargo``, ``pip``, ``uv`` or ``npm``. If the task + was setting up ``.cache`` for another tool, a mount will need to be created + for it manually. If ``use-caches`` was previously set to ``false``, omit + ``checkout`` in the example above. If ``use-caches`` was previously set to + ``true``, replace ``true`` with the value above (including ``checkout``). + 11.x -> 12.x ------------ @@ -110,7 +127,7 @@ This page can help when migrating Taskgraph across major versions. 5.x -> 6.x ---------- -* Replace all uses of ``command-context` with the more generalized ``task-context`` +* Replace all uses of ``command-context`` with the more generalized ``task-context`` 4.x -> 5.x ---------- diff --git a/src/taskgraph/transforms/run/run_task.py b/src/taskgraph/transforms/run/run_task.py index 732a609f6..8830b9ac5 100644 --- a/src/taskgraph/transforms/run/run_task.py +++ b/src/taskgraph/transforms/run/run_task.py @@ -28,10 +28,6 @@ run_task_schema = Schema( { Required("using"): "run-task", - # if true, add a cache at ~worker/.cache, which is where things like pip - # tend to hide their caches. This cache is never added for level-1 tasks. - # TODO Once bug 1526028 is fixed, this and 'use-caches' should be merged. - Required("cache-dotcache"): bool, # Which caches to use. May take a boolean in which case either all # (True) or no (False) caches will be used. Alternatively, it can # accept a list of caches to enable. Defaults to only the checkout cache @@ -115,7 +111,6 @@ def common_setup(config, task, taskdesc, command): worker_defaults = { - "cache-dotcache": False, "checkout": True, "sparse-profile": None, "run-as-root": False, @@ -142,16 +137,6 @@ def docker_worker_run_task(config, task, taskdesc): command = run.pop("run-task-command", ["/usr/local/bin/run-task"]) common_setup(config, task, taskdesc, command) - if run.get("cache-dotcache"): - worker["caches"].append( - { - "type": "persistent", - "name": "{project}-dotcache".format(**config.params), - "mount-point": "{workdir}/.cache".format(**run), - "skip-untrusted": True, - } - ) - run_command = run["command"] # dict is for the case of `{'task-reference': str}`. @@ -184,13 +169,6 @@ def generic_worker_run_task(config, task, taskdesc): common_setup(config, task, taskdesc, command) worker.setdefault("mounts", []) - if run.get("cache-dotcache"): - worker["mounts"].append( - { - "cache-name": "{project}-dotcache".format(**config.params), - "directory": "{workdir}/.cache".format(**run), - } - ) worker["mounts"].append( { "content": { diff --git a/taskcluster/docker/python/Dockerfile b/taskcluster/docker/python/Dockerfile index a26c9c380..f30418bfb 100644 --- a/taskcluster/docker/python/Dockerfile +++ b/taskcluster/docker/python/Dockerfile @@ -6,7 +6,7 @@ FROM debian:bookworm-slim LABEL maintainer="Release Engineering " VOLUME /builds/worker/checkouts -VOLUME /builds/worker/.cache +VOLUME /builds/worker/.task-cache/uv # Add worker user RUN mkdir -p /builds && \ diff --git a/taskcluster/kinds/check/kind.yml b/taskcluster/kinds/check/kind.yml index 9b5651f88..07ac8fc6d 100644 --- a/taskcluster/kinds/check/kind.yml +++ b/taskcluster/kinds/check/kind.yml @@ -29,7 +29,7 @@ task-defaults: run: using: run-task cwd: '{checkout}' - cache-dotcache: true + use-caches: [checkout, uv] tasks: type: diff --git a/taskcluster/kinds/codecov/kind.yml b/taskcluster/kinds/codecov/kind.yml index cbf95c86b..64defb5c2 100644 --- a/taskcluster/kinds/codecov/kind.yml +++ b/taskcluster/kinds/codecov/kind.yml @@ -44,6 +44,7 @@ tasks: run: using: run-task cwd: '{checkout}' + use-caches: [checkout, uv] command: >- uv run coverage combine --data-file $MOZ_FETCHES_DIR/coverage $MOZ_FETCHES_DIR && uv run coverage xml --data-file $MOZ_FETCHES_DIR/coverage -o $MOZ_FETCHES_DIR/coverage.xml && diff --git a/taskcluster/kinds/doc/kind.yml b/taskcluster/kinds/doc/kind.yml index 92bf9a274..588c8cc44 100644 --- a/taskcluster/kinds/doc/kind.yml +++ b/taskcluster/kinds/doc/kind.yml @@ -21,7 +21,7 @@ task-defaults: run: using: run-task cwd: '{checkout}' - cache-dotcache: true + use-caches: [checkout, uv] tasks: generate: diff --git a/taskcluster/kinds/test/kind.yml b/taskcluster/kinds/test/kind.yml index c0206afc7..8118e078a 100644 --- a/taskcluster/kinds/test/kind.yml +++ b/taskcluster/kinds/test/kind.yml @@ -34,7 +34,7 @@ task-defaults: run: using: run-task cwd: '{checkout}' - cache-dotcache: true + use-caches: [checkout, uv] tasks: unit: From 7045aa5016452694daa82f4a943d51b6bfee7122 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Mon, 20 Jan 2025 14:37:42 -0500 Subject: [PATCH 6/6] refactor!: default to only use the 'checkout' cache We shouldn't set up a cache for every conceivable tool that's going to be added to the `CACHES` config. Instead tasks should list out the ones they need. The checkout cache is going to be pretty standard and is a reasonable default. --- src/taskgraph/transforms/run/common.py | 2 +- test/test_transforms_run_run_task.py | 48 -------------------------- 2 files changed, 1 insertion(+), 49 deletions(-) diff --git a/src/taskgraph/transforms/run/common.py b/src/taskgraph/transforms/run/common.py index 1749039b8..062cf275e 100644 --- a/src/taskgraph/transforms/run/common.py +++ b/src/taskgraph/transforms/run/common.py @@ -175,7 +175,7 @@ def support_caches( use_caches = ( config.graph_config.get("taskgraph", {}) .get("run", {}) - .get("use-caches", True) + .get("use-caches", ["checkout"]) ) for name, cache_cfg in CACHES.items(): diff --git a/test/test_transforms_run_run_task.py b/test/test_transforms_run_run_task.py index 444c37d81..3f2d93237 100644 --- a/test/test_transforms_run_run_task.py +++ b/test/test_transforms_run_run_task.py @@ -55,36 +55,12 @@ def assert_docker_worker(task): "soft-dependencies": [], "worker": { "caches": [ - { - 'mount-point': '/builds/worker/.task-cache/cargo', - 'name': 'cargo', - 'skip-untrusted': False, - 'type': 'persistent', - }, { "mount-point": "/builds/worker/checkouts", "name": "checkouts", "skip-untrusted": False, "type": "persistent", }, - { - 'mount-point': '/builds/worker/.task-cache/npm', - 'name': 'npm', - 'skip-untrusted': False, - 'type': 'persistent', - }, - { - 'mount-point': '/builds/worker/.task-cache/pip', - 'name': 'pip', - 'skip-untrusted': False, - 'type': 'persistent', - }, - { - 'mount-point': '/builds/worker/.task-cache/uv', - 'name': 'uv', - 'skip-untrusted': False, - 'type': 'persistent', - }, ], "command": [ "/usr/local/bin/run-task", @@ -95,7 +71,6 @@ def assert_docker_worker(task): "echo hello world", ], "env": { - "CARGO_HOME": "/builds/worker/.task-cache/cargo", "CI_BASE_REPOSITORY": "http://hg.example.com", "CI_HEAD_REF": "default", "CI_HEAD_REPOSITORY": "http://hg.example.com", @@ -103,11 +78,8 @@ def assert_docker_worker(task): "CI_REPOSITORY_TYPE": "hg", "HG_STORE_PATH": "/builds/worker/checkouts/hg-store", "MOZ_SCM_LEVEL": "1", - "PIP_CACHE_DIR": "/builds/worker/.task-cache/pip", "REPOSITORIES": '{"ci": "Taskgraph"}', - "UV_CACHE_DIR": "/builds/worker/.task-cache/uv", "VCS_PATH": "/builds/worker/checkouts/vcs", - "npm_config_cache": "/builds/worker/.task-cache/npm", }, "implementation": "docker-worker", "os": "linux", @@ -134,7 +106,6 @@ def assert_generic_worker(task): 'world"' ], "env": { - "CARGO_HOME": "{task_workdir}/.task-cache/cargo", "CI_BASE_REPOSITORY": "http://hg.example.com", "CI_HEAD_REF": "default", "CI_HEAD_REPOSITORY": "http://hg.example.com", @@ -142,31 +113,12 @@ def assert_generic_worker(task): "CI_REPOSITORY_TYPE": "hg", "HG_STORE_PATH": "y:/hg-shared", "MOZ_SCM_LEVEL": "1", - "PIP_CACHE_DIR": "{task_workdir}/.task-cache/pip", "REPOSITORIES": '{"ci": "Taskgraph"}', - "UV_CACHE_DIR": "{task_workdir}/.task-cache/uv", "VCS_PATH": "{task_workdir}/build/src", - "npm_config_cache": "{task_workdir}/.task-cache/npm", }, "implementation": "generic-worker", "mounts": [ - { - "cache-name": "cargo", - "directory": ".task-cache/cargo", - }, {"cache-name": "checkouts", "directory": "build"}, - { - "cache-name": "npm", - "directory": ".task-cache/npm", - }, - { - "cache-name": "pip", - "directory": ".task-cache/pip", - }, - { - "cache-name": "uv", - "directory": ".task-cache/uv", - }, { "content": { "url": "https://tc-tests.localhost/api/queue/v1/task//artifacts/public/run-task"