From 52288f24046bda7883bc45f6beefb0d8c2e91347 Mon Sep 17 00:00:00 2001 From: Julien Cristau Date: Wed, 5 Nov 2025 14:46:17 +0100 Subject: [PATCH] load-task: allow specifying caches on the command line When debugging a task it can be useful to re-use e.g. the checkout cache between load-task invocations, to not have to clone from scratch each time. --- src/taskgraph/docker.py | 29 ++++++++++++++++++--------- src/taskgraph/main.py | 22 +++++++++++++++++++++ test/test_docker.py | 11 +++++++---- test/test_main.py | 44 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 91 insertions(+), 15 deletions(-) diff --git a/src/taskgraph/docker.py b/src/taskgraph/docker.py index 57719d602..52fe71375 100644 --- a/src/taskgraph/docker.py +++ b/src/taskgraph/docker.py @@ -188,11 +188,11 @@ def build_image( output_dir = temp_dir / "out" output_dir.mkdir() - volumes = { + volumes = [ # TODO write artifacts to tmpdir - str(output_dir): "/workspace/out", - str(image_context): "/workspace/context.tar.gz", - } + (str(output_dir), "/workspace/out"), + (str(image_context), "/workspace/context.tar.gz"), + ] assert label in image_tasks task = image_tasks[label] @@ -211,7 +211,7 @@ def build_image( parent = task.dependencies["parent"][len("docker-image-") :] parent_tar = temp_dir / "parent.tar" build_image(graph_config, parent, save_image=str(parent_tar)) - volumes[str(parent_tar)] = "/workspace/parent.tar" + volumes.append((str(parent_tar), "/workspace/parent.tar")) task_def["payload"]["env"]["CHOWN_OUTPUT"] = f"{os.getuid()}:{os.getgid()}" load_task( @@ -425,7 +425,7 @@ def load_task( user: Optional[str] = None, custom_image: Optional[str] = None, interactive: Optional[bool] = False, - volumes: Optional[dict[str, str]] = None, + volumes: Optional[list[tuple[str, str]]] = None, ) -> int: """Load and run a task interactively in a Docker container. 
@@ -523,9 +523,20 @@ def load_task( env.update(task_def["payload"].get("env", {})) # type: ignore # run-task expects the worker to mount a volume for each path defined in - # TASKCLUSTER_CACHES, delete them to avoid needing to do the same. + # TASKCLUSTER_CACHES; delete them to avoid needing to do the same, unless + # they're passed in as volumes. if "TASKCLUSTER_CACHES" in env: - del env["TASKCLUSTER_CACHES"] + if volumes: + caches = env["TASKCLUSTER_CACHES"].split(";") + caches = [ + cache for cache in caches if any(path == cache for _, path in volumes) + ] + else: + caches = [] + if caches: + env["TASKCLUSTER_CACHES"] = ";".join(caches) + else: + del env["TASKCLUSTER_CACHES"] envfile = None initfile = None @@ -570,7 +581,7 @@ def load_task( command.extend(["-v", f"{initfile.name}:/builds/worker/.bashrc"]) if volumes: - for k, v in volumes.items(): + for k, v in volumes: command.extend(["-v", f"{k}:{v}"]) command.append(image_tag) diff --git a/src/taskgraph/main.py b/src/taskgraph/main.py index f4a9ba3c8..c09413c22 100644 --- a/src/taskgraph/main.py +++ b/src/taskgraph/main.py @@ -792,6 +792,14 @@ def image_digest(args): default="taskcluster", help="Relative path to the root of the Taskgraph definition.", ) +@argument( + "--volume", + "-v", + metavar="HOST_DIR:CONTAINER_DIR", + default=[], + action="append", + help="Mount local path into the container.", +) def load_task(args): from taskgraph.config import load_graph_config # noqa: PLC0415 from taskgraph.docker import load_task # noqa: PLC0415 @@ -806,6 +814,19 @@ def load_task(args): except ValueError: args["task"] = data # assume it is a taskId + volumes = [] + for vol in args["volume"]: + if ":" not in vol: + raise ValueError( + f"Invalid volume specification '{vol}', expected HOST_DIR:CONTAINER_DIR" + ) + k, v = vol.split(":", 1) + if not k or not v: + raise ValueError( + f"Invalid volume specification '{vol}', expected HOST_DIR:CONTAINER_DIR" + ) + volumes.append((k, v)) + root = args["root"] graph_config = 
load_graph_config(root) return load_task( @@ -815,6 +836,7 @@ def load_task(args): remove=args["remove"], user=args["user"], custom_image=args["image"], + volumes=volumes, ) diff --git a/test/test_docker.py b/test/test_docker.py index 85494ddc6..e183d3f35 100644 --- a/test/test_docker.py +++ b/test/test_docker.py @@ -137,7 +137,10 @@ def test_load_task(run_load_task): }, } # Test with custom volumes - volumes = {"/host/path": "/container/path", "/another/host": "/another/container"} + volumes = [ + ("/host/path", "/container/path"), + ("/another/host", "/another/container"), + ] ret, mocks = run_load_task(task, volumes=volumes) assert ret == 0 @@ -216,11 +219,11 @@ def test_load_task_env_init_and_remove(mocker, run_load_task): "--", "echo foo", ], - "env": {"FOO": "BAR", "BAZ": "1", "TASKCLUSTER_CACHES": "path"}, + "env": {"FOO": "BAR", "BAZ": "1", "TASKCLUSTER_CACHES": "/path;/cache"}, "image": {"taskId": image_task_id, "type": "task-image"}, }, } - ret, mocks = run_load_task(task, remove=True) + ret, mocks = run_load_task(task, remove=True, volumes=[("/host/path", "/cache")]) assert ret == 0 # NamedTemporaryFile was called twice (once for env, once for init) @@ -231,7 +234,7 @@ def test_load_task_env_init_and_remove(mocker, run_load_task): env_lines = written_env_content[0].split("\n") # Verify written env is expected - assert "TASKCLUSTER_CACHES=path" not in env_lines + assert "TASKCLUSTER_CACHES=/cache" in env_lines assert "FOO=BAR" in env_lines assert "BAZ=1" in env_lines diff --git a/test/test_main.py b/test/test_main.py index aebac10e4..9efd84690 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -417,7 +417,7 @@ def fake_actions_load(_graph_config): @pytest.fixture -def run_load_task(mocker, monkeypatch): +def run_load_task(mocker, monkeypatch, run_taskgraph): def inner(args, stdin_data=None): # Mock the docker module functions m_validate_docker = mocker.patch("taskgraph.main.validate_docker") @@ -445,7 +445,7 @@ def inner(args, stdin_data=None): } # 
Run the command - result = taskgraph_main(args) + result = run_taskgraph(args) return result, mocks @@ -466,6 +466,7 @@ def test_load_task_command(run_load_task): remove=True, user=None, custom_image=None, + volumes=[], ) # Test with interactive flag @@ -479,9 +480,46 @@ def test_load_task_command(run_load_task): remove=True, user=None, custom_image=None, + volumes=[], ) +def test_load_task_command_with_volume(run_load_task): + # Test with correct volume specification + result, mocks = run_load_task( + ["load-task", "task-id-123", "-v", "/host/path:/builds/worker/checkouts"] + ) + + assert result == 0 + mocks["validate_docker"].assert_called_once() + mocks["load_graph_config"].assert_called_once_with("taskcluster") + mocks["docker_load_task"].assert_called_once_with( + mocks["graph_config"], + "task-id-123", + interactive=False, + remove=True, + user=None, + custom_image=None, + volumes=[("/host/path", "/builds/worker/checkouts")], + ) + + # Test with no colon + result, mocks = run_load_task( + ["load-task", "task-id-123", "-v", "/builds/worker/checkouts"] + ) + assert result == 1 + mocks["validate_docker"].assert_called_once() + mocks["load_graph_config"].assert_not_called() + mocks["docker_load_task"].assert_not_called() + + # Test with missing container path + result, mocks = run_load_task(["load-task", "task-id-123", "-v", "/host/path:"]) + assert result == 1 + mocks["validate_docker"].assert_called_once() + mocks["load_graph_config"].assert_not_called() + mocks["docker_load_task"].assert_not_called() + + def test_load_task_command_with_stdin(run_load_task): # Test with JSON task definition from stdin task_def = { @@ -503,6 +541,7 @@ def test_load_task_command_with_stdin(run_load_task): remove=True, user=None, custom_image=None, + volumes=[], ) @@ -520,6 +559,7 @@ def test_load_task_command_with_task_id(run_load_task): remove=True, user=None, custom_image=None, + volumes=[], )