From 3c4154d94331128e953a91c797c2df3e244c4dc0 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Mon, 8 Sep 2025 10:33:42 -0400 Subject: [PATCH 1/3] chore: add docstrings + type annotations to docker.py --- src/taskgraph/docker.py | 140 +++++++++++++++++++++++++++++++++++----- 1 file changed, 124 insertions(+), 16 deletions(-) diff --git a/src/taskgraph/docker.py b/src/taskgraph/docker.py index 7d237ab6f..ba1817f7b 100644 --- a/src/taskgraph/docker.py +++ b/src/taskgraph/docker.py @@ -10,13 +10,14 @@ import tempfile from io import BytesIO from textwrap import dedent -from typing import List, Optional +from typing import Dict, Generator, List, Mapping, Optional, Union try: import zstandard as zstd except ImportError as e: zstd = e +from taskgraph.config import GraphConfig from taskgraph.util import docker, json from taskgraph.util.taskcluster import ( find_task_id, @@ -43,7 +44,15 @@ """ -def get_image_digest(image_name): +def get_image_digest(image_name: str) -> str: + """Get the digest of a docker image by its name. + + Args: + image_name: The name of the docker image to get the digest for. + + Returns: + str: The digest string of the cached docker image task. + """ from taskgraph.generator import load_tasks_for_kind # noqa: PLC0415 from taskgraph.parameters import Parameters # noqa: PLC0415 @@ -56,7 +65,21 @@ def get_image_digest(image_name): return task.attributes["cached_task"]["digest"] -def load_image_by_name(image_name, tag=None): +def load_image_by_name(image_name: str, tag: Optional[str] = None) -> Union[str, bool]: + """Load a docker image by its name. + + Finds the appropriate docker image task by name and loads it from + the indexed artifacts. + + Args: + image_name: The name of the docker image to load. + tag: Optional tag to apply to the loaded image. If not provided, + uses the tag from the image artifact. + + Returns: + str or bool: The full image tag (name:tag) if successful, False if + the image artifacts could not be found. 
+ """ from taskgraph.generator import load_tasks_for_kind # noqa: PLC0415 from taskgraph.optimize.strategies import IndexSearch # noqa: PLC0415 from taskgraph.parameters import Parameters # noqa: PLC0415 @@ -86,7 +109,20 @@ def load_image_by_name(image_name, tag=None): return load_image_by_task_id(task_id, tag) -def load_image_by_task_id(task_id, tag=None): +def load_image_by_task_id(task_id: str, tag: Optional[str] = None) -> str: + """Load a docker image from a task's artifacts. + + Downloads and loads a docker image from the specified task's + public/image.tar.zst artifact. + + Args: + task_id: The task ID containing the docker image artifact. + tag: Optional tag to apply to the loaded image. If not provided, + uses the tag from the image artifact. + + Returns: + str: The full image tag (name:tag) that was loaded. + """ artifact_url = get_artifact_url(task_id, "public/image.tar.zst") result = load_image(artifact_url, tag) print("Found docker image: {}:{}".format(result["image"], result["tag"])) @@ -98,8 +134,27 @@ def load_image_by_task_id(task_id, tag=None): return tag -def build_context(name, outputFile, graph_config, args=None): - """Build a context.tar for image with specified name.""" +def build_context( + name: str, + outputFile: str, + graph_config: GraphConfig, + args: Optional[Mapping[str, str]] = None, +) -> None: + """Build a context.tar for image with specified name. + + Creates a Docker build context tar file for the specified image, + which can be used to build the Docker image. + + Args: + name: The name of the Docker image to build context for. + outputFile: Path to the output tar file to create. + graph_config: The graph configuration object. + args: Optional mapping of arguments to pass to context creation. + + Raises: + ValueError: If name or outputFile is not provided. + Exception: If the image directory does not exist. 
+ """ if not name: raise ValueError("must provide a Docker image name") if not outputFile: @@ -112,10 +167,27 @@ def build_context(name, outputFile, graph_config, args=None): docker.create_context_tar(".", image_dir, outputFile, args) -def build_image(name, tag, graph_config, args=None): +def build_image( + name: str, + tag: Optional[str], + graph_config: GraphConfig, + args: Optional[Mapping[str, str]] = None, +) -> None: """Build a Docker image of specified name. - Output from image building process will be printed to stdout. + Builds a Docker image from the specified image directory and optionally + tags it. Output from image building process will be printed to stdout. + + Args: + name: The name of the Docker image to build. + tag: Optional tag for the built image. If not provided, uses + the default tag from docker_image(). + graph_config: The graph configuration. + args: Optional mapping of arguments to pass to the build process. + + Raises: + ValueError: If name is not provided. + Exception: If the image directory does not exist. """ if not name: raise ValueError("must provide a Docker image name") @@ -142,12 +214,28 @@ def build_image(name, tag, graph_config, args=None): print(DEPLOY_WARNING.format(image_dir=os.path.relpath(image_dir), image=name)) -def load_image(url, imageName=None, imageTag=None): - """ - Load docker image from URL as imageName:tag, if no imageName or tag is given - it will use whatever is inside the zstd compressed tarball. +def load_image( + url: str, imageName: Optional[str] = None, imageTag: Optional[str] = None +) -> Dict[str, str]: + """Load docker image from URL as imageName:tag. + + Downloads a zstd-compressed docker image tarball from the given URL and + loads it into the local Docker daemon. If no imageName or tag is given, + it will use whatever is inside the compressed tarball. + + Args: + url: URL to download the zstd-compressed docker image from. + imageName: Optional name to give the loaded image. 
If provided + without imageTag, will parse tag from name or default to 'latest'. + imageTag: Optional tag to give the loaded image. - Returns an object with properties 'image', 'tag' and 'layer'. + Returns: + dict: An object with properties 'image', 'tag' and 'layer' containing + information about the loaded image. + + Raises: + ImportError: If zstandard package is not installed. + Exception: If the tar contains multiple images/tags or no repositories file. """ if isinstance(zstd, ImportError): raise ImportError( @@ -168,9 +256,9 @@ def load_image(url, imageName=None, imageTag=None): else: imageTag = "latest" - info = {} + info: Dict[str, str] = {} - def download_and_modify_image(): + def download_and_modify_image() -> Generator[bytes, None, None]: # This function downloads and edits the downloaded tar file on the fly. # It emits chunked buffers of the edited tar file, as a generator. print(f"Downloading from {url}") @@ -266,7 +354,27 @@ def _index(l: List, s: str) -> Optional[int]: pass -def load_task(task_id, remove=True, user=None): +def load_task(task_id: str, remove: bool = True, user: Optional[str] = None) -> int: + """Load and run a task interactively in a Docker container. + + Downloads the docker image from a task's definition and runs it in an + interactive shell, setting up the task environment but not executing + the actual task command. The task command can be executed later using + the 'exec-task' function provided in the shell. + + Args: + task_id: The ID of the task to load. + remove: Whether to remove the container after exit (default True). + user: The user to switch to in the container (default 'worker'). + + Returns: + int: The exit code from the Docker container. + + Note: + Only supports tasks that use 'run-task' and have a payload.image. + The task's actual command is made available via an 'exec-task' function + in the interactive shell. 
+ """ user = user or "worker" task_def = get_task_definition(task_id) From 3613f7cbba01040f67aa31c599a56a6081085ff2 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Wed, 17 Sep 2025 10:50:29 -0400 Subject: [PATCH 2/3] fix(docker): make 'load_image_by_name' return None instead of False --- src/taskgraph/docker.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/taskgraph/docker.py b/src/taskgraph/docker.py index ba1817f7b..df2a9387a 100644 --- a/src/taskgraph/docker.py +++ b/src/taskgraph/docker.py @@ -10,7 +10,7 @@ import tempfile from io import BytesIO from textwrap import dedent -from typing import Dict, Generator, List, Mapping, Optional, Union +from typing import Dict, Generator, List, Mapping, Optional try: import zstandard as zstd @@ -65,7 +65,7 @@ def get_image_digest(image_name: str) -> str: return task.attributes["cached_task"]["digest"] -def load_image_by_name(image_name: str, tag: Optional[str] = None) -> Union[str, bool]: +def load_image_by_name(image_name: str, tag: Optional[str] = None) -> Optional[str]: """Load a docker image by its name. Finds the appropriate docker image task by name and loads it from @@ -77,7 +77,7 @@ def load_image_by_name(image_name: str, tag: Optional[str] = None) -> Union[str, uses the tag from the image artifact. Returns: - str or bool: The full image tag (name:tag) if successful, False if + str or None: The full image tag (name:tag) if successful, None if the image artifacts could not be found. 
""" from taskgraph.generator import load_tasks_for_kind # noqa: PLC0415 @@ -104,7 +104,7 @@ def load_image_by_name(image_name: str, tag: Optional[str] = None) -> Union[str, image_name=image_name, project=params["project"] ) ) - return False + return None return load_image_by_task_id(task_id, tag) From a67d335b9c04d5174bb9cf16aa28f55c9fa19840 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Mon, 8 Sep 2025 11:47:08 -0400 Subject: [PATCH 3/3] feat(load-task): support using custom images This implements `taskgraph load-task --image <image>`, where <image> can be one of: * task-id=<task-id> - Downloads image.tar from specified task id * index=<index-path> - Downloads image.tar from specified index path * <name> - Builds image that lives under `taskcluster/docker/<name>` * <image> - Uses specified image from a registry --- src/taskgraph/docker.py | 66 ++++++++++++++++++++++++++++++------- src/taskgraph/main.py | 27 +++++++++++++-- test/test_docker.py | 73 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 147 insertions(+), 19 deletions(-) diff --git a/src/taskgraph/docker.py b/src/taskgraph/docker.py index df2a9387a..45c3850d5 100644 --- a/src/taskgraph/docker.py +++ b/src/taskgraph/docker.py @@ -6,11 +6,13 @@ import os import shlex import subprocess +import sys import tarfile import tempfile from io import BytesIO +from pathlib import Path from textwrap import dedent -from typing import Dict, Generator, List, Mapping, Optional +from typing import Dict, Generator, List, Mapping, Optional, Union try: import zstandard as zstd @@ -354,7 +356,48 @@ def _index(l: List, s: str) -> Optional[int]: pass -def load_task(task_id: str, remove: bool = True, user: Optional[str] = None) -> int: +def _resolve_image(image: Union[str, Dict[str, str]], graph_config: GraphConfig) -> str: + image_task_id = None + + # Standard case, image comes from the task definition.
+ if isinstance(image, dict): + assert "type" in image + + if image["type"] == "task-image": + image_task_id = image["taskId"] + elif image["type"] == "indexed-image": + image_task_id = find_task_id(image["namespace"]) + else: + raise Exception(f"Tasks with {image['type']} images are not supported!") + else: + # Check if image refers to an in-tree image under taskcluster/docker, + # if so build it. + image_dir = docker.image_path(image, graph_config) + if Path(image_dir).is_dir(): + tag = f"taskcluster/{image}:latest" + build_image(image, tag, graph_config, os.environ) + return tag + + # Check if we're referencing a task or index. + if image.startswith("task-id="): + image_task_id = image.split("=", 1)[1] + elif image.startswith("index="): + index = image.split("=", 1)[1] + image_task_id = find_task_id(index) + else: + # Assume the string references an image from a registry. + return image + + return load_image_by_task_id(image_task_id) + + +def load_task( + graph_config: GraphConfig, + task_id: str, + remove: bool = True, + user: Optional[str] = None, + custom_image: Optional[str] = None, +) -> int: """Load and run a task interactively in a Docker container. Downloads the docker image from a task's definition and runs it in an @@ -363,9 +406,11 @@ def load_task(task_id: str, remove: bool = True, user: Optional[str] = None) -> the 'exec-task' function provided in the shell. Args: + graph_config: The graph configuration object. task_id: The ID of the task to load. remove: Whether to remove the container after exit (default True). user: The user to switch to in the container (default 'worker'). + custom_image: A custom image to use instead of the task's image. Returns: int: The exit code from the Docker container. 
@@ -387,6 +432,13 @@ def load_task(task_id: str, remove: bool = True, user: Optional[str] = None) -> print("Only tasks using `run-task` are supported!") return 1 + try: + image = custom_image or image + image_tag = _resolve_image(image, graph_config) + except Exception as e: + print(e, file=sys.stderr) + return 1 + # Remove the payload section of the task's command. This way run-task will # set up the task (clone repos, download fetches, etc) but won't actually # start the core of the task. Instead we'll drop the user into an interactive @@ -415,16 +467,6 @@ def load_task(task_id: str, remove: bool = True, user: Optional[str] = None) -> else: task_cwd = "$TASK_WORKDIR" - if image["type"] == "task-image": - image_task_id = image["taskId"] - elif image["type"] == "indexed-image": - image_task_id = find_task_id(image["namespace"]) - else: - print(f"Tasks with {image['type']} images are not supported!") - return 1 - - image_tag = load_image_by_task_id(image_task_id) - # Set some env vars the worker would normally set. env = { "RUN_ID": "0", diff --git a/src/taskgraph/main.py b/src/taskgraph/main.py index a305c6af4..10ec361c1 100644 --- a/src/taskgraph/main.py +++ b/src/taskgraph/main.py @@ -569,7 +569,7 @@ def show_taskgraph(options): "--root", "-r", default="taskcluster", - help="relative path for the root of the taskgraph definition", + help="Relative path to the root of the Taskgraph definition.", ) @argument( "-t", "--tag", help="tag that the image should be built as.", metavar="name:tag" @@ -683,11 +683,34 @@ def image_digest(args): help="Keep the docker container after exiting.", ) @argument("--user", default=None, help="Container user to start shell with.") +@argument( + "--image", + default=None, + help="Use a custom image instead of the task's image. 
Can be the name of " + "an image under `taskcluster/docker`, `task-id=`, or " + "`index=`.", +) +@argument( + "--root", + "-r", + default="taskcluster", + help="Relative path to the root of the Taskgraph definition.", +) def load_task(args): + from taskgraph.config import load_graph_config # noqa: PLC0415 from taskgraph.docker import load_task # noqa: PLC0415 validate_docker() - return load_task(args["task_id"], remove=args["remove"], user=args["user"]) + + root = args["root"] + graph_config = load_graph_config(root) + return load_task( + graph_config, + args["task_id"], + remove=args["remove"], + user=args["user"], + custom_image=args["image"], + ) @command("decision", help="Run the decision task") diff --git a/test/test_docker.py b/test/test_docker.py index a1bf4628c..4a7b8ca8f 100644 --- a/test/test_docker.py +++ b/test/test_docker.py @@ -113,11 +113,22 @@ def mock_run(*popenargs, check=False, **kwargs): def run_load_task(mocker): task_id = "abc" - def inner(task, remove=False): + def inner(task, remove=False, custom_image=None): proc = mocker.MagicMock() proc.returncode = 0 + graph_config = GraphConfig( + { + "trust-domain": "test-domain", + "docker-image-kind": "docker-image", + }, + "test/data/taskcluster", + ) + mocks = { + "build_image": mocker.patch.object( + docker, "build_image", return_value=None + ), "get_task_definition": mocker.patch.object( docker, "get_task_definition", return_value=task ), @@ -129,7 +140,9 @@ def inner(task, remove=False): ), } - ret = docker.load_task(task_id, remove=remove) + ret = docker.load_task( + graph_config, task_id, remove=remove, custom_image=custom_image + ) return ret, mocks return inner @@ -330,8 +343,58 @@ def test_load_task_with_unsupported_image_type(capsys, run_load_task): }, } - ret, mocks = run_load_task(task) + ret, _ = run_load_task(task) assert ret == 1 - out, _ = capsys.readouterr() - assert "Tasks with unsupported-type images are not supported!" 
in out + _, err = capsys.readouterr() + assert "Tasks with unsupported-type images are not supported!" in err + + +@pytest.fixture +def task(): + return { + "payload": { + "command": [ + "/usr/bin/run-task", + "--task-cwd=/builds/worker", + "--", + "echo", + "test", + ], + "image": {"type": "task-image", "taskId": "abc"}, + }, + } + + +def test_load_task_with_custom_image_in_tree(run_load_task, task): + image = "hello-world" + ret, mocks = run_load_task(task, custom_image=image) + assert ret == 0 + + mocks["build_image"].assert_called_once() + args = mocks["subprocess_run"].call_args[0][0] + tag = args[args.index("-it") + 1] + assert tag == f"taskcluster/{image}:latest" + + +def test_load_task_with_custom_image_task_id(run_load_task, task): + image = "task-id=abc" + ret, mocks = run_load_task(task, custom_image=image) + assert ret == 0 + mocks["load_image_by_task_id"].assert_called_once_with("abc") + + +def test_load_task_with_custom_image_index(mocker, run_load_task, task): + image = "index=abc" + mocker.patch.object(docker, "find_task_id", return_value="abc") + ret, mocks = run_load_task(task, custom_image=image) + assert ret == 0 + mocks["load_image_by_task_id"].assert_called_once_with("abc") + + +def test_load_task_with_custom_image_registry(mocker, run_load_task, task): + image = "ubuntu:latest" + ret, mocks = run_load_task(task, custom_image=image) + assert ret == 0 + assert not mocks["load_image_by_task_id"].called + assert not mocks["build_image"].called