diff --git a/docs/howto/load-task-locally.rst b/docs/howto/load-task-locally.rst index e73387901..066360779 100644 --- a/docs/howto/load-task-locally.rst +++ b/docs/howto/load-task-locally.rst @@ -105,4 +105,32 @@ by combining passing in a custom image locally, and piping a task definition via taskgraph morphed -J --tasks test-unit-py | jq -r 'to_entries | first | .value.task' | taskgraph load-task --image python - +Developing in the Container +--------------------------- + +The ``taskgraph load-task`` command also accepts a ``--develop`` flag. This mounts your +local repository as a volume in the task's expected checkout directory, allowing you to +add print debugging or test fixes directly in a local container. + +Run: + +.. code-block:: shell + + taskgraph load-task --develop + +.. warning:: + + Tasks can do all sorts of strange things. Running in this mode has the + potential to modify your local source checkout in unexpected ways, up to and + including data loss. Use with caution and consider using a dedicated Git + worktree when running with ``--develop``. + +.. warning:: + + Only tasks that use the ``run-task`` script to bootstrap their commands + are currently supported. + +Using ``--develop`` in conjunction with ``--interactive`` can be a powerful way +to iterate quickly in a task's environment. + .. 
_docker-worker payload format: https://docs.taskcluster.net/docs/reference/workers/docker-worker/payload diff --git a/src/taskgraph/docker.py b/src/taskgraph/docker.py index 33a01a7f1..fd16e274b 100644 --- a/src/taskgraph/docker.py +++ b/src/taskgraph/docker.py @@ -28,6 +28,7 @@ from taskgraph.generator import load_tasks_for_kind from taskgraph.transforms import docker_image from taskgraph.util import docker, json +from taskgraph.util.caches import CACHES from taskgraph.util.taskcluster import ( find_task_id, get_artifact_url, @@ -36,6 +37,7 @@ get_task_definition, status_task, ) +from taskgraph.util.vcs import get_repository logger = logging.getLogger(__name__) RUN_TASK_RE = re.compile(r"run-task(-(git|hg))?$") @@ -386,6 +388,27 @@ def _index(l: list, s: str) -> Optional[int]: pass +def _extract_arg(cmd: list[str], arg: str) -> Optional[str]: + if (index := _index(cmd, arg)) is not None: + return cmd[index + 1] + + for item in cmd: + if item.startswith(f"{arg}="): + return item.split("=", 1)[1] + + +def _delete_arg(cmd: list[str], arg: str) -> bool: + if (index := _index(cmd, arg)) is not None: + del cmd[index : index + 2] + return True + + for i, item in enumerate(cmd): + if item.startswith(f"{arg}="): + del cmd[i] + return True + return False + + def _resolve_image(image: Union[str, dict[str, str]], graph_config: GraphConfig) -> str: image_task_id = None @@ -432,6 +455,7 @@ def load_task( custom_image: Optional[str] = None, interactive: Optional[bool] = False, volumes: Optional[list[tuple[str, str]]] = None, + develop: bool = False, ) -> int: """Load and run a task interactively in a Docker container. @@ -449,6 +473,8 @@ def load_task( interactive: If True, execution of the task will be paused and user will be dropped into a shell. They can run `exec-task` to resume it (default: False). + develop: If True, the task will be configured to use the current + local checkout at the current revision (default: False). Returns: int: The exit code from the Docker container. 
@@ -476,6 +502,10 @@ def load_task( logger.error("Only tasks using `run-task` are supported with --interactive!") return 1 + if develop and not is_run_task: + logger.error("Only tasks using `run-task` are supported with --develop!") + return 1 + try: image = custom_image or image image_tag = _resolve_image(image, graph_config) @@ -484,6 +514,48 @@ def load_task( return 1 task_command = task_def["payload"].get("command") # type: ignore + task_env = task_def["payload"].get("env", {}) + + if develop: + repositories = json.loads(task_env.get("REPOSITORIES", "{}")) + if not repositories: + logger.error( + "Can't use --develop with task that doesn't define any $REPOSITORIES!" + ) + return 1 + + try: + repo = get_repository(os.getcwd()) + except RuntimeError: + logger.error("Can't use --develop from outside a source repository!") + return 1 + + checkout_name = list(repositories.keys())[0] + checkout_arg = f"--{checkout_name}-checkout" + checkout_dir = _extract_arg(task_command, checkout_arg) + if not checkout_dir: + logger.error( + f"Can't use --develop with task that doesn't use {checkout_arg}" + ) + return 1 + volumes = volumes or [] + volumes.append((repo.path, checkout_dir)) + + # Delete cache environment variables for cache directories that aren't mounted. + # This prevents tools from trying to write to inaccessible cache paths. + mount_paths = {v[1] for v in volumes} + for cache in CACHES.values(): + var = cache.get("env") + if var in task_env and task_env[var] not in mount_paths: + del task_env[var] + + # Delete environment and arguments related to this repo so that + # `run-task` doesn't attempt to fetch or checkout a new revision. + del repositories[checkout_name] + task_env["REPOSITORIES"] = json.dumps(repositories) + for arg in ("checkout", "sparse-profile", "shallow-clone"): + _delete_arg(task_command, f"--{checkout_name}-{arg}") + exec_command = task_cwd = None if interactive: # Remove the payload section of the task's command. 
This way run-task will @@ -503,16 +575,7 @@ def load_task( ] # Parse `--task-cwd` so we know where to execute the task's command later. - if index := _index(task_command, "--task-cwd"): - task_cwd = task_command[index + 1] - else: - for arg in task_command: - if arg.startswith("--task-cwd="): - task_cwd = arg.split("=", 1)[1] - break - else: - task_cwd = "$TASK_WORKDIR" - + task_cwd = _extract_arg(task_command, "--task-cwd") or "$TASK_WORKDIR" task_command = [ "bash", "-c", @@ -527,7 +590,7 @@ def load_task( "TASKCLUSTER_ROOT_URL": get_root_url(), } # Add the task's environment variables. - env.update(task_def["payload"].get("env", {})) # type: ignore + env.update(task_env) # type: ignore # run-task expects the worker to mount a volume for each path defined in # TASKCLUSTER_CACHES; delete them to avoid needing to do the same, unless @@ -545,6 +608,11 @@ def load_task( else: del env["TASKCLUSTER_CACHES"] + # run-task expects volumes listed under `TASKCLUSTER_VOLUMES` to be empty. + # This can interfere with load-task when using custom volumes. + if volumes and "TASKCLUSTER_VOLUMES" in env: + del env["TASKCLUSTER_VOLUMES"] + envfile = None initfile = None isatty = os.isatty(sys.stdin.fileno()) diff --git a/src/taskgraph/main.py b/src/taskgraph/main.py index c09413c22..18a3633db 100644 --- a/src/taskgraph/main.py +++ b/src/taskgraph/main.py @@ -767,10 +767,19 @@ def image_digest(args): action="store_true", default=False, help="Setup the task but pause execution before executing its command. " - "Repositories will be cloned, environment variables will be set and an" + "Repositories will be cloned, environment variables will be set and an " "executable script named `exec-task` will be provided to resume task " "execution. 
Only supported for `run-task` based tasks.", ) +@argument( + "--develop", + "--use-local-checkout", + dest="develop", + action="store_true", + default=False, + help="Configure the task to use the local source checkout at the current " + "revision instead of cloning and using the revision from CI.", +) @argument( "--keep", dest="remove", @@ -805,6 +814,29 @@ def load_task(args): from taskgraph.docker import load_task # noqa: PLC0415 from taskgraph.util import json # noqa: PLC0415 + no_warn = "TASKGRAPH_LOAD_TASK_NO_WARN" + if args["develop"] and not os.environ.get(no_warn): + print( + dedent( + f""" + warning: Using --develop can cause data loss. + + Running `taskgraph load-task --develop` means the task will operate + on your actual source repository. Make sure you verify the task + doesn't perform any destructive operations against your repository. + + Set {no_warn}=1 to disable this warning. + """ + ).lstrip() + ) + while True: + proceed = input("Proceed? [y/N]: ").lower().strip() + if proceed == "y": + break + if not proceed or proceed == "n": + return 1 + print(f"invalid option: {proceed}") + validate_docker() if args["task"] == "-": @@ -837,6 +869,7 @@ def load_task(args): user=args["user"], custom_image=args["image"], volumes=volumes, + develop=args["develop"], ) diff --git a/src/taskgraph/util/vcs.py b/src/taskgraph/util/vcs.py index f189af544..efed2c5bb 100644 --- a/src/taskgraph/util/vcs.py +++ b/src/taskgraph/util/vcs.py @@ -582,7 +582,7 @@ def does_revision_exist_locally(self, revision): raise -def get_repository(path): +def get_repository(path: str): """Get a repository object for the repository at `path`. If `path` is not a known VCS repository, raise an exception. 
""" diff --git a/test/test_docker.py b/test/test_docker.py index e183d3f35..2e4003c45 100644 --- a/test/test_docker.py +++ b/test/test_docker.py @@ -8,6 +8,7 @@ from taskgraph import docker from taskgraph.config import GraphConfig from taskgraph.transforms.docker_image import IMAGE_BUILDER_IMAGE +from taskgraph.util.vcs import get_repository @pytest.fixture @@ -44,11 +45,8 @@ def side_effect(topsrcdir, context_dir, out_file, image_name=None, args=None): def run_load_task(mocker): def inner( task, - remove=False, - custom_image=None, pass_task_def=False, - interactive=True, - volumes=None, + **kwargs, ): proc = mocker.MagicMock() proc.returncode = 0 @@ -90,14 +88,7 @@ def inner( # Testing with task definition directly input_arg = task - ret = docker.load_task( - graph_config, - input_arg, - remove=remove, - custom_image=custom_image, - interactive=interactive, - volumes=volumes, - ) + ret = docker.load_task(graph_config, input_arg, **kwargs) return ret, mocks return inner @@ -141,7 +132,7 @@ def test_load_task(run_load_task): ("/host/path", "/container/path"), ("/another/host", "/another/container"), ] - ret, mocks = run_load_task(task, volumes=volumes) + ret, mocks = run_load_task(task, remove=False, interactive=True, volumes=volumes) assert ret == 0 if "get_task_definition" in mocks: @@ -223,7 +214,9 @@ def test_load_task_env_init_and_remove(mocker, run_load_task): "image": {"taskId": image_task_id, "type": "task-image"}, }, } - ret, mocks = run_load_task(task, remove=True, volumes=[("/host/path", "/cache")]) + ret, mocks = run_load_task( + task, interactive=True, volumes=[("/host/path", "/cache")] + ) assert ret == 0 # NamedTemporaryFile was called twice (once for env, once for init) @@ -410,6 +403,7 @@ def test_load_task_with_interactive_false(run_load_task): "run", "-i", "-t", + "--rm", "image/tag", "echo", "hello world", @@ -482,6 +476,57 @@ def test_load_task_with_custom_image_registry(mocker, run_load_task, task): assert not mocks["build_image"].called +def 
test_load_task_with_develop(mocker, run_load_task, task): + repo_name = "foo" + repo_path = "/workdir/vcs" + repo = get_repository(os.getcwd()) + + # No REPOSITORIES env + ret, _ = run_load_task(task, develop=True) + assert ret == 1 + + # No --checkout flag + task["payload"]["env"] = { + "REPOSITORIES": f'{{"{repo_name}": "{repo_path}"}}', + "CARGO_HOME": "/cache/cargo", + "PIP_CACHE_DIR": "/unmounted/pip", + "UV_CACHE_DIR": "/unmounted/uv", + } + ret, mocks = run_load_task(task, develop=True) + assert ret == 1 + + env_file = None + task["payload"]["command"].insert(1, f"--{repo_name}-checkout={repo_path}") + m = mocker.patch("os.remove") + try: + ret, mocks = run_load_task( + task, develop=True, volumes=[("/host/cache", "/cache/cargo")] + ) + assert ret == 0 + cmd = mocks["subprocess_run"].call_args[0][0] + cmdstr = " ".join(cmd) + assert f"-v {repo.path}:{repo_path}" in cmdstr + assert "-v /host/cache:/cache/cargo" in cmdstr + assert f"--{repo_name}-checkout" not in cmdstr + + env_file = docker._extract_arg(cmd, "--env-file") + assert env_file + with open(env_file) as fh: + contents = fh.read() + + assert "TASKCLUSTER_VOLUMES" not in contents + assert "REPOSITORIES" in contents + assert "foo" not in contents + # Verify cache env vars: mounted cache should be kept, unmounted should be removed + assert "CARGO_HOME=/cache/cargo" in contents + assert "PIP_CACHE_DIR" not in contents + assert "UV_CACHE_DIR" not in contents + finally: + if env_file: + mocker.stop(m) + os.remove(env_file) + + @pytest.fixture def run_build_image(mocker): def inner(image_name, save_image=None, context_file=None, image_task=None): diff --git a/test/test_main.py b/test/test_main.py index 9efd84690..7a2c9100c 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -418,6 +418,8 @@ def fake_actions_load(_graph_config): @pytest.fixture def run_load_task(mocker, monkeypatch, run_taskgraph): + monkeypatch.setenv("TASKGRAPH_LOAD_TASK_NO_WARN", "1") + def inner(args, stdin_data=None): # Mock the 
docker module functions m_validate_docker = mocker.patch("taskgraph.main.validate_docker") @@ -467,6 +469,7 @@ def test_load_task_command(run_load_task): user=None, custom_image=None, volumes=[], + develop=False, ) # Test with interactive flag @@ -481,6 +484,22 @@ def test_load_task_command(run_load_task): user=None, custom_image=None, volumes=[], + develop=False, + ) + + # Test with develop flag + result, mocks = run_load_task(["load-task", "--develop", "task-id-456"]) + + assert result == 0 + mocks["docker_load_task"].assert_called_once_with( + mocks["graph_config"], + "task-id-456", + interactive=False, + remove=True, + user=None, + custom_image=None, + volumes=[], + develop=True, ) @@ -501,6 +520,7 @@ def test_load_task_command_with_volume(run_load_task): user=None, custom_image=None, volumes=[("/host/path", "/builds/worker/checkouts")], + develop=False, ) # Test with no colon @@ -542,6 +562,7 @@ def test_load_task_command_with_stdin(run_load_task): user=None, custom_image=None, volumes=[], + develop=False, ) @@ -560,9 +581,33 @@ def test_load_task_command_with_task_id(run_load_task): user=None, custom_image=None, volumes=[], + develop=False, ) +def test_load_task_develop_warning(monkeypatch, run_load_task, mocker): + monkeypatch.delenv("TASKGRAPH_LOAD_TASK_NO_WARN", raising=False) + input_prompt = "Proceed? 
[y/N]: " + + mock_input = mocker.patch("builtins.input", return_value="y") + result, mocks = run_load_task(["load-task", "--develop", "task-id-456"]) + assert result == 0 + mock_input.assert_called_once_with(input_prompt) + mocks["docker_load_task"].assert_called_once() + + mock_input = mocker.patch("builtins.input", return_value="n") + result, mocks = run_load_task(["load-task", "--develop", "task-id-789"]) + assert result == 1 + mock_input.assert_called_once_with(input_prompt) + mocks["docker_load_task"].assert_not_called() + + mock_input = mocker.patch("builtins.input", return_value="") + result, mocks = run_load_task(["load-task", "--develop", "task-id-000"]) + assert result == 1 + mock_input.assert_called_once_with(input_prompt) + mocks["docker_load_task"].assert_not_called() + + def test_format_kind_graph_mermaid(): """Test conversion of kind graph to Mermaid format""" # Test with simple graph