Skip to content

Commit d87ef5c

Browse files
committed
feat(run-task): implement shallow git clones
1 parent 272b9ad commit d87ef5c

File tree

2 files changed

+180
-13
lines changed

2 files changed

+180
-13
lines changed

src/taskgraph/run-task/run-task

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -571,13 +571,17 @@ def git_fetch(
571571
ref: str,
572572
remote: str = "origin",
573573
tags: bool = False,
574+
shallow: bool = False,
574575
env: Optional[Dict[str, str]] = None,
575576
):
576577
args = ["git", "fetch"]
577578
if tags:
578579
# `--force` is needed to be able to update an existing outdated tag.
579580
args.extend(["--tags", "--force"])
580581

582+
if shallow:
583+
args.append("--depth=1")
584+
581585
args.extend([remote, ref])
582586
retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
583587

@@ -621,6 +625,19 @@ def _clean_git_checkout(destination_path):
621625
print_line(b"vcs", b"successfully cleaned git checkout!\n")
622626

623627

628+
def shortref(ref: str) -> str:
    """Normalize a git ref to its short form.

    A leading ``refs/heads/`` or ``refs/tags/`` is removed so the result can
    be used as a plain branch or tag name. Only the first matching prefix is
    stripped, and refs under any other namespace are left untouched.

    Args:
        ref: A git ref, either fully qualified (``refs/heads/main``) or
            already short (``main``).

    Returns:
        The ref without its ``refs/heads/`` or ``refs/tags/`` prefix, or the
        ref unchanged if it starts with neither.
    """
    # Strip common ref prefixes
    for prefix in ("refs/heads/", "refs/tags/"):
        if ref.startswith(prefix):
            return ref[len(prefix) :]

    return ref
639+
640+
624641
def git_checkout(
625642
destination_path: str,
626643
head_repo: str,
@@ -630,6 +647,7 @@ def git_checkout(
630647
head_rev: Optional[str],
631648
ssh_key_file: Optional[Path],
632649
ssh_known_hosts_file: Optional[Path],
650+
shallow: bool = False,
633651
):
634652
env = {
635653
# abort if transfer speed is lower than 1kB/s for 1 minute
@@ -673,10 +691,18 @@ def git_checkout(
673691
args = [
674692
"git",
675693
"clone",
676-
base_repo if base_repo else head_repo,
677-
destination_path,
678694
]
679695

696+
if shallow:
697+
args.extend(["--depth=1", "--no-checkout"])
698+
699+
args.extend(
700+
[
701+
base_repo if base_repo else head_repo,
702+
destination_path,
703+
]
704+
)
705+
680706
retry_required_command(b"vcs", args, extra_env=env)
681707

682708
# For Github based repos, base_rev often doesn't refer to an ancestor of
@@ -685,7 +711,7 @@ def git_checkout(
685711
# that consumers can compute the merge-base or files modified between the
686712
# two as needed.
687713
if base_rev and base_rev != NULL_REVISION:
688-
git_fetch(destination_path, base_rev, env=env)
714+
git_fetch(destination_path, base_rev, shallow=shallow, env=env)
689715

690716
# If a head_ref was provided, it might be a tag, so we need to make sure we fetch
691717
# those. This is explicitly only done when base and head repo match,
@@ -698,16 +724,29 @@ def git_checkout(
698724

699725
# If a head_ref isn't provided, we fetch all refs from head_repo, which may be slow.
700726
target = head_ref if head_ref else "+refs/heads/*:refs/remotes/work/*"
701-
git_fetch(destination_path, target, remote=head_repo, tags=tags, env=env)
727+
git_fetch(
728+
destination_path,
729+
target,
730+
remote=head_repo,
731+
tags=tags,
732+
shallow=shallow,
733+
env=env,
734+
)
735+
736+
# If we have a shallow clone and specific commit, we need to fetch it too.
737+
if shallow and head_rev and head_rev != head_ref:
738+
git_fetch(
739+
destination_path, head_rev, remote=head_repo, shallow=shallow, env=env
740+
)
702741

703742
args = [
704743
"git",
705744
"checkout",
706745
"-f",
707746
]
708747

709-
if head_ref:
710-
args.extend(["-B", head_ref])
748+
if head_ref and head_ref != head_rev:
749+
args.extend(["-B", shortref(head_ref)])
711750

712751
# `git fetch` set `FETCH_HEAD` reference to the last commit of the desired branch
713752
args.append(head_rev if head_rev else "FETCH_HEAD")
@@ -884,11 +923,17 @@ def add_vcs_arguments(parser, project, name):
884923
f"--{project}-sparse-profile",
885924
help=f"Path to sparse profile for {name} checkout",
886925
)
926+
parser.add_argument(
927+
f"--{project}-shallow-clone",
928+
action="store_true",
929+
help=f"Use shallow clone for {name}",
930+
)
887931

888932

889933
def collect_vcs_options(args, project, name):
890934
checkout = getattr(args, f"{project}_checkout")
891935
sparse_profile = getattr(args, f"{project}_sparse_profile")
936+
shallow_clone = getattr(args, f"{project}_shallow_clone")
892937

893938
env_prefix = project.upper()
894939

@@ -933,6 +978,7 @@ def collect_vcs_options(args, project, name):
933978
"repo-type": repo_type,
934979
"ssh-secret-name": private_key_secret,
935980
"pip-requirements": pip_requirements,
981+
"shallow-clone": shallow_clone,
936982
}
937983

938984

@@ -981,6 +1027,7 @@ def vcs_checkout_from_args(options):
9811027
head_rev,
9821028
ssh_key_file,
9831029
ssh_known_hosts_file,
1030+
shallow=options.get("shallow-clone", False),
9841031
)
9851032
elif options["repo-type"] == "hg":
9861033
revision = hg_checkout(

test/test_scripts_run_task.py

Lines changed: 127 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,10 @@ def test_install_pip_requirements_with_uv(
151151

152152

153153
@pytest.mark.parametrize(
154-
"env,extra_expected",
154+
"args,env,extra_expected",
155155
[
156156
pytest.param(
157+
{},
157158
{
158159
"REPOSITORY_TYPE": "hg",
159160
"BASE_REPOSITORY": "https://hg.mozilla.org/mozilla-central",
@@ -164,20 +165,38 @@ def test_install_pip_requirements_with_uv(
164165
{
165166
"base-repo": "https://hg.mozilla.org/mozilla-unified",
166167
},
167-
)
168+
id="hg",
169+
),
170+
pytest.param(
171+
{"myrepo_shallow_clone": True},
172+
{
173+
"REPOSITORY_TYPE": "git",
174+
"HEAD_REPOSITORY": "https://github.com/test/repo.git",
175+
"HEAD_REV": "abc123",
176+
},
177+
{"shallow-clone": True},
178+
id="git_with_shallow_clone",
179+
),
168180
],
169181
)
170-
def test_collect_vcs_options(monkeypatch, run_task_mod, env, extra_expected):
182+
def test_collect_vcs_options(
183+
monkeypatch,
184+
run_task_mod,
185+
args,
186+
env,
187+
extra_expected,
188+
):
171189
name = "myrepo"
172190
checkout = "checkout"
173191

174192
monkeypatch.setattr(os, "environ", {})
175193
for k, v in env.items():
176194
monkeypatch.setenv(f"{name.upper()}_{k.upper()}", v)
177195

178-
args = Namespace()
179-
setattr(args, f"{name}_checkout", checkout)
180-
setattr(args, f"{name}_sparse_profile", False)
196+
args.setdefault(f"{name}_checkout", checkout)
197+
args.setdefault(f"{name}_shallow_clone", False)
198+
args.setdefault(f"{name}_sparse_profile", False)
199+
args = Namespace(**args)
181200

182201
result = run_task_mod.collect_vcs_options(args, name, name)
183202

@@ -193,6 +212,7 @@ def test_collect_vcs_options(monkeypatch, run_task_mod, env, extra_expected):
193212
"head-ref": env.get("HEAD_REF"),
194213
"head-rev": env.get("HEAD_REV"),
195214
"repo-type": env.get("REPOSITORY_TYPE"),
215+
"shallow-clone": False,
196216
"ssh-secret-name": env.get("SSH_SECRET_NAME"),
197217
"sparse-profile": False,
198218
"store-path": env.get("HG_STORE_PATH"),
@@ -333,7 +353,9 @@ def mock_git_repo():
333353
)
334354

335355
def _commit_file(message, filename):
336-
with open(os.path.join(repo, filename), "w") as fout:
356+
filepath = os.path.join(repo, filename)
357+
os.makedirs(os.path.dirname(filepath), exist_ok=True)
358+
with open(filepath, "w") as fout:
337359
fout.write("test file content")
338360
subprocess.check_call(["git", "add", filename], cwd=repo_path)
339361
subprocess.check_call(["git", "commit", "-m", message], cwd=repo_path)
@@ -427,6 +449,104 @@ def test_git_checkout_with_commit(
427449
assert current_rev == mock_git_repo["branch"]
428450

429451

452+
def test_git_checkout_shallow(
    mock_stdin,
    run_task_mod,
    mock_git_repo,
    tmp_path,
):
    """Check that `git_checkout(..., shallow=True)` produces a usable shallow checkout.

    The checkout must be marked shallow (`.git/shallow` exists), HEAD must
    point at the requested `head_rev`, and both `base_rev` and `head_rev`
    must be present in the object store.
    """
    destination = tmp_path / "destination"

    # Git ignores `--depth` when cloning from local directories, so use file://
    # protocol to force shallow clone.
    repo_url = f"file://{mock_git_repo['path']}"
    base_rev = mock_git_repo["main"]
    head_rev = mock_git_repo["branch"]

    # Use shallow clone with head_ref != head_rev
    run_task_mod.git_checkout(
        destination_path=str(destination),
        head_repo=repo_url,
        base_repo=repo_url,
        base_rev=base_rev,
        head_ref="mybranch",
        head_rev=head_rev,
        ssh_key_file=None,
        ssh_known_hosts_file=None,
        shallow=True,
    )
    shallow_file = destination / ".git" / "shallow"
    assert shallow_file.exists()

    # Verify we're on the correct commit
    final_rev = subprocess.check_output(
        ["git", "rev-parse", "HEAD"],
        cwd=str(destination),
        text=True,
    ).strip()
    assert final_rev == head_rev

    # Verify both base_rev and head_rev are available.
    for sha in [base_rev, head_rev]:
        result = subprocess.run(
            ["git", "cat-file", "-t", sha],
            cwd=str(destination),
            capture_output=True,
            text=True,
        )
        assert result.returncode == 0, f"Commit {sha} should be available"
        assert result.stdout.strip() == "commit"
499+
500+
501+
def test_git_fetch_shallow(
    mock_stdin,
    run_task_mod,
    mock_git_repo,
    tmp_path,
):
    """Check that `git_fetch(..., shallow=True)` can add a missing commit.

    A depth-1, no-checkout clone is created by hand, the branch commit is
    confirmed to be absent, and then `git_fetch` is expected to make that
    commit available in the shallow repository.
    """
    destination = tmp_path / "destination"

    # Git ignores `--depth` when cloning from local directories, so use file://
    # protocol to force shallow clone.
    repo_url = f"file://{mock_git_repo['path']}"

    run_task_mod.run_command(
        b"vcs",
        [
            "git",
            "clone",
            "--depth=1",
            "--no-checkout",
            repo_url,
            str(destination),
        ],
    )
    shallow_file = destination / ".git" / "shallow"
    assert shallow_file.exists()

    # The branch commit is expected to be missing from the depth-1 clone, so
    # verify it doesn't exist yet.
    branch_rev = mock_git_repo["branch"]
    result = subprocess.run(
        ["git", "cat-file", "-t", branch_rev],
        cwd=str(destination),
        capture_output=True,
        text=True,
    )
    assert result.returncode != 0

    run_task_mod.git_fetch(
        str(destination), branch_rev, remote=repo_url, shallow=True
    )

    # Verify the branch commit is now available
    result = subprocess.run(
        ["git", "cat-file", "-t", branch_rev],
        cwd=str(destination),
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0
    assert result.stdout.strip() == "commit"
548+
549+
430550
def test_display_python_version_should_output_python_versions_title(
431551
run_task_mod, capsys
432552
):

0 commit comments

Comments
 (0)