Skip to content

Commit d609819

Browse files
committed
feat(run-task): implement shallow git clones
1 parent cf83951 commit d609819

File tree

2 files changed

+274
-25
lines changed

2 files changed

+274
-25
lines changed

src/taskgraph/run-task/run-task

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,7 @@ def git_fetch(
550550
ref: str,
551551
remote: str = "origin",
552552
tags: bool = False,
553+
shallow: bool = False,
553554
env: Optional[Dict[str, str]] = None,
554555
):
555556
args = ["git", "fetch"]
@@ -558,6 +559,36 @@ def git_fetch(
558559
args.extend(["--tags", "--force"])
559560

560561
args.extend([remote, ref])
562+
563+
if shallow:
564+
# If we have a full sha, we can fetch it directly
565+
if re.match(r"^[a-f0-9]{40}$", ref):
566+
fetch_args = args[:2] + ["--depth=1"] + args[2:]
567+
sha_args = ["git", "fetch", "--depth=1", remote, ref]
568+
ret = run_command(b"vcs", sha_args, cwd=destination_path, extra_env=env)
569+
if ret == 0:
570+
return
571+
572+
# Otherwise we need to incrementally deepen the repo until we detect
573+
# the ref.
574+
for deepen in range(10, 100, 10):
575+
fetch_args = args[:2] + [f"--deepen={deepen}"] + args[2:]
576+
run_command(b"vcs", fetch_args, cwd=destination_path, extra_env=env)
577+
578+
# Check if the target ref exists, if not deepen further.
579+
ret = run_command(
580+
b"vcs",
581+
["git", "cat-file", "-e", "FETCH_HEAD"],
582+
cwd=destination_path,
583+
extra_env=env,
584+
)
585+
if ret == 0:
586+
return
587+
588+
print(f"unable to fetch {ref} from {remote} in shallow clone")
589+
sys.exit(1)
590+
591+
# Non-shallow repo
561592
retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
562593

563594

@@ -600,6 +631,19 @@ def _clean_git_checkout(destination_path):
600631
print_line(b"vcs", b"successfully cleaned git checkout!\n")
601632

602633

634+
def shortref(ref: str) -> str:
    """Return the short name of a git ref.

    A leading ``refs/heads/`` or ``refs/tags/`` is removed. Only the first
    matching prefix is stripped, and only once. Any ref that starts with
    neither prefix is handed back untouched.
    """
    known_prefixes = ("refs/heads/", "refs/tags/")

    # Pick the first prefix (in declaration order) that the ref begins with.
    matched = next((p for p in known_prefixes if ref.startswith(p)), None)
    if matched is None:
        return ref

    return ref[len(matched):]
645+
646+
603647
def git_checkout(
604648
destination_path: str,
605649
head_repo: str,
@@ -609,6 +653,7 @@ def git_checkout(
609653
commit: Optional[str],
610654
ssh_key_file: Optional[Path],
611655
ssh_known_hosts_file: Optional[Path],
656+
shallow: bool = False,
612657
):
613658
env = {
614659
# abort if transfer speed is lower than 1kB/s for 1 minute
@@ -652,16 +697,27 @@ def git_checkout(
652697
args = [
653698
"git",
654699
"clone",
655-
base_repo if base_repo else head_repo,
656-
destination_path,
657700
]
658701

702+
if shallow:
703+
# Use shallow clone with depth 1 for minimal history
704+
args.extend(["--depth=1"])
705+
# Skip checkout initially
706+
args.extend(["--no-checkout"])
707+
708+
args.extend(
709+
[
710+
base_repo if base_repo else head_repo,
711+
destination_path,
712+
]
713+
)
714+
659715
retry_required_command(b"vcs", args, extra_env=env)
660716

661717
# First fetch the base_rev. This allows Taskgraph to compute the files
662718
# changed by the push.
663719
if base_rev and base_rev != NULL_REVISION:
664-
git_fetch(destination_path, base_rev, env=env)
720+
git_fetch(destination_path, base_rev, shallow=shallow, env=env)
665721

666722
# Next fetch the head ref.
667723

@@ -676,16 +732,27 @@ def git_checkout(
676732

677733
# If a ref isn't provided, we fetch all refs from head_repo, which may be slow.
678734
target = ref if ref else "+refs/heads/*:refs/remotes/work/*"
679-
git_fetch(destination_path, target, remote=head_repo, tags=tags, env=env)
735+
git_fetch(
736+
destination_path,
737+
target,
738+
remote=head_repo,
739+
tags=tags,
740+
shallow=shallow,
741+
env=env,
742+
)
743+
744+
# If we have a shallow clone and specific commit, we need to fetch it too.
745+
if shallow and commit and commit != ref:
746+
git_fetch(destination_path, commit, remote=head_repo, shallow=shallow, env=env)
680747

681748
args = [
682749
"git",
683750
"checkout",
684751
"-f",
685752
]
686753

687-
if ref:
688-
args.extend(["-B", ref])
754+
if ref and ref != commit:
755+
args.extend(["-B", shortref(ref)])
689756

690757
# `git fetch` set `FETCH_HEAD` reference to the last commit of the desired branch
691758
args.append(commit if commit else "FETCH_HEAD")
@@ -862,11 +929,17 @@ def add_vcs_arguments(parser, project, name):
862929
f"--{project}-sparse-profile",
863930
help=f"Path to sparse profile for {name} checkout",
864931
)
932+
parser.add_argument(
933+
f"--{project}-shallow-clone",
934+
action="store_true",
935+
help=f"Use shallow clone for {name}",
936+
)
865937

866938

867939
def collect_vcs_options(args, project, name):
868940
checkout = getattr(args, f"{project}_checkout")
869941
sparse_profile = getattr(args, f"{project}_sparse_profile")
942+
shallow_clone = getattr(args, f"{project}_shallow_clone")
870943

871944
env_prefix = project.upper()
872945

@@ -911,6 +984,7 @@ def collect_vcs_options(args, project, name):
911984
"repo-type": repo_type,
912985
"ssh-secret-name": private_key_secret,
913986
"pip-requirements": pip_requirements,
987+
"shallow-clone": shallow_clone,
914988
}
915989

916990

@@ -958,6 +1032,7 @@ def vcs_checkout_from_args(options):
9581032
revision,
9591033
ssh_key_file,
9601034
ssh_known_hosts_file,
1035+
shallow=options.get("shallow-clone", False),
9611036
)
9621037
elif options["repo-type"] == "hg":
9631038
if not revision and not ref:

0 commit comments

Comments
 (0)