Skip to content
Merged
180 changes: 104 additions & 76 deletions src/taskgraph/run-task/run-task
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,26 @@ def configure_volume_posix(volume, user, group, running_as_root):
set_dir_permissions(volume, user.pw_uid, group.gr_gid)


def git_fetch(
    destination_path: str,
    *targets: str,
    remote: str = "origin",
    tags: bool = False,
    shallow: bool = False,
    env: Optional[dict[str, str]] = None,
):
    """Run ``git fetch`` for ``targets`` inside an existing checkout.

    Args:
        destination_path: Directory the command is run in (passed as ``cwd``).
        *targets: Refs and/or revisions to fetch. Duplicates are dropped,
            keeping the first occurrence, and the remaining targets are
            passed to git in the order they were given.
        remote: Remote to fetch from; defaults to ``"origin"``.
        tags: When true, ``--tags --force`` is added to the command.
        shallow: When true, ``--depth=1`` is added to the command.
        env: Forwarded to the command runner as ``extra_env``.
    """
    args = ["git", "fetch"]
    if tags:
        # `--force` is needed to be able to update an existing outdated tag.
        args.extend(["--tags", "--force"])

    if shallow:
        args.append("--depth=1")

    # De-duplicate while preserving the caller's order. Going through a
    # `set` would order the strings by their hash, which is randomized per
    # process, so the same call could yield a different command line (and a
    # different order of fetched refs) from one run to the next.
    args.extend([remote, *dict.fromkeys(targets)])
    retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)


def _clean_git_checkout(destination_path):
# Delete untracked files (i.e. build products)
print_line(b"vcs", b"cleaning git checkout...\n")
Expand Down Expand Up @@ -605,17 +625,32 @@ def _clean_git_checkout(destination_path):
print_line(b"vcs", b"successfully cleaned git checkout!\n")


def shortref(ref: str) -> str:
    """Return the short name of a git ref.

    A leading ``refs/heads/`` or ``refs/tags/`` is removed. Any other
    input, including a ref that is already short, is returned as-is.
    """
    heads_prefix = "refs/heads/"
    tags_prefix = "refs/tags/"

    # The branch prefix is checked first and at most one prefix is removed.
    if ref.startswith(heads_prefix):
        return ref[len(heads_prefix):]
    if ref.startswith(tags_prefix):
        return ref[len(tags_prefix):]
    return ref


def git_checkout(
destination_path: str,
head_repo: str,
base_repo: Optional[str],
base_ref: Optional[str],
base_rev: Optional[str],
ref: Optional[str],
commit: Optional[str],
head_ref: Optional[str],
head_rev: Optional[str],
ssh_key_file: Optional[Path],
ssh_known_hosts_file: Optional[Path],
shallow: bool = False,
):
assert head_ref or head_rev

env = {
# abort if transfer speed is lower than 1kB/s for 1 minute
"GIT_HTTP_LOW_SPEED_LIMIT": "1024",
Expand Down Expand Up @@ -658,74 +693,66 @@ def git_checkout(
args = [
"git",
"clone",
base_repo if base_repo else head_repo,
destination_path,
]

retry_required_command(b"vcs", args, extra_env=env)

if base_ref:
args = ["git", "fetch", "origin", base_ref]

retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
if shallow:
args.extend(["--depth=1", "--no-checkout"])

# Create local branch so that taskgraph is able to compute differences
# between the head branch and the base one, if needed
args = ["git", "checkout", base_ref]
args.extend(
[
base_repo if base_repo else head_repo,
destination_path,
]
)

retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
retry_required_command(b"vcs", args, extra_env=env)

# When commits are force-pushed (like on a testing branch), base_rev doesn't
# exist on base_ref. Fetching it allows taskgraph to compute differences
# between the previous state before the force-push and the current state.
#
# Unlike base_ref just above, there is no need to checkout the revision:
# it's immediately available after the fetch.
# For Github based repos, base_rev often doesn't refer to an ancestor of
# head_rev simply due to Github not providing that information in their
# webhook events. Therefore we fetch it independently from `head_rev` so
# that consumers can compute the merge-base or files modified between the
# two as needed.
if base_rev and base_rev != NULL_REVISION:
args = ["git", "fetch", "origin", base_rev]
git_fetch(destination_path, base_rev, shallow=shallow, env=env)

retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)

# If a ref was provided, it might be tag, so we need to make sure we fetch
# If a head_ref was provided, it might be a tag, so we need to make sure we fetch
# those. This is explicitly only done when base and head repo match,
# because it is the only scenario where tags could be present. (PRs, for
# example, always include an explicit rev.) Failure to do this could result
# in not having a tag, or worse: having an outdated version of one.
# `--force` is needed to be able to update an existing tag.
if ref and base_repo == head_repo:
args = [
"git",
"fetch",
"--tags",
"--force",
base_repo,
ref,
]

retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)

# If a ref isn't provided, we fetch all refs from head_repo, which may be slow
args = [
"git",
"fetch",
"--no-tags",
head_repo,
ref if ref else "+refs/heads/*:refs/remotes/work/*",
]

retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
tags = False
if head_ref and not head_ref.startswith("refs/heads/") and base_repo == head_repo:
tags = True

# Fetch head_ref and/or head_rev
targets = []
if head_ref:
targets.append(head_ref)
if not head_ref or (shallow and head_rev):
# If head_ref wasn't provided, we fall back to head_rev. If we have a
# shallow clone, head_rev needs to be fetched independently regardless.
targets.append(head_rev)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we assert somewhere that if shallow is True then we have a head_rev?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, good point. I guess a head_rev isn't necessary for shallow clones either though.. I'll fix this up.


git_fetch(
destination_path,
*targets,
remote=head_repo,
tags=tags,
shallow=shallow,
env=env,
)

args = [
"git",
"checkout",
"-f",
]

if ref:
args.extend(["-B", ref])
if head_ref:
args.extend(["-B", shortref(head_ref)])

# `git fetch` sets the `FETCH_HEAD` reference to the last commit of the desired branch
args.append(commit if commit else "FETCH_HEAD")
args.append(head_rev if head_rev else "FETCH_HEAD")

run_required_command(b"vcs", args, cwd=destination_path)

Expand Down Expand Up @@ -899,21 +926,26 @@ def add_vcs_arguments(parser, project, name):
f"--{project}-sparse-profile",
help=f"Path to sparse profile for {name} checkout",
)
parser.add_argument(
f"--{project}-shallow-clone",
action="store_true",
help=f"Use shallow clone for {name}",
)


def collect_vcs_options(args, project, name):
checkout = getattr(args, f"{project}_checkout")
sparse_profile = getattr(args, f"{project}_sparse_profile")
shallow_clone = getattr(args, f"{project}_shallow_clone")

env_prefix = project.upper()

repo_type = os.environ.get(f"{env_prefix}_REPOSITORY_TYPE")
base_repo = os.environ.get(f"{env_prefix}_BASE_REPOSITORY")
base_ref = os.environ.get(f"{env_prefix}_BASE_REF")
base_rev = os.environ.get(f"{env_prefix}_BASE_REV")
head_repo = os.environ.get(f"{env_prefix}_HEAD_REPOSITORY")
revision = os.environ.get(f"{env_prefix}_HEAD_REV")
ref = os.environ.get(f"{env_prefix}_HEAD_REF")
head_ref = os.environ.get(f"{env_prefix}_HEAD_REF")
head_rev = os.environ.get(f"{env_prefix}_HEAD_REV")
pip_requirements = os.environ.get(f"{env_prefix}_PIP_REQUIREMENTS")
private_key_secret = os.environ.get(f"{env_prefix}_SSH_SECRET_NAME")

Expand Down Expand Up @@ -942,26 +974,26 @@ def collect_vcs_options(args, project, name):
"checkout": checkout,
"sparse-profile": sparse_profile,
"base-repo": base_repo,
"base-ref": base_ref,
"base-rev": base_rev,
"head-repo": head_repo,
"revision": revision,
"ref": ref,
"head-ref": head_ref,
"head-rev": head_rev,
"repo-type": repo_type,
"ssh-secret-name": private_key_secret,
"pip-requirements": pip_requirements,
"shallow-clone": shallow_clone,
}


def vcs_checkout_from_args(options):
if not options["checkout"]:
if options["ref"] and not options["revision"]:
if options["head-ref"] and not options["head-rev"]:
print("task should be defined in terms of non-symbolic revision")
sys.exit(1)
return

revision = options["revision"]
ref = options["ref"]
head_ref = options["head-ref"]
head_rev = options["head-rev"]
ssh_key_file = None
ssh_known_hosts_file = None
ssh_dir = None
Expand All @@ -979,40 +1011,36 @@ def vcs_checkout_from_args(options):
ssh_known_hosts_file = ssh_dir.joinpath("known_hosts")
ssh_known_hosts_file.write_bytes(GITHUB_SSH_FINGERPRINT)

if options["repo-type"] == "git":
if not revision and not ref:
raise RuntimeError(
"Git requires that either a ref, a revision, or both are provided"
)
if not head_rev and not head_ref:
raise RuntimeError(
f"{options['repo-type'].capitalize()} requires that either a "
"ref, a revision, or both are provided"
)

if not ref:
if options["repo-type"] == "git":
if not head_ref:
print("Providing a ref will improve the performance of this checkout")

revision = git_checkout(
options["checkout"],
options["head-repo"],
options["base-repo"],
options["base-ref"],
options["base-rev"],
ref,
revision,
head_ref,
head_rev,
ssh_key_file,
ssh_known_hosts_file,
shallow=options.get("shallow-clone", False),
)
elif options["repo-type"] == "hg":
if not revision and not ref:
raise RuntimeError(
"Hg requires that at least one of a ref or revision is provided"
)

revision = hg_checkout(
options["checkout"],
options["head-repo"],
options["base-repo"],
options["store-path"],
options["sparse-profile"],
ref,
revision,
head_ref,
head_rev,
)
else:
raise RuntimeError('Type of VCS must be either "git" or "hg"')
Expand Down
22 changes: 22 additions & 0 deletions src/taskgraph/util/vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ def run(self, *args: str, **kwargs) -> str:
def tool(self) -> str:
"""Version control system being used, either 'hg' or 'git'."""

@property
@abstractmethod
def is_shallow(self) -> bool:
    """Whether this repo is a shallow clone.

    Returns:
        ``True`` if the repository is a shallow clone, ``False`` otherwise.
    """

@property
@abstractmethod
def head_rev(self) -> str:
Expand Down Expand Up @@ -224,6 +229,10 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._env["HGPLAIN"] = "1"

@property
def is_shallow(self) -> bool:
    """Whether this repo is a shallow clone; always ``False`` for this implementation."""
    return False

@property
def head_rev(self):
    """Return the ``{node}`` of revision ``.`` as printed by ``log``.

    Surrounding whitespace (such as a trailing newline) is stripped.
    """
    node = self.run("log", "-r", ".", "-T", "{node}")
    return node.strip()
Expand Down Expand Up @@ -371,6 +380,10 @@ def default_remote_name(self) -> str:

_LS_REMOTE_PATTERN = re.compile(r"ref:\s+refs/heads/(?P<branch_name>\S+)\s+HEAD")

@property
def is_shallow(self):
    """Whether ``rev-parse --is-shallow-repository`` reports ``true``."""
    answer = self.run("rev-parse", "--is-shallow-repository")
    return answer.strip() == "true"

@property
def head_rev(self):
    """Return the output of ``rev-parse --verify HEAD``, stripped of whitespace."""
    output = self.run("rev-parse", "--verify", "HEAD")
    return output.strip()
Expand Down Expand Up @@ -492,6 +505,15 @@ def get_changed_files(self, diff_filter=None, mode=None, rev=None, base=None):
cmd.append("--cached")
elif mode == "all":
cmd.append("HEAD")
elif self.is_shallow:
# In shallow clones, `git log` won't have the history necessary to
# determine the files changed. Using `git diff` finds the
# differences between the two trees which is slightly more
# accurate. However, Github events often don't provide the true
# base revision so shallow Github clones will still return
# incorrect files changed in many cases, most notably pull
# requests that need rebasing.
cmd = ["diff", base, rev]
else:
revision_argument = f"{rev}~1..{rev}" if base is None else f"{base}..{rev}"
cmd = ["log", "--format=format:", revision_argument]
Expand Down
Loading
Loading