-
Notifications
You must be signed in to change notification settings - Fork 48
Support shallow clones with Git #772
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ae328ef
756b503
09754d9
fb2a0a0
7c6c19d
dbda62d
ea0b89b
7bef1f9
33eb91e
c8788ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -566,6 +566,26 @@ def configure_volume_posix(volume, user, group, running_as_root): | |
| set_dir_permissions(volume, user.pw_uid, group.gr_gid) | ||
|
|
||
|
|
||
def git_fetch(
    destination_path: str,
    *targets: str,
    remote: str = "origin",
    tags: bool = False,
    shallow: bool = False,
    env: Optional[dict[str, str]] = None,
):
    """Run ``git fetch`` from within an existing checkout.

    Args:
        destination_path: Directory the command is executed from (``cwd``).
        *targets: Refs and/or revisions to request from ``remote``.
            Duplicates are dropped and first-seen order is preserved. When
            no target is given, only ``remote`` is passed to git.
        remote: Name or URL of the remote to fetch from.
        tags: When true, ``--tags --force`` is added to the command.
        shallow: When true, ``--depth=1`` is added to the command.
        env: Extra environment variables, forwarded as ``extra_env``.
    """
    args = ["git", "fetch"]
    if tags:
        # `--force` is needed to be able to update an existing outdated tag.
        args.extend(["--tags", "--force"])

    if shallow:
        args.append("--depth=1")

    # De-duplicate while keeping the caller's order. A `set` would make the
    # order of the arguments depend on string hash randomization, so the
    # command line (and its logs) could differ from one run to the next.
    args.extend([remote, *dict.fromkeys(targets)])
    retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
|
|
||
|
|
||
| def _clean_git_checkout(destination_path): | ||
| # Delete untracked files (i.e. build products) | ||
| print_line(b"vcs", b"cleaning git checkout...\n") | ||
|
|
@@ -605,17 +625,32 @@ def _clean_git_checkout(destination_path): | |
| print_line(b"vcs", b"successfully cleaned git checkout!\n") | ||
|
|
||
|
|
||
def shortref(ref: str) -> str:
    """Return the short form of a git ref.

    A leading ``refs/heads/`` or ``refs/tags/`` is removed. Any other value
    is handed back untouched.
    """
    known_prefixes = ("refs/heads/", "refs/tags/")

    # Pick the first prefix (in declaration order) that the ref starts with.
    matched = next((p for p in known_prefixes if ref.startswith(p)), None)
    if matched is None:
        return ref

    return ref[len(matched) :]
|
|
||
|
|
||
| def git_checkout( | ||
| destination_path: str, | ||
| head_repo: str, | ||
| base_repo: Optional[str], | ||
| base_ref: Optional[str], | ||
| base_rev: Optional[str], | ||
| ref: Optional[str], | ||
| commit: Optional[str], | ||
| head_ref: Optional[str], | ||
| head_rev: Optional[str], | ||
| ssh_key_file: Optional[Path], | ||
| ssh_known_hosts_file: Optional[Path], | ||
| shallow: bool = False, | ||
| ): | ||
| assert head_ref or head_rev | ||
|
|
||
| env = { | ||
| # abort if transfer speed is lower than 1kB/s for 1 minute | ||
| "GIT_HTTP_LOW_SPEED_LIMIT": "1024", | ||
|
|
@@ -658,74 +693,66 @@ def git_checkout( | |
| args = [ | ||
| "git", | ||
| "clone", | ||
| base_repo if base_repo else head_repo, | ||
| destination_path, | ||
| ] | ||
|
|
||
| retry_required_command(b"vcs", args, extra_env=env) | ||
|
|
||
| if base_ref: | ||
| args = ["git", "fetch", "origin", base_ref] | ||
|
|
||
| retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) | ||
| if shallow: | ||
| args.extend(["--depth=1", "--no-checkout"]) | ||
|
|
||
| # Create local branch so that taskgraph is able to compute differences | ||
| # between the head branch and the base one, if needed | ||
| args = ["git", "checkout", base_ref] | ||
| args.extend( | ||
| [ | ||
| base_repo if base_repo else head_repo, | ||
| destination_path, | ||
| ] | ||
| ) | ||
|
|
||
| retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) | ||
| retry_required_command(b"vcs", args, extra_env=env) | ||
|
|
||
| # When commits are force-pushed (like on a testing branch), base_rev doesn't | ||
| # exist on base_ref. Fetching it allows taskgraph to compute differences | ||
| # between the previous state before the force-push and the current state. | ||
| # | ||
| # Unlike base_ref just above, there is no need to checkout the revision: | ||
| # it's immediately available after the fetch. | ||
| # For Github based repos, base_rev often doesn't refer to an ancestor of | ||
| # head_rev simply due to Github not providing that information in their | ||
| # webhook events. Therefore we fetch it independently from `head_rev` so | ||
| # that consumers can compute the merge-base or files modified between the | ||
| # two as needed. | ||
| if base_rev and base_rev != NULL_REVISION: | ||
| args = ["git", "fetch", "origin", base_rev] | ||
| git_fetch(destination_path, base_rev, shallow=shallow, env=env) | ||
|
|
||
| retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) | ||
|
|
||
| # If a ref was provided, it might be tag, so we need to make sure we fetch | ||
| # If a head_ref was provided, it might be a tag, so we need to make sure we fetch | ||
| # those. This is explicitly only done when base and head repo match, | ||
| # because it is the only scenario where tags could be present. (PRs, for | ||
| # example, always include an explicit rev.) Failure to do this could result | ||
| # in not having a tag, or worse: having an outdated version of one. | ||
| # `--force` is needed to be able to update an existing tag. | ||
| if ref and base_repo == head_repo: | ||
| args = [ | ||
| "git", | ||
| "fetch", | ||
| "--tags", | ||
| "--force", | ||
| base_repo, | ||
| ref, | ||
| ] | ||
|
|
||
| retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) | ||
|
|
||
| # If a ref isn't provided, we fetch all refs from head_repo, which may be slow | ||
| args = [ | ||
| "git", | ||
| "fetch", | ||
| "--no-tags", | ||
| head_repo, | ||
| ref if ref else "+refs/heads/*:refs/remotes/work/*", | ||
| ] | ||
|
|
||
| retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) | ||
| tags = False | ||
| if head_ref and not head_ref.startswith("refs/heads/") and base_repo == head_repo: | ||
| tags = True | ||
|
|
||
| # Fetch head_ref and/or head_rev | ||
| targets = [] | ||
| if head_ref: | ||
| targets.append(head_ref) | ||
| if not head_ref or (shallow and head_rev): | ||
| # If head_ref wasn't provided, we fallback to head_rev. If we have a | ||
| # shallow clone, head_rev needs to be fetched independently regardless. | ||
| targets.append(head_rev) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we assert somewhere that if shallow is True then we have a head_rev?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Huh, good point. I guess a head_rev isn't necessary for shallow clones either though.. I'll fix this up. |
||
|
|
||
| git_fetch( | ||
| destination_path, | ||
| *targets, | ||
| remote=head_repo, | ||
| tags=tags, | ||
| shallow=shallow, | ||
| env=env, | ||
| ) | ||
|
|
||
| args = [ | ||
| "git", | ||
| "checkout", | ||
| "-f", | ||
| ] | ||
|
|
||
| if ref: | ||
| args.extend(["-B", ref]) | ||
| if head_ref: | ||
| args.extend(["-B", shortref(head_ref)]) | ||
|
|
||
| # `git fetch` sets the `FETCH_HEAD` reference to the last commit of the desired branch | ||
| args.append(commit if commit else "FETCH_HEAD") | ||
| args.append(head_rev if head_rev else "FETCH_HEAD") | ||
|
|
||
| run_required_command(b"vcs", args, cwd=destination_path) | ||
|
|
||
|
|
@@ -899,21 +926,26 @@ def add_vcs_arguments(parser, project, name): | |
| f"--{project}-sparse-profile", | ||
| help=f"Path to sparse profile for {name} checkout", | ||
| ) | ||
| parser.add_argument( | ||
| f"--{project}-shallow-clone", | ||
| action="store_true", | ||
| help=f"Use shallow clone for {name}", | ||
| ) | ||
|
|
||
|
|
||
| def collect_vcs_options(args, project, name): | ||
| checkout = getattr(args, f"{project}_checkout") | ||
| sparse_profile = getattr(args, f"{project}_sparse_profile") | ||
| shallow_clone = getattr(args, f"{project}_shallow_clone") | ||
|
|
||
| env_prefix = project.upper() | ||
|
|
||
| repo_type = os.environ.get(f"{env_prefix}_REPOSITORY_TYPE") | ||
| base_repo = os.environ.get(f"{env_prefix}_BASE_REPOSITORY") | ||
| base_ref = os.environ.get(f"{env_prefix}_BASE_REF") | ||
| base_rev = os.environ.get(f"{env_prefix}_BASE_REV") | ||
| head_repo = os.environ.get(f"{env_prefix}_HEAD_REPOSITORY") | ||
| revision = os.environ.get(f"{env_prefix}_HEAD_REV") | ||
| ref = os.environ.get(f"{env_prefix}_HEAD_REF") | ||
| head_ref = os.environ.get(f"{env_prefix}_HEAD_REF") | ||
| head_rev = os.environ.get(f"{env_prefix}_HEAD_REV") | ||
| pip_requirements = os.environ.get(f"{env_prefix}_PIP_REQUIREMENTS") | ||
| private_key_secret = os.environ.get(f"{env_prefix}_SSH_SECRET_NAME") | ||
|
|
||
|
|
@@ -942,26 +974,26 @@ def collect_vcs_options(args, project, name): | |
| "checkout": checkout, | ||
| "sparse-profile": sparse_profile, | ||
| "base-repo": base_repo, | ||
| "base-ref": base_ref, | ||
| "base-rev": base_rev, | ||
| "head-repo": head_repo, | ||
| "revision": revision, | ||
| "ref": ref, | ||
| "head-ref": head_ref, | ||
| "head-rev": head_rev, | ||
| "repo-type": repo_type, | ||
| "ssh-secret-name": private_key_secret, | ||
| "pip-requirements": pip_requirements, | ||
| "shallow-clone": shallow_clone, | ||
| } | ||
|
|
||
|
|
||
| def vcs_checkout_from_args(options): | ||
| if not options["checkout"]: | ||
| if options["ref"] and not options["revision"]: | ||
| if options["head-ref"] and not options["head-rev"]: | ||
| print("task should be defined in terms of non-symbolic revision") | ||
| sys.exit(1) | ||
| return | ||
|
|
||
| revision = options["revision"] | ||
| ref = options["ref"] | ||
| head_ref = options["head-ref"] | ||
| head_rev = options["head-rev"] | ||
| ssh_key_file = None | ||
| ssh_known_hosts_file = None | ||
| ssh_dir = None | ||
|
|
@@ -979,40 +1011,36 @@ def vcs_checkout_from_args(options): | |
| ssh_known_hosts_file = ssh_dir.joinpath("known_hosts") | ||
| ssh_known_hosts_file.write_bytes(GITHUB_SSH_FINGERPRINT) | ||
|
|
||
| if options["repo-type"] == "git": | ||
| if not revision and not ref: | ||
| raise RuntimeError( | ||
| "Git requires that either a ref, a revision, or both are provided" | ||
| ) | ||
| if not head_rev and not head_ref: | ||
| raise RuntimeError( | ||
| f"{options['repo-type'].capitalize()} requires that either a " | ||
| "ref, a revision, or both are provided" | ||
| ) | ||
|
|
||
| if not ref: | ||
| if options["repo-type"] == "git": | ||
| if not head_ref: | ||
| print("Providing a ref will improve the performance of this checkout") | ||
|
|
||
| revision = git_checkout( | ||
| options["checkout"], | ||
| options["head-repo"], | ||
| options["base-repo"], | ||
| options["base-ref"], | ||
| options["base-rev"], | ||
| ref, | ||
| revision, | ||
| head_ref, | ||
| head_rev, | ||
| ssh_key_file, | ||
| ssh_known_hosts_file, | ||
| shallow=options.get("shallow-clone", False), | ||
| ) | ||
| elif options["repo-type"] == "hg": | ||
| if not revision and not ref: | ||
| raise RuntimeError( | ||
| "Hg requires that at least one of a ref or revision is provided" | ||
| ) | ||
|
|
||
| revision = hg_checkout( | ||
| options["checkout"], | ||
| options["head-repo"], | ||
| options["base-repo"], | ||
| options["store-path"], | ||
| options["sparse-profile"], | ||
| ref, | ||
| revision, | ||
| head_ref, | ||
| head_rev, | ||
| ) | ||
| else: | ||
| raise RuntimeError('Type of VCS must be either "git" or "hg"') | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.