Skip to content

Commit 5a08030

Browse files
committed
feat: fast, simple git clone
The new function clones a git repository with submodules with a blob filter. A blobless clone contains the full git history but no blobs. Blobs are downloaded on demand. Pip's VCS feature uses similar tricks to speed up builds from a VCS URL. The *ref* parameter can be any tree-ish reference like a commit, tag, or branch. To force a tag, use `refs/tags/v1.0` to fetch the `v1.0` tag. Like in pip, submodules are automatically cloned recursively. See: #868 Signed-off-by: Christian Heimes <cheimes@redhat.com>
1 parent 81d7024 commit 5a08030

File tree

2 files changed

+136
-0
lines changed

2 files changed

+136
-0
lines changed

src/fromager/gitutils.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,76 @@ def git_clone(
6161
)
6262

6363
return output_dir
64+
65+
66+
def git_clone_fast(
    *,
    output_dir: pathlib.Path,
    repo_url: str,
    ref: str = "HEAD",
) -> None:
    """Efficient, blobless git clone with all submodules

    The function clones a git repository with submodules with a blob filter.
    A blobless clone contains the full git history but no blobs. Blobs are
    downloaded on demand. Pip's VCS feature uses similar tricks to speed
    up builds from a VCS URL.

    The *ref* parameter can be any tree-ish reference like a commit, tag, or
    branch. To force a tag, use ``refs/tags/v1.0`` to fetch the ``v1.0`` tag.

    Like in :command:`pip`, submodules are automatically cloned recursively.

    :param output_dir: target directory of the clone. It is passed to
        :command:`git clone` and used as working directory for the
        subsequent checkout and submodule commands.
    :param repo_url: URL of the repository. It is passed to :command:`git`
        unmodified; embedded credentials are only removed from the log
        message.
    :param ref: tree-ish to check out after cloning, defaults to ``HEAD``.

    .. note::

       :command:`git` and ``libcurl`` do not support :envvar:`NETRC`. Use
       :file:`~/.netrc` or :file:`.gitconfig` for authentication.
    """
    parsed_url = urlparse(repo_url)
    # Create a clean URL without any credentials for logging. Only the
    # "user:password@" prefix is removed from the network location, so an
    # explicit port and a bracketed IPv6 literal are kept intact.
    clean_netloc = parsed_url.netloc.rpartition("@")[2]
    clean_url = parsed_url._replace(netloc=clean_netloc).geturl()
    logger.info(
        "cloning %s, tree-ish %r, into %s",
        clean_url,
        ref,
        output_dir,
    )

    # Clone repo without blobs, don't check out HEAD
    cmd: list[str]
    cmd = [
        "git",
        "clone",
        "--filter=blob:none",
        "--no-checkout",
        repo_url,
        str(output_dir),
    ]
    external_commands.run(cmd, network_isolation=False)

    # check out reference / tag
    logger.debug("check out ref")
    cmd = [
        "git",
        "checkout",
        ref,
    ]
    external_commands.run(cmd, cwd=str(output_dir), network_isolation=False)

    # clone submodules if ".gitmodules" exists. The file only appears in the
    # work tree after the checkout above, so this test must come last.
    if output_dir.joinpath(".gitmodules").is_file():
        # recursive clone of all submodules, filter out unnecessary blobs,
        # 4 jobs in parallel
        logger.debug("update submodules")
        cmd = [
            "git",
            "submodule",
            "update",
            "--init",
            "--recursive",
            "--filter=blob:none",
            "--jobs=4",
        ]
        external_commands.run(cmd, cwd=str(output_dir), network_isolation=False)
    else:
        logger.debug("no .gitmodules file")

tests/test_gitutils.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import pathlib
2+
from unittest.mock import Mock, patch
3+
4+
import pytest
5+
6+
from fromager.gitutils import git_clone_fast
7+
8+
9+
@patch("fromager.external_commands.run")
def test_git_clone_fast(m_run: Mock, tmp_path: pathlib.Path) -> None:
    """Without a ``.gitmodules`` file only clone and checkout are run."""
    repo_url = "https://git.test/project.git"
    target = str(tmp_path)
    expected_clone = [
        "git",
        "clone",
        "--filter=blob:none",
        "--no-checkout",
        repo_url,
        target,
    ]
    expected_checkout = ["git", "checkout", "HEAD"]

    git_clone_fast(output_dir=tmp_path, repo_url=repo_url)

    # exactly two commands: blobless clone, then checkout of the default ref
    assert m_run.call_count == 2
    m_run.assert_any_call(expected_clone, network_isolation=False)
    m_run.assert_any_call(
        expected_checkout,
        network_isolation=False,
        cwd=target,
    )
35+
36+
37+
@patch("fromager.external_commands.run")
def test_git_clone_fast_submodules(m_run: Mock, tmp_path: pathlib.Path) -> None:
    """A ``.gitmodules`` file in the output directory adds a submodule update."""
    repo_url = "https://git.test/project.git"
    # the mocked clone creates no files, so fake the marker file up front
    gitmodules = tmp_path / ".gitmodules"
    gitmodules.touch()
    expected_submodule_update = [
        "git",
        "submodule",
        "update",
        "--init",
        "--recursive",
        "--filter=blob:none",
        "--jobs=4",
    ]

    git_clone_fast(output_dir=tmp_path, repo_url=repo_url)

    # clone + checkout + submodule update; the update is the last call
    assert m_run.call_count == 3
    m_run.assert_called_with(
        expected_submodule_update,
        cwd=str(tmp_path),
        network_isolation=False,
    )
57+
58+
59+
@pytest.mark.skip(reason="needs network access")
def test_git_clone_real(tmp_path: pathlib.Path) -> None:
    """Clone the real fromager repository at a tag (skipped by default)."""
    repo_url = "https://github.com/python-wheel-build/fromager.git"
    tag_ref = "refs/tags/0.73.0"
    git_clone_fast(output_dir=tmp_path, repo_url=repo_url, ref=tag_ref)
    # the checked-out work tree must contain the package sources
    package_dir = tmp_path / "src" / "fromager"
    assert package_dir.is_dir()

0 commit comments

Comments
 (0)