Skip to content

Commit 177d4e4

Browse files
committed
fix: git URLs working with bootstrap-parallel
The bootstrap-parallel command failed when top-level requirements were specified as git URLs. During the bootstrap phase, git URLs were correctly stored in the dependency graph, but the build-parallel phase reconstructed requirements as name==version, losing the URL information. This caused the system to treat local directory paths as HTTP URLs, resulting in invalid URL errors. Changes: - Add get_top_level_requirement() method to DependencyGraph that efficiently retrieves original requirement specifications by iterating through node parents instead of ROOT children - Modify build_parallel() to use original top-level requirements only when they contain URLs, ensuring non-URL requirements always use the already-resolved version from the graph - Move urlparse import to top of sources.py following PEP 8 guidelines - Add URL parsing logic in download_source() to extract @ref from git URLs (e.g., repo.git@5.2.0) before calling download_git_source() This fix enables nightly build pipelines that rely on git URLs with the bootstrap-parallel command for faster builds. Now #762 can be revisited. Fixes: #761 Co-authored-by: Claude <claude@anthropic.com> Signed-off-by: Ioannis Angelakopoulos <iangelak@redhat.com>
1 parent 90ae492 commit 177d4e4

3 files changed

Lines changed: 49 additions & 2 deletions

File tree

src/fromager/commands/build.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,19 @@ def update_progressbar_cb(future: concurrent.futures.Future) -> None:
696696
"starting to build: %s", sorted(n.key for n in buildable_nodes)
697697
)
698698
for node in buildable_nodes:
699-
req = Requirement(f"{node.canonicalized_name}=={node.version}")
699+
# Try to get the original top-level requirement if it includes a URL.
700+
# We only use the top-level requirement if:
701+
# 1. The node is a direct child of ROOT (it's actually top-level)
702+
# 2. The requirement includes a URL (git, etc.)
703+
# Otherwise, we use the already-resolved version from the node to avoid:
704+
# - Building wrong versions when same package appears multiple times
705+
# - Re-resolving unpinned version specifiers
706+
top_level_req = graph.get_top_level_requirement(node)
707+
if top_level_req and top_level_req.url:
708+
req = top_level_req
709+
logger.debug("using top-level requirement with URL: %s", req)
710+
else:
711+
req = Requirement(f"{node.canonicalized_name}=={node.version}")
700712
reqs.append(req)
701713
future = executor.submit(
702714
_build_parallel,

src/fromager/dependency_graph.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,19 @@ def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
352352
def get_root_node(self) -> DependencyNode:
353353
return self.nodes[ROOT]
354354

355+
def get_top_level_requirement(self, node: DependencyNode) -> Requirement | None:
356+
"""Get the top-level requirement specification for a node.
357+
358+
For packages that were specified as top-level requirements (e.g., with git URLs),
359+
this returns the original requirement specification. Returns None if the node
360+
is not a direct child of ROOT.
361+
"""
362+
root = self.get_root_node()
363+
for edge in node.parents:
364+
if edge.parent_node is root:
365+
return edge.req
366+
return None
367+
355368
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
356369
return self.nodes.values()
357370

src/fromager/sources.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import tarfile
99
import typing
1010
import zipfile
11+
from urllib.parse import urlparse
1112

1213
import resolvelib
1314
from packaging.requirements import Requirement
@@ -77,11 +78,32 @@ def download_source(
7778
elif req.url:
7879
download_path = ctx.work_dir / f"{req.name}-{version}" / f"{req.name}-{version}"
7980
download_path.mkdir(parents=True, exist_ok=True)
81+
82+
# Parse ref from URL if it's in the form repo@ref
83+
url_to_clone = req.url
84+
git_ref: str | None = None
85+
86+
# Remove git+ prefix for parsing
87+
parsing_url = url_to_clone
88+
if parsing_url.startswith("git+"):
89+
parsing_url = parsing_url[len("git+") :]
90+
91+
parsed_url = urlparse(parsing_url)
92+
if "@" in parsed_url.path:
93+
# Extract the ref from the path (e.g., /path/to/repo.git@5.2.0 -> @5.2.0)
94+
new_path, _, git_ref = parsed_url.path.rpartition("@")
95+
# Reconstruct URL without the @ref part
96+
url_to_clone = parsed_url._replace(path=new_path).geturl()
97+
# Add back git+ prefix if it was present
98+
if req.url.startswith("git+"):
99+
url_to_clone = f"git+{url_to_clone}"
100+
80101
download_git_source(
81102
ctx=ctx,
82103
req=req,
83-
url_to_clone=req.url,
104+
url_to_clone=url_to_clone,
84105
destination_dir=download_path,
106+
ref=git_ref,
85107
)
86108
return download_path
87109

0 commit comments

Comments
 (0)