Skip to content

Commit b5ab037

Browse files
Replacing the Python email library parser with packaging.metadata
Replaces the Python email library parser with packaging.metadata.Metadata for parsing wheel/package metadata. Fixes: #561 Co-Authored-By: Claude <claude@anthropic.com> Signed-off-by: Lalatendu Mohanty <lmohanty@redhat.com>
1 parent 6f9a334 commit b5ab037

6 files changed

Lines changed: 290 additions & 45 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ dependencies = [
3636
"elfdeps>=0.2.0",
3737
"license-expression",
3838
"packaging",
39-
"pkginfo",
4039
"psutil",
4140
"pydantic",
4241
"pypi_simple",
@@ -204,7 +203,7 @@ exclude = [
204203

205204
[[tool.mypy.overrides]]
206205
# packages without typing annotations and stubs
207-
module = ["license_expression", "pyproject_hooks", "requests_mock", "resolver", "stevedore"]
206+
module = ["hatchling", "hatchling.build", "license_expression", "pyproject_hooks", "requests_mock", "resolver", "stevedore"]
208207
ignore_missing_imports = true
209208

210209
[tool.basedpyright]

src/fromager/bootstrapper.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import tempfile
1313
import typing
1414
import zipfile
15-
from email.parser import BytesParser
1615
from urllib.parse import urlparse
1716

1817
from packaging.requirements import Requirement
@@ -1242,10 +1241,8 @@ def _get_version_from_package_metadata(
12421241
config_settings=pbi.config_settings,
12431242
)
12441243
metadata_filename = source_dir.parent / metadata_dir_base / "METADATA"
1245-
with open(metadata_filename, "rb") as f:
1246-
p = BytesParser()
1247-
metadata = p.parse(f, headersonly=True)
1248-
return Version(metadata["Version"])
1244+
metadata = dependencies.parse_metadata(metadata_filename)
1245+
return metadata.version
12491246

12501247
def _resolve_prebuilt_with_history(
12511248
self,

src/fromager/candidate.py

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,10 @@
22
import datetime
33
import logging
44
import typing
5-
from email.message import EmailMessage, Message
6-
from email.parser import BytesParser
75
from io import BytesIO
8-
from typing import TYPE_CHECKING
96
from zipfile import ZipFile
107

8+
from packaging.metadata import Metadata
119
from packaging.requirements import Requirement
1210
from packaging.utils import BuildTag, canonicalize_name
1311
from packaging.version import Version
@@ -16,13 +14,6 @@
1614

1715
logger = logging.getLogger(__name__)
1816

19-
# fix for runtime errors caused by inheriting classes that are generic in stubs but not runtime
20-
# https://mypy.readthedocs.io/en/latest/runtime_troubles.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
21-
if TYPE_CHECKING:
22-
Metadata = Message[str, str]
23-
else:
24-
Metadata = Message
25-
2617

2718
@dataclasses.dataclass(frozen=True, order=True, slots=True, repr=False, kw_only=True)
2819
class Candidate:
@@ -73,11 +64,10 @@ def metadata(self) -> Metadata:
7364
return self._metadata
7465

7566
def _get_dependencies(self) -> typing.Iterable[Requirement]:
76-
deps = self.metadata.get_all("Requires-Dist", [])
67+
deps = self.metadata.requires_dist or []
7768
extras = self.extras if self.extras else [""]
7869

79-
for d in deps:
80-
r = Requirement(d)
70+
for r in deps:
8171
if r.marker is None:
8272
yield r
8373
else:
@@ -95,19 +85,22 @@ def dependencies(self) -> list[Requirement]:
9585

9686
@property
9787
def requires_python(self) -> str | None:
98-
return self.metadata.get("Requires-Python")
88+
spec = self.metadata.requires_python
89+
return str(spec) if spec is not None else None
9990

10091

101-
def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadata:
102-
"""
103-
Get metadata for a wheel, supporting PEP 658 metadata endpoints.
92+
def get_metadata_for_wheel(
93+
url: str, metadata_url: str | None = None, *, validate: bool = True
94+
) -> Metadata:
95+
"""Get metadata for a wheel, supporting PEP 658 metadata endpoints.
10496
10597
Args:
10698
url: URL of the wheel file
10799
metadata_url: Optional URL of the metadata file (PEP 658)
100+
validate: Whether to validate metadata (default: True)
108101
109102
Returns:
110-
Parsed metadata as a Message object
103+
Parsed metadata as a Metadata object
111104
"""
112105
# Try PEP 658 metadata endpoint first if available
113106
if metadata_url:
@@ -118,9 +111,9 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat
118111
response = session.get(metadata_url)
119112
response.raise_for_status()
120113

121-
# Parse metadata directly from the response content
122-
p = BytesParser()
123-
metadata = p.parse(BytesIO(response.content), headersonly=True)
114+
# Parse metadata directly using packaging.metadata.Metadata
115+
# (avoiding circular import with dependencies module)
116+
metadata = Metadata.from_email(response.content, validate=validate)
124117
logger.debug(f"Successfully retrieved metadata via PEP 658 for {url}")
125118
return metadata
126119

@@ -136,8 +129,10 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat
136129
with ZipFile(BytesIO(data)) as z:
137130
for n in z.namelist():
138131
if n.endswith(".dist-info/METADATA"):
139-
p = BytesParser()
140-
return p.parse(z.open(n), headersonly=True)
132+
metadata_content = z.read(n)
133+
# Parse metadata directly using packaging.metadata.Metadata
134+
# (avoiding circular import with dependencies module)
135+
return Metadata.from_email(metadata_content, validate=validate)
141136

142-
# If we didn't find the metadata, return an empty dict
143-
return EmailMessage()
137+
# If we didn't find the metadata, raise an error
138+
raise ValueError(f"Could not find METADATA file in wheel: {url}")

src/fromager/dependencies.py

Lines changed: 72 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import pathlib
77
import tempfile
88
import typing
9+
import zipfile
910

10-
import pkginfo
1111
import pyproject_hooks
1212
import tomlkit
1313
from packaging.metadata import Metadata
@@ -344,14 +344,23 @@ def default_get_install_dependencies_of_sdist(
344344
return set(metadata.requires_dist)
345345

346346

347-
def parse_metadata(
    metadata_source: pathlib.Path | bytes, *, validate: bool = True
) -> Metadata:
    """Parse core metadata given either a METADATA file path or its raw bytes.

    With ``validate=True`` (the default) the parse is strict. It even fails
    for a mismatch of field and core metadata version, e.g. a package with
    metadata 2.2 and a license-expression field (added in 2.4).

    Args:
        metadata_source: Path to a METADATA file, or the bytes of one.
        validate: Forwarded to ``Metadata.from_email`` (default: True).

    Returns:
        The parsed ``Metadata`` object.
    """
    # A Path is read from disk; anything else is handed over unchanged.
    raw = (
        metadata_source.read_bytes()
        if isinstance(metadata_source, pathlib.Path)
        else metadata_source
    )
    return Metadata.from_email(raw, validate=validate)
355364

356365

357366
def pep517_metadata_of_sdist(
@@ -418,16 +427,70 @@ def validate_dist_name_version(
418427
def get_install_dependencies_of_wheel(
    req: Requirement, wheel_filename: pathlib.Path, requirements_file_dir: pathlib.Path
) -> set[Requirement]:
    """Collect a wheel's install dependencies and record them on disk.

    Reads the wheel's METADATA, passes its Requires-Dist entries through
    ``_filter_requirements`` for ``req``, and writes the result to
    ``INSTALL_REQ_FILE_NAME`` inside ``requirements_file_dir``.

    Args:
        req: The requirement being processed.
        wheel_filename: Path to the wheel file.
        requirements_file_dir: Directory that receives the requirements file.

    Returns:
        The filtered set of requirements taken from the wheel's metadata.
    """
    logger.info(f"getting installation dependencies from {wheel_filename}")
    wheel_metadata = _get_metadata_from_wheel(wheel_filename)
    # requires_dist is None when the wheel declares no dependencies
    deps = _filter_requirements(req, wheel_metadata.requires_dist or [])
    output_file = requirements_file_dir / INSTALL_REQ_FILE_NAME
    _write_requirements_file(deps, output_file)
    return deps
429452

430453

454+
def _get_metadata_from_wheel(
455+
wheel_filename: pathlib.Path, *, validate: bool = True
456+
) -> Metadata:
457+
"""Extract and parse METADATA from a wheel file.
458+
459+
Args:
460+
wheel_filename: Path to the wheel file
461+
validate: Whether to validate metadata (default: True)
462+
463+
Returns:
464+
Parsed Metadata object
465+
466+
Raises:
467+
ValueError: If no METADATA file is found in the wheel
468+
"""
469+
# Predict the dist-info directory name from the wheel filename
470+
# Wheel format: {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
471+
# Dist-info format: {distribution}-{version}.dist-info
472+
# Note: We extract from the filename directly rather than using parse_wheel_filename
473+
# because the dist-info directory uses the original (non-normalized) name
474+
wheel_name_parts = wheel_filename.stem.split("-")
475+
dist_name = wheel_name_parts[0]
476+
dist_version = wheel_name_parts[1]
477+
predicted_dist_info = f"{dist_name}-{dist_version}.dist-info/METADATA"
478+
479+
with zipfile.ZipFile(wheel_filename) as whl:
480+
# Try predicted path first for efficiency
481+
if predicted_dist_info in whl.namelist():
482+
metadata_content = whl.read(predicted_dist_info)
483+
return parse_metadata(metadata_content, validate=validate)
484+
485+
# Fallback to iterating if prediction fails (e.g., non-standard naming)
486+
for entry in whl.namelist():
487+
if entry.endswith(".dist-info/METADATA"):
488+
metadata_content = whl.read(entry)
489+
return parse_metadata(metadata_content, validate=validate)
490+
491+
raise ValueError(f"Could not find METADATA file in wheel: {wheel_filename}")
492+
493+
431494
def get_pyproject_contents(sdist_root_dir: pathlib.Path) -> dict[str, typing.Any]:
432495
pyproject_toml_filename = sdist_root_dir / "pyproject.toml"
433496
if not os.path.exists(pyproject_toml_filename):

0 commit comments

Comments
 (0)