Skip to content

Commit c8ee75b

Browse files
Replacing the Python email library parser with packaging.metadata
Replaces the Python email library parser with packaging.metadata.Metadata for parsing wheel/package metadata.

Fixes: #561
Signed-off-by: Lalatendu Mohanty <lmohanty@redhat.com>
1 parent 6f9a334 commit c8ee75b

File tree

6 files changed

+153
-40
lines changed

6 files changed

+153
-40
lines changed

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ dependencies = [
3636
"elfdeps>=0.2.0",
3737
"license-expression",
3838
"packaging",
39-
"pkginfo",
4039
"psutil",
4140
"pydantic",
4241
"pypi_simple",

src/fromager/bootstrapper.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import tempfile
1313
import typing
1414
import zipfile
15-
from email.parser import BytesParser
1615
from urllib.parse import urlparse
1716

1817
from packaging.requirements import Requirement
@@ -1242,10 +1241,8 @@ def _get_version_from_package_metadata(
12421241
config_settings=pbi.config_settings,
12431242
)
12441243
metadata_filename = source_dir.parent / metadata_dir_base / "METADATA"
1245-
with open(metadata_filename, "rb") as f:
1246-
p = BytesParser()
1247-
metadata = p.parse(f, headersonly=True)
1248-
return Version(metadata["Version"])
1244+
metadata = dependencies.parse_metadata(metadata_filename)
1245+
return metadata.version
12491246

12501247
def _resolve_prebuilt_with_history(
12511248
self,

src/fromager/candidate.py

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,19 @@
22
import datetime
33
import logging
44
import typing
5-
from email.message import EmailMessage, Message
6-
from email.parser import BytesParser
75
from io import BytesIO
8-
from typing import TYPE_CHECKING
96
from zipfile import ZipFile
107

8+
from packaging.metadata import Metadata
119
from packaging.requirements import Requirement
1210
from packaging.utils import BuildTag, canonicalize_name
1311
from packaging.version import Version
1412

13+
from . import dependencies
1514
from .request_session import session
1615

1716
logger = logging.getLogger(__name__)
1817

19-
# fix for runtime errors caused by inheriting classes that are generic in stubs but not runtime
20-
# https://mypy.readthedocs.io/en/latest/runtime_troubles.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
21-
if TYPE_CHECKING:
22-
Metadata = Message[str, str]
23-
else:
24-
Metadata = Message
25-
2618

2719
@dataclasses.dataclass(frozen=True, order=True, slots=True, repr=False, kw_only=True)
2820
class Candidate:
@@ -73,11 +65,10 @@ def metadata(self) -> Metadata:
7365
return self._metadata
7466

7567
def _get_dependencies(self) -> typing.Iterable[Requirement]:
76-
deps = self.metadata.get_all("Requires-Dist", [])
68+
deps = self.metadata.requires_dist or []
7769
extras = self.extras if self.extras else [""]
7870

79-
for d in deps:
80-
r = Requirement(d)
71+
for r in deps:
8172
if r.marker is None:
8273
yield r
8374
else:
@@ -95,7 +86,8 @@ def dependencies(self) -> list[Requirement]:
9586

9687
@property
9788
def requires_python(self) -> str | None:
98-
return self.metadata.get("Requires-Python")
89+
spec = self.metadata.requires_python
90+
return str(spec) if spec is not None else None
9991

10092

10193
def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadata:
@@ -107,7 +99,7 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat
10799
metadata_url: Optional URL of the metadata file (PEP 658)
108100
109101
Returns:
110-
Parsed metadata as a Message object
102+
Parsed metadata as a Metadata object
111103
"""
112104
# Try PEP 658 metadata endpoint first if available
113105
if metadata_url:
@@ -119,8 +111,8 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat
119111
response.raise_for_status()
120112

121113
# Parse metadata directly from the response content
122-
p = BytesParser()
123-
metadata = p.parse(BytesIO(response.content), headersonly=True)
114+
# validate=False for backwards compatibility with previous BytesParser
115+
metadata = dependencies.parse_metadata(response.content, validate=False)
124116
logger.debug(f"Successfully retrieved metadata via PEP 658 for {url}")
125117
return metadata
126118

@@ -136,8 +128,9 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat
136128
with ZipFile(BytesIO(data)) as z:
137129
for n in z.namelist():
138130
if n.endswith(".dist-info/METADATA"):
139-
p = BytesParser()
140-
return p.parse(z.open(n), headersonly=True)
131+
metadata_content = z.read(n)
132+
# validate=False for backwards compatibility with previous BytesParser
133+
return dependencies.parse_metadata(metadata_content, validate=False)
141134

142-
# If we didn't find the metadata, return an empty dict
143-
return EmailMessage()
135+
# If we didn't find the metadata, raise an error
136+
raise ValueError(f"Could not find METADATA file in wheel: {url}")

src/fromager/dependencies.py

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import pathlib
77
import tempfile
88
import typing
9+
import zipfile
910

10-
import pkginfo
1111
import pyproject_hooks
1212
import tomlkit
1313
from packaging.metadata import Metadata
@@ -344,14 +344,23 @@ def default_get_install_dependencies_of_sdist(
344344
return set(metadata.requires_dist)
345345

346346

347-
def parse_metadata(metadata_file: pathlib.Path, *, validate: bool = True) -> Metadata:
348-
"""Parse a dist-info/METADATA file
347+
def parse_metadata(
348+
metadata_source: pathlib.Path | bytes, *, validate: bool = True
349+
) -> Metadata:
350+
"""Parse metadata from a file path or bytes.
351+
352+
Args:
353+
metadata_source: Path to METADATA file or bytes containing metadata
354+
validate: Whether to validate metadata (default: True)
349355
350-
The default parse mode is 'strict'. It even fails for a mismatch of field
351-
and core metadata version, e.g. a package with metadata 2.2 and
352-
license-expression field (added in 2.4).
356+
Returns:
357+
Parsed Metadata object
353358
"""
354-
return Metadata.from_email(metadata_file.read_bytes(), validate=validate)
359+
if isinstance(metadata_source, pathlib.Path):
360+
metadata_bytes = metadata_source.read_bytes()
361+
else:
362+
metadata_bytes = metadata_source
363+
return Metadata.from_email(metadata_bytes, validate=validate)
355364

356365

357366
def pep517_metadata_of_sdist(
@@ -418,16 +427,54 @@ def validate_dist_name_version(
418427
def get_install_dependencies_of_wheel(
419428
req: Requirement, wheel_filename: pathlib.Path, requirements_file_dir: pathlib.Path
420429
) -> set[Requirement]:
430+
"""Get install dependencies from a wheel file.
431+
432+
Extracts and parses the METADATA file from the wheel to get the
433+
Requires-Dist entries.
434+
435+
Args:
436+
req: The requirement being processed
437+
wheel_filename: Path to the wheel file
438+
requirements_file_dir: Directory to write the requirements file
439+
440+
Returns:
441+
Set of requirements from the wheel's metadata
442+
"""
421443
logger.info(f"getting installation dependencies from {wheel_filename}")
422-
wheel = pkginfo.Wheel(str(wheel_filename))
423-
deps = _filter_requirements(req, wheel.requires_dist)
444+
metadata = _get_metadata_from_wheel(wheel_filename)
445+
requires_dist = metadata.requires_dist or []
446+
deps = _filter_requirements(req, requires_dist)
424447
_write_requirements_file(
425448
deps,
426449
requirements_file_dir / INSTALL_REQ_FILE_NAME,
427450
)
428451
return deps
429452

430453

454+
def _get_metadata_from_wheel(
455+
wheel_filename: pathlib.Path, *, validate: bool = False
456+
) -> Metadata:
457+
"""Extract and parse METADATA from a wheel file.
458+
459+
Args:
460+
wheel_filename: Path to the wheel file
461+
validate: Whether to validate metadata (default: False for backwards
462+
compatibility with the previous pkginfo-based implementation)
463+
464+
Returns:
465+
Parsed Metadata object
466+
467+
Raises:
468+
ValueError: If no METADATA file is found in the wheel
469+
"""
470+
with zipfile.ZipFile(wheel_filename) as whl:
471+
for name in whl.namelist():
472+
if name.endswith(".dist-info/METADATA"):
473+
metadata_content = whl.read(name)
474+
return parse_metadata(metadata_content, validate=validate)
475+
raise ValueError(f"Could not find METADATA file in wheel: {wheel_filename}")
476+
477+
431478
def get_pyproject_contents(sdist_root_dir: pathlib.Path) -> dict[str, typing.Any]:
432479
pyproject_toml_filename = sdist_root_dir / "pyproject.toml"
433480
if not os.path.exists(pyproject_toml_filename):

tests/test_dependencies.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import shutil
55
import textwrap
66
import typing
7+
import zipfile
78
from unittest.mock import Mock, patch
89

910
import pytest
@@ -308,3 +309,78 @@ def test_validate_dist_name_version(
308309
else:
309310
with pytest.raises(exc):
310311
validate()
312+
313+
314+
def test_get_install_dependencies_of_wheel(tmp_path: pathlib.Path) -> None:
315+
"""Test extracting install dependencies from a wheel file."""
316+
# Arrange: Create a minimal wheel file with dependencies
317+
wheel_file = tmp_path / "test_pkg-1.0.0-py3-none-any.whl"
318+
metadata_content = textwrap.dedent(
319+
"""\
320+
Metadata-Version: 2.3
321+
Name: test_pkg
322+
Version: 1.0.0
323+
Author-email: Test Author <test@example.com>
324+
Requires-Dist: requests>=2.0
325+
Requires-Dist: urllib3
326+
Requires-Dist: pytest; extra == "test"
327+
"""
328+
)
329+
with zipfile.ZipFile(wheel_file, "w") as zf:
330+
zf.writestr("test_pkg/__init__.py", "")
331+
zf.writestr("test_pkg-1.0.0.dist-info/METADATA", metadata_content)
332+
zf.writestr(
333+
"test_pkg-1.0.0.dist-info/WHEEL",
334+
"Wheel-Version: 1.0\nRoot-Is-Purelib: true\nTag: py3-none-any\n",
335+
)
336+
zf.writestr("test_pkg-1.0.0.dist-info/RECORD", "")
337+
338+
req = Requirement("test_pkg")
339+
340+
# Act
341+
result = dependencies.get_install_dependencies_of_wheel(req, wheel_file, tmp_path)
342+
343+
# Assert: Should get requests and urllib3, but not pytest (extra not requested)
344+
result_names = {r.name for r in result}
345+
assert result_names == {"requests", "urllib3"}
346+
347+
348+
def test_get_install_dependencies_of_wheel_no_deps(tmp_path: pathlib.Path) -> None:
349+
"""Test extracting dependencies from a wheel with no dependencies."""
350+
# Arrange: Create a wheel file without dependencies
351+
wheel_file = tmp_path / "simple_pkg-1.0.0-py3-none-any.whl"
352+
metadata_content = textwrap.dedent(
353+
"""\
354+
Metadata-Version: 2.3
355+
Name: simple_pkg
356+
Version: 1.0.0
357+
"""
358+
)
359+
with zipfile.ZipFile(wheel_file, "w") as zf:
360+
zf.writestr("simple_pkg/__init__.py", "")
361+
zf.writestr("simple_pkg-1.0.0.dist-info/METADATA", metadata_content)
362+
zf.writestr(
363+
"simple_pkg-1.0.0.dist-info/WHEEL",
364+
"Wheel-Version: 1.0\nRoot-Is-Purelib: true\nTag: py3-none-any\n",
365+
)
366+
zf.writestr("simple_pkg-1.0.0.dist-info/RECORD", "")
367+
368+
req = Requirement("simple_pkg")
369+
370+
# Act
371+
result = dependencies.get_install_dependencies_of_wheel(req, wheel_file, tmp_path)
372+
373+
# Assert
374+
assert result == set()
375+
376+
377+
def test_get_metadata_from_wheel_missing_metadata(tmp_path: pathlib.Path) -> None:
378+
"""Test that missing METADATA file raises ValueError."""
379+
# Arrange: Create a wheel file without METADATA
380+
wheel_file = tmp_path / "broken_pkg-1.0.0-py3-none-any.whl"
381+
with zipfile.ZipFile(wheel_file, "w") as zf:
382+
zf.writestr("broken_pkg/__init__.py", "")
383+
384+
# Act & Assert
385+
with pytest.raises(ValueError, match="Could not find METADATA file"):
386+
dependencies._get_metadata_from_wheel(wheel_file)

tests/test_pep658_support.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,11 @@ def test_get_metadata_with_pep658_success(self, mock_session: typing.Any) -> Non
5757
metadata = get_metadata_for_wheel(wheel_url, metadata_url)
5858

5959
# Verify the metadata was parsed correctly
60-
assert metadata["Name"] == "test-package"
61-
assert metadata["Version"] == "1.0.0"
62-
assert metadata["Summary"] == "A test package"
63-
assert "requests >= 2.0.0" in metadata.get_all("Requires-Dist", [])
60+
assert metadata.name == "test-package"
61+
assert str(metadata.version) == "1.0.0"
62+
assert metadata.summary == "A test package"
63+
assert metadata.requires_dist is not None
64+
assert any(str(req) == "requests>=2.0.0" for req in metadata.requires_dist)
6465

6566
# Verify only the metadata URL was called, not the wheel URL
6667
mock_session.get.assert_called_once_with(metadata_url)

0 commit comments

Comments
 (0)