From 90afccf67ebd6dba25c2096ddc68c3a4386f6897 Mon Sep 17 00:00:00 2001 From: Lalatendu Mohanty Date: Fri, 4 Jul 2025 23:36:34 +0530 Subject: [PATCH] Replacing the Python email library parser with packaging.metadata Replaces the Python email library parser with packaging.metadata.Metadata for parsing wheel/package metadata. Fixes: #561 Signed-off-by: Lalatendu Mohanty --- src/fromager/bootstrapper.py | 7 ++----- src/fromager/candidate.py | 33 ++++++++++++--------------------- src/fromager/dependencies.py | 21 +++++++++++++++------ tests/test_pep658_support.py | 9 +++++---- 4 files changed, 34 insertions(+), 36 deletions(-) diff --git a/src/fromager/bootstrapper.py b/src/fromager/bootstrapper.py index 9447026c..8ee38508 100644 --- a/src/fromager/bootstrapper.py +++ b/src/fromager/bootstrapper.py @@ -10,7 +10,6 @@ import tempfile import typing import zipfile -from email.parser import BytesParser from urllib.parse import urlparse from packaging.requirements import Requirement @@ -907,10 +906,8 @@ def _get_version_from_package_metadata( config_settings=pbi.config_settings, ) metadata_filename = source_dir.parent / metadata_dir_base / "METADATA" - with open(metadata_filename, "rb") as f: - p = BytesParser() - metadata = p.parse(f, headersonly=True) - return Version(metadata["Version"]) + metadata = dependencies.parse_metadata(metadata_filename) + return metadata.version def _resolve_prebuilt_with_history( self, diff --git a/src/fromager/candidate.py b/src/fromager/candidate.py index 157f5716..4b6d8fe2 100644 --- a/src/fromager/candidate.py +++ b/src/fromager/candidate.py @@ -2,27 +2,19 @@ import datetime import logging import typing -from email.message import EmailMessage, Message -from email.parser import BytesParser from io import BytesIO -from typing import TYPE_CHECKING from zipfile import ZipFile +from packaging.metadata import Metadata from packaging.requirements import Requirement from packaging.utils import BuildTag, canonicalize_name from packaging.version 
import Version +from . import dependencies from .request_session import session logger = logging.getLogger(__name__) -# fix for runtime errors caused by inheriting classes that are generic in stubs but not runtime -# https://mypy.readthedocs.io/en/latest/runtime_troubles.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime -if TYPE_CHECKING: - Metadata = Message[str, str] -else: - Metadata = Message - @dataclasses.dataclass(frozen=True, order=True, slots=True, repr=False, kw_only=True) class Candidate: @@ -73,11 +65,10 @@ def metadata(self) -> Metadata: return self._metadata def _get_dependencies(self) -> typing.Iterable[Requirement]: - deps = self.metadata.get_all("Requires-Dist", []) + deps = self.metadata.requires_dist or [] extras = self.extras if self.extras else [""] - for d in deps: - r = Requirement(d) + for r in deps: if r.marker is None: yield r else: @@ -95,7 +86,8 @@ def dependencies(self) -> list[Requirement]: @property def requires_python(self) -> str | None: - return self.metadata.get("Requires-Python") + spec = self.metadata.requires_python + return str(spec) if spec is not None else None def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadata: @@ -107,7 +99,7 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat metadata_url: Optional URL of the metadata file (PEP 658) Returns: - Parsed metadata as a Message object + Parsed metadata as a Metadata object """ # Try PEP 658 metadata endpoint first if available if metadata_url: @@ -119,8 +111,7 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat response.raise_for_status() # Parse metadata directly from the response content - p = BytesParser() - metadata = p.parse(BytesIO(response.content), headersonly=True) + metadata = dependencies.parse_metadata(response.content) logger.debug(f"Successfully retrieved metadata via PEP 658 for {url}") return metadata @@ -136,8 +127,8 @@ def get_metadata_for_wheel(url: 
str, metadata_url: str | None = None) -> Metadat with ZipFile(BytesIO(data)) as z: for n in z.namelist(): if n.endswith(".dist-info/METADATA"): - p = BytesParser() - return p.parse(z.open(n), headersonly=True) + metadata_content = z.read(n) + return dependencies.parse_metadata(metadata_content) - # If we didn't find the metadata, return an empty dict - return EmailMessage() + # If we didn't find the metadata, raise an error + raise ValueError(f"Could not find METADATA file in wheel: {url}") diff --git a/src/fromager/dependencies.py b/src/fromager/dependencies.py index bcc5932f..4a49db32 100644 --- a/src/fromager/dependencies.py +++ b/src/fromager/dependencies.py @@ -344,14 +344,23 @@ def default_get_install_dependencies_of_sdist( return set(metadata.requires_dist) -def parse_metadata(metadata_file: pathlib.Path, *, validate: bool = True) -> Metadata: - """Parse a dist-info/METADATA file +def parse_metadata( + metadata_source: pathlib.Path | bytes, *, validate: bool = True +) -> Metadata: + """Parse metadata from a file path or bytes. + + Args: + metadata_source: Path to METADATA file or bytes containing metadata + validate: Whether to validate metadata (default: True) - The default parse mode is 'strict'. It even fails for a mismatch of field - and core metadata version, e.g. a package with metadata 2.2 and - license-expression field (added in 2.4). 
+ Returns: + Parsed Metadata object """ - return Metadata.from_email(metadata_file.read_bytes(), validate=validate) + if isinstance(metadata_source, pathlib.Path): + metadata_bytes = metadata_source.read_bytes() + else: + metadata_bytes = metadata_source + return Metadata.from_email(metadata_bytes, validate=validate) def pep517_metadata_of_sdist( diff --git a/tests/test_pep658_support.py b/tests/test_pep658_support.py index 771114ce..1534295a 100644 --- a/tests/test_pep658_support.py +++ b/tests/test_pep658_support.py @@ -56,10 +56,11 @@ def test_get_metadata_with_pep658_success(self, mock_session) -> None: metadata = get_metadata_for_wheel(wheel_url, metadata_url) # Verify the metadata was parsed correctly - assert metadata["Name"] == "test-package" - assert metadata["Version"] == "1.0.0" - assert metadata["Summary"] == "A test package" - assert "requests >= 2.0.0" in metadata.get_all("Requires-Dist", []) + assert metadata.name == "test-package" + assert str(metadata.version) == "1.0.0" + assert metadata.summary == "A test package" + assert metadata.requires_dist is not None + assert any(str(req) == "requests>=2.0.0" for req in metadata.requires_dist) # Verify only the metadata URL was called, not the wheel URL mock_session.get.assert_called_once_with(metadata_url)