class PipCacheWheelHandler(models.DatafileHandler):
    """
    Handle the ``origin.json`` files found in a pip wheel cache directory.

    The JSON object is read for a ``url`` string and an optional
    ``archive_info`` object holding a legacy ``hash`` string
    ("<algorithm>=<digest>") and/or a ``hashes`` mapping of algorithm to
    digest. The package name and version are derived from the last path
    segment of the URL.
    """
    datasource_id = 'pip_wheel_cache'
    default_package_type = 'pypi'
    default_primary_language = 'Python'
    path_patterns = ('*/.cache/pip/wheels/*/*/*/*/origin.json',)
    filetypes = ('json',)
    description = 'pip wheel cache origin.json'
    documentation_url = 'https://pip.pypa.io/en/stable/topics/caching/'

    # Archive extensions removed from the END of the URL filename before it is
    # split into a name and a version.
    _archive_extensions = (
        '.tar.gz',
        '.tar.bz2',
        '.tar.xz',
        '.tgz',
        '.tar',
        '.zip',
    )

    # Hash algorithms that are reported; each one is used as-is as the name of
    # the package data field receiving the digest.
    _hash_algorithms = ('md5', 'sha1', 'sha256', 'sha512')

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the ``origin.json`` file at
        ``location``.

        ``package_only`` is passed through unchanged to
        ``models.PackageData.from_data``.

        Nothing is yielded when the JSON document is not an object. Values
        missing from the file are left unset rather than set to an empty
        string.
        """
        with open(location, encoding='utf-8') as origin_file:
            data = json.load(origin_file)

        # Valid JSON that is not an object has no "url" to report.
        if not isinstance(data, dict):
            return

        url = data.get('url') or None
        name, version = cls._get_name_and_version(url)

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=name,
            version=version,
            download_url=url,
            # Only the checksum fields actually found are passed along.
            **cls._get_hashes(data.get('archive_info')),
        )
        yield models.PackageData.from_data(package_data, package_only)

    @classmethod
    def _get_hashes(cls, archive_info):
        """
        Return a mapping of checksum field name to hex digest extracted from
        an ``archive_info`` value, or an empty mapping if there is none.
        """
        if not isinstance(archive_info, dict):
            return {}

        digests = {}

        hashes = archive_info.get('hashes')
        if isinstance(hashes, dict):
            digests.update(hashes)

        # The single "hash" string names its own algorithm: honor that name
        # instead of assuming sha256, and never let it override "hashes".
        legacy_hash = archive_info.get('hash')
        if isinstance(legacy_hash, str):
            algorithm, separator, digest = legacy_hash.partition('=')
            if separator:
                digests.setdefault(algorithm, digest)

        return {
            algorithm: digest
            for algorithm, digest in digests.items()
            if algorithm in cls._hash_algorithms
            and isinstance(digest, str)
            and digest
        }

    @classmethod
    def _get_name_and_version(cls, url):
        """
        Return a (name, version) tuple derived from the filename of ``url``.
        Either item is None when it cannot be determined.
        """
        # Imported here because these names are not otherwise used by this
        # class.
        from urllib.parse import unquote
        from urllib.parse import urlparse

        if not isinstance(url, str) or not url:
            return None, None

        # Use only the path: a query string or a "#sha256=..." fragment is not
        # part of the filename.
        filename = posixpath.basename(unquote(urlparse(url).path))
        if not filename:
            return None, None

        stem = filename
        lowered = filename.lower()
        for extension in cls._archive_extensions:
            if lowered.endswith(extension):
                stem = filename[:-len(extension)]
                break

        # The version is whatever follows the last dash, as in
        # "construct-2.10.68".
        name, separator, version = stem.rpartition('-')
        if not separator:
            return stem or None, None
        return name or None, version or None

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Yield the assembled items for ``package_data`` found in ``resource``
        by delegating to the base class ``assemble``.
        """
        # The previous hand-written assembly called ``.get()`` on
        # ``package_data`` and called ``package_adder`` with
        # (package_uid, package, resource).
        # NOTE(review): the framework presumably passes a PackageData object
        # (which has no ``.get``) and a ``package_adder`` taking
        # (package_uid, resource, codebase) -- confirm against models.py.
        # Nothing here is specific to pip caches, so reuse the base class.
        yield from super().assemble(
            package_data,
            resource,
            codebase,
            package_adder,
        )
class TestPipCacheWheelHandler(PackageTester):
    """Tests for ``pypi.PipCacheWheelHandler``."""

    test_data_dir = os.path.join(os.path.dirname(__file__), 'data')

    def test_parse_pip_cache_wheel(self):
        """Parse the sample origin.json and compare it with the expected file."""
        origin_json = self.get_test_loc('pip_cache/wheels/construct/origin.json')
        expected_json = self.get_test_loc('pip_cache/wheels/construct/origin.json.expected')
        packages_data = pypi.PipCacheWheelHandler.parse(origin_json)
        self.check_packages_data(
            packages_data,
            expected_json,
            regen=REGEN_TEST_FIXTURES,
        )