From 10afc59b829daecfd7f731e6d8a1a9f0492f18c5 Mon Sep 17 00:00:00 2001 From: royalvedant Date: Mon, 26 Jan 2026 05:37:19 +0530 Subject: [PATCH 1/2] Fix licensedb URLs and start modernization cleanup --- src/scancode/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scancode/api.py b/src/scancode/api.py index 71382f4a6a..c81fc156d3 100644 --- a/src/scancode/api.py +++ b/src/scancode/api.py @@ -142,7 +142,7 @@ def get_urls(location, threshold=50, **kwargs): SPDX_LICENSE_URL = 'https://spdx.org/licenses/{}' DEJACODE_LICENSE_URL = 'https://enterprise.dejacode.com/urn/urn:dje:license:{}' SCANCODE_LICENSEDB_URL = 'https://scancode-licensedb.aboutcode.org/{}' -SCANCODE_DATA_BASE_URL = 'https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data' +SCANCODE_DATA_BASE_URL = 'https://raw.githubusercontent.com/nexB/scancode-toolkit/develop/src/licensedcode/data' SCANCODE_LICENSE_URL = f'{SCANCODE_DATA_BASE_URL}/licenses/{{}}.LICENSE' SCANCODE_RULE_URL = f'{SCANCODE_DATA_BASE_URL}/rules/{{}}' From 157540bf103ba6ade22fe5ecb26f93211e1da2d9 Mon Sep 17 00:00:00 2001 From: royalvedant Date: Mon, 26 Jan 2026 16:38:50 +0530 Subject: [PATCH 2/2] Add support for pylock.toml (fixes #4638) --- src/packagedcode/__init__.py | 4 ++ src/packagedcode/pypi.py | 38 +++++++++++++++++++ src/packagedcode/python.pyx | 6 +++ src/scancode/api.py | 17 +++++++++ src/scancode/pylock.py | 18 +++++++++ src/scancode/pylock.toml | 5 +++ .../packagedcode/data/pypi/pylock/pylock.toml | 5 +++ .../pypi/pylock/pylock.toml-expected.json | 37 ++++++++++++++++++ tests/packagedcode/test_pypi.py | 14 +++++++ 9 files changed, 144 insertions(+) create mode 100644 src/packagedcode/python.pyx create mode 100644 src/scancode/pylock.py create mode 100644 src/scancode/pylock.toml create mode 100644 tests/packagedcode/data/pypi/pylock/pylock.toml create mode 100644 tests/packagedcode/data/pypi/pylock/pylock.toml-expected.json diff --git a/src/packagedcode/__init__.py b/src/packagedcode/__init__.py index d3c48b6e25..076b458b64 100644 --- a/src/packagedcode/__init__.py +++ b/src/packagedcode/__init__.py @@ -34,12 +34,15 @@ from packagedcode import phpcomposer from packagedcode import pubspec from packagedcode import pypi +from packagedcode import pypi from packagedcode import readme from packagedcode import rpm from packagedcode import rubygems from packagedcode import swift from packagedcode import win_pe from packagedcode import windows +from packagedcode.pylock import parse_pylock + if on_linux: from packagedcode import msi @@ -212,6 +215,7 @@ # These are handlers for deplock generated files pypi.PipInspectDeplockHandler, + pypi.PylockTomlHandler, ] if on_linux: diff --git a/src/packagedcode/pypi.py b/src/packagedcode/pypi.py index b5588ed7ca..2cb8cd5b64 100644 --- a/src/packagedcode/pypi.py +++ b/src/packagedcode/pypi.py @@ -832,6 +832,44 @@ def parse(cls, location, package_only=False): yield models.PackageData.from_data(package_data, package_only) +class PylockTomlHandler(models.DatafileHandler): + datasource_id = 'pypi_pylock_toml' + path_patterns = ('*pylock.toml',) + default_package_type = 'pypi' + default_primary_language = 'Python' + description = 'Python pylock.toml' + documentation_url = 'https://github.com/nexB/scancode-toolkit' + + @classmethod + def parse(cls, location, package_only=False): + pylock_data = parse_pylock(location) + if not pylock_data: + return + + dependencies = [] + for package_name, package_info in pylock_data.get('package', {}).items(): + version = package_info.get('version') + purl = PackageURL(type='pypi', name=package_name, version=version) + dependency = models.DependentPackage( + purl=purl.to_string(), + extracted_requirement=f'{package_name}=={version}' if version else package_name, + scope='install', + is_runtime=True, + is_optional=False, + is_direct=True, + is_pinned=bool(version), + ) + dependencies.append(dependency.to_dict()) + + package_data = dict( + datasource_id=cls.datasource_id, + type=cls.default_package_type, + primary_language='Python', + dependencies=dependencies, + extra_data=pylock_data, + ) + yield models.PackageData.from_data(package_data, package_only) + class PipInspectDeplockHandler(models.DatafileHandler): datasource_id = 'pypi_inspect_deplock' path_patterns = ('*pip-inspect.deplock',) diff --git a/src/packagedcode/python.pyx b/src/packagedcode/python.pyx new file mode 100644 index 0000000000..9fad45bf1d --- /dev/null +++ b/src/packagedcode/python.pyx @@ -0,0 +1,6 @@ +requirements + + + + + diff --git a/src/scancode/api.py b/src/scancode/api.py index c81fc156d3..1833ec83ae 100644 --- a/src/scancode/api.py +++ b/src/scancode/api.py @@ -16,6 +16,7 @@ from commoncode.hash import multi_checksums from scancode import ScancodeError from typecode.contenttype import get_type +from scancode.pylock import parse_pylock TRACE = os.environ.get('SCANCODE_DEBUG_API', False) @@ -333,9 +334,25 @@ def get_package_data( **kwargs, ) or [] + # get pylock data from the `pylock.toml` file + pylock_data = get_pylock_data(location) + + if pylock_data: + package_datas.append(pylock_data) + return dict(package_data=[pd.to_dict() for pd in package_datas]) +def get_pylock_data(location): + """ + Return a mapping of pylock data from the `pylock.toml` file at `location`. + """ + pylock_location = os.path.join(location, "pylock.toml") + if os.path.exists(pylock_location): + return parse_pylock(pylock_location) + return {} + + def get_file_info(location, **kwargs): """ Return a mapping of file information collected for the file at `location`. diff --git a/src/scancode/pylock.py b/src/scancode/pylock.py new file mode 100644 index 0000000000..091cab290b --- /dev/null +++ b/src/scancode/pylock.py @@ -0,0 +1,18 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import tomli + +def parse_pylock(location): + """ + Parse a pylock.toml file and return its content. + """ + with open(location, "rb") as fp: + data = tomli.load(fp) + return data diff --git a/src/scancode/pylock.toml b/src/scancode/pylock.toml new file mode 100644 index 0000000000..a93b09157a --- /dev/null +++ b/src/scancode/pylock.toml @@ -0,0 +1,5 @@ +[package.requests] +version = "2.31.0" + +[package.numpy] +version = "1.26.0" diff --git a/tests/packagedcode/data/pypi/pylock/pylock.toml b/tests/packagedcode/data/pypi/pylock/pylock.toml new file mode 100644 index 0000000000..a93b09157a --- /dev/null +++ b/tests/packagedcode/data/pypi/pylock/pylock.toml @@ -0,0 +1,5 @@ +[package.requests] +version = "2.31.0" + +[package.numpy] +version = "1.26.0" diff --git a/tests/packagedcode/data/pypi/pylock/pylock.toml-expected.json b/tests/packagedcode/data/pypi/pylock/pylock.toml-expected.json new file mode 100644 index 0000000000..c72cee407f --- /dev/null +++ b/tests/packagedcode/data/pypi/pylock/pylock.toml-expected.json @@ -0,0 +1,37 @@ +[ + { + "datasource_id": "pypi_pylock_toml", + "type": "pypi", + "primary_language": "Python", + "dependencies": [ + { + "purl": "pkg:pypi/requests@2.31.0", + "extracted_requirement": "requests==2.31.0", + "scope": "install", + "is_runtime": true, + "is_optional": false, + "is_direct": true, + "is_pinned": true + }, + { + "purl": "pkg:pypi/numpy@1.26.0", + "extracted_requirement": "numpy==1.26.0", + "scope": "install", + "is_runtime": true, + "is_optional": false, + "is_direct": true, + "is_pinned": true + } + ], + "extra_data": { + "package": { + "requests": { + "version": "2.31.0" + }, + "numpy": { + "version": "1.26.0" + } + } + } + } +] \ No newline at end of file diff --git a/tests/packagedcode/test_pypi.py b/tests/packagedcode/test_pypi.py index 3dcfa7d426..f013a93fab 100644 --- a/tests/packagedcode/test_pypi.py +++ b/tests/packagedcode/test_pypi.py @@ -15,6 +15,7 @@ from commoncode.system import on_windows from packagedcode import pypi +from scancode.pylock import parse_pylock from packages_test_utils import check_result_equals_expected_json from packages_test_utils import PackageTester from scancode_config import REGEN_TEST_FIXTURES @@ -428,6 +429,19 @@ def test_parse_pip_inspect_deplock_univers(self): class TestPipRequirementsFileHandler(PackageTester): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') +class TestPylockTomlHandler(PackageTester): + test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + + def test_is_pylock_toml(self): + test_file = self.get_test_loc('pypi/pylock/pylock.toml') + assert pypi.PylockTomlHandler.is_datafile(test_file) + + def test_parse_pylock_toml(self): + test_file = self.get_test_loc('pypi/pylock/pylock.toml') + package = pypi.PylockTomlHandler.parse(test_file) + expected_loc = self.get_test_loc('pypi/pylock/pylock.toml-expected.json') + self.check_packages_data(package, expected_loc, regen=REGEN_TEST_FIXTURES) + def test_python_requirements_is_package_data_file(self): test_file = self.get_test_loc('pypi/requirements_txt/basic/requirements.txt') assert pypi.PipRequirementsFileHandler.is_datafile(test_file)