From 4426407fe869ccbc093c24ddac3de2f7c1754b7b Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 10 Feb 2025 15:34:38 -0600 Subject: [PATCH 1/2] Disambiguate SPDXIDs when merging source and externals SBOMs --- sbom.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/sbom.py b/sbom.py index 42130e4f..009c15c3 100644 --- a/sbom.py +++ b/sbom.py @@ -223,6 +223,38 @@ def recursive_sort_in_place(value: list[Any] | dict[str, Any]) -> None: recursive_sort_in_place(cast(dict[str, Any], sbom_data)) +def check_sbom_data(sbom_data): + """Check SBOM data for common issues""" + + def check_id_duplicates(sbom_components: list[dict[str, typing.Any]]) -> set[str]: + all_ids = set() + for sbom_component in sbom_components: + sbom_component_id = sbom_component["SPDXID"] + assert sbom_component_id not in all_ids + all_ids.add(sbom_component_id) + return all_ids + + all_package_ids = check_id_duplicates(sbom_data["packages"]) + all_file_ids = check_id_duplicates(sbom_data["files"]) + + # Check that no files and packages have the same ID. + assert not all_package_ids.intersection(all_file_ids) + all_sbom_ids = all_package_ids | all_file_ids + + # Check that all relationships use existing IDs. + for sbom_relationship in sbom_data["relationships"]: + + # The exception being 'DESCRIBES' with the meta 'document' ID + if ( + sbom_relationship["spdxElementId"] == "SPDXRef-DOCUMENT" + and sbom_relationship["relationshipType"] == "DESCRIBES" + ): + continue + + assert sbom_relationship["spdxElementId"] in all_sbom_ids + assert sbom_relationship["relatedSpdxElement"] in all_sbom_ids + + def fetch_package_metadata_from_pypi( project: str, version: str, filename: str | None = None ) -> tuple[str, str]: @@ -686,6 +718,11 @@ def create_sbom_for_windows_artifact( with (cpython_source_dir / "Misc/sbom.spdx.json").open() as f: source_sbom_data = json.loads(f.read()) for sbom_package in source_sbom_data["packages"]: + # Update the SPDX ID to avoid collisions with + # the 'externals' SBOM. + sbom_package["SPDXID"] = spdx_id( + f"SPDXRef-PACKAGE-{sbom_package['name']}-{sbom_package['versionInfo']}" + ) sbom_data["packages"].append(sbom_package) create_cpython_sbom( @@ -755,6 +792,10 @@ def main() -> None: # Normalize SBOM data for reproducibility. normalize_sbom_data(sbom_data) + + # Check SBOM for validity. + check_sbom_data(sbom_data) + with open(artifact_path + ".spdx.json", mode="w") as f: f.truncate() f.write(json.dumps(sbom_data, indent=2, sort_keys=True)) From dfdd221a7c6e3cf74987908223c104f5ea2dcdc4 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 10 Feb 2025 16:29:04 -0600 Subject: [PATCH 2/2] Add type annotations to 'check_sbom_data' --- sbom.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sbom.py b/sbom.py index 009c15c3..9d972fac 100644 --- a/sbom.py +++ b/sbom.py @@ -223,10 +223,10 @@ def recursive_sort_in_place(value: list[Any] | dict[str, Any]) -> None: recursive_sort_in_place(cast(dict[str, Any], sbom_data)) -def check_sbom_data(sbom_data): +def check_sbom_data(sbom_data: SBOM) -> None: """Check SBOM data for common issues""" - def check_id_duplicates(sbom_components: list[dict[str, typing.Any]]) -> set[str]: + def check_id_duplicates(sbom_components: list[Package] | list[File]) -> set[str]: all_ids = set() for sbom_component in sbom_components: sbom_component_id = sbom_component["SPDXID"]