Skip to content

Commit ea5c9de

Browse files
committed
Deduplicate SPDX IDs with hash suffixes
1 parent 4c00245 commit ea5c9de

File tree

3 files changed

+22
-12
lines changed

3 files changed

+22
-12
lines changed

sbom.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import typing
2626
import zipfile
2727
from pathlib import Path
28-
from typing import Any, NotRequired, TypedDict, cast
28+
from typing import Any, LiteralString, NotRequired, TypedDict, cast
2929
from urllib.request import urlopen
3030

3131

@@ -90,9 +90,19 @@ class CreationInfo(TypedDict):
9090
licenseListVersion: str
9191

9292

93-
def spdx_id(value: str) -> str:
93+
# Cache mapping each generated SPDX ID to the value it was derived from. We use this
94+
# to detect two different values that would end up with the same SPDX ID.
95+
_SPDX_IDS_TO_VALUES = {}
96+
97+
98+
def spdx_id(value: LiteralString) -> str:
9499
"""Encode a value into characters that are valid in an SPDX ID"""
95-
return re.sub(r"[^a-zA-Z0-9.\-]+", "-", value)
100+
spdx_id = re.sub(r"[^a-zA-Z0-9.\-]+", "-", value)
101+
# To avoid collisions we append a hash suffix.
102+
suffix = hashlib.sha256(value.encode()).hexdigest()[:8]
103+
spdx_id = f"{spdx_id}-{suffix}"
104+
assert _SPDX_IDS_TO_VALUES.setdefault(spdx_id, value) == value
105+
return spdx_id
96106

97107

98108
def calculate_package_verification_codes(sbom: SBOM) -> None:

tests/sbom/sbom-with-pip-removed.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
"packages": [],
1414
"relationships": [
1515
{
16-
"relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING",
16+
"relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING-497fb0c3",
1717
"relationshipType": "CONTAINS",
18-
"spdxElementId": "SPDXRef-PACKAGE-expat"
18+
"spdxElementId": "SPDXRef-PACKAGE-expat-83b93528"
1919
}
2020
]
2121
}

tests/sbom/sbom-with-pip.json

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
"files": [],
1313
"packages": [
1414
{
15-
"SPDXID": "SPDXRef-PACKAGE-pip",
15+
"SPDXID": "SPDXRef-PACKAGE-pip-ced959c1",
1616
"name": "pip",
1717
"versionInfo": "24.0",
1818
"licenseConcluded": "MIT",
@@ -38,19 +38,19 @@
3838
],
3939
"relationships": [
4040
{
41-
"relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING",
41+
"relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING-497fb0c3",
4242
"relationshipType": "CONTAINS",
43-
"spdxElementId": "SPDXRef-PACKAGE-expat"
43+
"spdxElementId": "SPDXRef-PACKAGE-expat-83b93528"
4444
},
4545
{
46-
"relatedSpdxElement": "SPDXRef-PACKAGE-urllib3",
46+
"relatedSpdxElement": "SPDXRef-PACKAGE-urllib3-b7a198af",
4747
"relationshipType": "DEPENDS_ON",
48-
"spdxElementId": "SPDXRef-PACKAGE-pip"
48+
"spdxElementId": "SPDXRef-PACKAGE-pip-ced959c1"
4949
},
5050
{
51-
"relatedSpdxElement": "SPDXRef-PACKAGE-pip",
51+
"relatedSpdxElement": "SPDXRef-PACKAGE-pip-ced959c1",
5252
"relationshipType": "DEPENDS_ON",
53-
"spdxElementId": "SPDXRef-PACKAGE-cpython"
53+
"spdxElementId": "SPDXRef-PACKAGE-cpython-608f998c"
5454
}
5555
]
5656
}

0 commit comments

Comments
 (0)