Skip to content

Commit 68f6fcc

Browse files
committed
feat: add support for scanning additional images from Harbor API
1 parent 70eceed commit 68f6fcc

2 files changed

Lines changed: 126 additions & 2 deletions

File tree

docker-images

Submodule docker-images updated 332 files

stack_scanner/main.py

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1+
import datetime
12
import tempfile
23
import os
34
import subprocess
45
import sys
56
import json
67
import base64
8+
import urllib.error
9+
import urllib.parse
10+
import urllib.request
711

812
excluded_products = [
913
"hello-world",
@@ -19,6 +23,122 @@
1923
]
2024

2125
# Host name of the OCI registry; it also serves the Harbor REST API.
REGISTRY_URL = "oci.stackable.tech"
# Root of the Harbor v2.0 REST API on that registry host.
HARBOR_API_BASE = f"https://{REGISTRY_URL}/api/v2.0"
# Only tags pushed within this many days are picked up for scanning.
MAX_AGE_DAYS = 180

# Extra images that sit outside the regular versioned release: third-party or
# infrastructure images referenced by the Stackable platform. Each entry names
# the Harbor project, the repository inside it and the product name under
# which scan results are reported.
ADDITIONAL_IMAGES = [
    {"project": "sdp", "repository": repository, "product_name": product_name}
    for repository, product_name in (
        ("csi-node-driver-registrar", "csi-node-driver-registrar"),
        ("csi-provisioner", "csi-provisioner"),
        ("git-sync/git-sync", "git-sync"),
        ("stackable-ui", "stackable-ui"),
        ("spark-connect-client", "spark-connect-client"),
    )
]
38+
39+
40+
def harbor_api_request(path: str, params: dict | None = None) -> list | dict | None:
41+
"""Make a request to the Harbor API and return parsed JSON, or None on failure."""
42+
url = f"{HARBOR_API_BASE}{path}"
43+
if params:
44+
url += "?" + urllib.parse.urlencode(params)
45+
46+
request = urllib.request.Request(url)
47+
48+
username = os.environ.get("HARBOR_USERNAME")
49+
password = os.environ.get("HARBOR_PASSWORD")
50+
if username and password:
51+
credentials = base64.b64encode(f"{username}:{password}".encode()).decode()
52+
request.add_header("Authorization", f"Basic {credentials}")
53+
54+
try:
55+
with urllib.request.urlopen(request) as response:
56+
return json.loads(response.read().decode())
57+
except (urllib.error.URLError, json.JSONDecodeError) as error:
58+
print(f"Harbor API request failed for {path}: {error}")
59+
return None
60+
61+
62+
def _pushed_on_or_after(push_time_str: object, cutoff: datetime.datetime) -> bool:
    """Return True when a Harbor push_time is at/after cutoff or cannot be evaluated."""
    if not isinstance(push_time_str, str) or not push_time_str:
        # No usable push_time available, include conservatively.
        return True

    try:
        push_time = datetime.datetime.fromisoformat(push_time_str.replace("Z", "+00:00"))
    except ValueError:
        # Unparseable timestamp, include conservatively.
        return True

    if push_time.tzinfo is None:
        # A timestamp without an offset parses to a naive datetime, and
        # comparing that with the aware cutoff raises TypeError. Treat it as UTC.
        push_time = push_time.replace(tzinfo=datetime.timezone.utc)
    return push_time >= cutoff


def get_harbor_recent_tags(project: str, repository: str) -> list[str] | None:
    """Return tags pushed within the last MAX_AGE_DAYS days for a Harbor repository.

    The artifact listing is fetched page by page (100 artifacts per page)
    until an empty or short page is returned. Untagged artifacts are ignored.
    Tags belonging to artifacts whose push_time is missing or unparseable are
    included conservatively (i.e. treated as recent).

    Args:
        project: Harbor project name.
        repository: Repository name inside the project; may contain slashes.

    Returns:
        The list of recent tag names (possibly empty), or None when a Harbor
        API request fails or returns something other than a list, so the
        caller can decide how to handle the failure.
    """
    encoded_project = urllib.parse.quote(project, safe="")
    # NOTE(review): the Harbor API reference asks for repository names that
    # contain a slash to be URL-encoded twice (a/b -> a%252Fb). Confirm that
    # single encoding is accepted by this registry for "git-sync/git-sync".
    encoded_repo = urllib.parse.quote(repository, safe="")
    path = f"/projects/{encoded_project}/repositories/{encoded_repo}/artifacts"
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=MAX_AGE_DAYS)

    tags: list[str] = []
    page = 1
    page_size = 100

    while True:
        artifacts = harbor_api_request(path, {"page": page, "page_size": page_size, "with_tag": "true"})
        if artifacts is None:
            return None

        if not isinstance(artifacts, list):
            # e.g. an error object instead of the artifact list; iterating it
            # would fail with an AttributeError further down.
            print(f"Unexpected Harbor API response for {path}: expected a list of artifacts")
            return None

        if not artifacts:
            break

        for artifact in artifacts:
            if not isinstance(artifact, dict):
                continue

            artifact_tags = [
                tag["name"]
                for tag in (artifact.get("tags") or [])
                if isinstance(tag, dict) and tag.get("name")
            ]
            if artifact_tags and _pushed_on_or_after(artifact.get("push_time"), cutoff):
                tags.extend(artifact_tags)

        # A short page means this was the last one.
        if len(artifacts) < page_size:
            break
        page += 1

    return tags
109+
110+
111+
def scan_additional_images(secobserve_api_token: str) -> None:
    """Scan every image listed in ADDITIONAL_IMAGES.

    These images are not part of the regular versioned Stackable release, so
    their tags are discovered through the Harbor API: only tags pushed within
    the last MAX_AGE_DAYS days are scanned. An image whose tag lookup fails is
    skipped with a warning, and an image without recent tags is skipped with
    a notice. Each remaining tag is handed to ``scan_image``.

    Args:
        secobserve_api_token: Token passed through unchanged to ``scan_image``.
    """
    for entry in ADDITIONAL_IMAGES:
        project = entry["project"]
        repository = entry["repository"]
        product_name = entry["product_name"]

        print(f"Querying Harbor API for recent tags of {project}/{repository}...")
        recent_tags = get_harbor_recent_tags(project, repository)

        if recent_tags is None:
            # Lookup failed; move on to the next image.
            print(
                f"WARNING: Harbor API unavailable for {project}/{repository}. "
                "Skipping – re-run once the registry is reachable."
            )
        elif not recent_tags:
            print(f"No tags pushed within the last {MAX_AGE_DAYS} days for {project}/{repository}, skipping.")
        else:
            print(f"Found {len(recent_tags)} recent tag(s) for {project}/{repository}: {recent_tags}")
            for tag in recent_tags:
                image = f"{REGISTRY_URL}/{project}/{repository}:{tag}"
                scan_image(secobserve_api_token, image, product_name, tag)
22142

23143

24144
def main():
@@ -64,7 +184,6 @@ def main():
64184
"druid",
65185
"hbase",
66186
"hdfs",
67-
"hello-world",
68187
"hive",
69188
"kafka",
70189
"listener",
@@ -136,6 +255,11 @@ def main():
136255
f"{product_version}-{arch}",
137256
)
138257

258+
# Scan additional infrastructure/third-party images using Harbor API tag discovery.
259+
# This runs once (not per-arch) because tags from Harbor include the arch suffix
260+
# already or are arch-agnostic manifests.
261+
scan_additional_images(secobserve_api_token)
262+
139263

140264
def scan_image(
141265
secobserve_api_token: str,

0 commit comments

Comments
 (0)