Skip to content

Commit c7397ac

Browse files
committed
fix: scanning of older artifacts / refactoring
1 parent: 84c908c · commit: c7397ac

1 file changed

Lines changed: 68 additions & 29 deletions

File tree

stack_scanner/main.py

Lines changed: 68 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,9 @@
2828
MAX_AGE_DAYS = 180
2929
SECOBSERVE_API_BASE_URL = "https://secobserve-backend.stackable.tech"
3030
SECOBSERVE_SCANNER_IMAGE = "oci.stackable.tech/sandbox/secobserve-scanners:latest"
31+
DEV_RELEASE = "0.0.0-dev"
32+
33+
_PR_TAG_RE = re.compile(r"-pr\d+")
3134

3235
# Additional images to scan that are not part of the regular versioned release.
3336
# These are third-party or infrastructure images referenced by the Stackable platform.
@@ -62,18 +65,19 @@ def harbor_api_request(path: str, params: dict | None = None) -> list | dict | N
6265
return None
6366

6467

65-
def get_harbor_recent_tags(project: str, repository: str) -> list[str] | None:
66-
"""Return tags pushed within the last MAX_AGE_DAYS days for a Harbor repository.
68+
def _iter_harbor_tagged_artifacts(
69+
project: str, repository: str
70+
) -> list[tuple[datetime.datetime | None, list[str]]] | None:
71+
"""Paginate all tagged artifacts for a Harbor repository.
6772
68-
Tags belonging to artifacts that have no push_time metadata are included
69-
conservatively (i.e. treated as recent). Returns None when the Harbor API
70-
is unreachable so the caller can decide how to handle the failure.
73+
Returns a list of (push_time, tag_names) pairs, where push_time is None when
74+
the timestamp is missing or unparseable. PR-tagged artifacts are excluded.
75+
Returns None when the Harbor API is unreachable.
7176
"""
7277
encoded_repo = urllib.parse.quote(repository, safe="")
7378
path = f"/projects/{project}/repositories/{encoded_repo}/artifacts"
74-
cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=MAX_AGE_DAYS)
7579

76-
tags: list[str] = []
80+
result: list[tuple[datetime.datetime | None, list[str]]] = []
7781
page = 1
7882
page_size = 100
7983

@@ -89,30 +93,55 @@ def get_harbor_recent_tags(project: str, repository: str) -> list[str] | None:
8993
artifact_tags = [
9094
tag["name"]
9195
for tag in (artifact.get("tags") or [])
92-
if not re.search(r"-pr\d+", tag["name"])
96+
if not _PR_TAG_RE.search(tag["name"])
9397
]
9498
if not artifact_tags:
9599
continue
96100

101+
push_time: datetime.datetime | None = None
97102
push_time_str = artifact.get("push_time")
98-
if not push_time_str:
99-
# No push_time available, include conservatively.
100-
tags.extend(artifact_tags)
101-
continue
103+
if push_time_str:
104+
try:
105+
push_time = datetime.datetime.fromisoformat(push_time_str.replace("Z", "+00:00"))
106+
except ValueError:
107+
pass
102108

103-
try:
104-
push_time = datetime.datetime.fromisoformat(push_time_str.replace("Z", "+00:00"))
105-
if push_time >= cutoff:
106-
tags.extend(artifact_tags)
107-
except ValueError:
108-
# Unparseable timestamp, include conservatively.
109-
tags.extend(artifact_tags)
109+
result.append((push_time, artifact_tags))
110110

111111
if len(artifacts) < page_size:
112112
break
113113
page += 1
114114

115-
return tags
115+
return result
116+
117+
118+
def get_harbor_tags(
119+
project: str, repository: str
120+
) -> tuple[list[str], str | None] | None:
121+
"""Return (recent_tags, latest_tag) for a Harbor repository in a single API pass.
122+
123+
recent_tags contains tags pushed within the last MAX_AGE_DAYS days; artifacts
124+
without a parseable push_time are included conservatively. latest_tag is the
125+
tag from the most recently pushed artifact with a parseable timestamp, or None.
126+
Returns None when the Harbor API is unreachable.
127+
"""
128+
artifact_data = _iter_harbor_tagged_artifacts(project, repository)
129+
if artifact_data is None:
130+
return None
131+
132+
cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=MAX_AGE_DAYS)
133+
recent_tags: list[str] = []
134+
latest_tag: str | None = None
135+
latest_time: datetime.datetime | None = None
136+
137+
for push_time, artifact_tags in artifact_data:
138+
if push_time is None or push_time >= cutoff:
139+
recent_tags.extend(artifact_tags)
140+
if push_time is not None and (latest_time is None or push_time > latest_time):
141+
latest_time = push_time
142+
latest_tag = artifact_tags[0]
143+
144+
return recent_tags, latest_tag
116145

117146

118147
def get_latest_github_release(owner: str, repo: str) -> str | None:
@@ -245,20 +274,29 @@ def scan_additional_images(secobserve_api_token: str) -> None:
245274
product_name = image_config["product_name"]
246275

247276
print(f"Querying Harbor API for recent tags of {project}/{repository}...")
248-
tags = get_harbor_recent_tags(project, repository)
277+
result = get_harbor_tags(project, repository)
249278

250-
if tags is None:
279+
if result is None:
251280
print(
252281
f"WARNING: Harbor API unavailable for {project}/{repository}. "
253282
"Skipping – re-run once the registry is reachable."
254283
)
255284
continue
256285

257-
if not tags:
258-
print(f"No tags pushed within the last {MAX_AGE_DAYS} days for {project}/{repository}, skipping.")
286+
recent_tags, latest_tag = result
287+
if recent_tags:
288+
tags = recent_tags
289+
print(f"Found {len(tags)} recent tag(s) for {project}/{repository}: {tags}")
290+
elif latest_tag is not None:
291+
print(
292+
f"No tags pushed within the last {MAX_AGE_DAYS} days for {project}/{repository}, "
293+
"falling back to most recently pushed tag."
294+
)
295+
tags = [latest_tag]
296+
else:
297+
print(f"WARNING: No tagged artifacts found for {project}/{repository}, skipping.")
259298
continue
260299

261-
print(f"Found {len(tags)} recent tag(s) for {project}/{repository}: {tags}")
262300
for tag in tags:
263301
image = f"{REGISTRY_URL}/{project}/{repository}:{tag}"
264302
scan_image(secobserve_api_token, image, product_name, tag)
@@ -290,9 +328,7 @@ def main():
290328
else:
291329
secobserve_api_token = sys.argv[2]
292330
release = sys.argv[3]
293-
checkout = "tags/" + release
294-
if release == "0.0.0-dev":
295-
checkout = "main"
331+
checkout = "main" if release == DEV_RELEASE else "tags/" + release
296332

297333
subprocess.run(["git", "fetch", "--all"], cwd="docker-images")
298334
subprocess.run(["git", "checkout", checkout], cwd="docker-images")
@@ -382,7 +418,10 @@ def main():
382418
scan_additional_images(secobserve_api_token)
383419

384420
# Scan the latest stackablectl binary from GitHub releases.
385-
scan_stackablectl(secobserve_api_token)
421+
# Only run for the dev release to avoid redundant scans when multiple releases
422+
# are processed in the same workflow run (stackablectl is release-independent).
423+
if release == DEV_RELEASE:
424+
scan_stackablectl(secobserve_api_token)
386425

387426

388427
def scan_image(

0 commit comments

Comments
 (0)