# Age window, in days, used when selecting "recent" Harbor tags to scan.
MAX_AGE_DAYS = 180
# Base URL of the SecObserve backend.
SECOBSERVE_API_BASE_URL = "https://secobserve-backend.stackable.tech"
# Container image that provides the SecObserve scanners.
SECOBSERVE_SCANNER_IMAGE = "oci.stackable.tech/sandbox/secobserve-scanners:latest"
# Release identifier of the development release; main() checks out "main"
# instead of a release tag when asked to scan this release.
DEV_RELEASE = "0.0.0-dev"

# Matches the "-pr<number>" marker in tag names so that tags built from pull
# requests can be filtered out. Compiled once at import time.
_PR_TAG_RE = re.compile(r"-pr\d+")
3134
# Additional images to scan that are not part of the regular versioned release.
# These are third-party or infrastructure images referenced by the Stackable platform.
@@ -62,18 +65,19 @@ def harbor_api_request(path: str, params: dict | None = None) -> list | dict | N
6265 return None
6366
6467
65- def get_harbor_recent_tags (project : str , repository : str ) -> list [str ] | None :
66- """Return tags pushed within the last MAX_AGE_DAYS days for a Harbor repository.
68+ def _iter_harbor_tagged_artifacts (
69+ project : str , repository : str
70+ ) -> list [tuple [datetime .datetime | None , list [str ]]] | None :
71+ """Paginate all tagged artifacts for a Harbor repository.
6772
68- Tags belonging to artifacts that have no push_time metadata are included
69- conservatively (i.e. treated as recent). Returns None when the Harbor API
70- is unreachable so the caller can decide how to handle the failure .
73+ Returns a list of (push_time, tag_names) pairs, where push_time is None when
74+ the timestamp is missing or unparseable. PR-tagged artifacts are excluded.
75+ Returns None when the Harbor API is unreachable .
7176 """
7277 encoded_repo = urllib .parse .quote (repository , safe = "" )
7378 path = f"/projects/{ project } /repositories/{ encoded_repo } /artifacts"
74- cutoff = datetime .datetime .now (datetime .timezone .utc ) - datetime .timedelta (days = MAX_AGE_DAYS )
7579
76- tags : list [str ] = []
80+ result : list [tuple [ datetime . datetime | None , list [ str ]] ] = []
7781 page = 1
7882 page_size = 100
7983
@@ -89,30 +93,55 @@ def get_harbor_recent_tags(project: str, repository: str) -> list[str] | None:
8993 artifact_tags = [
9094 tag ["name" ]
9195 for tag in (artifact .get ("tags" ) or [])
92- if not re .search (r"-pr\d+" , tag ["name" ])
96+ if not _PR_TAG_RE .search (tag ["name" ])
9397 ]
9498 if not artifact_tags :
9599 continue
96100
101+ push_time : datetime .datetime | None = None
97102 push_time_str = artifact .get ("push_time" )
98- if not push_time_str :
99- # No push_time available, include conservatively.
100- tags .extend (artifact_tags )
101- continue
103+ if push_time_str :
104+ try :
105+ push_time = datetime .datetime .fromisoformat (push_time_str .replace ("Z" , "+00:00" ))
106+ except ValueError :
107+ pass
102108
103- try :
104- push_time = datetime .datetime .fromisoformat (push_time_str .replace ("Z" , "+00:00" ))
105- if push_time >= cutoff :
106- tags .extend (artifact_tags )
107- except ValueError :
108- # Unparseable timestamp, include conservatively.
109- tags .extend (artifact_tags )
109+ result .append ((push_time , artifact_tags ))
110110
111111 if len (artifacts ) < page_size :
112112 break
113113 page += 1
114114
115- return tags
115+ return result
116+
117+
def get_harbor_tags(
    project: str, repository: str
) -> tuple[list[str], str | None] | None:
    """Return (recent_tags, latest_tag) for a Harbor repository in a single API pass.

    Args:
        project: Harbor project name.
        repository: Repository name within the project.

    Returns:
        A ``(recent_tags, latest_tag)`` tuple, or None when the artifact
        listing could not be obtained (the helper returned None).

        recent_tags contains the tags of artifacts pushed within the last
        MAX_AGE_DAYS days; artifacts without a parseable push_time are
        included conservatively. latest_tag is the first tag of the most
        recently pushed artifact that has a parseable timestamp, or None when
        no artifact has one.
    """
    artifact_data = _iter_harbor_tagged_artifacts(project, repository)
    if artifact_data is None:
        return None

    utc = datetime.timezone.utc
    cutoff = datetime.datetime.now(utc) - datetime.timedelta(days=MAX_AGE_DAYS)
    recent_tags: list[str] = []
    latest_tag: str | None = None
    latest_time: datetime.datetime | None = None

    for push_time, artifact_tags in artifact_data:
        if push_time is None:
            # Unknown age: treat as recent, but never as the "latest" candidate.
            recent_tags.extend(artifact_tags)
            continue

        if push_time.utcoffset() is None:
            # A timestamp without an offset parses to a naive datetime, and
            # comparing it with the aware cutoff would raise TypeError.
            # NOTE(review): assumes Harbor reports push_time in UTC - confirm.
            push_time = push_time.replace(tzinfo=utc)

        if push_time >= cutoff:
            recent_tags.extend(artifact_tags)
        if latest_time is None or push_time > latest_time:
            latest_time = push_time
            latest_tag = artifact_tags[0]

    return recent_tags, latest_tag
116145
117146
118147def get_latest_github_release (owner : str , repo : str ) -> str | None :
@@ -245,20 +274,29 @@ def scan_additional_images(secobserve_api_token: str) -> None:
245274 product_name = image_config ["product_name" ]
246275
247276 print (f"Querying Harbor API for recent tags of { project } /{ repository } ..." )
248- tags = get_harbor_recent_tags (project , repository )
277+ result = get_harbor_tags (project , repository )
249278
250- if tags is None :
279+ if result is None :
251280 print (
252281 f"WARNING: Harbor API unavailable for { project } /{ repository } . "
253282 "Skipping – re-run once the registry is reachable."
254283 )
255284 continue
256285
257- if not tags :
258- print (f"No tags pushed within the last { MAX_AGE_DAYS } days for { project } /{ repository } , skipping." )
286+ recent_tags , latest_tag = result
287+ if recent_tags :
288+ tags = recent_tags
289+ print (f"Found { len (tags )} recent tag(s) for { project } /{ repository } : { tags } " )
290+ elif latest_tag is not None :
291+ print (
292+ f"No tags pushed within the last { MAX_AGE_DAYS } days for { project } /{ repository } , "
293+ "falling back to most recently pushed tag."
294+ )
295+ tags = [latest_tag ]
296+ else :
297+ print (f"WARNING: No tagged artifacts found for { project } /{ repository } , skipping." )
259298 continue
260299
261- print (f"Found { len (tags )} recent tag(s) for { project } /{ repository } : { tags } " )
262300 for tag in tags :
263301 image = f"{ REGISTRY_URL } /{ project } /{ repository } :{ tag } "
264302 scan_image (secobserve_api_token , image , product_name , tag )
@@ -290,9 +328,7 @@ def main():
290328 else :
291329 secobserve_api_token = sys .argv [2 ]
292330 release = sys .argv [3 ]
293- checkout = "tags/" + release
294- if release == "0.0.0-dev" :
295- checkout = "main"
331+ checkout = "main" if release == DEV_RELEASE else "tags/" + release
296332
297333 subprocess .run (["git" , "fetch" , "--all" ], cwd = "docker-images" )
298334 subprocess .run (["git" , "checkout" , checkout ], cwd = "docker-images" )
@@ -382,7 +418,10 @@ def main():
382418 scan_additional_images (secobserve_api_token )
383419
384420 # Scan the latest stackablectl binary from GitHub releases.
385- scan_stackablectl (secobserve_api_token )
421+ # Only run for the dev release to avoid redundant scans when multiple releases
422+ # are processed in the same workflow run (stackablectl is release-independent).
423+ if release == DEV_RELEASE :
424+ scan_stackablectl (secobserve_api_token )
386425
387426
388427def scan_image (
0 commit comments