Skip to content

Commit 223b5c4

Browse files
committed
add S3ObjectIndex Class
1 parent aa9bcfd commit 223b5c4

2 files changed

Lines changed: 288 additions & 1 deletion

File tree

src/gardenlinux/s3/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@
66

77
from .bucket import Bucket
from .s3_artifacts import S3Artifacts
from .s3_object_index import S3ObjectIndex

# Public API of the gardenlinux.s3 subpackage
__all__ = ["Bucket", "S3Artifacts", "S3ObjectIndex"]
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""
4+
S3 object index with flavors filtering
5+
"""
6+
7+
import base64
8+
import json
9+
import logging
10+
import os
11+
import subprocess
12+
import time
13+
import yaml
14+
from typing import Any, Optional
15+
16+
from ..flavors.parser import Parser
17+
from ..logger import LoggerSetup
18+
from .bucket import Bucket
19+
20+
21+
class S3ObjectIndex(object):
    """
    S3 object index class with flavors filtering capabilities

    :author: Garden Linux Maintainers
    :copyright: Copyright 2024 SAP SE
    :package: gardenlinux
    :subpackage: s3
    :since: 0.9.0
    :license: https://www.apache.org/licenses/LICENSE-2.0
              Apache License, Version 2.0
    """

    def __init__(
        self,
        bucket_name: str,
        endpoint_url: Optional[str] = None,
        s3_resource_config: Optional[dict[str, Any]] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Constructor __init__(S3ObjectIndex)

        :param bucket_name: S3 bucket name
        :param endpoint_url: S3 endpoint URL
        :param s3_resource_config: Additional boto3 S3 config values
        :param logger: Logger instance

        :since: 0.9.0
        """

        # Fall back to the package logger when none is given or the given
        # one has no handlers attached (would swallow all output).
        if logger is None or not logger.hasHandlers():
            logger = LoggerSetup.get_logger("gardenlinux.s3")

        self._bucket = Bucket(bucket_name, endpoint_url, s3_resource_config)
        self._logger = logger

    @staticmethod
    def _short_commit(commit: str) -> str:
        """
        Return an abbreviated commit identifier for log messages.

        :param commit: Git commit hash or "latest"
        :returns: First 8 characters of the hash, or "latest" unchanged

        :since: 0.9.0
        """

        return commit[:8] if commit != "latest" else "latest"

    def _fetch_artifacts(self, prefix: str) -> list[str]:
        """
        List all S3 object keys below the given prefix.

        :param prefix: Prefix for S3 objects
        :returns: List of object keys

        :since: 0.9.0
        """

        return [s3_object.key for s3_object in self._bucket.objects.filter(Prefix=prefix).all()]

    def get_index(
        self,
        prefix: str,
        cache_file: Optional[str] = None,
        cache_ttl: int = 3600,
    ) -> dict[str, Any]:
        """
        Get and cache S3 objects with an indexed list of objects.

        :param prefix: Prefix for S3 objects
        :param cache_file: Path to cache file (optional, enables caching when provided)
        :param cache_ttl: Cache time-to-live in seconds

        :returns: Dictionary containing 'index' and 'artifacts' keys

        :since: 0.9.0
        """

        self._logger.debug(f"Getting object index for prefix: {prefix}")

        # Fetch directly if no caching was requested
        if cache_file is None:
            artifacts = self._fetch_artifacts(prefix)
            self._logger.debug(f"Fetched {len(artifacts)} artifacts without caching")
            return {"index": self._build_index(artifacts), "artifacts": artifacts}

        index_file = cache_file + ".index.json"

        # Try the cache first. The stat calls are inside the try block as
        # well, so a file removed between the existence check and the read
        # (TOCTOU) degrades to a fresh fetch instead of raising.
        try:
            if (
                os.path.exists(cache_file)
                and os.path.exists(index_file)
                and time.time() - os.path.getmtime(cache_file) < cache_ttl
            ):
                with open(cache_file, "r", encoding="utf-8") as f:
                    artifacts = json.load(f)
                with open(index_file, "r", encoding="utf-8") as f:
                    index = json.load(f)

                self._logger.debug("Using cached object index")
                return {"index": index, "artifacts": artifacts}
        except (json.JSONDecodeError, OSError):
            self._logger.warning("Cache files corrupted, fetching fresh data")

        # Cache miss, stale or corrupted: fetch from S3 and rebuild
        artifacts = self._fetch_artifacts(prefix)
        index = self._build_index(artifacts)

        self._logger.info(f"Fetched {len(artifacts)} artifacts from S3")

        # Save cache (best effort - failures only downgrade to uncached mode)
        try:
            with open(cache_file, "w", encoding="utf-8") as f:
                json.dump(artifacts, f)
            with open(index_file, "w", encoding="utf-8") as f:
                json.dump(index, f)
            self._logger.debug("Saved object index to cache")
        except OSError:
            self._logger.warning("Failed to save cache files")

        return {"index": index, "artifacts": artifacts}

    def filter_by_commit(
        self,
        commit: str = "latest",
        prefix: str = "objects/",
        include_only_patterns: Optional[list] = None,
        wildcard_excludes: Optional[list] = None,
        only_build: bool = False,
        only_test: bool = False,
        only_test_platform: bool = False,
        only_publish: bool = False,
        filter_categories: Optional[list] = None,
        exclude_categories: Optional[list] = None,
        cache_file: Optional[str] = None,
        cache_ttl: int = 3600,
    ) -> dict[str, Any]:
        """
        Filter S3 objects based on flavors.yaml from a specific commit.

        :param commit: Git commit hash or "latest"
        :param prefix: S3 prefix for objects
        :param include_only_patterns: Include pattern list
        :param wildcard_excludes: Exclude wildcard list
        :param only_build: Return only build-enabled flavors
        :param only_test: Return only test-enabled flavors
        :param only_test_platform: Return only platform-test-enabled flavors
        :param only_publish: Return only flavors to be published
        :param filter_categories: List of categories to include
        :param exclude_categories: List of categories to exclude
        :param cache_file: Path to cache file (optional)
        :param cache_ttl: Cache time-to-live in seconds

        :returns: Dictionary containing filtered flavors and artifacts

        :since: 0.9.0
        """

        # Avoid mutable default arguments: normalize the optional list
        # parameters to fresh empty lists.
        include_only_patterns = [] if include_only_patterns is None else include_only_patterns
        wildcard_excludes = [] if wildcard_excludes is None else wildcard_excludes
        filter_categories = [] if filter_categories is None else filter_categories
        exclude_categories = [] if exclude_categories is None else exclude_categories

        commit_short = self._short_commit(commit)
        self._logger.debug(f"Filtering objects by commit {commit_short}")

        # Failure result mirrors the success shape; the legacy "artifacts"
        # key is kept for backward compatibility with earlier callers.
        empty_result = {
            "filtered_combinations": [],
            "filtered_artifacts": [],
            "all_artifacts": [],
            "artifacts": [],
            "index": {},
            "commit": commit,
        }

        # Get flavors data from GitHub
        flavors_data = self._fetch_flavors_from_github(commit)
        if not flavors_data:
            self._logger.warning(f"Could not fetch flavors.yaml for commit {commit_short}")
            return empty_result

        # Parse flavors and apply filters
        try:
            parser = Parser(flavors_data, logger=self._logger)
            filtered_combinations = parser.filter(
                include_only_patterns=include_only_patterns,
                wildcard_excludes=wildcard_excludes,
                only_build=only_build,
                only_test=only_test,
                only_test_platform=only_test_platform,
                only_publish=only_publish,
                filter_categories=filter_categories,
                exclude_categories=exclude_categories,
            )
        except Exception as e:
            self._logger.error(f"Error parsing flavors data: {e}")
            return empty_result

        # Get S3 objects
        s3_data = self.get_index(prefix, cache_file, cache_ttl)
        artifacts = s3_data.get("artifacts", [])
        index = s3_data.get("index", {})

        # Filter artifacts based on flavors
        filtered_artifacts = self._filter_artifacts_by_flavors(artifacts, filtered_combinations)

        self._logger.info(f"Filtered {len(artifacts)} artifacts to {len(filtered_artifacts)} based on commit {commit_short}")

        return {
            "filtered_combinations": filtered_combinations,
            "filtered_artifacts": filtered_artifacts,
            "all_artifacts": artifacts,
            "index": index,
            "commit": commit,
        }

    def _fetch_flavors_from_github(self, commit: str) -> Optional[str]:
        """
        Fetch flavors.yaml from GitHub for a specific commit.

        :param commit: Git commit hash or "latest"
        :returns: Flavors YAML content as string, or None if failed

        :since: 0.9.0
        """

        try:
            api_path = "/repos/gardenlinux/gardenlinux/contents/flavors.yaml"
            if commit != "latest":
                api_path = f"{api_path}?ref={commit}"

            command = ["gh", "api", api_path]
            commit_short = self._short_commit(commit)
            self._logger.debug(f"Fetching flavors.yaml from GitHub for commit {commit_short}")

            # No check=True: the return code is inspected manually below, so
            # subprocess.CalledProcessError is never raised here.
            result = subprocess.run(
                command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
            )

            if result.returncode != 0:
                self._logger.warning(f"Failed to fetch flavors.yaml: {result.stderr}")
                return None

            # Parse GitHub API response
            response_data = json.loads(result.stdout)
            if "content" not in response_data:
                self._logger.warning("No content field in GitHub API response")
                return None

            # Decode base64 content
            flavors_content = base64.b64decode(response_data["content"]).decode("utf-8")
            self._logger.debug(f"Successfully fetched flavors.yaml for commit {commit_short}")
            return flavors_content
        except OSError as e:
            # Raised e.g. when the "gh" binary is not installed
            self._logger.error(f"GitHub CLI command failed: {e}")
            return None
        except json.JSONDecodeError as e:
            self._logger.error(f"Failed to parse GitHub API response: {e}")
            return None
        except Exception as e:
            self._logger.error(f"Unexpected error fetching flavors.yaml: {e}")
            return None

    def _filter_artifacts_by_flavors(self, artifacts: list[str], combinations: list[tuple]) -> list[str]:
        """
        Filter artifacts based on flavor combinations.

        :param artifacts: List of S3 artifact keys
        :param combinations: List of (arch, combination) tuples from flavors filter
        :returns: Filtered list of artifacts

        :since: 0.9.0
        """

        if not combinations:
            return []

        # Extract combination names (without arch prefix)
        combination_names = {combo[1] for combo in combinations}

        filtered_artifacts = []
        for artifact in artifacts:
            if not artifact.startswith("objects/"):
                continue

            # Expected key format: objects/{cname}/filename
            parts = artifact.split("/")
            if len(parts) < 3:
                continue

            cname = parts[1]
            # Keep the artifact if its cname matches any filtered combination
            if any(cname.startswith(combo_name) for combo_name in combination_names):
                filtered_artifacts.append(artifact)

        return filtered_artifacts

    def _build_index(self, objects: list[str]) -> dict[str, list[str]]:
        """
        Build an index of objects for faster searching.

        :param objects: List of object keys
        :returns: Dictionary index with simple objects list

        :since: 0.9.0
        """

        cnames = set()
        for key in objects:
            # Only keys shaped like objects/{cname}/filename contribute a cname
            if key.startswith("objects/"):
                parts = key.split("/")
                if len(parts) >= 3:
                    cnames.add(parts[1])

        self._logger.debug(f"Built index with {len(cnames)} unique objects")
        return {"objects": sorted(cnames)}

0 commit comments

Comments
 (0)