|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +""" |
| 4 | +S3 object index with flavors filtering |
| 5 | +""" |
| 6 | + |
| 7 | +import base64 |
| 8 | +import json |
| 9 | +import logging |
| 10 | +import os |
| 11 | +import subprocess |
| 12 | +import time |
| 13 | +import yaml |
| 14 | +from typing import Any, Optional |
| 15 | + |
| 16 | +from ..flavors.parser import Parser |
| 17 | +from ..logger import LoggerSetup |
| 18 | +from .bucket import Bucket |
| 19 | + |
| 20 | + |
class S3ObjectIndex(object):
    """
    S3 object index class with flavors filtering capabilities

    :author: Garden Linux Maintainers
    :copyright: Copyright 2024 SAP SE
    :package: gardenlinux
    :subpackage: s3
    :since: 0.9.0
    :license: https://www.apache.org/licenses/LICENSE-2.0
              Apache License, Version 2.0
    """

    def __init__(
        self,
        bucket_name: str,
        endpoint_url: Optional[str] = None,
        s3_resource_config: Optional[dict[str, Any]] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Constructor __init__(S3ObjectIndex)

        :param bucket_name: S3 bucket name
        :param endpoint_url: S3 endpoint URL
        :param s3_resource_config: Additional boto3 S3 config values
        :param logger: Logger instance

        :since: 0.9.0
        """

        # Fall back to the package logger when none is given or the given
        # logger has no handlers attached (it would otherwise log nowhere).
        if logger is None or not logger.hasHandlers():
            logger = LoggerSetup.get_logger("gardenlinux.s3")

        self._bucket = Bucket(bucket_name, endpoint_url, s3_resource_config)
        self._logger = logger

    def get_index(
        self,
        prefix: str,
        cache_file: Optional[str] = None,
        cache_ttl: int = 3600,
    ) -> dict[str, Any]:
        """
        Get and cache S3 objects with an indexed list of objects.

        :param prefix: Prefix for S3 objects
        :param cache_file: Path to cache file (optional, enables caching when provided)
        :param cache_ttl: Cache time-to-live in seconds

        :returns: Dictionary containing 'index' and 'artifacts' keys

        :since: 0.9.0
        """

        self._logger.debug(f"Getting object index for prefix: {prefix}")

        # Fetch directly if no caching
        if cache_file is None:
            artifacts = self._fetch_artifacts(prefix)
            self._logger.debug(f"Fetched {len(artifacts)} artifacts without caching")
            return {"index": self._build_index(artifacts), "artifacts": artifacts}

        # The index is cached next to the artifact list.
        index_file = cache_file + ".index.json"

        cached = self._load_cache(cache_file, index_file, cache_ttl)
        if cached is not None:
            return cached

        # Fetch from S3 and cache
        artifacts = self._fetch_artifacts(prefix)
        index = self._build_index(artifacts)

        self._logger.info(f"Fetched {len(artifacts)} artifacts from S3")

        self._save_cache(cache_file, index_file, artifacts, index)

        return {"index": index, "artifacts": artifacts}

    def _fetch_artifacts(self, prefix: str) -> list[str]:
        """
        List all S3 object keys under the given prefix.

        :param prefix: Prefix for S3 objects
        :returns: List of object keys

        :since: 0.9.0
        """

        return [
            s3_object.key
            for s3_object in self._bucket.objects.filter(Prefix=prefix).all()
        ]

    def _load_cache(
        self, cache_file: str, index_file: str, cache_ttl: int
    ) -> Optional[dict[str, Any]]:
        """
        Load the cached artifact list and index if both files exist and the
        artifact cache is still fresh.

        :param cache_file: Path to the artifact cache file
        :param index_file: Path to the index cache file
        :param cache_ttl: Cache time-to-live in seconds
        :returns: Cached result dictionary, or None on miss/corruption

        :since: 0.9.0
        """

        if not (os.path.exists(cache_file) and os.path.exists(index_file)):
            return None

        # Freshness is judged by the artifact cache file's mtime only.
        if time.time() - os.path.getmtime(cache_file) >= cache_ttl:
            return None

        try:
            with open(cache_file, "r") as f:
                artifacts = json.load(f)
            with open(index_file, "r") as f:
                index = json.load(f)
        except (json.JSONDecodeError, OSError):
            self._logger.warning("Cache files corrupted, fetching fresh data")
            return None

        self._logger.debug("Using cached object index")
        return {"index": index, "artifacts": artifacts}

    def _save_cache(
        self,
        cache_file: str,
        index_file: str,
        artifacts: list[str],
        index: dict[str, Any],
    ) -> None:
        """
        Persist the artifact list and index to the cache files; failures are
        logged but not raised (caching is best-effort).

        :param cache_file: Path to the artifact cache file
        :param index_file: Path to the index cache file
        :param artifacts: Artifact key list to cache
        :param index: Index dictionary to cache

        :since: 0.9.0
        """

        try:
            with open(cache_file, "w") as f:
                json.dump(artifacts, f)
            with open(index_file, "w") as f:
                json.dump(index, f)
            self._logger.debug("Saved object index to cache")
        except OSError:
            self._logger.warning("Failed to save cache files")

    def filter_by_commit(
        self,
        commit: str = "latest",
        prefix: str = "objects/",
        include_only_patterns: Optional[list] = None,
        wildcard_excludes: Optional[list] = None,
        only_build: bool = False,
        only_test: bool = False,
        only_test_platform: bool = False,
        only_publish: bool = False,
        filter_categories: Optional[list] = None,
        exclude_categories: Optional[list] = None,
        cache_file: Optional[str] = None,
        cache_ttl: int = 3600,
    ) -> dict[str, Any]:
        """
        Filter S3 objects based on flavors.yaml from a specific commit.

        :param commit: Git commit hash or "latest"
        :param prefix: S3 prefix for objects
        :param include_only_patterns: Include pattern list
        :param wildcard_excludes: Exclude wildcard list
        :param only_build: Return only build-enabled flavors
        :param only_test: Return only test-enabled flavors
        :param only_test_platform: Return only platform-test-enabled flavors
        :param only_publish: Return only flavors to be published
        :param filter_categories: List of categories to include
        :param exclude_categories: List of categories to exclude
        :param cache_file: Path to cache file (optional)
        :param cache_ttl: Cache time-to-live in seconds

        :returns: Dictionary containing filtered flavors and artifacts

        :since: 0.9.0
        """

        # Normalize mutable defaults (a literal [] default would be shared
        # across calls and could be mutated by the callee).
        include_only_patterns = include_only_patterns or []
        wildcard_excludes = wildcard_excludes or []
        filter_categories = filter_categories or []
        exclude_categories = exclude_categories or []

        commit_short = self._short_commit(commit)
        self._logger.debug(f"Filtering objects by commit {commit_short}")

        # Get flavors data from GitHub
        flavors_data = self._fetch_flavors_from_github(commit)
        if not flavors_data:
            self._logger.warning(f"Could not fetch flavors.yaml for commit {commit_short}")
            return self._empty_filter_result(commit)

        # Parse flavors and apply filters
        try:
            parser = Parser(flavors_data, logger=self._logger)
            filtered_combinations = parser.filter(
                include_only_patterns=include_only_patterns,
                wildcard_excludes=wildcard_excludes,
                only_build=only_build,
                only_test=only_test,
                only_test_platform=only_test_platform,
                only_publish=only_publish,
                filter_categories=filter_categories,
                exclude_categories=exclude_categories,
            )
        except Exception as e:
            self._logger.error(f"Error parsing flavors data: {e}")
            return self._empty_filter_result(commit)

        # Get S3 objects
        s3_data = self.get_index(prefix, cache_file, cache_ttl)
        artifacts = s3_data.get("artifacts", [])
        index = s3_data.get("index", {})

        # Filter artifacts based on flavors
        filtered_artifacts = self._filter_artifacts_by_flavors(artifacts, filtered_combinations)

        self._logger.info(f"Filtered {len(artifacts)} artifacts to {len(filtered_artifacts)} based on commit {commit_short}")

        return {
            "filtered_combinations": filtered_combinations,
            "filtered_artifacts": filtered_artifacts,
            "all_artifacts": artifacts,
            "index": index,
            "commit": commit,
        }

    @staticmethod
    def _short_commit(commit: str) -> str:
        """
        Return an abbreviated commit identifier for log output.

        :param commit: Git commit hash or "latest"
        :returns: First 8 characters of the hash, or "latest" unchanged

        :since: 0.9.0
        """

        return commit[:8] if commit != "latest" else "latest"

    @staticmethod
    def _empty_filter_result(commit: str) -> dict[str, Any]:
        """
        Build the empty result returned on filter failure.

        Includes the success-path keys ('filtered_artifacts', 'all_artifacts',
        'commit') as well as the legacy 'artifacts' key so both old and new
        callers see a consistent shape.

        :param commit: Git commit hash or "latest"
        :returns: Empty result dictionary

        :since: 0.9.0
        """

        return {
            "filtered_combinations": [],
            "filtered_artifacts": [],
            "all_artifacts": [],
            "artifacts": [],
            "index": {},
            "commit": commit,
        }

    def _fetch_flavors_from_github(self, commit: str) -> Optional[str]:
        """
        Fetch flavors.yaml from GitHub for a specific commit.

        :param commit: Git commit hash or "latest"
        :returns: Flavors YAML content as string, or None if failed

        :since: 0.9.0
        """

        try:
            # Try flavors.yaml first
            api_path = "/repos/gardenlinux/gardenlinux/contents/flavors.yaml"
            if commit != "latest":
                api_path = f"{api_path}?ref={commit}"

            command = ["gh", "api", api_path]
            commit_short = self._short_commit(commit)
            self._logger.debug(f"Fetching flavors.yaml from GitHub for commit {commit_short}")

            # check=False: a non-zero exit is handled via returncode below, so
            # CalledProcessError cannot be raised here.
            result = subprocess.run(
                command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
            )

            if result.returncode != 0:
                self._logger.warning(f"Failed to fetch flavors.yaml: {result.stderr}")
                return None

            # Parse GitHub API response
            response_data = json.loads(result.stdout)
            if "content" not in response_data:
                self._logger.warning("No content field in GitHub API response")
                return None

            # Decode base64 content
            flavors_content = base64.b64decode(response_data["content"]).decode("utf-8")
            self._logger.debug(f"Successfully fetched flavors.yaml for commit {commit_short}")
            return flavors_content

        except OSError as e:
            # Raised e.g. when the `gh` binary is not installed.
            self._logger.error(f"GitHub CLI command failed: {e}")
            return None
        except json.JSONDecodeError as e:
            self._logger.error(f"Failed to parse GitHub API response: {e}")
            return None
        except Exception as e:
            self._logger.error(f"Unexpected error fetching flavors.yaml: {e}")
            return None

    def _filter_artifacts_by_flavors(self, artifacts: list[str], combinations: list[tuple]) -> list[str]:
        """
        Filter artifacts based on flavor combinations.

        :param artifacts: List of S3 artifact keys
        :param combinations: List of (arch, combination) tuples from flavors filter
        :returns: Filtered list of artifacts

        :since: 0.9.0
        """

        if not combinations:
            return []

        # Extract combination names (without arch prefix)
        combination_names = {combo[1] for combo in combinations}

        filtered_artifacts = []
        for artifact in artifacts:
            # Expected key format: objects/{cname}/filename
            if not artifact.startswith("objects/"):
                continue

            parts = artifact.split("/")
            if len(parts) < 3:
                continue

            cname = parts[1]
            # Check if this cname matches any of the filtered combinations
            if any(cname.startswith(combo_name) for combo_name in combination_names):
                filtered_artifacts.append(artifact)

        return filtered_artifacts

    def _build_index(self, objects: list[str]) -> dict[str, list[str]]:
        """
        Build an index of objects for faster searching.

        :param objects: List of object keys
        :returns: Dictionary index with simple objects list
        :since: 0.9.0
        """

        cnames = {
            obj.split("/")[1]
            for obj in objects
            if obj.startswith("objects/") and len(obj.split("/")) >= 3
        }
        self._logger.debug(f"Built index with {len(cnames)} unique objects")
        return {"objects": sorted(cnames)}
0 commit comments