diff --git a/tools/tests/metadata_parser/metdata.py b/tools/tests/metadata_parser/metdata.py index 75b5bb425..7bb9863b5 100644 --- a/tools/tests/metadata_parser/metdata.py +++ b/tools/tests/metadata_parser/metdata.py @@ -6,6 +6,9 @@ import itertools from paths import PRECICE_TESTS_DIR, PRECICE_TUTORIAL_DIR +# Import TutorialSource from systemtests.sources (used for external tutorial sources). +from systemtests.sources import TutorialSource + @dataclass class BuildArgument: @@ -279,13 +282,15 @@ def from_cases_tuple(cls, cases: Tuple[Case], tutorial: Tutorial): class ReferenceResult: path: Path case_combination: CaseCombination + base_dir: Optional[Path] = None def __repr__(self) -> str: return f"{self.path.as_posix()}" def __post_init__(self): # built full path - self.path = PRECICE_TUTORIAL_DIR / self.path + base = self.base_dir if self.base_dir is not None else PRECICE_TUTORIAL_DIR + self.path = Path(base) / self.path @dataclass @@ -299,6 +304,7 @@ class Tutorial: url: str participants: List[str] cases: List[Case] + source: "TutorialSource" = field(default_factory=TutorialSource.local) case_combinations: List[CaseCombination] = field(init=False) def __post_init__(self): @@ -355,13 +361,16 @@ def get_case_by_string(self, case_name: str) -> Optional[Case]: return None @classmethod - def from_yaml(cls, path, available_components): + def from_yaml(cls, path, available_components, base_dir=None, source=None): """ Creates a Tutorial instance from a YAML file. Args: - path: The path to the YAML file. + path: The path to the metadata.yaml file. available_components: The Components instance containing available components. + base_dir: Optional base directory for resolving tutorial path (for external sources). + Defaults to PRECICE_TUTORIAL_DIR. + source: Optional TutorialSource (for external tutorials). Returns: An instance of Tutorial. 
@@ -369,7 +378,8 @@ def from_yaml(cls, path, available_components): with open(path, 'r') as f: data = yaml.safe_load(f) name = data['name'] - path = PRECICE_TUTORIAL_DIR / data['path'] + base = base_dir if base_dir is not None else PRECICE_TUTORIAL_DIR + tutorial_path = Path(base) / data['path'] url = data['url'] participants = data.get('participants', []) cases_raw = data.get('cases', {}) @@ -377,7 +387,10 @@ def from_yaml(cls, path, available_components): for case_name in cases_raw.keys(): cases.append(Case.from_dict( case_name, cases_raw[case_name], available_components)) - return cls(name, path, url, participants, cases) + tut = cls(name, tutorial_path, url, participants, cases) + if source is not None: + tut.source = source + return tut class Tutorials(list): @@ -440,4 +453,4 @@ def from_path(cls, path): for yaml_path in yaml_files: tut = Tutorial.from_yaml(yaml_path, available_components) tutorials.append(tut) - return cls(tutorials) + return cls(tutorials) \ No newline at end of file diff --git a/tools/tests/systemtests/Systemtest.py b/tools/tests/systemtests/Systemtest.py index bfb1151cf..c0fba13e5 100644 --- a/tools/tests/systemtests/Systemtest.py +++ b/tools/tests/systemtests/Systemtest.py @@ -1,4 +1,5 @@ import subprocess +from .sources import resolve_tutorial_root, PRECICE_EXTERNAL_CACHE_DIR from typing import List, Dict, Optional from jinja2 import Environment, FileSystemLoader from dataclasses import dataclass, field @@ -299,28 +300,56 @@ def __copy_tutorial_into_directory(self, run_directory: Path): """ current_time_string = datetime.now().strftime('%Y-%m-%d %H:%M:%S') self.run_directory = run_directory - pr_requested = self.params_to_use.get("TUTORIALS_PR") - if pr_requested: - logging.debug(f"Fetching the PR {pr_requested} HEAD reference") - self._fetch_pr(PRECICE_TUTORIAL_DIR, pr_requested) - current_ref = self._get_git_ref(PRECICE_TUTORIAL_DIR) - ref_requested = self.params_to_use.get("TUTORIALS_REF") - if ref_requested: - 
logging.debug(f"Checking out tutorials {ref_requested} before copying") - self._fetch_ref(PRECICE_TUTORIAL_DIR, ref_requested) - self._checkout_ref_in_subfolder(PRECICE_TUTORIAL_DIR, self.tutorial.path, ref_requested) - - self.tutorial_folder = slugify(f'{self.tutorial.path.name}_{self.case_combination.cases}_{current_time_string}') + + # Only apply PR/ref overrides for LOCAL tutorials + if self.tutorial.source.type == "local": + pr_requested = self.params_to_use.get("TUTORIALS_PR") + if pr_requested: + logging.debug(f"Fetching the PR {pr_requested} HEAD reference") + self._fetch_pr(PRECICE_TUTORIAL_DIR, pr_requested) + + current_ref = self._get_git_ref(PRECICE_TUTORIAL_DIR) + + ref_requested = self.params_to_use.get("TUTORIALS_REF") + if ref_requested: + logging.debug(f"Checking out tutorials {ref_requested} before copying") + self._fetch_ref(PRECICE_TUTORIAL_DIR, ref_requested) + self._checkout_ref_in_subfolder( + PRECICE_TUTORIAL_DIR, + self.tutorial.path, + ref_requested, + ) + + # Create run directory name + self.tutorial_folder = slugify( + f"{self.tutorial.path.name}_{self.case_combination.cases}_{current_time_string}" + ) + destination = run_directory / self.tutorial_folder - src = self.tutorial.path + + # Resolve the actual tutorial root depending on source type + src = resolve_tutorial_root( + self.tutorial.path, + self.tutorial.source, + PRECICE_EXTERNAL_CACHE_DIR, + ) + self.system_test_dir = destination shutil.copytree(src, destination) - if ref_requested: - with open(destination / "tutorials_ref", 'w') as file: - file.write(ref_requested) - self._checkout_ref_in_subfolder(PRECICE_TUTORIAL_DIR, self.tutorial.path, current_ref) - + # Restore original ref if needed (local tutorials only) + if self.tutorial.source.type == "local": + ref_requested = self.params_to_use.get("TUTORIALS_REF") + if ref_requested: + with open(destination / "tutorials_ref", "w") as file: + file.write(ref_requested) + + current_ref = self._get_git_ref(PRECICE_TUTORIAL_DIR) + 
self._checkout_ref_in_subfolder( + PRECICE_TUTORIAL_DIR, + self.tutorial.path, + current_ref, + ) def __copy_tools(self, run_directory: Path): destination = run_directory / "tools" src = PRECICE_TOOLS_DIR diff --git a/tools/tests/systemtests/TestSuite.py b/tools/tests/systemtests/TestSuite.py index 9d8c2ac72..1f73a172c 100644 --- a/tools/tests/systemtests/TestSuite.py +++ b/tools/tests/systemtests/TestSuite.py @@ -1,6 +1,20 @@ from dataclasses import dataclass, field +from pathlib import Path from typing import Optional, List, Dict -from metadata_parser.metdata import Tutorials, Tutorial, Case, CaseCombination, ReferenceResult +from metadata_parser.metdata import ( + Tutorials, + Tutorial, + Case, + CaseCombination, + ReferenceResult, + Components, +) +from paths import PRECICE_TESTS_DIR +from systemtests.sources import ( + TutorialSource, + resolve_tutorial_root, + PRECICE_EXTERNAL_CACHE_DIR, +) import yaml @@ -42,6 +56,7 @@ def from_yaml(cls, path, parsed_tutorials: Tutorials): An instance of TestSuites. 
""" testsuites = [] + available_components = Components.from_yaml(PRECICE_TESTS_DIR / "components.yaml") with open(path, 'r') as f: data = yaml.safe_load(f) test_suites_raw = data['test_suites'] @@ -50,7 +65,28 @@ def from_yaml(cls, path, parsed_tutorials: Tutorials): reference_results_of_tutorial = {} # iterate over tutorials: for tutorial_case in test_suites_raw[test_suite_name]['tutorials']: + source = TutorialSource.from_dict(tutorial_case.get('source')) tutorial = parsed_tutorials.get_by_path(tutorial_case['path']) + if not tutorial and source.type != "local": + # External tutorial: fetch and load metadata + tutorial_root = resolve_tutorial_root( + Path(tutorial_case['path']), + source, + PRECICE_EXTERNAL_CACHE_DIR, + ) + metadata_path = tutorial_root / "metadata.yaml" + if not metadata_path.exists(): + raise FileNotFoundError( + f"No metadata.yaml found for external tutorial " + f"{tutorial_case['path']} at {tutorial_root}" + ) + tutorial = Tutorial.from_yaml( + metadata_path, + available_components, + base_dir=tutorial_root.parent, + source=source, + ) + parsed_tutorials.tutorials.append(tutorial) if not tutorial: raise Exception(f"No tutorial with path {tutorial_case['path']} found.") # initialize the datastructure for the new Testsuite @@ -63,8 +99,12 @@ def from_yaml(cls, path, parsed_tutorials: Tutorials): tutorial_case['case_combination'], tutorial) if case_combination_requested in all_case_combinations: case_combinations_of_tutorial[tutorial].append(case_combination_requested) + ref_base = tutorial.path.parent if source.type != "local" else None reference_results_of_tutorial[tutorial].append(ReferenceResult( - tutorial_case['reference_result'], case_combination_requested)) + Path(tutorial_case['reference_result']), + case_combination_requested, + base_dir=ref_base, + )) else: raise Exception( f"Could not find the following cases {tutorial_case['case-combination']} in the current metadata of tutorial {tutorial.name}") @@ -106,4 +146,4 @@ def 
"""
Support for external tutorial sources (git, archive) in systemtests.

Fetched content is cached below ``PRECICE_EXTERNAL_CACHE_DIR``; every
distinct (url, ref, subdir) combination gets its own cache directory.
"""

import hashlib
import logging
import os
import shutil
import subprocess
import tarfile
import tempfile
import urllib.request
import zipfile
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional

# Cache directory for fetched tutorials. Can be overridden via PRECICE_EXTERNAL_CACHE_DIR env.
# An empty variable counts as unset (as the XDG spec requires for XDG_CACHE_HOME);
# otherwise Path("") would silently turn the cache into a CWD-relative directory.
_DEFAULT_CACHE = Path(os.environ.get("XDG_CACHE_HOME") or Path.home() / ".cache") / "precice-tutorials"
PRECICE_EXTERNAL_CACHE_DIR = Path(os.environ.get("PRECICE_EXTERNAL_CACHE_DIR") or _DEFAULT_CACHE)

# Timeouts in seconds for the individual network / git operations.
_CLONE_TIMEOUT = 300
_FETCH_TIMEOUT = 120
_CHECKOUT_TIMEOUT = 60
_DOWNLOAD_TIMEOUT = 300

# Branch names tried (in order) when the requested default-branch name does not exist.
_REF_ALIASES = {
    "main": ["develop", "master"],
    "master": ["main", "develop"],
}


@dataclass
class TutorialSource:
    """Describes where a tutorial is sourced from."""

    type: str  # "local" | "git" | "archive"
    url: Optional[str] = None  # repository / archive URL (unused for "local")
    ref: Optional[str] = None  # git ref to check out (only used for "git")
    subdir: Optional[str] = None  # directory inside the fetched content to descend into

    @classmethod
    def local(cls) -> "TutorialSource":
        """Return the source describing a tutorial that lives in the local checkout."""
        return cls(type="local")

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "TutorialSource":
        """
        Build a TutorialSource from the ``source`` mapping of a test-suite entry.

        Args:
            data: The parsed mapping, or None when the entry has no ``source`` key.

        Returns:
            A local source if ``data`` is None or its type is "local", otherwise a
            source carrying the given ``type``, ``url``, ``ref`` and ``subdir``.

        Raises:
            KeyError: If ``data`` is given but has no ``type`` key.
        """
        if data is None or data.get("type") == "local":
            return cls.local()
        return cls(
            type=data["type"],
            url=data.get("url"),
            ref=data.get("ref"),
            subdir=data.get("subdir"),
        )


def _cache_key(prefix: str, url: str, ref: Optional[str] = None, subdir: Optional[str] = None) -> str:
    """
    Generate a short, deterministic cache key for a source location.

    Every field occupies a fixed position and fields are joined with NUL (which
    cannot occur in URLs, refs or paths), so e.g. ``ref="x"`` and ``subdir="x"``
    never map to the same key.
    """
    raw = "\0".join((prefix, url, ref or "", subdir or ""))
    return hashlib.sha256(raw.encode()).hexdigest()[:16]


def _run_git(args: List[str], timeout: int, check: bool = True) -> subprocess.CompletedProcess:
    """Run ``git <args>`` with captured, leniently decoded output."""
    return subprocess.run(
        ["git", *args],
        check=check,
        capture_output=True,
        text=True,
        errors="replace",  # never fail on undecodable git output
        timeout=timeout,
    )


def _checkout_first_available(checkout: Path, ref: str) -> None:
    """
    Fetch ``ref`` (or one of its common aliases) into ``checkout`` and check it out.

    Raises:
        RuntimeError: If none of the candidate refs could be fetched.
    """
    candidates = [ref, *_REF_ALIASES.get(ref, [])]
    last_err = None
    for candidate in candidates:
        fetched = _run_git(
            ["-C", str(checkout), "fetch", "origin", candidate, "--depth", "1"],
            timeout=_FETCH_TIMEOUT,
            check=False,
        )
        if fetched.returncode != 0:
            last_err = fetched.stderr
            continue
        _run_git(["-C", str(checkout), "checkout", "FETCH_HEAD"], timeout=_CHECKOUT_TIMEOUT)
        if candidate != ref:
            # The caller asked for something else; make the substitution visible.
            logging.warning("Ref '%s' not found in checkout %s, using '%s' instead", ref, checkout, candidate)
        return
    raise RuntimeError(f"Could not fetch ref '{ref}' (tried {candidates}): {last_err}")


def fetch_git_repo(url: str, ref: str, cache_dir: Path, subdir: Optional[str] = None) -> Path:
    """
    Clone or update a git repository and return the path to the checkout.

    Args:
        url: URL of the repository.
        ref: Branch, tag or commit to check out.
        cache_dir: Directory holding all cached checkouts (created if missing).
        subdir: Optional directory inside the repository.

    Returns:
        The checkout directory, or the ``subdir`` inside it if one is given.

    Raises:
        subprocess.CalledProcessError: If a required git command fails.
        subprocess.TimeoutExpired: If a clone exceeds its timeout.
        RuntimeError: If neither ``ref`` nor one of its aliases can be fetched.
        FileNotFoundError: If ``subdir`` does not exist in the checkout.
    """
    cache_dir.mkdir(parents=True, exist_ok=True)
    checkout = cache_dir / _cache_key("git", url, ref, subdir)

    if checkout.exists():
        # Refresh the cached checkout; on any failure (including a timeout) start over.
        try:
            _run_git(
                ["-C", str(checkout), "fetch", "origin", ref, "--depth", "1"],
                timeout=_FETCH_TIMEOUT,
            )
            _run_git(["-C", str(checkout), "checkout", "FETCH_HEAD"], timeout=_CHECKOUT_TIMEOUT)
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
            logging.warning("Git fetch/checkout failed for %s, recloning: %s", url, e)
            shutil.rmtree(checkout, ignore_errors=True)

    if not checkout.exists():
        cloned = _run_git(
            ["clone", "--depth", "1", "--branch", ref, url, str(checkout)],
            timeout=_CLONE_TIMEOUT,
            check=False,
        )
        if cloned.returncode != 0:
            # Fallback: branch may not exist (e.g. repo uses develop/master instead of main),
            # or ref is a commit. Clone the default branch, then fetch and check out the ref.
            shutil.rmtree(checkout, ignore_errors=True)
            logging.debug("git clone --branch %s failed (%s), trying clone + fetch", ref, cloned.stderr)
            try:
                _run_git(["clone", "--depth", "1", url, str(checkout)], timeout=_CLONE_TIMEOUT)
                _checkout_first_available(checkout, ref)
            except Exception:
                # Do not leave a checkout of the wrong ref behind under this ref's cache key.
                shutil.rmtree(checkout, ignore_errors=True)
                raise

    if subdir:
        subpath = checkout / subdir
        if not subpath.is_dir():
            raise FileNotFoundError(f"Subdirectory {subdir} not found in {url} (ref {ref})")
        return subpath
    return checkout


def _extract_archive(archive: Path, destination: Path) -> None:
    """
    Extract a tar (optionally compressed) or zip archive into ``destination``.

    The format is detected from the file content, not from its name.

    Raises:
        ValueError: If the file is neither a readable tar nor a zip archive.
    """
    if tarfile.is_tarfile(archive):
        with tarfile.open(archive, "r:*") as tf:
            if hasattr(tarfile, "data_filter"):
                # Reject absolute paths, "..", and links that escape the destination.
                tf.extractall(destination, filter="data")
            else:
                # Interpreter predates extraction filters: no path sanitising available here.
                tf.extractall(destination)
    elif zipfile.is_zipfile(archive):
        with zipfile.ZipFile(archive, "r") as zf:
            zf.extractall(destination)
    else:
        raise ValueError(f"Unsupported or corrupt archive (expected tar or zip): {archive}")


def _download_and_extract(url: str, extract_dir: Path) -> None:
    """Download ``url`` and unpack it into ``extract_dir``; remove ``extract_dir`` on failure."""
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp_path = Path(tmp.name)
    completed = False
    try:
        logging.info("Downloading %s", url)
        with urllib.request.urlopen(url, timeout=_DOWNLOAD_TIMEOUT) as response, open(tmp_path, "wb") as out:
            shutil.copyfileobj(response, out)
        extract_dir.mkdir(parents=True, exist_ok=True)
        _extract_archive(tmp_path, extract_dir)
        completed = True
    finally:
        tmp_path.unlink(missing_ok=True)
        if not completed:
            # A partially filled directory would otherwise be taken for a valid cache entry.
            shutil.rmtree(extract_dir, ignore_errors=True)


def fetch_archive(url: str, cache_dir: Path, subdir: Optional[str] = None) -> Path:
    """
    Download and extract an archive (tar, compressed tar, zip) and return the path.

    An archive that is already present in the cache is not downloaded again.

    Args:
        url: URL of the archive.
        cache_dir: Directory holding all cached archives (created if missing).
        subdir: Optional directory inside the extracted content.

    Returns:
        The extraction directory, or the ``subdir`` inside it if one is given.

    Raises:
        ValueError: If the downloaded file is neither a tar nor a zip archive.
        FileNotFoundError: If ``subdir`` does not exist in the extracted content.
    """
    cache_dir.mkdir(parents=True, exist_ok=True)
    extract_dir = cache_dir / _cache_key("archive", url, subdir=subdir)

    if not extract_dir.exists():
        _download_and_extract(url, extract_dir)

    # Validate on cache hits as well, so both paths report a missing subdir the same way.
    if subdir:
        subpath = extract_dir / subdir
        if not subpath.is_dir():
            raise FileNotFoundError(f"Subdirectory {subdir} not found in {url}")
        return subpath
    return extract_dir


def resolve_tutorial_root(
    path: Path,
    source: TutorialSource,
    cache_dir: Path,
) -> Path:
    """
    Resolve the filesystem path to the tutorial root.

    For local sources, returns path as-is.
    For git/archive sources, fetches the repository/archive and returns the path
    to the tutorial directory. The tutorial name (path.name) is used as the
    subdirectory within the fetched content (below ``source.subdir``, if set).

    Args:
        path: Path of the tutorial; only its last component is used for external sources.
        source: Where the tutorial comes from.
        cache_dir: Cache directory for fetched content.

    Returns:
        The path of the tutorial directory.

    Raises:
        ValueError: If a required source field is missing or the source type is unknown.
    """
    if source.type == "local":
        return path

    if source.type == "git":
        if not source.url or not source.ref:
            raise ValueError("git source requires 'url' and 'ref'")
        root = fetch_git_repo(source.url, source.ref, cache_dir, source.subdir)
        return root / path.name

    if source.type == "archive":
        if not source.url:
            raise ValueError("archive source requires 'url'")
        root = fetch_archive(source.url, cache_dir, source.subdir)
        return root / path.name

    raise ValueError(f"Unknown source type: {source.type}")