From 014755533b023672ccd48854fe607b69f5ea2a5f Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 24 Sep 2025 14:10:28 -0700 Subject: [PATCH 1/3] pathogens: Workflows are registered independent of their `nextstrain run` compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workflows are now registered under a top level "workflows" key in the nextstrain-pathogen.yaml file, and each workflow has its own "compatibility" declarations. The only current "compatibility" declaration remains "nextstrain run". This allows for more meta information about each workflow to be registered in one place not under the banner of compatibility. The nextstrain-pathogen.yaml schema changes to a workflow-first structure like: workflows: ingest: compatibility: nextstrain run: yes phylogenetic: compatibility: nextstrain run: no instead of a compatibility-first structure like: compatibility: nextstrain run: ingest: yes phylogenetic: no as previously introduced¹ (but not yet released). As it was never released, don't bother supporting the latter schema at all anymore. Change first arose out of discussion started by @victorlin in a related PR.² Subsequent discussion with @tsibley³ led to dropping support for the unreleased schema. ¹ ² ³ Co-authored-by: Thomas Sibley --- CHANGES.md | 7 +++--- doc/changes.md | 7 +++--- nextstrain/cli/command/run.py | 18 ++++++++++++-- nextstrain/cli/command/version.py | 6 ++--- nextstrain/cli/pathogens.py | 40 ++++++++++++++++++------------- 5 files changed, 50 insertions(+), 28 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 99aa7700..a57392c6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -27,9 +27,10 @@ supported Python version is always bundled with `nextstrain`. ## Improvements * `nextstrain setup <pathogen>` and `nextstrain version --pathogens` now list - the available workflows for a pathogen if the pathogen lists the workflows - in the top-level `nextstrain-pathogen.yaml` file. 
- ([#461](https://github.com/nextstrain/cli/pull/461)) + the available workflows (e.g. `ingest`, `phylogenetic`) for a pathogen if the + workflows are registered as compatible with `nextstrain run` in the + pathogen's `nextstrain-pathogen.yaml` file. + ([#461](https://github.com/nextstrain/cli/pull/461), [#472](https://github.com/nextstrain/cli/pull/472)) * Snakemake's storage support downloaded files (stored in `.snakemake/storage/`) are now downloaded from AWS Batch builds by default. diff --git a/doc/changes.md b/doc/changes.md index 26892fb1..1a25096b 100644 --- a/doc/changes.md +++ b/doc/changes.md @@ -31,9 +31,10 @@ supported Python version is always bundled with `nextstrain`. ### Improvements * `nextstrain setup <pathogen>` and `nextstrain version --pathogens` now list - the available workflows for a pathogen if the pathogen lists the workflows - in the top-level `nextstrain-pathogen.yaml` file. - ([#461](https://github.com/nextstrain/cli/pull/461)) + the available workflows (e.g. `ingest`, `phylogenetic`) for a pathogen if the + workflows are registered as compatible with `nextstrain run` in the + pathogen's `nextstrain-pathogen.yaml` file. + ([#461](https://github.com/nextstrain/cli/pull/461), [#472](https://github.com/nextstrain/cli/pull/472)) * Snakemake's storage support downloaded files (stored in `.snakemake/storage/`) are now downloaded from AWS Batch builds by default. diff --git a/nextstrain/cli/command/run.py b/nextstrain/cli/command/run.py index 4ba3a515..6a5520c9 100644 --- a/nextstrain/cli/command/run.py +++ b/nextstrain/cli/command/run.py @@ -33,7 +33,7 @@ from ..errors import UserError from ..pathogens import PathogenVersion, UnmanagedPathogen from ..runner import aws_batch, docker, singularity -from ..util import byte_quantity, split_image_name +from ..util import byte_quantity, split_image_name, warn from ..volume import NamedVolume from . 
import build @@ -241,7 +241,21 @@ def run(opts): debug(f"Treating {opts.pathogen!r} as unmanaged pathogen directory") if opts.workflow not in pathogen.registered_workflows(): - print(f"The {opts.workflow!r} workflow is not registered as a compatible workflow, but trying to run anyways.") + warn(cleandoc(f""" + The {opts.workflow!r} workflow is not registered by pathogen {opts.pathogen!r}! + + Trying to run it anyways (but it likely won't work)… + """)) + warn() + + elif opts.workflow not in pathogen.compatible_workflows("nextstrain run"): + warn(cleandoc(f""" + The {opts.workflow!r} workflow is registered by pathogen {opts.pathogen!r} + but not marked as compatible with `nextstrain run`! + + Trying to run it anyways (but it likely won't work)… + """)) + warn() workflow_directory = pathogen.workflow_path(opts.workflow) diff --git a/nextstrain/cli/command/version.py b/nextstrain/cli/command/version.py index 4ab8a925..b715d4b3 100644 --- a/nextstrain/cli/command/version.py +++ b/nextstrain/cli/command/version.py @@ -72,9 +72,9 @@ def run(opts): if opts.verbose: print(" " + str(version.path)) - if registered_workflows := version.registered_workflows(): - print(" " + "Available workflows:") - for workflow in registered_workflows: + if compatible_workflows := version.compatible_workflows("nextstrain run"): + print(" " + "Compatible workflows:") + for workflow in compatible_workflows: print(" " + workflow) else: print(" " + "No workflows listed, please refer to pathogen docs.") diff --git a/nextstrain/cli/pathogens.py b/nextstrain/cli/pathogens.py index 5281c3d4..6ae52b66 100644 --- a/nextstrain/cli/pathogens.py +++ b/nextstrain/cli/pathogens.py @@ -308,21 +308,33 @@ def __init__(self, name_version_url: str, new_setup: bool = False): def registered_workflows(self) -> Dict[str, Dict]: """ - Parses :attr:`.registration` to return a dict of registered - compatible workflows, where the keys are workflow names. 
+ Parses :attr:`.registration` to return a dict of registered workflows, + where the keys are workflow names. """ if self.registration is None: debug("pathogen does not have a registration") return {} - workflows = self.registration.get("compatibility", {}).get("nextstrain run") + workflows = self.registration.get("workflows") if not isinstance(workflows, dict): - debug(f"pathogen registration.compatibility['nextstrain runs'] is not a dict (got a {type(workflows).__name__})") + debug(f"pathogen registration.workflows is not a dict (got a {type(workflows).__name__})") return {} return workflows + def compatible_workflows(self, feature: str) -> Dict[str, Dict]: + """ + Filters registered workflows to return a subset of workflows that are + compatible with the provided *feature*. + """ + return { + name: info + for name, info in self.registered_workflows().items() + if isinstance(info, dict) and info.get("compatibility", {}).get(feature) + } + + def workflow_path(self, workflow: str) -> Path: return self.path / workflow @@ -498,20 +510,13 @@ def test_compatibility() -> SetupTestResult: if self.registration is None: return msg + "\n(couldn't read registration)", False - try: - compatibility = self.registration["compatibility"]["nextstrain run"] - except (KeyError, IndexError, TypeError): - if DEBUGGING: - traceback.print_exc() - return msg + "\n(couldn't find 'compatibility: nextstrain run: …' field)", False - - if compatibility: - if workflows := self.registered_workflows(): - msg += f"\nAvailable workflows: {list(workflows.keys())}" - else: - msg += f"\nNo workflows listed, please refer to pathogen docs." 
+ if not self.registered_workflows(): + return msg + "\n(no workflows registered)", False + + if not (compatible_workflows := self.compatible_workflows("nextstrain run")): + return msg + "\n(no workflows registered as compatible)", False - return msg, bool(compatibility) + return msg + f"\nCompatible workflows: {list(compatible_workflows.keys())}", True return [ ('downloaded', @@ -690,6 +695,7 @@ def __init__(self, path: str): self.registration = read_pathogen_registration(self.registration_path) registered_workflows = PathogenVersion.registered_workflows + compatible_workflows = PathogenVersion.compatible_workflows workflow_path = PathogenVersion.workflow_path def __str__(self) -> str: From d16f612fae5875f989ad70ea66568fab952317db Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 24 Sep 2025 14:10:35 -0700 Subject: [PATCH 2/3] version: Declutter pathogen workflow listing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit List the workflow names on the same line as the pathogen name@version when there are any workflows and emit nothing when there aren't. This fits more in the style of the rest of `nextstrain version` and is much less cluttered (more information dense), particularly considering that we, in short order too, expect ~all of our pathogens (and all their future versions) to have the same 2–3 workflows listed (nextclade, ingest, phylogenetic). 
Prompted by feedback from @tsibley.¹ ¹ Co-authored-by: Thomas Sibley --- nextstrain/cli/command/version.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/nextstrain/cli/command/version.py b/nextstrain/cli/command/version.py index b715d4b3..a1e58395 100644 --- a/nextstrain/cli/command/version.py +++ b/nextstrain/cli/command/version.py @@ -68,15 +68,11 @@ def run(opts): print(" " + name) for version in versions.values(): is_default = version == defaults.get(name) - print(" " + str(version) + (f"={version.url or ''}" if opts.verbose else ""), "(default)" if is_default else "") + compatible_workflows = version.compatible_workflows("nextstrain run") + print(" " + str(version) + (f"={version.url or ''}" if opts.verbose else "") + + (" (default)" if is_default else "") + + (f" {{{', '.join(compatible_workflows)}}}" if compatible_workflows else "")) if opts.verbose: print(" " + str(version.path)) - - if compatible_workflows := version.compatible_workflows("nextstrain run"): - print(" " + "Compatible workflows:") - for workflow in compatible_workflows: - print(" " + workflow) - else: - print(" " + "No workflows listed, please refer to pathogen docs.") else: print(" (none)") From 20c99d461e6a4ab35e1935983e4b197a64944254 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 24 Sep 2025 14:10:40 -0700 Subject: [PATCH 3/3] pathogens: Report compatible workflows as separate setup checks Helpful to ensure that their directories exist as expected to catch registration vs. reality mismatches. 
--- nextstrain/cli/pathogens.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nextstrain/cli/pathogens.py b/nextstrain/cli/pathogens.py index 6ae52b66..9a204e89 100644 --- a/nextstrain/cli/pathogens.py +++ b/nextstrain/cli/pathogens.py @@ -513,10 +513,10 @@ def test_compatibility() -> SetupTestResult: if not self.registered_workflows(): return msg + "\n(no workflows registered)", False - if not (compatible_workflows := self.compatible_workflows("nextstrain run")): + if not self.compatible_workflows("nextstrain run"): return msg + "\n(no workflows registered as compatible)", False - return msg + f"\nCompatible workflows: {list(compatible_workflows.keys())}", True + return msg, True return [ ('downloaded', @@ -526,6 +526,9 @@ def test_compatibility() -> SetupTestResult: self.registration_path.is_file()), test_compatibility(), + + *((f'`nextstrain run` workflow {name!r} exists', self.workflow_path(name).is_dir()) + for name in self.compatible_workflows("nextstrain run")), ]