From 054e9cc04d6a1dcd05dc02ea588d6163f2d614f5 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 17 Sep 2025 15:53:40 -0700 Subject: [PATCH 1/2] run: Support unmanaged pathogen repositories with local paths Resolves: --- CHANGES.md | 9 ++++++++ doc/changes.md | 9 ++++++++ doc/commands/run.rst | 9 ++++++-- nextstrain/cli/command/run.py | 43 ++++++++++++++++++++++++----------- nextstrain/cli/pathogens.py | 40 ++++++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 15 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4fbe878f..4e1cd2cb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -39,6 +39,15 @@ supported Python version is always bundled with `nextstrain`. Batch job. ([#460](https://github.com/nextstrain/cli/pull/460)) +* `nextstrain run` now supports an alternative invocation where a path to a + local directory that is a pathogen repository can be given instead of a + pathogen name (and optionally version). This allows `nextstrain run` to be + used with pathogen repos managed outside of Nextstrain CLI (i.e. not by + `nextstrain setup` and `nextstrain update`), which can be useful for the + analysis directory support and local testing. The workflow to run is still + given separately by name (not path). + ([#476](https://github.com/nextstrain/cli/issues/476)) + ## Bug fixes * `nextstrain setup @` and `nextstrain update @` diff --git a/doc/changes.md b/doc/changes.md index 6c4e7fbc..f55863c4 100644 --- a/doc/changes.md +++ b/doc/changes.md @@ -43,6 +43,15 @@ supported Python version is always bundled with `nextstrain`. Batch job. ([#460](https://github.com/nextstrain/cli/pull/460)) +* `nextstrain run` now supports an alternative invocation where a path to a + local directory that is a pathogen repository can be given instead of a + pathogen name (and optionally version). This allows `nextstrain run` to be + used with pathogen repos managed outside of Nextstrain CLI (i.e. not by + `nextstrain setup` and `nextstrain update`), which can be useful for the + analysis directory support and local testing. The workflow to run is still + given separately by name (not path). + ([#476](https://github.com/nextstrain/cli/issues/476)) + (v-next-bug-fixes)= ### Bug fixes diff --git a/doc/commands/run.rst b/doc/commands/run.rst index a36b176b..81b70157 100644 --- a/doc/commands/run.rst +++ b/doc/commands/run.rst @@ -12,7 +12,7 @@ nextstrain run .. code-block:: none - usage: nextstrain run [options] [@] [ [ [...]]] + usage: nextstrain run [options] [@]| [ [ [...]]] nextstrain run --help @@ -44,12 +44,17 @@ positional arguments -.. option:: [@] +.. option:: [@]| The name (and optionally, version) of a previously set up pathogen. See :command-reference:`nextstrain setup`. If no version is specified, then the default version (if any) will be used. + Alternatively, the local path to a directory that is a pathogen + repository. For this case to be recognized as such, the path must + contain a separator (/) or consist entirely of the current + directory (.) or parent directory (..) specifier. + Required. .. option:: diff --git a/nextstrain/cli/command/run.py b/nextstrain/cli/command/run.py index d278fb95..b8b3e257 100644 --- a/nextstrain/cli/command/run.py +++ b/nextstrain/cli/command/run.py @@ -23,14 +23,15 @@ manage the run and download results after completion. """ +import os.path from inspect import cleandoc from shlex import quote as shquote from textwrap import dedent from .. import runner from ..argparse import add_extended_help_flags, MkDirectoryPath, SKIP_AUTO_DEFAULT_IN_HELP -from ..debug import DEBUGGING +from ..debug import DEBUGGING, debug from ..errors import UserError -from ..pathogens import PathogenVersion +from ..pathogens import PathogenVersion, UnmanagedPathogen from ..runner import aws_batch, docker, singularity from ..util import byte_quantity, split_image_name from ..volume import NamedVolume @@ -39,7 +40,7 @@ def register_parser(subparser): """ - %(prog)s [options] [@] [ [ [...]]] + %(prog)s [options] [@]| [ [ [...]]] %(prog)s --help """ @@ -48,14 +49,19 @@ def register_parser(subparser): # Positional parameters parser.add_argument( "pathogen", - metavar = "[@]", + metavar = "[@]|", help = cleandoc(f""" The name (and optionally, version) of a previously set up pathogen. See :command-reference:`nextstrain setup`. If no version is specified, then the default version (if any) will be used. + Alternatively, the local path to a directory that is a pathogen + repository. For this case to be recognized as such, the path must + contain a separator ({{path_sep}}) or consist entirely of the current + directory ({os.path.curdir}) or parent directory ({os.path.pardir}) specifier. + Required. - """)) + """.format(path_sep = " or ".join(sorted(set([os.path.sep, os.path.altsep or os.path.sep])))))) parser.add_argument( "workflow", @@ -226,7 +232,13 @@ def run(opts): """) # Resolve pathogen and workflow names to a local workflow directory. - pathogen = PathogenVersion(opts.pathogen) + try: + pathogen = UnmanagedPathogen(opts.pathogen) + except ValueError: + debug(f"Treating {opts.pathogen!r} as managed pathogen version") + pathogen = PathogenVersion(opts.pathogen) + else: + debug(f"Treating {opts.pathogen!r} as unmanaged pathogen directory") if opts.workflow not in pathogen.registered_workflows(): print(f"The {opts.workflow!r} workflow is not registered as a compatible workflow, but trying to run anyways.") @@ -234,13 +246,18 @@ def run(opts): workflow_directory = pathogen.workflow_path(opts.workflow) if not workflow_directory.is_dir() or not (workflow_directory / "Snakefile").is_file(): - raise UserError(f""" - No {opts.workflow!r} workflow for pathogen {opts.pathogen!r} found {f"in {str(workflow_directory)!r}" if DEBUGGING else "locally"}. - - Maybe you need to update to a newer version of the pathogen? - - Hint: to update the pathogen, run `nextstrain update {shquote(pathogen.name)}`. - """) + if isinstance(pathogen, UnmanagedPathogen): + raise UserError(f""" + No {opts.workflow!r} workflow for pathogen {opts.pathogen!r} found {f"in {str(workflow_directory)!r}" if DEBUGGING else "locally"}. + """) + else: + raise UserError(f""" + No {opts.workflow!r} workflow for pathogen {opts.pathogen!r} found {f"in {str(workflow_directory)!r}" if DEBUGGING else "locally"}. + + Maybe you need to update to a newer version of the pathogen? + + Hint: to update the pathogen, run `nextstrain update {shquote(pathogen.name)}`. + """) # The pathogen volume is the pathogen directory (i.e. repo). # The workflow volume is the workflow directory within the pathogen directory. diff --git a/nextstrain/cli/pathogens.py b/nextstrain/cli/pathogens.py index 7afbdc49..eaa43f3f 100644 --- a/nextstrain/cli/pathogens.py +++ b/nextstrain/cli/pathogens.py @@ -642,6 +642,46 @@ def __eq__(self, other) -> bool: and (self.url == other.url or self.url is None or other.url is None) +class UnmanagedPathogen: + """ + A local directory that's a pathogen repo, not managed by Nextstrain CLI. + + Used by ``nextstrain run``. Includes only the :cls:`PathogenVersion` API + surface that ``nextstrain run`` requires. + """ + path: Path + registration_path: Path + + registration: Optional[dict] = None + + def __init__(self, path: str): + spec = PathogenSpec.parse(path) + + if not spec.name or (spec.name not in set([os.path.curdir, os.path.pardir]) and not (set(spec.name) & set([os.path.sep, os.path.altsep or os.path.sep]))): + raise ValueError(f"the {spec.name!r} part of {path!r} does not look like a path") + + self.path = Path(path) + + if not self.path.is_dir(): + raise UserError(f""" + Path {str(self.path)!r} is not a directory (or does not exist). + """) + + self.registration_path = self.path / "nextstrain-pathogen.yaml" + + if self.registration_path.exists(): + self.registration = read_pathogen_registration(self.registration_path) + + registered_workflows = PathogenVersion.registered_workflows + workflow_path = PathogenVersion.workflow_path + + def __str__(self) -> str: + return str(self.path) + + def __repr__(self) -> str: + return f"" + + def every_pathogen_default_by_name(pathogens: Dict[str, Dict[str, PathogenVersion]] = None) -> Dict[str, PathogenVersion]: """ Scans file system to return a dict of :cls:`PathogenVersion` objects, From acd485d9145b072ae3c437aa2189986aaca247ad Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 23 Sep 2025 10:40:15 -0700 Subject: [PATCH 2/2] run: Explicit pass --directory to Snakemake to override "workdir:" directives in workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested by @joverlee521 in review¹ to avoid writing into the pathogen/workflow source directories when non-compatible (or broken) workflows are used with `nextstrain run`. ¹ Co-authored-by: Jover Lee --- CHANGES.md | 10 ++++++++++ doc/changes.md | 10 ++++++++++ nextstrain/cli/command/run.py | 8 ++++++++ 3 files changed, 28 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 4e1cd2cb..5e3a9045 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -54,6 +54,16 @@ supported Python version is always bundled with `nextstrain`. no longer result in a 404 error if `` includes a slash and it is a valid version specifier. ([#459](https://github.com/nextstrain/cli/pull/459)) +* `nextstrain run` now overrides (i.e. suppresses) any ["workdir:" + directives](https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html) + in a workflow by explicitly setting the working directory when it invokes + Snakemake. This avoids writing files into the pathogen/workflow source + directories when non-compatible (or broken) workflows are used with + `nextstrain run` despite the warnings issued. Such workflows are more likely + to error and fail now early on rather than "succeed" but produce output files + in the wrong location. + ([#476](https://github.com/nextstrain/cli/issues/476)) + # 10.2.1.post1 (1 July 2025) _See also changes in 10.2.1 which was an unreleased version._ diff --git a/doc/changes.md b/doc/changes.md index f55863c4..b08429ca 100644 --- a/doc/changes.md +++ b/doc/changes.md @@ -59,6 +59,16 @@ supported Python version is always bundled with `nextstrain`. no longer result in a 404 error if `` includes a slash and it is a valid version specifier. ([#459](https://github.com/nextstrain/cli/pull/459)) +* `nextstrain run` now overrides (i.e. suppresses) any ["workdir:" + directives](https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html) + in a workflow by explicitly setting the working directory when it invokes + Snakemake. This avoids writing files into the pathogen/workflow source + directories when non-compatible (or broken) workflows are used with + `nextstrain run` despite the warnings issued. Such workflows are more likely + to error and fail now early on rather than "succeed" but produce output files + in the wrong location. + ([#476](https://github.com/nextstrain/cli/issues/476)) + (v10-2-1-post1)= ## 10.2.1.post1 (1 July 2025) diff --git a/nextstrain/cli/command/run.py b/nextstrain/cli/command/run.py index b8b3e257..4ba3a515 100644 --- a/nextstrain/cli/command/run.py +++ b/nextstrain/cli/command/run.py @@ -291,6 +291,14 @@ def run(opts): *(["--forceall"] if opts.force else []), + # Explicitly use Snakemake's current working directory as the + # workflow's workdir, overriding any "workdir:" directive the workflow + # may include. Snakemake uses the cwd by default in the absence of any + # "workdir:" directive, but we want to _always_ use it to avoid writing + # into the pathogen/workflow source directories if a non-compatible + # workflow is run. + "--directory=.", + # Workdir will be the analysis volume (/nextstrain/build in a # containerized runtime), so explicitly point to the Snakefile. "--snakefile=%s/Snakefile" % (