diff --git a/CHANGES.md b/CHANGES.md index 65b4b136..8017a72f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,10 +15,15 @@ development source code and as such may not be routinely kept up to date. ## Improvements +* `nextstrain run` supports running workflows with defined Snakefiles and + configfiles in the `nextstrain-pathogen.yaml` file. This is mainly relevant + for maintainers for pathogens and does not affect users of `nextstrain run`. + ([#462](https://github.com/nextstrain/cli/pull/462)) + * `nextstrain setup ` and `nextstrain version --pathogens` now list the available workflows for a pathogen if the pathogen lists the workflows in the top-level `nextstrain-pathogen.yaml` file. - ([#461](https://github.com/nextstrain/cli/pull/461)) + ([#461](https://github.com/nextstrain/cli/pull/461), [#472](https://github.com/nextstrain/cli/pull/472)) * Snakemake's storage support downloaded files (stored in `.snakemake/storage/`) are now downloaded from AWS Batch builds by default. diff --git a/doc/changes.md b/doc/changes.md index 1a009e3a..c9e2a839 100644 --- a/doc/changes.md +++ b/doc/changes.md @@ -19,10 +19,15 @@ development source code and as such may not be routinely kept up to date. (v-next-improvements)= ### Improvements +* `nextstrain run` supports running workflows with defined Snakefiles and + configfiles in the `nextstrain-pathogen.yaml` file. This is mainly relevant + for maintainers for pathogens and does not affect users of `nextstrain run`. + ([#462](https://github.com/nextstrain/cli/pull/462)) + * `nextstrain setup ` and `nextstrain version --pathogens` now list the available workflows for a pathogen if the pathogen lists the workflows in the top-level `nextstrain-pathogen.yaml` file. - ([#461](https://github.com/nextstrain/cli/pull/461)) + ([#461](https://github.com/nextstrain/cli/pull/461), [#472](https://github.com/nextstrain/cli/pull/472)) * Snakemake's storage support downloaded files (stored in `.snakemake/storage/`) are now downloaded from AWS Batch builds by default. diff --git a/nextstrain/cli/command/run.py b/nextstrain/cli/command/run.py index d278fb95..fe6e7e17 100644 --- a/nextstrain/cli/command/run.py +++ b/nextstrain/cli/command/run.py @@ -228,30 +228,54 @@ def run(opts): # Resolve pathogen and workflow names to a local workflow directory. pathogen = PathogenVersion(opts.pathogen) - if opts.workflow not in pathogen.registered_workflows(): + if opts.workflow not in pathogen.compatible_workflows("nextstrain run"): print(f"The {opts.workflow!r} workflow is not registered as a compatible workflow, but trying to run anyways.") - workflow_directory = pathogen.workflow_path(opts.workflow) + workflow_files = pathogen.workflow_files(opts.workflow) + workflow_snakefile = workflow_files["snakefile"] - if not workflow_directory.is_dir() or not (workflow_directory / "Snakefile").is_file(): + if not workflow_snakefile.is_file(): raise UserError(f""" - No {opts.workflow!r} workflow for pathogen {opts.pathogen!r} found {f"in {str(workflow_directory)!r}" if DEBUGGING else "locally"}. + No {opts.workflow!r} workflow for pathogen {opts.pathogen!r} found {f"(Snakefile {workflow_snakefile!r} does not exist)" if DEBUGGING else "locally"}. Maybe you need to update to a newer version of the pathogen? Hint: to update the pathogen, run `nextstrain update {shquote(pathogen.name)}`. """) + if workflow_configfile := workflow_files["configfile"]: + assert workflow_configfile.is_file(), \ + f"Workflow's registered config file {workflow_configfile!r} does not exist." + # The pathogen volume is the pathogen directory (i.e. repo). - # The workflow volume is the workflow directory within the pathogen directory. # The build volume is the user's analysis directory and will be the working directory. - pathogen_volume, workflow_volume = build.pathogen_volumes(workflow_directory, name = "pathogen") + pathogen_volume, _ = build.pathogen_volumes(pathogen.path, name = "pathogen") build_volume = NamedVolume("build", opts.analysis_directory) # for containerized runtimes (e.g. Docker, Singularity, and AWS Batch) opts.volumes.append(pathogen_volume) opts.volumes.append(build_volume) + # Resolve paths for workflow files + resolved_pathogen = ( + docker.mount_point(pathogen_volume) + if opts.__runner__ in {docker, singularity, aws_batch} else + pathogen_volume.src.resolve(strict = True) + ) + resolved_snakefile = resolved_pathogen / workflow_snakefile.relative_to(pathogen.path) + resolved_configfile = None + if workflow_configfile: + resolved_configfile = resolved_pathogen / workflow_configfile.relative_to(pathogen.path) + + resolved_overlay = None + if (opts.analysis_directory / "config.yaml").is_file(): + resolved_build = ( + docker.mount_point(build_volume) + if opts.__runner__ in {docker, singularity, aws_batch} else + build_volume.src.resolve(strict = True) + ) + resolved_overlay = resolved_build / "config.yaml" + print(f"Running the {opts.workflow!r} workflow for pathogen {pathogen}") # Set up Snakemake invocation. @@ -276,11 +300,15 @@ def run(opts): # Workdir will be the analysis volume (/nextstrain/build in a # containerized runtime), so explicitly point to the Snakefile. - "--snakefile=%s/Snakefile" % ( - docker.mount_point(workflow_volume) - if opts.__runner__ in {docker, singularity, aws_batch} else - workflow_volume.src.resolve(strict = True)), + "--snakefile=%s" % (resolved_snakefile), + + *(["--configfile=%s" % (resolved_configfile)] + if resolved_configfile else []), + # Ensure the overlay config in the user's analysis directory + # overrides any default config file provided above. + *(["--configfile=%s" % (resolved_overlay)] + if resolved_overlay else []), # Pass thru appropriate resource options. # # Snakemake requires the --cores option as of 5.11, so provide a diff --git a/nextstrain/cli/command/version.py b/nextstrain/cli/command/version.py index 4ab8a925..1c0a8db0 100644 --- a/nextstrain/cli/command/version.py +++ b/nextstrain/cli/command/version.py @@ -72,9 +72,9 @@ def run(opts): if opts.verbose: print(" " + str(version.path)) - if registered_workflows := version.registered_workflows(): - print(" " + "Available workflows:") - for workflow in registered_workflows: + if compatible_workflows := version.compatible_workflows("nextstrain run"): + print(" " + "`nextstrain run` compatible workflows:") + for workflow in compatible_workflows: print(" " + workflow) else: print(" " + "No workflows listed, please refer to pathogen docs.") diff --git a/nextstrain/cli/pathogens.py b/nextstrain/cli/pathogens.py index 7afbdc49..d8fd5cb2 100644 --- a/nextstrain/cli/pathogens.py +++ b/nextstrain/cli/pathogens.py @@ -308,24 +308,56 @@ def __init__(self, name_version_url: str, new_setup: bool = False): def registered_workflows(self) -> Dict[str, Dict]: """ Parses :attr:`.registration` to return a dict of registered - compatible workflows, where the keys are workflow names. + workflows, where the keys are workflow names. """ if self.registration is None: debug("pathogen does not have a registration") return {} - workflows = self.registration.get("compatibility", {}).get("nextstrain run") + workflows = self.registration.get("workflows") if not isinstance(workflows, dict): - debug(f"pathogen registration.compatibility['nextstrain runs'] is not a dict (got a {type(workflows).__name__})") + debug(f"pathogen registration.workflows is not a dict (got a {type(workflows).__name__})") return {} return workflows + def compatible_workflows(self, feature: str) -> Dict[str, Dict]: + """ + Parses registered workflows to return a subset of workflows that are + compatible with the provided *feature*. + """ + return { + workflow: workflow_config + for workflow, workflow_config in self.registered_workflows().items() + if workflow_config.get("compatibility", {}).get(feature, False) + } + + def workflow_path(self, workflow: str) -> Path: return self.path / workflow + def workflow_files(self, workflow: str) -> Dict: + """ + Parses :attr:`.registration` to get the path to a *workflow* files, + snakefile and configfile. + """ + files = { + "snakefile": self.workflow_path(workflow) / "Snakefile", + "configfile": None, + } + + if workflow_registration := self.registered_workflows().get(workflow): + if snakefile := workflow_registration.get("snakefile"): + files["snakefile"] = self.path / snakefile + + if configfile := workflow_registration.get("configfile"): + files["configfile"] = self.path / configfile + + return files + + def setup(self, dry_run: bool = False, force: bool = False) -> SetupStatus: """ Downloads and installs this pathogen version from :attr:`.url`. @@ -481,6 +513,11 @@ def test_compatibility() -> SetupTestResult: if self.registration is None: return msg + "\n(couldn't read registration)", False + if compatible_workflows := self.compatible_workflows("nextstrain run"): + return msg + f"\nCompatible workflows: {list(compatible_workflows.keys())}", True + + # If no compatible workflows are listed, then check for the top level + # boolean compatibility declaration try: compatibility = self.registration["compatibility"]["nextstrain run"] except (KeyError, IndexError, TypeError): @@ -488,13 +525,7 @@ def test_compatibility() -> SetupTestResult: traceback.print_exc() return msg + "\n(couldn't find 'compatibility: nextstrain run: …' field)", False - if compatibility: - if workflows := self.registered_workflows(): - msg += f"\nAvailable workflows: {list(workflows.keys())}" - else: - msg += f"\nNo workflows listed, please refer to pathogen docs." - - return msg, bool(compatibility) + return msg + "\nNo compatible workflows listed, please refer to pathogen docs.", bool(compatibility) return [ ('downloaded',