From 65ece58aa9bed0f58b15bdd6d8575d0eb69f7f22 Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Sat, 11 Apr 2026 00:28:58 +0100 Subject: [PATCH 1/6] Address code injection vulnerability Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- monai/apps/nnunet/nnunetv2_runner.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 98b265cbbb..b9be443697 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -196,6 +196,10 @@ def __init__( # dataset_name_or_id has to be a string self.dataset_name_or_id = str(self.input_info.pop("dataset_name_or_id", 1)) + # ensure the dataset name is a single identifier/number, this prevents code injection when composing commands + if len(shlex.split(self.dataset_name_or_id))!=1: + raise ValueError("Value for dataset_name_or_id `{self.dataset_name_or_id}` not a valid dataset name or ID.") + try: from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name @@ -574,7 +578,7 @@ def train_single_model_command( cmd = [ "nnUNetv2_train", - f"{self.dataset_name_or_id}", + f"{self.dataset_name_or_id}", # value is checked in constructor to be an identifier or number f"{config}", f"{fold}", "-tr", From 789a67bcc18af6fead10ac33c52c1393c1edc956 Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Sat, 11 Apr 2026 16:15:06 +0100 Subject: [PATCH 2/6] Using regex to match a valid dataset name Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- monai/apps/nnunet/nnunetv2_runner.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index b9be443697..6fbbd67df5 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -14,6 +14,7 @@ import glob import os +import re import shlex import subprocess from typing import Any @@ -34,6 +35,8 @@ __all__ = ["nnUNetV2Runner"] +DATASET_ID_FORMAT = r"Dataset[0-9]{3}_[0-9]{3}|[0-9]+" # regex format for a valid nnUnet dataset name + class nnUNetV2Runner: # noqa: N801 """ @@ -197,7 +200,7 @@ def __init__( self.dataset_name_or_id = str(self.input_info.pop("dataset_name_or_id", 1)) # ensure the dataset name is a single identifier/number, this prevents code injection when composing commands - if len(shlex.split(self.dataset_name_or_id))!=1: + if re.fullmatch(DATASET_ID_FORMAT, self.dataset_name_or_id) is None: raise ValueError("Value for dataset_name_or_id `{self.dataset_name_or_id}` not a valid dataset name or ID.") try: @@ -578,21 +581,25 @@ def train_single_model_command( cmd = [ "nnUNetv2_train", - f"{self.dataset_name_or_id}", # value is checked in constructor to be an identifier or number - f"{config}", - f"{fold}", + self.dataset_name_or_id, + config, + fold, "-tr", - f"{self.trainer_class_name}", + self.trainer_class_name, "-num_gpus", - f"{num_gpus}", + num_gpus, ] + if self.export_validation_probabilities: cmd.append("--npz") + for _key, _value in kwargs.items(): - if _key == "p" or _key == "pretrained_weights": - cmd.extend([f"-{_key}", f"{_value}"]) - else: - cmd.extend([f"--{_key}", f"{_value}"]) + prefix = "-" if _key in {"p", "pretrained_weights"} else "--" + cmd += [f"{prefix}{_key}", str(_value)] + + # ensure components are quoted strings to prevent injection (not robust in Windows) + cmd = [shlex.quote(str(c)) for c in cmd] + return cmd, env def train( From c3c79abff56cdfb8d6ecf5a7120970d6b3a3a50d Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Thu, 7 May 2026 12:18:41 +0100 Subject: [PATCH 3/6] Typing fix Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- monai/apps/nnunet/nnunetv2_runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 6fbbd67df5..f4b8e1e4eb 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -555,7 +555,7 @@ def train_single_model_command( Raises: ValueError: If gpu_id is an empty tuple or list. """ - env = os.environ.copy() + env:dict[str, str] = os.environ.copy() device_setting: str = "0" num_gpus = 1 if isinstance(gpu_id, str): @@ -598,9 +598,9 @@ def train_single_model_command( cmd += [f"{prefix}{_key}", str(_value)] # ensure components are quoted strings to prevent injection (not robust in Windows) - cmd = [shlex.quote(str(c)) for c in cmd] + cmd_str:list[str] = [shlex.quote(str(c)) for c in cmd] - return cmd, env + return cmd_str, env def train( self, From 3111e42969dff84a3b29e51384cd50a26719e09c Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Wed, 20 May 2026 12:00:28 +0100 Subject: [PATCH 4/6] Minor fixes and exceptions added Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- monai/apps/nnunet/nnunetv2_runner.py | 30 ++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index f4b8e1e4eb..175dc37ad9 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -198,6 +198,7 @@ def __init__( # dataset_name_or_id has to be a string self.dataset_name_or_id = str(self.input_info.pop("dataset_name_or_id", 1)) + self.dataset_name: str | None = None # ensure the dataset name is a single identifier/number, this prevents code injection when composing commands if re.fullmatch(DATASET_ID_FORMAT, self.dataset_name_or_id) is None: @@ -206,7 +207,7 @@ def __init__( try: from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name - self.dataset_name = maybe_convert_to_dataset_name(int(self.dataset_name_or_id)) + self.dataset_name = maybe_convert_to_dataset_name(self.dataset_name_or_id) except BaseException: logger.warning( f"Dataset with name/ID: {self.dataset_name_or_id} cannot be found in the record. " @@ -246,7 +247,7 @@ def convert_dataset(self): from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name - self.dataset_name = maybe_convert_to_dataset_name(int(self.dataset_name_or_id)) + self.dataset_name = maybe_convert_to_dataset_name(self.dataset_name_or_id) datalist_json = ConfigParser.load_config_file(self.input_info.pop("datalist")) @@ -555,7 +556,7 @@ def train_single_model_command( Raises: ValueError: If gpu_id is an empty tuple or list. """ - env:dict[str, str] = os.environ.copy() + env: dict[str, str] = os.environ.copy() device_setting: str = "0" num_gpus = 1 if isinstance(gpu_id, str): @@ -598,7 +599,7 @@ def train_single_model_command( cmd += [f"{prefix}{_key}", str(_value)] # ensure components are quoted strings to prevent injection (not robust in Windows) - cmd_str:list[str] = [shlex.quote(str(c)) for c in cmd] + cmd_str: list[str] = [shlex.quote(str(c)) for c in cmd] return cmd_str, env @@ -652,7 +653,14 @@ def train_parallel_cmd( None (all available GPUs). kwargs: this optional parameter allows you to specify additional arguments defined in the ``train_single_model`` method. + + Raises: + ValueError: self.dataset_name must have a value, ie. when using an existing dataset or after creating one. """ + + if self.dataset_name is None: + raise ValueError(f"A valid dataset name must be given in {self.dataset_name=}.") + # unpack compressed files folder_names = [] for root, _, files in os.walk(os.path.join(self.nnunet_preprocessed, self.dataset_name)): @@ -707,7 +715,14 @@ def train_parallel( None (all available GPUs). kwargs: this optional parameter allows you to specify additional arguments defined in the ``train_single_model`` method. + + Raises: + ValueError: self.dataset_name must have a value, ie. when using an existing dataset or after creating one. """ + + if self.dataset_name is None: + raise ValueError(f"A valid dataset name must be given in {self.dataset_name=}.") + all_cmds = self.train_parallel_cmd(configs=configs, gpu_id_for_all=gpu_id_for_all, **kwargs) for s, cmds in enumerate(all_cmds): for gpu_id, gpu_cmd in cmds.items(): @@ -919,7 +934,14 @@ def predict_ensemble_postprocessing( run_postprocessing: whether to conduct post-processing kwargs: this optional parameter allows you to specify additional arguments defined in the ``predict`` method. + + Raises: + ValueError: self.dataset_name must have a value, ie. when using an existing dataset or after creating one. """ + + if self.dataset_name is None: + raise ValueError(f"A valid dataset name must be given in {self.dataset_name=}.") + from nnunetv2.ensembling.ensemble import ensemble_folders from nnunetv2.postprocessing.remove_connected_components import apply_postprocessing_to_folder from nnunetv2.utilities.file_path_utilities import get_output_folder From 95f4b6309e23ea8f3306cd9ad889b70e3c40cbdf Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Wed, 27 May 2026 15:41:35 +0100 Subject: [PATCH 5/6] Update from comments Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- monai/apps/nnunet/nnunetv2_runner.py | 9 ++- .../test_integration_nnunetv2_runner.py | 75 +++++++++++++++++++ 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 5f8f408895..547e73332f 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -35,7 +35,7 @@ __all__ = ["nnUNetV2Runner"] -DATASET_ID_FORMAT = r"Dataset[0-9]{3}_[0-9]{3}|[0-9]+" # regex format for a valid nnUnet dataset name +DATASET_ID_FORMAT = r"Dataset[0-9]{3}|[0-9]+" # regex format for a valid nnUnet dataset name class nnUNetV2Runner: # noqa: N801 @@ -202,7 +202,9 @@ def __init__( # ensure the dataset name is a single identifier/number, this prevents code injection when composing commands if re.fullmatch(DATASET_ID_FORMAT, self.dataset_name_or_id) is None: - raise ValueError("Value for dataset_name_or_id `{self.dataset_name_or_id}` not a valid dataset name or ID.") + raise ValueError( + f"Value for dataset_name_or_id `{self.dataset_name_or_id}` not a valid dataset name or ID." + ) try: from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name @@ -598,8 +600,7 @@ def train_single_model_command( prefix = "-" if _key in {"p", "pretrained_weights"} else "--" cmd += [f"{prefix}{_key}", str(_value)] - # ensure components are quoted strings to prevent injection (not robust in Windows) - cmd_str: list[str] = [shlex.quote(str(c)) for c in cmd] + cmd_str: list[str] = [str(c) for c in cmd] return cmd_str, env diff --git a/tests/integration/test_integration_nnunetv2_runner.py b/tests/integration/test_integration_nnunetv2_runner.py index 44291e5722..1da131c890 100644 --- a/tests/integration/test_integration_nnunetv2_runner.py +++ b/tests/integration/test_integration_nnunetv2_runner.py @@ -11,13 +11,16 @@ from __future__ import annotations +import logging import os import tempfile import unittest +from textwrap import dedent import nibabel as nib import numpy as np +import monai.apps.nnunet.nnunetv2_runner from monai.apps.nnunet import nnUNetV2Runner from monai.bundle.config_parser import ConfigParser from monai.data import create_test_image_3d @@ -27,6 +30,8 @@ _, has_tb = optional_import("torch.utils.tensorboard", name="SummaryWriter") _, has_nnunet = optional_import("nnunetv2") +monai.apps.nnunet.nnunetv2_runner.logger.setLevel(logging.ERROR) # suppress warning logging to clean up test output + sim_datalist: dict[str, list[dict]] = { "testing": [{"image": "val_001.fake.nii.gz"}, {"image": "val_002.fake.nii.gz"}], "training": [ @@ -91,5 +96,75 @@ def tearDown(self) -> None: self.test_dir.cleanup() +@skip_if_quick +@unittest.skipIf(not has_nnunet, "no nnunetv2") +class TestnnUNetV2RunnerSecurity(unittest.TestCase): + def setUp(self) -> None: + self.test_dir = tempfile.TemporaryDirectory() + test_path = self.test_dir.name + + self.good_yml1 = os.path.join(test_path, "good1.yml") + self.good_yml2 = os.path.join(test_path, "good2.yml") + self.inject_yml = os.path.join(test_path, "test.yml") + + good_yml_content1 = """ + dataset_name_or_id: Dataset123 + dataroot: ./data + datalist: ./lists/task4.json + work_dir: ./work + nnunet_raw: ./nnUNet_raw + nnunet_preprocessed: ./nnUNet_preprocessed + nnunet_results: ./nnUNet_results + """ + + with open(self.good_yml1, "w") as o: + o.write(dedent(good_yml_content1)) + + good_yml_content2 = """ + dataset_name_or_id: 123 + dataroot: ./data + datalist: ./lists/task4.json + work_dir: ./work + nnunet_raw: ./nnUNet_raw + nnunet_preprocessed: ./nnUNet_preprocessed + nnunet_results: ./nnUNet_results + """ + + with open(self.good_yml2, "w") as o: + o.write(dedent(good_yml_content2)) + + # define a config file with code-injecting dataset name + injecting_yml_content = """ + dataset_name_or_id: '4 & echo "This is exploited" > "./test.txt" & rem' + dataroot: ./data + datalist: ./lists/task4.json + work_dir: ./work + nnunet_raw: ./nnUNet_raw + nnunet_preprocessed: ./nnUNet_preprocessed + nnunet_results: ./nnUNet_results + """ + + with open(self.inject_yml, "w") as o: + o.write(dedent(injecting_yml_content)) + + def test_nnunetv2runner_good_dataset_name(self) -> None: + """ + Test the dataset name given must conform to the nnUNet requirement of being an int or "Dataset###". + """ + for ds in [self.good_yml1, self.good_yml2]: + with self.subTest(f"Testing {os.path.basename(ds)}"): + nnUNetV2Runner(input_config=ds, trainer_class_name="nnUNetTrainer") + + def test_nnunetv2runner_bad_dataset_name(self) -> None: + """ + Test the dataset name given must conform to the nnUNet requirement of being an int or "Dataset###". + """ + with self.assertRaises(ValueError): + nnUNetV2Runner(input_config=self.inject_yml, trainer_class_name="nnUNetTrainer") + + def tearDown(self) -> None: + self.test_dir.cleanup() + + if __name__ == "__main__": unittest.main() From a4e54654b9103525ba1844629e96ec08f66b0299 Mon Sep 17 00:00:00 2001 From: Soumya Snigdha Kundu Date: Sun, 31 May 2026 09:27:47 +0100 Subject: [PATCH 6/6] Keep generated nnUNet test paths inside the temp dir (#8886) The TestnnUNetV2RunnerSecurity YAMLs used cwd-relative paths (./work, ./nnUNet_raw, ./nnUNet_preprocessed, ./nnUNet_results). nnUNetV2Runner.__init__ creates the raw/preprocessed/results dirs eagerly (before dataset-name validation), so instantiating the runner in these tests wrote into the repo cwd and tearDown could not clean it up. Root all generated paths in self.test_dir so the temp dir owns them, including the injection payload's redirect target. Signed-off-by: Soumya Snigdha Kundu --- .../test_integration_nnunetv2_runner.py | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/integration/test_integration_nnunetv2_runner.py b/tests/integration/test_integration_nnunetv2_runner.py index 1da131c890..f4cf0b4fb1 100644 --- a/tests/integration/test_integration_nnunetv2_runner.py +++ b/tests/integration/test_integration_nnunetv2_runner.py @@ -107,41 +107,41 @@ def setUp(self) -> None: self.good_yml2 = os.path.join(test_path, "good2.yml") self.inject_yml = os.path.join(test_path, "test.yml") - good_yml_content1 = """ + good_yml_content1 = f""" dataset_name_or_id: Dataset123 - dataroot: ./data - datalist: ./lists/task4.json - work_dir: ./work - nnunet_raw: ./nnUNet_raw - nnunet_preprocessed: ./nnUNet_preprocessed - nnunet_results: ./nnUNet_results + dataroot: {test_path}/data + datalist: {test_path}/lists/task4.json + work_dir: {test_path}/work + nnunet_raw: {test_path}/nnUNet_raw + nnunet_preprocessed: {test_path}/nnUNet_preprocessed + nnunet_results: {test_path}/nnUNet_results """ with open(self.good_yml1, "w") as o: o.write(dedent(good_yml_content1)) - good_yml_content2 = """ + good_yml_content2 = f""" dataset_name_or_id: 123 - dataroot: ./data - datalist: ./lists/task4.json - work_dir: ./work - nnunet_raw: ./nnUNet_raw - nnunet_preprocessed: ./nnUNet_preprocessed - nnunet_results: ./nnUNet_results + dataroot: {test_path}/data + datalist: {test_path}/lists/task4.json + work_dir: {test_path}/work + nnunet_raw: {test_path}/nnUNet_raw + nnunet_preprocessed: {test_path}/nnUNet_preprocessed + nnunet_results: {test_path}/nnUNet_results """ with open(self.good_yml2, "w") as o: o.write(dedent(good_yml_content2)) # define a config file with code-injecting dataset name - injecting_yml_content = """ - dataset_name_or_id: '4 & echo "This is exploited" > "./test.txt" & rem' - dataroot: ./data - datalist: ./lists/task4.json - work_dir: ./work - nnunet_raw: ./nnUNet_raw - nnunet_preprocessed: ./nnUNet_preprocessed - nnunet_results: ./nnUNet_results + injecting_yml_content = f""" + dataset_name_or_id: '4 & echo "This is exploited" > "{test_path}/test.txt" & rem' + dataroot: {test_path}/data + datalist: {test_path}/lists/task4.json + work_dir: {test_path}/work + nnunet_raw: {test_path}/nnUNet_raw + nnunet_preprocessed: {test_path}/nnUNet_preprocessed + nnunet_results: {test_path}/nnUNet_results """ with open(self.inject_yml, "w") as o: