From 65ece58aa9bed0f58b15bdd6d8575d0eb69f7f22 Mon Sep 17 00:00:00 2001
From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
Date: Sat, 11 Apr 2026 00:28:58 +0100
Subject: [PATCH 1/6] Address code injection vulnerability

Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 98b265cbbb..b9be443697 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -196,6 +196,10 @@ def __init__(
         # dataset_name_or_id has to be a string
         self.dataset_name_or_id = str(self.input_info.pop("dataset_name_or_id", 1))
 
+        # ensure the dataset name is a single identifier/number, this prevents code injection when composing commands
+        if len(shlex.split(self.dataset_name_or_id))!=1:
+            raise ValueError("Value for dataset_name_or_id `{self.dataset_name_or_id}` not a valid dataset name or ID.")
+
         try:
             from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name
 
@@ -574,7 +578,7 @@ def train_single_model_command(
 
         cmd = [
             "nnUNetv2_train",
-            f"{self.dataset_name_or_id}",
+            f"{self.dataset_name_or_id}",  # value is checked in constructor to be an identifier or number
             f"{config}",
             f"{fold}",
             "-tr",

From 789a67bcc18af6fead10ac33c52c1393c1edc956 Mon Sep 17 00:00:00 2001
From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
Date: Sat, 11 Apr 2026 16:15:06 +0100
Subject: [PATCH 2/6] Using regex to match a valid dataset name

Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index b9be443697..6fbbd67df5 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -14,6 +14,7 @@
 
 import glob
 import os
+import re
 import shlex
 import subprocess
 from typing import Any
@@ -34,6 +35,8 @@
 
 __all__ = ["nnUNetV2Runner"]
 
+DATASET_ID_FORMAT = r"Dataset[0-9]{3}_[0-9]{3}|[0-9]+"  # regex format for a valid nnUnet dataset name
+
 
 class nnUNetV2Runner:  # noqa: N801
     """
@@ -197,7 +200,7 @@ def __init__(
         self.dataset_name_or_id = str(self.input_info.pop("dataset_name_or_id", 1))
 
         # ensure the dataset name is a single identifier/number, this prevents code injection when composing commands
-        if len(shlex.split(self.dataset_name_or_id))!=1:
+        if re.fullmatch(DATASET_ID_FORMAT, self.dataset_name_or_id) is None:
             raise ValueError("Value for dataset_name_or_id `{self.dataset_name_or_id}` not a valid dataset name or ID.")
 
         try:
@@ -578,21 +581,25 @@ def train_single_model_command(
 
         cmd = [
             "nnUNetv2_train",
-            f"{self.dataset_name_or_id}",  # value is checked in constructor to be an identifier or number
-            f"{config}",
-            f"{fold}",
+            self.dataset_name_or_id,
+            config,
+            fold,
             "-tr",
-            f"{self.trainer_class_name}",
+            self.trainer_class_name,
             "-num_gpus",
-            f"{num_gpus}",
+            num_gpus,
         ]
+
         if self.export_validation_probabilities:
             cmd.append("--npz")
+
         for _key, _value in kwargs.items():
-            if _key == "p" or _key == "pretrained_weights":
-                cmd.extend([f"-{_key}", f"{_value}"])
-            else:
-                cmd.extend([f"--{_key}", f"{_value}"])
+            prefix = "-" if _key in {"p", "pretrained_weights"} else "--"
+            cmd += [f"{prefix}{_key}", str(_value)]
+
+        # ensure components are quoted strings to prevent injection (not robust in Windows)
+        cmd = [shlex.quote(str(c)) for c in cmd]
+
         return cmd, env
 
     def train(

From c3c79abff56cdfb8d6ecf5a7120970d6b3a3a50d Mon Sep 17 00:00:00 2001
From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
Date: Thu, 7 May 2026 12:18:41 +0100
Subject: [PATCH 3/6] Typing fix

Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 6fbbd67df5..f4b8e1e4eb 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -555,7 +555,7 @@ def train_single_model_command(
         Raises:
             ValueError: If gpu_id is an empty tuple or list.
         """
-        env = os.environ.copy()
+        env:dict[str, str]  = os.environ.copy()
         device_setting: str = "0"
         num_gpus = 1
         if isinstance(gpu_id, str):
@@ -598,9 +598,9 @@ def train_single_model_command(
             cmd += [f"{prefix}{_key}", str(_value)]
 
         # ensure components are quoted strings to prevent injection (not robust in Windows)
-        cmd = [shlex.quote(str(c)) for c in cmd]
+        cmd_str:list[str] = [shlex.quote(str(c)) for c in cmd]
 
-        return cmd, env
+        return cmd_str, env
 
     def train(
         self,

From 3111e42969dff84a3b29e51384cd50a26719e09c Mon Sep 17 00:00:00 2001
From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
Date: Wed, 20 May 2026 12:00:28 +0100
Subject: [PATCH 4/6] Minor fixes and exceptions added

Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 30 ++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index f4b8e1e4eb..175dc37ad9 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -198,6 +198,7 @@ def __init__(
 
         # dataset_name_or_id has to be a string
         self.dataset_name_or_id = str(self.input_info.pop("dataset_name_or_id", 1))
+        self.dataset_name: str | None = None
 
         # ensure the dataset name is a single identifier/number, this prevents code injection when composing commands
         if re.fullmatch(DATASET_ID_FORMAT, self.dataset_name_or_id) is None:
@@ -206,7 +207,7 @@ def __init__(
         try:
             from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name
 
-            self.dataset_name = maybe_convert_to_dataset_name(int(self.dataset_name_or_id))
+            self.dataset_name = maybe_convert_to_dataset_name(self.dataset_name_or_id)
         except BaseException:
             logger.warning(
                 f"Dataset with name/ID: {self.dataset_name_or_id} cannot be found in the record. "
@@ -246,7 +247,7 @@ def convert_dataset(self):
 
             from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name
 
-            self.dataset_name = maybe_convert_to_dataset_name(int(self.dataset_name_or_id))
+            self.dataset_name = maybe_convert_to_dataset_name(self.dataset_name_or_id)
 
             datalist_json = ConfigParser.load_config_file(self.input_info.pop("datalist"))
 
@@ -555,7 +556,7 @@ def train_single_model_command(
         Raises:
             ValueError: If gpu_id is an empty tuple or list.
         """
-        env:dict[str, str]  = os.environ.copy()
+        env: dict[str, str] = os.environ.copy()
         device_setting: str = "0"
         num_gpus = 1
         if isinstance(gpu_id, str):
@@ -598,7 +599,7 @@ def train_single_model_command(
             cmd += [f"{prefix}{_key}", str(_value)]
 
         # ensure components are quoted strings to prevent injection (not robust in Windows)
-        cmd_str:list[str] = [shlex.quote(str(c)) for c in cmd]
+        cmd_str: list[str] = [shlex.quote(str(c)) for c in cmd]
 
         return cmd_str, env
 
@@ -652,7 +653,14 @@ def train_parallel_cmd(
                 None (all available GPUs).
             kwargs: this optional parameter allows you to specify additional arguments defined in the
                 ``train_single_model`` method.
+
+        Raises:
+            ValueError: self.dataset_name must have a value, ie. when using an existing dataset or after creating one.
         """
+
+        if self.dataset_name is None:
+            raise ValueError(f"A valid dataset name must be given in {self.dataset_name=}.")
+
         # unpack compressed files
         folder_names = []
         for root, _, files in os.walk(os.path.join(self.nnunet_preprocessed, self.dataset_name)):
@@ -707,7 +715,14 @@ def train_parallel(
                 None (all available GPUs).
             kwargs: this optional parameter allows you to specify additional arguments defined in the
                 ``train_single_model`` method.
+
+        Raises:
+            ValueError: self.dataset_name must have a value, ie. when using an existing dataset or after creating one.
         """
+
+        if self.dataset_name is None:
+            raise ValueError(f"A valid dataset name must be given in {self.dataset_name=}.")
+
         all_cmds = self.train_parallel_cmd(configs=configs, gpu_id_for_all=gpu_id_for_all, **kwargs)
         for s, cmds in enumerate(all_cmds):
             for gpu_id, gpu_cmd in cmds.items():
@@ -919,7 +934,14 @@ def predict_ensemble_postprocessing(
             run_postprocessing: whether to conduct post-processing
             kwargs: this optional parameter allows you to specify additional arguments defined in the
                 ``predict`` method.
+
+        Raises:
+            ValueError: self.dataset_name must have a value, ie. when using an existing dataset or after creating one.
         """
+
+        if self.dataset_name is None:
+            raise ValueError(f"A valid dataset name must be given in {self.dataset_name=}.")
+
         from nnunetv2.ensembling.ensemble import ensemble_folders
         from nnunetv2.postprocessing.remove_connected_components import apply_postprocessing_to_folder
         from nnunetv2.utilities.file_path_utilities import get_output_folder

From 95f4b6309e23ea8f3306cd9ad889b70e3c40cbdf Mon Sep 17 00:00:00 2001
From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
Date: Wed, 27 May 2026 15:41:35 +0100
Subject: [PATCH 5/6] Update from comments

Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
---
 monai/apps/nnunet/nnunetv2_runner.py          |  9 ++-
 .../test_integration_nnunetv2_runner.py       | 75 +++++++++++++++++++
 2 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 5f8f408895..547e73332f 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -35,7 +35,7 @@
 
 __all__ = ["nnUNetV2Runner"]
 
-DATASET_ID_FORMAT = r"Dataset[0-9]{3}_[0-9]{3}|[0-9]+"  # regex format for a valid nnUnet dataset name
+DATASET_ID_FORMAT = r"Dataset[0-9]{3}|[0-9]+"  # regex format for a valid nnUnet dataset name
 
 
 class nnUNetV2Runner:  # noqa: N801
@@ -202,7 +202,9 @@ def __init__(
 
         # ensure the dataset name is a single identifier/number, this prevents code injection when composing commands
         if re.fullmatch(DATASET_ID_FORMAT, self.dataset_name_or_id) is None:
-            raise ValueError("Value for dataset_name_or_id `{self.dataset_name_or_id}` not a valid dataset name or ID.")
+            raise ValueError(
+                f"Value for dataset_name_or_id `{self.dataset_name_or_id}` not a valid dataset name or ID."
+            )
 
         try:
             from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name
@@ -598,8 +600,7 @@ def train_single_model_command(
             prefix = "-" if _key in {"p", "pretrained_weights"} else "--"
             cmd += [f"{prefix}{_key}", str(_value)]
 
-        # ensure components are quoted strings to prevent injection (not robust in Windows)
-        cmd_str: list[str] = [shlex.quote(str(c)) for c in cmd]
+        cmd_str: list[str] = [str(c) for c in cmd]
 
         return cmd_str, env
 
diff --git a/tests/integration/test_integration_nnunetv2_runner.py b/tests/integration/test_integration_nnunetv2_runner.py
index 44291e5722..1da131c890 100644
--- a/tests/integration/test_integration_nnunetv2_runner.py
+++ b/tests/integration/test_integration_nnunetv2_runner.py
@@ -11,13 +11,16 @@
 
 from __future__ import annotations
 
+import logging
 import os
 import tempfile
 import unittest
+from textwrap import dedent
 
 import nibabel as nib
 import numpy as np
 
+import monai.apps.nnunet.nnunetv2_runner
 from monai.apps.nnunet import nnUNetV2Runner
 from monai.bundle.config_parser import ConfigParser
 from monai.data import create_test_image_3d
@@ -27,6 +30,8 @@
 _, has_tb = optional_import("torch.utils.tensorboard", name="SummaryWriter")
 _, has_nnunet = optional_import("nnunetv2")
 
+monai.apps.nnunet.nnunetv2_runner.logger.setLevel(logging.ERROR)  # suppress warning logging to clean up test output
+
 sim_datalist: dict[str, list[dict]] = {
     "testing": [{"image": "val_001.fake.nii.gz"}, {"image": "val_002.fake.nii.gz"}],
     "training": [
@@ -91,5 +96,75 @@ def tearDown(self) -> None:
         self.test_dir.cleanup()
 
 
+@skip_if_quick
+@unittest.skipIf(not has_nnunet, "no nnunetv2")
+class TestnnUNetV2RunnerSecurity(unittest.TestCase):
+    def setUp(self) -> None:
+        self.test_dir = tempfile.TemporaryDirectory()
+        test_path = self.test_dir.name
+
+        self.good_yml1 = os.path.join(test_path, "good1.yml")
+        self.good_yml2 = os.path.join(test_path, "good2.yml")
+        self.inject_yml = os.path.join(test_path, "test.yml")
+
+        good_yml_content1 = """
+            dataset_name_or_id: Dataset123
+            dataroot: ./data
+            datalist: ./lists/task4.json
+            work_dir: ./work
+            nnunet_raw: ./nnUNet_raw
+            nnunet_preprocessed: ./nnUNet_preprocessed
+            nnunet_results: ./nnUNet_results
+        """
+
+        with open(self.good_yml1, "w") as o:
+            o.write(dedent(good_yml_content1))
+
+        good_yml_content2 = """
+            dataset_name_or_id: 123
+            dataroot: ./data
+            datalist: ./lists/task4.json
+            work_dir: ./work
+            nnunet_raw: ./nnUNet_raw
+            nnunet_preprocessed: ./nnUNet_preprocessed
+            nnunet_results: ./nnUNet_results
+        """
+
+        with open(self.good_yml2, "w") as o:
+            o.write(dedent(good_yml_content2))
+
+        # define a config file with code-injecting dataset name
+        injecting_yml_content = """
+            dataset_name_or_id: '4 & echo "This is exploited" > "./test.txt" & rem'
+            dataroot: ./data
+            datalist: ./lists/task4.json
+            work_dir: ./work
+            nnunet_raw: ./nnUNet_raw
+            nnunet_preprocessed: ./nnUNet_preprocessed
+            nnunet_results: ./nnUNet_results
+        """
+
+        with open(self.inject_yml, "w") as o:
+            o.write(dedent(injecting_yml_content))
+
+    def test_nnunetv2runner_good_dataset_name(self) -> None:
+        """
+        Test the dataset name given must conform to the nnUNet requirement of being an int or "Dataset###".
+        """
+        for ds in [self.good_yml1, self.good_yml2]:
+            with self.subTest(f"Testing {os.path.basename(ds)}"):
+                nnUNetV2Runner(input_config=ds, trainer_class_name="nnUNetTrainer")
+
+    def test_nnunetv2runner_bad_dataset_name(self) -> None:
+        """
+        Test the dataset name given must conform to the nnUNet requirement of being an int or "Dataset###".
+        """
+        with self.assertRaises(ValueError):
+            nnUNetV2Runner(input_config=self.inject_yml, trainer_class_name="nnUNetTrainer")
+
+    def tearDown(self) -> None:
+        self.test_dir.cleanup()
+
+
 if __name__ == "__main__":
     unittest.main()

From a4e54654b9103525ba1844629e96ec08f66b0299 Mon Sep 17 00:00:00 2001
From: Soumya Snigdha Kundu <soumya_snigdha.kundu@kcl.ac.uk>
Date: Sun, 31 May 2026 09:27:47 +0100
Subject: [PATCH 6/6] Keep generated nnUNet test paths inside the temp dir
 (#8886)

The TestnnUNetV2RunnerSecurity YAMLs used cwd-relative paths
(./work, ./nnUNet_raw, ./nnUNet_preprocessed, ./nnUNet_results).
nnUNetV2Runner.__init__ creates the raw/preprocessed/results dirs
eagerly (before dataset-name validation), so instantiating the runner
in these tests wrote into the repo cwd and tearDown could not clean it
up. Root all generated paths in self.test_dir so the temp dir owns
them, including the injection payload's redirect target.

Signed-off-by: Soumya Snigdha Kundu <soumya_snigdha.kundu@kcl.ac.uk>
---
 .../test_integration_nnunetv2_runner.py       | 44 +++++++++----------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/tests/integration/test_integration_nnunetv2_runner.py b/tests/integration/test_integration_nnunetv2_runner.py
index 1da131c890..f4cf0b4fb1 100644
--- a/tests/integration/test_integration_nnunetv2_runner.py
+++ b/tests/integration/test_integration_nnunetv2_runner.py
@@ -107,41 +107,41 @@ def setUp(self) -> None:
         self.good_yml2 = os.path.join(test_path, "good2.yml")
         self.inject_yml = os.path.join(test_path, "test.yml")
 
-        good_yml_content1 = """
+        good_yml_content1 = f"""
             dataset_name_or_id: Dataset123
-            dataroot: ./data
-            datalist: ./lists/task4.json
-            work_dir: ./work
-            nnunet_raw: ./nnUNet_raw
-            nnunet_preprocessed: ./nnUNet_preprocessed
-            nnunet_results: ./nnUNet_results
+            dataroot: {test_path}/data
+            datalist: {test_path}/lists/task4.json
+            work_dir: {test_path}/work
+            nnunet_raw: {test_path}/nnUNet_raw
+            nnunet_preprocessed: {test_path}/nnUNet_preprocessed
+            nnunet_results: {test_path}/nnUNet_results
         """
 
         with open(self.good_yml1, "w") as o:
             o.write(dedent(good_yml_content1))
 
-        good_yml_content2 = """
+        good_yml_content2 = f"""
             dataset_name_or_id: 123
-            dataroot: ./data
-            datalist: ./lists/task4.json
-            work_dir: ./work
-            nnunet_raw: ./nnUNet_raw
-            nnunet_preprocessed: ./nnUNet_preprocessed
-            nnunet_results: ./nnUNet_results
+            dataroot: {test_path}/data
+            datalist: {test_path}/lists/task4.json
+            work_dir: {test_path}/work
+            nnunet_raw: {test_path}/nnUNet_raw
+            nnunet_preprocessed: {test_path}/nnUNet_preprocessed
+            nnunet_results: {test_path}/nnUNet_results
         """
 
         with open(self.good_yml2, "w") as o:
             o.write(dedent(good_yml_content2))
 
         # define a config file with code-injecting dataset name
-        injecting_yml_content = """
-            dataset_name_or_id: '4 & echo "This is exploited" > "./test.txt" & rem'
-            dataroot: ./data
-            datalist: ./lists/task4.json
-            work_dir: ./work
-            nnunet_raw: ./nnUNet_raw
-            nnunet_preprocessed: ./nnUNet_preprocessed
-            nnunet_results: ./nnUNet_results
+        injecting_yml_content = f"""
+            dataset_name_or_id: '4 & echo "This is exploited" > "{test_path}/test.txt" & rem'
+            dataroot: {test_path}/data
+            datalist: {test_path}/lists/task4.json
+            work_dir: {test_path}/work
+            nnunet_raw: {test_path}/nnUNet_raw
+            nnunet_preprocessed: {test_path}/nnUNet_preprocessed
+            nnunet_results: {test_path}/nnUNet_results
         """
 
         with open(self.inject_yml, "w") as o: