From c98fd6e41b2d14e2ccc723f44d47846f25a177b8 Mon Sep 17 00:00:00 2001
From: Keita Watanabe <keitaw09@gmail.com>
Date: Fri, 27 Mar 2026 12:20:53 +0000
Subject: [PATCH 1/6] Add MIG profile support for ml.p6-b300.48xlarge
 (Blackwell Ultra)

Add ml.p6-b300.48xlarge to INSTANCE_TYPE_MIG_PROFILES in constants.py
with the correct B300 MIG profiles derived from the NVIDIA GPU Operator
v25.3.0 upstream ConfigMap (device-filter 0x318210DE):

- mig-1g.34gb, mig-1g.67gb, mig-2g.67gb
- mig-3g.135gb, mig-4g.135gb, mig-7g.269gb

Also add the corresponding uniform and mixed MIG partition profiles
to the Helm chart default-mig-config.yaml ConfigMap, following the
same pattern used for existing GPU types (H100, H200, B200).

The ml.p6-b300.48xlarge instance type (B300 GPUs, 288GB HBM3e with
~269GB usable) was already registered in INSTANCE_RESOURCES but had
no MIG profile mapping, causing HyperPod MIG validation to reject
accelerator partition requests on this instance type.
---
 .../config/default-mig-config.yaml            | 106 ++++++++++++++++++
 src/sagemaker/hyperpod/training/constants.py  |   1 +
 2 files changed, 107 insertions(+)

diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml
index 8f4943d7..f18523ed 100644
--- a/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml
+++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml
@@ -341,12 +341,90 @@ mig-configs:
         "1g.24gb": 2
         "2g.48gb": 1
 
+  # P6-B300 (Blackwell Ultra, 288GB HBM3e, ~269GB usable) profiles
+  # Profiles: 1g.34gb (x7), 1g.67gb (x4), 2g.67gb (x3), 3g.135gb (x2), 4g.135gb (x1), 7g.269gb (x1)
+  # Upstream ref: NVIDIA GPU Operator v25.3.0, device-filter 0x318210DE
+
+  all-1g.34gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "1g.34gb": 7
+
   all-1g.67gb:
     - devices: all
       mig-enabled: true
       mig-devices:
         "1g.67gb": 4
 
+  all-2g.67gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "2g.67gb": 3
+
+  all-3g.135gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "3g.135gb": 2
+
+  all-4g.135gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "4g.135gb": 1
+
+  all-7g.269gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "7g.269gb": 1
+
+  mixed-1-3g.135gb-1-4g.135gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "3g.135gb": 1
+        "4g.135gb": 1
+
+  mixed-1-1g.34gb-1-2g.67gb-1-4g.135gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "1g.34gb": 1
+        "2g.67gb": 1
+        "4g.135gb": 1
+
+  mixed-3-1g.34gb-1-4g.135gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "1g.34gb": 3
+        "4g.135gb": 1
+
+  mixed-1-1g.34gb-1-2g.67gb-1-3g.135gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "1g.34gb": 1
+        "2g.67gb": 1
+        "3g.135gb": 1
+
+  mixed-3-1g.34gb-1-3g.135gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "1g.34gb": 3
+        "3g.135gb": 1
+
+  mixed-2-2g.67gb-1-3g.135gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "2g.67gb": 2
+        "3g.135gb": 1
+
   mixed-2-1g.34gb-1-2g.67gb-1-3g.135gb:
     - devices: all
       mig-enabled: true
@@ -354,3 +432,31 @@ mig-configs:
         "1g.34gb": 2
         "2g.67gb": 1
         "3g.135gb": 1
+
+  mixed-4-1g.34gb-1-3g.135gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "1g.34gb": 4
+        "3g.135gb": 1
+
+  mixed-1-1g.34gb-3-2g.67gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "1g.34gb": 1
+        "2g.67gb": 3
+
+  mixed-3-1g.34gb-2-2g.67gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "1g.34gb": 3
+        "2g.67gb": 2
+
+  mixed-5-1g.34gb-1-2g.67gb:
+    - devices: all
+      mig-enabled: true
+      mig-devices:
+        "1g.34gb": 5
+        "2g.67gb": 1
diff --git a/src/sagemaker/hyperpod/training/constants.py b/src/sagemaker/hyperpod/training/constants.py
index 29f58fa8..59230365 100644
--- a/src/sagemaker/hyperpod/training/constants.py
+++ b/src/sagemaker/hyperpod/training/constants.py
@@ -132,6 +132,7 @@
     'ml.p5e.48xlarge': ['mig-1g.18gb', 'mig-1g.35gb', 'mig-2g.35gb', 'mig-3g.71gb', 'mig-4g.71gb', 'mig-7g.141gb'],
     'ml.p5en.48xlarge': ['mig-1g.18gb', 'mig-1g.35gb', 'mig-2g.35gb', 'mig-3g.71gb', 'mig-4g.71gb', 'mig-7g.141gb'],
     'p6-b200.48xlarge': ['mig-1g.23gb', 'mig-1g.45gb', 'mig-2g.45gb', 'mig-3g.90gb', 'mig-4g.90gb', 'mig-7g.180gb'],
+    'ml.p6-b300.48xlarge': ['mig-1g.34gb', 'mig-1g.67gb', 'mig-2g.67gb', 'mig-3g.135gb', 'mig-4g.135gb', 'mig-7g.269gb'],
     'ml.p6e-gb200.36xlarge': ['mig-1g.23gb', 'mig-1g.47gb', 'mig-2g.47gb', 'mig-3g.93gb', 'mig-4g.93gb', 'mig-7g.186gb'],
     'ml.g7e.2xlarge': ['mig-1g.24gb', 'mig-2g.48gb', 'mig-4g.96gb'],
     'ml.g7e.4xlarge': ['mig-1g.24gb', 'mig-2g.48gb', 'mig-4g.96gb'],

From cd5bc5db0e838cea6e18337d14f49760d68ec0ae Mon Sep 17 00:00:00 2001
From: Keita Watanabe <keitaw09@gmail.com>
Date: Sat, 28 Mar 2026 00:06:36 +0000
Subject: [PATCH 2/6] Add unit tests for B300 MIG profile validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Covers ml.p6-b300.48xlarge MIG profile support added in PR #398:
- Profile presence in INSTANCE_TYPE_MIG_PROFILES
- Complete profile list verification (6 profiles)
- All profiles in ALLOWED_ACCELERATOR_PARTITION_TYPES
- GPU slice extraction for one B300 profile per slice size
  (1g→1, 2g→2, 3g→3, 4g→4, 7g→7)
- CPU/memory default calculation for each profile at max instances
- Validation acceptance for valid B300 profiles
- Validation rejection for invalid profiles on B300 instance type
---
 .../cli/test_accelerator_partition_util.py    | 87 +++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/test/unit_tests/cli/test_accelerator_partition_util.py b/test/unit_tests/cli/test_accelerator_partition_util.py
index b43a44ea..56b1c4db 100644
--- a/test/unit_tests/cli/test_accelerator_partition_util.py
+++ b/test/unit_tests/cli/test_accelerator_partition_util.py
@@ -1,9 +1,14 @@
 from sagemaker.hyperpod.training.accelerator_partition_util import (
     _extract_gpu_slices_from_accelerator_partition_type,
     _get_accelerator_partition,
+    _get_accelerator_partition_defaults,
     _set_default_accelerator_partition_val,
     _validate_accelerator_partition,
 )
+from sagemaker.hyperpod.training.constants import (
+    ALLOWED_ACCELERATOR_PARTITION_TYPES,
+    INSTANCE_TYPE_MIG_PROFILES,
+)
 import pytest
 from unittest.mock import patch, MagicMock
 
@@ -85,3 +90,85 @@ def test_validate_accelerator_partition_fields(self, mock_k8s_client, partition_
         valid, error = _validate_accelerator_partition(partition_type, accelerators, accelerators_limit, node_count, instance_type)
         assert valid is expected_valid
         assert error_check(error)
+
+
+class TestB300MigProfiles:
+    """Tests for NVIDIA B300 (Blackwell Ultra) MIG profile support."""
+
+    def test_b300_in_instance_type_mig_profiles(self):
+        assert "ml.p6-b300.48xlarge" in INSTANCE_TYPE_MIG_PROFILES
+
+    def test_b300_profiles_complete(self):
+        profiles = INSTANCE_TYPE_MIG_PROFILES["ml.p6-b300.48xlarge"]
+        expected = [
+            "mig-1g.34gb",
+            "mig-1g.67gb",
+            "mig-2g.67gb",
+            "mig-3g.135gb",
+            "mig-4g.135gb",
+            "mig-7g.269gb",
+        ]
+        assert profiles == expected
+
+    def test_b300_profiles_in_allowed_set(self):
+        for profile in INSTANCE_TYPE_MIG_PROFILES["ml.p6-b300.48xlarge"]:
+            assert profile in ALLOWED_ACCELERATOR_PARTITION_TYPES
+
+    @pytest.mark.parametrize(
+        "partition_type,expected_slices",
+        [
+            ("mig-1g.34gb", 1),
+            ("mig-2g.67gb", 2),
+            ("mig-3g.135gb", 3),
+            ("mig-4g.135gb", 4),
+            ("mig-7g.269gb", 7),
+        ],
+    )
+    def test_extract_gpu_slices_b300(self, partition_type, expected_slices):
+        assert _extract_gpu_slices_from_accelerator_partition_type(partition_type) == expected_slices
+
+    @pytest.mark.parametrize(
+        "partition_type,partition_count",
+        [
+            ("mig-1g.34gb", 7),
+            ("mig-1g.67gb", 4),
+            ("mig-2g.67gb", 3),
+            ("mig-3g.135gb", 2),
+            ("mig-4g.135gb", 1),
+            ("mig-7g.269gb", 1),
+        ],
+    )
+    def test_accelerator_partition_defaults_b300(self, partition_type, partition_count):
+        """Verify CPU/memory defaults are calculated proportionally for B300 MIG profiles."""
+        defaults = _get_accelerator_partition_defaults(
+            "ml.p6-b300.48xlarge", partition_type, partition_count
+        )
+        assert "cpu" in defaults
+        assert "memory" in defaults
+        assert float(defaults["cpu"]) > 0
+        assert float(defaults["memory"].replace("Gi", "")) > 0
+
+    @pytest.mark.parametrize(
+        "partition_type,expected_valid,error_check",
+        [
+            ("mig-1g.34gb", True, lambda e: e == ""),
+            ("mig-3g.135gb", True, lambda e: e == ""),
+            ("mig-7g.269gb", True, lambda e: e == ""),
+            ("mig-1g.5gb", False, lambda e: "not supported on instance type" in e),
+        ],
+    )
+    @patch("sagemaker.hyperpod.training.accelerator_partition_util.KubernetesClient")
+    def test_validate_b300_partition(
+        self, mock_k8s_client, partition_type, expected_valid, error_check
+    ):
+        mock_node = MagicMock()
+        mock_node.status.allocatable = {f"nvidia.com/{partition_type}": "1"}
+        mock_k8s_client.return_value.get_core_v1_api.return_value.list_node.return_value.items = [
+            mock_node
+        ]
+
+        valid, error = _validate_accelerator_partition(
+            partition_type, None, None, None, "ml.p6-b300.48xlarge"
+        )
+        assert valid is expected_valid
+        assert error_check(error)

From e27a807754c52a1d23cb70f1646ee65a33473181 Mon Sep 17 00:00:00 2001
From: Keita Watanabe <keitaw09@gmail.com>
Date: Sat, 28 Mar 2026 00:25:11 +0000
Subject: [PATCH 3/6] Remove redundant tests and strengthen assertions

- Delete test_b300_in_instance_type_mig_profiles (subsumed by
  test_b300_profiles_complete, which raises KeyError if the key
  is missing)
- Delete test_b300_profiles_in_allowed_set (tautological: the
  allowed set is computed as union of all profile values)
- Delete test_extract_gpu_slices_b300 (instance-type-agnostic
  regex already covered by existing parametrized tests)
- Replace > 0 assertions with exact expected values in
  test_accelerator_partition_defaults_b300
- Fix misleading mock in test_validate_b300_partition: use empty
  allocatable for the invalid-profile case since validation fails
  at static parameter check before cluster check
- Remove unused ALLOWED_ACCELERATOR_PARTITION_TYPES import
---
 .../cli/test_accelerator_partition_util.py    | 54 ++++++-------------
 1 file changed, 15 insertions(+), 39 deletions(-)

diff --git a/test/unit_tests/cli/test_accelerator_partition_util.py b/test/unit_tests/cli/test_accelerator_partition_util.py
index 56b1c4db..7e8076d7 100644
--- a/test/unit_tests/cli/test_accelerator_partition_util.py
+++ b/test/unit_tests/cli/test_accelerator_partition_util.py
@@ -5,10 +5,7 @@
     _set_default_accelerator_partition_val,
     _validate_accelerator_partition,
 )
-from sagemaker.hyperpod.training.constants import (
-    ALLOWED_ACCELERATOR_PARTITION_TYPES,
-    INSTANCE_TYPE_MIG_PROFILES,
-)
+from sagemaker.hyperpod.training.constants import INSTANCE_TYPE_MIG_PROFILES
 import pytest
 from unittest.mock import patch, MagicMock
 
@@ -93,10 +90,7 @@ def test_validate_accelerator_partition_fields(self, mock_k8s_client, partition_
 
 
 class TestB300MigProfiles:
-    """Tests for NVIDIA B300 (Blackwell Ultra) MIG profile support."""
-
-    def test_b300_in_instance_type_mig_profiles(self):
-        assert "ml.p6-b300.48xlarge" in INSTANCE_TYPE_MIG_PROFILES
+    """Tests for B300 (Blackwell Ultra) MIG profile constants and defaults."""
 
     def test_b300_profiles_complete(self):
         profiles = INSTANCE_TYPE_MIG_PROFILES["ml.p6-b300.48xlarge"]
@@ -110,43 +104,24 @@ def test_b300_profiles_complete(self):
         ]
         assert profiles == expected
 
-    def test_b300_profiles_in_allowed_set(self):
-        for profile in INSTANCE_TYPE_MIG_PROFILES["ml.p6-b300.48xlarge"]:
-            assert profile in ALLOWED_ACCELERATOR_PARTITION_TYPES
-
-    @pytest.mark.parametrize(
-        "partition_type,expected_slices",
-        [
-            ("mig-1g.34gb", 1),
-            ("mig-2g.67gb", 2),
-            ("mig-3g.135gb", 3),
-            ("mig-4g.135gb", 4),
-            ("mig-7g.269gb", 7),
-        ],
-    )
-    def test_extract_gpu_slices_b300(self, partition_type, expected_slices):
-        assert _extract_gpu_slices_from_accelerator_partition_type(partition_type) == expected_slices
-
     @pytest.mark.parametrize(
-        "partition_type,partition_count",
+        "partition_type,partition_count,expected_cpu,expected_memory",
         [
-            ("mig-1g.34gb", 7),
-            ("mig-1g.67gb", 4),
-            ("mig-2g.67gb", 3),
-            ("mig-3g.135gb", 2),
-            ("mig-4g.135gb", 1),
-            ("mig-7g.269gb", 1),
+            ("mig-1g.34gb", 7, "24.0", "512.0Gi"),
+            ("mig-1g.67gb", 4, "13.0", "292.0Gi"),
+            ("mig-2g.67gb", 3, "20.0", "438.0Gi"),
+            ("mig-3g.135gb", 2, "20.0", "438.0Gi"),
+            ("mig-4g.135gb", 1, "13.0", "292.0Gi"),
+            ("mig-7g.269gb", 1, "24.0", "512.0Gi"),
         ],
     )
-    def test_accelerator_partition_defaults_b300(self, partition_type, partition_count):
-        """Verify CPU/memory defaults are calculated proportionally for B300 MIG profiles."""
+    def test_accelerator_partition_defaults_b300(self, partition_type, partition_count, expected_cpu, expected_memory):
+        """Verify CPU/memory defaults match the deterministic ratio formula for B300."""
         defaults = _get_accelerator_partition_defaults(
             "ml.p6-b300.48xlarge", partition_type, partition_count
         )
-        assert "cpu" in defaults
-        assert "memory" in defaults
-        assert float(defaults["cpu"]) > 0
-        assert float(defaults["memory"].replace("Gi", "")) > 0
+        assert defaults["cpu"] == expected_cpu
+        assert defaults["memory"] == expected_memory
 
     @pytest.mark.parametrize(
         "partition_type,expected_valid,error_check",
@@ -162,7 +137,8 @@ def test_validate_b300_partition(
         self, mock_k8s_client, partition_type, expected_valid, error_check
     ):
         mock_node = MagicMock()
-        mock_node.status.allocatable = {f"nvidia.com/{partition_type}": "1"}
+        allocatable = {f"nvidia.com/{partition_type}": "1"} if expected_valid else {}
+        mock_node.status.allocatable = allocatable
         mock_k8s_client.return_value.get_core_v1_api.return_value.list_node.return_value.items = [
             mock_node
         ]

From 6a77b966055fc3cb453f0bcb8700796708c3c97c Mon Sep 17 00:00:00 2001
From: Keita Watanabe <keitaw09@gmail.com>
Date: Sat, 28 Mar 2026 00:28:06 +0000
Subject: [PATCH 4/6] Merge B300 tests into TestAcceleratorPartitionUtil

Eliminate the separate TestB300MigProfiles class. B300 tests now
extend the existing parametrized cases in TestAcceleratorPartitionUtil:

- B300 valid/invalid profile cases added to
  test_validate_accelerator_partition_fields
- B300 defaults with exact values added to
  test_accelerator_partition_defaults (instance-type-parametrized)
- test_instance_type_profiles_not_empty iterates all instance types
  in INSTANCE_TYPE_MIG_PROFILES as a data-driven guard; it replaces
  the B300-only exact-list check in test_b300_profiles_complete

This pattern scales to future instance types without adding new
test classes.
---
 .../cli/test_accelerator_partition_util.py    | 77 ++++++-------------
 1 file changed, 24 insertions(+), 53 deletions(-)

diff --git a/test/unit_tests/cli/test_accelerator_partition_util.py b/test/unit_tests/cli/test_accelerator_partition_util.py
index 7e8076d7..05b41471 100644
--- a/test/unit_tests/cli/test_accelerator_partition_util.py
+++ b/test/unit_tests/cli/test_accelerator_partition_util.py
@@ -75,76 +75,47 @@ def test_set_default_accelerator_partition_values(self, input_count, input_limit
             ("mig-1g.5gb", None, None, 2, "ml.p4d.24xlarge", False, lambda e: "accelerator_partition_type cannot be used together with node_count." == e),
             # Invalid instance type combination
             ("mig-1g.5gb", None, None, None, "ml.c5.large", False, lambda e: "does not support accelerator partitions" in e),
+            # B300: valid profile accepted
+            ("mig-1g.34gb", None, None, None, "ml.p6-b300.48xlarge", True, lambda e: e == ""),
+            # B300: cross-architecture profile rejected
+            ("mig-1g.5gb", None, None, None, "ml.p6-b300.48xlarge", False, lambda e: "not supported on instance type" in e),
         ]
     )
     @patch('sagemaker.hyperpod.training.accelerator_partition_util.KubernetesClient')
     def test_validate_accelerator_partition_fields(self, mock_k8s_client, partition_type, accelerators, accelerators_limit, node_count, instance_type, expected_valid, error_check):
-        # Mock cluster to have no MIG resources for most tests
         mock_node = MagicMock()
-        mock_node.status.allocatable = {}
+        allocatable = {f"nvidia.com/{partition_type}": "1"} if expected_valid and partition_type else {}
+        mock_node.status.allocatable = allocatable
         mock_k8s_client.return_value.get_core_v1_api.return_value.list_node.return_value.items = [mock_node]
 
         valid, error = _validate_accelerator_partition(partition_type, accelerators, accelerators_limit, node_count, instance_type)
         assert valid is expected_valid
         assert error_check(error)
 
-
-class TestB300MigProfiles:
-    """Tests for B300 (Blackwell Ultra) MIG profile constants and defaults."""
-
-    def test_b300_profiles_complete(self):
-        profiles = INSTANCE_TYPE_MIG_PROFILES["ml.p6-b300.48xlarge"]
-        expected = [
-            "mig-1g.34gb",
-            "mig-1g.67gb",
-            "mig-2g.67gb",
-            "mig-3g.135gb",
-            "mig-4g.135gb",
-            "mig-7g.269gb",
-        ]
-        assert profiles == expected
+    @pytest.mark.parametrize(
+        "instance_type",
+        list(INSTANCE_TYPE_MIG_PROFILES.keys()),
+    )
+    def test_instance_type_profiles_not_empty(self, instance_type):
+        """Every instance type in the MIG mapping must have at least one profile."""
+        assert len(INSTANCE_TYPE_MIG_PROFILES[instance_type]) > 0
 
     @pytest.mark.parametrize(
-        "partition_type,partition_count,expected_cpu,expected_memory",
+        "instance_type,partition_type,partition_count,expected_cpu,expected_memory",
         [
-            ("mig-1g.34gb", 7, "24.0", "512.0Gi"),
-            ("mig-1g.67gb", 4, "13.0", "292.0Gi"),
-            ("mig-2g.67gb", 3, "20.0", "438.0Gi"),
-            ("mig-3g.135gb", 2, "20.0", "438.0Gi"),
-            ("mig-4g.135gb", 1, "13.0", "292.0Gi"),
-            ("mig-7g.269gb", 1, "24.0", "512.0Gi"),
+            # B300 (Blackwell Ultra) — all profiles at max instance count
+            ("ml.p6-b300.48xlarge", "mig-1g.34gb", 7, "24.0", "512.0Gi"),
+            ("ml.p6-b300.48xlarge", "mig-1g.67gb", 4, "13.0", "292.0Gi"),
+            ("ml.p6-b300.48xlarge", "mig-2g.67gb", 3, "20.0", "438.0Gi"),
+            ("ml.p6-b300.48xlarge", "mig-3g.135gb", 2, "20.0", "438.0Gi"),
+            ("ml.p6-b300.48xlarge", "mig-4g.135gb", 1, "13.0", "292.0Gi"),
+            ("ml.p6-b300.48xlarge", "mig-7g.269gb", 1, "24.0", "512.0Gi"),
         ],
     )
-    def test_accelerator_partition_defaults_b300(self, partition_type, partition_count, expected_cpu, expected_memory):
-        """Verify CPU/memory defaults match the deterministic ratio formula for B300."""
+    def test_accelerator_partition_defaults(self, instance_type, partition_type, partition_count, expected_cpu, expected_memory):
+        """Verify CPU/memory defaults match the deterministic ratio formula."""
         defaults = _get_accelerator_partition_defaults(
-            "ml.p6-b300.48xlarge", partition_type, partition_count
+            instance_type, partition_type, partition_count
         )
         assert defaults["cpu"] == expected_cpu
         assert defaults["memory"] == expected_memory
-
-    @pytest.mark.parametrize(
-        "partition_type,expected_valid,error_check",
-        [
-            ("mig-1g.34gb", True, lambda e: e == ""),
-            ("mig-3g.135gb", True, lambda e: e == ""),
-            ("mig-7g.269gb", True, lambda e: e == ""),
-            ("mig-1g.5gb", False, lambda e: "not supported on instance type" in e),
-        ],
-    )
-    @patch("sagemaker.hyperpod.training.accelerator_partition_util.KubernetesClient")
-    def test_validate_b300_partition(
-        self, mock_k8s_client, partition_type, expected_valid, error_check
-    ):
-        mock_node = MagicMock()
-        allocatable = {f"nvidia.com/{partition_type}": "1"} if expected_valid else {}
-        mock_node.status.allocatable = allocatable
-        mock_k8s_client.return_value.get_core_v1_api.return_value.list_node.return_value.items = [
-            mock_node
-        ]
-
-        valid, error = _validate_accelerator_partition(
-            partition_type, None, None, None, "ml.p6-b300.48xlarge"
-        )
-        assert valid is expected_valid
-        assert error_check(error)

From 6b1ab017151ff48ff051db61e9c7a604ba485c8d Mon Sep 17 00:00:00 2001
From: Keita Watanabe <keitaw09@gmail.com>
Date: Sat, 28 Mar 2026 00:32:51 +0000
Subject: [PATCH 5/6] Add B200 MIG test cases (depends on #399 for ml. prefix
 fix)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add B200 (Blackwell) test coverage alongside B300:
- 2 validation cases: valid profile accepted, cross-arch rejected
- 6 defaults cases with exact CPU/memory values

B200 validation tests will fail until #399 merges (fixes the
p6-b200.48xlarge → ml.p6-b200.48xlarge key). B200 defaults tests
pass immediately since INSTANCE_RESOURCES already uses the ml. key.
---
 .../unit_tests/cli/test_accelerator_partition_util.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/test/unit_tests/cli/test_accelerator_partition_util.py b/test/unit_tests/cli/test_accelerator_partition_util.py
index 05b41471..9cb73a37 100644
--- a/test/unit_tests/cli/test_accelerator_partition_util.py
+++ b/test/unit_tests/cli/test_accelerator_partition_util.py
@@ -75,6 +75,10 @@ def test_set_default_accelerator_partition_values(self, input_count, input_limit
             ("mig-1g.5gb", None, None, 2, "ml.p4d.24xlarge", False, lambda e: "accelerator_partition_type cannot be used together with node_count." == e),
             # Invalid instance type combination
             ("mig-1g.5gb", None, None, None, "ml.c5.large", False, lambda e: "does not support accelerator partitions" in e),
+            # B200: valid profile accepted (requires #399 for ml. prefix fix)
+            ("mig-1g.23gb", None, None, None, "ml.p6-b200.48xlarge", True, lambda e: e == ""),
+            # B200: cross-architecture profile rejected
+            ("mig-1g.5gb", None, None, None, "ml.p6-b200.48xlarge", False, lambda e: "not supported on instance type" in e),
             # B300: valid profile accepted
             ("mig-1g.34gb", None, None, None, "ml.p6-b300.48xlarge", True, lambda e: e == ""),
             # B300: cross-architecture profile rejected
@@ -103,6 +107,13 @@ def test_instance_type_profiles_not_empty(self, instance_type):
     @pytest.mark.parametrize(
         "instance_type,partition_type,partition_count,expected_cpu,expected_memory",
         [
+            # B200 (Blackwell) — all profiles at max instance count (requires #399 for ml. prefix fix)
+            ("ml.p6-b200.48xlarge", "mig-1g.23gb", 7, "24.0", "256.0Gi"),
+            ("ml.p6-b200.48xlarge", "mig-1g.45gb", 4, "13.0", "146.0Gi"),
+            ("ml.p6-b200.48xlarge", "mig-2g.45gb", 3, "20.0", "219.0Gi"),
+            ("ml.p6-b200.48xlarge", "mig-3g.90gb", 2, "20.0", "219.0Gi"),
+            ("ml.p6-b200.48xlarge", "mig-4g.90gb", 1, "13.0", "146.0Gi"),
+            ("ml.p6-b200.48xlarge", "mig-7g.180gb", 1, "24.0", "256.0Gi"),
             # B300 (Blackwell Ultra) — all profiles at max instance count
             ("ml.p6-b300.48xlarge", "mig-1g.34gb", 7, "24.0", "512.0Gi"),
             ("ml.p6-b300.48xlarge", "mig-1g.67gb", 4, "13.0", "292.0Gi"),

From 1b00da5b902e4775a00482c644195a64fc3b691f Mon Sep 17 00:00:00 2001
From: Keita Watanabe <keitaw09@gmail.com>
Date: Sat, 28 Mar 2026 00:39:14 +0000
Subject: [PATCH 6/6] Cover all MIG-capable instance types in defaults test

Replace 12 B200/B300-only rows with 1 representative row per
MIG-capable instance type (P4d, P4de, P5, P5e, P5en, B200, B300,
GB200, G7e). Each row uses the smallest profile at max instance
count, verifying that INSTANCE_RESOURCES has correct cpu/gpu/memory
values for the ratio calculation.
---
 .../cli/test_accelerator_partition_util.py    | 25 ++++++++-----------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/test/unit_tests/cli/test_accelerator_partition_util.py b/test/unit_tests/cli/test_accelerator_partition_util.py
index 9cb73a37..296fe65c 100644
--- a/test/unit_tests/cli/test_accelerator_partition_util.py
+++ b/test/unit_tests/cli/test_accelerator_partition_util.py
@@ -107,24 +107,21 @@ def test_instance_type_profiles_not_empty(self, instance_type):
     @pytest.mark.parametrize(
         "instance_type,partition_type,partition_count,expected_cpu,expected_memory",
         [
-            # B200 (Blackwell) — all profiles at max instance count (requires #399 for ml. prefix fix)
-            ("ml.p6-b200.48xlarge", "mig-1g.23gb", 7, "24.0", "256.0Gi"),
-            ("ml.p6-b200.48xlarge", "mig-1g.45gb", 4, "13.0", "146.0Gi"),
-            ("ml.p6-b200.48xlarge", "mig-2g.45gb", 3, "20.0", "219.0Gi"),
-            ("ml.p6-b200.48xlarge", "mig-3g.90gb", 2, "20.0", "219.0Gi"),
-            ("ml.p6-b200.48xlarge", "mig-4g.90gb", 1, "13.0", "146.0Gi"),
-            ("ml.p6-b200.48xlarge", "mig-7g.180gb", 1, "24.0", "256.0Gi"),
-            # B300 (Blackwell Ultra) — all profiles at max instance count
+            # One representative profile per MIG-capable instance type (smallest profile, max count).
+            # Guards that INSTANCE_RESOURCES has correct cpu/gpu/memory for each instance type.
+            ("ml.p4d.24xlarge", "mig-1g.5gb", 7, "12.0", "144.0Gi"),
+            ("ml.p4de.24xlarge", "mig-1g.10gb", 7, "12.0", "144.0Gi"),
+            ("ml.p5.48xlarge", "mig-1g.10gb", 7, "24.0", "256.0Gi"),
+            ("ml.p5e.48xlarge", "mig-1g.18gb", 7, "24.0", "256.0Gi"),
+            ("ml.p5en.48xlarge", "mig-1g.18gb", 7, "24.0", "256.0Gi"),
+            ("ml.p6-b200.48xlarge", "mig-1g.23gb", 7, "24.0", "256.0Gi"),  # requires #399
             ("ml.p6-b300.48xlarge", "mig-1g.34gb", 7, "24.0", "512.0Gi"),
-            ("ml.p6-b300.48xlarge", "mig-1g.67gb", 4, "13.0", "292.0Gi"),
-            ("ml.p6-b300.48xlarge", "mig-2g.67gb", 3, "20.0", "438.0Gi"),
-            ("ml.p6-b300.48xlarge", "mig-3g.135gb", 2, "20.0", "438.0Gi"),
-            ("ml.p6-b300.48xlarge", "mig-4g.135gb", 1, "13.0", "292.0Gi"),
-            ("ml.p6-b300.48xlarge", "mig-7g.269gb", 1, "24.0", "512.0Gi"),
+            ("ml.p6e-gb200.36xlarge", "mig-1g.23gb", 7, "36.0", "240.0Gi"),
+            ("ml.g7e.48xlarge", "mig-1g.24gb", 4, "13.0", "146.0Gi"),
         ],
     )
     def test_accelerator_partition_defaults(self, instance_type, partition_type, partition_count, expected_cpu, expected_memory):
-        """Verify CPU/memory defaults match the deterministic ratio formula."""
+        """Verify CPU/memory defaults for one profile per MIG-capable instance type."""
         defaults = _get_accelerator_partition_defaults(
             instance_type, partition_type, partition_count
         )