diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml index 8f4943d7..f18523ed 100644 --- a/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml @@ -341,12 +341,90 @@ mig-configs: "1g.24gb": 2 "2g.48gb": 1 + # P6-B300 (Blackwell Ultra, 288GB HBM3e, ~269GB usable) profiles + # Profiles: 1g.34gb (x7), 1g.67gb (x4), 2g.67gb (x3), 3g.135gb (x2), 4g.135gb (x1), 7g.269gb (x1) + # Upstream ref: NVIDIA GPU Operator v25.3.0, device-filter 0x318210DE + + all-1g.34gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.34gb": 7 + all-1g.67gb: - devices: all mig-enabled: true mig-devices: "1g.67gb": 4 + all-2g.67gb: + - devices: all + mig-enabled: true + mig-devices: + "2g.67gb": 3 + + all-3g.135gb: + - devices: all + mig-enabled: true + mig-devices: + "3g.135gb": 2 + + all-4g.135gb: + - devices: all + mig-enabled: true + mig-devices: + "4g.135gb": 1 + + all-7g.269gb: + - devices: all + mig-enabled: true + mig-devices: + "7g.269gb": 1 + + mixed-1-3g.135gb-1-4g.135gb: + - devices: all + mig-enabled: true + mig-devices: + "3g.135gb": 1 + "4g.135gb": 1 + + mixed-1-1g.34gb-1-2g.67gb-1-4g.135gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.34gb": 1 + "2g.67gb": 1 + "4g.135gb": 1 + + mixed-3-1g.34gb-1-4g.135gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.34gb": 3 + "4g.135gb": 1 + + mixed-1-1g.34gb-1-2g.67gb-1-3g.135gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.34gb": 1 + "2g.67gb": 1 + "3g.135gb": 1 + + mixed-3-1g.34gb-1-3g.135gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.34gb": 3 + "3g.135gb": 1 + + mixed-2-2g.67gb-1-3g.135gb: + - devices: all + mig-enabled: true + mig-devices: + "2g.67gb": 2 + "3g.135gb": 1 + mixed-2-1g.34gb-1-2g.67gb-1-3g.135gb: - devices: all mig-enabled: true @@ -354,3 +432,31 @@ mig-configs: "1g.34gb": 2 "2g.67gb": 1 "3g.135gb": 1 + + mixed-4-1g.34gb-1-3g.135gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.34gb": 4 + "3g.135gb": 1 + + mixed-1-1g.34gb-3-2g.67gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.34gb": 1 + "2g.67gb": 3 + + mixed-3-1g.34gb-2-2g.67gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.34gb": 3 + "2g.67gb": 2 + + mixed-5-1g.34gb-1-2g.67gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.34gb": 5 + "2g.67gb": 1 diff --git a/src/sagemaker/hyperpod/training/constants.py b/src/sagemaker/hyperpod/training/constants.py index 29f58fa8..59230365 100644 --- a/src/sagemaker/hyperpod/training/constants.py +++ b/src/sagemaker/hyperpod/training/constants.py @@ -132,6 +132,7 @@ 'ml.p5e.48xlarge': ['mig-1g.18gb', 'mig-1g.35gb', 'mig-2g.35gb', 'mig-3g.71gb', 'mig-4g.71gb', 'mig-7g.141gb'], 'ml.p5en.48xlarge': ['mig-1g.18gb', 'mig-1g.35gb', 'mig-2g.35gb', 'mig-3g.71gb', 'mig-4g.71gb', 'mig-7g.141gb'], 'p6-b200.48xlarge': ['mig-1g.23gb', 'mig-1g.45gb', 'mig-2g.45gb', 'mig-3g.90gb', 'mig-4g.90gb', 'mig-7g.180gb'], + 'ml.p6-b300.48xlarge': ['mig-1g.34gb', 'mig-1g.67gb', 'mig-2g.67gb', 'mig-3g.135gb', 'mig-4g.135gb', 'mig-7g.269gb'], 'ml.p6e-gb200.36xlarge': ['mig-1g.23gb', 'mig-1g.47gb', 'mig-2g.47gb', 'mig-3g.93gb', 'mig-4g.93gb', 'mig-7g.186gb'], 'ml.g7e.2xlarge': ['mig-1g.24gb', 'mig-2g.48gb', 'mig-4g.96gb'], 'ml.g7e.4xlarge': ['mig-1g.24gb', 'mig-2g.48gb', 'mig-4g.96gb'],