From 812b0a38c66196d40c4ba32af70a5e1dbf0f8d56 Mon Sep 17 00:00:00 2001 From: Sean Archer Date: Tue, 11 Nov 2025 17:00:49 -0800 Subject: [PATCH 1/3] Add GPU operator MIG support with NVIDIA license notice --- NOTICE | 4 +- helm_chart/HyperPodHelmChart/Chart.yaml | 4 + .../charts/gpu-operator/Chart.yaml | 30 ++ .../config/default-mig-config.yaml | 314 ++++++++++++++++++ .../values-ap-northeast-1.yaml | 13 + .../regional-values/values-ap-south-1.yaml | 13 + .../values-ap-southeast-1.yaml | 13 + .../values-ap-southeast-2.yaml | 13 + .../values-ap-southeast-3.yaml | 13 + .../values-ap-southeast-4.yaml | 13 + .../regional-values/values-eu-central-1.yaml | 13 + .../regional-values/values-eu-north-1.yaml | 13 + .../regional-values/values-eu-south-2.yaml | 13 + .../regional-values/values-eu-west-1.yaml | 13 + .../regional-values/values-eu-west-2.yaml | 13 + .../regional-values/values-sa-east-1.yaml | 13 + .../regional-values/values-us-east-1.yaml | 13 + .../regional-values/values-us-east-2.yaml | 13 + .../regional-values/values-us-west-1.yaml | 13 + .../regional-values/values-us-west-2.yaml | 13 + .../gpu-operator/templates/mig-config.yaml | 11 + .../charts/gpu-operator/values.yaml | 60 ++++ .../values-ap-northeast-1.yaml | 14 + .../regional-values/values-ap-south-1.yaml | 14 + .../values-ap-southeast-1.yaml | 14 + .../values-ap-southeast-2.yaml | 14 + .../values-ap-southeast-3.yaml | 14 + .../values-ap-southeast-4.yaml | 14 + .../regional-values/values-eu-central-1.yaml | 14 + .../regional-values/values-eu-north-1.yaml | 14 + .../regional-values/values-eu-south-2.yaml | 14 + .../regional-values/values-eu-west-1.yaml | 14 + .../regional-values/values-eu-west-2.yaml | 14 + .../regional-values/values-sa-east-1.yaml | 14 + .../regional-values/values-us-east-1.yaml | 14 + .../regional-values/values-us-east-2.yaml | 14 + .../regional-values/values-us-west-1.yaml | 14 + .../regional-values/values-us-west-2.yaml | 14 + helm_chart/HyperPodHelmChart/values.yaml | 2 + 39 files 
changed, 856 insertions(+), 1 deletion(-) create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/Chart.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-northeast-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-south-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-3.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-4.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-central-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-north-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-south-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-west-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-west-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-sa-east-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-east-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-east-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-west-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-west-2.yaml create mode 100644 
helm_chart/HyperPodHelmChart/charts/gpu-operator/templates/mig-config.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/values.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-ap-northeast-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-ap-south-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-3.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-4.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-eu-central-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-eu-north-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-eu-south-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-eu-west-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-eu-west-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-sa-east-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-us-east-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-us-east-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-us-west-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-us-west-2.yaml diff --git a/NOTICE b/NOTICE index f48b352d..b939a525 100644 --- a/NOTICE +++ b/NOTICE @@ -1 +1,3 @@ -Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. \ No newline at end of file +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + +This software contains source code provided by NVIDIA Corporation. 
\ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/Chart.yaml b/helm_chart/HyperPodHelmChart/Chart.yaml index 82d1db1a..4ef3687f 100644 --- a/helm_chart/HyperPodHelmChart/Chart.yaml +++ b/helm_chart/HyperPodHelmChart/Chart.yaml @@ -88,3 +88,7 @@ dependencies: version: "0.1.0" repository: "file://charts/hyperpod-patching" condition: hyperpod-patching.enabled + - name: gpu-operator + version: "0.1.0" + repository: "file://charts/gpu-operator" + condition: gpu-operator.enabled \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/Chart.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/Chart.yaml new file mode 100644 index 00000000..d58a5de3 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/Chart.yaml @@ -0,0 +1,30 @@ +apiVersion: v2 +name: gpu-operator +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. 
+appVersion: "1.16.0" + +dependencies: + - name: gpu-operator + version: "v25.3.4" + repository: "https://helm.ngc.nvidia.com/nvidia" + condition: gpu-operator.enabled diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml new file mode 100644 index 00000000..a1899373 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml @@ -0,0 +1,314 @@ +version: v1 +mig-configs: + # MIG disabled + all-disabled: + - devices: all + mig-enabled: false + + # Profile 19 + all-1g.10gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 7 + + all-1g.5gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.5gb": 7 + + all-1g.18gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 7 + + # Profile 5 + all-3g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "3g.40gb": 2 + + all-3g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "3g.20gb": 2 + + all-3g.71gb: + - devices: all + mig-enabled: true + mig-devices: + "3g.71gb": 2 + + # Profile 1 + all-7g.80gb: + - devices: all + mig-enabled: true + mig-devices: + "7g.80gb": 1 + + all-7g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "7g.40gb": 1 + + all-7g.141gb: + - devices: all + mig-enabled: true + mig-devices: + "7g.141gb": 1 + + # Profile 2 + mixed-1-3g.40gb-1-4g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "3g.40gb": 1 + "4g.40gb": 1 + + mixed-1-3g.20gb-1-4g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "3g.20gb": 1 + "4g.20gb": 1 + + mixed-1-3g.71gb-1-4g.71gb: + - devices: all + mig-enabled: true + mig-devices: + "3g.71gb": 1 + "4g.71gb": 1 + + # Profile 3 + mixed-1-1g.10gb-1-2g.20gb-1-4g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 1 + "2g.20gb": 1 + "4g.40gb": 1 + + mixed-1-1g.5gb-1-2g.10gb-1-4g.20gb: + - devices: all + mig-enabled: true + mig-devices: + 
"1g.5gb": 1 + "2g.10gb": 1 + "4g.20gb": 1 + + mixed-1-1g.18gb-1-2g.35gb-1-4g.71gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 1 + "2g.35gb": 1 + "4g.71gb": 1 + + # Profile 4 + mixed-3-1g.10gb-1-4g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 3 + "4g.40gb": 1 + + mixed-3-1g.5gb-1-4g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.5gb": 3 + "4g.20gb": 1 + + mixed-3-1g.18gb-1-4g.71gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 3 + "4g.71gb": 1 + + # Profile 6 + mixed-1g.10gb-1-2g.20gb-1-3g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 1 + "2g.20gb": 1 + "3g.40gb": 1 + + mixed-1g.5gb-1-2g.10gb-1-3g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.5gb": 1 + "2g.10gb": 1 + "3g.20gb": 1 + + mixed-1g.18gb-1-2g.35gb-1-3g.71gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 1 + "2g.35gb": 1 + "3g.71gb": 1 + + # Profile 7 + mixed-3-1g.10gb-1-3g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 3 + "3g.40gb": 1 + + mixed-3-1g.5gb-1-3g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.5gb": 3 + "3g.20gb": 1 + + mixed-3-1g.18gb-1-3g.70gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 3 + "3g.71gb": 1 + + # Profile 8 + mixed-2-2g.20gb-1-3g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "2g.20gb": 2 + "3g.40gb": 1 + + mixed-2-2g.10gb-1-3g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "2g.10gb": 2 + "3g.20gb": 1 + + mixed-2-2g.35gb-1-3g.71gb: + - devices: all + mig-enabled: true + mig-devices: + "2g.35gb": 2 + "3g.71gb": 1 + + # Profile 9, 10 + mixed-2-1g.10gb-1-2g.20gb-1-3g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 2 + "2g.20gb": 1 + "3g.40gb": 1 + + mixed-2-1g.5gb-1-2g.20gb-1-3g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.5gb": 2 + "2g.10gb": 1 + "3g.20gb": 1 + + mixed-2-1g.18gb-1-2g.47gb-1-3g.71gb: + - 
devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 2 + "2g.35gb": 1 + "3g.71gb": 1 + + # Profile 11 + mixed-4-1g.10gb-1-3g.40gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 4 + "3g.40gb": 1 + + mixed-4-1g.5gb-1-3g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.5gb": 4 + "3g.20gb": 1 + + mixed-4-1g.18gb-1-3g.71gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 4 + "3g.71gb": 1 + + # Profile 12 + mixed-1-1g.10gb-3-2g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 1 + "2g.20gb": 3 + + mixed-1-1g.5gb-3-2g.10gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.5gb": 1 + "2g.10gb": 3 + + mixed-1-1g.18gb-3-2g.35gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 1 + "2g.35gb": 3 + + # Profile 13, 14 + mixed-3-1g.10gb-2-2g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 3 + "2g.20gb": 2 + + mixed-3-1g.5gb-2-2g.10gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.5gb": 3 + "2g.10gb": 2 + + mixed-3-1g.18gb-2-2g.35gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 3 + "2g.35gb": 2 + + # Profile 15, 16, 17, 18, 19 + mixed-5-1g.10gb-1-2g.20gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.10gb": 5 + "2g.20gb": 1 + + mixed-5-1g.5gb-1-2g.10gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.5gb": 5 + "2g.10gb": 1 + + mixed-5-1g.18gb-1-2g.35gb: + - devices: all + mig-enabled: true + mig-devices: + "1g.18gb": 5 + "2g.35gb": 1 \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-northeast-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-northeast-1.yaml new file mode 100644 index 00000000..221e3e1a --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-northeast-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: 
"248189928082.dkr.ecr.ap-northeast-1.amazonaws.com" + toolkit: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com" + gfd: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com" + migManager: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-south-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-south-1.yaml new file mode 100644 index 00000000..7b53ba54 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-south-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com" + toolkit: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com" + gfd: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com" + migManager: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-1.yaml new file mode 100644 index 00000000..3f3175be --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com" + toolkit: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com/mirror-k8s" + devicePlugin: + 
repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com" + gfd: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com" + migManager: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-2.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-2.yaml new file mode 100644 index 00000000..21974878 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-2.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com" + toolkit: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com" + gfd: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com" + migManager: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-3.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-3.yaml new file mode 100644 index 00000000..146b6cf7 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-3.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com" + toolkit: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com" + gfd: + repository: 
"112551748145.dkr.ecr.ap-southeast-3.amazonaws.com" + migManager: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com/mirror-cloud-native" + validator: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-4.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-4.yaml new file mode 100644 index 00000000..f1d57dac --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-southeast-4.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com" + toolkit: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com" + gfd: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com" + migManager: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com/mirror-cloud-native" + validator: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-central-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-central-1.yaml new file mode 100644 index 00000000..5da8a5d8 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-central-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com" + toolkit: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com" + gfd: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com" + migManager: + repository: 
"816069136154.dkr.ecr.eu-central-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-north-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-north-1.yaml new file mode 100644 index 00000000..fa0ddd2b --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-north-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com" + toolkit: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com" + gfd: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com" + migManager: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-south-2.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-south-2.yaml new file mode 100644 index 00000000..a9313b26 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-south-2.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com" + toolkit: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com" + gfd: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com" + migManager: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file 
diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-west-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-west-1.yaml new file mode 100644 index 00000000..c4ce9a61 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-west-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com" + toolkit: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com" + gfd: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com" + migManager: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-west-2.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-west-2.yaml new file mode 100644 index 00000000..6904fd39 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-eu-west-2.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com" + toolkit: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com" + gfd: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com" + migManager: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-sa-east-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-sa-east-1.yaml new file mode 100644 index 
00000000..dcf50643 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-sa-east-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com" + toolkit: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com" + gfd: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com" + migManager: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-east-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-east-1.yaml new file mode 100644 index 00000000..e679afdd --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-east-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com" + toolkit: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com" + gfd: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com" + migManager: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-east-2.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-east-2.yaml new file mode 100644 index 00000000..de9b12ae --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-east-2.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: 
"940482419867.dkr.ecr.us-east-2.amazonaws.com" + toolkit: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com" + gfd: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com" + migManager: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-west-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-west-1.yaml new file mode 100644 index 00000000..dfac75f6 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-west-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com" + toolkit: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com" + gfd: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com" + migManager: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-west-2.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-west-2.yaml new file mode 100644 index 00000000..7e8aacf9 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-us-west-2.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com" + toolkit: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com" + gfd: + 
repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com" + migManager: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/templates/mig-config.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/templates/mig-config.yaml new file mode 100644 index 00000000..782344a1 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/templates/mig-config.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: "default-mig-config" + namespace: {{ .Release.Namespace }} + labels: + sagemaker.amazonaws.com/accelerator-partition-configmap: "true" +data: + config.yaml: | +{{- $content := $.Files.Get "config/default-mig-config.yaml" }} +{{ $content | indent 4 }} diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/values.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/values.yaml new file mode 100644 index 00000000..13c8c32c --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/values.yaml @@ -0,0 +1,60 @@ +gpu-operator: + enabled: true + operator: + image: "mirror-gpu-operator" + version: "v25.3.4" + tolerations: + - operator: "Exists" + initContainer: + enabled: false + mig: + strategy: mixed + driver: + enabled: false + manager: + enabled: false + toolkit: + image: "container-toolkit" + version: "v1.13.1-centos7" + devicePlugin: + enabled: true + image: "mirror-k8s-device-plugin" + version: "v0.17.4" + dcgm: + enabled: false + dcgmExporter: + enabled: false + gfd: + image: "mirror-k8s-device-plugin" + version: "v0.17.4" + migManager: + image: "k8s-mig-manager" + version: "v0.12.3-ubuntu20.04" + config: + create: true + name: "default-mig-config" + env: + - name: WITH_REBOOT + value: "true" + validator: + image: "gpu-operator-validator" + version: "v25.3.4" + vgpuDeviceManager: + enabled: false + 
vfioManager: + enabled: false + sandboxDevicePlugin: + enabled: false + node-feature-discovery: + master: + tolerations: + - operator: "Exists" + worker: + tolerations: + - operator: "Exists" + gc: + tolerations: + - operator: "Exists" + daemonsets: + tolerations: + - operator: Exists diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-ap-northeast-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-ap-northeast-1.yaml new file mode 100644 index 00000000..46f0fbe4 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-ap-northeast-1.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com" + toolkit: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com" + gfd: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com" + migManager: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "248189928082.dkr.ecr.ap-northeast-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-ap-south-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-ap-south-1.yaml new file mode 100644 index 00000000..84fa12bb --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-ap-south-1.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com" + toolkit: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com" + gfd: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com" + migManager: + repository: "183295424886.dkr.ecr.ap-south-1.amazonaws.com/mirror-cloud-native" + validator: + repository: 
"183295424886.dkr.ecr.ap-south-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-1.yaml new file mode 100644 index 00000000..e3e58052 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-1.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com" + toolkit: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com" + gfd: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com" + migManager: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "235494799058.dkr.ecr.ap-southeast-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-2.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-2.yaml new file mode 100644 index 00000000..b8325c91 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-2.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com" + toolkit: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com" + gfd: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com" + migManager: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "495599779797.dkr.ecr.ap-southeast-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-3.yaml 
b/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-3.yaml new file mode 100644 index 00000000..653677d0 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-3.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com" + toolkit: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com" + gfd: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com" + migManager: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com/mirror-cloud-native" + validator: + repository: "112551748145.dkr.ecr.ap-southeast-3.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-4.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-4.yaml new file mode 100644 index 00000000..65827dd4 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-ap-southeast-4.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com" + toolkit: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com" + gfd: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com" + migManager: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com/mirror-cloud-native" + validator: + repository: "872515262625.dkr.ecr.ap-southeast-4.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-eu-central-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-eu-central-1.yaml new file mode 100644 index 00000000..ab51700e --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-eu-central-1.yaml @@ -0,0 
+1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com" + toolkit: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com" + gfd: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com" + migManager: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "816069136154.dkr.ecr.eu-central-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-eu-north-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-eu-north-1.yaml new file mode 100644 index 00000000..2a757d7e --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-eu-north-1.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com" + toolkit: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com" + gfd: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com" + migManager: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "495599756225.dkr.ecr.eu-north-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-eu-south-2.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-eu-south-2.yaml new file mode 100644 index 00000000..395c612f --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-eu-south-2.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com" + toolkit: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: 
"765762660796.dkr.ecr.eu-south-2.amazonaws.com" + gfd: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com" + migManager: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "765762660796.dkr.ecr.eu-south-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-eu-west-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-eu-west-1.yaml new file mode 100644 index 00000000..8a360b6b --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-eu-west-1.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com" + toolkit: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com" + gfd: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com" + migManager: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "741448945724.dkr.ecr.eu-west-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-eu-west-2.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-eu-west-2.yaml new file mode 100644 index 00000000..fde53671 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-eu-west-2.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com" + toolkit: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com" + gfd: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com" + migManager: + repository: "869935108213.dkr.ecr.eu-west-2.amazonaws.com/mirror-cloud-native" + validator: + repository: 
"869935108213.dkr.ecr.eu-west-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-sa-east-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-sa-east-1.yaml new file mode 100644 index 00000000..eebd166c --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-sa-east-1.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com" + toolkit: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com" + gfd: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com" + migManager: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "985539796278.dkr.ecr.sa-east-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-us-east-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-us-east-1.yaml new file mode 100644 index 00000000..269e8a14 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-us-east-1.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com" + toolkit: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com" + gfd: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com" + migManager: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "463470961310.dkr.ecr.us-east-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-us-east-2.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-us-east-2.yaml new file mode 100644 index 00000000..369e53e8 --- /dev/null 
+++ b/helm_chart/HyperPodHelmChart/regional-values/values-us-east-2.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com" + toolkit: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com" + gfd: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com" + migManager: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "940482419867.dkr.ecr.us-east-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-us-west-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-us-west-1.yaml new file mode 100644 index 00000000..9d618c69 --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-us-west-1.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com" + toolkit: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com" + gfd: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com" + migManager: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "034362067326.dkr.ecr.us-west-1.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-us-west-2.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-us-west-2.yaml new file mode 100644 index 00000000..f29d30ee --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-us-west-2.yaml @@ -0,0 +1,14 @@ +gpu-operator: + gpu-operator: + operator: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com" + toolkit: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com/mirror-k8s" + devicePlugin: + 
repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com" + gfd: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com" + migManager: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "586794468202.dkr.ecr.us-west-2.amazonaws.com/mirror-cloud-native" \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/values.yaml b/helm_chart/HyperPodHelmChart/values.yaml index bef20af4..bd76e88e 100644 --- a/helm_chart/HyperPodHelmChart/values.yaml +++ b/helm_chart/HyperPodHelmChart/values.yaml @@ -288,3 +288,5 @@ job-auto-restart: enabled: true hyperpod-patching: enabled: true +gpu-operator: + enabled: false \ No newline at end of file From fbef9c606ed29290c356d8250b719c7cfa2239e6 Mon Sep 17 00:00:00 2001 From: Sean Archer Date: Mon, 17 Nov 2025 19:21:33 -0800 Subject: [PATCH 2/3] Add regional values for ap-northeast-2 and ca-central-1 GPU operator MIG support --- .../regional-values/values-ap-northeast-2.yaml | 15 +++++++++++++++ .../regional-values/values-ca-central-1.yaml | 13 +++++++++++++ .../regional-values/values-ap-northeast-2.yaml | 16 ++++++++++++++++ .../regional-values/values-ca-central-1.yaml | 16 ++++++++++++++++ 4 files changed, 60 insertions(+) create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-northeast-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ca-central-1.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-ap-northeast-2.yaml create mode 100644 helm_chart/HyperPodHelmChart/regional-values/values-ca-central-1.yaml diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-northeast-2.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-northeast-2.yaml new file mode 100644 index 00000000..346d4d37 --- /dev/null +++ 
b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ap-northeast-2.yaml @@ -0,0 +1,15 @@ +gpu-operator: + operator: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com" + toolkit: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com" + gfd: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com" + migManager: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com/mirror-cloud-native" + + diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ca-central-1.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ca-central-1.yaml new file mode 100644 index 00000000..473f013e --- /dev/null +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/regional-values/values-ca-central-1.yaml @@ -0,0 +1,13 @@ +gpu-operator: + operator: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com" + toolkit: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com" + gfd: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com" + migManager: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com/mirror-cloud-native" diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-ap-northeast-2.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-ap-northeast-2.yaml new file mode 100644 index 00000000..0d36800a --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-ap-northeast-2.yaml @@ -0,0 +1,16 @@ +gpu-operator: + gpu-operator: + operator: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com" + toolkit: + repository: 
"743267407464.dkr.ecr.ap-northeast-2.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com" + gfd: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com" + migManager: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com/mirror-cloud-native" + validator: + repository: "743267407464.dkr.ecr.ap-northeast-2.amazonaws.com/mirror-cloud-native" + + diff --git a/helm_chart/HyperPodHelmChart/regional-values/values-ca-central-1.yaml b/helm_chart/HyperPodHelmChart/regional-values/values-ca-central-1.yaml new file mode 100644 index 00000000..9473752f --- /dev/null +++ b/helm_chart/HyperPodHelmChart/regional-values/values-ca-central-1.yaml @@ -0,0 +1,16 @@ +gpu-operator: + gpu-operator: + operator: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com" + toolkit: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com/mirror-k8s" + devicePlugin: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com" + gfd: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com" + migManager: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com/mirror-cloud-native" + validator: + repository: "035462350821.dkr.ecr.ca-central-1.amazonaws.com/mirror-cloud-native" + + From 7051537ce813b663ab08fd24376011d11b774e93 Mon Sep 17 00:00:00 2001 From: Sean Archer Date: Tue, 13 Jan 2026 14:40:07 -0800 Subject: [PATCH 3/3] Update MIG config for GPU operator --- .../config/default-mig-config.yaml | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml b/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml index a1899373..cbbd8892 100644 --- a/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml +++ b/helm_chart/HyperPodHelmChart/charts/gpu-operator/config/default-mig-config.yaml @@ -23,7 +23,7 @@ mig-configs: mig-enabled: true 
mig-devices: "1g.18gb": 7 - + # Profile 5 all-3g.40gb: - devices: all @@ -42,7 +42,7 @@ mig-configs: mig-enabled: true mig-devices: "3g.71gb": 2 - + # Profile 1 all-7g.80gb: - devices: all @@ -108,7 +108,7 @@ mig-configs: "1g.18gb": 1 "2g.35gb": 1 "4g.71gb": 1 - + # Profile 4 mixed-3-1g.10gb-1-4g.40gb: - devices: all @@ -132,7 +132,7 @@ mig-configs: "4g.71gb": 1 # Profile 6 - mixed-1g.10gb-1-2g.20gb-1-3g.40gb: + mixed-1-1g.10gb-1-2g.20gb-1-3g.40gb: - devices: all mig-enabled: true mig-devices: @@ -140,7 +140,7 @@ mig-configs: "2g.20gb": 1 "3g.40gb": 1 - mixed-1g.5gb-1-2g.10gb-1-3g.20gb: + mixed-1-1g.5gb-1-2g.10gb-1-3g.20gb: - devices: all mig-enabled: true mig-devices: @@ -148,7 +148,7 @@ mig-configs: "2g.10gb": 1 "3g.20gb": 1 - mixed-1g.18gb-1-2g.35gb-1-3g.71gb: + mixed-1-1g.18gb-1-2g.35gb-1-3g.71gb: - devices: all mig-enabled: true mig-devices: @@ -171,7 +171,7 @@ mig-configs: "1g.5gb": 3 "3g.20gb": 1 - mixed-3-1g.18gb-1-3g.70gb: + mixed-3-1g.18gb-1-3g.71gb: - devices: all mig-enabled: true mig-devices: @@ -209,7 +209,7 @@ mig-configs: "2g.20gb": 1 "3g.40gb": 1 - mixed-2-1g.5gb-1-2g.20gb-1-3g.20gb: + mixed-2-1g.5gb-1-2g.10gb-1-3g.20gb: - devices: all mig-enabled: true mig-devices: @@ -217,12 +217,12 @@ mig-configs: "2g.10gb": 1 "3g.20gb": 1 - mixed-2-1g.18gb-1-2g.47gb-1-3g.71gb: + mixed-2-1g.18gb-1-2g.35gb-1-3g.71gb: - devices: all mig-enabled: true mig-devices: "1g.18gb": 2 - "2g.47gb": 1 + "2g.35gb": 1 "3g.71gb": 1 # Profile 11