From 99de915d45377a84d61f31ff7e3b0f3550fa4878 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Tue, 4 Nov 2025 10:29:43 -0500 Subject: [PATCH 1/4] fix(azure): add token-minter for self-managed hosted clusters Azure Disk and File CSI drivers fail on Azure self-managed hosted clusters because the service account token at /var/run/secrets/openshift/serviceaccount/token does not exist. Add runtime deployment hooks that conditionally inject a token-minter sidecar container for self-managed Azure clusters. The token-minter creates guest cluster service account tokens that the CSI drivers use for Azure workload identity authentication. ARO HCP continues to use Secret Provider Class with managed identities and is not affected by this change. Fixes: OCPBUGS-63698 Signed-off-by: Bryan Cox Commit-Message-Assisted-by: Claude (via Claude Code) --- pkg/driver/azure-disk/azure_disk.go | 4 +++ pkg/driver/azure-file/azure_file.go | 4 +++ pkg/driver/common/operator/hooks.go | 47 +++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) diff --git a/pkg/driver/azure-disk/azure_disk.go b/pkg/driver/azure-disk/azure_disk.go index f4b52c190..4a3f051c2 100644 --- a/pkg/driver/azure-disk/azure_disk.go +++ b/pkg/driver/azure-disk/azure_disk.go @@ -211,7 +211,11 @@ func GetAzureDiskOperatorControllerConfig(ctx context.Context, flavour generator if flavour == generator.FlavourHyperShift { azureDiskSecretProviderClass := strings.TrimSpace(os.Getenv("ARO_HCP_SECRET_PROVIDER_CLASS_FOR_DISK")) if azureDiskSecretProviderClass != "" { + // ARO HCP: use Secret Provider Class for managed identities cfg.DeploymentHooks = append(cfg.DeploymentHooks, withAROCSIVolume(azureDiskSecretProviderClass)) + } else { + // Self-managed Azure: use token-minter for workload identity + cfg.DeploymentHooks = append(cfg.DeploymentHooks, operator.WithTokenMinter("azure-disk-csi-driver-controller-sa")) } } diff --git a/pkg/driver/azure-file/azure_file.go b/pkg/driver/azure-file/azure_file.go index 
c75589935..6b9d8d007 100644 --- a/pkg/driver/azure-file/azure_file.go +++ b/pkg/driver/azure-file/azure_file.go @@ -176,7 +176,11 @@ func GetAzureFileOperatorControllerConfig(ctx context.Context, flavour generator if flavour == generator.FlavourHyperShift { azureFileSecretProviderClass := strings.TrimSpace(os.Getenv("ARO_HCP_SECRET_PROVIDER_CLASS_FOR_FILE")) if azureFileSecretProviderClass != "" { + // ARO HCP: use Secret Provider Class for managed identities cfg.DeploymentHooks = append(cfg.DeploymentHooks, withAROCSIVolume(azureFileSecretProviderClass)) + } else { + // Self-managed Azure: use token-minter for workload identity + cfg.DeploymentHooks = append(cfg.DeploymentHooks, operator.WithTokenMinter("azure-file-csi-driver-controller-sa")) } } diff --git a/pkg/driver/common/operator/hooks.go b/pkg/driver/common/operator/hooks.go index de702c4da..00d2236b9 100644 --- a/pkg/driver/common/operator/hooks.go +++ b/pkg/driver/common/operator/hooks.go @@ -16,6 +16,7 @@ import ( dc "github.com/openshift/library-go/pkg/operator/deploymentcontroller" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" @@ -261,3 +262,49 @@ func withHyperShiftRunAsUser(c *clients.Clients) (dc.DeploymentHookFunc, []facto } return hook, nil } + +// WithTokenMinter returns a Deployment hook that adds a token-minter sidecar container for HyperShift. +// The token-minter creates guest cluster service account tokens for use in the management cluster. +// Note: The bound-sa-token and hosted-kubeconfig volumes are added by the HyperShift patch files, +// so this hook only adds the container that uses them. 
+func WithTokenMinter(serviceAccountName string) dc.DeploymentHookFunc { + return func(_ *opv1.OperatorSpec, deployment *appsv1.Deployment) error { + tokenMinter := corev1.Container{ + Name: "token-minter", + Image: os.Getenv("HYPERSHIFT_IMAGE"), + Command: []string{ + "/usr/bin/control-plane-operator", + "token-minter", + }, + Args: []string{ + "--service-account-namespace=openshift-cluster-csi-drivers", + "--service-account-name=" + serviceAccountName, + "--token-audience=openshift", + "--token-file=/var/run/secrets/openshift/serviceaccount/token", + "--kubeconfig=/etc/hosted-kubernetes/kubeconfig", + }, + ImagePullPolicy: corev1.PullIfNotPresent, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("10Mi"), + }, + }, + TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "bound-sa-token", + MountPath: "/var/run/secrets/openshift/serviceaccount", + }, + { + Name: "hosted-kubeconfig", + MountPath: "/etc/hosted-kubernetes", + ReadOnly: true, + }, + }, + } + deployment.Spec.Template.Spec.Containers = append(deployment.Spec.Template.Spec.Containers, tokenMinter) + + return nil + } +} From e568221327a5f7c6fa2cc07675d2e1252f2f21b9 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Tue, 4 Nov 2025 12:12:26 -0500 Subject: [PATCH 2/4] fix: use HYPERSHIFT_IMAGE placeholder for token-minter The token-minter image should use the placeholder instead of reading os.Getenv() directly. The placeholder is replaced at runtime by the DefaultReplacements() function when the operator processes the deployment. This matches the pattern used in AWS EBS static patches. 
--- pkg/driver/common/operator/hooks.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/driver/common/operator/hooks.go b/pkg/driver/common/operator/hooks.go index 00d2236b9..7a52166e3 100644 --- a/pkg/driver/common/operator/hooks.go +++ b/pkg/driver/common/operator/hooks.go @@ -271,7 +271,7 @@ func WithTokenMinter(serviceAccountName string) dc.DeploymentHookFunc { return func(_ *opv1.OperatorSpec, deployment *appsv1.Deployment) error { tokenMinter := corev1.Container{ Name: "token-minter", - Image: os.Getenv("HYPERSHIFT_IMAGE"), + Image: "${HYPERSHIFT_IMAGE}", Command: []string{ "/usr/bin/control-plane-operator", "token-minter", From 95f07165319e4b0e0c1326a68fd2aec6d42dc988 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Tue, 4 Nov 2025 13:43:46 -0500 Subject: [PATCH 3/4] fix: correct condition for HYPERSHIFT_IMAGE replacement Fix copy-paste error in DefaultReplacements() where HYPERSHIFT_IMAGE placeholder replacement was incorrectly gated on csiDriver != "" instead of hyperShiftImage != "". This bug prevented ${HYPERSHIFT_IMAGE} placeholders from being replaced with the actual image value, causing token-minter containers to have invalid image references. Signed-off-by: Bryan Cox Commit-Message-Assisted-by: Claude (via Claude Code) --- pkg/driver/common/operator/replacer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/driver/common/operator/replacer.go b/pkg/driver/common/operator/replacer.go index e004e06c2..9761788a8 100644 --- a/pkg/driver/common/operator/replacer.go +++ b/pkg/driver/common/operator/replacer.go @@ -61,7 +61,7 @@ func DefaultReplacements(controlPlaneNamespace, guestNamespace string) []string } hyperShiftImage := os.Getenv(hyperShiftImageEnvName) - if csiDriver != "" { + if hyperShiftImage != "" { pairs = append(pairs, []string{"${HYPERSHIFT_IMAGE}", hyperShiftImage}...) 
} From 0af283438ab108fdf1cfbbd2328087ae1e9940d4 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Tue, 4 Nov 2025 14:49:55 -0500 Subject: [PATCH 4/4] fix: use direct env var instead of placeholder in token-minter hook Deployment hooks run after asset placeholder replacement, so placeholders added by hooks never get replaced. Fix by directly reading os.Getenv("HYPERSHIFT_IMAGE") in the hook instead of using a placeholder string. Also add conditional behavior: if HYPERSHIFT_IMAGE is not set, skip adding the token-minter container. This allows the same hook to work for both self-managed Azure (where cluster-storage-operator sets HYPERSHIFT_IMAGE) and ARO HCP (where it doesn't). Signed-off-by: Bryan Cox Commit-Message-Assisted-by: Claude (via Claude Code) --- pkg/driver/common/operator/hooks.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/driver/common/operator/hooks.go b/pkg/driver/common/operator/hooks.go index 7a52166e3..60b6e4921 100644 --- a/pkg/driver/common/operator/hooks.go +++ b/pkg/driver/common/operator/hooks.go @@ -267,11 +267,18 @@ func withHyperShiftRunAsUser(c *clients.Clients) (dc.DeploymentHookFunc, []facto // The token-minter creates guest cluster service account tokens for use in the management cluster. // Note: The bound-sa-token and hosted-kubeconfig volumes are added by the HyperShift patch files, // so this hook only adds the container that uses them. +// If HYPERSHIFT_IMAGE environment variable is not set, the hook does nothing (allows conditional behavior). 
func WithTokenMinter(serviceAccountName string) dc.DeploymentHookFunc { return func(_ *opv1.OperatorSpec, deployment *appsv1.Deployment) error { + hyperShiftImage := os.Getenv("HYPERSHIFT_IMAGE") + if hyperShiftImage == "" { + // HYPERSHIFT_IMAGE not set, skip adding token-minter + return nil + } + tokenMinter := corev1.Container{ Name: "token-minter", - Image: "${HYPERSHIFT_IMAGE}", + Image: hyperShiftImage, Command: []string{ "/usr/bin/control-plane-operator", "token-minter",