From 699bed7ecd5267db4f7024a32797433424434e57 Mon Sep 17 00:00:00 2001 From: Jasin Aferkou Date: Tue, 19 May 2026 17:48:14 +0200 Subject: [PATCH 1/5] fix(openbao): improve backup strategy --- cli/cmd/install_openbao.go | 40 +++ docs/oms_install_openbao.md | 2 + .../installer/manifests/openbao/vault-cr.yaml | 2 +- internal/installer/openbao.go | 244 ++++++++++++++---- internal/installer/openbao_test.go | 1 + 5 files changed, 234 insertions(+), 55 deletions(-) diff --git a/cli/cmd/install_openbao.go b/cli/cmd/install_openbao.go index 705ae0ce..97ee7c94 100644 --- a/cli/cmd/install_openbao.go +++ b/cli/cmd/install_openbao.go @@ -4,12 +4,16 @@ package cmd import ( + "bufio" "context" + "errors" "fmt" + "io" "os" "os/exec" "os/signal" "path/filepath" + "strings" "syscall" "time" @@ -35,6 +39,8 @@ type InstallOpenBaoOpts struct { Replicas int StorageSize string Timeout time.Duration + AgeKeyFile string + Yes bool } func (c *InstallOpenBaoCmd) RunE(_ *cobra.Command, _ []string) error { @@ -42,6 +48,12 @@ func (c *InstallOpenBaoCmd) RunE(_ *cobra.Command, _ []string) error { return err } + // If --age-key-file is provided, set SOPS_AGE_KEY_FILE so ResolveAgeKey + // picks it up. Otherwise, fall back to the normal auto-discovery chain. + if c.Opts.AgeKeyFile != "" { + os.Setenv("SOPS_AGE_KEY_FILE", c.Opts.AgeKeyFile) + } + configDir, err := os.UserConfigDir() if err != nil { return fmt.Errorf("determining user config directory: %w", err) @@ -69,6 +81,32 @@ func (c *InstallOpenBaoCmd) RunE(_ *cobra.Command, _ []string) error { return fmt.Errorf("initializing openbao installer: %w", err) } + inst.ConfirmFunc = func() error { + if c.Opts.Yes { + return nil + } + + fmt.Printf("\nWARNING: No DR backup found at: %s\n", c.Opts.DRBackupPath) + fmt.Println("This will perform a FRESH OpenBao initialization:") + fmt.Println(" - Existing Vault CR will be deleted") + fmt.Println(" - All OpenBao pods will be terminated") + fmt.Println(" - Persistent volume claims (data) will be deleted") + fmt.Println(" - Existing unseal keys will be removed") + fmt.Println("") + fmt.Println("If you intended to restore from a backup, verify --dr-backup-path is correct.") + fmt.Print("\nType 'yes' to continue: ") + + reader := bufio.NewReader(os.Stdin) + input, err := reader.ReadString('\n') + if err != nil && !errors.Is(err, io.EOF) { + return fmt.Errorf("failed to read confirmation: %w", err) + } + if strings.TrimSpace(strings.ToLower(input)) != "yes" { + return fmt.Errorf("aborted: type 'yes' to continue or pass --yes") + } + return nil + } + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) defer stop() @@ -106,6 +144,8 @@ func AddInstallOpenBaoCmd(install *cobra.Command, opts *GlobalOptions) { openbao.cmd.Flags().IntVar(&openbao.Opts.Replicas, "replicas", 1, "Number of OpenBao replicas (1 for single-node, odd number >= 3 for HA)") openbao.cmd.Flags().StringVar(&openbao.Opts.StorageSize, "storage-size", "10Gi", "PVC storage size for each OpenBao replica") openbao.cmd.Flags().DurationVar(&openbao.Opts.Timeout, "timeout", 5*time.Minute, "Timeout for waiting on initialization") + openbao.cmd.Flags().StringVarP(&openbao.Opts.AgeKeyFile, "age-key-file", "k", "", "Path to age private key file for SOPS encryption/decryption (auto-detected if not set)") + openbao.cmd.Flags().BoolVarP(&openbao.Opts.Yes, "yes", "y", false, "Auto-approve fresh initialization when no DR backup is found") util.MarkFlagRequired(openbao.cmd, "dr-backup-path") diff --git a/docs/oms_install_openbao.md b/docs/oms_install_openbao.md index 78ec9936..b418a7b1 100644 --- a/docs/oms_install_openbao.md +++ b/docs/oms_install_openbao.md @@ -37,6 +37,7 @@ $ oms install openbao --dr-backup-path ./backups/cluster-1.enc.json --timeout 10 ### Options ``` + -k, --age-key-file string Path to age private key file for SOPS encryption/decryption (auto-detected if not set) --bao-user string Username for the userpass auth method (ignored on restore, uses DR backup value) (default "admin") --dr-backup-path string Path for SOPS-encrypted DR backup file (required) -h, --help help for openbao @@ -44,6 +45,7 @@ $ oms install openbao --dr-backup-path ./backups/cluster-1.enc.json --timeout 10 --secrets-engine string Name of the KV-v2 secrets engine to provision (default "cs-secrets-engine") --storage-size string PVC storage size for each OpenBao replica (default "10Gi") --timeout duration Timeout for waiting on initialization (default 5m0s) + -y, --yes Auto-approve fresh initialization when no DR backup is found ``` ### SEE ALSO diff --git a/internal/installer/manifests/openbao/vault-cr.yaml b/internal/installer/manifests/openbao/vault-cr.yaml index a9390c15..85d0759d 100644 --- a/internal/installer/manifests/openbao/vault-cr.yaml +++ b/internal/installer/manifests/openbao/vault-cr.yaml @@ -82,7 +82,7 @@ spec: value: "http://$(POD_NAME).{{ .Namespace }}.svc.cluster.local:8200" unsealConfig: options: - preFlightChecks: false + preFlightChecks: true storeRootToken: false kubernetes: secretNamespace: {{ .Namespace }} diff --git a/internal/installer/openbao.go b/internal/installer/openbao.go index 8bc2173e..8a7cd751 100644 --- a/internal/installer/openbao.go +++ b/internal/installer/openbao.go @@ -24,6 +24,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" + corev1client "k8s.io/client-go/kubernetes/typed/core/v1" ) //go:embed manifests/openbao/vault-cr.yaml @@ -63,11 +64,17 @@ type OpenBaoInstaller struct { Logger *bootstrap.StepLogger Config OpenBaoInstallerConfig + // ConfirmFunc is called when the destructive fresh-install path is about + // to proceed (no DR backup found). If it returns an error the install is + // aborted. When nil the install proceeds without confirmation. + ConfirmFunc func() error + // Intermediate state populated during the install pipeline - ctx context.Context - password string - drBackupExists bool - unsealSecret *corev1.Secret + ctx context.Context + password string + drBackupExists bool + unsealSecret *corev1.Secret + backupUnsealKeys map[string][]byte // unseal keys from DR backup, used during WaitForInitialization } // NewOpenBaoInstaller constructs an OpenBaoInstaller with real Kubernetes and Helm clients. @@ -121,6 +128,21 @@ func (o *OpenBaoInstaller) Install(ctx context.Context) error { return fmt.Errorf("pre-flight DR check failed: %w", err) } + // Only warn when an existing deployment is detected but no DR backup was + // found — the user likely supplied the wrong backup path. A genuine first + // install (no existing deployment) proceeds without prompting. + if !o.drBackupExists && o.ConfirmFunc != nil { + exists, checkErr := o.hasExistingDeployment() + if checkErr != nil { + return fmt.Errorf("checking for existing deployment: %w", checkErr) + } + if exists { + if err := o.ConfirmFunc(); err != nil { + return err + } + } + } + // Only generate a new password for fresh installs; on DR restore the // password was already extracted from the backup in PreFlightDRCheck. if !o.drBackupExists { @@ -138,12 +160,11 @@ func (o *OpenBaoInstaller) Install(ctx context.Context) error { // If a previous install left behind an unseal-keys Secret (e.g. Raft storage // was wiped or the cluster was rebuilt), those keys belong to the old master // key and will cause bank-vaults to permanently fail unsealing the new instance. - // We delete the Vault CR first and wait for pods to exit, otherwise the old - // sidecar's retry loop re-creates the secret after we remove it. + // We clean the full prior install state: Vault CR, pods, PVCs, and the secret. if !o.drBackupExists { - err = o.Logger.Step("Removing stale unseal keys", o.DeleteStaleUnsealKeys) + err = o.Logger.Step("Cleaning stale install state", o.CleanStaleInstallState) if err != nil { - return fmt.Errorf("failed to remove stale unseal keys: %w", err) + return fmt.Errorf("failed to clean stale install state: %w", err) } } @@ -157,6 +178,11 @@ func (o *OpenBaoInstaller) Install(ctx context.Context) error { return fmt.Errorf("failed waiting for initialization: %w", err) } + err = o.Logger.Step("Waiting for all OpenBao pods to be ready", o.WaitForPodsReady) + if err != nil { + return fmt.Errorf("failed waiting for pods to be ready: %w", err) + } + err = o.Logger.Step("Extracting and encrypting DR backup", o.ExtractAndEncrypt) if err != nil { return fmt.Errorf("failed to extract and encrypt DR backup: %w", err) @@ -184,7 +210,7 @@ func (o *OpenBaoInstaller) PreFlightDRCheck() error { return fmt.Errorf("checking DR backup file %s: %w", o.Config.DRBackupPath, err) } - o.Logger.Logf("Found existing DR backup at %s — restoring unseal keys", o.Config.DRBackupPath) + o.Logger.Logf("Found existing DR backup at %s", o.Config.DRBackupPath) decrypted, err := DecryptFileWithSOPS(o.Config.DRBackupPath, o.Config.AgeKeyPath) if err != nil { @@ -196,39 +222,13 @@ func (o *OpenBaoInstaller) PreFlightDRCheck() error { return fmt.Errorf("parsing DR backup: %w", err) } - secretData := make(map[string][]byte) + // Store backup unseal keys for later use in WaitForInitialization. + // We do NOT write them to Kubernetes yet — the operator may delete or + // recreate the secret during Vault CR reconciliation, so we defer + // secret creation to the initialization wait loop where we can retry. + o.backupUnsealKeys = make(map[string][]byte) for k, v := range backup.UnsealKeys { - secretData[k] = []byte(v) - } - - secret := &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: openBaoUnsealSecretName, - Namespace: openBaoNamespace, - }, - Data: secretData, - } - - if err := o.ensureNamespace(o.ctx); err != nil { - return err - } - - secretsClient := o.Clientset.CoreV1().Secrets(openBaoNamespace) - existing, err := secretsClient.Get(o.ctx, openBaoUnsealSecretName, metav1.GetOptions{}) - if err != nil { - if !k8serrors.IsNotFound(err) { - return fmt.Errorf("checking for existing secret: %w", err) - } - _, err = secretsClient.Create(o.ctx, secret, metav1.CreateOptions{}) - if err != nil { - return fmt.Errorf("creating unseal secret from DR backup: %w", err) - } - } else { - secret.ResourceVersion = existing.ResourceVersion - _, err = secretsClient.Update(o.ctx, secret, metav1.UpdateOptions{}) - if err != nil { - return fmt.Errorf("updating unseal secret from DR backup: %w", err) - } + o.backupUnsealKeys[k] = []byte(v) } // Reuse the password and username from the DR backup so the Vault CR is @@ -239,7 +239,6 @@ func (o *OpenBaoInstaller) PreFlightDRCheck() error { o.password = backup.Password o.Config.Username = backup.Username - o.Logger.Logf("Unseal keys restored from DR backup successfully") o.drBackupExists = true return nil } @@ -333,30 +332,119 @@ func (o *OpenBaoInstaller) ApplyVaultCR() error { // WaitForInitialization polls the openbao-unseal-keys Secret until it contains // unseal key data, indicating that Bank-Vaults has completed initialization. +// +// When a DR backup was loaded (backupUnsealKeys is set), the function ensures +// the secret exists with the backup's unseal keys on every poll iteration. This +// handles the case where the bank-vaults operator deletes or recreates the +// secret during Vault CR reconciliation — we simply re-apply it until the +// operator settles and the sidecar can successfully unseal. func (o *OpenBaoInstaller) WaitForInitialization() error { secretsClient := o.Clientset.CoreV1().Secrets(openBaoNamespace) return o.pollUntil("waiting for openbao-unseal-keys to be populated", func() (bool, error) { secret, err := secretsClient.Get(o.ctx, openBaoUnsealSecretName, metav1.GetOptions{}) if err != nil { - if k8serrors.IsNotFound(err) { - return false, nil // Secret doesn't exist yet — keep polling + if !k8serrors.IsNotFound(err) { + return false, fmt.Errorf("fetching unseal secret: %w", err) } - return false, fmt.Errorf("fetching unseal secret: %w", err) + // Secret doesn't exist yet. + if o.backupUnsealKeys != nil { + // DR restore: create the secret from backup so the sidecar can unseal. + if createErr := o.ensureUnsealSecret(secretsClient); createErr != nil { + return false, createErr + } + } + return false, nil // Keep polling — sidecar hasn't confirmed unseal yet } // Check if the secret has meaningful data: at least one key must be // present, indicating bank-vaults has completed initialization and - // written the unseal keys. Bank-vaults writes all keys atomically - // during Init(), so any data means init is done. + // written the unseal keys. if len(secret.Data) > 0 { o.unsealSecret = secret return true, nil } + + // Secret exists but is empty — restore from backup if available. + if o.backupUnsealKeys != nil { + if updateErr := o.ensureUnsealSecret(secretsClient); updateErr != nil { + return false, updateErr + } + } return false, nil }) } +// ensureUnsealSecret creates or updates the unseal keys secret from the DR backup. +// It preserves existing metadata (labels, annotations, ownerReferences) when updating. +func (o *OpenBaoInstaller) ensureUnsealSecret(secretsClient corev1client.SecretInterface) error { + existing, err := secretsClient.Get(o.ctx, openBaoUnsealSecretName, metav1.GetOptions{}) + if err != nil { + if !k8serrors.IsNotFound(err) { + return fmt.Errorf("checking unseal secret: %w", err) + } + // Create new secret + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: openBaoUnsealSecretName, + Namespace: openBaoNamespace, + }, + Data: o.backupUnsealKeys, + } + _, err = secretsClient.Create(o.ctx, secret, metav1.CreateOptions{}) + if err != nil && !k8serrors.IsAlreadyExists(err) { + return fmt.Errorf("creating unseal secret from backup: %w", err) + } + return nil + } + + // Update existing secret — preserve metadata, only set Data + existing.Data = o.backupUnsealKeys + _, err = secretsClient.Update(o.ctx, existing, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("updating unseal secret from backup: %w", err) + } + return nil +} + +// WaitForPodsReady polls until the expected number of vault pods (matching the +// configured replica count) are in Running phase with all containers Ready. +// This ensures scaling operations have fully completed before reporting success. +func (o *OpenBaoInstaller) WaitForPodsReady() error { + selector := labels.SelectorFromSet(labels.Set{"vault_cr": "openbao"}).String() + expected := o.Config.Replicas + + return o.pollUntil("waiting for all OpenBao pods to be ready", func() (bool, error) { + list, err := o.Clientset.CoreV1().Pods(openBaoNamespace).List(o.ctx, metav1.ListOptions{ + LabelSelector: selector, + }) + if err != nil { + return false, fmt.Errorf("listing vault pods: %w", err) + } + + readyCount := 0 + for i := range list.Items { + if isPodReady(&list.Items[i]) { + readyCount++ + } + } + return readyCount >= expected, nil + }) +} + +// isPodReady returns true if the pod is in Running phase and has the Ready condition. +func isPodReady(pod *corev1.Pod) bool { + if pod.Status.Phase != corev1.PodRunning { + return false + } + for _, cond := range pod.Status.Conditions { + if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue { + return true + } + } + return false +} + // ExtractAndEncrypt reads the unseal keys Secret, combines it with the generated // password, and creates a SOPS-encrypted backup file. func (o *OpenBaoInstaller) ExtractAndEncrypt() error { @@ -410,15 +498,17 @@ func (o *OpenBaoInstaller) ExtractAndEncrypt() error { return nil } -// DeleteStaleUnsealKeys removes unseal keys left by a prior installation whose +// CleanStaleInstallState removes all state left by a prior installation whose // Raft storage no longer exists (e.g. cluster rebuild, PVC deletion). Without -// removal, bank-vaults would attempt to unseal with the old master key's shares +// cleanup, bank-vaults would attempt to unseal with the old master key's shares // and fail permanently — the new instance needs to run a fresh init. // -// To prevent the old bank-vaults sidecar from re-creating the secret via its -// retry loop, the Vault CR is deleted first and we wait for all pods to exit -// before removing the secret. -func (o *OpenBaoInstaller) DeleteStaleUnsealKeys() error { +// The cleanup sequence is: +// 1. Delete the Vault CR (stops the bank-vaults sidecar retry loop) +// 2. Wait for all vault pods to terminate +// 3. Delete PVCs (removes stale Raft data that would confuse initialization) +// 4. Delete the unseal-keys Secret +func (o *OpenBaoInstaller) CleanStaleInstallState() error { vaultGVR := k8s.VaultGVR() // Tolerates NotFound — this may be a first-time install with no prior Vault CR. @@ -433,6 +523,26 @@ func (o *OpenBaoInstaller) DeleteStaleUnsealKeys() error { return err } + // Delete PVCs associated with the prior StatefulSet so that stale Raft + // data does not cause OpenBao to report as "initialized" on a fresh install. + pvcList, err := o.Clientset.CoreV1().PersistentVolumeClaims(openBaoNamespace).List( + o.ctx, metav1.ListOptions{LabelSelector: "vault_cr=openbao"}, + ) + if err != nil { + return fmt.Errorf("listing stale PVCs: %w", err) + } + for i := range pvcList.Items { + delErr = o.Clientset.CoreV1().PersistentVolumeClaims(openBaoNamespace).Delete( + o.ctx, pvcList.Items[i].Name, metav1.DeleteOptions{}, + ) + if delErr != nil && !k8serrors.IsNotFound(delErr) { + return fmt.Errorf("deleting PVC %s: %w", pvcList.Items[i].Name, delErr) + } + } + if len(pvcList.Items) > 0 { + o.Logger.Logf("Deleted %d stale PVC(s)", len(pvcList.Items)) + } + // Now it is safe to delete the stale secret. delErr = o.Clientset.CoreV1().Secrets(openBaoNamespace).Delete( o.ctx, openBaoUnsealSecretName, metav1.DeleteOptions{}, @@ -441,10 +551,36 @@ func (o *OpenBaoInstaller) DeleteStaleUnsealKeys() error { return fmt.Errorf("deleting stale unseal secret: %w", delErr) } - o.Logger.Logf("Stale unseal keys removed (vault CR deleted, pods terminated)") + o.Logger.Logf("Stale install state cleaned (CR, pods, PVCs, unseal secret)") return nil } +// hasExistingDeployment checks whether an OpenBao deployment already exists +// in the cluster by looking for the Vault CR or PVCs with vault_cr=openbao. +// This is used to distinguish a genuine first install (nothing exists) from a +// re-install where the user may have supplied the wrong DR backup path. +func (o *OpenBaoInstaller) hasExistingDeployment() (bool, error) { + vaultGVR := k8s.VaultGVR() + _, err := o.DynClient.Resource(vaultGVR).Namespace(openBaoNamespace).Get( + o.ctx, "openbao", metav1.GetOptions{}, + ) + if err == nil { + return true, nil + } + if !k8serrors.IsNotFound(err) { + return false, fmt.Errorf("checking Vault CR: %w", err) + } + + // Vault CR gone but PVCs may linger (e.g. CR was manually deleted). + pvcList, err := o.Clientset.CoreV1().PersistentVolumeClaims(openBaoNamespace).List( + o.ctx, metav1.ListOptions{LabelSelector: "vault_cr=openbao"}, + ) + if err != nil { + return false, fmt.Errorf("listing PVCs: %w", err) + } + return len(pvcList.Items) > 0, nil +} + // waitForVaultPodsGone polls until no pods with label vault_cr=openbao remain // in the vault namespace, or until the context deadline is exceeded. func (o *OpenBaoInstaller) waitForVaultPodsGone() error { diff --git a/internal/installer/openbao_test.go b/internal/installer/openbao_test.go index 0444a3d9..0655cb97 100644 --- a/internal/installer/openbao_test.go +++ b/internal/installer/openbao_test.go @@ -356,6 +356,7 @@ var _ = Describe("OpenBaoInstaller", func() { unsealConfig := spec["unsealConfig"].(map[string]interface{}) options := unsealConfig["options"].(map[string]interface{}) Expect(options["storeRootToken"]).To(BeFalse()) + Expect(options["preFlightChecks"]).To(BeTrue()) // Verify externalConfig has the secrets engine externalConfig := spec["externalConfig"].(map[string]interface{}) From f47f3452296b55a1846bfa3b9a2e28ad62658305 Mon Sep 17 00:00:00 2001 From: Jcing95 <23337729+Jcing95@users.noreply.github.com> Date: Tue, 19 May 2026 16:22:26 +0000 Subject: [PATCH 2/5] chore(docs): Auto-update docs and licenses Signed-off-by: Jcing95 <23337729+Jcing95@users.noreply.github.com> --- NOTICE | 2 +- internal/tmpl/NOTICE | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NOTICE b/NOTICE index fe0e1d54..e6030a6c 100644 --- a/NOTICE +++ b/NOTICE @@ -1165,7 +1165,7 @@ License URL: https://cs.opensource.google/go/x/time/+/v0.15.0:LICENSE Module: gomodules.xyz/jsonpatch/v2 Version: v2.5.0 License: Apache-2.0 -License URL: https://github.com/gomodules/jsonpatch/blob/v2.5.0/LICENSE +License URL: https://github.com/gomodules/jsonpatch/blob/v2.5.0/v2/LICENSE ---------- Module: google.golang.org/api diff --git a/internal/tmpl/NOTICE b/internal/tmpl/NOTICE index fe0e1d54..e6030a6c 100644 --- a/internal/tmpl/NOTICE +++ b/internal/tmpl/NOTICE @@ -1165,7 +1165,7 @@ License URL: https://cs.opensource.google/go/x/time/+/v0.15.0:LICENSE Module: gomodules.xyz/jsonpatch/v2 Version: v2.5.0 License: Apache-2.0 -License URL: https://github.com/gomodules/jsonpatch/blob/v2.5.0/LICENSE +License URL: https://github.com/gomodules/jsonpatch/blob/v2.5.0/v2/LICENSE ---------- Module: google.golang.org/api From 5ca3a5bb09029ceb883eb150b073e4c18563403f Mon Sep 17 00:00:00 2001 From: Jasin Aferkou Date: Thu, 21 May 2026 10:36:06 +0200 Subject: [PATCH 3/5] allow namespace customization --- cli/cmd/install_openbao.go | 3 + internal/installer/openbao.go | 98 +++++++++++++++++++++--------- internal/installer/openbao_test.go | 74 +++++++++++++++++++--- 3 files changed, 136 insertions(+), 39 deletions(-) diff --git a/cli/cmd/install_openbao.go b/cli/cmd/install_openbao.go index 97ee7c94..4717fca8 100644 --- a/cli/cmd/install_openbao.go +++ b/cli/cmd/install_openbao.go @@ -33,6 +33,7 @@ type InstallOpenBaoCmd struct { // InstallOpenBaoOpts holds the CLI flags for the OpenBao installer. type InstallOpenBaoOpts struct { *GlobalOptions + Namespace string SecretsEngineName string BaoUsername string DRBackupPath string @@ -66,6 +67,7 @@ func (c *InstallOpenBaoCmd) RunE(_ *cobra.Command, _ []string) error { } cfg := installer.OpenBaoInstallerConfig{ + Namespace: c.Opts.Namespace, SecretsEngineName: c.Opts.SecretsEngineName, Username: c.Opts.BaoUsername, DRBackupPath: c.Opts.DRBackupPath, @@ -138,6 +140,7 @@ func AddInstallOpenBaoCmd(install *cobra.Command, opts *GlobalOptions) { }, Opts: &InstallOpenBaoOpts{GlobalOptions: opts}, } + openbao.cmd.Flags().StringVarP(&openbao.Opts.Namespace, "namespace", "n", installer.DefaultOpenBaoNamespace, "Kubernetes namespace for OpenBao deployment") openbao.cmd.Flags().StringVar(&openbao.Opts.SecretsEngineName, "secrets-engine", "cs-secrets-engine", "Name of the KV-v2 secrets engine to provision") openbao.cmd.Flags().StringVar(&openbao.Opts.BaoUsername, "bao-user", "admin", "Username for the userpass auth method (ignored on restore, uses DR backup value)") openbao.cmd.Flags().StringVar(&openbao.Opts.DRBackupPath, "dr-backup-path", "", "Path for SOPS-encrypted DR backup file (required)") diff --git a/internal/installer/openbao.go b/internal/installer/openbao.go index 8a7cd751..503eac63 100644 --- a/internal/installer/openbao.go +++ b/internal/installer/openbao.go @@ -31,20 +31,21 @@ import ( var vaultCRTemplate []byte const ( - openBaoUnsealSecretName = "openbao-unseal-keys" - openBaoNamespace = "vault" - openBaoImage = "quay.io/openbao/openbao:2.1.0" - bankVaultsImage = "ghcr.io/bank-vaults/bank-vaults:v1.31.3" - bankVaultsChartRepo = "oci://ghcr.io/bank-vaults/helm-charts" - bankVaultsChartName = "vault-operator" - bankVaultsChartVersion = "1.22.5" - defaultPasswordLength = 32 - pollInterval = 5 * time.Second - maxPollInterval = 30 * time.Second + openBaoUnsealSecretName = "openbao-unseal-keys" + DefaultOpenBaoNamespace = "vault" + openBaoImage = "quay.io/openbao/openbao:2.1.0" + bankVaultsImage = "ghcr.io/bank-vaults/bank-vaults:v1.31.3" + bankVaultsChartRepo = "oci://ghcr.io/bank-vaults/helm-charts" + bankVaultsChartName = "vault-operator" + bankVaultsChartVersion = "1.22.5" + defaultPasswordLength = 32 + pollInterval = 5 * time.Second + maxPollInterval = 30 * time.Second ) // OpenBaoInstallerConfig holds all configurable parameters for the OpenBao bootstrap. type OpenBaoInstallerConfig struct { + Namespace string SecretsEngineName string Username string DRBackupPath string @@ -79,7 +80,7 @@ type OpenBaoInstaller struct { // NewOpenBaoInstaller constructs an OpenBaoInstaller with real Kubernetes and Helm clients. func NewOpenBaoInstaller(cfg OpenBaoInstallerConfig) (*OpenBaoInstaller, error) { - helm, err := NewHelmClient(openBaoNamespace) + helm, err := NewHelmClient(cfg.Namespace) if err != nil { return nil, fmt.Errorf("creating helm client: %w", err) } @@ -101,6 +102,9 @@ func NewOpenBaoInstaller(cfg OpenBaoInstallerConfig) (*OpenBaoInstaller, error) const defaultTimeout = 5 * time.Minute func (o *OpenBaoInstaller) validateConfig() error { + if o.Config.Namespace == "" { + o.Config.Namespace = DefaultOpenBaoNamespace + } r := o.Config.Replicas if r < 1 { return fmt.Errorf("--replicas must be >= 1, got %d", r) @@ -168,6 +172,13 @@ func (o *OpenBaoInstaller) Install(ctx context.Context) error { } } + err = o.Logger.Step("Ensuring namespace exists", func() error { + return o.ensureNamespace(o.ctx) + }) + if err != nil { + return fmt.Errorf("failed to ensure namespace: %w", err) + } + err = o.Logger.Step("Applying Vault CR (OpenBao desired state)", o.ApplyVaultCR) if err != nil { return fmt.Errorf("failed to apply Vault CR: %w", err) @@ -251,18 +262,45 @@ func (o *OpenBaoInstaller) GeneratePassword() error { } // DeployBankVaultsOperator installs or upgrades the Bank-Vaults Operator Helm chart. -// This is idempotent via UpgradeChart with InstallIfNotExist. +// +// The operator is cluster-scoped (it creates ClusterRoles, ClusterRoleBindings) +// and watches Vault CRs across all namespaces. If the operator is already +// installed in a different namespace, we skip re-deployment — one instance +// is sufficient for the entire cluster. func (o *OpenBaoInstaller) DeployBankVaultsOperator() error { cfg := ChartConfig{ ReleaseName: "vault-operator", ChartName: bankVaultsChartRepo + "/" + bankVaultsChartName, Version: bankVaultsChartVersion, - Namespace: openBaoNamespace, + Namespace: o.Config.Namespace, CreateNamespace: true, Values: map[string]interface{}{}, } - return o.Helm.UpgradeChart(o.ctx, cfg, UpgradeChartOptions{InstallIfNotExist: true}) + // Check if the release already exists in the target namespace. + rel, err := o.Helm.FindRelease(o.Config.Namespace, cfg.ReleaseName) + if err != nil { + return err + } + if rel != nil { + // Release exists in target namespace — upgrade in place. + return o.Helm.UpgradeChart(o.ctx, cfg, UpgradeChartOptions{}) + } + + // Release not found in target namespace. Check if the operator is already + // deployed cluster-wide (in another namespace) by looking for its ClusterRole. + _, err = o.Clientset.RbacV1().ClusterRoles().Get(o.ctx, "vault-operator", metav1.GetOptions{}) + if err == nil { + // Operator already installed in another namespace — skip. + o.Logger.Logf("Bank-Vaults Operator already installed in the cluster, skipping deployment") + return nil + } + if !k8serrors.IsNotFound(err) { + return fmt.Errorf("checking for existing vault-operator ClusterRole: %w", err) + } + + // Operator does not exist — perform fresh install. + return o.Helm.InstallChart(o.ctx, cfg) } // vaultCRTemplateData holds the values injected into the Vault CR template. @@ -291,12 +329,12 @@ func (o *OpenBaoInstaller) ApplyVaultCR() error { // later only requires changing the replica count. var retryJoinAddrs []string for i := 0; i < o.Config.Replicas; i++ { - addr := fmt.Sprintf("http://openbao-%d.%s.svc.cluster.local:8200", i, openBaoNamespace) + addr := fmt.Sprintf("http://openbao-%d.%s.svc.cluster.local:8200", i, o.Config.Namespace) retryJoinAddrs = append(retryJoinAddrs, addr) } data := vaultCRTemplateData{ - Namespace: openBaoNamespace, + Namespace: o.Config.Namespace, OpenBaoImage: openBaoImage, BankVaultsImage: bankVaultsImage, SecretsEngineName: o.Config.SecretsEngineName, @@ -339,7 +377,7 @@ func (o *OpenBaoInstaller) ApplyVaultCR() error { // secret during Vault CR reconciliation — we simply re-apply it until the // operator settles and the sidecar can successfully unseal. func (o *OpenBaoInstaller) WaitForInitialization() error { - secretsClient := o.Clientset.CoreV1().Secrets(openBaoNamespace) + secretsClient := o.Clientset.CoreV1().Secrets(o.Config.Namespace) return o.pollUntil("waiting for openbao-unseal-keys to be populated", func() (bool, error) { secret, err := secretsClient.Get(o.ctx, openBaoUnsealSecretName, metav1.GetOptions{}) @@ -387,7 +425,7 @@ func (o *OpenBaoInstaller) ensureUnsealSecret(secretsClient corev1client.SecretI secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: openBaoUnsealSecretName, - Namespace: openBaoNamespace, + Namespace: o.Config.Namespace, }, Data: o.backupUnsealKeys, } @@ -415,7 +453,7 @@ func (o *OpenBaoInstaller) WaitForPodsReady() error { expected := o.Config.Replicas return o.pollUntil("waiting for all OpenBao pods to be ready", func() (bool, error) { - list, err := o.Clientset.CoreV1().Pods(openBaoNamespace).List(o.ctx, metav1.ListOptions{ + list, err := o.Clientset.CoreV1().Pods(o.Config.Namespace).List(o.ctx, metav1.ListOptions{ LabelSelector: selector, }) if err != nil { @@ -512,7 +550,7 @@ func (o *OpenBaoInstaller) CleanStaleInstallState() error { vaultGVR := k8s.VaultGVR() // Tolerates NotFound — this may be a first-time install with no prior Vault CR. - delErr := o.DynClient.Resource(vaultGVR).Namespace(openBaoNamespace).Delete( + delErr := o.DynClient.Resource(vaultGVR).Namespace(o.Config.Namespace).Delete( o.ctx, "openbao", metav1.DeleteOptions{}, ) if delErr != nil && !k8serrors.IsNotFound(delErr) { @@ -525,14 +563,14 @@ func (o *OpenBaoInstaller) CleanStaleInstallState() error { // Delete PVCs associated with the prior StatefulSet so that stale Raft // data does not cause OpenBao to report as "initialized" on a fresh install. - pvcList, err := o.Clientset.CoreV1().PersistentVolumeClaims(openBaoNamespace).List( + pvcList, err := o.Clientset.CoreV1().PersistentVolumeClaims(o.Config.Namespace).List( o.ctx, metav1.ListOptions{LabelSelector: "vault_cr=openbao"}, ) if err != nil { return fmt.Errorf("listing stale PVCs: %w", err) } for i := range pvcList.Items { - delErr = o.Clientset.CoreV1().PersistentVolumeClaims(openBaoNamespace).Delete( + delErr = o.Clientset.CoreV1().PersistentVolumeClaims(o.Config.Namespace).Delete( o.ctx, pvcList.Items[i].Name, metav1.DeleteOptions{}, ) if delErr != nil && !k8serrors.IsNotFound(delErr) { @@ -544,7 +582,7 @@ func (o *OpenBaoInstaller) CleanStaleInstallState() error { } // Now it is safe to delete the stale secret. - delErr = o.Clientset.CoreV1().Secrets(openBaoNamespace).Delete( + delErr = o.Clientset.CoreV1().Secrets(o.Config.Namespace).Delete( o.ctx, openBaoUnsealSecretName, metav1.DeleteOptions{}, ) if delErr != nil && !k8serrors.IsNotFound(delErr) { @@ -561,7 +599,7 @@ func (o *OpenBaoInstaller) CleanStaleInstallState() error { // re-install where the user may have supplied the wrong DR backup path. func (o *OpenBaoInstaller) hasExistingDeployment() (bool, error) { vaultGVR := k8s.VaultGVR() - _, err := o.DynClient.Resource(vaultGVR).Namespace(openBaoNamespace).Get( + _, err := o.DynClient.Resource(vaultGVR).Namespace(o.Config.Namespace).Get( o.ctx, "openbao", metav1.GetOptions{}, ) if err == nil { @@ -572,7 +610,7 @@ func (o *OpenBaoInstaller) hasExistingDeployment() (bool, error) { } // Vault CR gone but PVCs may linger (e.g. CR was manually deleted). - pvcList, err := o.Clientset.CoreV1().PersistentVolumeClaims(openBaoNamespace).List( + pvcList, err := o.Clientset.CoreV1().PersistentVolumeClaims(o.Config.Namespace).List( o.ctx, metav1.ListOptions{LabelSelector: "vault_cr=openbao"}, ) if err != nil { @@ -587,7 +625,7 @@ func (o *OpenBaoInstaller) waitForVaultPodsGone() error { selector := labels.SelectorFromSet(labels.Set{"vault_cr": "openbao"}).String() return o.pollUntil("waiting for vault pods to terminate", func() (bool, error) { - list, err := o.Clientset.CoreV1().Pods(openBaoNamespace).List(o.ctx, metav1.ListOptions{ + list, err := o.Clientset.CoreV1().Pods(o.Config.Namespace).List(o.ctx, metav1.ListOptions{ LabelSelector: selector, }) if err != nil { @@ -634,18 +672,18 @@ func (o *OpenBaoInstaller) pollUntil(timeoutMsg string, check func() (bool, erro func (o *OpenBaoInstaller) ensureNamespace(ctx context.Context) error { ns := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: openBaoNamespace, + Name: o.Config.Namespace, }, } - _, err := o.Clientset.CoreV1().Namespaces().Get(ctx, openBaoNamespace, metav1.GetOptions{}) + _, err := o.Clientset.CoreV1().Namespaces().Get(ctx, o.Config.Namespace, metav1.GetOptions{}) if err != nil { if !k8serrors.IsNotFound(err) { - return fmt.Errorf("checking namespace %s: %w", openBaoNamespace, err) + return fmt.Errorf("checking namespace %s: %w", o.Config.Namespace, err) } _, err = o.Clientset.CoreV1().Namespaces().Create(ctx, ns, metav1.CreateOptions{}) if err != nil && !k8serrors.IsAlreadyExists(err) { - return fmt.Errorf("creating namespace %s: %w", openBaoNamespace, err) + return fmt.Errorf("creating namespace %s: %w", o.Config.Namespace, err) } } return nil diff --git a/internal/installer/openbao_test.go b/internal/installer/openbao_test.go index 0655cb97..1053e9e2 100644 --- a/internal/installer/openbao_test.go +++ b/internal/installer/openbao_test.go @@ -21,6 +21,7 @@ import ( . "github.com/onsi/gomega" "github.com/stretchr/testify/mock" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/yaml" "k8s.io/client-go/kubernetes/fake" @@ -49,20 +50,48 @@ var _ = Describe("OpenBaoInstaller", func() { }) Describe("Install — deploy Bank-Vaults Operator", func() { - It("calls UpgradeChart with InstallIfNotExist for the operator", func() { - helmMock.EXPECT().UpgradeChart(mock.Anything, mock.MatchedBy(func(cfg installer.ChartConfig) bool { + It("performs fresh install when operator does not exist", func() { + // FindRelease returns nil (no existing release in target namespace) + helmMock.EXPECT().FindRelease("vault", "vault-operator").Return(nil, nil) + + // No ClusterRole exists (fake clientset has nothing), so InstallChart is called + helmMock.EXPECT().InstallChart(mock.Anything, mock.MatchedBy(func(cfg installer.ChartConfig) bool { return cfg.ReleaseName == "vault-operator" && cfg.ChartName == "oci://ghcr.io/bank-vaults/helm-charts/vault-operator" && cfg.Version == "1.22.5" && cfg.Namespace == "vault" && cfg.CreateNamespace == true - }), installer.UpgradeChartOptions{InstallIfNotExist: true}).Return(nil) + })).Return(nil) + + inst := &installer.OpenBaoInstaller{ + Helm: helmMock, + Clientset: clientset, + Logger: bootstrap.NewStepLogger(true), + Config: installer.OpenBaoInstallerConfig{Namespace: "vault"}, + } + inst.SetCtx(ctx) + + err := inst.DeployBankVaultsOperator() + Expect(err).ToNot(HaveOccurred()) + }) + + It("upgrades when release already exists in target namespace", func() { + // FindRelease returns an existing release + helmMock.EXPECT().FindRelease("vault", "vault-operator").Return(&installer.ReleaseInfo{ + Name: "vault-operator", + InstalledVersion: "1.22.0", + }, nil) + + helmMock.EXPECT().UpgradeChart(mock.Anything, mock.MatchedBy(func(cfg installer.ChartConfig) bool { + return cfg.ReleaseName == "vault-operator" && + cfg.Namespace == "vault" + }), installer.UpgradeChartOptions{}).Return(nil) inst := &installer.OpenBaoInstaller{ Helm: helmMock, Clientset: clientset, Logger: bootstrap.NewStepLogger(true), - Config: installer.OpenBaoInstallerConfig{}, + Config: installer.OpenBaoInstallerConfig{Namespace: "vault"}, } inst.SetCtx(ctx) @@ -70,15 +99,40 @@ var _ = Describe("OpenBaoInstaller", func() { Expect(err).ToNot(HaveOccurred()) }) - It("returns an error when Helm fails", func() { - helmMock.EXPECT().UpgradeChart(mock.Anything, mock.Anything, mock.Anything). + It("skips deployment when operator exists in another namespace", func() { + // FindRelease returns nil (not in target namespace) + helmMock.EXPECT().FindRelease("second", "vault-operator").Return(nil, nil) + + // Pre-create the ClusterRole to simulate operator installed elsewhere + cr := &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{Name: "vault-operator"}, + } + _, err := clientset.RbacV1().ClusterRoles().Create(ctx, cr, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + inst := &installer.OpenBaoInstaller{ + Helm: helmMock, + Clientset: clientset, + Logger: bootstrap.NewStepLogger(true), + Config: installer.OpenBaoInstallerConfig{Namespace: "second"}, + } + inst.SetCtx(ctx) + + // Should not call InstallChart or UpgradeChart + err = inst.DeployBankVaultsOperator() + Expect(err).ToNot(HaveOccurred()) + }) + + It("returns an error when Helm InstallChart fails", func() { + helmMock.EXPECT().FindRelease("vault", "vault-operator").Return(nil, nil) + helmMock.EXPECT().InstallChart(mock.Anything, mock.Anything). Return(fmt.Errorf("chart not found")) inst := &installer.OpenBaoInstaller{ Helm: helmMock, Clientset: clientset, Logger: bootstrap.NewStepLogger(true), - Config: installer.OpenBaoInstallerConfig{}, + Config: installer.OpenBaoInstallerConfig{Namespace: "vault"}, } inst.SetCtx(ctx) @@ -148,7 +202,8 @@ var _ = Describe("OpenBaoInstaller", func() { Clientset: clientset, Logger: bootstrap.NewStepLogger(true), Config: installer.OpenBaoInstallerConfig{ - Timeout: 5 * time.Second, + Namespace: "vault", + Timeout: 5 * time.Second, }, } inst.SetCtx(ctx) @@ -164,7 +219,8 @@ var _ = Describe("OpenBaoInstaller", func() { Clientset: clientset, Logger: bootstrap.NewStepLogger(true), Config: installer.OpenBaoInstallerConfig{ - Timeout: 1 * time.Second, + Namespace: "vault", + Timeout: 1 * time.Second, }, } inst.SetCtx(ctx) From 49b88f12815d1e1b75a463d9a3f4adfb69331516 Mon Sep 17 00:00:00 2001 From: Jcing95 <23337729+Jcing95@users.noreply.github.com> Date: Thu, 21 May 2026 08:37:28 +0000 Subject: [PATCH 4/5] chore(docs): Auto-update docs and licenses Signed-off-by: Jcing95 <23337729+Jcing95@users.noreply.github.com> --- docs/oms_install_openbao.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/oms_install_openbao.md b/docs/oms_install_openbao.md index b418a7b1..35c2b544 100644 --- a/docs/oms_install_openbao.md +++ b/docs/oms_install_openbao.md @@ -41,6 +41,7 @@ $ oms install openbao --dr-backup-path ./backups/cluster-1.enc.json --timeout 10 --bao-user string Username for the userpass auth method (ignored on restore, uses DR backup value) (default "admin") --dr-backup-path string Path for SOPS-encrypted DR backup file (required) -h, --help help for openbao + -n, --namespace string Kubernetes namespace for OpenBao deployment (default "vault") --replicas int Number of OpenBao replicas (1 for single-node, odd number >= 3 for HA) (default 1) --secrets-engine string Name of the KV-v2 secrets engine to provision (default "cs-secrets-engine") --storage-size string PVC storage size for each OpenBao replica (default "10Gi") From f69ef196738bdb866d1d48d3978da1dff2874d99 Mon Sep 17 00:00:00 2001 From: Jasin Aferkou Date: Thu, 21 May 2026 11:11:57 +0200 Subject: [PATCH 5/5] fix lint --- cli/cmd/install_openbao.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cli/cmd/install_openbao.go b/cli/cmd/install_openbao.go index 4717fca8..35a082cb 100644 --- a/cli/cmd/install_openbao.go +++ b/cli/cmd/install_openbao.go @@ -52,7 +52,9 @@ func (c *InstallOpenBaoCmd) RunE(_ *cobra.Command, _ []string) error { // If --age-key-file is provided, set SOPS_AGE_KEY_FILE so ResolveAgeKey // picks it up. Otherwise, fall back to the normal auto-discovery chain. if c.Opts.AgeKeyFile != "" { - os.Setenv("SOPS_AGE_KEY_FILE", c.Opts.AgeKeyFile) + if err := os.Setenv("SOPS_AGE_KEY_FILE", c.Opts.AgeKeyFile); err != nil { + return fmt.Errorf("setting SOPS_AGE_KEY_FILE: %w", err) + } } configDir, err := os.UserConfigDir()