Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions manifests/postgresql.crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3246,6 +3246,13 @@ spec:
- name
type: object
type: array
lifecycle:
description: LifecycleSpec describes the lifecycle state of a Postgres
cluster.
properties:
phase:
type: string
type: object
logicalBackupRetention:
type: string
logicalBackupSchedule:
Expand Down Expand Up @@ -4197,6 +4204,9 @@ spec:
properties:
PostgresClusterStatus:
type: string
previousNumberOfInstances:
format: int32
type: integer
required:
- PostgresClusterStatus
type: object
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/acid.zalan.do/v1/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ const (
ClusterStatusSyncFailed = "SyncFailed"
ClusterStatusAddFailed = "CreateFailed"
ClusterStatusRunning = "Running"
ClusterStatusStopping = "Stopping"
ClusterStatusStopped = "Stopped"
ClusterStatusInvalid = "Invalid"
)

Expand Down
10 changes: 10 additions & 0 deletions pkg/apis/acid.zalan.do/v1/postgresql.crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3246,6 +3246,13 @@ spec:
- name
type: object
type: array
lifecycle:
description: LifecycleSpec describes the lifecycle state of a Postgres
cluster.
properties:
phase:
type: string
type: object
logicalBackupRetention:
type: string
logicalBackupSchedule:
Expand Down Expand Up @@ -4197,6 +4204,9 @@ spec:
properties:
PostgresClusterStatus:
type: string
previousNumberOfInstances:
format: int32
type: integer
required:
- PostgresClusterStatus
type: object
Expand Down
9 changes: 8 additions & 1 deletion pkg/apis/acid.zalan.do/v1/postgresql_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ type PostgresSpec struct {
TLS *TLSDescription `json:"tls,omitempty"`
AdditionalVolumes []AdditionalVolume `json:"additionalVolumes,omitempty"`
Streams []Stream `json:"streams,omitempty"`
Lifecycle *LifecycleSpec `json:"lifecycle,omitempty"`
Env []v1.EnvVar `json:"env,omitempty"`

// deprecated
Expand Down Expand Up @@ -257,6 +258,11 @@ type StandbyDescription struct {
StandbyPrimarySlotName string `json:"standby_primary_slot_name,omitempty"`
}

// LifecycleSpec describes the lifecycle state of a Postgres cluster.
type LifecycleSpec struct {
// Phase is the requested lifecycle phase. The operator compares it against
// the literal "stopped" to hibernate the cluster; a missing or different
// value is treated as a request to keep (or bring back) the cluster running.
Phase string `json:"phase,omitempty"`
}

// TLSDescription specs TLS properties
type TLSDescription struct {
// +required
Expand Down Expand Up @@ -302,7 +308,8 @@ type UserFlags []string

// PostgresStatus contains status of the PostgreSQL cluster (running, creation failed etc.)
type PostgresStatus struct {
PostgresClusterStatus string `json:"PostgresClusterStatus"`
PostgresClusterStatus string `json:"PostgresClusterStatus"`
PreviousNumberOfInstances int32 `json:"previousNumberOfInstances,omitempty"`
}

// ConnectionPooler Options for connection pooler
Expand Down
10 changes: 10 additions & 0 deletions pkg/apis/acid.zalan.do/v1/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,16 @@ func (postgresStatus PostgresStatus) Creating() bool {
return postgresStatus.PostgresClusterStatus == ClusterStatusCreating
}

// Stopping reports whether the cluster status equals ClusterStatusStopping.
func (postgresStatus PostgresStatus) Stopping() bool {
return postgresStatus.PostgresClusterStatus == ClusterStatusStopping
}

// Stopped reports whether the cluster status equals ClusterStatusStopped.
func (postgresStatus PostgresStatus) Stopped() bool {
return postgresStatus.PostgresClusterStatus == ClusterStatusStopped
}

// String returns the raw PostgresClusterStatus value.
func (postgresStatus PostgresStatus) String() string {
return postgresStatus.PostgresClusterStatus
}
21 changes: 21 additions & 0 deletions pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

120 changes: 119 additions & 1 deletion pkg/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1008,13 +1008,31 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
c.mu.Lock()
defer c.mu.Unlock()

// Block all spec changes when cluster is stopped or stopping
blocked, err := c.blockLifecycleUpdate(newSpec)
if err != nil {
return err
}
if blocked {
return nil
}

newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusUpdating

newSpec, err := c.KubeClient.SetPostgresCRDStatus(c.clusterName(), newSpec)
newSpec, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), newSpec)
if err != nil {
return fmt.Errorf("could not set cluster status to updating: %w", err)
}

// Handle lifecycle transitions (hibernate/wake-up)
handled, err := c.handleHibernateAndWakeUp(newSpec)
if err != nil {
return err
}
if handled {
return nil
}

if !c.isInMaintenanceWindow(newSpec.Spec.MaintenanceWindows) {
// do not apply any major version related changes yet
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
Expand Down Expand Up @@ -1220,6 +1238,106 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
return nil
}

// blockLifecycleUpdate decides whether Update must be rejected because of the
// cluster's current lifecycle status.
//
// Results:
//   - (false, nil): the cluster is neither stopping nor stopped, or it is stopped
//     and the new spec no longer requests lifecycle.phase "stopped" (wake-up).
//   - (true, err): the update is rejected; err explains why. This happens while
//     the cluster is stopping (no cancellation is possible) and while it is
//     stopped with lifecycle.phase still set to "stopped".
//
// A blocked update is always reported together with a non-nil error.
func (c *Cluster) blockLifecycleUpdate(newSpec *acidv1.Postgresql) (bool, error) {
	switch {
	case c.Status.Stopping():
		// a hibernate in progress cannot be interrupted by any spec change
		return true, fmt.Errorf("cannot update cluster while it is stopping. Wait for it to fully stop first")
	case !c.Status.Stopped():
		return false, nil
	}

	// The cluster is stopped: only a spec that drops the "stopped" phase may pass.
	if lifecycle := newSpec.Spec.Lifecycle; lifecycle != nil && lifecycle.Phase == "stopped" {
		return true, fmt.Errorf("cannot update cluster while stopped. Remove lifecycle.phase to wake up the cluster")
	}

	return false, nil
}

// handleHibernateAndWakeUp performs the lifecycle transitions that are triggered
// from Update:
//
//   - hibernate (Running -> Stopping): the current numberOfInstances is saved in
//     status.previousNumberOfInstances and spec.numberOfInstances is set to 0;
//   - wake-up (Stopped -> Updating): spec.numberOfInstances is restored from
//     status.previousNumberOfInstances, which is then reset to 0.
//
// In both cases the spec is written first and the status afterwards, and the
// resulting object is stored with setSpec.
//
// Results:
//   - (true, nil): a transition was carried out, Update should return early;
//   - (false, nil): nothing to do, the regular update continues;
//   - (false, err): writing the spec or the status failed.
func (c *Cluster) handleHibernateAndWakeUp(newSpec *acidv1.Postgresql) (bool, error) {
	stopRequested := newSpec.Spec.Lifecycle != nil && newSpec.Spec.Lifecycle.Phase == "stopped"

	// === INITIATE HIBERNATE: Running -> Stopping ===
	if c.Status.Running() && stopRequested {
		c.logger.Infof("[lifecycle] initiating hibernate for cluster %s: current numberOfInstances=%d", c.Name, c.Spec.NumberOfInstances)

		// remember the instance count BEFORE scaling the spec down to zero
		newSpec.Status.PreviousNumberOfInstances = c.Spec.NumberOfInstances
		newSpec.Spec.NumberOfInstances = 0
		newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusStopping

		c.logger.Infof("[lifecycle] hibernate initiated: setting numberOfInstances=0, previousNumberOfInstances=%d", newSpec.Status.PreviousNumberOfInstances)

		// spec goes first; with a status subresource this call only persists the spec
		persisted, err := c.KubeClient.UpdatePostgresCR(c.clusterName(), newSpec)
		if err != nil {
			return false, fmt.Errorf("could not update spec during hibernate: %w", err)
		}
		c.logger.Infof("[lifecycle] hibernate: spec updated successfully")

		// the returned object carries no status values, so copy them over before
		// writing the status separately
		persisted.Status.PreviousNumberOfInstances = newSpec.Status.PreviousNumberOfInstances
		persisted.Status.PostgresClusterStatus = newSpec.Status.PostgresClusterStatus

		persisted, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), persisted)
		if err != nil {
			return false, fmt.Errorf("could not update status during hibernate: %w", err)
		}
		c.logger.Infof("[lifecycle] hibernate: status updated successfully, previousNumberOfInstances=%d", persisted.Status.PreviousNumberOfInstances)

		c.setSpec(persisted)
		return true, nil
	}

	// === WAKE-UP: Stopped -> Running ===
	// Nothing to do unless the cluster is stopped, the "stopped" phase was
	// dropped and there is a saved instance count to restore.
	if !c.Status.Stopped() || stopRequested || newSpec.Status.PreviousNumberOfInstances <= 0 {
		return false, nil
	}

	c.logger.Infof("[lifecycle] waking up cluster %s: restoring numberOfInstances=%d", c.Name, newSpec.Status.PreviousNumberOfInstances)

	// bring back the instance count that was saved when hibernating
	newSpec.Spec.NumberOfInstances = newSpec.Status.PreviousNumberOfInstances
	newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusUpdating

	// spec goes first
	restored, err := c.KubeClient.UpdatePostgresCR(c.clusterName(), newSpec)
	if err != nil {
		return false, fmt.Errorf("could not update spec during wake-up: %w", err)
	}
	c.logger.Infof("[lifecycle] wake-up: spec updated successfully")

	// then the status, dropping the saved instance count now that it is restored
	restored.Status.PreviousNumberOfInstances = 0
	restored.Status.PostgresClusterStatus = newSpec.Status.PostgresClusterStatus

	restored, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), restored)
	if err != nil {
		return false, fmt.Errorf("could not update status during wake-up: %w", err)
	}
	c.logger.Infof("[lifecycle] wake-up: status updated successfully, previousNumberOfInstances cleared")

	c.setSpec(restored)
	return true, nil
}

func syncResources(a, b *v1.ResourceRequirements) bool {
for _, res := range []v1.ResourceName{
v1.ResourceCPU,
Expand Down
77 changes: 77 additions & 0 deletions pkg/cluster/lifecycle.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package cluster

import (
acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1"
)

// manageHibernateState applies the hibernate/wake-up state machine to newSpec
// during Sync. It returns true if Sync should continue and false if Sync should
// return early (cluster is stopped and is meant to stay stopped).
//
// Transitions handled here (all of them only modify newSpec in memory):
//   - Running -> Stopping: lifecycle.phase is "stopped" and the status is neither
//     Stopping nor Stopped; numberOfInstances is saved and set to 0.
//   - Stopping -> Stopped: the cached StatefulSet requests 0 replicas.
//   - Stopped -> Updating: lifecycle.phase is no longer "stopped" (wake-up);
//     numberOfInstances is restored from status.previousNumberOfInstances.
//   - Updating -> Running: nothing to do here, the regular sync continues and
//     the deferred status handling in Sync sets the final status.
func (c *Cluster) manageHibernateState(oldSpec acidv1.Postgresql, newSpec *acidv1.Postgresql) bool {
	wantsStopped := newSpec.Spec.Lifecycle != nil && newSpec.Spec.Lifecycle.Phase == "stopped"

	// Update() sets status=Updating before Sync() runs, so a wake-up shows up as
	// "old status Stopped, new status Updating, stopped phase no longer requested".
	wokenByUpdate := oldSpec.Status.Stopped() &&
		newSpec.Status.PostgresClusterStatus == acidv1.ClusterStatusUpdating

	// Second way to detect a wake-up: there is a saved instance count and the
	// spec is still scaled down to zero.
	needsRestore := newSpec.Status.PreviousNumberOfInstances > 0 &&
		newSpec.Spec.NumberOfInstances == 0

	isWakingUp := !wantsStopped && (wokenByUpdate || needsRestore)

	// === INITIATE HIBERNATE: Running -> Stopping ===
	// Only initiate if not already stopping or stopped, and lifecycle.phase = "stopped"
	if wantsStopped && !newSpec.Status.Stopping() && !newSpec.Status.Stopped() {
		// Do not clobber an already saved instance count with 0: this branch can
		// run again after the spec was scaled down (for instance when the status
		// was changed to SyncFailed in the middle of a hibernate), and losing the
		// saved value would make a later wake-up impossible.
		if newSpec.Spec.NumberOfInstances > 0 {
			newSpec.Status.PreviousNumberOfInstances = newSpec.Spec.NumberOfInstances
		}
		newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusStopping
		newSpec.Spec.NumberOfInstances = 0
		c.logger.Infof("[lifecycle] cluster is going to hibernate, stored previous number of instances: %d",
			newSpec.Status.PreviousNumberOfInstances)
		return true
	}

	// === STOPPING -> STOPPED ===
	if newSpec.Status.Stopping() {
		// Spec.Replicas is a pointer and may be unset; an unset value must not
		// be treated as zero (and must not be dereferenced).
		// NOTE(review): this looks at the requested replica count of the cached
		// StatefulSet, not at its observed status — confirm this is sufficient
		// to claim that all pods are gone.
		if c.Statefulset != nil {
			if replicas := c.Statefulset.Spec.Replicas; replicas != nil && *replicas == 0 {
				newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusStopped
				c.logger.Infof("[lifecycle] cluster has stopped, all pods are terminated")
			}
		}
		return true
	}

	// === WAKE-UP: Stopped/Updating -> Running ===
	if newSpec.Status.Stopped() || isWakingUp {
		if wantsStopped {
			// Still stopped and lifecycle.phase = "stopped", skip further sync
			return false
		}

		if newSpec.Status.PreviousNumberOfInstances > 0 {
			newSpec.Spec.NumberOfInstances = newSpec.Status.PreviousNumberOfInstances
			c.logger.Infof("[lifecycle] cluster is waking up, restoring number of instances: %d",
				newSpec.Status.PreviousNumberOfInstances)
		} else {
			c.logger.Warningf("[lifecycle] cluster is waking up but previousNumberOfInstances is 0, cannot restore")
		}
		newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusUpdating
		return true
	}

	return true
}

7 changes: 6 additions & 1 deletion pkg/cluster/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
if err != nil {
c.logger.Warningf("error while syncing cluster state: %v", err)
newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusSyncFailed
} else if !c.Status.Running() {
} else if !c.Status.Running() && !c.Status.Stopping() && !c.Status.Stopped() {
newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusRunning
}

Expand All @@ -65,6 +65,11 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
c.logger.Debugf("could not sync finalizers: %v", err)
}

// Handle lifecycle hibernate/wake-up state transitions
if !c.manageHibernateState(oldSpec, newSpec) {
return nil
}

if err = c.initUsers(); err != nil {
err = fmt.Errorf("could not init users: %v", err)
return err
Expand Down
10 changes: 10 additions & 0 deletions pkg/util/k8sutil/k8sutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,16 @@ func (client *KubernetesClient) SetPostgresCRDStatus(clusterName spec.Namespaced
return pg, nil
}

// UpdatePostgresCR submits pg with an Update call on the Postgresql resource in
// clusterName.Namespace and returns the object produced by that call.
//
// NOTE(review): callers in pkg/cluster treat this call as persisting the spec
// only (status subresource) and write the status afterwards with
// SetPostgresCRDStatus — confirm against the CRD definition.
//
// On failure the underlying error is wrapped with %w so that callers can still
// inspect it with errors.Is / errors.As; the returned object must not be used
// in that case.
func (client *KubernetesClient) UpdatePostgresCR(clusterName spec.NamespacedName, pg *apiacidv1.Postgresql) (*apiacidv1.Postgresql, error) {
	updated, err := client.PostgresqlsGetter.Postgresqls(clusterName.Namespace).Update(context.TODO(), pg, metav1.UpdateOptions{})
	if err != nil {
		return updated, fmt.Errorf("could not update PostgresCR: %w", err)
	}

	return updated, nil
}

// SetFinalizer of Postgres cluster
func (client *KubernetesClient) SetFinalizer(clusterName spec.NamespacedName, pg *apiacidv1.Postgresql, finalizers []string) (*apiacidv1.Postgresql, error) {
var (
Expand Down
Loading