diff --git a/tests/e2e/lib/backup_cli.go b/tests/e2e/lib/backup_cli.go
index 217470474b..003e530771 100644
--- a/tests/e2e/lib/backup_cli.go
+++ b/tests/e2e/lib/backup_cli.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"log"
 	"strings"
+	"time"

 	velero "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
@@ -191,8 +192,14 @@ func IsBackupCompletedSuccessfullyViaCLI(name string) (bool, error) {
 	)
 }

-// DescribeBackupViaCLI describes backup using the OADP CLI
+// DescribeBackupViaCLI describes backup using the OADP CLI with default timeout and retry.
+// The timeout prevents the command from hanging when retrieving backup details from object storage.
 func DescribeBackupViaCLI(name string) (backupDescription string) {
+	return DescribeBackupViaCLIWithOptions(name, DefaultCLITimeout, DefaultCLIRetries, DefaultCLIRetryDelay)
+}
+
+// DescribeBackupViaCLIWithOptions describes backup using the OADP CLI with specified timeout and retry options.
+func DescribeBackupViaCLIWithOptions(name string, timeout time.Duration, maxRetries int, retryDelay time.Duration) (backupDescription string) {
 	// Use CLI to describe backup
 	cmd := &CLICommand{
 		Resource: "backup",
@@ -200,7 +207,16 @@ func DescribeBackupViaCLI(name string) (backupDescription string) {
 		Name:     name,
 		Options:  []string{"--details"},
 	}
-	output, err := cmd.Execute()
+
+	var output []byte
+	var err error
+
+	if maxRetries > 1 {
+		output, err = cmd.ExecuteWithTimeoutAndRetry(timeout, maxRetries, retryDelay)
+	} else {
+		output, err = cmd.ExecuteWithTimeout(timeout)
+	}
+
 	if err != nil {
 		return fmt.Sprintf("could not describe backup via CLI: %v, output: %s", err, string(output))
 	}
@@ -208,20 +224,41 @@ func DescribeBackupViaCLI(name string) (backupDescription string) {
 	return string(output)
 }

-// BackupLogsViaCLI gets backup logs using the OADP CLI
+// BackupLogsViaCLI gets backup logs using the OADP CLI with default timeout and retry.
+// The timeout prevents the command from hanging indefinitely when streaming logs from object storage.
+// Retry logic helps handle transient network issues.
 func BackupLogsViaCLI(name string) (backupLogs string, err error) {
+	return BackupLogsViaCLIWithOptions(name, DefaultCLITimeout, DefaultCLIRetries, DefaultCLIRetryDelay)
+}
+
+// BackupLogsViaCLIWithTimeout gets backup logs using the OADP CLI with a specified timeout (no retry).
+func BackupLogsViaCLIWithTimeout(name string, timeout time.Duration) (backupLogs string, err error) {
+	return BackupLogsViaCLIWithOptions(name, timeout, 1, 0)
+}
+
+// BackupLogsViaCLIWithOptions gets backup logs using the OADP CLI with specified timeout and retry options.
+func BackupLogsViaCLIWithOptions(name string, timeout time.Duration, maxRetries int, retryDelay time.Duration) (backupLogs string, err error) {
 	if name == "" {
 		return "", fmt.Errorf("backup name cannot be empty")
 	}

-	// Use CLI to get backup logs
+	// Use CLI to get backup logs with timeout and retry to prevent hanging
 	cmd := &CLICommand{
 		Resource: "backup",
 		Action:   "logs",
 		Name:     name,
 		Options:  []string{},
 	}
-	output, cmdErr := cmd.ExecuteOutput()
+
+	var output []byte
+	var cmdErr error
+
+	if maxRetries > 1 {
+		output, cmdErr = cmd.ExecuteOutputWithTimeoutAndRetry(timeout, maxRetries, retryDelay)
+	} else {
+		output, cmdErr = cmd.ExecuteOutputWithTimeout(timeout)
+	}
+
 	if cmdErr != nil {
 		return "", fmt.Errorf("failed to get backup logs via CLI: %v", cmdErr)
 	}
diff --git a/tests/e2e/lib/cli_common.go b/tests/e2e/lib/cli_common.go
index eb54aa274f..eb19741776 100644
--- a/tests/e2e/lib/cli_common.go
+++ b/tests/e2e/lib/cli_common.go
@@ -1,12 +1,23 @@
 package lib

 import (
+	"context"
 	"fmt"
 	"log"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"strings"
+	"time"
+)
+
+// DefaultCLITimeout is the default timeout for CLI commands that may hang (e.g., log streaming).
+const DefaultCLITimeout = 5 * time.Minute
+
+// Default retry settings for CLI commands.
+const (
+	DefaultCLIRetries    = 3
+	DefaultCLIRetryDelay = 10 * time.Second
 )

 type CLICommand struct {
@@ -14,6 +25,7 @@ type CLICommand struct {
 	Action   string // "create", "get", "delete", etc.
 	Name     string
 	Options  []string
+	Timeout  time.Duration // Fallback timeout used when a caller passes a non-positive timeout
 }

 func (c *CLICommand) Execute() ([]byte, error) {
@@ -40,6 +52,117 @@ func (c *CLICommand) ExecuteOutput() ([]byte, error) {
 	return cmd.Output()
 }

+// kubectlArgs returns the arguments passed to kubectl for this command:
+// "oadp <resource> <action> [name] [options...]".
+func (c *CLICommand) kubectlArgs() []string {
+	args := []string{"oadp", c.Resource, c.Action}
+	if c.Name != "" {
+		args = append(args, c.Name)
+	}
+	return append(args, c.Options...)
+}
+
+// effectiveTimeout picks the timeout to enforce: the explicit argument when it
+// is positive, otherwise the command's Timeout field, otherwise DefaultCLITimeout.
+// A non-positive duration must never reach context.WithTimeout, because that
+// yields an already-expired context and the command would be killed immediately.
+func (c *CLICommand) effectiveTimeout(timeout time.Duration) time.Duration {
+	switch {
+	case timeout > 0:
+		return timeout
+	case c.Timeout > 0:
+		return c.Timeout
+	default:
+		return DefaultCLITimeout
+	}
+}
+
+// runWithTimeout runs "kubectl oadp ..." under a deadline and collects its output
+// with run, which is either (*exec.Cmd).CombinedOutput or (*exec.Cmd).Output.
+// When the command fails because the deadline expired, the returned error wraps
+// context.DeadlineExceeded; any output captured before the kill is still returned.
+func (c *CLICommand) runWithTimeout(timeout time.Duration, run func(*exec.Cmd) ([]byte, error)) ([]byte, error) {
+	args := c.kubectlArgs()
+	timeout = c.effectiveTimeout(timeout)
+
+	c.LogCLICommand()
+
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+
+	output, err := run(exec.CommandContext(ctx, "kubectl", args...))
+
+	// Only report a timeout when the command actually failed; a command that
+	// finished successfully right at the deadline is still a success.
+	if err != nil && ctx.Err() == context.DeadlineExceeded {
+		return output, fmt.Errorf("command timed out after %v: kubectl %s: %w", timeout, strings.Join(args, " "), ctx.Err())
+	}
+
+	return output, err
+}
+
+// ExecuteWithTimeout executes the CLI command with a timeout and returns the
+// combined stdout and stderr. If the timeout is exceeded, the command is killed
+// and an error is returned. A non-positive timeout falls back to c.Timeout and
+// then to DefaultCLITimeout.
+func (c *CLICommand) ExecuteWithTimeout(timeout time.Duration) ([]byte, error) {
+	return c.runWithTimeout(timeout, (*exec.Cmd).CombinedOutput)
+}
+
+// ExecuteOutputWithTimeout executes the CLI command with a timeout and returns stdout only.
+// If the timeout is exceeded, the command is killed and an error is returned.
+// A non-positive timeout falls back to c.Timeout and then to DefaultCLITimeout.
+func (c *CLICommand) ExecuteOutputWithTimeout(timeout time.Duration) ([]byte, error) {
+	return c.runWithTimeout(timeout, (*exec.Cmd).Output)
+}
+
+// retryCLI calls run up to maxRetries times, sleeping retryDelay between failed
+// attempts, and returns the first successful output. A maxRetries below 1 is
+// treated as 1 so the command always runs at least once. When every attempt
+// fails, the output and (wrapped) error of the last attempt are returned.
+func retryCLI(maxRetries int, retryDelay time.Duration, run func() ([]byte, error)) ([]byte, error) {
+	if maxRetries < 1 {
+		maxRetries = 1
+	}
+
+	var lastErr error
+	var lastOutput []byte
+
+	for attempt := 1; attempt <= maxRetries; attempt++ {
+		output, err := run()
+		if err == nil {
+			return output, nil
+		}
+
+		lastErr = err
+		lastOutput = output
+
+		if attempt < maxRetries {
+			log.Printf("CLI command failed (attempt %d/%d): %v. Retrying in %v...", attempt, maxRetries, err, retryDelay)
+			time.Sleep(retryDelay)
+		}
+	}
+
+	return lastOutput, fmt.Errorf("CLI command failed after %d attempts: %w", maxRetries, lastErr)
+}
+
+// ExecuteOutputWithTimeoutAndRetry executes the CLI command (stdout only) with a timeout and retry logic.
+// It retries the command up to maxRetries times with a delay between attempts.
+// This is useful for commands that may fail due to transient issues (e.g., network problems).
+func (c *CLICommand) ExecuteOutputWithTimeoutAndRetry(timeout time.Duration, maxRetries int, retryDelay time.Duration) ([]byte, error) {
+	return retryCLI(maxRetries, retryDelay, func() ([]byte, error) {
+		return c.ExecuteOutputWithTimeout(timeout)
+	})
+}
+
+// ExecuteWithTimeoutAndRetry executes the CLI command (combined stdout and stderr) with a timeout and retry logic.
+// It retries the command up to maxRetries times with a delay between attempts.
+func (c *CLICommand) ExecuteWithTimeoutAndRetry(timeout time.Duration, maxRetries int, retryDelay time.Duration) ([]byte, error) {
+	return retryCLI(maxRetries, retryDelay, func() ([]byte, error) {
+		return c.ExecuteWithTimeout(timeout)
+	})
+}
+
 func (c *CLICommand) LogCLICommand() {
 	args := []string{"kubectl", "oadp", c.Resource, c.Action}
 	if c.Name != "" {
diff --git a/tests/e2e/lib/restore_cli.go b/tests/e2e/lib/restore_cli.go
index ae70afdc79..0c252d3e87 100644
--- a/tests/e2e/lib/restore_cli.go
+++ b/tests/e2e/lib/restore_cli.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"log"
 	"strings"
+	"time"

 	velero "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
@@ -166,8 +167,14 @@ func IsRestoreCompletedSuccessfullyViaCLI(name string) (bool, error) {
 	)
 }

-// DescribeRestoreViaCLI describes restore using the OADP CLI
+// DescribeRestoreViaCLI describes restore using the OADP CLI with default timeout and retry.
+// The timeout prevents the command from hanging when retrieving restore details from object storage.
 func DescribeRestoreViaCLI(name string) string {
+	return DescribeRestoreViaCLIWithOptions(name, DefaultCLITimeout, DefaultCLIRetries, DefaultCLIRetryDelay)
+}
+
+// DescribeRestoreViaCLIWithOptions describes restore using the OADP CLI with specified timeout and retry options.
+func DescribeRestoreViaCLIWithOptions(name string, timeout time.Duration, maxRetries int, retryDelay time.Duration) string {
 	// Use CLI to describe restore
 	cmd := &CLICommand{
 		Resource: "restore",
@@ -175,7 +182,16 @@ func DescribeRestoreViaCLI(name string) string {
 		Name:     name,
 		Options:  []string{"--details"},
 	}
-	output, err := cmd.Execute()
+
+	var output []byte
+	var err error
+
+	if maxRetries > 1 {
+		output, err = cmd.ExecuteWithTimeoutAndRetry(timeout, maxRetries, retryDelay)
+	} else {
+		output, err = cmd.ExecuteWithTimeout(timeout)
+	}
+
 	if err != nil {
 		return fmt.Sprintf("could not describe restore via CLI: %v, output: %s", err, string(output))
 	}
@@ -183,20 +199,41 @@ func DescribeRestoreViaCLI(name string) string {
 	return string(output)
 }

-// RestoreLogsViaCLI gets restore logs using the OADP CLI
+// RestoreLogsViaCLI gets restore logs using the OADP CLI with default timeout and retry.
+// The timeout prevents the command from hanging indefinitely when streaming logs from object storage.
+// Retry logic helps handle transient network issues.
 func RestoreLogsViaCLI(name string) (restoreLogs string, err error) {
+	return RestoreLogsViaCLIWithOptions(name, DefaultCLITimeout, DefaultCLIRetries, DefaultCLIRetryDelay)
+}
+
+// RestoreLogsViaCLIWithTimeout gets restore logs using the OADP CLI with a specified timeout (no retry).
+func RestoreLogsViaCLIWithTimeout(name string, timeout time.Duration) (restoreLogs string, err error) {
+	return RestoreLogsViaCLIWithOptions(name, timeout, 1, 0)
+}
+
+// RestoreLogsViaCLIWithOptions gets restore logs using the OADP CLI with specified timeout and retry options.
+func RestoreLogsViaCLIWithOptions(name string, timeout time.Duration, maxRetries int, retryDelay time.Duration) (restoreLogs string, err error) {
 	if name == "" {
 		return "", fmt.Errorf("restore name cannot be empty")
 	}

-	// Use CLI to get restore logs
+	// Use CLI to get restore logs with timeout and retry to prevent hanging
 	cmd := &CLICommand{
 		Resource: "restore",
 		Action:   "logs",
 		Name:     name,
 		Options:  []string{},
 	}
-	output, cmdErr := cmd.ExecuteOutput()
+
+	var output []byte
+	var cmdErr error
+
+	if maxRetries > 1 {
+		output, cmdErr = cmd.ExecuteOutputWithTimeoutAndRetry(timeout, maxRetries, retryDelay)
+	} else {
+		output, cmdErr = cmd.ExecuteOutputWithTimeout(timeout)
+	}
+
 	if cmdErr != nil {
 		return "", fmt.Errorf("failed to get restore logs via CLI: %v", cmdErr)
 	}