@@ -14,6 +14,7 @@ import (
1414 "os/exec"
1515 "path/filepath"
1616 "reflect"
17+ "strconv"
1718 "strings"
1819 "time"
1920
@@ -88,6 +89,13 @@ func RegisterSteps(sc *godog.ScenarioContext) {
8889 sc .Step (`^(?i)resource apply fails with error msg containing "([^"]+)"$` , ResourceApplyFails )
8990 sc .Step (`^(?i)resource "([^"]+)" is eventually restored$` , ResourceRestored )
9091 sc .Step (`^(?i)resource "([^"]+)" matches$` , ResourceMatches )
92+ sc .Step (`^(?i)user performs rollout restart on "([^"]+)"$` , UserPerformsRolloutRestart )
93+ sc .Step (`^(?i)resource "([^"]+)" has restart annotation$` , ResourceHasRestartAnnotation )
94+ sc .Step (`^(?i)deployment "([^"]+)" is ready$` , DeploymentIsReady )
95+ sc .Step (`^(?i)deployment "([^"]+)" rollout completes successfully$` , DeploymentRolloutCompletesSuccessfully )
96+ sc .Step (`^(?i)I wait for "([^"]+)" seconds$` , WaitForSeconds )
97+ sc .Step (`^(?i)deployment "([^"]+)" rollout is still successful$` , DeploymentRolloutIsStillSuccessful )
98+ sc .Step (`^(?i)deployment "([^"]+)" has expected number of ready replicas$` , DeploymentHasExpectedReadyReplicas )
9199
92100 sc .Step (`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in test namespace$` , ServiceAccountWithNeededPermissionsIsAvailableInNamespace )
93101 sc .Step (`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in \${TEST_NAMESPACE}$` , ServiceAccountWithNeededPermissionsIsAvailableInNamespace )
@@ -1288,3 +1296,266 @@ func latestActiveRevisionForExtension(extName string) (*ocv1.ClusterExtensionRev
12881296
12891297 return latest , nil
12901298}
1299+
1300+ // UserPerformsRolloutRestart simulates a user running "kubectl rollout restart deployment/<name>".
1301+ // This adds a restart annotation to trigger a rolling restart of pods.
1302+ // This is used to test the generic fix - OLM should not undo ANY user-added annotations.
1303+ // In OLMv0, OLM would undo this change. In OLMv1, it should stay because kubectl owns it.
1304+ // See: https://github.com/operator-framework/operator-lifecycle-manager/issues/3392
1305+ func UserPerformsRolloutRestart (ctx context.Context , resourceName string ) error {
1306+ sc := scenarioCtx (ctx )
1307+ resourceName = substituteScenarioVars (resourceName , sc )
1308+
1309+ kind , deploymentName , ok := strings .Cut (resourceName , "/" )
1310+ if ! ok {
1311+ return fmt .Errorf ("invalid resource name format: %s (expected kind/name)" , resourceName )
1312+ }
1313+
1314+ if kind != "deployment" {
1315+ return fmt .Errorf ("only deployment resources are supported for restart annotation, got: %s" , kind )
1316+ }
1317+
1318+ // Run kubectl rollout restart to add the restart annotation.
1319+ // This is the real command users run, so we test actual user behavior.
1320+ out , err := k8sClient ("rollout" , "restart" , resourceName , "-n" , sc .namespace )
1321+ if err != nil {
1322+ return fmt .Errorf ("failed to rollout restart %s: %w" , resourceName , err )
1323+ }
1324+
1325+ logger .V (1 ).Info ("Rollout restart initiated" , "deployment" , deploymentName , "output" , out )
1326+
1327+ return nil
1328+ }
1329+
1330+ // ResourceHasRestartAnnotation checks that a deployment has a restart annotation.
1331+ // This confirms that user changes stay in place after OLM runs.
1332+ // The fix is generic and works for ANY user-added annotations on pod templates.
1333+ func ResourceHasRestartAnnotation (ctx context.Context , resourceName string ) error {
1334+ sc := scenarioCtx (ctx )
1335+ resourceName = substituteScenarioVars (resourceName , sc )
1336+
1337+ kind , deploymentName , ok := strings .Cut (resourceName , "/" )
1338+ if ! ok {
1339+ return fmt .Errorf ("invalid resource name format: %s (expected kind/name)" , resourceName )
1340+ }
1341+
1342+ if kind != "deployment" {
1343+ return fmt .Errorf ("only deployment resources are supported for restart annotation check, got: %s" , kind )
1344+ }
1345+
1346+ // Look for the restart annotation added by "kubectl rollout restart"
1347+ restartAnnotationKey := "kubectl.kubernetes.io/restartedAt"
1348+
1349+ // Keep checking until the annotation appears (it may not show up immediately)
1350+ // This is better than checking only once
1351+ var annotationValue string
1352+ waitFor (ctx , func () bool {
1353+ out , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1354+ "-o" , fmt .Sprintf ("jsonpath={.spec.template.metadata.annotations['%s']}" , restartAnnotationKey ))
1355+ if err != nil {
1356+ return false
1357+ }
1358+ // If the annotation exists and has a value, it stayed in place
1359+ if out == "" {
1360+ return false
1361+ }
1362+ annotationValue = out
1363+ return true
1364+ })
1365+
1366+ logger .V (1 ).Info ("Restart annotation found" , "deployment" , deploymentName , "restartedAt" , annotationValue )
1367+ return nil
1368+ }
1369+
1370+ // DeploymentIsReady checks that a deployment is ready with all pods running.
1371+ func DeploymentIsReady (ctx context.Context , deploymentName string ) error {
1372+ sc := scenarioCtx (ctx )
1373+ deploymentName = substituteScenarioVars (deploymentName , sc )
1374+
1375+ waitFor (ctx , func () bool {
1376+ // Check if deployment is ready
1377+ out , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1378+ "-o" , "jsonpath={.status.conditions[?(@.type=='Available')].status}" )
1379+ if err != nil {
1380+ return false
1381+ }
1382+ return out == "True"
1383+ })
1384+
1385+ logger .V (1 ).Info ("Deployment is ready" , "deployment" , deploymentName )
1386+ return nil
1387+ }
1388+
1389+ // DeploymentRolloutCompletesSuccessfully waits for the rollout to finish.
1390+ // This checks that new pods were created and are running.
1391+ func DeploymentRolloutCompletesSuccessfully (ctx context.Context , deploymentName string ) error {
1392+ sc := scenarioCtx (ctx )
1393+ deploymentName = substituteScenarioVars (deploymentName , sc )
1394+
1395+ // Use kubectl rollout status to wait until done.
1396+ // This makes sure new pods are created and running.
1397+ // Timeout is 7m to account for startup probes (test-operator has 5min startup probe)
1398+ out , err := k8sClient ("rollout" , "status" , "deployment/" + deploymentName , "-n" , sc .namespace , "--timeout=7m" )
1399+ if err != nil {
1400+ return fmt .Errorf ("deployment rollout failed: %w, output: %s" , err , out )
1401+ }
1402+
1403+ logger .V (1 ).Info ("Deployment rollout completed" , "deployment" , deploymentName , "status" , out )
1404+
1405+ // Check deployment status
1406+ available , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1407+ "-o" , "jsonpath={.status.conditions[?(@.type=='Available')].status}" )
1408+ if err != nil {
1409+ return fmt .Errorf ("failed to check deployment availability: %w" , err )
1410+ }
1411+ if available != "True" {
1412+ return fmt .Errorf ("deployment %s is not available" , deploymentName )
1413+ }
1414+
1415+ progressing , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1416+ "-o" , "jsonpath={.status.conditions[?(@.type=='Progressing')].status}" )
1417+ if err != nil {
1418+ return fmt .Errorf ("failed to check deployment progressing: %w" , err )
1419+ }
1420+ if progressing != "True" {
1421+ return fmt .Errorf ("deployment %s is not progressing correctly" , deploymentName )
1422+ }
1423+
1424+ return nil
1425+ }
1426+
1427+ // WaitForSeconds waits for the given number of seconds.
1428+ // This gives OLM time to run its checks between test steps.
1429+ //
1430+ // Note: We wait for a fixed time (not checking in a loop) because we need to make sure
1431+ // OLM actually runs (it runs every 10 seconds). We want to check that user changes stay
1432+ // AFTER OLM runs. If we just checked in a loop, we wouldn't know if OLM ran yet.
1433+ func WaitForSeconds (ctx context.Context , seconds string ) error {
1434+ sec , err := strconv .Atoi (seconds )
1435+ if err != nil {
1436+ return fmt .Errorf ("invalid seconds value %s: %w" , seconds , err )
1437+ }
1438+
1439+ if sec <= 0 {
1440+ return fmt .Errorf ("seconds value must be greater than 0, got %d" , sec )
1441+ }
1442+
1443+ logger .V (1 ).Info ("Waiting for reconciliation" , "seconds" , sec )
1444+
1445+ timer := time .NewTimer (time .Duration (sec ) * time .Second )
1446+ defer timer .Stop ()
1447+
1448+ select {
1449+ case <- timer .C :
1450+ logger .V (1 ).Info ("Wait complete" )
1451+ return nil
1452+ case <- ctx .Done ():
1453+ return fmt .Errorf ("wait canceled: %w" , ctx .Err ())
1454+ }
1455+ }
1456+
1457+ // verifyDeploymentReplicaStatus checks if a deployment has the right number of ready pods.
1458+ func verifyDeploymentReplicaStatus (deploymentName , namespace string ) (string , string , error ) {
1459+ readyReplicas , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , namespace ,
1460+ "-o" , "jsonpath={.status.readyReplicas}" )
1461+ if err != nil {
1462+ return "" , "" , fmt .Errorf ("failed to get ready replicas: %w" , err )
1463+ }
1464+
1465+ replicas , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , namespace ,
1466+ "-o" , "jsonpath={.spec.replicas}" )
1467+ if err != nil {
1468+ return "" , "" , fmt .Errorf ("failed to get desired replicas: %w" , err )
1469+ }
1470+
1471+ // Normalize empty jsonpath results (when readyReplicas is omitted for 0 replicas)
1472+ readyReplicas = strings .TrimSpace (readyReplicas )
1473+ replicas = strings .TrimSpace (replicas )
1474+ if readyReplicas == "" {
1475+ readyReplicas = "0"
1476+ }
1477+ if replicas == "" {
1478+ replicas = "0"
1479+ }
1480+
1481+ // Compare as integers to avoid false negatives
1482+ readyInt , err := strconv .Atoi (readyReplicas )
1483+ if err != nil {
1484+ return readyReplicas , replicas , fmt .Errorf ("invalid ready replicas value %q: %w" , readyReplicas , err )
1485+ }
1486+ replicasInt , err := strconv .Atoi (replicas )
1487+ if err != nil {
1488+ return readyReplicas , replicas , fmt .Errorf ("invalid desired replicas value %q: %w" , replicas , err )
1489+ }
1490+
1491+ if readyInt != replicasInt {
1492+ return readyReplicas , replicas , fmt .Errorf ("deployment %s has %d ready replicas but expected %d" ,
1493+ deploymentName , readyInt , replicasInt )
1494+ }
1495+
1496+ return readyReplicas , replicas , nil
1497+ }
1498+
1499+ // DeploymentRolloutIsStillSuccessful checks that the rollout is still working.
1500+ // This makes sure OLM didn't undo the user's "kubectl rollout restart" command.
1501+ // It checks that new pods are still running.
1502+ func DeploymentRolloutIsStillSuccessful (ctx context.Context , deploymentName string ) error {
1503+ sc := scenarioCtx (ctx )
1504+ deploymentName = substituteScenarioVars (deploymentName , sc )
1505+
1506+ // Check deployment status
1507+ available , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1508+ "-o" , "jsonpath={.status.conditions[?(@.type=='Available')].status}" )
1509+ if err != nil {
1510+ return fmt .Errorf ("failed to check deployment availability: %w" , err )
1511+ }
1512+ if available != "True" {
1513+ return fmt .Errorf ("deployment %s is not available - rollout was undone" , deploymentName )
1514+ }
1515+
1516+ // Check that the deployment is still working correctly
1517+ progressing , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1518+ "-o" , "jsonpath={.status.conditions[?(@.type=='Progressing')].status}" )
1519+ if err != nil {
1520+ return fmt .Errorf ("failed to check deployment progressing: %w" , err )
1521+ }
1522+ if progressing != "True" {
1523+ return fmt .Errorf ("deployment %s is not working - rollout may have been undone" , deploymentName )
1524+ }
1525+
1526+ // Check that the right number of pods are ready (rollout finished and wasn't stopped)
1527+ readyReplicas , replicas , err := verifyDeploymentReplicaStatus (deploymentName , sc .namespace )
1528+ if err != nil {
1529+ return fmt .Errorf ("%w - rollout may have been undone" , err )
1530+ }
1531+
1532+ logger .V (1 ).Info ("Deployment rollout is still successful" , "deployment" , deploymentName ,
1533+ "readyReplicas" , readyReplicas , "desiredReplicas" , replicas )
1534+
1535+ return nil
1536+ }
1537+
1538+ // DeploymentHasExpectedReadyReplicas checks that the deployment has the right number of ready pods.
1539+ // This makes sure the rollout finished successfully and pods are running.
1540+ func DeploymentHasExpectedReadyReplicas (ctx context.Context , deploymentName string ) error {
1541+ sc := scenarioCtx (ctx )
1542+ deploymentName = substituteScenarioVars (deploymentName , sc )
1543+
1544+ // Check that the right number of pods are ready
1545+ readyReplicas , replicas , err := verifyDeploymentReplicaStatus (deploymentName , sc .namespace )
1546+ if err != nil {
1547+ return err
1548+ }
1549+
1550+ // Also check that there are no unavailable pods
1551+ unavailableReplicas , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1552+ "-o" , "jsonpath={.status.unavailableReplicas}" )
1553+ if err == nil && unavailableReplicas != "" && unavailableReplicas != "0" {
1554+ return fmt .Errorf ("deployment %s has %s unavailable pods" , deploymentName , unavailableReplicas )
1555+ }
1556+
1557+ logger .V (1 ).Info ("Deployment has expected ready replicas" , "deployment" , deploymentName ,
1558+ "readyReplicas" , readyReplicas , "desiredReplicas" , replicas )
1559+
1560+ return nil
1561+ }
0 commit comments