Skip to content

Commit 4ddeabd

Browse files
authored
wait for server to be ACTIVE (#99)
* wait for server to be ACTIVE
  Signed-off-by: Felix Breuer <f.breuer94@gmail.com>
* add tests for waiting functions
  Signed-off-by: Felix Breuer <f.breuer94@gmail.com>
---------
Signed-off-by: Felix Breuer <f.breuer94@gmail.com>
1 parent 6dfc95c commit 4ddeabd

File tree

8 files changed

+193
-28
lines changed

8 files changed

+193
-28
lines changed

pkg/client/sdk.go

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -206,12 +206,7 @@ func (c *SdkStackitClient) CreateServer(ctx context.Context, projectID, region s
206206
}
207207

208208
// Convert SDK server to our Server type
209-
server := &Server{
210-
ID: sdkServer.GetId(),
211-
Name: sdkServer.GetName(),
212-
Status: sdkServer.GetStatus(),
213-
Labels: convertLabelsFromSDK(sdkServer.Labels),
214-
}
209+
server := convertSDKServerToServer(sdkServer)
215210

216211
return server, nil
217212
}
@@ -228,12 +223,7 @@ func (c *SdkStackitClient) GetServer(ctx context.Context, projectID, region, ser
228223
}
229224

230225
// Convert SDK server to our Server type
231-
server := &Server{
232-
ID: sdkServer.GetId(),
233-
Name: sdkServer.GetName(),
234-
Status: sdkServer.GetStatus(),
235-
Labels: convertLabelsFromSDK(sdkServer.Labels),
236-
}
226+
server := convertSDKServerToServer(sdkServer)
237227

238228
return server, nil
239229
}
@@ -282,13 +272,7 @@ func (c *SdkStackitClient) ListServers(ctx context.Context, projectID, region st
282272
if sdkResponse.Items != nil {
283273
for i := range *sdkResponse.Items {
284274
sdkServer := &(*sdkResponse.Items)[i]
285-
286-
server := &Server{
287-
ID: sdkServer.GetId(),
288-
Name: sdkServer.GetName(),
289-
Status: sdkServer.GetStatus(),
290-
Labels: convertLabelsFromSDK(sdkServer.Labels),
291-
}
275+
server := convertSDKServerToServer(sdkServer)
292276
servers = append(servers, server)
293277
}
294278
}
@@ -358,6 +342,16 @@ func convertSDKNICtoNIC(nic *iaas.NIC) *NIC {
358342
}
359343
}
360344

345+
func convertSDKServerToServer(sdkServer *iaas.Server) *Server {
346+
return &Server{
347+
ID: sdkServer.GetId(),
348+
Name: sdkServer.GetName(),
349+
Status: sdkServer.GetStatus(),
350+
ErrorMessage: sdkServer.GetErrorMessage(),
351+
Labels: convertLabelsFromSDK(sdkServer.Labels),
352+
}
353+
}
354+
361355
// isNotFoundError checks if an error is a 404 Not Found error from the SDK
362356
func isNotFoundError(err error) bool {
363357
if err == nil {

pkg/client/stackit.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,11 @@ type AgentRequest struct {
8181

8282
// Server represents a STACKIT server response
8383
type Server struct {
84-
ID string `json:"id"`
85-
Name string `json:"name"`
86-
Status string `json:"status"`
87-
Labels map[string]string `json:"labels,omitempty"`
84+
ID string `json:"id"`
85+
Name string `json:"name"`
86+
Status string `json:"status"`
87+
ErrorMessage string `json:"errorMessage,omitempty"`
88+
Labels map[string]string `json:"labels,omitempty"`
8889
}
8990

9091
// NIC represents a STACKIT network interface

pkg/provider/create.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/client"
1414
api "github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/provider/apis"
1515
"github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/provider/apis/validation"
16+
"k8s.io/apimachinery/pkg/util/wait"
1617
"k8s.io/klog/v2"
1718
)
1819

@@ -72,10 +73,21 @@ func (p *Provider) CreateMachine(ctx context.Context, req *driver.CreateMachineR
7273
server, err = p.client.CreateServer(ctx, projectID, providerSpec.Region, p.createServerRequest(req, providerSpec))
7374
if err != nil {
7475
klog.Errorf("Failed to create server for machine %q: %v", req.Machine.Name, err)
76+
if isResourceExhaustedError(err) {
77+
return nil, status.Error(codes.ResourceExhausted, fmt.Sprintf("failed to create server: %v", err))
78+
}
7579
return nil, status.Error(codes.Unavailable, fmt.Sprintf("failed to create server: %v", err))
7680
}
7781
}
7882

83+
if err := p.WaitUntilServerRunning(ctx, projectID, providerSpec.Region, server.ID); err != nil {
84+
klog.Errorf("Failed waiting for server %q to reach ACTIVE state: %v", req.Machine.Name, err)
85+
if isResourceExhaustedError(err) {
86+
return nil, status.Error(codes.ResourceExhausted, fmt.Sprintf("failed waiting for server to be ACTIVE: %v", err))
87+
}
88+
return nil, status.Error(codes.DeadlineExceeded, fmt.Sprintf("failed waiting for server to be ACTIVE: %v", err))
89+
}
90+
7991
if err := p.patchNetworkInterface(ctx, projectID, server.ID, providerSpec); err != nil {
8092
klog.Errorf("Failed to patch network interface for server %q: %v", req.Machine.Name, err)
8193
return nil, status.Error(codes.Unavailable, fmt.Sprintf("failed to patch network interface for server: %v", err))
@@ -265,3 +277,22 @@ func (p *Provider) patchNetworkInterface(ctx context.Context, projectID, serverI
265277

266278
return nil
267279
}
280+
281+
func (p *Provider) WaitUntilServerRunning(ctx context.Context, projectID, region, serverID string) error {
282+
return wait.PollUntilContextTimeout(ctx, p.pollingInterval, p.pollingTimeout, true, func(ctx context.Context) (bool, error) {
283+
server, err := p.client.GetServer(ctx, projectID, region, serverID)
284+
if err != nil {
285+
return false, err
286+
}
287+
288+
switch server.Status {
289+
case "ACTIVE":
290+
klog.V(2).Infof("Server %q reached ACTIVE state", serverID)
291+
return true, nil
292+
case "ERROR":
293+
return false, fmt.Errorf("server in ERROR state: %q", server.ErrorMessage)
294+
}
295+
296+
return false, nil
297+
})
298+
}

pkg/provider/create_basic_test.go

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package provider
33
import (
44
"context"
55
"fmt"
6+
"time"
67

78
"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
89
"github.com/gardener/machine-controller-manager/pkg/util/provider/driver"
@@ -33,7 +34,9 @@ var _ = Describe("CreateMachine", func() {
3334
ctx = context.Background()
3435
mockClient = &mock.StackitClient{}
3536
provider = &Provider{
36-
client: mockClient,
37+
client: mockClient,
38+
pollingInterval: 10 * time.Millisecond,
39+
pollingTimeout: 5 * time.Second,
3740
}
3841

3942
secret = &corev1.Secret{
@@ -114,6 +117,41 @@ var _ = Describe("CreateMachine", func() {
114117
Expect(capturedReq.MachineType).To(Equal("c2i.2"))
115118
Expect(capturedReq.ImageID).To(Equal("12345678-1234-1234-1234-123456789abc"))
116119
})
120+
121+
It("should poll GetServer until server is ACTIVE", func() {
122+
getServerCallCount := 0
123+
124+
mockClient.CreateServerFunc = func(_ context.Context, _, _ string, req *client.CreateServerRequest) (*client.Server, error) {
125+
return &client.Server{
126+
ID: "550e8400-e29b-41d4-a716-446655440000",
127+
Name: req.Name,
128+
Status: "CREATING",
129+
}, nil
130+
}
131+
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
132+
getServerCallCount++
133+
// First call returns CREATING, second call returns ACTIVE
134+
if getServerCallCount == 1 {
135+
return &client.Server{
136+
ID: "550e8400-e29b-41d4-a716-446655440000",
137+
Name: "test-machine",
138+
Status: "CREATING",
139+
}, nil
140+
}
141+
return &client.Server{
142+
ID: "550e8400-e29b-41d4-a716-446655440000",
143+
Name: "test-machine",
144+
Status: "ACTIVE",
145+
}, nil
146+
}
147+
148+
resp, err := provider.CreateMachine(ctx, req)
149+
150+
Expect(err).NotTo(HaveOccurred())
151+
Expect(resp).NotTo(BeNil())
152+
Expect(resp.ProviderID).To(Equal("stackit://11111111-2222-3333-4444-555555555555/550e8400-e29b-41d4-a716-446655440000"))
153+
Expect(getServerCallCount).To(BeNumerically(">=", 2))
154+
})
117155
})
118156

119157
Context("with invalid ProviderSpec", func() {
@@ -185,5 +223,31 @@ var _ = Describe("CreateMachine", func() {
185223
Expect(ok).To(BeTrue())
186224
Expect(statusErr.Code()).To(Equal(codes.Unavailable))
187225
})
226+
227+
It("should return ResourceExhausted when server enters ERROR state with 'no valid host'", func() {
228+
mockClient.CreateServerFunc = func(_ context.Context, _, _ string, req *client.CreateServerRequest) (*client.Server, error) {
229+
return &client.Server{
230+
ID: "550e8400-e29b-41d4-a716-446655440000",
231+
Name: req.Name,
232+
Status: "CREATING",
233+
}, nil
234+
}
235+
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
236+
return &client.Server{
237+
ID: "550e8400-e29b-41d4-a716-446655440000",
238+
Name: "test-machine",
239+
Status: "ERROR",
240+
ErrorMessage: "No valid host was found. There are not enough hosts available.",
241+
}, nil
242+
}
243+
244+
_, err := provider.CreateMachine(ctx, req)
245+
246+
Expect(err).To(HaveOccurred())
247+
statusErr, ok := status.FromError(err)
248+
Expect(ok).To(BeTrue())
249+
Expect(statusErr.Code()).To(Equal(codes.ResourceExhausted))
250+
Expect(err.Error()).To(ContainSubstring("No valid host"))
251+
})
188252
})
189253
})

pkg/provider/delete.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes"
1111
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/status"
1212
"github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/client"
13+
"k8s.io/apimachinery/pkg/util/wait"
1314
"k8s.io/klog/v2"
1415
)
1516

@@ -31,7 +32,7 @@ func (p *Provider) DeleteMachine(ctx context.Context, req *driver.DeleteMachineR
3132

3233
// Initialize client on first use (lazy initialization)
3334
if err := p.ensureClient(serviceAccountKey); err != nil {
34-
return nil, status.Error(codes.Internal, fmt.Sprintf("failed to initialize STACKIT client: %v", err))
35+
return nil, status.Error(codes.Unauthenticated, fmt.Sprintf("failed to initialize STACKIT client: %v", err))
3536
}
3637

3738
var projectID, serverID string
@@ -87,7 +88,25 @@ func (p *Provider) DeleteMachine(ctx context.Context, req *driver.DeleteMachineR
8788
return nil, status.Error(codes.Internal, fmt.Sprintf("failed to delete server: %v", err))
8889
}
8990

90-
klog.V(2).Infof("Successfully deleted server %q for machine %q", serverID, req.Machine.Name)
91+
if err := p.WaitUntilServerDeleted(ctx, projectID, providerSpec.Region, serverID); err != nil {
92+
klog.Errorf("Failed waiting for server %q to be deleted for machine %q: %v", serverID, req.Machine.Name, err)
93+
return nil, status.Error(codes.DeadlineExceeded, fmt.Sprintf("failed waiting for server to be deleted: %v", err))
94+
}
9195

9296
return &driver.DeleteMachineResponse{}, nil
9397
}
98+
99+
func (p *Provider) WaitUntilServerDeleted(ctx context.Context, projectID, region, serverID string) error {
100+
return wait.PollUntilContextTimeout(ctx, p.pollingInterval, p.pollingTimeout, true, func(ctx context.Context) (bool, error) {
101+
_, err := p.client.GetServer(ctx, projectID, region, serverID)
102+
if err != nil {
103+
// Server is deleted if we get a not found error
104+
if errors.Is(err, client.ErrServerNotFound) {
105+
klog.V(2).Infof("Server %q has been deleted", serverID)
106+
return true, nil
107+
}
108+
}
109+
110+
return false, err
111+
})
112+
}

pkg/provider/delete_test.go

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package provider
33
import (
44
"context"
55
"fmt"
6+
"time"
67

78
"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
89
"github.com/gardener/machine-controller-manager/pkg/util/provider/driver"
@@ -33,7 +34,9 @@ var _ = Describe("DeleteMachine", func() {
3334
ctx = context.Background()
3435
mockClient = &mock.StackitClient{}
3536
provider = &Provider{
36-
client: mockClient,
37+
client: mockClient,
38+
pollingInterval: 10 * time.Millisecond,
39+
pollingTimeout: 5 * time.Second,
3740
}
3841

3942
// Create secret with projectId
@@ -87,6 +90,9 @@ var _ = Describe("DeleteMachine", func() {
8790
mockClient.DeleteServerFunc = func(_ context.Context, _, _, _ string) error {
8891
return nil
8992
}
93+
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
94+
return nil, fmt.Errorf("%w: status 404", client.ErrServerNotFound)
95+
}
9096

9197
resp, err := provider.DeleteMachine(ctx, req)
9298

@@ -103,19 +109,58 @@ var _ = Describe("DeleteMachine", func() {
103109
capturedServerID = serverID
104110
return nil
105111
}
112+
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
113+
return nil, fmt.Errorf("%w: status 404", client.ErrServerNotFound)
114+
}
106115

107116
_, err := provider.DeleteMachine(ctx, req)
108117

109118
Expect(err).NotTo(HaveOccurred())
110119
Expect(capturedProjectID).To(Equal("11111111-2222-3333-4444-555555555555"))
111120
Expect(capturedServerID).To(Equal("550e8400-e29b-41d4-a716-446655440000"))
112121
})
122+
123+
It("should poll GetServer until server is deleted", func() {
124+
getServerCallCount := 0
125+
126+
mockClient.DeleteServerFunc = func(_ context.Context, _, _, _ string) error {
127+
return nil
128+
}
129+
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
130+
getServerCallCount++
131+
// First call returns server still exists, second call returns not found
132+
if getServerCallCount == 1 {
133+
return &client.Server{
134+
ID: "550e8400-e29b-41d4-a716-446655440000",
135+
Name: "test-machine",
136+
Status: "SHUTTING_DOWN",
137+
}, nil
138+
}
139+
return nil, fmt.Errorf("%w: status 404", client.ErrServerNotFound)
140+
}
141+
142+
resp, err := provider.DeleteMachine(ctx, req)
143+
144+
Expect(err).NotTo(HaveOccurred())
145+
Expect(resp).NotTo(BeNil())
146+
Expect(getServerCallCount).To(BeNumerically(">=", 2))
147+
})
113148
})
114149

115150
Context("with missing or invalid ProviderID", func() {
116151
It("should still delete the machine when ProviderID is missing", func() {
117152
machine.Spec.ProviderID = ""
118153

154+
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
155+
return &client.Server{
156+
ID: "550e8400-e29b-41d4-a716-446655440000",
157+
Name: "test-machine",
158+
}, nil
159+
}
160+
mockClient.DeleteServerFunc = func(_ context.Context, _, _, _ string) error {
161+
return nil
162+
}
163+
119164
_, err := provider.DeleteMachine(ctx, req)
120165

121166
Expect(err).ToNot(HaveOccurred())

pkg/provider/helpers.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,8 @@ func extractSecretCredentials(secretData map[string][]byte) (projectID, serviceA
5858
serviceAccountKey = string(secretData[validation.StackitServiceAccountKey])
5959
return projectID, serviceAccountKey
6060
}
61+
62+
// isResourceExhaustedError reports whether err's message contains
// "no valid host" or "quota exceeded". The match is case-insensitive.
//
// A nil err yields false, so callers may pass an unchecked error without
// risking a nil dereference.
func isResourceExhaustedError(err error) bool {
	if err == nil {
		return false
	}

	errMsg := strings.ToLower(err.Error())
	return strings.Contains(errMsg, "no valid host") || strings.Contains(errMsg, "quota exceeded")
}

pkg/provider/provider.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package provider
33
import (
44
"fmt"
55
"sync"
6+
"time"
67

78
"github.com/gardener/machine-controller-manager/pkg/util/provider/driver"
89
client2 "github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/client"
@@ -24,12 +25,17 @@ type Provider struct {
2425
clientOnce sync.Once // Ensures client is initialized exactly once
2526
clientErr error // Stores initialization error if any
2627
capturedCredentials string // Service account key used for initialization (for defensive checks)
28+
// intervals need to be configurable to speed up tests
29+
pollingInterval time.Duration // Interval between polling attempts
30+
pollingTimeout time.Duration // Maximum time to wait during polling
2731
}
2832

2933
// NewProvider returns an empty provider object
3034
func NewProvider(i spi.SessionProviderInterface) driver.Driver {
3135
return &Provider{
32-
SPI: i,
36+
SPI: i,
37+
pollingInterval: 5 * time.Second,
38+
pollingTimeout: 10 * time.Minute,
3339
}
3440
}
3541

0 commit comments

Comments
 (0)