diff --git a/lambdas/functions/control-plane/src/aws/runners.test.ts b/lambdas/functions/control-plane/src/aws/runners.test.ts index 4243e4b06b..82f31295be 100644 --- a/lambdas/functions/control-plane/src/aws/runners.test.ts +++ b/lambdas/functions/control-plane/src/aws/runners.test.ts @@ -511,13 +511,15 @@ describe('create runner with errors', () => { it('test ScaleError with multiple error.', async () => { createFleetMockWithErrors(['UnfulfillableCapacity', 'MaxSpotInstanceCountExceeded', 'NotMappedError']); - await expect(createRunner(createRunnerConfig(defaultRunnerConfig))).rejects.toMatchObject({ + await expect( + createRunner({ ...createRunnerConfig(defaultRunnerConfig), numberOfRunners: 3 }), + ).rejects.toMatchObject({ name: 'ScaleError', - failedInstanceCount: 2, + failedInstanceCount: 3, // numberOfRunners when zero instances created }); expect(mockEC2Client).toHaveReceivedCommandWith( CreateFleetCommand, - expectedCreateFleetRequest(defaultExpectedFleetRequestValues), + expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, totalTargetCapacity: 3 }), ); expect(mockSSMClient).not.toHaveReceivedCommand(PutParameterCommand); }); @@ -543,6 +545,18 @@ describe('create runner with errors', () => { ); }); + it('returns partial instances on recognized scale error instead of throwing', async () => { + createFleetMockWithErrors(['UnfulfillableCapacity'], ['i-partial']); + + await expect( + createRunner({ ...createRunnerConfig(defaultRunnerConfig), numberOfRunners: 3 }), + ).resolves.toEqual(['i-partial']); + expect(mockEC2Client).toHaveReceivedCommandWith( + CreateFleetCommand, + expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, totalTargetCapacity: 3 }), + ); + }); + it('test error by create fleet call is thrown.', async () => { mockEC2Client.on(CreateFleetCommand).rejects(new Error('Some error')); @@ -688,12 +702,14 @@ describe('create runner with errors fail over to OnDemand', () => { // fallback to on demand for UnfulfillableCapacity but InsufficientInstanceCapacity is thrown createFleetMockWithWithOnDemandFallback(['UnfulfillableCapacity'], instancesIds); + // Partial success: 1 instance created, unrecognized error for the rest. + // Returns partial instances instead of throwing to prevent orphans. await expect( createRunner({ ...createRunnerConfig(defaultRunnerConfig), numberOfRunners: 2, }), - ).rejects.toBeInstanceOf(Error); + ).resolves.toEqual(['i-123']); expect(mockEC2Client).toHaveReceivedCommandTimes(CreateFleetCommand, 1); diff --git a/lambdas/functions/control-plane/src/aws/runners.ts b/lambdas/functions/control-plane/src/aws/runners.ts index 193c82d2e7..93b52e0a90 100644 --- a/lambdas/functions/control-plane/src/aws/runners.ts +++ b/lambdas/functions/control-plane/src/aws/runners.ts @@ -201,9 +201,26 @@ async function processFleetResult( const failedCount = countScaleErrors(errors, scaleErrors); if (failedCount > 0) { - logger.warn('Create fleet failed, ScaleError will be thrown to trigger retry for ephemeral runners.'); + if (instances.length > 0) { + logger.warn( + `Partial fleet success: ${instances.length}/${runnerParameters.numberOfRunners} instances created. ` + + `Returning partial results; unfulfilled requests remain for retry.`, + { data: fleet.Errors, created: instances.length, requested: runnerParameters.numberOfRunners }, + ); + return instances; + } + logger.warn('Create fleet failed with zero instances, ScaleError will be thrown to trigger retry.'); logger.debug('Create fleet failed.', { data: fleet.Errors }); - throw new ScaleError(failedCount); + throw new ScaleError(runnerParameters.numberOfRunners); + } + + if (instances.length > 0) { + logger.warn( + `Partial fleet success: ${instances.length}/${runnerParameters.numberOfRunners} instances created. ` + + `Error not recognized as scaling error; returning partial results.`, + { data: fleet.Errors, created: instances.length, requested: runnerParameters.numberOfRunners }, + ); + return instances; } logger.warn('Create fleet failed, error not recognized as scaling error.', { data: fleet.Errors });