Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion lambdas/functions/control-plane/src/lambda.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Context, SQSEvent, SQSRecord } from 'aws-lambda';

import { addMiddleware, adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper, jobRetryCheck } from './lambda';
import { adjust } from './pool/pool';
import ScaleError from './scale-runners/ScaleError';
import ScaleError, { GHHttpError } from './scale-runners/ScaleError';
import { scaleDown } from './scale-runners/scale-down';
import { ActionRequestMessage, scaleUp } from './scale-runners/scale-up';
import { cleanSSMTokens } from './scale-runners/ssm-housekeeper';
Expand Down Expand Up @@ -229,6 +229,43 @@ describe('Test scale up lambda wrapper.', () => {
batchItemFailures: [{ itemIdentifier: 'message-0' }, { itemIdentifier: 'message-1' }],
});
});

it('Should return all messages as batch item failures when scaleUp throws GHHttpError', async () => {
const records = createMultipleRecords(3);
const multiRecordEvent: SQSEvent = { Records: records };

const error = new GHHttpError('Validation Failed', 422);
vi.mocked(scaleUp).mockRejectedValue(error);

const result = await scaleUpHandler(multiRecordEvent, context);
expect(result).toEqual({
batchItemFailures: [
{ itemIdentifier: 'message-0' },
{ itemIdentifier: 'message-1' },
{ itemIdentifier: 'message-2' },
],
});
});

it('Should return single message as batch item failure when scaleUp throws GHHttpError', async () => {
const error = new GHHttpError('Bad credentials', 401);
vi.mocked(scaleUp).mockRejectedValue(error);

const result = await scaleUpHandler(sqsEvent, context);
expect(result).toEqual({
batchItemFailures: [{ itemIdentifier: sqsRecord.messageId }],
});
});

it('Should not return batch item failures for generic errors (not ScaleError or GHHttpError)', async () => {
const records = createMultipleRecords(2);
const multiRecordEvent: SQSEvent = { Records: records };

vi.mocked(scaleUp).mockRejectedValue(new Error('Some unexpected error'));

const result = await scaleUpHandler(multiRecordEvent, context);
expect(result).toEqual({ batchItemFailures: [] });
});
});
});

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, expect, it } from 'vitest';
import type { ActionRequestMessageSQS } from './scale-up';
import ScaleError from './ScaleError';
import ScaleError, { GHHttpError } from './ScaleError';

describe('ScaleError', () => {
describe('detailedMessage', () => {
Expand Down Expand Up @@ -74,3 +74,87 @@ describe('ScaleError', () => {
});
});
});

describe('GHHttpError', () => {
describe('constructor', () => {
it('should set name, message, and status', () => {
const error = new GHHttpError('Validation Failed', 422);

expect(error.name).toBe('GHHttpError');
expect(error.message).toBe('Validation Failed');
expect(error.status).toBe(422);
});

it('should be an instance of ScaleError', () => {
const error = new GHHttpError('Not Found', 404);

expect(error).toBeInstanceOf(ScaleError);
expect(error).toBeInstanceOf(GHHttpError);
expect(error).toBeInstanceOf(Error);
});
});

describe('detailedMessage', () => {
it.each([
{ message: 'Validation Failed', status: 422, expected: 'GitHub API HTTP error (status 422): Validation Failed' },
{ message: 'Bad credentials', status: 401, expected: 'GitHub API HTTP error (status 401): Bad credentials' },
{ message: 'Not Found', status: 404, expected: 'GitHub API HTTP error (status 404): Not Found' },
{
message: 'Internal Server Error',
status: 500,
expected: 'GitHub API HTTP error (status 500): Internal Server Error',
},
])('should format message for status $status', ({ message, status, expected }) => {
const error = new GHHttpError(message, status);

expect(error.detailedMessage).toBe(expected);
});
});

describe('toBatchItemFailures', () => {
const mockMessages: ActionRequestMessageSQS[] = [
{ messageId: 'msg-1', id: 1, eventType: 'workflow_job' },
{ messageId: 'msg-2', id: 2, eventType: 'workflow_job' },
{ messageId: 'msg-3', id: 3, eventType: 'workflow_job' },
];

it('should retry ALL messages regardless of status', () => {
const error = new GHHttpError('Validation Failed', 422);
const failures = error.toBatchItemFailures(mockMessages);

expect(failures).toEqual([{ itemIdentifier: 'msg-1' }, { itemIdentifier: 'msg-2' }, { itemIdentifier: 'msg-3' }]);
});

it('should retry the single message when only one is provided', () => {
const error = new GHHttpError('Bad credentials', 401);
const failures = error.toBatchItemFailures([mockMessages[0]]);

expect(failures).toEqual([{ itemIdentifier: 'msg-1' }]);
});

it('should return empty array for empty messages', () => {
const error = new GHHttpError('Server Error', 500);
const failures = error.toBatchItemFailures([]);

expect(failures).toEqual([]);
});

it('should retry all messages unlike ScaleError which retries only failedInstanceCount', () => {
const messages: ActionRequestMessageSQS[] = [
{ messageId: 'msg-1', id: 1, eventType: 'workflow_job' },
{ messageId: 'msg-2', id: 2, eventType: 'workflow_job' },
{ messageId: 'msg-3', id: 3, eventType: 'workflow_job' },
{ messageId: 'msg-4', id: 4, eventType: 'workflow_job' },
{ messageId: 'msg-5', id: 5, eventType: 'workflow_job' },
];

// ScaleError with failedInstanceCount=1 retries only 1 message
const scaleError = new ScaleError(1);
expect(scaleError.toBatchItemFailures(messages)).toHaveLength(1);

// GHHttpError retries ALL messages
const ghError = new GHHttpError('error', 422);
expect(ghError.toBatchItemFailures(messages)).toHaveLength(5);
});
});
});
31 changes: 31 additions & 0 deletions lambdas/functions/control-plane/src/scale-runners/ScaleError.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,35 @@ class ScaleError extends Error {
}
}

/**
* Custom error for GitHub HTTP API failures during runner config creation.
* Extends ScaleError so it is caught by the same handler in the Lambda entry point.
*
* Unlike a plain ScaleError (which retries only `failedInstanceCount` messages),
* a GHHttpError retries ALL messages because the GitHub API failure may affect
* every instance that was just launched.
*/
export class GHHttpError extends ScaleError {
public readonly status: number;

constructor(message: string, status: number) {
super();
this.message = message;
this.name = 'GHHttpError';
this.status = status;
}

public override get detailedMessage(): string {
return `GitHub API HTTP error (status ${this.status}): ${this.message}`;
}

/**
* Override: retry ALL messages because the GitHub API error affects the
* entire batch of instances that were already created.
*/
public override toBatchItemFailures(messages: ActionRequestMessageSQS[]): SQSBatchItemFailure[] {
return messages.map(({ messageId }) => ({ itemIdentifier: messageId }));
}
}

export default ScaleError;
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import * as ghAuth from '../github/auth';
import { createRunner, listEC2Runners } from './../aws/runners';
import { RunnerInputParameters } from './../aws/runners.d';
import * as scaleUpModule from './scale-up';
import { GHHttpError } from './ScaleError';
import { getParameter } from '@aws-github-runner/aws-ssm-util';
import { publishRetryMessage } from './job-retry';
import { describe, it, expect, beforeEach, vi } from 'vitest';
Expand Down Expand Up @@ -1852,6 +1853,69 @@ describe('Retry mechanism tests', () => {
});
});

describe('GHHttpError', () => {
it('should have correct name, message, and status properties', () => {
const error = new GHHttpError('test error', 422);
expect(error).toBeInstanceOf(Error);
expect(error).toBeInstanceOf(GHHttpError);
expect(error.name).toBe('GHHttpError');
expect(error.message).toBe('test error');
expect(error.status).toBe(422);
});

describe('createRunners throws GHHttpError on GitHub API HTTP failure', () => {
beforeEach(() => {
process.env.GHES_URL = 'https://github.enterprise.something';
process.env.ENABLE_ORGANIZATION_RUNNERS = 'true';
process.env.ENABLE_EPHEMERAL_RUNNERS = 'true';
process.env.ENABLE_JIT_CONFIG = 'true';
process.env.RUNNER_NAME_PREFIX = 'unit-test-';
process.env.RUNNER_GROUP_NAME = 'Default';
process.env.SSM_CONFIG_PATH = '/github-action-runners/default/runners/config';
process.env.SSM_TOKEN_PATH = '/github-action-runners/default/runners/config';
process.env.RUNNER_LABELS = 'label1,label2';
process.env.RUNNERS_MAXIMUM_COUNT = '3';
});

it('wraps JIT config GitHub HTTP error as GHHttpError', async () => {
const httpError = new Error('Validation Failed') as Error & { status: number };
httpError.name = 'HttpError';
httpError.status = 422;
mockOctokit.actions.generateRunnerJitconfigForOrg.mockRejectedValue(httpError);

await expect(scaleUpModule.scaleUp(TEST_DATA)).rejects.toThrow(GHHttpError);
await expect(scaleUpModule.scaleUp(TEST_DATA)).rejects.toMatchObject({
status: 422,
message: expect.stringContaining('Validation Failed'),
});
});

it('wraps registration token GitHub HTTP error as GHHttpError', async () => {
process.env.ENABLE_EPHEMERAL_RUNNERS = 'false';
process.env.ENABLE_JIT_CONFIG = 'false';

const httpError = new Error('Bad credentials') as Error & { status: number };
httpError.name = 'HttpError';
httpError.status = 401;
mockOctokit.actions.createRegistrationTokenForOrg.mockRejectedValue(httpError);

await expect(scaleUpModule.scaleUp(TEST_DATA)).rejects.toThrow(GHHttpError);
await expect(scaleUpModule.scaleUp(TEST_DATA)).rejects.toMatchObject({
status: 401,
message: expect.stringContaining('Bad credentials'),
});
});

it('does not wrap non-HTTP errors as GHHttpError', async () => {
const genericError = new Error('Some SSM error');
mockOctokit.actions.generateRunnerJitconfigForOrg.mockRejectedValue(genericError);

await expect(scaleUpModule.scaleUp(TEST_DATA)).rejects.toThrow('Some SSM error');
await expect(scaleUpModule.scaleUp(TEST_DATA)).rejects.not.toBeInstanceOf(GHHttpError);
});
});
});

function defaultOctokitMockImpl() {
mockOctokit.actions.getJobForWorkflowRun.mockImplementation(() => ({
data: {
Expand Down
22 changes: 18 additions & 4 deletions lambdas/functions/control-plane/src/scale-runners/scale-up.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { createRunner, listEC2Runners, tag } from './../aws/runners';
import { RunnerInputParameters } from './../aws/runners.d';
import { metricGitHubAppRateLimit } from '../github/rate-limit';
import { publishRetryMessage } from './job-retry';
import { GHHttpError } from './ScaleError';

const logger = createChildLogger('scale-up');

Expand Down Expand Up @@ -538,10 +539,23 @@ async function createStartRunnerConfig(
instances: string[],
ghClient: Octokit,
) {
if (githubRunnerConfig.enableJitConfig && githubRunnerConfig.ephemeral) {
await createJitConfig(githubRunnerConfig, instances, ghClient);
} else {
await createRegistrationTokenConfig(githubRunnerConfig, instances, ghClient);
try {
if (githubRunnerConfig.enableJitConfig && githubRunnerConfig.ephemeral) {
await createJitConfig(githubRunnerConfig, instances, ghClient);
} else {
await createRegistrationTokenConfig(githubRunnerConfig, instances, ghClient);
}
} catch (error) {
if (error instanceof Error && error.name === 'HttpError') {
const status = (error as Error & { status: number }).status;
logger.error('GitHub API HTTP error during runner config creation', {
status,
message: error.message,
instances,
});
throw new GHHttpError(`Failed to create runner start config: ${error.message}`, status);
}
throw error;
}
}

Expand Down
Loading