From f020057fddf08c2dfb5aa64743957b690b29c1fb Mon Sep 17 00:00:00 2001 From: Brend Smits Date: Fri, 9 Jan 2026 13:45:11 +0100 Subject: [PATCH 1/2] feat: add bypass-removal tag to prevent runner scale-down Add support for ghr:bypass-removal EC2 tag that allows engineers to manually tag runners to prevent them from being scaled down during debugging or investigation. When this tag is set to 'true', the runner will be skipped during scale-down operations with appropriate logging. --- lambdas/functions/control-plane/src/aws/runners.d.ts | 1 + lambdas/functions/control-plane/src/aws/runners.test.ts | 3 +++ lambdas/functions/control-plane/src/aws/runners.ts | 1 + .../control-plane/src/scale-runners/scale-down.test.ts | 1 + .../control-plane/src/scale-runners/scale-down.ts | 8 ++++++++ 5 files changed, 14 insertions(+) diff --git a/lambdas/functions/control-plane/src/aws/runners.d.ts b/lambdas/functions/control-plane/src/aws/runners.d.ts index 3975d093f5..7e9bf0fbba 100644 --- a/lambdas/functions/control-plane/src/aws/runners.d.ts +++ b/lambdas/functions/control-plane/src/aws/runners.d.ts @@ -11,6 +11,7 @@ export interface RunnerList { org?: string; orphan?: boolean; runnerId?: string; + bypassRemoval?: boolean; } export interface RunnerInfo { diff --git a/lambdas/functions/control-plane/src/aws/runners.test.ts b/lambdas/functions/control-plane/src/aws/runners.test.ts index c3cc6d8487..63f1412dd0 100644 --- a/lambdas/functions/control-plane/src/aws/runners.test.ts +++ b/lambdas/functions/control-plane/src/aws/runners.test.ts @@ -91,6 +91,7 @@ describe('list instances', () => { type: 'Org', owner: 'CoderToCat', orphan: false, + bypassRemoval: false, }); }); @@ -105,6 +106,7 @@ describe('list instances', () => { owner: 'CoderToCat', orphan: false, runnerId: '9876543210', + bypassRemoval: false, }); }); @@ -124,6 +126,7 @@ describe('list instances', () => { type: 'Org', owner: 'CoderToCat', orphan: true, + bypassRemoval: false, }); }); diff --git 
a/lambdas/functions/control-plane/src/aws/runners.ts b/lambdas/functions/control-plane/src/aws/runners.ts index 0f4fc5bee9..7f7f5750bf 100644 --- a/lambdas/functions/control-plane/src/aws/runners.ts +++ b/lambdas/functions/control-plane/src/aws/runners.ts @@ -93,6 +93,7 @@ function getRunnerInfo(runningInstances: DescribeInstancesResult) { org: i.Tags?.find((e) => e.Key === 'ghr:Org')?.Value as string, orphan: i.Tags?.find((e) => e.Key === 'ghr:orphan')?.Value === 'true', runnerId: i.Tags?.find((e) => e.Key === 'ghr:github_runner_id')?.Value as string, + bypassRemoval: i.Tags?.find((e) => e.Key === 'ghr:bypass-removal')?.Value === 'true', }); } } diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts b/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts index 026650d745..7ba52d2d0f 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts @@ -813,5 +813,6 @@ function createRunnerTestData( orphan, shouldBeTerminated, runnerId: runnerId !== undefined ? String(runnerId) : undefined, + bypassRemoval: false, }; } diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-down.ts b/lambdas/functions/control-plane/src/scale-runners/scale-down.ts index 1e5e712a24..6086af7714 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-down.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-down.ts @@ -130,6 +130,14 @@ function runnerMinimumTimeExceeded(runner: RunnerInfo): boolean { async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promise { const githubAppClient = await getOrCreateOctokit(ec2runner); try { + const runnerList = ec2runner as unknown as RunnerList; + if (runnerList.bypassRemoval) { + logger.info( + `Runner '${ec2runner.instanceId}' has bypass-removal tag set, skipping removal. 
Remove the tag to allow scale-down.`, + ); + return; + } + const states = await Promise.all( ghRunnerIds.map(async (ghRunnerId) => { // Get busy state instead of using the output of listGitHubRunners(...) to minimize to race condition. From bfba0f5db97db35dd7bdcbfa6975cf5ec4798480 Mon Sep 17 00:00:00 2001 From: Brend Smits Date: Fri, 9 Jan 2026 17:08:36 +0100 Subject: [PATCH 2/2] add test and documentation --- docs/additional_notes.md | 28 +++++++++++++++++++ .../src/scale-runners/scale-down.test.ts | 19 +++++++++++++ 2 files changed, 47 insertions(+) diff --git a/docs/additional_notes.md b/docs/additional_notes.md index 0427ca19ff..5b1212464d 100644 --- a/docs/additional_notes.md +++ b/docs/additional_notes.md @@ -32,3 +32,31 @@ If default labels are removed: | 'custom5' | Linux | no match | | 'custom5' | [ self-hosted, Linux ] | no match | | 'custom5' | [ custom5, self-hosted, Linux ] | no match | + +# Preventing Runner Scale-Down for Debugging + +The module supports a bypass mechanism that allows you to prevent specific runners from being scaled down during debugging or investigation. This is useful when you need to access a runner instance directly to troubleshoot issues. + +## Usage + +To prevent a runner from being terminated during scale-down operations, add the `ghr:bypass-removal` tag to the EC2 instance with a value of `true`: + +```bash +aws ec2 create-tags --resources <instance-id> --tags Key=ghr:bypass-removal,Value=true +``` + +When this tag is set, the scale-down process will skip the runner and log a message indicating that the runner is protected: + +``` +Runner 'i-xxxxxxxxxxxx' has bypass-removal tag set, skipping removal. Remove the tag to allow scale-down.
+``` + +## Removing the Protection + +Once you've finished debugging and want to allow the runner to be scaled down normally, remove the tag or set it to any other value: + +```bash +aws ec2 delete-tags --resources <instance-id> --tags Key=ghr:bypass-removal +``` + +**Note:** The bypass-removal tag only prevents automatic scale-down. The runner will still continue to process job(s) as normal. Make sure to remove the tag after debugging to ensure proper resource management. It will also still terminate itself if the instance is ephemeral and the job is complete. diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts b/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts index 7ba52d2d0f..2dfb190a38 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts @@ -286,6 +286,25 @@ describe('Scale down runners', () => { checkNonTerminated(runners); }); + it(`Should not terminate runner with bypass-removal tag set.`, async () => { + // setup + const runners = [ + createRunnerTestData('idle-with-bypass', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 10, true, false, false), + ]; + // Set bypass-removal tag + runners[0].bypassRemoval = true; + + mockGitHubRunners(runners); + mockAwsRunners(runners); + + // act + await scaleDown(); + + // assert + expect(terminateRunner).not.toHaveBeenCalled(); + checkNonTerminated(runners); + }); + it(`Should not terminate a runner that became busy just before deregister runner.`, async () => { // setup const runners = [