From 2bd80584f46956e0d21157d012a13655dbb86f63 Mon Sep 17 00:00:00 2001 From: Vegard Hansen Date: Wed, 27 May 2026 06:44:19 +0200 Subject: [PATCH] fix: enable Octokit throttle retry and SSM adaptive retry under burst Two resilience improvements for burst load: 1. Octokit plugin-throttling callbacks now return true (with caps) so rate-limited GitHub API requests are retried instead of immediately failing. Primary: up to 2 retries. Secondary: 1 retry. 2. SSM client switched to adaptive retry mode with maxAttempts=10, giving ~30s of retry budget for PutParameter under throttling. Standard mode (3 attempts, ~3s) is insufficient when multiple concurrent Lambdas write JIT configs during burst. Fixes #5135 --- .../control-plane/src/github/auth.ts | 8 +++++-- lambdas/libs/aws-ssm-util/src/index.ts | 21 ++++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/lambdas/functions/control-plane/src/github/auth.ts b/lambdas/functions/control-plane/src/github/auth.ts index 9a572c48a8..fea7f3fb07 100644 --- a/lambdas/functions/control-plane/src/github/auth.ts +++ b/lambdas/functions/control-plane/src/github/auth.ts @@ -54,11 +54,15 @@ export async function createOctokitClient(token: string, ghesApiUrl = ''): Promi throttle: { onRateLimit: (retryAfter: number, options: Required) => { logger.warn( - `GitHub rate limit: Request quota exhausted for request ${options.method} ${options.url}. Requested `, + `GitHub rate limit: Request quota exhausted for request ${options.method} ${options.url}, retrying after ${retryAfter}s`, ); + return (options as unknown as { request: { retryCount: number } }).request.retryCount < 2; }, onSecondaryRateLimit: (retryAfter: number, options: Required) => { - logger.warn(`GitHub rate limit: SecondaryRateLimit detected for request ${options.method} ${options.url}`); + logger.warn( + `GitHub rate limit: SecondaryRateLimit detected for request ${options.method} ${options.url}, retrying after ${retryAfter}s`, + ); + return (options as unknown as { request: { retryCount: number } }).request.retryCount < 1; }, }, }); diff --git a/lambdas/libs/aws-ssm-util/src/index.ts b/lambdas/libs/aws-ssm-util/src/index.ts index 9173cbb210..c4cfb67a00 100644 --- a/lambdas/libs/aws-ssm-util/src/index.ts +++ b/lambdas/libs/aws-ssm-util/src/index.ts @@ -2,8 +2,23 @@ import { GetParametersCommand, PutParameterCommand, SSMClient, Tag } from '@aws- import { getTracedAWSV3Client } from '@aws-github-runner/aws-powertools-util'; import { SSMProvider } from '@aws-lambda-powertools/parameters/ssm'; +// SSM PutParameter has a per-account, per-region rate limit (~40 TPS standard +// throughput). Under burst load with multiple concurrent Lambdas each writing +// JIT configs, the default retry (standard, 3 attempts, ~3s budget) is +// insufficient and throws ThrottlingException. +// +// `adaptive` retry mode adds client-side rate-sensing via a token bucket: +// when the SDK sees ThrottlingException it slows further calls to match the +// observed budget. Combined with maxAttempts=10 this gives ~30s of retry +// per call without hammering the API. +const SSM_CLIENT_CONFIG = { + region: process.env.AWS_REGION, + maxAttempts: 10, + retryMode: 'adaptive' as const, +}; + export async function getParameter(parameter_name: string): Promise { - const ssmClient = getTracedAWSV3Client(new SSMClient({ region: process.env.AWS_REGION })); + const ssmClient = getTracedAWSV3Client(new SSMClient(SSM_CLIENT_CONFIG)); const client = new SSMProvider({ awsSdkV3Client: ssmClient }); //getTracedAWSV3Client(); const result = await client.get(parameter_name, { decrypt: true, @@ -48,7 +63,7 @@ export async function getParameters(parameter_names: string[]): Promise(); // AWS SSM GetParameters API has a limit of 10 parameters per call @@ -80,7 +95,7 @@ export async function putParameter( secure: boolean, options: { tags?: Tag[] } = {}, ): Promise { - const client = getTracedAWSV3Client(new SSMClient({ region: process.env.AWS_REGION })); + const client = getTracedAWSV3Client(new SSMClient(SSM_CLIENT_CONFIG)); // Determine tier based on parameter_value size const valueSizeBytes = Buffer.byteLength(parameter_value, 'utf8');