From 60eb2a2bbb6a345ee1b7ae8faa0b0adad00526f5 Mon Sep 17 00:00:00 2001 From: Ariel Caplan Date: Thu, 30 Oct 2025 18:45:49 +0200 Subject: [PATCH 1/5] Treat transient network errors as AbortError instead of BugError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames isARetryableNetworkError to isTransientNetworkError for clarity and exports it for reuse in fetchApiVersions error handling. This fixes 6,941+ production errors that were incorrectly classified as bugs. Changes: - Renamed isARetryableNetworkError -> isTransientNetworkError - Added JSDoc explaining these are transient network errors - Exported function for reuse across the codebase - Enhanced detection patterns to include TLS/cert errors, premature close, timeouts - Updated fetchApiVersions to classify network errors as AbortError Network errors now properly classified include: - TLS certificate validation failures (main issue with 6,941 occurrences) - Connection timeouts (ETIMEDOUT) - Connection resets (ECONNRESET) - DNS failures (ENOTFOUND, getaddrinfo) - Socket disconnects and premature closes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .changeset/network-errors-as-abort-errors.md | 5 +++++ packages/cli-kit/src/private/node/api.ts | 18 ++++++++++++++++-- packages/cli-kit/src/public/node/api/admin.ts | 19 ++++++++++++++++--- 3 files changed, 37 insertions(+), 5 deletions(-) create mode 100644 .changeset/network-errors-as-abort-errors.md diff --git a/.changeset/network-errors-as-abort-errors.md b/.changeset/network-errors-as-abort-errors.md new file mode 100644 index 00000000000..23033616441 --- /dev/null +++ b/.changeset/network-errors-as-abort-errors.md @@ -0,0 +1,5 @@ +--- +'@shopify/cli-kit': patch +--- + +Treat transient network errors as user-facing AbortError instead of BugError in fetchApiVersions diff --git a/packages/cli-kit/src/private/node/api.ts b/packages/cli-kit/src/private/node/api.ts index ea344592a0c..18da9f9d4c1 100644 --- a/packages/cli-kit/src/private/node/api.ts +++ b/packages/cli-kit/src/private/node/api.ts @@ -68,7 +68,13 @@ type VerboseResponse = | CanRetryErrorResponse | UnauthorizedErrorResponse -function isARetryableNetworkError(error: unknown): boolean { +/** + * Checks if an error is a transient network error (connection issues, timeouts, DNS failures, TLS errors, etc.) + * rather than an application logic error. These errors are typically: + * - Worth retrying (when retry logic is configured) + * - Should be reported as user-facing errors (AbortError) rather than bugs (BugError) + */ +export function isTransientNetworkError(error: unknown): boolean { if (error instanceof Error) { const networkErrorMessages = [ 'socket hang up', @@ -82,6 +88,14 @@ function isARetryableNetworkError(error: unknown): boolean { 'eai_again', 'epipe', 'the operation was aborted', + 'timeout', + 'premature close', + 'certificate', + 'cert', + 'tls', + 'ssl', + 'altnames', + 'getaddrinfo', ] const errorMessage = error.message.toLowerCase() const anyMatches = networkErrorMessages.some((issueMessage) => errorMessage.includes(issueMessage)) @@ -105,7 +119,7 @@ async function runRequestWithNetworkLevelRetry Date: Sun, 2 Nov 2025 15:27:40 +0200 Subject: [PATCH 2/5] Distinguish between transient and permanent network errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses PR review feedback about retrying permanent network errors. Changes: - Split isTransientNetworkError (for retry logic) from isNetworkError (for error classification) - isTransientNetworkError: Only includes retryable errors (timeouts, DNS failures, connection issues) - isNetworkError: Includes all network errors (transient + permanent SSL/TLS/certificate errors) - Updated fetchApiVersions to use isNetworkError for error classification - Eliminated code duplication by having isNetworkError call isTransientNetworkError Impact: - Certificate validation failures are still reported as user-facing AbortErrors (not bugs) - But they are no longer retried unnecessarily - All existing tests pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- packages/cli-kit/src/private/node/api.ts | 49 ++++++++++++++----- packages/cli-kit/src/public/node/api/admin.ts | 9 ++-- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/packages/cli-kit/src/private/node/api.ts b/packages/cli-kit/src/private/node/api.ts index 18da9f9d4c1..ee37dea7e4c 100644 --- a/packages/cli-kit/src/private/node/api.ts +++ b/packages/cli-kit/src/private/node/api.ts @@ -69,14 +69,19 @@ type VerboseResponse = | UnauthorizedErrorResponse /** - * Checks if an error is a transient network error (connection issues, timeouts, DNS failures, TLS errors, etc.) - * rather than an application logic error. These errors are typically: - * - Worth retrying (when retry logic is configured) - * - Should be reported as user-facing errors (AbortError) rather than bugs (BugError) + * Checks if an error is a transient network error that is likely to recover with retries. + * + * Use this function for retry logic. Use isNetworkError for error classification. + * + * Examples of transient errors (worth retrying): + * - Connection timeouts, resets, and aborts + * - DNS failures (enotfound, getaddrinfo, eai_again) - can be temporary + * - Socket disconnects and hang ups + * - Premature connection closes */ export function isTransientNetworkError(error: unknown): boolean { if (error instanceof Error) { - const networkErrorMessages = [ + const transientErrorMessages = [ 'socket hang up', 'econnreset', 'econnaborted', @@ -90,21 +95,43 @@ export function isTransientNetworkError(error: unknown): boolean { 'the operation was aborted', 'timeout', 'premature close', - 'certificate', - 'cert', - 'tls', - 'ssl', - 'altnames', 'getaddrinfo', ] const errorMessage = error.message.toLowerCase() - const anyMatches = networkErrorMessages.some((issueMessage) => errorMessage.includes(issueMessage)) + const anyMatches = transientErrorMessages.some((issueMessage) => errorMessage.includes(issueMessage)) const missingReason = /^request to .* failed, reason:\s*$/.test(errorMessage) return anyMatches || missingReason } return false } +/** + * Checks if an error is any kind of network-related error (connection issues, timeouts, DNS failures, + * TLS/certificate errors, etc.) rather than an application logic error. + * + * These errors should be reported as user-facing errors (AbortError) rather than bugs (BugError), + * regardless of whether they are transient or permanent. + * + * Examples include: + * - Transient: connection timeouts, socket hang ups, temporary DNS failures + * - Permanent: certificate validation failures, misconfigured SSL + */ +export function isNetworkError(error: unknown): boolean { + // First check if it's a transient network error + if (isTransientNetworkError(error)) { + return true + } + + // Then check for permanent network errors (SSL/TLS/certificate issues) + if (error instanceof Error) { + const permanentNetworkErrorMessages = ['certificate', 'cert', 'tls', 'ssl', 'altnames'] + const errorMessage = error.message.toLowerCase() + return permanentNetworkErrorMessages.some((issueMessage) => errorMessage.includes(issueMessage)) + } + + return false +} + async function runRequestWithNetworkLevelRetry( requestOptions: RequestOptions, ): Promise { diff --git a/packages/cli-kit/src/public/node/api/admin.ts b/packages/cli-kit/src/public/node/api/admin.ts index 650dc39fdd3..f10e092ce1e 100644 --- a/packages/cli-kit/src/public/node/api/admin.ts +++ b/packages/cli-kit/src/public/node/api/admin.ts @@ -14,7 +14,7 @@ import { restRequestUrl, isThemeAccessSession, } from '../../../private/node/api/rest.js' -import {isTransientNetworkError} from '../../../private/node/api.js' +import {isNetworkError} from '../../../private/node/api.js' import {RequestModeInput, shopifyFetch} from '../http.js' import {PublicApiVersions} from '../../../cli/api/graphql/admin/generated/public_api_versions.js' @@ -185,9 +185,10 @@ async function fetchApiVersions(session: AdminSession, preferredBehaviour?: Requ ) } - // Check for network-level errors (connection issues, timeouts, DNS failures, TLS errors, etc.) - // These are transient errors that may have been retried already by lower-level retry logic - if (isTransientNetworkError(error)) { + // Check for network-level errors (connection issues, timeouts, DNS failures, TLS/certificate errors, etc.) + // All network errors should be treated as user-facing errors, not CLI bugs + // Note: Some of these may have been retried already by lower-level retry logic + if (isNetworkError(error)) { throw new AbortError( `Network error connecting to your store ${session.storeFqdn}: ${ error instanceof Error ? error.message : String(error) From f8af4aaaa09e504f5f66fd0f2574e9d114fd82fc Mon Sep 17 00:00:00 2001 From: Ariel Caplan Date: Sun, 2 Nov 2025 15:30:31 +0200 Subject: [PATCH 3/5] Add comprehensive tests for network error detection and retry behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests verify the key behaviors from the PR: 1. Certificate/TLS/SSL errors are detected as network errors (not bugs) 2. Certificate errors are NOT retried (they're permanent) 3. Transient errors (timeouts, DNS failures) ARE retried 4. All production error patterns from the PR are covered New test coverage: - isTransientNetworkError: 14 transient error patterns, blank reasons - isNetworkError: All transient errors + permanent cert/TLS/SSL errors - Retry behavior: Verifies cert errors fail immediately without retries - Edge cases: Non-Error objects, non-network errors This ensures the fix for 6,941+ production certificate errors works correctly. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- packages/cli-kit/src/private/node/api.test.ts | 155 +++++++++++++++++- 1 file changed, 154 insertions(+), 1 deletion(-) diff --git a/packages/cli-kit/src/private/node/api.test.ts b/packages/cli-kit/src/private/node/api.test.ts index e5586b0ca67..29259f1f547 100644 --- a/packages/cli-kit/src/private/node/api.test.ts +++ b/packages/cli-kit/src/private/node/api.test.ts @@ -1,4 +1,4 @@ -import {retryAwareRequest} from './api.js' +import {retryAwareRequest, isNetworkError, isTransientNetworkError} from './api.js' import {ClientError} from 'graphql-request' import {describe, test, vi, expect, beforeEach, afterEach} from 'vitest' @@ -342,4 +342,157 @@ describe('retryAwareRequest', () => { await expect(result).rejects.toThrowError(nonBlankUnknownReason) expect(mockRequestFn).toHaveBeenCalledTimes(1) }) + + test('does not retry certificate/TLS/SSL errors (permanent network errors)', async () => { + vi.useRealTimers() + const certificateErrors = [ + 'certificate has expired', + "Hostname/IP does not match certificate's altnames", + 'TLS handshake failed', + 'SSL certificate problem: unable to get local issuer certificate', + ] + + for (const certError of certificateErrors) { + const mockRequestFn = vi.fn().mockImplementation(() => { + throw new Error(certError) + }) + + const result = retryAwareRequest( + { + request: mockRequestFn, + url: 'https://example.com/graphql.json', + useNetworkLevelRetry: true, + maxRetryTimeMs: 2000, + }, + undefined, + {defaultDelayMs: 10, scheduleDelay: (fn) => fn()}, + ) + + await expect(result).rejects.toThrowError(certError) + expect(mockRequestFn).toHaveBeenCalledTimes(1) + } + }) +}) + +describe('isTransientNetworkError', () => { + test('identifies transient network errors that should be retried', () => { + const transientErrors = [ + 'socket hang up', + 'ECONNRESET', + 'ECONNABORTED', + 'ENOTFOUND', + 'ENETUNREACH', + 'network socket disconnected', + 'ETIMEDOUT', + 'ECONNREFUSED', + 'EAI_AGAIN', + 'EPIPE', + 'the operation was aborted', + 'timeout occurred', + 'premature close', + 'getaddrinfo ENOTFOUND', + ] + + for (const errorMsg of transientErrors) { + expect(isTransientNetworkError(new Error(errorMsg))).toBe(true) + } + }) + + test('identifies blank reason network errors', () => { + const blankReasonErrors = [ + 'request to https://example.com failed, reason:', + 'request to https://example.com failed, reason: ', + 'request to https://example.com failed, reason:\n\t', + ] + + for (const errorMsg of blankReasonErrors) { + expect(isTransientNetworkError(new Error(errorMsg))).toBe(true) + } + }) + + test('does not identify certificate errors as transient (should not be retried)', () => { + const permanentErrors = [ + 'certificate has expired', + 'cert verification failed', + 'TLS handshake failed', + 'SSL certificate problem', + "Hostname/IP does not match certificate's altnames", + ] + + for (const errorMsg of permanentErrors) { + expect(isTransientNetworkError(new Error(errorMsg))).toBe(false) + } + }) + + test('does not identify non-network errors as transient', () => { + const nonNetworkErrors = [ + 'Invalid JSON', + 'Syntax error', + 'undefined is not a function', + 'request failed with status 500', + ] + + for (const errorMsg of nonNetworkErrors) { + expect(isTransientNetworkError(new Error(errorMsg))).toBe(false) + } + }) + + test('returns false for non-Error objects', () => { + expect(isTransientNetworkError('string error')).toBe(false) + expect(isTransientNetworkError(null)).toBe(false) + expect(isTransientNetworkError(undefined)).toBe(false) + expect(isTransientNetworkError({message: 'ENOTFOUND'})).toBe(false) + }) +}) + +describe('isNetworkError', () => { + test('identifies all transient network errors', () => { + const transientErrors = [ + 'ECONNRESET', + 'ETIMEDOUT', + 'ENOTFOUND', + 'socket hang up', + 'premature close', + ] + + for (const errorMsg of transientErrors) { + expect(isNetworkError(new Error(errorMsg))).toBe(true) + } + }) + + test('identifies permanent network errors (certificate/TLS/SSL)', () => { + const permanentErrors = [ + 'certificate has expired', + 'cert verification failed', + 'TLS handshake failed', + 'SSL certificate problem', + "Hostname/IP does not match certificate's altnames", + 'unable to verify the first certificate', + 'self signed certificate in certificate chain', + ] + + for (const errorMsg of permanentErrors) { + expect(isNetworkError(new Error(errorMsg))).toBe(true) + } + }) + + test('does not identify non-network errors', () => { + const nonNetworkErrors = [ + 'Invalid JSON', + 'Syntax error', + 'undefined is not a function', + 'request failed with status 500', + ] + + for (const errorMsg of nonNetworkErrors) { + expect(isNetworkError(new Error(errorMsg))).toBe(false) + } + }) + + test('returns false for non-Error objects', () => { + expect(isNetworkError('string error')).toBe(false) + expect(isNetworkError(null)).toBe(false) + expect(isNetworkError(undefined)).toBe(false) + expect(isNetworkError({message: 'certificate error'})).toBe(false) + }) }) From 03428149c2b011be13535628bf42ba065f19e5ce Mon Sep 17 00:00:00 2001 From: Ariel Caplan Date: Sun, 2 Nov 2025 15:55:25 +0200 Subject: [PATCH 4/5] Fix lint error: use Promise.all instead of await in loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactored the certificate error test to use Promise.all() instead of awaiting in a loop, which is more efficient and satisfies the no-await-in-loop eslint rule. The test still verifies that all 4 certificate error types are not retried, but now runs them in parallel instead of sequentially. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- packages/cli-kit/src/private/node/api.test.ts | 46 +++++++++---------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/packages/cli-kit/src/private/node/api.test.ts b/packages/cli-kit/src/private/node/api.test.ts index 29259f1f547..776441dcd2b 100644 --- a/packages/cli-kit/src/private/node/api.test.ts +++ b/packages/cli-kit/src/private/node/api.test.ts @@ -352,25 +352,27 @@ describe('retryAwareRequest', () => { 'SSL certificate problem: unable to get local issuer certificate', ] - for (const certError of certificateErrors) { - const mockRequestFn = vi.fn().mockImplementation(() => { - throw new Error(certError) - }) + await Promise.all( + certificateErrors.map(async (certError) => { + const mockRequestFn = vi.fn().mockImplementation(() => { + throw new Error(certError) + }) - const result = retryAwareRequest( - { - request: mockRequestFn, - url: 'https://example.com/graphql.json', - useNetworkLevelRetry: true, - maxRetryTimeMs: 2000, - }, - undefined, - {defaultDelayMs: 10, scheduleDelay: (fn) => fn()}, - ) - - await expect(result).rejects.toThrowError(certError) - expect(mockRequestFn).toHaveBeenCalledTimes(1) - } + const result = retryAwareRequest( + { + request: mockRequestFn, + url: 'https://example.com/graphql.json', + useNetworkLevelRetry: true, + maxRetryTimeMs: 2000, + }, + undefined, + {defaultDelayMs: 10, scheduleDelay: (fn) => fn()}, + ) + + await expect(result).rejects.toThrowError(certError) + expect(mockRequestFn).toHaveBeenCalledTimes(1) + }), + ) }) }) @@ -447,13 +449,7 @@ describe('isTransientNetworkError', () => { describe('isNetworkError', () => { test('identifies all transient network errors', () => { - const transientErrors = [ - 'ECONNRESET', - 'ETIMEDOUT', - 'ENOTFOUND', - 'socket hang up', - 'premature close', - ] + const transientErrors = ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'socket hang up', 'premature close'] for (const errorMsg of transientErrors) { expect(isNetworkError(new Error(errorMsg))).toBe(true) From 771d0017a64fb7d0a7dcbc2eec5fe93d7089e1e8 Mon Sep 17 00:00:00 2001 From: Ariel Caplan Date: Sun, 2 Nov 2025 16:15:43 +0200 Subject: [PATCH 5/5] Update changeset to match new PR contents --- .changeset/network-errors-as-abort-errors.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/network-errors-as-abort-errors.md b/.changeset/network-errors-as-abort-errors.md index 23033616441..ebdba6f5242 100644 --- a/.changeset/network-errors-as-abort-errors.md +++ b/.changeset/network-errors-as-abort-errors.md @@ -2,4 +2,4 @@ '@shopify/cli-kit': patch --- -Treat transient network errors as user-facing AbortError instead of BugError in fetchApiVersions +Treat all network errors as user-facing AbortError instead of BugError in fetchApiVersions