From 55be48c60c98c8a71b68e776e8b752ff40b50892 Mon Sep 17 00:00:00 2001 From: cawthorne Date: Tue, 20 Jan 2026 23:25:38 +0000 Subject: [PATCH 1/4] Add WebSocket failover counter metric, abnormal closure tracking, and URL change logging --- src/metrics/index.ts | 5 +++++ src/transports/websocket.ts | 18 +++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/metrics/index.ts b/src/metrics/index.ts index 08d15242..2893f9bf 100644 --- a/src/metrics/index.ts +++ b/src/metrics/index.ts @@ -357,4 +357,9 @@ export const metrics = new Metrics(() => ({ help: 'The number of addresses in PoR request input parameters', labelNames: ['feed_id'] as const, }), + wsConnectionFailoverCount: new client.Gauge({ + name: 'ws_connection_failover_count', + help: 'The number of consecutive connection issues (unresponsive/no data, abnormal closures), used to trigger URL failover. Resets to 0 when data flows successfully.', + labelNames: ['transport_name'] as const, + }), })) diff --git a/src/transports/websocket.ts b/src/transports/websocket.ts index eafb3077..881efd86 100644 --- a/src/transports/websocket.ts +++ b/src/transports/websocket.ts @@ -295,6 +295,16 @@ export class WebSocketTransport< `Closed websocket connection. Code: ${event.code} ; reason: ${event.reason?.toString()}`, ) + // If abnormal closure, increment failover counter to trigger potential URL switch + // Code 1000 is normal closure, all other codes indicate abnormal disconnections + if (event.code !== 1000) { + this.streamHandlerInvocationsWithNoConnection += 1 + logger.info( + `Abnormal closure detected (code ${event.code}), incremented failover counter to ${this.streamHandlerInvocationsWithNoConnection}`, + ) + metrics.get('wsConnectionFailoverCount').labels({ transport_name: this.name }).set(this.streamHandlerInvocationsWithNoConnection) + } + // Record active ws connections by decrementing count on close // Using URL in label since connection_key is removed from v3 metrics.get('wsConnectionActive').dec() @@ -414,9 +424,10 @@ export class WebSocketTransport< // to determine minimum TTL of an open connection given no explicit connection errors. if (connectionUnresponsive) { this.streamHandlerInvocationsWithNoConnection += 1 - logger.trace( - `The connection is unresponsive, incremented streamHandlerIterationsWithNoConnection = ${this.streamHandlerInvocationsWithNoConnection}`, + logger.info( + `The connection is unresponsive (last message ${timeSinceLastMessage}ms ago), incremented failover counter to ${this.streamHandlerInvocationsWithNoConnection}`, ) + metrics.get('wsConnectionFailoverCount').labels({ transport_name: this.name }).set(this.streamHandlerInvocationsWithNoConnection) } // We want to check if the URL we calculate is different from the one currently connected. @@ -431,9 +442,10 @@ export class WebSocketTransport< // Check if we should close the current connection if (!connectionClosed && (urlChanged || connectionUnresponsive)) { if (urlChanged) { + logger.info('Websocket URL has changed, closing connection to reconnect...') censorLogs(() => logger.debug( - `Websocket url has changed from ${this.currentUrl} to ${urlFromConfig}, closing connection...`, + `Websocket URL changed from ${this.currentUrl} to ${urlFromConfig}`, ), ) } else { From 7090a7cfa74df948763c200ade98043f40eb9a6c Mon Sep 17 00:00:00 2001 From: cawthorne Date: Tue, 20 Jan 2026 23:45:02 +0000 Subject: [PATCH 2/4] Remove increment on 1006 --- src/transports/websocket.ts | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/transports/websocket.ts b/src/transports/websocket.ts index 881efd86..8de94291 100644 --- a/src/transports/websocket.ts +++ b/src/transports/websocket.ts @@ -295,16 +295,6 @@ export class WebSocketTransport< `Closed websocket connection. Code: ${event.code} ; reason: ${event.reason?.toString()}`, ) - // If abnormal closure, increment failover counter to trigger potential URL switch - // Code 1000 is normal closure, all other codes indicate abnormal disconnections - if (event.code !== 1000) { - this.streamHandlerInvocationsWithNoConnection += 1 - logger.info( - `Abnormal closure detected (code ${event.code}), incremented failover counter to ${this.streamHandlerInvocationsWithNoConnection}`, - ) - metrics.get('wsConnectionFailoverCount').labels({ transport_name: this.name }).set(this.streamHandlerInvocationsWithNoConnection) - } - // Record active ws connections by decrementing count on close // Using URL in label since connection_key is removed from v3 metrics.get('wsConnectionActive').dec() From b59c885e77eb076959465aab9dd194fc5822566c Mon Sep 17 00:00:00 2001 From: cawthorne Date: Tue, 27 Jan 2026 16:01:12 +0000 Subject: [PATCH 3/4] Remove bad description --- src/metrics/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/metrics/index.ts b/src/metrics/index.ts index 2893f9bf..658b46a6 100644 --- a/src/metrics/index.ts +++ b/src/metrics/index.ts @@ -359,7 +359,7 @@ export const metrics = new Metrics(() => ({ }), wsConnectionFailoverCount: new client.Gauge({ name: 'ws_connection_failover_count', - help: 'The number of consecutive connection issues (unresponsive/no data, abnormal closures), used to trigger URL failover. Resets to 0 when data flows successfully.', + help: 'The number of consecutive unresponsive connection detections (no data for WS_SUBSCRIPTION_UNRESPONSIVE_TTL), used to trigger URL failover', labelNames: ['transport_name'] as const, }), })) From 245747c76b3b5fb4e49ae18d9502979f89db1473 Mon Sep 17 00:00:00 2001 From: cawthorne Date: Wed, 4 Feb 2026 22:37:39 +0000 Subject: [PATCH 4/4] run prettier --- src/transports/websocket.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/transports/websocket.ts b/src/transports/websocket.ts index 8de94291..de74ac64 100644 --- a/src/transports/websocket.ts +++ b/src/transports/websocket.ts @@ -417,7 +417,10 @@ export class WebSocketTransport< logger.info( `The connection is unresponsive (last message ${timeSinceLastMessage}ms ago), incremented failover counter to ${this.streamHandlerInvocationsWithNoConnection}`, ) - metrics.get('wsConnectionFailoverCount').labels({ transport_name: this.name }).set(this.streamHandlerInvocationsWithNoConnection) + metrics + .get('wsConnectionFailoverCount') + .labels({ transport_name: this.name }) + .set(this.streamHandlerInvocationsWithNoConnection) } // We want to check if the URL we calculate is different from the one currently connected. @@ -434,9 +437,7 @@ export class WebSocketTransport< if (urlChanged) { logger.info('Websocket URL has changed, closing connection to reconnect...') censorLogs(() => - logger.debug( - `Websocket URL changed from ${this.currentUrl} to ${urlFromConfig}`, - ), + logger.debug(`Websocket URL changed from ${this.currentUrl} to ${urlFromConfig}`), ) } else { censorLogs(() =>