From 697e3f235ca5f2500b7b594b25f4392dc6ca609e Mon Sep 17 00:00:00 2001 From: Charly Gomez Date: Fri, 20 Mar 2026 17:04:56 +0100 Subject: [PATCH] wip --- .../node-runtime-metrics/scenario-opt-out.ts | 28 ++ .../suites/node-runtime-metrics/scenario.ts | 23 ++ .../suites/node-runtime-metrics/test.ts | 48 ++++ packages/node-core/src/common-exports.ts | 1 + .../src/integrations/nodeRuntimeMetrics.ts | 150 ++++++++++ .../integrations/nodeRuntimeMetrics.test.ts | 271 ++++++++++++++++++ packages/node/src/index.ts | 2 + 7 files changed, 523 insertions(+) create mode 100644 dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts create mode 100644 dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario.ts create mode 100644 dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts create mode 100644 packages/node-core/src/integrations/nodeRuntimeMetrics.ts create mode 100644 packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts new file mode 100644 index 000000000000..f6a62c561ceb --- /dev/null +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts @@ -0,0 +1,28 @@ +import * as Sentry from '@sentry/node'; +import { loggingTransport } from '@sentry-internal/node-integration-tests'; + +Sentry.init({ + dsn: 'https://public@dsn.ingest.sentry.io/1337', + release: '1.0.0', + environment: 'test', + transport: loggingTransport, + integrations: [ + Sentry.nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 100, + collect: { + cpu: false, + eventLoopDelay: false, + eventLoopUtilization: false, + uptime: false, + }, + }), + ], +}); + +async function run(): Promise { + await new Promise(resolve => setTimeout(resolve, 250)); + await Sentry.flush(); +} + +// eslint-disable-next-line @typescript-eslint/no-floating-promises +run(); diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario.ts new file mode 100644 index 000000000000..b862634c719a --- /dev/null +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario.ts @@ -0,0 +1,23 @@ +import * as Sentry from '@sentry/node'; +import { loggingTransport } from '@sentry-internal/node-integration-tests'; + +Sentry.init({ + dsn: 'https://public@dsn.ingest.sentry.io/1337', + release: '1.0.0', + environment: 'test', + transport: loggingTransport, + integrations: [ + Sentry.nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 100, + }), + ], +}); + +async function run(): Promise { + // Wait long enough for the collection interval to fire at least once. + await new Promise(resolve => setTimeout(resolve, 250)); + await Sentry.flush(); +} + +// eslint-disable-next-line @typescript-eslint/no-floating-promises +run(); diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts new file mode 100644 index 000000000000..e4af3f5f0918 --- /dev/null +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts @@ -0,0 +1,48 @@ +import { afterAll, describe, expect, test } from 'vitest'; +import { cleanupChildProcesses, createRunner } from '../../utils/runner'; + +describe('nodeRuntimeMetricsIntegration', () => { + afterAll(() => { + cleanupChildProcesses(); + }); + + test('emits runtime metrics', async () => { + const runner = createRunner(__dirname, 'scenario.ts') + .expect({ + trace_metric: { + items: expect.arrayContaining([ + expect.objectContaining({ name: 'node.runtime.mem.rss', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.mem.heap_total', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.mem.heap_used', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.mem.external', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.cpu.user', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.cpu.system', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.cpu.percent', type: 'gauge', unit: '1' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.min', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.max', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.mean', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.p50', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.p90', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.p99', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.utilization', type: 'gauge', unit: '1' }), + expect.objectContaining({ name: 'node.runtime.process.uptime', type: 'counter', unit: 'second' }), + ]), + }, + }) + .start(); + + await runner.completed(); + }); + + test('respects opt-out options', async () => { + const runner = createRunner(__dirname, 'scenario-opt-out.ts') + .expect({ + trace_metric: { + items: expect.arrayContaining([expect.objectContaining({ name: 'node.runtime.mem.rss', type: 'gauge' })]), + }, + }) + .start(); + + await runner.completed(); + }); +}); diff --git a/packages/node-core/src/common-exports.ts b/packages/node-core/src/common-exports.ts index 3fff4100b352..d6d1e070ef85 100644 --- a/packages/node-core/src/common-exports.ts +++ b/packages/node-core/src/common-exports.ts @@ -12,6 +12,7 @@ import * as logger from './logs/exports'; // Node-core integrations (not OTel-dependent) export { nodeContextIntegration } from './integrations/context'; +export { nodeRuntimeMetricsIntegration, type NodeRuntimeMetricsOptions } from './integrations/nodeRuntimeMetrics'; export { contextLinesIntegration } from './integrations/contextlines'; export { localVariablesIntegration } from './integrations/local-variables'; export { modulesIntegration } from './integrations/modules'; diff --git a/packages/node-core/src/integrations/nodeRuntimeMetrics.ts b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts new file mode 100644 index 000000000000..22b20cc7b8e1 --- /dev/null +++ b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts @@ -0,0 +1,150 @@ +import { monitorEventLoopDelay, performance } from 'perf_hooks'; +import { defineIntegration, flushIfServerless, metrics } from '@sentry/core'; + +const INTEGRATION_NAME = 'NodeRuntimeMetrics'; +const DEFAULT_INTERVAL_MS = 30_000; + +export interface NodeRuntimeMetricsOptions { + /** + * Which metric groups to collect. All groups are enabled by default. + */ + collect?: { + cpu?: boolean; + memory?: boolean; + eventLoopDelay?: boolean; + eventLoopUtilization?: boolean; + uptime?: boolean; + }; + /** + * How often to collect metrics, in milliseconds. + * @default 30000 + */ + collectionIntervalMs?: number; +} + +/** + * Automatically collects Node.js runtime metrics and emits them to Sentry. + * + * @example + * ```ts + * Sentry.init({ + * integrations: [ + * Sentry.nodeRuntimeMetricsIntegration(), + * ], + * }); + * ``` + */ +export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRuntimeMetricsOptions = {}) => { + const collectionIntervalMs = options.collectionIntervalMs ?? DEFAULT_INTERVAL_MS; + const collect = { + cpu: true, + memory: true, + eventLoopDelay: true, + eventLoopUtilization: true, + uptime: true, + ...options.collect, + }; + + let intervalId: ReturnType | undefined; + let prevCpuUsage: NodeJS.CpuUsage | undefined; + let prevElu: ReturnType | undefined; + let prevFlushTime: number | undefined; + let eventLoopDelayHistogram: ReturnType | undefined; + + function collectMetrics(): void { + const now = Date.now(); + const elapsed = now - (prevFlushTime ?? now); + + if (collect.cpu && prevCpuUsage !== undefined) { + const delta = process.cpuUsage(prevCpuUsage); + metrics.gauge('node.runtime.cpu.user', delta.user / 1e6, { unit: 'second' }); + metrics.gauge('node.runtime.cpu.system', delta.system / 1e6, { unit: 'second' }); + if (elapsed > 0) { + // Ratio of CPU time to wall-clock time. Can exceed 1.0 on multi-core systems. + // TODO: In cluster mode, add a runtime_id/process_id attribute to disambiguate per-worker metrics. + metrics.gauge('node.runtime.cpu.percent', (delta.user + delta.system) / (elapsed * 1000), { unit: '1' }); + } + prevCpuUsage = process.cpuUsage(); + } + + if (collect.memory) { + const mem = process.memoryUsage(); + metrics.gauge('node.runtime.mem.rss', mem.rss, { unit: 'byte' }); + metrics.gauge('node.runtime.mem.heap_total', mem.heapTotal, { unit: 'byte' }); + metrics.gauge('node.runtime.mem.heap_used', mem.heapUsed, { unit: 'byte' }); + metrics.gauge('node.runtime.mem.external', mem.external, { unit: 'byte' }); + if (mem.arrayBuffers !== undefined) { + metrics.gauge('node.runtime.mem.array_buffers', mem.arrayBuffers, { unit: 'byte' }); + } + } + + if (collect.eventLoopDelay && eventLoopDelayHistogram) { + // Resolution is 10ms (10_000_000 ns) as configured below. Subtract it to normalize out sampling overhead. + const resolutionNs = 10_000_000; + const nsToS = (ns: number): number => Math.max(0, (ns - resolutionNs) / 1e9); + + metrics.gauge('node.runtime.event_loop.delay.min', nsToS(eventLoopDelayHistogram.min), { unit: 'second' }); + metrics.gauge('node.runtime.event_loop.delay.max', nsToS(eventLoopDelayHistogram.max), { unit: 'second' }); + metrics.gauge('node.runtime.event_loop.delay.mean', nsToS(eventLoopDelayHistogram.mean), { unit: 'second' }); + metrics.gauge('node.runtime.event_loop.delay.p50', nsToS(eventLoopDelayHistogram.percentile(50)), { + unit: 'second', + }); + metrics.gauge('node.runtime.event_loop.delay.p90', nsToS(eventLoopDelayHistogram.percentile(90)), { + unit: 'second', + }); + metrics.gauge('node.runtime.event_loop.delay.p99', nsToS(eventLoopDelayHistogram.percentile(99)), { + unit: 'second', + }); + + eventLoopDelayHistogram.reset(); + } + + if (collect.eventLoopUtilization && prevElu !== undefined) { + const currentElu = performance.eventLoopUtilization(); + const delta = performance.eventLoopUtilization(currentElu, prevElu); + metrics.gauge('node.runtime.event_loop.utilization', delta.utilization, { unit: '1' }); + prevElu = currentElu; + } + + if (collect.uptime && elapsed > 0) { + metrics.count('node.runtime.process.uptime', elapsed / 1000, { unit: 'second' }); + } + + prevFlushTime = now; + } + + return { + name: INTEGRATION_NAME, + + setup(): void { + if (collect.eventLoopDelay) { + try { + eventLoopDelayHistogram = monitorEventLoopDelay({ resolution: 10 }); + eventLoopDelayHistogram.enable(); + } catch { + // Not available in all runtimes (e.g. Bun throws NotImplementedError). + } + } + + // Prime baselines before the first collection interval. + if (collect.cpu) { + prevCpuUsage = process.cpuUsage(); + } + if (collect.eventLoopUtilization) { + prevElu = performance.eventLoopUtilization(); + } + prevFlushTime = Date.now(); + + intervalId = setInterval(collectMetrics, collectionIntervalMs); + // Do not keep the process alive solely for metric collection. + intervalId.unref(); + + // In serverless environments the process may not live long enough to hit the interval. + // Collect and flush eagerly whenever the event loop drains (end of invocation). + process.on('beforeExit', () => { + collectMetrics(); + void flushIfServerless(); + }); + }, + }; +}); diff --git a/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts b/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts new file mode 100644 index 000000000000..f970fbcd9148 --- /dev/null +++ b/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts @@ -0,0 +1,271 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { metrics } from '@sentry/core'; +import { nodeRuntimeMetricsIntegration } from '../../src/integrations/nodeRuntimeMetrics'; + +const { mockHistogram, mockMonitorEventLoopDelay, mockPerformance } = vi.hoisted(() => { + const mockHistogram = { + min: 2_000_000, + max: 20_000_000, + mean: 10_000_000, + percentile: vi.fn((p: number) => { + if (p === 50) return 8_000_000; + if (p === 90) return 15_000_000; + if (p === 99) return 19_000_000; + return 0; + }), + enable: vi.fn(), + reset: vi.fn(), + disable: vi.fn(), + }; + + const mockMonitorEventLoopDelay = vi.fn(() => mockHistogram); + const mockElu = { idle: 700, active: 300, utilization: 0.3 }; + const mockEluDelta = { idle: 700, active: 300, utilization: 0.3 }; + const mockPerformance = { + eventLoopUtilization: vi.fn((curr?: object, _prev?: object) => { + if (curr) return mockEluDelta; + return mockElu; + }), + }; + + return { mockHistogram, mockMonitorEventLoopDelay, mockPerformance }; +}); + +vi.mock('perf_hooks', () => ({ + monitorEventLoopDelay: mockMonitorEventLoopDelay, + performance: mockPerformance, +})); + +vi.mock('@sentry/core', async () => { + const actual = await vi.importActual('@sentry/core'); + return { + ...actual, + flushIfServerless: vi.fn(), + }; +}); + +describe('nodeRuntimeMetricsIntegration', () => { + let gaugeSpy: ReturnType; + let countSpy: ReturnType; + + beforeEach(() => { + vi.useFakeTimers(); + gaugeSpy = vi.spyOn(metrics, 'gauge'); + countSpy = vi.spyOn(metrics, 'count'); + + vi.spyOn(process, 'cpuUsage').mockReturnValue({ user: 500_000, system: 200_000 }); + vi.spyOn(process, 'memoryUsage').mockReturnValue({ + rss: 50_000_000, + heapTotal: 30_000_000, + heapUsed: 20_000_000, + external: 1_000_000, + arrayBuffers: 500_000, + }); + + mockHistogram.percentile.mockClear(); + mockHistogram.enable.mockClear(); + mockHistogram.reset.mockClear(); + mockMonitorEventLoopDelay.mockClear(); + mockPerformance.eventLoopUtilization.mockClear(); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + }); + + it('has the correct name', () => { + const integration = nodeRuntimeMetricsIntegration(); + expect(integration.name).toBe('NodeRuntimeMetrics'); + }); + + describe('setup', () => { + it('initializes event loop delay histogram with resolution 10', () => { + const integration = nodeRuntimeMetricsIntegration(); + integration.setup(); + + expect(mockMonitorEventLoopDelay).toHaveBeenCalledWith({ resolution: 10 }); + expect(mockHistogram.enable).toHaveBeenCalledOnce(); + }); + + it('does not throw if monitorEventLoopDelay is unavailable (e.g. Bun)', () => { + mockMonitorEventLoopDelay.mockImplementationOnce(() => { + throw new Error('NotImplementedError'); + }); + + const integration = nodeRuntimeMetricsIntegration(); + expect(() => integration.setup()).not.toThrow(); + }); + + it('starts a collection interval', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + + expect(gaugeSpy).not.toHaveBeenCalled(); + vi.advanceTimersByTime(1_000); + expect(gaugeSpy).toHaveBeenCalled(); + }); + }); + + describe('metric collection', () => { + it('emits CPU metrics', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.user', expect.any(Number), { unit: 'second' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.system', expect.any(Number), { unit: 'second' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.percent', expect.any(Number), { unit: '1' }); + }); + + it('emits memory metrics', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.rss', 50_000_000, { unit: 'byte' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.heap_total', 30_000_000, { unit: 'byte' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.heap_used', 20_000_000, { unit: 'byte' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.external', 1_000_000, { unit: 'byte' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.array_buffers', 500_000, { unit: 'byte' }); + }); + + it('emits event loop delay metrics and resets histogram after collection', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + // min: (2_000_000 - 10_000_000) clamped to 0 → 0s + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.min', 0, { unit: 'second' }); + // max: (20_000_000 - 10_000_000) / 1e9 → 0.01s + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.max', 0.01, { unit: 'second' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.mean', 0, { unit: 'second' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p50', expect.any(Number), { + unit: 'second', + }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p90', expect.any(Number), { + unit: 'second', + }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p99', expect.any(Number), { + unit: 'second', + }); + expect(mockHistogram.reset).toHaveBeenCalledOnce(); + }); + + it('emits event loop utilization metric', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.utilization', 0.3, { unit: '1' }); + }); + + it('emits uptime counter', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(countSpy).toHaveBeenCalledWith('node.runtime.process.uptime', expect.any(Number), { unit: 'second' }); + }); + + it('does not emit event loop delay metrics if monitorEventLoopDelay threw', () => { + mockMonitorEventLoopDelay.mockImplementationOnce(() => { + throw new Error('NotImplementedError'); + }); + + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith( + 'node.runtime.event_loop.delay.min', + expect.anything(), + expect.anything(), + ); + }); + }); + + describe('opt-out', () => { + it('skips CPU metrics when collect.cpu is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { cpu: false }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.user', expect.anything(), expect.anything()); + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.system', expect.anything(), expect.anything()); + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.percent', expect.anything(), expect.anything()); + }); + + it('skips memory metrics when collect.memory is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { memory: false }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.mem.rss', expect.anything(), expect.anything()); + }); + + it('skips event loop delay when collect.eventLoopDelay is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { eventLoopDelay: false }, + }); + integration.setup(); + + expect(mockMonitorEventLoopDelay).not.toHaveBeenCalled(); + vi.advanceTimersByTime(1_000); + expect(gaugeSpy).not.toHaveBeenCalledWith( + 'node.runtime.event_loop.delay.min', + expect.anything(), + expect.anything(), + ); + }); + + it('skips event loop utilization when collect.eventLoopUtilization is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { eventLoopUtilization: false }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith( + 'node.runtime.event_loop.utilization', + expect.anything(), + expect.anything(), + ); + }); + + it('skips uptime when collect.uptime is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { uptime: false }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(countSpy).not.toHaveBeenCalledWith('node.runtime.process.uptime', expect.anything(), expect.anything()); + }); + }); + + describe('serverless flush', () => { + it('collects metrics and calls flushIfServerless on beforeExit', async () => { + const { flushIfServerless } = await import('@sentry/core'); + const flushSpy = vi.mocked(flushIfServerless); + + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 60_000 }); + integration.setup(); + + // Interval has not fired yet — beforeExit should still trigger a collection + flush. + process.emit('beforeExit', 0); + + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.rss', expect.any(Number), { unit: 'byte' }); + expect(flushSpy).toHaveBeenCalled(); + }); + }); +}); diff --git a/packages/node/src/index.ts b/packages/node/src/index.ts index 8458dee5f6a7..67fe97e59300 100644 --- a/packages/node/src/index.ts +++ b/packages/node/src/index.ts @@ -188,6 +188,8 @@ export { spotlightIntegration, childProcessIntegration, processSessionIntegration, + nodeRuntimeMetricsIntegration, + type NodeRuntimeMetricsOptions, pinoIntegration, createSentryWinstonTransport, SentryContextManager,