Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import * as Sentry from '@sentry/node';
import { loggingTransport } from '@sentry-internal/node-integration-tests';

// How often the integration samples runtime metrics in this scenario.
const COLLECTION_INTERVAL_MS = 100;
// How long the scenario stays alive before flushing.
const WAIT_BEFORE_FLUSH_MS = 250;

// Opt out of every metric group that is explicitly listed below; `memory` is left untouched.
Sentry.init({
  dsn: 'https://public@dsn.ingest.sentry.io/1337',
  release: '1.0.0',
  environment: 'test',
  transport: loggingTransport,
  integrations: [
    Sentry.nodeRuntimeMetricsIntegration({
      collectionIntervalMs: COLLECTION_INTERVAL_MS,
      collect: {
        cpu: false,
        eventLoopDelay: false,
        eventLoopUtilization: false,
        uptime: false,
      },
    }),
  ],
});

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(done => {
    setTimeout(done, ms);
  });
}

/** Keeps the process alive for a short while, then flushes whatever Sentry has buffered. */
async function run(): Promise<void> {
  await sleep(WAIT_BEFORE_FLUSH_MS);
  await Sentry.flush();
}

// eslint-disable-next-line @typescript-eslint/no-floating-promises
run();
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import * as Sentry from '@sentry/node';
import { loggingTransport } from '@sentry-internal/node-integration-tests';

// Sampling period handed to the runtime metrics integration.
const METRICS_INTERVAL_MS = 100;
// Time to wait before flushing.
const SETTLE_TIME_MS = 250;

const runtimeMetrics = Sentry.nodeRuntimeMetricsIntegration({
  collectionIntervalMs: METRICS_INTERVAL_MS,
});

Sentry.init({
  dsn: 'https://public@dsn.ingest.sentry.io/1337',
  release: '1.0.0',
  environment: 'test',
  transport: loggingTransport,
  integrations: [runtimeMetrics],
});

/** Waits for the settle time to pass and then flushes the SDK. */
async function run(): Promise<void> {
  // Wait long enough for the collection interval to fire at least once.
  await new Promise<void>(wake => {
    setTimeout(wake, SETTLE_TIME_MS);
  });
  await Sentry.flush();
}

// eslint-disable-next-line @typescript-eslint/no-floating-promises
run();
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { afterAll, describe, expect, test } from 'vitest';
import { cleanupChildProcesses, createRunner } from '../../utils/runner';

describe('nodeRuntimeMetricsIntegration', () => {
  afterAll(() => {
    cleanupChildProcesses();
  });

  // With default options every metric group must show up in the metric envelope.
  test('emits runtime metrics', async () => {
    const runner = createRunner(__dirname, 'scenario.ts')
      .expect({
        trace_metric: {
          items: expect.arrayContaining([
            expect.objectContaining({ name: 'node.runtime.mem.rss', type: 'gauge', unit: 'byte' }),
            expect.objectContaining({ name: 'node.runtime.mem.heap_total', type: 'gauge', unit: 'byte' }),
            expect.objectContaining({ name: 'node.runtime.mem.heap_used', type: 'gauge', unit: 'byte' }),
            expect.objectContaining({ name: 'node.runtime.mem.external', type: 'gauge', unit: 'byte' }),
            expect.objectContaining({ name: 'node.runtime.cpu.user', type: 'gauge', unit: 'second' }),
            expect.objectContaining({ name: 'node.runtime.cpu.system', type: 'gauge', unit: 'second' }),
            expect.objectContaining({ name: 'node.runtime.cpu.percent', type: 'gauge', unit: '1' }),
            expect.objectContaining({ name: 'node.runtime.event_loop.delay.min', type: 'gauge', unit: 'second' }),
            expect.objectContaining({ name: 'node.runtime.event_loop.delay.max', type: 'gauge', unit: 'second' }),
            expect.objectContaining({ name: 'node.runtime.event_loop.delay.mean', type: 'gauge', unit: 'second' }),
            expect.objectContaining({ name: 'node.runtime.event_loop.delay.p50', type: 'gauge', unit: 'second' }),
            expect.objectContaining({ name: 'node.runtime.event_loop.delay.p90', type: 'gauge', unit: 'second' }),
            expect.objectContaining({ name: 'node.runtime.event_loop.delay.p99', type: 'gauge', unit: 'second' }),
            expect.objectContaining({ name: 'node.runtime.event_loop.utilization', type: 'gauge', unit: '1' }),
            expect.objectContaining({ name: 'node.runtime.process.uptime', type: 'counter', unit: 'second' }),
          ]),
        },
      })
      .start();

    await runner.completed();
  });

  // The opt-out scenario disables cpu, eventLoopDelay, eventLoopUtilization and uptime.
  // Checking only that a memory metric is present would also pass if the opt-out was ignored,
  // so additionally assert that no metric from a disabled group was emitted.
  test('respects opt-out options', async () => {
    const optedOutPrefixes = ['node.runtime.cpu.', 'node.runtime.event_loop.', 'node.runtime.process.'];

    const runner = createRunner(__dirname, 'scenario-opt-out.ts')
      .expect({
        // NOTE(review): assumes the runner accepts a callback expectation for `trace_metric` — confirm
        // against utils/runner.
        trace_metric: container => {
          expect(container.items).toEqual(
            expect.arrayContaining([expect.objectContaining({ name: 'node.runtime.mem.rss', type: 'gauge' })]),
          );

          const unexpectedNames = container.items
            .map(item => item.name)
            .filter(name => optedOutPrefixes.some(prefix => name.startsWith(prefix)));
          expect(unexpectedNames).toEqual([]);
        },
      })
      .start();

    await runner.completed();
  });
});
1 change: 1 addition & 0 deletions packages/node-core/src/common-exports.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import * as logger from './logs/exports';

// Node-core integrations (not OTel-dependent)
export { nodeContextIntegration } from './integrations/context';
export { nodeRuntimeMetricsIntegration, type NodeRuntimeMetricsOptions } from './integrations/nodeRuntimeMetrics';
export { contextLinesIntegration } from './integrations/contextlines';
export { localVariablesIntegration } from './integrations/local-variables';
export { modulesIntegration } from './integrations/modules';
Expand Down
150 changes: 150 additions & 0 deletions packages/node-core/src/integrations/nodeRuntimeMetrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import { monitorEventLoopDelay, performance } from 'perf_hooks';
import { defineIntegration, flushIfServerless, metrics } from '@sentry/core';

// Value returned as the integration's `name`.
const INTEGRATION_NAME = 'NodeRuntimeMetrics';
// Collection interval (in milliseconds) used when `collectionIntervalMs` is not provided.
const DEFAULT_INTERVAL_MS = 30_000;

/**
* Options accepted by `nodeRuntimeMetricsIntegration`.
*/
export interface NodeRuntimeMetricsOptions {
/**
* Which metric groups to collect. All groups are enabled by default.
*/
collect?: {
/** CPU time and CPU-to-wall-clock ratio: `node.runtime.cpu.user`, `.system` and `.percent`. */
cpu?: boolean;
/** Memory usage read from `process.memoryUsage()`: `node.runtime.mem.*`. */
memory?: boolean;
/** Event loop delay statistics (min, max, mean, p50, p90, p99): `node.runtime.event_loop.delay.*`. */
eventLoopDelay?: boolean;
/** Event loop utilization since the previous collection: `node.runtime.event_loop.utilization`. */
eventLoopUtilization?: boolean;
/** Time elapsed since the previous collection, emitted as the counter `node.runtime.process.uptime`. */
uptime?: boolean;
};
/**
* How often to collect metrics, in milliseconds.
* @default 30000
*/
collectionIntervalMs?: number;
}

/**
 * Automatically collects Node.js runtime metrics and emits them to Sentry.
 *
 * Once set up, the integration samples the enabled metric groups (CPU, memory, event loop delay,
 * event loop utilization, uptime) on a fixed interval and once more whenever the process emits
 * `beforeExit`. CPU, event loop utilization and uptime are reported as deltas since the previous
 * collection.
 *
 * @param options - Which groups to collect and how often. A missing, non-finite or non-positive
 *   `collectionIntervalMs` falls back to the default interval.
 *
 * @example
 * ```ts
 * Sentry.init({
 *   integrations: [
 *     Sentry.nodeRuntimeMetricsIntegration(),
 *   ],
 * });
 * ```
 */
export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRuntimeMetricsOptions = {}) => {
  // `setInterval` coerces delays below 1 (and NaN) to 1ms, which would flood the metrics buffer,
  // so anything that is not a positive finite number falls back to the default.
  const requestedIntervalMs = options.collectionIntervalMs;
  const collectionIntervalMs =
    requestedIntervalMs !== undefined && Number.isFinite(requestedIntervalMs) && requestedIntervalMs > 0
      ? requestedIntervalMs
      : DEFAULT_INTERVAL_MS;

  // Resolve each flag individually so that an explicit `undefined` keeps the default (enabled)
  // instead of overriding it, which is what spreading `options.collect` would do.
  const collect = {
    cpu: options.collect?.cpu ?? true,
    memory: options.collect?.memory ?? true,
    eventLoopDelay: options.collect?.eventLoopDelay ?? true,
    eventLoopUtilization: options.collect?.eventLoopUtilization ?? true,
    uptime: options.collect?.uptime ?? true,
  };

  let intervalId: ReturnType<typeof setInterval> | undefined;
  let prevCpuUsage: NodeJS.CpuUsage | undefined;
  let prevElu: ReturnType<typeof performance.eventLoopUtilization> | undefined;
  // Timestamp of the previous collection, in seconds on the `process.uptime()` clock.
  let prevCollectionTimeS: number | undefined;
  let eventLoopDelayHistogram: ReturnType<typeof monitorEventLoopDelay> | undefined;

  /** Samples every enabled metric group once and advances the baselines used for delta metrics. */
  function collectMetrics(): void {
    // `process.uptime()` is monotonic, so the elapsed time cannot go negative or jump when the
    // system wall clock is adjusted.
    const nowS = process.uptime();
    const elapsedS = prevCollectionTimeS === undefined ? 0 : nowS - prevCollectionTimeS;

    if (collect.cpu && prevCpuUsage !== undefined) {
      // Take a single reading and use it both for the delta and as the next baseline, so that no
      // CPU time is lost between two separate `process.cpuUsage()` calls.
      const currentCpuUsage = process.cpuUsage();
      const userUs = currentCpuUsage.user - prevCpuUsage.user;
      const systemUs = currentCpuUsage.system - prevCpuUsage.system;

      metrics.gauge('node.runtime.cpu.user', userUs / 1e6, { unit: 'second' });
      metrics.gauge('node.runtime.cpu.system', systemUs / 1e6, { unit: 'second' });
      if (elapsedS > 0) {
        // Ratio of CPU time to wall-clock time. Can exceed 1.0 on multi-core systems.
        // TODO: In cluster mode, add a runtime_id/process_id attribute to disambiguate per-worker metrics.
        metrics.gauge('node.runtime.cpu.percent', (userUs + systemUs) / 1e6 / elapsedS, { unit: '1' });
      }
      prevCpuUsage = currentCpuUsage;
    }

    if (collect.memory) {
      const mem = process.memoryUsage();
      metrics.gauge('node.runtime.mem.rss', mem.rss, { unit: 'byte' });
      metrics.gauge('node.runtime.mem.heap_total', mem.heapTotal, { unit: 'byte' });
      metrics.gauge('node.runtime.mem.heap_used', mem.heapUsed, { unit: 'byte' });
      metrics.gauge('node.runtime.mem.external', mem.external, { unit: 'byte' });
      if (mem.arrayBuffers !== undefined) {
        metrics.gauge('node.runtime.mem.array_buffers', mem.arrayBuffers, { unit: 'byte' });
      }
    }

    // Skip the group when no sample was recorded since the last reset (e.g. `beforeExit` firing
    // right after an interval tick): an empty histogram would otherwise be reported with
    // placeholder values such as a NaN mean.
    if (collect.eventLoopDelay && eventLoopDelayHistogram && eventLoopDelayHistogram.count > 0) {
      // Resolution is 10ms (10_000_000 ns) as configured in `setup`. Subtract it to normalize out sampling overhead.
      const resolutionNs = 10_000_000;
      const nsToS = (ns: number): number => Math.max(0, (ns - resolutionNs) / 1e9);

      metrics.gauge('node.runtime.event_loop.delay.min', nsToS(eventLoopDelayHistogram.min), { unit: 'second' });
      metrics.gauge('node.runtime.event_loop.delay.max', nsToS(eventLoopDelayHistogram.max), { unit: 'second' });
      metrics.gauge('node.runtime.event_loop.delay.mean', nsToS(eventLoopDelayHistogram.mean), { unit: 'second' });
      metrics.gauge('node.runtime.event_loop.delay.p50', nsToS(eventLoopDelayHistogram.percentile(50)), {
        unit: 'second',
      });
      metrics.gauge('node.runtime.event_loop.delay.p90', nsToS(eventLoopDelayHistogram.percentile(90)), {
        unit: 'second',
      });
      metrics.gauge('node.runtime.event_loop.delay.p99', nsToS(eventLoopDelayHistogram.percentile(99)), {
        unit: 'second',
      });

      // Start a fresh window so the next collection only reflects delays observed after this one.
      eventLoopDelayHistogram.reset();
    }

    if (collect.eventLoopUtilization && prevElu !== undefined) {
      const currentElu = performance.eventLoopUtilization();
      const delta = performance.eventLoopUtilization(currentElu, prevElu);
      metrics.gauge('node.runtime.event_loop.utilization', delta.utilization, { unit: '1' });
      prevElu = currentElu;
    }

    if (collect.uptime && elapsedS > 0) {
      // Emitted as a counter increment: the time that passed since the previous collection.
      metrics.count('node.runtime.process.uptime', elapsedS, { unit: 'second' });
    }

    prevCollectionTimeS = nowS;
  }

  return {
    name: INTEGRATION_NAME,

    setup(): void {
      if (collect.eventLoopDelay) {
        try {
          eventLoopDelayHistogram = monitorEventLoopDelay({ resolution: 10 });
          eventLoopDelayHistogram.enable();
        } catch {
          // Not available in all runtimes (e.g. Bun throws NotImplementedError).
        }
      }

      // Prime baselines before the first collection interval.
      if (collect.cpu) {
        prevCpuUsage = process.cpuUsage();
      }
      if (collect.eventLoopUtilization) {
        prevElu = performance.eventLoopUtilization();
      }
      prevCollectionTimeS = process.uptime();

      intervalId = setInterval(collectMetrics, collectionIntervalMs);
      // Do not keep the process alive solely for metric collection.
      intervalId.unref();

      // In serverless environments the process may not live long enough to hit the interval.
      // Collect and flush eagerly whenever the event loop drains (end of invocation).
      process.on('beforeExit', () => {
        collectMetrics();
        void flushIfServerless();
      });
    },
  };
});
Loading
Loading