diff --git a/.size-limit.js b/.size-limit.js index 207fd6a2b85b..3991872c617b 100644 --- a/.size-limit.js +++ b/.size-limit.js @@ -38,7 +38,7 @@ module.exports = [ path: 'packages/browser/build/npm/esm/prod/index.js', import: createImport('init', 'browserTracingIntegration'), gzip: true, - limit: '42 KB', + limit: '43 KB', }, { name: '@sentry/browser (incl. Tracing, Profiling)', diff --git a/dev-packages/browser-integration-tests/suites/tracing/request/fetch-data-url/init.js b/dev-packages/browser-integration-tests/suites/tracing/request/fetch-data-url/init.js new file mode 100644 index 000000000000..5ab240338c8c --- /dev/null +++ b/dev-packages/browser-integration-tests/suites/tracing/request/fetch-data-url/init.js @@ -0,0 +1,10 @@ +import * as Sentry from '@sentry/browser'; + +window.Sentry = Sentry; + +Sentry.init({ + dsn: 'https://public@dsn.ingest.sentry.io/1337', + integrations: [Sentry.browserTracingIntegration()], + tracesSampleRate: 1, + autoSessionTracking: false, +}); diff --git a/dev-packages/browser-integration-tests/suites/tracing/request/fetch-data-url/subject.js b/dev-packages/browser-integration-tests/suites/tracing/request/fetch-data-url/subject.js new file mode 100644 index 000000000000..63d2d14fbd43 --- /dev/null +++ b/dev-packages/browser-integration-tests/suites/tracing/request/fetch-data-url/subject.js @@ -0,0 +1,6 @@ +// Fetch a data URL to verify that the span name and attributes are sanitized +// Data URLs are used for inline resources, e.g., Web Workers with inline scripts +const dataUrl = 'data:text/plain;base64,SGVsbG8gV29ybGQh'; +fetch(dataUrl).catch(() => { + // Data URL fetch might fail in some browsers, but the span should still be created +}); diff --git a/dev-packages/browser-integration-tests/suites/tracing/request/fetch-data-url/test.ts b/dev-packages/browser-integration-tests/suites/tracing/request/fetch-data-url/test.ts new file mode 100644 index 000000000000..46995dd6c152 --- /dev/null +++ 
b/dev-packages/browser-integration-tests/suites/tracing/request/fetch-data-url/test.ts @@ -0,0 +1,35 @@ +import { expect } from '@playwright/test'; +import { sentryTest } from '../../../../utils/fixtures'; +import { + envelopeRequestParser, + shouldSkipTracingTest, + waitForTransactionRequestOnUrl, +} from '../../../../utils/helpers'; + +sentryTest('sanitizes data URLs in fetch span name and attributes', async ({ getLocalTestUrl, page }) => { + if (shouldSkipTracingTest()) { + sentryTest.skip(); + } + + const url = await getLocalTestUrl({ testDir: __dirname }); + + const req = await waitForTransactionRequestOnUrl(page, url); + const transactionEvent = envelopeRequestParser(req); + + const requestSpans = transactionEvent.spans?.filter(({ op }) => op === 'http.client'); + + expect(requestSpans).toHaveLength(1); + + const span = requestSpans?.[0]; + + const sanitizedUrl = 'data:text/plain,base64,SGVsbG8gV2... [truncated]'; + expect(span?.description).toBe(`GET ${sanitizedUrl}`); + + expect(span?.data).toMatchObject({ + 'http.method': 'GET', + url: sanitizedUrl, + type: 'fetch', + }); + + expect(span?.data?.['http.url']).toBe(sanitizedUrl); +}); diff --git a/dev-packages/browser-integration-tests/suites/tracing/request/xhr-data-url/init.js b/dev-packages/browser-integration-tests/suites/tracing/request/xhr-data-url/init.js new file mode 100644 index 000000000000..5ab240338c8c --- /dev/null +++ b/dev-packages/browser-integration-tests/suites/tracing/request/xhr-data-url/init.js @@ -0,0 +1,10 @@ +import * as Sentry from '@sentry/browser'; + +window.Sentry = Sentry; + +Sentry.init({ + dsn: 'https://public@dsn.ingest.sentry.io/1337', + integrations: [Sentry.browserTracingIntegration()], + tracesSampleRate: 1, + autoSessionTracking: false, +}); diff --git a/dev-packages/browser-integration-tests/suites/tracing/request/xhr-data-url/subject.js b/dev-packages/browser-integration-tests/suites/tracing/request/xhr-data-url/subject.js new file mode 100644 index 
000000000000..76656f862519 --- /dev/null +++ b/dev-packages/browser-integration-tests/suites/tracing/request/xhr-data-url/subject.js @@ -0,0 +1,5 @@ +// XHR request to a data URL to verify that the span name and attributes are sanitized +const dataUrl = 'data:text/plain;base64,SGVsbG8gV29ybGQh'; +const xhr = new XMLHttpRequest(); +xhr.open('GET', dataUrl); +xhr.send(); diff --git a/dev-packages/browser-integration-tests/suites/tracing/request/xhr-data-url/test.ts b/dev-packages/browser-integration-tests/suites/tracing/request/xhr-data-url/test.ts new file mode 100644 index 000000000000..88bce31e1753 --- /dev/null +++ b/dev-packages/browser-integration-tests/suites/tracing/request/xhr-data-url/test.ts @@ -0,0 +1,30 @@ +import { expect } from '@playwright/test'; +import type { Event } from '@sentry/core'; +import { sentryTest } from '../../../../utils/fixtures'; +import { getFirstSentryEnvelopeRequest, shouldSkipTracingTest } from '../../../../utils/helpers'; + +sentryTest('sanitizes data URLs in XHR span name and attributes', async ({ getLocalTestUrl, page }) => { + if (shouldSkipTracingTest()) { + sentryTest.skip(); + } + + const url = await getLocalTestUrl({ testDir: __dirname }); + + const eventData = await getFirstSentryEnvelopeRequest(page, url); + const requestSpans = eventData.spans?.filter(({ op }) => op === 'http.client'); + + expect(requestSpans).toHaveLength(1); + + const span = requestSpans?.[0]; + + const sanitizedUrl = 'data:text/plain,base64,SGVsbG8gV2... 
[truncated]'; + expect(span?.description).toBe(`GET ${sanitizedUrl}`); + + expect(span?.data).toMatchObject({ + 'http.method': 'GET', + url: sanitizedUrl, + type: 'xhr', + }); + + expect(span?.data?.['http.url']).toBe(sanitizedUrl); +}); diff --git a/packages/browser/src/integrations/globalhandlers.ts b/packages/browser/src/integrations/globalhandlers.ts index 6bada802b98e..c8cd806d0062 100644 --- a/packages/browser/src/integrations/globalhandlers.ts +++ b/packages/browser/src/integrations/globalhandlers.ts @@ -9,6 +9,7 @@ import { getLocationHref, isPrimitive, isString, + stripDataUrlContent, UNKNOWN_FUNCTION, } from '@sentry/core'; import type { BrowserClient } from '../client'; @@ -208,14 +209,13 @@ function getFilenameFromUrl(url: string | undefined): string | undefined { return undefined; } - // stack frame urls can be data urls, for example when initializing a Worker with a base64 encoded script - // in this case we just show the data prefix and mime type to avoid too long raw data urls + // Strip data URL content to avoid long base64 strings in stack frames + // (e.g. when initializing a Worker with a base64 encoded script) + // Don't include data prefix for filenames as it's not useful for stack traces + // Wrap with < > to indicate it's a placeholder if (url.startsWith('data:')) { - const match = url.match(/^data:([^;]+)/); - const mimeType = match ? 
match[1] : 'text/javascript'; - const isBase64 = url.includes('base64,'); - return ``; + return `<${stripDataUrlContent(url, false)}>`; } - return url; // it's fine to not truncate it as it's not put in a regex (https://codeql.github.com/codeql-query-help/javascript/js-polynomial-redos) + return url; } diff --git a/packages/browser/src/tracing/request.ts b/packages/browser/src/tracing/request.ts index 025b08b12168..0c0e30629436 100644 --- a/packages/browser/src/tracing/request.ts +++ b/packages/browser/src/tracing/request.ts @@ -23,6 +23,7 @@ import { spanToJSON, startInactiveSpan, stringMatchesSomePattern, + stripDataUrlContent, stripUrlQueryAndFragment, } from '@sentry/core'; import type { XhrHint } from '@sentry-internal/browser-utils'; @@ -199,7 +200,7 @@ export function instrumentOutgoingRequests(client: Client, _options?: Partial @@ -317,9 +322,22 @@ function getSpanStartOptions( method: string, spanOrigin: SpanOrigin, ): Parameters[0] { + // Data URLs need special handling because parseStringToURLObject treats them as "relative" + // (no "://"), causing getSanitizedUrlStringFromUrlObject to return just the pathname + // without the "data:" prefix, making later stripDataUrlContent calls ineffective. + // So for data URLs, we strip the content first and use that directly. + if (url.startsWith('data:')) { + const sanitizedUrl = stripDataUrlContent(url); + return { + name: `${method} ${sanitizedUrl}`, + attributes: getFetchSpanAttributes(url, undefined, method, spanOrigin), + }; + } + const parsedUrl = parseStringToURLObject(url); + const sanitizedUrl = parsedUrl ? getSanitizedUrlStringFromUrlObject(parsedUrl) : url; return { - name: parsedUrl ? 
`${method} ${getSanitizedUrlStringFromUrlObject(parsedUrl)}` : method, + name: `${method} ${sanitizedUrl}`, attributes: getFetchSpanAttributes(url, parsedUrl, method, spanOrigin), }; } @@ -331,7 +349,7 @@ function getFetchSpanAttributes( spanOrigin: SpanOrigin, ): SpanAttributes { const attributes: SpanAttributes = { - url, + url: stripDataUrlContent(url), type: 'fetch', 'http.method': method, [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin, @@ -339,7 +357,7 @@ function getFetchSpanAttributes( }; if (parsedUrl) { if (!isURLObjectRelative(parsedUrl)) { - attributes['http.url'] = parsedUrl.href; + attributes['http.url'] = stripDataUrlContent(parsedUrl.href); attributes['server.address'] = parsedUrl.host; } if (parsedUrl.search) { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 0fdd328a42d2..19a83d230155 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -313,6 +313,7 @@ export { getHttpSpanDetailsFromUrlObject, isURLObjectRelative, getSanitizedUrlStringFromUrlObject, + stripDataUrlContent, } from './utils/url'; export { eventFromMessage, diff --git a/packages/core/src/utils/url.ts b/packages/core/src/utils/url.ts index ca09e6e8b5e7..bf0c17dbc278 100644 --- a/packages/core/src/utils/url.ts +++ b/packages/core/src/utils/url.ts @@ -263,3 +263,39 @@ export function getSanitizedUrlString(url: PartialURL): string { return `${protocol ? `${protocol}://` : ''}${filteredHost}${path}`; } + +/** + * Strips the content from a data URL, returning a placeholder with the MIME type. + * + * Data URLs can be very long (e.g. base64 encoded scripts for Web Workers), + * with little valuable information, often leading to envelopes getting dropped due + * to size limit violations. Therefore, we strip data URLs and replace them with a + * placeholder. 
+ * + * @param url - The URL to process + * @param includeDataPrefix - If true, includes the first 10 characters of the data stream + * for debugging (e.g., to identify magic bytes like WASM's AGFzbQ). + * Defaults to true. + * @returns For data URLs, returns a short format like `data:text/javascript,base64,SGVsbG8gV2... [truncated]`. + * For non-data URLs, returns the original URL unchanged. + */ +export function stripDataUrlContent(url: string, includeDataPrefix: boolean = true): string { + if (url.startsWith('data:')) { + // Match the MIME type (everything after 'data:' until the first ';' or ',') + const match = url.match(/^data:([^;,]+)/); + const mimeType = match ? match[1] : 'text/plain'; + const isBase64 = url.includes(';base64,'); + + // Find where the actual data starts (after the comma) + const dataStart = url.indexOf(','); + let dataPrefix = ''; + if (includeDataPrefix && dataStart !== -1) { + const data = url.slice(dataStart + 1); + // Include first 10 chars of data to help identify content (e.g., magic bytes) + dataPrefix = data.length > 10 ? `${data.slice(0, 10)}... [truncated]` : data; + } + + return `data:${mimeType}${isBase64 ? ',base64' : ''}${dataPrefix ? 
`,${dataPrefix}` : ''}`; + } + return url; +} diff --git a/packages/core/test/lib/utils/url.test.ts b/packages/core/test/lib/utils/url.test.ts index 33364d66daa5..7bdfcfd63804 100644 --- a/packages/core/test/lib/utils/url.test.ts +++ b/packages/core/test/lib/utils/url.test.ts @@ -6,6 +6,7 @@ import { isURLObjectRelative, parseStringToURLObject, parseUrl, + stripDataUrlContent, stripUrlQueryAndFragment, } from '../../../src/utils/url'; @@ -638,3 +639,119 @@ describe('getHttpSpanDetailsFromUrlObject', () => { }); }); }); + +describe('stripDataUrlContent', () => { + it('returns regular URLs unchanged', () => { + expect(stripDataUrlContent('https://example.com/api')).toBe('https://example.com/api'); + expect(stripDataUrlContent('http://localhost:3000/test')).toBe('http://localhost:3000/test'); + expect(stripDataUrlContent('/relative/path')).toBe('/relative/path'); + }); + + it('should be applied BEFORE parseStringToURLObject for data URLs', () => { + // This test documents an important behavior: + // Data URLs are treated as "relative" by parseStringToURLObject because they don't contain "://". + // This means getSanitizedUrlStringFromUrlObject returns just the pathname (without "data:" prefix), + // and stripDataUrlContent won't match since it checks url.startsWith('data:'). + // Therefore, stripDataUrlContent MUST be applied to the original URL before parsing. 
+ const dataUrl = 'data:text/javascript;base64,SGVsbG8gV29ybGQ='; + + // Verify data URLs are treated as relative + const parsedUrl = parseStringToURLObject(dataUrl); + expect(parsedUrl).toBeDefined(); + expect(isURLObjectRelative(parsedUrl!)).toBe(true); + + // getSanitizedUrlStringFromUrlObject returns just the pathname for relative URLs + const sanitizedWithoutStripping = getSanitizedUrlStringFromUrlObject(parsedUrl!); + // The pathname doesn't start with 'data:', so stripDataUrlContent wouldn't work on it + expect(sanitizedWithoutStripping.startsWith('data:')).toBe(false); + // Applying stripDataUrlContent AFTER parsing is ineffective + expect(stripDataUrlContent(sanitizedWithoutStripping)).toBe(sanitizedWithoutStripping); + + // CORRECT approach: strip data URL content FIRST, before any URL parsing + const strippedUrl = stripDataUrlContent(dataUrl); + // Default behavior includes first 10 chars of data for debugging (e.g., magic bytes) + expect(strippedUrl).toBe('data:text/javascript,base64,SGVsbG8gV2... [truncated]'); + // The stripped URL is already sanitized and can be used directly as the span name + }); + + describe('with includeDataPrefix=true (default)', () => { + it('includes first 10 chars of data for base64 data URLs', () => { + // SGVsbG8gV29ybGQ= is "Hello World" in base64 + expect(stripDataUrlContent('data:text/javascript;base64,SGVsbG8gV29ybGQ=')).toBe( + 'data:text/javascript,base64,SGVsbG8gV2... [truncated]', + ); + expect(stripDataUrlContent('data:application/json;base64,eyJrZXkiOiJ2YWx1ZSJ9')).toBe( + 'data:application/json,base64,eyJrZXkiOi... [truncated]', + ); + }); + + it('includes first 10 chars of data for non-base64 data URLs', () => { + expect(stripDataUrlContent('data:text/plain,Hello%20World')).toBe('data:text/plain,Hello%20Wo... [truncated]'); + expect(stripDataUrlContent('data:text/html,

Hello

')).toBe('data:text/html,

Hello<... [truncated]'); + }); + + it('includes all data if less than 10 chars', () => { + expect(stripDataUrlContent('data:text/plain,Hi')).toBe('data:text/plain,Hi'); + expect(stripDataUrlContent('data:text/plain;base64,SGk=')).toBe('data:text/plain,base64,SGk='); + }); + + it('helps identify WASM by magic bytes (AGFzbQ)', () => { + // WASM magic bytes: \0asm -> base64: AGFzbQ + const wasmDataUrl = 'data:application/wasm;base64,AGFzbQEAAAA='; + expect(stripDataUrlContent(wasmDataUrl)).toBe('data:application/wasm,base64,AGFzbQEAAA... [truncated]'); + }); + + it('handles various MIME types', () => { + expect(stripDataUrlContent('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA')).toBe( + 'data:image/png,base64,iVBORw0KGg... [truncated]', + ); + expect(stripDataUrlContent('data:image/svg+xml;base64,PHN2Zz4=')).toBe('data:image/svg+xml,base64,PHN2Zz4='); + }); + + it('defaults to text/plain for data URLs without MIME type', () => { + expect(stripDataUrlContent('data:,Hello')).toBe('data:text/plain,Hello'); + expect(stripDataUrlContent('data:;base64,SGVsbG8=')).toBe('data:text/plain,base64,SGVsbG8='); + }); + + it('handles empty data URLs', () => { + expect(stripDataUrlContent('data:')).toBe('data:text/plain'); + }); + + it('handles very long base64 encoded data URLs', () => { + const longBase64 = 'A'.repeat(10000); + expect(stripDataUrlContent(`data:text/javascript;base64,${longBase64}`)).toBe( + 'data:text/javascript,base64,AAAAAAAAAA... 
[truncated]', + ); + }); + }); + + describe('with includeDataPrefix=false', () => { + it('strips all content from base64 data URLs', () => { + expect(stripDataUrlContent('data:text/javascript;base64,SGVsbG8gV29ybGQ=', false)).toBe( + 'data:text/javascript,base64', + ); + expect(stripDataUrlContent('data:application/json;base64,eyJrZXkiOiJ2YWx1ZSJ9', false)).toBe( + 'data:application/json,base64', + ); + }); + + it('strips all content from non-base64 data URLs', () => { + expect(stripDataUrlContent('data:text/plain,Hello%20World', false)).toBe('data:text/plain'); + expect(stripDataUrlContent('data:text/html,

Hello

', false)).toBe('data:text/html'); + }); + + it('handles various MIME types', () => { + expect(stripDataUrlContent('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA', false)).toBe( + 'data:image/png,base64', + ); + expect(stripDataUrlContent('data:application/wasm;base64,AGFzbQEAAAA=', false)).toBe( + 'data:application/wasm,base64', + ); + }); + + it('defaults to text/plain for data URLs without MIME type', () => { + expect(stripDataUrlContent('data:,Hello', false)).toBe('data:text/plain'); + expect(stripDataUrlContent('data:;base64,SGVsbG8=', false)).toBe('data:text/plain,base64'); + }); + }); +}); diff --git a/packages/node/src/integrations/http.ts b/packages/node/src/integrations/http.ts index e6c48a6bd550..7c2cadf9eb43 100644 --- a/packages/node/src/integrations/http.ts +++ b/packages/node/src/integrations/http.ts @@ -3,7 +3,13 @@ import { diag } from '@opentelemetry/api'; import type { HttpInstrumentationConfig } from '@opentelemetry/instrumentation-http'; import { HttpInstrumentation } from '@opentelemetry/instrumentation-http'; import type { Span } from '@sentry/core'; -import { defineIntegration, getClient, hasSpansEnabled } from '@sentry/core'; +import { + defineIntegration, + getClient, + hasSpansEnabled, + SEMANTIC_ATTRIBUTE_URL_FULL, + stripDataUrlContent, +} from '@sentry/core'; import type { HTTPModuleRequestIncomingMessage, NodeClient, SentryHttpInstrumentationOptions } from '@sentry/node-core'; import { addOriginToSpan, @@ -282,6 +288,15 @@ function getConfigWithDefaults(options: Partial = {}): HttpInstrume requestHook: (span, req) => { addOriginToSpan(span, 'auto.http.otel.http'); + // Sanitize data URLs to prevent long base64 strings in span attributes + const url = getRequestUrl(req as ClientRequest); + if (url.startsWith('data:')) { + const sanitizedUrl = stripDataUrlContent(url); + span.setAttribute('http.url', sanitizedUrl); + span.setAttribute(SEMANTIC_ATTRIBUTE_URL_FULL, sanitizedUrl); + span.updateName(`${(req as ClientRequest).method || 
'GET'} ${sanitizedUrl}`); + } + options.instrumentation?.requestHook?.(span, req); }, responseHook: (span, res) => { diff --git a/packages/node/src/integrations/node-fetch.ts b/packages/node/src/integrations/node-fetch.ts index 6da9fd628bac..b07c1cce8628 100644 --- a/packages/node/src/integrations/node-fetch.ts +++ b/packages/node/src/integrations/node-fetch.ts @@ -1,7 +1,15 @@ import type { UndiciInstrumentationConfig } from '@opentelemetry/instrumentation-undici'; import { UndiciInstrumentation } from '@opentelemetry/instrumentation-undici'; import type { IntegrationFn } from '@sentry/core'; -import { defineIntegration, getClient, hasSpansEnabled, SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from '@sentry/core'; +import { + defineIntegration, + getClient, + hasSpansEnabled, + SEMANTIC_ATTRIBUTE_SENTRY_CUSTOM_SPAN_NAME, + SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN, + SEMANTIC_ATTRIBUTE_URL_FULL, + stripDataUrlContent, +} from '@sentry/core'; import type { NodeClient } from '@sentry/node-core'; import { generateInstrumentOnce, SentryNodeFetchInstrumentation } from '@sentry/node-core'; import type { NodeClientOptions } from '../types'; @@ -101,7 +109,20 @@ function getConfigWithDefaults(options: Partial = {}): UndiciI return !!shouldIgnore; }, - startSpanHook: () => { + startSpanHook: request => { + const url = getAbsoluteUrl(request.origin, request.path); + + // Sanitize data URLs to prevent long base64 strings in span attributes + if (url.startsWith('data:')) { + const sanitizedUrl = stripDataUrlContent(url); + return { + [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.http.otel.node_fetch', + 'http.url': sanitizedUrl, + [SEMANTIC_ATTRIBUTE_URL_FULL]: sanitizedUrl, + [SEMANTIC_ATTRIBUTE_SENTRY_CUSTOM_SPAN_NAME]: `${request.method || 'GET'} ${sanitizedUrl}`, + }; + } + return { [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.http.otel.node_fetch', };