From 3dbdef8c0c5a255e0c18dd79ae2617ea97454b30 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 23 Dec 2025 15:06:47 +0100 Subject: [PATCH 1/6] urllib: add support for capturing request and response headers --- .../instrumentation/urllib/__init__.py | 144 +++++++ .../tests/test_urllib_integration.py | 356 +++++++++++++++++- 2 files changed, 498 insertions(+), 2 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py index a368ed7a97..78f9a1c158 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py @@ -77,6 +77,99 @@ def response_hook(span: Span, request: Request, response: HTTPResponse): will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. +Capture HTTP request and response headers +***************************************** +You can configure the agent to capture specified HTTP headers as span attributes, according to the +`semantic conventions `_. + +Request headers +*************** +To capture HTTP request headers as span attributes, set the environment variable +``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST`` to a comma delimited list of HTTP header names. + +For example using the environment variable, +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST="content-type,custom_request_header" + +will extract ``content-type`` and ``custom_request_header`` from the request headers and add them as span attributes. + +Request header names in aiohttp are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment +variable will capture the header named ``custom-header``. 
+ +Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST="Accept.*,X-.*" + +Would match all request headers that start with ``Accept`` and ``X-``. + +To capture all request headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST`` to ``".*"``. +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST=".*" + +The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>`` +is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a +single item list containing all the header values. + +For example: +``http.request.header.custom_request_header = ["<value1>", "<value2>"]`` + +Response headers +**************** +To capture HTTP response headers as span attributes, set the environment variable +``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE`` to a comma delimited list of HTTP header names. + +For example using the environment variable, +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE="content-type,custom_response_header" + +will extract ``content-type`` and ``custom_response_header`` from the response headers and add them as span attributes. + +Response header names in aiohttp are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment +variable will capture the header named ``custom-header``. + +Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE="Content.*,X-.*" + +Would match all response headers that start with ``Content`` and ``X-``. + +To capture all response headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE`` to ``".*"``. 
+:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE=".*" + +The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>`` +is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a +list containing the header values. + +For example: +``http.response.header.custom_response_header = ["<value1>", "<value2>"]`` + +Sanitizing headers +****************** +In order to prevent storing sensitive data such as personally identifiable information (PII), session keys, passwords, +etc, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS`` +to a comma delimited list of HTTP header names to be sanitized. + +Regexes may be used, and all header names will be matched in a case-insensitive manner. + +For example using the environment variable, +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS=".*session.*,set-cookie" + +will replace the value of headers such as ``session-id`` and ``set-cookie`` with ``[REDACTED]`` in the span. + +Note: + The environment variable names used to capture HTTP headers are still experimental, and thus are subject to change. 
+ + + API --- """ @@ -135,8 +228,15 @@ def response_hook(span: Span, request: Request, response: HTTPResponse): ) from opentelemetry.trace import Span, SpanKind, Tracer, get_tracer from opentelemetry.util.http import ( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST, + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE, + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS, ExcludeList, + get_custom_header_attributes, + get_custom_headers, get_excluded_urls, + normalise_request_header_name, + normalise_response_header_name, parse_excluded_urls, redact_url, sanitize_method, @@ -169,6 +269,9 @@ def _instrument(self, **kwargs: Any): ``response_hook``: An optional callback which is invoked right before the span is finished processing a response ``excluded_urls``: A string containing a comma-delimited list of regexes used to exclude URLs from tracking + ``captured_request_headers``: A comma-separated list of regexes to match against request headers to capture + ``captured_response_headers``: A comma-separated list of regexes to match against response headers to capture + ``sensitive_headers``: A comma-separated list of regexes to match against captured headers to be sanitized """ # initialize semantic conventions opt-in if needed _OpenTelemetrySemanticConventionStability._initialize() @@ -205,6 +308,24 @@ def _instrument(self, **kwargs: Any): else parse_excluded_urls(excluded_urls) ), sem_conv_opt_in_mode=sem_conv_opt_in_mode, + captured_request_headers=kwargs.get( + "captured_request_headers", + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST + ), + ), + captured_response_headers=kwargs.get( + "captured_response_headers", + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE + ), + ), + sensitive_headers=kwargs.get( + "sensitive_headers", + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS + ), + ), ) def _uninstrument(self, **kwargs: Any): @@ -223,6 +344,9 
@@ def _instrument( response_hook: _ResponseHookT = None, excluded_urls: ExcludeList | None = None, sem_conv_opt_in_mode: _StabilityMode = _StabilityMode.DEFAULT, + captured_request_headers: list[str] | None = None, + captured_response_headers: list[str] | None = None, + sensitive_headers: list[str] | None = None, ): """Enables tracing of all requests calls that go through :code:`urllib.Client._make_request`""" @@ -275,6 +399,16 @@ def _instrumented_open_call( ) _set_http_url(labels, url, sem_conv_opt_in_mode) + labels.update( + get_custom_header_attributes( + # TODO: safe with multiple entries for the same header? + dict(request.header_items()), + captured_request_headers, + sensitive_headers, + normalise_request_header_name, + ) + ) + with tracer.start_as_current_span( span_name, kind=SpanKind.CLIENT, attributes=labels ) as span: @@ -310,6 +444,16 @@ def _instrumented_open_call( labels, f"{ver_[:1]}.{ver_[:-1]}", sem_conv_opt_in_mode ) + if span.is_recording(): + response_headers_to_set = get_custom_header_attributes( + result.headers, + captured_response_headers, + sensitive_headers, + normalise_response_header_name, + ) + for header, value in response_headers_to_set.items(): + span.set_attribute(header, value) + if exception is not None and _report_new(sem_conv_opt_in_mode): span.set_attribute(ERROR_TYPE, type(exception).__qualname__) labels[ERROR_TYPE] = type(exception).__qualname__ diff --git a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py index c9f45e141e..f3e7c1bde5 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py @@ -15,6 +15,7 @@ import abc import socket import urllib +from typing import Sequence from unittest import mock from unittest.mock import patch from urllib import request @@ -139,7 
+140,11 @@ def assert_span(self, exporter=None, num_spans=1): @staticmethod @abc.abstractmethod - def perform_request(url: str, opener: OpenerDirector = None): + def perform_request( + url: str, + opener: OpenerDirector = None, + headers: Sequence[tuple[str, str]] | None = None, + ): pass def test_basic(self): @@ -553,12 +558,359 @@ def test_no_op_tracer_provider(self): self.assertEqual(result.read(), b"Hello!") self.assert_span(num_spans=0) + def test_custom_response_headers_captured(self): + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument( + captured_response_headers=["X-Custom-Header", "X-Another-Header"] + ) + + response_headers = { + "X-Custom-Header": "custom-value", + "X-Another-Header": "another-value", + } + url = "http://mock//capture_headers" + httpretty.register_uri( + httpretty.GET, url, body="Hello!", adding_headers=response_headers + ) + self.perform_request(url) + + span = self.assert_span(num_spans=1) + self.assertEqual( + span.attributes["http.response.header.x_custom_header"], + ("custom-value",), + ) + self.assertEqual( + span.attributes["http.response.header.x_another_header"], + ("another-value",), + ) + self.assertNotIn( + "http.response.header.x_excluded_header", span.attributes + ) + + def test_custom_headers_not_captured_when_not_configured(self): + """Test that headers are not captured when env vars are not set.""" + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument() + + self.perform_request( + self.URL, + headers=[("X-Request-Header", "request-value")], + ) + + span = self.assert_span(num_spans=1) + self.assertNotIn( + "http.request.header.x_request_header", span.attributes + ) + self.assertNotIn( + "http.response.header.x_response_header", span.attributes + ) + + def test_sensitive_headers_sanitized(self): + """Test that sensitive header values are redacted.""" + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument( + captured_request_headers=["Authorization", "X-Api-Key"], + 
captured_response_headers=["Set-Cookie", "X-Secret"], + sensitive_headers=[ + "Authorization", + "X-Api-Key", + "Set-Cookie", + "X-Secret", + ], + ) + + response_headers = { + "Set-Cookie": "session=abc123", + "X-Secret": "secret", + } + url = "http://mock//capture_headers" + httpretty.register_uri( + httpretty.GET, url, body="Hello!", adding_headers=response_headers + ) + self.perform_request( + url, + headers=[ + ("Authorization", "Bearer secret-token"), + ("X-Api-Key", "secret-key"), + ], + ) + + span = self.assert_span(num_spans=1) + self.assertEqual( + span.attributes["http.request.header.authorization"], + ("[REDACTED]",), + ) + self.assertEqual( + span.attributes["http.request.header.x_api_key"], + ("[REDACTED]",), + ) + self.assertEqual( + span.attributes["http.response.header.set_cookie"], + ("[REDACTED]",), + ) + self.assertEqual( + span.attributes["http.response.header.x_secret"], + ("[REDACTED]",), + ) + + def test_custom_headers_with_regex(self): + """Test that header capture works with regex patterns.""" + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument( + captured_request_headers=["X-Custom-Request-.*"], + captured_response_headers=["X-Custom-Response-.*"], + ) + + response_headers = { + "X-Custom-Response-A": "value-A", + "X-Custom-Response-B": "value-B", + "X-Other-Response-Header": "other-value", + } + url = "http://mock//capture_headers" + httpretty.register_uri( + httpretty.GET, url, body="Hello!", adding_headers=response_headers + ) + self.perform_request( + url, + headers=[ + ("X-Custom-Request-One", "value-one"), + ("X-Custom-Request-Two", "value-two"), + ("X-Other-Request-Header", "other-value"), + ], + ) + + span = self.assert_span(num_spans=1) + self.assertEqual( + span.attributes["http.request.header.x_custom_request_one"], + ("value-one",), + ) + self.assertEqual( + span.attributes["http.request.header.x_custom_request_two"], + ("value-two",), + ) + self.assertNotIn( + "http.request.header.x_other_request_header", 
span.attributes + ) + self.assertEqual( + span.attributes["http.response.header.x_custom_response_a"], + ("value-A",), + ) + self.assertEqual( + span.attributes["http.response.header.x_custom_response_b"], + ("value-B",), + ) + self.assertNotIn( + "http.response.header.x_other_response_header", span.attributes + ) + + def test_custom_headers_case_insensitive(self): + """Test that header capture is case-insensitive.""" + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument( + captured_request_headers=["x-request-header"], + captured_response_headers=["x-response-header"], + ) + + response_headers = {"X-ReSPoNse-HeaDER": "custom-value"} + url = "http://mock//capture_headers" + httpretty.register_uri( + httpretty.GET, url, body="Hello!", adding_headers=response_headers + ) + self.perform_request( + url, + headers=[("X-ReQuESt-HeaDER", "custom-value")], + ) + + span = self.assert_span(num_spans=1) + self.assertEqual( + span.attributes["http.request.header.x_request_header"], + ("custom-value",), + ) + self.assertEqual( + span.attributes["http.response.header.x_response_header"], + ("custom-value",), + ) + + def test_standard_http_headers_captured(self): + """Test that standard HTTP headers can be captured.""" + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument( + captured_request_headers=["Content-Type", "Accept"], + captured_response_headers=["Content-Type", "Server"], + ) + + response_headers = { + "Content-Type": "text/plain", + "Server": "TestServer/1.0", + } + url = "http://mock//capture_headers" + httpretty.register_uri( + httpretty.GET, url, body="Hello!", adding_headers=response_headers + ) + self.perform_request( + url, + headers=[ + ("Content-Type", "application/json"), + ("Accept", "application/json"), + ], + ) + + span = self.assert_span(num_spans=1) + self.assertEqual( + span.attributes["http.request.header.content_type"], + ("application/json",), + ) + self.assertEqual( + span.attributes["http.request.header.accept"], 
+ ("application/json",), + ) + self.assertEqual( + span.attributes["http.response.header.content_type"], + ("text/plain",), + ) + self.assertEqual( + span.attributes["http.response.header.server"], + ("TestServer/1.0",), + ) + + def test_capture_all_request_headers(self): + """Test that all request headers can be captured with .* pattern.""" + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument(captured_request_headers=[".*"]) + + self.perform_request( + self.URL, + headers=[ + ("X-Header-One", "value1"), + ("X-Header-Two", "value2"), + ("X-Header-Three", "value3"), + ], + ) + + span = self.assert_span(num_spans=1) + self.assertEqual( + span.attributes["http.request.header.x_header_one"], + ("value1",), + ) + self.assertEqual( + span.attributes["http.request.header.x_header_two"], + ("value2",), + ) + self.assertEqual( + span.attributes["http.request.header.x_header_three"], + ("value3",), + ) + + def test_capture_all_response_headers(self): + """Test that all response headers can be captured with .* pattern.""" + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument(captured_response_headers=[".*"]) + + response_headers = { + "X-Response-One": "value1", + "X-Response-Two": "value2", + "X-Response-Three": "value3", + } + url = "http://mock//capture_headers" + httpretty.register_uri( + httpretty.GET, url, body="Hello!", adding_headers=response_headers + ) + self.perform_request(url) + + span = self.assert_span(num_spans=1) + self.assertEqual( + span.attributes["http.response.header.x_response_one"], + ("value1",), + ) + self.assertEqual( + span.attributes["http.response.header.x_response_two"], + ("value2",), + ) + self.assertEqual( + span.attributes["http.response.header.x_response_three"], + ("value3",), + ) + + def test_sanitize_with_regex_pattern(self): + """Test that sanitization works with regex patterns.""" + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument( + captured_request_headers=["X-Test.*"], + 
sensitive_headers=[".*secret.*"], + ) + + self.perform_request( + self.URL, + headers=[ + ("X-Test", "normal-value"), + ("X-Test-Secret", "secret-value"), + ], + ) + + span = self.assert_span(num_spans=1) + self.assertEqual( + span.attributes["http.request.header.x_test"], + ("normal-value",), + ) + self.assertEqual( + span.attributes["http.request.header.x_test_secret"], + ("[REDACTED]",), + ) + + @mock.patch.dict( + "os.environ", + { + "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST": "x-request-one,x-request-two", + "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE": "x-response-one", + "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS": "x-request-two", + }, + ) + def test_capture_and_sanitize_environment_variables(self): + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument() + + response_headers = { + "X-Response-One": "value1", + "X-Response-Two": "value2", + } + url = "http://mock//capture_headers" + httpretty.register_uri( + httpretty.GET, url, body="Hello!", adding_headers=response_headers + ) + self.perform_request( + url, headers=[("x-request-one", "one"), ("x-request-two", "two")] + ) + + span = self.assert_span(num_spans=1) + self.assertEqual( + span.attributes["http.request.header.x_request_one"], + ("one",), + ) + self.assertEqual( + span.attributes["http.request.header.x_request_two"], + ("[REDACTED]",), + ) + self.assertEqual( + span.attributes["http.response.header.x_response_one"], + ("value1",), + ) + self.assertNotIn( + "http.response.header.x_response_two", + span.attributes, + ) + class TestURLLibIntegration(URLLibIntegrationTestBase, TestBase): @staticmethod - def perform_request(url: str, opener: OpenerDirector = None): + def perform_request( + url: str, + opener: OpenerDirector = None, + headers: Sequence[tuple[str, str]] | None = None, + ): if not opener: opener = urllib.request.build_opener() + if headers: + opener.addheaders = headers return opener.open(fullurl=url) def 
test_invalid_url(self): From fdc3b15b706dd5c484170d0d12d8cecacddce4cb Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Wed, 24 Dec 2025 09:26:06 +0100 Subject: [PATCH 2/6] Apply suggestions from code review Co-authored-by: Lukas Hering <40302054+herin049@users.noreply.github.com> --- .../src/opentelemetry/instrumentation/urllib/__init__.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py index 78f9a1c158..e896b8700e 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py @@ -94,7 +94,7 @@ def response_hook(span: Span, request: Request, response: HTTPResponse): will extract ``content-type`` and ``custom_request_header`` from the request headers and add them as span attributes. -Request header names in aiohttp are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment +Request header names in urllib are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment variable will capture the header named ``custom-header``. Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: @@ -128,7 +128,7 @@ def response_hook(span: Span, request: Request, response: HTTPResponse): will extract ``content-type`` and ``custom_response_header`` from the response headers and add them as span attributes. -Response header names in aiohttp are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment +Response header names in urllib are case-insensitive. 
So, giving the header name as ``CUStom-Header`` in the environment variable will capture the header named ``custom-header``. Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: @@ -168,8 +168,6 @@ def response_hook(span: Span, request: Request, response: HTTPResponse): Note: The environment variable names used to capture HTTP headers are still experimental, and thus are subject to change. - - API --- """ From 6bf2df6d0cc7816076c9c372fb07042819d3c37d Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 23 Dec 2025 17:46:11 +0100 Subject: [PATCH 3/6] Cleanup type --- .../tests/test_urllib_integration.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py index f3e7c1bde5..d9d7e53d58 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py @@ -15,7 +15,6 @@ import abc import socket import urllib -from typing import Sequence from unittest import mock from unittest.mock import patch from urllib import request @@ -143,7 +142,7 @@ def assert_span(self, exporter=None, num_spans=1): def perform_request( url: str, opener: OpenerDirector = None, - headers: Sequence[tuple[str, str]] | None = None, + headers: list[tuple[str, str]] | None = None, ): pass @@ -905,7 +904,7 @@ class TestURLLibIntegration(URLLibIntegrationTestBase, TestBase): def perform_request( url: str, opener: OpenerDirector = None, - headers: Sequence[tuple[str, str]] | None = None, + headers: list[tuple[str, str]] | None = None, ): if not opener: opener = urllib.request.build_opener() From e5d860ad56cc79c07d6c63401b121471d89fff01 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Wed, 24 Dec 2025 11:31:56 
+0100 Subject: [PATCH 4/6] Fix handling of headers in instrumented Opener.open --- .../instrumentation/urllib/__init__.py | 24 ++++++++++--------- .../tests/test_urllib_integration.py | 19 ++++++++++++++- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py index e896b8700e..00443fc6d0 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py @@ -354,7 +354,10 @@ def _instrument( @functools.wraps(opener_open) def instrumented_open(opener, fullurl, data=None, timeout=None): if isinstance(fullurl, str): - request_ = Request(fullurl, data) + # in case of multiple entries for the same header Opener.open sends the first value + request_ = Request( + fullurl, data, headers=dict(reversed(opener.addheaders)) + ) else: request_ = fullurl @@ -397,10 +400,10 @@ def _instrumented_open_call( ) _set_http_url(labels, url, sem_conv_opt_in_mode) + headers = get_or_create_headers() labels.update( get_custom_header_attributes( - # TODO: safe with multiple entries for the same header? 
- dict(request.header_items()), + headers, captured_request_headers, sensitive_headers, normalise_request_header_name, @@ -414,7 +417,6 @@ def _instrumented_open_call( if callable(request_hook): request_hook(span, request) - headers = get_or_create_headers() inject(headers) with suppress_http_instrumentation(): @@ -443,14 +445,14 @@ def _instrumented_open_call( ) if span.is_recording(): - response_headers_to_set = get_custom_header_attributes( - result.headers, - captured_response_headers, - sensitive_headers, - normalise_response_header_name, + span.set_attributes( + get_custom_header_attributes( + result.headers, + captured_response_headers, + sensitive_headers, + normalise_response_header_name, + ) ) - for header, value in response_headers_to_set.items(): - span.set_attribute(header, value) if exception is not None and _report_new(sem_conv_opt_in_mode): span.set_attribute(ERROR_TYPE, type(exception).__qualname__) diff --git a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py index d9d7e53d58..463cf29239 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py @@ -895,7 +895,24 @@ def test_capture_and_sanitize_environment_variables(self): ) self.assertNotIn( "http.response.header.x_response_two", span.attributes, + ) + + def test_only_the_first_header_is_sent_with_duplicated_headers(self): + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument( + captured_request_headers=["X-foo"], + ) + result = self.perform_request( + self.URL, headers=[("X-foo", "foo"), ("X-foo", "bar")] + ) + self.assertEqual(result.read(), b"Hello!") + + span = self.assert_span() + + self.assertEqual( + span.attributes["http.request.header.x_foo"], + ("foo",), ) From 
59ff4b48d437d0afdbfb5388d14abee80c28f3b6 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Wed, 24 Dec 2025 11:35:50 +0100 Subject: [PATCH 5/6] Add missing support for new style annotations --- .../tests/test_urllib_integration.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py index 463cf29239..6c723b8cb0 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import abc import socket import urllib From b18e1034477593f6d4fa82db2934091dbfb1ddf6 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Wed, 24 Dec 2025 11:37:34 +0100 Subject: [PATCH 6/6] Add changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cf8b6d504..8130ded102 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#3938](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3938)) - `opentelemetry-instrumentation-aiohttp-server`: Support passing `TracerProvider` when instrumenting. ([#3819](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3819)) +- `opentelemetry-instrumentation-urllib`: add ability to capture custom headers + ([#4051](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4051)) ### Fixed