test: correct manifest divergence notes and route in-handler notifications deterministically

maxisbey · maxisbey · commit 7a026f282045 · 2026-05-27T16:57:38.000Z
Four manifest fixes from spec/SDK re-verification:
- lifecycle:capability:* divergence notes said SHOULD; spec basic/lifecycle#operation
  has been MUST since 2025-06-18, so both notes now say MUST
- mcpserver:tool:naming-validation deferred reason claimed no naming check exists;
  Tool.from_function calls validate_and_warn_tool_name (warns, doesn't reject) -
  converted to a Divergence with a pinning test
- client-auth:...issuer-validation divergence's second sentence was false
  (OAuthMetadata types the endpoints as AnyHttpUrl, so the scheme is validated)
  and has been removed
- resources:annotations now records that the SDK Annotations model lacks
  lastModified; the round-trip test sends it via model_validate so the snapshot
  pins the drop

Twelve lowlevel tests sent notifications from inside a tool handler without
related_request_id, so on the streamable-HTTP leg they routed to the standalone
GET stream and the assertion relied on cross-stream ordering the suite documents
as not guaranteed. Eight now pass related_request_id; the four whose senders
don't accept it instead wait on an anyio.Event before asserting, with the
snapshot still proving the delivered set. The module docstrings that overstated
the ordering guarantee are corrected.

README §Coverage now documents the four lax-no-cover teardown markers and the
sse.py aclose() fix that landed alongside this suite.
diff --git a/tests/interaction/README.md b/tests/interaction/README.md
@@ -209,7 +209,16 @@ assert after the call, with no synchronisation. The exceptions:
 CI requires 100% line and branch coverage, including `tests/`, and `strict-no-cover` fails the
 build if a line marked `# pragma: no cover` is ever executed. When a new test starts covering a
 pragma'd line in `src/`, delete the pragma in the same change. Do not add new `# type: ignore` or
-`# noqa` comments; restructure instead. The one sanctioned pragma is `# pragma: no branch` on a
-`with`/`async with` line whose only fault is coverage.py mis-tracing the exit arc of a nested
-async context — restructure first, and reserve the pragma for shapes that cannot collapse (a sync
-`with` adjacent to an `async with`).
+`# noqa` comments; restructure instead. The one sanctioned pragma in this suite's test code is
+`# pragma: no branch` on a `with`/`async with` line whose only fault is coverage.py mis-tracing
+the exit arc of a nested async context — restructure first, and reserve the pragma for shapes
+that cannot collapse (a sync `with` adjacent to an `async with`).
+
+A handful of `# pragma: lax no cover` markers in `src/` cover teardown exception handlers whose
+execution is timing-dependent under the in-process HTTP bridge — the POST-stream and
+stateless-session `except Exception` handlers in `server/streamable_http*.py`, the `_terminated`
+check in `message_router`, and the response-stream double-close guard in
+`BaseSession._receive_loop`. `strict-no-cover` does not check `lax` lines; do not promote them to
+strict `no cover` without first making the teardown ordering deterministic. The suite also relies
+on a one-line `src/mcp/server/sse.py` fix (`sse_stream_reader.aclose()`) that closes a stream the
+SSE leg would otherwise leak.
diff --git a/tests/interaction/_requirements.py b/tests/interaction/_requirements.py
@@ -90,7 +90,7 @@ def __post_init__(self) -> None:
         divergence=Divergence(
             note=(
                 "The client does not check its own declared capabilities before sending notifications or "
-                "serving callbacks; nothing prevents a caller from violating the spec's SHOULD."
+                "serving callbacks; nothing prevents a caller from violating the spec's MUST."
             ),
         ),
         deferred=(
@@ -106,7 +106,7 @@ def __post_init__(self) -> None:
         divergence=Divergence(
             note=(
                 "The client sends any request regardless of the server's advertised capabilities and "
-                "surfaces whatever the server answers; the spec's SHOULD is not enforced."
+                "surfaces whatever the server answers; the spec's MUST is not enforced."
             ),
         ),
         deferred=(
@@ -693,9 +693,12 @@ def __post_init__(self) -> None:
     "mcpserver:tool:naming-validation": Requirement(
         source="sdk",
         behavior="Tool names that violate the spec's naming rules are rejected at registration time.",
-        deferred=(
-            "Not implemented in the SDK: MCPServer accepts any string as a tool name; there is no "
-            "spec-naming-rules check at registration time."
+        divergence=Divergence(
+            note=(
+                "MCPServer runs the SEP-986 naming check at registration (validate_and_warn_tool_name at "
+                "tools/base.py) and logs a warning for non-conforming names, but does not reject them; the "
+                "bool result is discarded and registration proceeds."
+            ),
         ),
     ),
     "mcpserver:tool:output-schema:model": Requirement(
@@ -769,9 +772,12 @@ def __post_init__(self) -> None:
     # ═══════════════════════════════════════════════════════════════════════════
     "resources:annotations": Requirement(
         source=f"{SPEC_BASE_URL}/server/resources#annotations",
-        behavior=(
-            "Resource annotations (audience, priority) supplied by the server round-trip to the client "
-            "in the list result."
+        behavior="Resource annotations supplied by the server round-trip to the client in the list result.",
+        divergence=Divergence(
+            note=(
+                "The SDK Annotations model is missing the schema's lastModified field; MCPModel uses the "
+                "pydantic default extra='ignore', so the value is silently dropped on parse."
+            ),
         ),
     ),
     "resources:capability:declared": Requirement(
@@ -2413,9 +2419,7 @@ def __post_init__(self) -> None:
         divergence=Divergence(
             note=(
                 "The SDK parses authorization-server metadata without comparing issuer to the discovery "
-                "URL; a mismatched issuer is accepted and the flow proceeds. The SDK also does not "
-                "validate that the document's authorization_endpoint, token_endpoint, and "
-                "registration_endpoint use http(s) schemes."
+                "URL; a mismatched issuer is accepted and the flow proceeds."
             ),
         ),
     ),
diff --git a/tests/interaction/lowlevel/test_elicitation.py b/tests/interaction/lowlevel/test_elicitation.py
@@ -304,8 +304,9 @@ async def test_elicitation_complete_notification_carries_the_elicited_id_back_to
     The lifecycle under test: the tool elicits a URL interaction with an elicitationId, the user
     agrees to visit the URL, the out-of-band interaction finishes, and the server emits
     elicitation/complete so the client can correlate the completion with the elicitation it
-    accepted earlier. Both messages arrive before the tool call returns, so a plain collected
-    list needs no synchronisation.
+    accepted earlier. The completion notification carries ``related_request_id`` so over
+    streamable HTTP it rides the tool call's own stream and reaches the client before the call
+    returns; the same ordering already holds on in-memory and SSE transports.
     """
     elicitation_id = "auth-001"
     elicited_ids: list[str] = []
@@ -327,7 +328,7 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
             "Authorize access to your files.", "https://example.com/oauth/authorize", elicitation_id
         )
         assert answer.action == "accept"
-        await ctx.session.send_elicit_complete(elicitation_id)
+        await ctx.session.send_elicit_complete(elicitation_id, related_request_id=ctx.request_id)
         return CallToolResult(content=[TextContent(text="linked")])
 
     server = Server("authorizer", on_list_tools=list_tools, on_call_tool=call_tool)
@@ -559,7 +560,7 @@ async def list_tools(
 
     async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult:
         assert params.name == "noop"
-        await ctx.session.send_elicit_complete("never-elicited")
+        await ctx.session.send_elicit_complete("never-elicited", related_request_id=ctx.request_id)
         return CallToolResult(content=[TextContent(text="ok")])
 
     server = Server("notifier", on_list_tools=list_tools, on_call_tool=call_tool)
diff --git a/tests/interaction/lowlevel/test_list_changed.py b/tests/interaction/lowlevel/test_list_changed.py
@@ -1,11 +1,14 @@
 """List-changed notifications from the low-level Server, driven through the public Client API.
 
-The notifications are emitted from inside a tool call, so the ordering guarantee described in
-test_logging.py applies: they reach the client's message handler before the tool call returns,
-and the tests assert on a plain collected list with no synchronisation. The collector records
-every message the handler receives, so the assertions also prove nothing else was delivered.
+``send_*_list_changed`` does not take a ``related_request_id``, so over streamable HTTP the
+notification routes to the standalone GET stream and is not guaranteed to arrive before the tool
+result on its POST stream. Tests therefore wait on an event the collector sets, the same pattern
+as ``transports/test_streamable_http.py::test_unrelated_server_messages_arrive_on_the_standalone_stream``.
+The collector still records every message it receives, so the snapshot also proves nothing else
+was delivered.
 """
 
+import anyio
 import pytest
 from inline_snapshot import snapshot
 
@@ -29,9 +32,11 @@
 async def test_tool_list_changed_notification(connect: Connect) -> None:
     """A tools/list_changed notification sent during a tool call reaches the client's message handler."""
     received: list[IncomingMessage] = []
+    seen = anyio.Event()
 
     async def collect(message: IncomingMessage) -> None:
         received.append(message)
+        seen.set()
 
     async def list_tools(
         ctx: ServerRequestContext, params: types.PaginatedRequestParams | None
@@ -47,6 +52,8 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
 
     async with connect(server, message_handler=collect) as client:
         await client.call_tool("install", {})
+        with anyio.fail_after(5):
+            await seen.wait()
 
     assert received == snapshot([ToolListChangedNotification()])
 
@@ -55,9 +62,11 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
 async def test_resource_list_changed_notification(connect: Connect) -> None:
     """A resources/list_changed notification sent during a tool call reaches the client's message handler."""
     received: list[IncomingMessage] = []
+    seen = anyio.Event()
 
     async def collect(message: IncomingMessage) -> None:
         received.append(message)
+        seen.set()
 
     async def list_tools(
         ctx: ServerRequestContext, params: types.PaginatedRequestParams | None
@@ -73,6 +82,8 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
 
     async with connect(server, message_handler=collect) as client:
         await client.call_tool("mount", {})
+        with anyio.fail_after(5):
+            await seen.wait()
 
     assert received == snapshot([ResourceListChangedNotification()])
 
@@ -81,9 +92,11 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
 async def test_prompt_list_changed_notification(connect: Connect) -> None:
     """A prompts/list_changed notification sent during a tool call reaches the client's message handler."""
     received: list[IncomingMessage] = []
+    seen = anyio.Event()
 
     async def collect(message: IncomingMessage) -> None:
         received.append(message)
+        seen.set()
 
     async def list_tools(
         ctx: ServerRequestContext, params: types.PaginatedRequestParams | None
@@ -99,5 +112,7 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
 
     async with connect(server, message_handler=collect) as client:
         await client.call_tool("learn", {})
+        with anyio.fail_after(5):
+            await seen.wait()
 
     assert received == snapshot([PromptListChangedNotification()])
diff --git a/tests/interaction/lowlevel/test_logging.py b/tests/interaction/lowlevel/test_logging.py
@@ -2,11 +2,11 @@
 
 Notification ordering: the in-memory transport delivers every server-to-client message on one
 ordered stream, and the client's receive loop dispatches each incoming message to completion
-before reading the next one. Together these guarantee that every notification the server sends
-before its response reaches the client callback before the originating request returns, so tests
-collect notifications into a plain list and assert after the request completes -- no events, no
-waiting. This does not generalise to transports that split messages across streams (the
-streamable HTTP standalone GET stream); tests over those transports must synchronise explicitly.
+before reading the next one. Over streamable HTTP that ordered single-stream guarantee holds
+only for messages that carry a ``related_request_id`` (they ride the originating request's POST
+stream); without it the message routes to the standalone GET stream and may arrive after the
+response. These tests pass ``related_request_id`` so they can collect into a plain list and
+assert after the request completes on every transport leg -- no events, no waiting.
 """
 
 import pytest
@@ -68,8 +68,12 @@ async def list_tools(
 
     async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult:
         assert params.name == "chatty"
-        await ctx.session.send_log_message(level="info", data="starting up", logger="app.lifecycle")
-        await ctx.session.send_log_message(level="error", data={"code": 502, "retryable": True})
+        await ctx.session.send_log_message(
+            level="info", data="starting up", logger="app.lifecycle", related_request_id=ctx.request_id
+        )
+        await ctx.session.send_log_message(
+            level="error", data={"code": 502, "retryable": True}, related_request_id=ctx.request_id
+        )
         return CallToolResult(content=[TextContent(text="done")])
 
     server = Server("logger", on_list_tools=list_tools, on_call_tool=call_tool)
@@ -102,7 +106,9 @@ async def list_tools(
     async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestParams) -> CallToolResult:
         assert params.name == "siren"
         for level in ALL_LEVELS:
-            await ctx.session.send_log_message(level=level, data=f"a {level} message")
+            await ctx.session.send_log_message(
+                level=level, data=f"a {level} message", related_request_id=ctx.request_id
+            )
         return CallToolResult(content=[TextContent(text="logged")])
 
     server = Server("logger", on_list_tools=list_tools, on_call_tool=call_tool)
diff --git a/tests/interaction/lowlevel/test_progress.py b/tests/interaction/lowlevel/test_progress.py
@@ -1,10 +1,12 @@
 """Progress interactions against the low-level Server, driven through the public Client API.
 
 Server-to-client progress emitted during a request follows the same ordering guarantee as
-logging notifications (see test_logging.py): everything the server sends before its response is
-dispatched to the progress callback before the request returns, so no synchronisation is needed.
-The client-to-server direction is a standalone notification with no response to await, so that
-test waits on an event set by the server's handler.
+logging notifications (see test_logging.py) -- on the in-memory transport unconditionally, and
+over streamable HTTP only when sent with ``related_request_id`` so the notification rides the
+originating request's POST stream rather than the standalone GET stream. These tests pass
+``related_request_id`` so no synchronisation is needed. The client-to-server direction is a
+standalone notification with no response to await, so that test waits on an event set by the
+server's handler.
 """
 
 import anyio
@@ -42,9 +44,15 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
         assert ctx.meta is not None
         token = ctx.meta.get("progress_token")
         assert token is not None
-        await ctx.session.send_progress_notification(token, 1.0, total=3.0, message="first chunk")
-        await ctx.session.send_progress_notification(token, 2.0, total=3.0, message="second chunk")
-        await ctx.session.send_progress_notification(token, 3.0, total=3.0, message="done")
+        await ctx.session.send_progress_notification(
+            token, 1.0, total=3.0, message="first chunk", related_request_id=str(ctx.request_id)
+        )
+        await ctx.session.send_progress_notification(
+            token, 2.0, total=3.0, message="second chunk", related_request_id=str(ctx.request_id)
+        )
+        await ctx.session.send_progress_notification(
+            token, 3.0, total=3.0, message="done", related_request_id=str(ctx.request_id)
+        )
         return CallToolResult(content=[TextContent(text="downloaded")])
 
     server = Server("downloader", on_list_tools=list_tools, on_call_tool=call_tool)
@@ -166,10 +174,14 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
         # The two handlers interleave by waiting on alternating turns: a takes 0 and 2, b takes 1 and 3.
         first, second = (0, 2) if label == "a" else (1, 3)
         await turns[first].wait()
-        await ctx.session.send_progress_notification(token, progress_values[label][0])
+        await ctx.session.send_progress_notification(
+            token, progress_values[label][0], related_request_id=str(ctx.request_id)
+        )
         turns[first + 1].set()
         await turns[second].wait()
-        await ctx.session.send_progress_notification(token, progress_values[label][1])
+        await ctx.session.send_progress_notification(
+            token, progress_values[label][1], related_request_id=str(ctx.request_id)
+        )
         if second + 1 < len(turns):
             turns[second + 1].set()
         return CallToolResult(content=[TextContent(text="done")])
@@ -227,7 +239,7 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
         token = ctx.meta.get("progress_token")
         assert token is not None
         captured.append((ctx.session, token))
-        await ctx.session.send_progress_notification(token, 0.5)
+        await ctx.session.send_progress_notification(token, 0.5, related_request_id=str(ctx.request_id))
         return CallToolResult(content=[TextContent(text="done")])
 
     server = Server("reporter", on_list_tools=list_tools, on_call_tool=call_tool)
@@ -276,9 +288,9 @@ async def call_tool(ctx: ServerRequestContext, params: types.CallToolRequestPara
         assert ctx.meta is not None
         token = ctx.meta.get("progress_token")
         assert token is not None
-        await ctx.session.send_progress_notification(token, 0.5)
-        await ctx.session.send_progress_notification(token, 0.3)
-        await ctx.session.send_progress_notification(token, 0.9)
+        await ctx.session.send_progress_notification(token, 0.5, related_request_id=str(ctx.request_id))
+        await ctx.session.send_progress_notification(token, 0.3, related_request_id=str(ctx.request_id))
+        await ctx.session.send_progress_notification(token, 0.9, related_request_id=str(ctx.request_id))
         return CallToolResult(content=[TextContent(text="done")])
 
     server = Server("zigzagger", on_list_tools=list_tools, on_call_tool=call_tool)
diff --git a/tests/interaction/lowlevel/test_resources.py b/tests/interaction/lowlevel/test_resources.py
diff --git a/tests/interaction/mcpserver/test_tools.py b/tests/interaction/mcpserver/test_tools.py