From 637ee98b29899d7b8e3dcc02fbf783960c1dd6c5 Mon Sep 17 00:00:00 2001
From: jsonbailey <jbailey@launchdarkly.com>
Date: Thu, 7 May 2026 12:29:44 -0500
Subject: [PATCH] test: add LangGraph node_metrics isolation regression test

Mirrors the OpenAI regression test added in PR #155. Verifies that
LangGraphAgentGraphRunner.run() produces a fresh node_metrics dict on
each invocation so callback-handler state cannot leak across runs on
the same runner instance.
---
 .../test_langgraph_agent_graph_runner.py      | 70 +++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/packages/ai-providers/server-ai-langchain/tests/test_langgraph_agent_graph_runner.py b/packages/ai-providers/server-ai-langchain/tests/test_langgraph_agent_graph_runner.py
index 76bf530..49ab015 100644
--- a/packages/ai-providers/server-ai-langchain/tests/test_langgraph_agent_graph_runner.py
+++ b/packages/ai-providers/server-ai-langchain/tests/test_langgraph_agent_graph_runner.py
@@ -1,5 +1,6 @@
 """Tests for LangGraphAgentGraphRunner and LangChainRunnerFactory.create_agent_graph()."""
 
+from uuid import uuid4
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -156,3 +157,72 @@ async def test_langgraph_runner_run_success():
     tracker.track_path.assert_not_called()
     tracker.track_invocation_success.assert_not_called()
     tracker.track_duration.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_langgraph_runner_run_resets_node_metrics_between_runs():
+    """Successive runs do not leak stale node metrics from a previous run.
+
+    Mirrors ``test_openai_agent_graph_runner_run_resets_node_metrics_between_runs``
+    in the OpenAI provider tests.  Each ``run()`` invocation must produce its
+    own fresh ``node_metrics`` rather than a union of all prior runs' metrics.
+
+    Strategy: bypass ``_build_graph()`` by pre-populating ``_compiled`` and
+    ``_node_keys`` on the runner.  The mock compiled graph's ``ainvoke`` is a
+    side-effect coroutine that fires callbacks on the handler passed in via
+    ``config['callbacks']`` — the same handler the real LangGraph executor
+    would invoke.  Each call fires events for only ``root-agent`` so we can
+    assert the second result's ``node_metrics`` reflects only the second run.
+    """
+    graph = _make_graph()
+
+    mock_message = MagicMock()
+    mock_message.content = "answer"
+    mock_message.usage_metadata = None
+    mock_message.response_metadata = None
+
+    async def fire_callbacks(_payload, *, config):
+        handler = config['callbacks'][0]
+        # Simulate one node execution on the handler received through
+        # config['callbacks'].  A handler (or state dict) reused across runs
+        # would carry first-run entries into the results asserted on below.
+        run_id = uuid4()
+        handler.on_chain_start({}, {}, run_id=run_id, name='root-agent')
+        handler.on_chain_end({}, run_id=run_id)
+        return {'messages': [mock_message]}
+
+    mock_compiled = MagicMock()
+    mock_compiled.ainvoke = AsyncMock(side_effect=fire_callbacks)
+
+    mock_human_message = MagicMock()
+    mock_lc_core_messages = MagicMock()
+    mock_lc_core_messages.HumanMessage = MagicMock(return_value=mock_human_message)
+
+    runner = LangGraphAgentGraphRunner(graph, {})
+    # Bypass _build_graph(): provide a pre-compiled graph and the node keys
+    # that the callback handler would otherwise be initialised with.
+    runner._compiled = mock_compiled
+    runner._node_keys = {'root-agent'}
+    runner._fn_name_to_config_key = {}
+
+    with patch.dict('sys.modules', {
+        'langchain_core': MagicMock(),
+        'langchain_core.messages': mock_lc_core_messages,
+    }):
+        first = await runner.run("attempt 1")
+        assert first.metrics.success is True
+        assert 'root-agent' in first.metrics.node_metrics
+        first_metrics = first.metrics.node_metrics['root-agent']
+
+        second = await runner.run("attempt 2")
+
+    assert second.metrics.success is True
+    assert 'root-agent' in second.metrics.node_metrics
+    # The second run's per-node metrics must be a fresh object, not the
+    # accumulated state from the first run.  If the runner leaked the
+    # callback handler (or its state dict) across invocations, the second
+    # run could hand back the first run's metrics object for 'root-agent'.
+    assert second.metrics.node_metrics['root-agent'] is not first_metrics
+    # Path and node_metrics keys reflect only the second invocation.
+    assert second.metrics.path == ['root-agent']
+    assert set(second.metrics.node_metrics.keys()) == {'root-agent'}