From 4875cf55726c9e87c8314fba8b5b732c943d86a4 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 5 Mar 2026 14:42:43 +0200 Subject: [PATCH 01/14] feat: record model thoughts Signed-off-by: Danny Kopping --- fixtures/anthropic/single_builtin_tool.txtar | 48 ++++++- intercept/messages/blocking.go | 42 ++++++- intercept/messages/streaming.go | 46 +++++++ internal/integrationtest/bridge_test.go | 125 +++++++++++++++++++ internal/integrationtest/trace_test.go | 2 + internal/testutil/mock_recorder.go | 15 +++ recorder/recorder.go | 34 +++++ recorder/types.go | 10 ++ 8 files changed, 316 insertions(+), 6 deletions(-) diff --git a/fixtures/anthropic/single_builtin_tool.txtar b/fixtures/anthropic/single_builtin_tool.txtar index 50ca93f1..c271cb7c 100644 --- a/fixtures/anthropic/single_builtin_tool.txtar +++ b/fixtures/anthropic/single_builtin_tool.txtar @@ -33,22 +33,55 @@ event: message_start data: {"type":"message_start","message":{"id":"msg_015SQewixvT9s4cABCVvUE6g","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":22,"cache_read_input_tokens":13993,"output_tokens":5,"service_tier":"standard"}} } event: content_block_start -data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_01RX68weRSquLx6HUTj65iBo","name":"Read","input":{}} } +data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"The user wants me to read"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" a"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" file called \""} } + +event: content_block_delta +data: 
{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"foo\"."} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" Let me find"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" and"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" read it."} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":""} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ=="}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01RX68weRSquLx6HUTj65iBo","name":"Read","input":{}}} event: ping data: {"type": "ping"} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"file_path\": \"/tmp/blah/foo"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"file_path\": \"/tmp/blah/foo"} } event: content_block_delta -data: 
{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"\"}"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\"}"} } event: content_block_stop -data: {"type":"content_block_stop","index":0 } +data: {"type":"content_block_stop","index":1 } event: message_delta data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":61} } @@ -65,6 +98,11 @@ data: {"type":"message_stop" } "expires_at": "0001-01-01T00:00:00Z" }, "content": [ + { + "type": "thinking", + "thinking": "The user wants me to read a file called \"foo\". Let me find and read it.", + "signature": "Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ==" + }, { "citations": null, "text": "I can see there's a file named `foo` in the `/tmp/blah` directory. Let me read it.", diff --git a/intercept/messages/blocking.go b/intercept/messages/blocking.go index e22b97f8..28e64578 100644 --- a/intercept/messages/blocking.go +++ b/intercept/messages/blocking.go @@ -135,6 +135,23 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req accumulateUsage(&cumulativeUsage, resp.Usage) + // Capture any thinking blocks that were returned. + var thoughtRecords []*recorder.ModelThoughtRecord + if !i.isSmallFastModel() { + for _, block := range resp.Content { + switch variant := block.AsAny().(type) { + case anthropic.ThinkingBlock: + thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ + InterceptionID: i.ID().String(), + Content: variant.Thinking, + }) + case anthropic.RedactedThinkingBlock: + // For redacted thinking, there's nothing useful we can capture. 
+ continue + } + } + } + // Handle tool calls for non-streaming. var pendingToolCalls []anthropic.ToolUseBlock for _, c := range resp.Content { @@ -158,10 +175,20 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req Injected: false, }) + // Associate the model thoughts with this tool call. + for _, thought := range thoughtRecords { + thought.ProviderToolCallID = toolUse.ID + } } - // If no injected tool calls, we're done. + // If no injected tool calls, persist thoughts and we're done. if len(pendingToolCalls) == 0 { + for _, thought := range thoughtRecords { + if thought.ProviderToolCallID == "" { + continue + } + _ = i.recorder.RecordModelThought(ctx, thought) + } break } @@ -198,6 +225,11 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req InvocationError: err, }) + // Associate the model thoughts with this tool call. + for _, thought := range thoughtRecords { + thought.ProviderToolCallID = tc.ID + } + if err != nil { // Always provide a tool_result even if the tool call failed messages.Messages = append(messages.Messages, @@ -283,6 +315,14 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req } } + // Only persist thoughts that are associated to a tool call. + for _, thought := range thoughtRecords { + if thought.ProviderToolCallID == "" { + continue + } + _ = i.recorder.RecordModelThought(ctx, thought) + } + // Sync the raw payload with updated messages so that withBody() // sends the updated payload on the next iteration. if err := i.syncPayloadMessages(messages.Messages); err != nil { diff --git a/intercept/messages/streaming.go b/intercept/messages/streaming.go index 4e87fd85..f9b6525f 100644 --- a/intercept/messages/streaming.go +++ b/intercept/messages/streaming.go @@ -252,6 +252,24 @@ newStream: // Don't send message_stop until all tools have been called. case string(constant.ValueOf[constant.MessageStop]()): + + // Capture any thinking blocks that were returned. 
+ var thoughtRecords []*recorder.ModelThoughtRecord + if !i.isSmallFastModel() { // TODO: remove. + for _, block := range message.Content { + switch variant := block.AsAny().(type) { + case anthropic.ThinkingBlock: + thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ + InterceptionID: i.ID().String(), + Content: variant.Thinking, + }) + case anthropic.RedactedThinkingBlock: + // For redacted thinking, there's nothing useful we can capture. + continue + } + } + } + if len(pendingToolCalls) > 0 { // Append the whole message from this stream as context since we'll be sending a new request with the tool results. messages.Messages = append(messages.Messages, message.ToParam()) @@ -306,6 +324,11 @@ newStream: InvocationError: err, }) + // Associate the model thoughts with this tool call. + for _, thought := range thoughtRecords { + thought.ProviderToolCallID = id + } + if err != nil { // Always provide a tool_result even if the tool call failed messages.Messages = append(messages.Messages, @@ -390,6 +413,15 @@ newStream: } } + // Only persist thoughts that are associated to a tool call. + for _, thought := range thoughtRecords { + if thought.ProviderToolCallID == "" { + continue + } + + _ = i.recorder.RecordModelThought(streamCtx, thought) + } + // Sync the raw payload with updated messages so that withBody() // sends the updated payload on the next iteration. if syncErr := i.syncPayloadMessages(messages.Messages); syncErr != nil { @@ -417,7 +449,21 @@ newStream: Args: variant.Input, Injected: false, }) + + // Associate the model thoughts with this tool call. + for _, thought := range thoughtRecords { + thought.ProviderToolCallID = variant.ID + } + } + } + + // Only persist thoughts that are associated to a tool call. 
+ for _, thought := range thoughtRecords { + if thought.ProviderToolCallID == "" { + continue } + + _ = i.recorder.RecordModelThought(streamCtx, thought) } } } diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index 01eb5815..4956e8df 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -124,6 +124,131 @@ func TestAnthropicMessages(t *testing.T) { }) } +func TestAnthropicMessagesModelThoughts(t *testing.T) { + t.Parallel() + + t.Run("thinking captured with builtin tool", func(t *testing.T) { + t.Parallel() + + cases := []struct { + streaming bool + expectedToolCallID string + expectedThinkingSubstr string + }{ + { + streaming: true, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThinkingSubstr: "Let me find and read it.", + }, + { + streaming: false, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThinkingSubstr: "Let me find and read it.", + }, + } + + for _, tc := range cases { + t.Run(fmt.Sprintf("streaming=%v", tc.streaming), func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) + + fix := fixtures.Parse(t, fixtures.AntSingleBuiltinTool) + upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) + + recorderClient := &testutil.MockRecorder{} + logger := slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) + providers := []aibridge.Provider{provider.NewAnthropic(anthropicCfg(upstream.URL, apiKey), nil)} + b, err := aibridge.NewRequestBridge(ctx, providers, recorderClient, mcp.NewServerProxyManager(nil, testTracer), logger, nil, testTracer) + require.NoError(t, err) + + mockSrv := httptest.NewUnstartedServer(b) + t.Cleanup(mockSrv.Close) + mockSrv.Config.BaseContext = func(_ net.Listener) context.Context { + return aibcontext.AsActor(ctx, userID, nil) + } + mockSrv.Start() + + reqBody, err := sjson.SetBytes(fix.Request(), "stream", 
tc.streaming) + require.NoError(t, err) + req := createAnthropicMessagesReq(t, mockSrv.URL, reqBody) + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + defer resp.Body.Close() + + if tc.streaming { + sp := aibridge.NewSSEParser() + require.NoError(t, sp.Parse(resp.Body)) + assert.Contains(t, sp.AllEvents(), "message_start") + assert.Contains(t, sp.AllEvents(), "message_stop") + } + + // Verify model thoughts were captured and associated with the tool call. + thoughts := recorderClient.RecordedModelThoughts() + require.Len(t, thoughts, 1) + assert.Contains(t, thoughts[0].Content, "The user wants me to read") + assert.Contains(t, thoughts[0].Content, tc.expectedThinkingSubstr) + assert.NotEmpty(t, thoughts[0].InterceptionID) + assert.Equal(t, tc.expectedToolCallID, thoughts[0].ProviderToolCallID) + + // Verify tool usage was also recorded. + toolUsages := recorderClient.RecordedToolUsages() + require.Len(t, toolUsages, 1) + assert.Equal(t, "Read", toolUsages[0].Tool) + assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) + + recorderClient.VerifyAllInterceptionsEnded(t) + }) + } + }) + + t.Run("no thoughts without tool calls", func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) + + // Use the simple fixture which has no tool calls — any thinking blocks + // should not be persisted since they can't be associated with a tool call. 
+ fix := fixtures.Parse(t, fixtures.AntSimple) + upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) + + recorderClient := &testutil.MockRecorder{} + logger := slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) + providers := []aibridge.Provider{provider.NewAnthropic(anthropicCfg(upstream.URL, apiKey), nil)} + b, err := aibridge.NewRequestBridge(ctx, providers, recorderClient, mcp.NewServerProxyManager(nil, testTracer), logger, nil, testTracer) + require.NoError(t, err) + + mockSrv := httptest.NewUnstartedServer(b) + t.Cleanup(mockSrv.Close) + mockSrv.Config.BaseContext = func(_ net.Listener) context.Context { + return aibcontext.AsActor(ctx, userID, nil) + } + mockSrv.Start() + + reqBody, err := sjson.SetBytes(fix.Request(), "stream", true) + require.NoError(t, err) + req := createAnthropicMessagesReq(t, mockSrv.URL, reqBody) + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + defer resp.Body.Close() + + sp := aibridge.NewSSEParser() + require.NoError(t, sp.Parse(resp.Body)) + + // No thoughts should be recorded when there are no tool calls. 
+ thoughts := recorderClient.RecordedModelThoughts() + assert.Empty(t, thoughts) + + recorderClient.VerifyAllInterceptionsEnded(t) + }) +} + func TestAWSBedrockIntegration(t *testing.T) { t.Parallel() diff --git a/internal/integrationtest/trace_test.go b/internal/integrationtest/trace_test.go index 88bec31c..bdfb7f7f 100644 --- a/internal/integrationtest/trace_test.go +++ b/internal/integrationtest/trace_test.go @@ -51,6 +51,7 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 1, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 1, codes.Unset}, {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } @@ -63,6 +64,7 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 2, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 1, codes.Unset}, {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } diff --git a/internal/testutil/mock_recorder.go b/internal/testutil/mock_recorder.go index 09bcac39..55eafcd6 100644 --- a/internal/testutil/mock_recorder.go +++ b/internal/testutil/mock_recorder.go @@ -20,6 +20,7 @@ type MockRecorder struct { tokenUsages []*recorder.TokenUsageRecord userPrompts []*recorder.PromptUsageRecord toolUsages []*recorder.ToolUsageRecord + modelThoughts []*recorder.ModelThoughtRecord interceptionsEnd map[string]*recorder.InterceptionRecordEnded } @@ -64,6 +65,13 @@ func (m *MockRecorder) RecordToolUsage(ctx context.Context, req *recorder.ToolUs return nil } +func (m *MockRecorder) RecordModelThought(ctx context.Context, req *recorder.ModelThoughtRecord) error { + m.mu.Lock() + defer m.mu.Unlock() + m.modelThoughts = append(m.modelThoughts, req) + return nil +} + // RecordedTokenUsages returns a copy of recorded token usages in a thread-safe manner. 
// Note: This is a shallow clone - the slice is copied but the pointers reference the // same underlying records. This is sufficient for our test assertions which only read @@ -128,6 +136,13 @@ func (m *MockRecorder) ToolUsages() []*recorder.ToolUsageRecord { return m.toolUsages } +// RecordedModelThoughts returns a copy of recorded model thoughts in a thread-safe manner. +func (m *MockRecorder) RecordedModelThoughts() []*recorder.ModelThoughtRecord { + m.mu.Lock() + defer m.mu.Unlock() + return slices.Clone(m.modelThoughts) +} + // RecordedInterceptionEnd returns the stored InterceptionRecordEnded for the // given interception ID, or nil if not found. func (m *MockRecorder) RecordedInterceptionEnd(id string) *recorder.InterceptionRecordEnded { diff --git a/recorder/recorder.go b/recorder/recorder.go index 6e37b632..3bd657f9 100644 --- a/recorder/recorder.go +++ b/recorder/recorder.go @@ -116,6 +116,24 @@ func (r *RecorderWrapper) RecordToolUsage(ctx context.Context, req *ToolUsageRec return err } +func (r *RecorderWrapper) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) (outErr error) { + ctx, span := r.tracer.Start(ctx, "Intercept.RecordModelThought", trace.WithAttributes(tracing.InterceptionAttributesFromContext(ctx)...)) + defer tracing.EndSpanErr(span, &outErr) + + client, err := r.clientFn() + if err != nil { + return fmt.Errorf("acquire client: %w", err) + } + + req.CreatedAt = time.Now() + if err = client.RecordModelThought(ctx, req); err == nil { + return nil + } + + r.logger.Warn(ctx, "failed to record model thought", slog.Error(err), slog.F("interception_id", req.InterceptionID)) + return err +} + func NewRecorder(logger slog.Logger, tracer trace.Tracer, clientFn func() (Recorder, error)) *RecorderWrapper { return &RecorderWrapper{ logger: logger, @@ -259,6 +277,22 @@ func (a *AsyncRecorder) RecordToolUsage(ctx context.Context, req *ToolUsageRecor return nil // Caller is not interested in error. 
} +func (a *AsyncRecorder) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error { + a.wg.Add(1) + go func() { + defer a.wg.Done() + timedCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), a.timeout) + defer cancel() + + err := a.wrapped.RecordModelThought(timedCtx, req) + if err != nil { + a.logger.Warn(timedCtx, "failed to record model thought", slog.Error(err), slog.F("payload", req)) + } + }() + + return nil // Caller is not interested in error. +} + func (a *AsyncRecorder) Wait() { a.wg.Wait() } diff --git a/recorder/types.go b/recorder/types.go index b33494d4..609e7142 100644 --- a/recorder/types.go +++ b/recorder/types.go @@ -19,6 +19,8 @@ type Recorder interface { RecordPromptUsage(ctx context.Context, req *PromptUsageRecord) error // RecordToolUsage records the tools used in an interception with an upstream AI provider. RecordToolUsage(ctx context.Context, req *ToolUsageRecord) error + // RecordModelThought records the reasoning/thinking produced in an interception with an upstream AI provider. 
+ RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error } type ToolArgs any @@ -73,3 +75,11 @@ type ToolUsageRecord struct { Metadata Metadata CreatedAt time.Time } + +type ModelThoughtRecord struct { + InterceptionID string + ProviderToolCallID string + Content string + Metadata Metadata + CreatedAt time.Time +} From f878dd6318d2f17960ed0462ce8ec5ba5cc449c5 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 5 Mar 2026 17:36:44 +0200 Subject: [PATCH 02/14] fix: send model thoughts with tool usage recording Signed-off-by: Danny Kopping --- intercept/messages/blocking.go | 56 ++++++++---------------- intercept/messages/streaming.go | 58 ++++++++----------------- internal/integrationtest/bridge_test.go | 22 +++++----- internal/integrationtest/trace_test.go | 2 - internal/testutil/mock_recorder.go | 15 ------- recorder/recorder.go | 34 --------------- recorder/types.go | 12 +++-- 7 files changed, 52 insertions(+), 147 deletions(-) diff --git a/intercept/messages/blocking.go b/intercept/messages/blocking.go index 28e64578..4ba71874 100644 --- a/intercept/messages/blocking.go +++ b/intercept/messages/blocking.go @@ -137,18 +137,16 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req // Capture any thinking blocks that were returned. var thoughtRecords []*recorder.ModelThoughtRecord - if !i.isSmallFastModel() { - for _, block := range resp.Content { - switch variant := block.AsAny().(type) { - case anthropic.ThinkingBlock: - thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ - InterceptionID: i.ID().String(), - Content: variant.Thinking, - }) - case anthropic.RedactedThinkingBlock: - // For redacted thinking, there's nothing useful we can capture. 
- continue - } + for _, block := range resp.Content { + switch variant := block.AsAny().(type) { + case anthropic.ThinkingBlock: + thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ + Content: variant.Thinking, + CreatedAt: time.Now(), + }) + case anthropic.RedactedThinkingBlock: + // For redacted thinking, there's nothing useful we can capture. + continue } } @@ -173,22 +171,15 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req Tool: toolUse.Name, Args: toolUse.Input, Injected: false, + ModelThoughts: thoughtRecords, }) - - // Associate the model thoughts with this tool call. - for _, thought := range thoughtRecords { - thought.ProviderToolCallID = toolUse.ID - } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil } - // If no injected tool calls, persist thoughts and we're done. + // If no injected tool calls, we're done. if len(pendingToolCalls) == 0 { - for _, thought := range thoughtRecords { - if thought.ProviderToolCallID == "" { - continue - } - _ = i.recorder.RecordModelThought(ctx, thought) - } break } @@ -223,12 +214,11 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req Args: tc.Input, Injected: true, InvocationError: err, + ModelThoughts: thoughtRecords, }) - - // Associate the model thoughts with this tool call. - for _, thought := range thoughtRecords { - thought.ProviderToolCallID = tc.ID - } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil if err != nil { // Always provide a tool_result even if the tool call failed @@ -315,14 +305,6 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req } } - // Only persist thoughts that are associated to a tool call. 
- for _, thought := range thoughtRecords { - if thought.ProviderToolCallID == "" { - continue - } - _ = i.recorder.RecordModelThought(ctx, thought) - } - // Sync the raw payload with updated messages so that withBody() // sends the updated payload on the next iteration. if err := i.syncPayloadMessages(messages.Messages); err != nil { diff --git a/intercept/messages/streaming.go b/intercept/messages/streaming.go index f9b6525f..949401f9 100644 --- a/intercept/messages/streaming.go +++ b/intercept/messages/streaming.go @@ -255,18 +255,16 @@ newStream: // Capture any thinking blocks that were returned. var thoughtRecords []*recorder.ModelThoughtRecord - if !i.isSmallFastModel() { // TODO: remove. - for _, block := range message.Content { - switch variant := block.AsAny().(type) { - case anthropic.ThinkingBlock: - thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ - InterceptionID: i.ID().String(), - Content: variant.Thinking, - }) - case anthropic.RedactedThinkingBlock: - // For redacted thinking, there's nothing useful we can capture. - continue - } + for _, block := range message.Content { + switch variant := block.AsAny().(type) { + case anthropic.ThinkingBlock: + thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ + Content: variant.Thinking, + CreatedAt: time.Now(), + }) + case anthropic.RedactedThinkingBlock: + // For redacted thinking, there's nothing useful we can capture. + continue } } @@ -322,12 +320,11 @@ newStream: Args: input, Injected: true, InvocationError: err, + ModelThoughts: thoughtRecords, }) - - // Associate the model thoughts with this tool call. - for _, thought := range thoughtRecords { - thought.ProviderToolCallID = id - } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. 
+ thoughtRecords = nil if err != nil { // Always provide a tool_result even if the tool call failed @@ -413,15 +410,6 @@ newStream: } } - // Only persist thoughts that are associated to a tool call. - for _, thought := range thoughtRecords { - if thought.ProviderToolCallID == "" { - continue - } - - _ = i.recorder.RecordModelThought(streamCtx, thought) - } - // Sync the raw payload with updated messages so that withBody() // sends the updated payload on the next iteration. if syncErr := i.syncPayloadMessages(messages.Messages); syncErr != nil { @@ -448,23 +436,13 @@ newStream: Tool: variant.Name, Args: variant.Input, Injected: false, + ModelThoughts: thoughtRecords, }) - - // Associate the model thoughts with this tool call. - for _, thought := range thoughtRecords { - thought.ProviderToolCallID = variant.ID - } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil } } - - // Only persist thoughts that are associated to a tool call. - for _, thought := range thoughtRecords { - if thought.ProviderToolCallID == "" { - continue - } - - _ = i.recorder.RecordModelThought(streamCtx, thought) - } } } diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index 4956e8df..4ebcc997 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -186,20 +186,17 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { assert.Contains(t, sp.AllEvents(), "message_stop") } - // Verify model thoughts were captured and associated with the tool call. - thoughts := recorderClient.RecordedModelThoughts() - require.Len(t, thoughts, 1) - assert.Contains(t, thoughts[0].Content, "The user wants me to read") - assert.Contains(t, thoughts[0].Content, tc.expectedThinkingSubstr) - assert.NotEmpty(t, thoughts[0].InterceptionID) - assert.Equal(t, tc.expectedToolCallID, thoughts[0].ProviderToolCallID) - - // Verify tool usage was also recorded. 
+ // Verify tool usage was recorded with associated model thoughts. toolUsages := recorderClient.RecordedToolUsages() require.Len(t, toolUsages, 1) assert.Equal(t, "Read", toolUsages[0].Tool) assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) + // Model thoughts should be embedded in the tool usage record. + require.Len(t, toolUsages[0].ModelThoughts, 1) + assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants me to read") + assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, tc.expectedThinkingSubstr) + recorderClient.VerifyAllInterceptionsEnded(t) }) } @@ -241,9 +238,10 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { sp := aibridge.NewSSEParser() require.NoError(t, sp.Parse(resp.Body)) - // No thoughts should be recorded when there are no tool calls. - thoughts := recorderClient.RecordedModelThoughts() - assert.Empty(t, thoughts) + // No tool usages (and therefore no thoughts) should be recorded + // when there are no tool calls. + toolUsages := recorderClient.RecordedToolUsages() + assert.Empty(t, toolUsages) recorderClient.VerifyAllInterceptionsEnded(t) }) diff --git a/internal/integrationtest/trace_test.go b/internal/integrationtest/trace_test.go index bdfb7f7f..88bec31c 100644 --- a/internal/integrationtest/trace_test.go +++ b/internal/integrationtest/trace_test.go @@ -51,7 +51,6 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 1, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, - {"Intercept.RecordModelThought", 1, codes.Unset}, {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } @@ -64,7 +63,6 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 2, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, - {"Intercept.RecordModelThought", 1, codes.Unset}, {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } diff --git 
a/internal/testutil/mock_recorder.go b/internal/testutil/mock_recorder.go index 55eafcd6..09bcac39 100644 --- a/internal/testutil/mock_recorder.go +++ b/internal/testutil/mock_recorder.go @@ -20,7 +20,6 @@ type MockRecorder struct { tokenUsages []*recorder.TokenUsageRecord userPrompts []*recorder.PromptUsageRecord toolUsages []*recorder.ToolUsageRecord - modelThoughts []*recorder.ModelThoughtRecord interceptionsEnd map[string]*recorder.InterceptionRecordEnded } @@ -65,13 +64,6 @@ func (m *MockRecorder) RecordToolUsage(ctx context.Context, req *recorder.ToolUs return nil } -func (m *MockRecorder) RecordModelThought(ctx context.Context, req *recorder.ModelThoughtRecord) error { - m.mu.Lock() - defer m.mu.Unlock() - m.modelThoughts = append(m.modelThoughts, req) - return nil -} - // RecordedTokenUsages returns a copy of recorded token usages in a thread-safe manner. // Note: This is a shallow clone - the slice is copied but the pointers reference the // same underlying records. This is sufficient for our test assertions which only read @@ -136,13 +128,6 @@ func (m *MockRecorder) ToolUsages() []*recorder.ToolUsageRecord { return m.toolUsages } -// RecordedModelThoughts returns a copy of recorded model thoughts in a thread-safe manner. -func (m *MockRecorder) RecordedModelThoughts() []*recorder.ModelThoughtRecord { - m.mu.Lock() - defer m.mu.Unlock() - return slices.Clone(m.modelThoughts) -} - // RecordedInterceptionEnd returns the stored InterceptionRecordEnded for the // given interception ID, or nil if not found. 
func (m *MockRecorder) RecordedInterceptionEnd(id string) *recorder.InterceptionRecordEnded { diff --git a/recorder/recorder.go b/recorder/recorder.go index 3bd657f9..6e37b632 100644 --- a/recorder/recorder.go +++ b/recorder/recorder.go @@ -116,24 +116,6 @@ func (r *RecorderWrapper) RecordToolUsage(ctx context.Context, req *ToolUsageRec return err } -func (r *RecorderWrapper) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) (outErr error) { - ctx, span := r.tracer.Start(ctx, "Intercept.RecordModelThought", trace.WithAttributes(tracing.InterceptionAttributesFromContext(ctx)...)) - defer tracing.EndSpanErr(span, &outErr) - - client, err := r.clientFn() - if err != nil { - return fmt.Errorf("acquire client: %w", err) - } - - req.CreatedAt = time.Now() - if err = client.RecordModelThought(ctx, req); err == nil { - return nil - } - - r.logger.Warn(ctx, "failed to record model thought", slog.Error(err), slog.F("interception_id", req.InterceptionID)) - return err -} - func NewRecorder(logger slog.Logger, tracer trace.Tracer, clientFn func() (Recorder, error)) *RecorderWrapper { return &RecorderWrapper{ logger: logger, @@ -277,22 +259,6 @@ func (a *AsyncRecorder) RecordToolUsage(ctx context.Context, req *ToolUsageRecor return nil // Caller is not interested in error. } -func (a *AsyncRecorder) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error { - a.wg.Add(1) - go func() { - defer a.wg.Done() - timedCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), a.timeout) - defer cancel() - - err := a.wrapped.RecordModelThought(timedCtx, req) - if err != nil { - a.logger.Warn(timedCtx, "failed to record model thought", slog.Error(err), slog.F("payload", req)) - } - }() - - return nil // Caller is not interested in error. 
-} - func (a *AsyncRecorder) Wait() { a.wg.Wait() } diff --git a/recorder/types.go b/recorder/types.go index 609e7142..d3cbaf73 100644 --- a/recorder/types.go +++ b/recorder/types.go @@ -18,9 +18,8 @@ type Recorder interface { // RecordPromptUsage records the prompts used in an interception with an upstream AI provider. RecordPromptUsage(ctx context.Context, req *PromptUsageRecord) error // RecordToolUsage records the tools used in an interception with an upstream AI provider. + // Any associated model thoughts should be included in the ToolUsageRecord. RecordToolUsage(ctx context.Context, req *ToolUsageRecord) error - // RecordModelThought records the reasoning/thinking produced in an interception with an upstream AI provider. - RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error } type ToolArgs any @@ -74,12 +73,11 @@ type ToolUsageRecord struct { InvocationError error Metadata Metadata CreatedAt time.Time + ModelThoughts []*ModelThoughtRecord } type ModelThoughtRecord struct { - InterceptionID string - ProviderToolCallID string - Content string - Metadata Metadata - CreatedAt time.Time + Content string + Metadata Metadata + CreatedAt time.Time } From daaec9fe2500ccf298d7558d47ad60159dde10d3 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 6 Mar 2026 14:18:23 +0200 Subject: [PATCH 03/14] feat: capture responses reasoning Signed-off-by: Danny Kopping --- fixtures/anthropic/simple.txtar | 69 ++++++++------- .../blocking/single_builtin_tool.txtar | 11 +++ .../responses/streaming/builtin_tool.txtar | 44 +++++++--- .../openai/responses/streaming/simple.txtar | 64 +++++++++----- intercept/responses/base.go | 35 ++++++++ intercept/responses/injected_tools.go | 11 ++- internal/integrationtest/bridge_test.go | 50 +++-------- internal/integrationtest/responses_test.go | 84 +++++++++++++++++++ 8 files changed, 262 insertions(+), 106 deletions(-) diff --git a/fixtures/anthropic/simple.txtar b/fixtures/anthropic/simple.txtar index f1300b7b..235138cc 
100644 --- a/fixtures/anthropic/simple.txtar +++ b/fixtures/anthropic/simple.txtar @@ -23,91 +23,100 @@ event: message_start data: {"type":"message_start","message":{"id":"msg_01Pvyf26bY17RcjmWfJsXGBn","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":18,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":1,"service_tier":"standard"}} } event: content_block_start -data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } +data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"This is a classic philosophical question about medieval scholasticism. I'll give a thoughtful answer."}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"text","text":""} } event: ping data: {"type": "ping"} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"This"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"This"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" is a famous philosophical question often used to illustrate medieval"}} +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" is a famous philosophical question often used to illustrate medieval"}} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" scholastic debates that seem pointless or ov"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" scholastic debates that seem pointless or ov"} } event: 
content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"erly abstract. The question \"How many angels can dance on the head of"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"erly abstract. The question \"How many angels can dance on the head of"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" a pin?\" is typically cited as an example of us"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" a pin?\" is typically cited as an example of us"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"eless speculation.\n\nHistorically, medieval theolog"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"eless speculation.\n\nHistorically, medieval theolog"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ians did debate the nature of angels -"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"ians did debate the nature of angels -"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" whether they were incorporeal beings, how"}} +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" whether they were incorporeal beings, how"}} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" they occupied space, and whether multiple angels could exist"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" they occupied space, and whether multiple angels could exist"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" in the same location. 
However, there"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" in the same location. However, there"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s little evidence they literally"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"'s little evidence they literally"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" debated dancing angels on pinheads.\n\nThe question has"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" debated dancing angels on pinheads.\n\nThe question has"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" no factual answer since it depends on assumptions about:"}} +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" no factual answer since it depends on assumptions about:"}} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n- The existence and nature of angels\n- Whether"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"\n- The existence and nature of angels\n- Whether"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" incorporeal beings occupy physical space\n- What"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" incorporeal beings occupy physical space\n- What"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" constitutes \"dancing\" for a spiritual"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" constitutes \"dancing\" for a spiritual"} } event: content_block_delta -data: 
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" entity\n- The size of both the"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" entity\n- The size of both the"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" pin and the angels\n\nIt's become a metaph"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" pin and the angels\n\nIt's become a metaph"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"or for overthinking trivial matters"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"or for overthinking trivial matters"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" or getting lost in theoretical discussions disconnected from practical reality."} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" or getting lost in theoretical discussions disconnected from practical reality."} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Some use it to critique certain types of academic"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" Some use it to critique certain types of academic"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" or theological debate, while others defen"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" or theological debate, while others defen"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"d the value of exploring fundamental questions about existence an"} } +data: 
{"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"d the value of exploring fundamental questions about existence an"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"d metaphysics.\n\nSo while u"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"d metaphysics.\n\nSo while u"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"nanswerable literally, it serves as an interesting lens"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"nanswerable literally, it serves as an interesting lens"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" for discussing the nature of philosophical inquiry itself."} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" for discussing the nature of philosophical inquiry itself."} } event: content_block_stop -data: {"type":"content_block_stop","index":0 } +data: {"type":"content_block_stop","index":1 } event: message_delta data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":240} } @@ -122,6 +131,10 @@ data: {"type":"message_stop" } "role": "assistant", "model": "claude-sonnet-4-20250514", "content": [ + { + "type": "thinking", + "thinking": "This is a classic philosophical question about medieval scholasticism. I'll give a thoughtful answer." + }, { "type": "text", "text": "This is a famous philosophical question, often called \"How many angels can dance on the head of a pin?\" It's typically used to represent pointless or overly abstract theological debates.\n\nThe question doesn't have a literal answer because:\n\n1. 
**Historical context**: It's often attributed to medieval scholastic philosophers, though there's little evidence they actually debated this exact question. It became a popular way to mock what some saw as useless academic arguments.\n\n2. **Philosophical purpose**: The question highlights the difficulty of discussing non-physical beings (angels) in physical terms (space on a pinhead).\n\n3. **Different interpretations**: \n - If angels are purely spiritual, they might not take up physical space at all\n - If they do occupy space, we'd need to know their \"size\"\n - The question might be asking about the nature of space, matter, and spirit\n\nSo the real answer is that it's not meant to be answered literally - it's a thought experiment about the limits of rational inquiry and the sometimes absurd directions theological speculation can take.\n\nWould you like to explore the philosophical implications behind this question, or were you thinking about it in a different context?" diff --git a/fixtures/openai/responses/blocking/single_builtin_tool.txtar b/fixtures/openai/responses/blocking/single_builtin_tool.txtar index f41bd3cc..14299ff3 100644 --- a/fixtures/openai/responses/blocking/single_builtin_tool.txtar +++ b/fixtures/openai/responses/blocking/single_builtin_tool.txtar @@ -50,6 +50,17 @@ "max_tool_calls": null, "model": "gpt-4.1-2025-04-14", "output": [ + { + "id": "rs_0da6045a8b68fa5200695fa23e100081a19bf68887d47ae93d", + "type": "reasoning", + "status": "completed", + "summary": [ + { + "type": "summary_text", + "text": "The user wants to add 3 and 5. Let me call the add function." 
+ } + ] + }, { "id": "fc_0da6045a8b68fa5200695fa23e198081a19bf68887d47ae93d", "type": "function_call", diff --git a/fixtures/openai/responses/streaming/builtin_tool.txtar b/fixtures/openai/responses/streaming/builtin_tool.txtar index b6a7a0a5..98793f3b 100644 --- a/fixtures/openai/responses/streaming/builtin_tool.txtar +++ b/fixtures/openai/responses/streaming/builtin_tool.txtar @@ -40,41 +40,59 @@ event: response.in_progress data: {"type":"response.in_progress","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} event: response.output_item.added -data: {"type":"response.output_item.added","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"in_progress","arguments":"","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":0,"sequence_number":2} +data: {"type":"response.output_item.added","item":{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} + +event: 
response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"summary_index":0,"delta":"The user wants to add 3 and 5. Let me call the add function.","sequence_number":4} + +event: response.reasoning_summary_text.done +data: {"type":"response.reasoning_summary_text.done","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"summary_index":0,"text":"The user wants to add 3 and 5. Let me call the add function.","sequence_number":5} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"part":{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."},"summary_index":0,"sequence_number":6} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants to add 3 and 5. 
Let me call the add function."}]},"output_index":0,"sequence_number":7} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"in_progress","arguments":"","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":1,"sequence_number":8} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWZHP8i4lSgQYT","output_index":0,"sequence_number":3} +data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWZHP8i4lSgQYT","output_index":1,"sequence_number":9} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"a","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"yC1iubuqc098ZSH","output_index":0,"sequence_number":4} +data: {"type":"response.function_call_arguments.delta","delta":"a","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"yC1iubuqc098ZSH","output_index":1,"sequence_number":10} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"G17nNbWUcJkqA2","output_index":0,"sequence_number":5} +data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"G17nNbWUcJkqA2","output_index":1,"sequence_number":11} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"3","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"Mj71L4eeLZbIEFU","output_index":0,"sequence_number":6} 
+data: {"type":"response.function_call_arguments.delta","delta":"3","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"Mj71L4eeLZbIEFU","output_index":1,"sequence_number":12} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":",\"","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"ZchcCauvlPtVc7","output_index":0,"sequence_number":7} +data: {"type":"response.function_call_arguments.delta","delta":",\"","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"ZchcCauvlPtVc7","output_index":1,"sequence_number":13} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"b","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWLYMrsBI3ZHKVP","output_index":0,"sequence_number":8} +data: {"type":"response.function_call_arguments.delta","delta":"b","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWLYMrsBI3ZHKVP","output_index":1,"sequence_number":14} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"n4iUzpnbPE4DnO","output_index":0,"sequence_number":9} +data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"n4iUzpnbPE4DnO","output_index":1,"sequence_number":15} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"5","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"23mO3rxkXqDOi6g","output_index":0,"sequence_number":10} +data: 
{"type":"response.function_call_arguments.delta","delta":"5","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"23mO3rxkXqDOi6g","output_index":1,"sequence_number":16} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"AQnBsNz7GqkdylH","output_index":0,"sequence_number":11} +data: {"type":"response.function_call_arguments.delta","delta":"}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"AQnBsNz7GqkdylH","output_index":1,"sequence_number":17} event: response.function_call_arguments.done -data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":3,\"b\":5}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","output_index":0,"sequence_number":12} +data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":3,\"b\":5}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","output_index":1,"sequence_number":18} event: response.output_item.done -data: {"type":"response.output_item.done","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":0,"sequence_number":13} +data: {"type":"response.output_item.done","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":1,"sequence_number":19} event: response.completed -data: 
{"type":"response.completed","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"completed","background":false,"completed_at":1767875312,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":58,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":76},"user":null,"metadata":{}},"sequence_number":14} +data: {"type":"response.completed","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"completed","background":false,"completed_at":1767875312,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants to add 3 and 5. 
Let me call the add function."}]},{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":58,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":76},"user":null,"metadata":{}},"sequence_number":20} diff --git a/fixtures/openai/responses/streaming/simple.txtar b/fixtures/openai/responses/streaming/simple.txtar index d86aa6e4..c8736f9d 100644 --- a/fixtures/openai/responses/streaming/simple.txtar +++ b/fixtures/openai/responses/streaming/simple.txtar @@ -13,71 +13,89 @@ event: response.in_progress data: 
{"type":"response.in_progress","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} event: response.output_item.added -data: {"type":"response.output_item.added","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} +data: {"type":"response.output_item.added","item":{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} + +event: response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"summary_index":0,"delta":"The user wants a joke. 
I will tell a classic scarecrow joke.","sequence_number":4} + +event: response.reasoning_summary_text.done +data: {"type":"response.reasoning_summary_text.done","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"summary_index":0,"text":"The user wants a joke. I will tell a classic scarecrow joke.","sequence_number":5} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."},"summary_index":0,"sequence_number":6} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."}]},"output_index":0,"sequence_number":7} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":1,"sequence_number":8} event: response.content_part.added -data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} +data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":9} event: response.output_text.delta -data: 
{"type":"response.output_text.delta","content_index":0,"delta":"Why","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N16SG5UiLncOU","output_index":0,"sequence_number":4} +data: {"type":"response.output_text.delta","content_index":0,"delta":"Why","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N16SG5UiLncOU","output_index":1,"sequence_number":10} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" did","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"OpojJ3pv0h55","output_index":0,"sequence_number":5} +data: {"type":"response.output_text.delta","content_index":0,"delta":" did","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"OpojJ3pv0h55","output_index":1,"sequence_number":11} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" the","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"11RCrnBxLo5x","output_index":0,"sequence_number":6} +data: {"type":"response.output_text.delta","content_index":0,"delta":" the","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"11RCrnBxLo5x","output_index":1,"sequence_number":12} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" scare","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"QZrRBlk6BV","output_index":0,"sequence_number":7} +data: {"type":"response.output_text.delta","content_index":0,"delta":" scare","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"QZrRBlk6BV","output_index":1,"sequence_number":13} event: response.output_text.delta -data: 
{"type":"response.output_text.delta","content_index":0,"delta":"crow","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"gp7F8IVupiHG","output_index":0,"sequence_number":8} +data: {"type":"response.output_text.delta","content_index":0,"delta":"crow","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"gp7F8IVupiHG","output_index":1,"sequence_number":14} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" win","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"uKq4X8mT1jl9","output_index":0,"sequence_number":9} +data: {"type":"response.output_text.delta","content_index":0,"delta":" win","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"uKq4X8mT1jl9","output_index":1,"sequence_number":15} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" an","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"2Ox5JzaAsJHuT","output_index":0,"sequence_number":10} +data: {"type":"response.output_text.delta","content_index":0,"delta":" an","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"2Ox5JzaAsJHuT","output_index":1,"sequence_number":16} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" award","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"ZOQbZabNAQ","output_index":0,"sequence_number":11} +data: {"type":"response.output_text.delta","content_index":0,"delta":" award","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"ZOQbZabNAQ","output_index":1,"sequence_number":17} event: response.output_text.delta -data: 
{"type":"response.output_text.delta","content_index":0,"delta":"?\n\n","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N2dSd0FHBxooR","output_index":0,"sequence_number":12} +data: {"type":"response.output_text.delta","content_index":0,"delta":"?\n\n","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N2dSd0FHBxooR","output_index":1,"sequence_number":18} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"Because","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"LZ1O4laHt","output_index":0,"sequence_number":13} +data: {"type":"response.output_text.delta","content_index":0,"delta":"Because","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"LZ1O4laHt","output_index":1,"sequence_number":19} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" he","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dqcS6ePaMvxMD","output_index":0,"sequence_number":14} +data: {"type":"response.output_text.delta","content_index":0,"delta":" he","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dqcS6ePaMvxMD","output_index":1,"sequence_number":20} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" was","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"nR6CtC7MUsWW","output_index":0,"sequence_number":15} +data: {"type":"response.output_text.delta","content_index":0,"delta":" was","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"nR6CtC7MUsWW","output_index":1,"sequence_number":21} event: response.output_text.delta -data: 
{"type":"response.output_text.delta","content_index":0,"delta":" outstanding","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dNVG","output_index":0,"sequence_number":16} +data: {"type":"response.output_text.delta","content_index":0,"delta":" outstanding","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dNVG","output_index":1,"sequence_number":22} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" in","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"P7w4jjOcdVOla","output_index":0,"sequence_number":17} +data: {"type":"response.output_text.delta","content_index":0,"delta":" in","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"P7w4jjOcdVOla","output_index":1,"sequence_number":23} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" his","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"u9dg4RLIld4e","output_index":0,"sequence_number":18} +data: {"type":"response.output_text.delta","content_index":0,"delta":" his","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"u9dg4RLIld4e","output_index":1,"sequence_number":24} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" field","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"qefuqzOCOy","output_index":0,"sequence_number":19} +data: {"type":"response.output_text.delta","content_index":0,"delta":" field","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"qefuqzOCOy","output_index":1,"sequence_number":25} event: response.output_text.delta -data: 
{"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"DT9j4dSh0xyJdxU","output_index":0,"sequence_number":20} +data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"DT9j4dSh0xyJdxU","output_index":1,"sequence_number":26} event: response.output_text.done -data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"output_index":0,"sequence_number":21,"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"} +data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"output_index":1,"sequence_number":27,"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"} event: response.content_part.done -data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"},"sequence_number":22} +data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"},"sequence_number":28} event: response.output_item.done -data: {"type":"response.output_item.done","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was 
outstanding in his field!"}],"role":"assistant"},"output_index":0,"sequence_number":23} +data: {"type":"response.output_item.done","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"},"output_index":1,"sequence_number":29} event: response.completed -data: {"type":"response.completed","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"completed","background":false,"completed_at":1767874660,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":11,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":29},"user":null,"metadata":{}},"sequence_number":24} +data: 
{"type":"response.completed","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"completed","background":false,"completed_at":1767874660,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."}]},{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":11,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":29},"user":null,"metadata":{}},"sequence_number":30} diff --git a/intercept/responses/base.go b/intercept/responses/base.go index 69db3878..0a889bb5 100644 --- a/intercept/responses/base.go +++ b/intercept/responses/base.go @@ -260,6 +260,9 @@ func (i *responsesInterceptionBase) recordNonInjectedToolUsage(ctx context.Conte return } + // Capture any reasoning items from the response output as model thoughts. 
+ thoughtRecords := i.extractModelThoughts(response) + for _, item := range response.Output { var args recorder.ToolArgs @@ -280,9 +283,13 @@ func (i *responsesInterceptionBase) recordNonInjectedToolUsage(ctx context.Conte Tool: item.Name, Args: args, Injected: false, + ModelThoughts: thoughtRecords, }); err != nil { i.logger.Warn(ctx, "failed to record tool usage", slog.Error(err), slog.F("tool", item.Name)) } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil } } @@ -326,6 +333,34 @@ func (i *responsesInterceptionBase) recordTokenUsage(ctx context.Context, respon } } +// extractModelThoughts extracts reasoning summary items from response output +// and converts them to ModelThoughtRecords for association with tool usage. +func (i *responsesInterceptionBase) extractModelThoughts(response *responses.Response) []*recorder.ModelThoughtRecord { + if response == nil { + return nil + } + + var thoughts []*recorder.ModelThoughtRecord + for _, item := range response.Output { + if item.Type != string(constant.ValueOf[constant.Reasoning]()) { + continue + } + + reasoning := item.AsReasoning() + for _, summary := range reasoning.Summary { + if summary.Text == "" { + continue + } + thoughts = append(thoughts, &recorder.ModelThoughtRecord{ + Content: summary.Text, + CreatedAt: time.Now(), + }) + } + } + + return thoughts +} + func (i *responsesInterceptionBase) hasInjectableTools() bool { return i.mcpProxy != nil && len(i.mcpProxy.ListTools()) > 0 } diff --git a/intercept/responses/injected_tools.go b/intercept/responses/injected_tools.go index c3934fa3..db81941f 100644 --- a/intercept/responses/injected_tools.go +++ b/intercept/responses/injected_tools.go @@ -109,9 +109,15 @@ func (i *responsesInterceptionBase) handleInjectedToolCalls(ctx context.Context, return nil, nil } + // Capture any reasoning items from the response output as model thoughts. 
+ thoughtRecords := i.extractModelThoughts(response) + var results []responses.ResponseInputItemUnionParam for _, fc := range pending { - results = append(results, i.invokeInjectedTool(ctx, response.ID, fc)) + results = append(results, i.invokeInjectedTool(ctx, response.ID, fc, thoughtRecords)) + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil } return results, nil @@ -196,7 +202,7 @@ func (i *responsesInterceptionBase) getPendingInjectedToolCalls(response *respon return calls } -func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, responseID string, fc responses.ResponseFunctionToolCall) responses.ResponseInputItemUnionParam { +func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, responseID string, fc responses.ResponseFunctionToolCall, thoughtRecords []*recorder.ModelThoughtRecord) responses.ResponseInputItemUnionParam { tool := i.mcpProxy.GetTool(fc.Name) if tool == nil { return responses.ResponseInputItemParamOfFunctionCallOutput(fc.CallID, fmt.Sprintf("error: unknown injected function %q", fc.ID)) @@ -213,6 +219,7 @@ func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, resp Args: args, Injected: true, InvocationError: err, + ModelThoughts: thoughtRecords, }) var output string diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index 4ebcc997..918cfdee 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -155,29 +155,14 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { t.Cleanup(cancel) fix := fixtures.Parse(t, fixtures.AntSingleBuiltinTool) - upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) - - recorderClient := &testutil.MockRecorder{} - logger := slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) - providers := 
[]aibridge.Provider{provider.NewAnthropic(anthropicCfg(upstream.URL, apiKey), nil)} - b, err := aibridge.NewRequestBridge(ctx, providers, recorderClient, mcp.NewServerProxyManager(nil, testTracer), logger, nil, testTracer) - require.NoError(t, err) + upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) - mockSrv := httptest.NewUnstartedServer(b) - t.Cleanup(mockSrv.Close) - mockSrv.Config.BaseContext = func(_ net.Listener) context.Context { - return aibcontext.AsActor(ctx, userID, nil) - } - mockSrv.Start() + bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) reqBody, err := sjson.SetBytes(fix.Request(), "stream", tc.streaming) require.NoError(t, err) - req := createAnthropicMessagesReq(t, mockSrv.URL, reqBody) - client := &http.Client{} - resp, err := client.Do(req) - require.NoError(t, err) + resp := bridgeServer.makeRequest(t, http.MethodPost, pathAnthropicMessages, reqBody) require.Equal(t, http.StatusOK, resp.StatusCode) - defer resp.Body.Close() if tc.streaming { sp := aibridge.NewSSEParser() @@ -187,7 +172,7 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { } // Verify tool usage was recorded with associated model thoughts. 
- toolUsages := recorderClient.RecordedToolUsages() + toolUsages := bridgeServer.Recorder.RecordedToolUsages() require.Len(t, toolUsages, 1) assert.Equal(t, "Read", toolUsages[0].Tool) assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) @@ -197,7 +182,7 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants me to read") assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, tc.expectedThinkingSubstr) - recorderClient.VerifyAllInterceptionsEnded(t) + bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) }) } }) @@ -211,39 +196,24 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { // Use the simple fixture which has no tool calls — any thinking blocks // should not be persisted since they can't be associated with a tool call. fix := fixtures.Parse(t, fixtures.AntSimple) - upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) - - recorderClient := &testutil.MockRecorder{} - logger := slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) - providers := []aibridge.Provider{provider.NewAnthropic(anthropicCfg(upstream.URL, apiKey), nil)} - b, err := aibridge.NewRequestBridge(ctx, providers, recorderClient, mcp.NewServerProxyManager(nil, testTracer), logger, nil, testTracer) - require.NoError(t, err) + upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) - mockSrv := httptest.NewUnstartedServer(b) - t.Cleanup(mockSrv.Close) - mockSrv.Config.BaseContext = func(_ net.Listener) context.Context { - return aibcontext.AsActor(ctx, userID, nil) - } - mockSrv.Start() + bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) reqBody, err := sjson.SetBytes(fix.Request(), "stream", true) require.NoError(t, err) - req := createAnthropicMessagesReq(t, mockSrv.URL, reqBody) - client := &http.Client{} - resp, err := client.Do(req) - require.NoError(t, err) + resp := bridgeServer.makeRequest(t, http.MethodPost, pathAnthropicMessages, reqBody) 
require.Equal(t, http.StatusOK, resp.StatusCode) - defer resp.Body.Close() sp := aibridge.NewSSEParser() require.NoError(t, sp.Parse(resp.Body)) // No tool usages (and therefore no thoughts) should be recorded // when there are no tool calls. - toolUsages := recorderClient.RecordedToolUsages() + toolUsages := bridgeServer.Recorder.RecordedToolUsages() assert.Empty(t, toolUsages) - recorderClient.VerifyAllInterceptionsEnded(t) + bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) }) } diff --git a/internal/integrationtest/responses_test.go b/internal/integrationtest/responses_test.go index eee1235f..0573e7dd 100644 --- a/internal/integrationtest/responses_test.go +++ b/internal/integrationtest/responses_test.go @@ -374,6 +374,7 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) { require.Len(t, recordedTools, 1) recordedTools[0].InterceptionID = tc.expectToolRecorded.InterceptionID // ignore interception id (interception id is not constant and response doesn't contain it) recordedTools[0].CreatedAt = tc.expectToolRecorded.CreatedAt // ignore time + recordedTools[0].ModelThoughts = tc.expectToolRecorded.ModelThoughts // ignore model thoughts (tested separately) require.Equal(t, tc.expectToolRecorded, recordedTools[0]) } else { require.Empty(t, recordedTools) @@ -936,6 +937,89 @@ func TestResponsesInjectedTool(t *testing.T) { } } +func TestResponsesModelThoughts(t *testing.T) { + t.Parallel() + + t.Run("reasoning captured with builtin tool", func(t *testing.T) { + t.Parallel() + + cases := []struct { + streaming bool + expectedToolCallID string + }{ + { + streaming: false, + expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + }, + { + streaming: true, + expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + }, + } + + for _, tc := range cases { + t.Run(fmt.Sprintf("streaming=%v", tc.streaming), func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) + + var fix fixtures.Fixture + if 
tc.streaming { + fix = fixtures.Parse(t, fixtures.OaiResponsesStreamingBuiltinTool) + } else { + fix = fixtures.Parse(t, fixtures.OaiResponsesBlockingSingleBuiltinTool) + } + upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) + + bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) + + resp := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, fix.Request()) + require.Equal(t, http.StatusOK, resp.StatusCode) + + _, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + // Verify tool usage was recorded with associated model thoughts. + toolUsages := bridgeServer.Recorder.RecordedToolUsages() + require.Len(t, toolUsages, 1) + require.Equal(t, "add", toolUsages[0].Tool) + require.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) + + // Model thoughts should be embedded in the tool usage record. + require.Len(t, toolUsages[0].ModelThoughts, 1) + require.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants to add 3 and 5") + require.Contains(t, toolUsages[0].ModelThoughts[0].Content, "Let me call the add function") + }) + } + }) + + t.Run("no thoughts without tool calls", func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) + + // Use the simple fixture which has no tool calls — any reasoning + // should not be persisted since it can't be associated with a tool call. + fix := fixtures.Parse(t, fixtures.OaiResponsesStreamingSimple) + upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) + + bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) + + resp := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, fix.Request()) + require.Equal(t, http.StatusOK, resp.StatusCode) + + _, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + // No tool usages (and therefore no thoughts) should be recorded + // when there are no tool calls. 
+ toolUsages := bridgeServer.Recorder.RecordedToolUsages() + require.Empty(t, toolUsages) + }) +} + func requireResponsesError(t *testing.T, code int, message string, body []byte) { var respErr responses.Error err := json.Unmarshal(body, &respErr) From 1c9a526c150db92976bdf2c0ca2507b0238b4a22 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 6 Mar 2026 15:54:11 +0200 Subject: [PATCH 04/14] chore: refactor tests Signed-off-by: Danny Kopping --- .../openai/responses/streaming/simple.txtar | 64 +++++++------------ internal/integrationtest/bridge_test.go | 16 ++--- internal/integrationtest/responses_test.go | 2 +- 3 files changed, 30 insertions(+), 52 deletions(-) diff --git a/fixtures/openai/responses/streaming/simple.txtar b/fixtures/openai/responses/streaming/simple.txtar index c8736f9d..d86aa6e4 100644 --- a/fixtures/openai/responses/streaming/simple.txtar +++ b/fixtures/openai/responses/streaming/simple.txtar @@ -13,89 +13,71 @@ event: response.in_progress data: {"type":"response.in_progress","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} event: response.output_item.added -data: 
{"type":"response.output_item.added","item":{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} - -event: response.reasoning_summary_part.added -data: {"type":"response.reasoning_summary_part.added","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} - -event: response.reasoning_summary_text.delta -data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"summary_index":0,"delta":"The user wants a joke. I will tell a classic scarecrow joke.","sequence_number":4} - -event: response.reasoning_summary_text.done -data: {"type":"response.reasoning_summary_text.done","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"summary_index":0,"text":"The user wants a joke. I will tell a classic scarecrow joke.","sequence_number":5} - -event: response.reasoning_summary_part.done -data: {"type":"response.reasoning_summary_part.done","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."},"summary_index":0,"sequence_number":6} - -event: response.output_item.done -data: {"type":"response.output_item.done","item":{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants a joke. 
I will tell a classic scarecrow joke."}]},"output_index":0,"sequence_number":7} - -event: response.output_item.added -data: {"type":"response.output_item.added","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":1,"sequence_number":8} +data: {"type":"response.output_item.added","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} event: response.content_part.added -data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":9} +data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"Why","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N16SG5UiLncOU","output_index":1,"sequence_number":10} +data: {"type":"response.output_text.delta","content_index":0,"delta":"Why","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N16SG5UiLncOU","output_index":0,"sequence_number":4} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" did","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"OpojJ3pv0h55","output_index":1,"sequence_number":11} +data: {"type":"response.output_text.delta","content_index":0,"delta":" 
did","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"OpojJ3pv0h55","output_index":0,"sequence_number":5} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" the","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"11RCrnBxLo5x","output_index":1,"sequence_number":12} +data: {"type":"response.output_text.delta","content_index":0,"delta":" the","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"11RCrnBxLo5x","output_index":0,"sequence_number":6} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" scare","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"QZrRBlk6BV","output_index":1,"sequence_number":13} +data: {"type":"response.output_text.delta","content_index":0,"delta":" scare","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"QZrRBlk6BV","output_index":0,"sequence_number":7} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"crow","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"gp7F8IVupiHG","output_index":1,"sequence_number":14} +data: {"type":"response.output_text.delta","content_index":0,"delta":"crow","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"gp7F8IVupiHG","output_index":0,"sequence_number":8} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" win","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"uKq4X8mT1jl9","output_index":1,"sequence_number":15} +data: {"type":"response.output_text.delta","content_index":0,"delta":" 
win","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"uKq4X8mT1jl9","output_index":0,"sequence_number":9} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" an","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"2Ox5JzaAsJHuT","output_index":1,"sequence_number":16} +data: {"type":"response.output_text.delta","content_index":0,"delta":" an","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"2Ox5JzaAsJHuT","output_index":0,"sequence_number":10} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" award","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"ZOQbZabNAQ","output_index":1,"sequence_number":17} +data: {"type":"response.output_text.delta","content_index":0,"delta":" award","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"ZOQbZabNAQ","output_index":0,"sequence_number":11} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"?\n\n","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N2dSd0FHBxooR","output_index":1,"sequence_number":18} +data: {"type":"response.output_text.delta","content_index":0,"delta":"?\n\n","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N2dSd0FHBxooR","output_index":0,"sequence_number":12} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"Because","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"LZ1O4laHt","output_index":1,"sequence_number":19} +data: 
{"type":"response.output_text.delta","content_index":0,"delta":"Because","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"LZ1O4laHt","output_index":0,"sequence_number":13} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" he","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dqcS6ePaMvxMD","output_index":1,"sequence_number":20} +data: {"type":"response.output_text.delta","content_index":0,"delta":" he","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dqcS6ePaMvxMD","output_index":0,"sequence_number":14} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" was","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"nR6CtC7MUsWW","output_index":1,"sequence_number":21} +data: {"type":"response.output_text.delta","content_index":0,"delta":" was","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"nR6CtC7MUsWW","output_index":0,"sequence_number":15} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" outstanding","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dNVG","output_index":1,"sequence_number":22} +data: {"type":"response.output_text.delta","content_index":0,"delta":" outstanding","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dNVG","output_index":0,"sequence_number":16} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" in","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"P7w4jjOcdVOla","output_index":1,"sequence_number":23} +data: 
{"type":"response.output_text.delta","content_index":0,"delta":" in","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"P7w4jjOcdVOla","output_index":0,"sequence_number":17} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" his","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"u9dg4RLIld4e","output_index":1,"sequence_number":24} +data: {"type":"response.output_text.delta","content_index":0,"delta":" his","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"u9dg4RLIld4e","output_index":0,"sequence_number":18} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" field","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"qefuqzOCOy","output_index":1,"sequence_number":25} +data: {"type":"response.output_text.delta","content_index":0,"delta":" field","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"qefuqzOCOy","output_index":0,"sequence_number":19} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"DT9j4dSh0xyJdxU","output_index":1,"sequence_number":26} +data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"DT9j4dSh0xyJdxU","output_index":0,"sequence_number":20} event: response.output_text.done -data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"output_index":1,"sequence_number":27,"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"} +data: 
{"type":"response.output_text.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"output_index":0,"sequence_number":21,"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"} event: response.content_part.done -data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"},"sequence_number":28} +data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"},"sequence_number":22} event: response.output_item.done -data: {"type":"response.output_item.done","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"},"output_index":1,"sequence_number":29} +data: {"type":"response.output_item.done","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"},"output_index":0,"sequence_number":23} event: response.completed -data: 
{"type":"response.completed","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"completed","background":false,"completed_at":1767874660,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."}]},{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":11,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":29},"user":null,"metadata":{}},"sequence_number":30} +data: {"type":"response.completed","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"completed","background":false,"completed_at":1767874660,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an 
award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":11,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":29},"user":null,"metadata":{}},"sequence_number":24} diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index 918cfdee..9cdf7ac1 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -131,19 +131,16 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { t.Parallel() cases := []struct { - streaming bool - expectedToolCallID string - expectedThinkingSubstr string + streaming bool + expectedToolCallID string }{ { - streaming: true, - expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", - expectedThinkingSubstr: "Let me find and read it.", + streaming: true, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", }, { - streaming: false, - expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", - expectedThinkingSubstr: "Let me find and read it.", + streaming: false, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", }, } @@ -180,7 +177,6 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { // Model thoughts should be embedded in the tool usage record. 
require.Len(t, toolUsages[0].ModelThoughts, 1) assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants me to read") - assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, tc.expectedThinkingSubstr) bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) }) diff --git a/internal/integrationtest/responses_test.go b/internal/integrationtest/responses_test.go index 0573e7dd..900ab783 100644 --- a/internal/integrationtest/responses_test.go +++ b/internal/integrationtest/responses_test.go @@ -1002,7 +1002,7 @@ func TestResponsesModelThoughts(t *testing.T) { // Use the simple fixture which has no tool calls — any reasoning // should not be persisted since it can't be associated with a tool call. - fix := fixtures.Parse(t, fixtures.OaiResponsesStreamingSimple) + fix := fixtures.Parse(t, fixtures.OaiResponsesStreamingCodex) upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) From 7e4a15a418570fe4d3494c64cc518a549a60d5f2 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 6 Mar 2026 16:07:06 +0200 Subject: [PATCH 05/14] chore: test multiple thoughts Signed-off-by: Danny Kopping --- .../multi_thinking_builtin_tool.txtar | 136 +++++++++++++++++ fixtures/fixtures.go | 9 ++ .../multi_reasoning_builtin_tool.txtar | 142 ++++++++++++++++++ .../multi_reasoning_builtin_tool.txtar | 94 ++++++++++++ internal/integrationtest/bridge_test.go | 34 ++++- internal/integrationtest/responses_test.go | 42 ++++-- 6 files changed, 437 insertions(+), 20 deletions(-) create mode 100644 fixtures/anthropic/multi_thinking_builtin_tool.txtar create mode 100644 fixtures/openai/responses/blocking/multi_reasoning_builtin_tool.txtar create mode 100644 fixtures/openai/responses/streaming/multi_reasoning_builtin_tool.txtar diff --git a/fixtures/anthropic/multi_thinking_builtin_tool.txtar b/fixtures/anthropic/multi_thinking_builtin_tool.txtar new file mode 100644 index 00000000..633d11d9 --- /dev/null +++ 
b/fixtures/anthropic/multi_thinking_builtin_tool.txtar @@ -0,0 +1,136 @@ +Claude Code has builtin tools to (e.g.) explore the filesystem. +This fixture has two thinking blocks before the tool_use block. + +-- request -- +{ + "model": "claude-sonnet-4-20250514", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "read the foo file" + } + ] +} + +-- streaming -- +event: message_start +data: {"type":"message_start","message":{"id":"msg_015SQewixvT9s4cABCVvUE6g","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":22,"cache_read_input_tokens":13993,"output_tokens":5,"service_tier":"standard"}} } + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"The user wants me to read a file called \"foo\". 
Let me find and read it."}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ=="}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"thinking_delta","thinking":"I should use the Read tool to access the file contents."}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"signature_delta","signature":"Aa1BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ=="}} + +event: content_block_stop +data: {"type":"content_block_stop","index":1} + +event: content_block_start +data: {"type":"content_block_start","index":2,"content_block":{"type":"tool_use","id":"toolu_01RX68weRSquLx6HUTj65iBo","name":"Read","input":{}}} + +event: ping +data: {"type": "ping"} + +event: content_block_delta +data: {"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":""} } + +event: content_block_delta +data: {"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"{\"file_path\": \"/tmp/blah/foo"} } + +event: content_block_delta +data: 
{"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"\"}"} } + +event: content_block_stop +data: {"type":"content_block_stop","index":2 } + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":61} } + +event: message_stop +data: {"type":"message_stop" } + + +-- non-streaming -- +{ + "id": "msg_01JHKqEmh7wYuPXqUWUvusfL", + "container": { + "id": "", + "expires_at": "0001-01-01T00:00:00Z" + }, + "content": [ + { + "type": "thinking", + "thinking": "The user wants me to read a file called \"foo\". Let me find and read it.", + "signature": "Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ==" + }, + { + "type": "thinking", + "thinking": "I should use the Read tool to access the file contents.", + "signature": "Aa1BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ==" + }, + { + "citations": null, + "text": "", + "type": "tool_use", + "id": "toolu_01AusGgY5aKFhzWrFBv9JfHq", + "input": { + "file_path": "/tmp/blah/foo" + }, + "name": "Read", + "content": { + "OfWebSearchResultBlockArray": null, + "OfString": "", + "OfMCPToolResultBlockContent": null, + "error_code": "", + "type": "", + "content": null, + "return_code": 0, + "stderr": "", + "stdout": "" + }, + "tool_use_id": "", + "server_name": "", + "is_error": false, + "file_id": "", + "signature": "", + "thinking": "", + "data": "" + } + ], + "model": 
"claude-sonnet-4-20250514", + "role": "assistant", + "stop_reason": "tool_use", + "stop_sequence": "", + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 23490, + "input_tokens": 5, + "output_tokens": 84, + "server_tool_use": { + "web_search_requests": 0 + }, + "service_tier": "standard" + } +} + diff --git a/fixtures/fixtures.go b/fixtures/fixtures.go index 3c150471..cacf657c 100644 --- a/fixtures/fixtures.go +++ b/fixtures/fixtures.go @@ -15,6 +15,9 @@ var ( //go:embed anthropic/single_builtin_tool.txtar AntSingleBuiltinTool []byte + //go:embed anthropic/multi_thinking_builtin_tool.txtar + AntMultiThinkingBuiltinTool []byte + //go:embed anthropic/single_injected_tool.txtar AntSingleInjectedTool []byte @@ -61,6 +64,9 @@ var ( //go:embed openai/responses/blocking/single_builtin_tool.txtar OaiResponsesBlockingSingleBuiltinTool []byte + //go:embed openai/responses/blocking/multi_reasoning_builtin_tool.txtar + OaiResponsesBlockingMultiReasoningBuiltinTool []byte + //go:embed openai/responses/blocking/cached_input_tokens.txtar OaiResponsesBlockingCachedInputTokens []byte @@ -96,6 +102,9 @@ var ( //go:embed openai/responses/streaming/builtin_tool.txtar OaiResponsesStreamingBuiltinTool []byte + //go:embed openai/responses/streaming/multi_reasoning_builtin_tool.txtar + OaiResponsesStreamingMultiReasoningBuiltinTool []byte + //go:embed openai/responses/streaming/cached_input_tokens.txtar OaiResponsesStreamingCachedInputTokens []byte diff --git a/fixtures/openai/responses/blocking/multi_reasoning_builtin_tool.txtar b/fixtures/openai/responses/blocking/multi_reasoning_builtin_tool.txtar new file mode 100644 index 00000000..022b433e --- /dev/null +++ b/fixtures/openai/responses/blocking/multi_reasoning_builtin_tool.txtar @@ -0,0 +1,142 @@ +Two reasoning output items before a function_call. 
+ +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Use the add function to calculate the sum." + } + ], + "model": "gpt-4.1", + "stream": false, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- non-streaming -- +{ + "id": "resp_0da6045a8b68fa5200695fa23dcc2c81a19c849f627abf8a31", + "object": "response", + "created_at": 1767875133, + "status": "completed", + "background": false, + "completed_at": 1767875134, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "id": "rs_0da6045a8b68fa5200695fa23e100081a19bf68887d47ae93d", + "type": "reasoning", + "status": "completed", + "summary": [ + { + "type": "summary_text", + "text": "The user wants to add 3 and 5. Let me call the add function." + } + ] + }, + { + "id": "rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e", + "type": "reasoning", + "status": "completed", + "summary": [ + { + "type": "summary_text", + "text": "After adding, I will check if the result is prime." 
+ } + ] + }, + { + "id": "fc_0da6045a8b68fa5200695fa23e198081a19bf68887d47ae93d", + "type": "function_call", + "status": "completed", + "arguments": "{\"a\":3,\"b\":5}", + "call_id": "call_CJSaa2u51JG996575oVljuNq", + "name": "add" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "description": "Add two numbers together.", + "name": "add", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ], + "additionalProperties": false + }, + "strict": true + } + ], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 58, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 18, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 76 + }, + "user": null, + "metadata": {} +} diff --git a/fixtures/openai/responses/streaming/multi_reasoning_builtin_tool.txtar b/fixtures/openai/responses/streaming/multi_reasoning_builtin_tool.txtar new file mode 100644 index 00000000..b54ebc7a --- /dev/null +++ b/fixtures/openai/responses/streaming/multi_reasoning_builtin_tool.txtar @@ -0,0 +1,94 @@ +Two reasoning output items before a function_call. + +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Use the add function to calculate the sum." 
+ } + ], + "model": "gpt-4.1", + "stream": true, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- streaming -- +event: response.created +data: {"type":"response.created","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + +event: response.in_progress +data: 
{"type":"response.in_progress","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} + +event: response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"summary_index":0,"delta":"The user wants to add 3 and 5. 
Let me call the add function.","sequence_number":4} + +event: response.reasoning_summary_text.done +data: {"type":"response.reasoning_summary_text.done","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"summary_index":0,"text":"The user wants to add 3 and 5. Let me call the add function.","sequence_number":5} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"part":{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."},"summary_index":0,"sequence_number":6} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."}]},"output_index":0,"sequence_number":7} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","type":"reasoning","status":"in_progress","summary":[]},"output_index":1,"sequence_number":8} + +event: response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","output_index":1,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":9} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","output_index":1,"summary_index":0,"delta":"After adding, I will check if the result is prime.","sequence_number":10} + +event: response.reasoning_summary_text.done +data: 
{"type":"response.reasoning_summary_text.done","item_id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","output_index":1,"summary_index":0,"text":"After adding, I will check if the result is prime.","sequence_number":11} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","output_index":1,"part":{"type":"summary_text","text":"After adding, I will check if the result is prime."},"summary_index":0,"sequence_number":12} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"After adding, I will check if the result is prime."}]},"output_index":1,"sequence_number":13} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"in_progress","arguments":"","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":2,"sequence_number":14} + +event: response.function_call_arguments.delta +data: {"type":"response.function_call_arguments.delta","delta":"{\"a\":3,\"b\":5}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWZHP8i4lSgQYT","output_index":2,"sequence_number":15} + +event: response.function_call_arguments.done +data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":3,\"b\":5}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","output_index":2,"sequence_number":16} + +event: response.output_item.done +data: 
{"type":"response.output_item.done","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":2,"sequence_number":17} + +event: response.completed +data: {"type":"response.completed","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"completed","background":false,"completed_at":1767875312,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."}]},{"id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"After adding, I will check if the result is prime."}]},{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers 
together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":58,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":76},"user":null,"metadata":{}},"sequence_number":18} + diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index 9cdf7ac1..88678f95 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -131,27 +131,50 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { t.Parallel() cases := []struct { + name string streaming bool + fixture []byte expectedToolCallID string + expectedThoughts []string }{ { + name: "single thinking block/streaming", streaming: true, + fixture: fixtures.AntSingleBuiltinTool, expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThoughts: []string{"The user wants me to read"}, }, { + name: "single thinking block/blocking", streaming: false, + fixture: fixtures.AntSingleBuiltinTool, expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThoughts: []string{"The user wants me to read"}, + }, + { + name: "multiple thinking blocks/streaming", + streaming: true, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + }, + { + name: "multiple thinking blocks/blocking", + streaming: false, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, }, } for _, tc := range cases { - t.Run(fmt.Sprintf("streaming=%v", tc.streaming), func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { t.Parallel() 
ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) t.Cleanup(cancel) - fix := fixtures.Parse(t, fixtures.AntSingleBuiltinTool) + fix := fixtures.Parse(t, tc.fixture) upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) @@ -174,9 +197,10 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { assert.Equal(t, "Read", toolUsages[0].Tool) assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) - // Model thoughts should be embedded in the tool usage record. - require.Len(t, toolUsages[0].ModelThoughts, 1) - assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants me to read") + require.Len(t, toolUsages[0].ModelThoughts, len(tc.expectedThoughts)) + for i, expected := range tc.expectedThoughts { + assert.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) + } bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) }) diff --git a/internal/integrationtest/responses_test.go b/internal/integrationtest/responses_test.go index 900ab783..3a1519db 100644 --- a/internal/integrationtest/responses_test.go +++ b/internal/integrationtest/responses_test.go @@ -3,7 +3,6 @@ package integrationtest import ( "context" "encoding/json" - "fmt" "io" "net" "net/http" @@ -944,32 +943,45 @@ func TestResponsesModelThoughts(t *testing.T) { t.Parallel() cases := []struct { - streaming bool + name string + fixture []byte expectedToolCallID string + expectedThoughts []string }{ { - streaming: false, + name: "single reasoning/blocking", + fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + expectedThoughts: []string{"The user wants to add 3 and 5"}, }, { - streaming: true, + name: "single reasoning/streaming", + fixture: fixtures.OaiResponsesStreamingBuiltinTool, expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + expectedThoughts: []string{"The user wants to add 3 and 5"}, + }, + { + name: "multiple reasoning 
items/blocking", + fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, + expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, + }, + { + name: "multiple reasoning items/streaming", + fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, + expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, }, } for _, tc := range cases { - t.Run(fmt.Sprintf("streaming=%v", tc.streaming), func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { t.Parallel() ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) t.Cleanup(cancel) - var fix fixtures.Fixture - if tc.streaming { - fix = fixtures.Parse(t, fixtures.OaiResponsesStreamingBuiltinTool) - } else { - fix = fixtures.Parse(t, fixtures.OaiResponsesBlockingSingleBuiltinTool) - } + fix := fixtures.Parse(t, tc.fixture) upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) @@ -986,10 +998,10 @@ func TestResponsesModelThoughts(t *testing.T) { require.Equal(t, "add", toolUsages[0].Tool) require.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) - // Model thoughts should be embedded in the tool usage record. 
- require.Len(t, toolUsages[0].ModelThoughts, 1) - require.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants to add 3 and 5") - require.Contains(t, toolUsages[0].ModelThoughts[0].Content, "Let me call the add function") + require.Len(t, toolUsages[0].ModelThoughts, len(tc.expectedThoughts)) + for i, expected := range tc.expectedThoughts { + require.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) + } }) } }) From abfc67953193d6b645f2c3dc91a582bba8f217e3 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 6 Mar 2026 16:13:51 +0200 Subject: [PATCH 06/14] chore: refactor tests Signed-off-by: Danny Kopping --- internal/integrationtest/bridge_test.go | 159 +++++++++------------ internal/integrationtest/responses_test.go | 131 +++++++---------- 2 files changed, 122 insertions(+), 168 deletions(-) diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index 88678f95..6150173a 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -127,72 +127,76 @@ func TestAnthropicMessages(t *testing.T) { func TestAnthropicMessagesModelThoughts(t *testing.T) { t.Parallel() - t.Run("thinking captured with builtin tool", func(t *testing.T) { - t.Parallel() - - cases := []struct { - name string - streaming bool - fixture []byte - expectedToolCallID string - expectedThoughts []string - }{ - { - name: "single thinking block/streaming", - streaming: true, - fixture: fixtures.AntSingleBuiltinTool, - expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", - expectedThoughts: []string{"The user wants me to read"}, - }, - { - name: "single thinking block/blocking", - streaming: false, - fixture: fixtures.AntSingleBuiltinTool, - expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", - expectedThoughts: []string{"The user wants me to read"}, - }, - { - name: "multiple thinking blocks/streaming", - streaming: true, - fixture: fixtures.AntMultiThinkingBuiltinTool, - 
expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", - expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, - }, - { - name: "multiple thinking blocks/blocking", - streaming: false, - fixture: fixtures.AntMultiThinkingBuiltinTool, - expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", - expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, - }, - } + cases := []struct { + name string + streaming bool + fixture []byte + expectedToolCallID string + expectedThoughts []string // nil means no tool usages expected at all + }{ + { + name: "single thinking block/streaming", + streaming: true, + fixture: fixtures.AntSingleBuiltinTool, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThoughts: []string{"The user wants me to read"}, + }, + { + name: "single thinking block/blocking", + streaming: false, + fixture: fixtures.AntSingleBuiltinTool, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThoughts: []string{"The user wants me to read"}, + }, + { + name: "multiple thinking blocks/streaming", + streaming: true, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + }, + { + name: "multiple thinking blocks/blocking", + streaming: false, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + }, + { + name: "no thoughts without tool calls", + streaming: true, + fixture: fixtures.AntSimple, // This fixture contains thoughts, but they're not associated with tool calls. 
+ }, + } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() - ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) - t.Cleanup(cancel) + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) - fix := fixtures.Parse(t, tc.fixture) - upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) + fix := fixtures.Parse(t, tc.fixture) + upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) - bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) + bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) - reqBody, err := sjson.SetBytes(fix.Request(), "stream", tc.streaming) - require.NoError(t, err) - resp := bridgeServer.makeRequest(t, http.MethodPost, pathAnthropicMessages, reqBody) - require.Equal(t, http.StatusOK, resp.StatusCode) + reqBody, err := sjson.SetBytes(fix.Request(), "stream", tc.streaming) + require.NoError(t, err) + resp := bridgeServer.makeRequest(t, http.MethodPost, pathAnthropicMessages, reqBody) + require.Equal(t, http.StatusOK, resp.StatusCode) - if tc.streaming { - sp := aibridge.NewSSEParser() - require.NoError(t, sp.Parse(resp.Body)) - assert.Contains(t, sp.AllEvents(), "message_start") - assert.Contains(t, sp.AllEvents(), "message_stop") - } + if tc.streaming { + sp := aibridge.NewSSEParser() + require.NoError(t, sp.Parse(resp.Body)) + assert.Contains(t, sp.AllEvents(), "message_start") + assert.Contains(t, sp.AllEvents(), "message_stop") + } - // Verify tool usage was recorded with associated model thoughts. 
- toolUsages := bridgeServer.Recorder.RecordedToolUsages() + toolUsages := bridgeServer.Recorder.RecordedToolUsages() + if tc.expectedThoughts == nil { + assert.Empty(t, toolUsages) + } else { require.Len(t, toolUsages, 1) assert.Equal(t, "Read", toolUsages[0].Tool) assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) @@ -201,40 +205,11 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { for i, expected := range tc.expectedThoughts { assert.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) } + } - bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) - }) - } - }) - - t.Run("no thoughts without tool calls", func(t *testing.T) { - t.Parallel() - - ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) - t.Cleanup(cancel) - - // Use the simple fixture which has no tool calls — any thinking blocks - // should not be persisted since they can't be associated with a tool call. - fix := fixtures.Parse(t, fixtures.AntSimple) - upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) - - bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) - - reqBody, err := sjson.SetBytes(fix.Request(), "stream", true) - require.NoError(t, err) - resp := bridgeServer.makeRequest(t, http.MethodPost, pathAnthropicMessages, reqBody) - require.Equal(t, http.StatusOK, resp.StatusCode) - - sp := aibridge.NewSSEParser() - require.NoError(t, sp.Parse(resp.Body)) - - // No tool usages (and therefore no thoughts) should be recorded - // when there are no tool calls. 
- toolUsages := bridgeServer.Recorder.RecordedToolUsages() - assert.Empty(t, toolUsages) - - bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) - }) + bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) + }) + } } func TestAWSBedrockIntegration(t *testing.T) { diff --git a/internal/integrationtest/responses_test.go b/internal/integrationtest/responses_test.go index 3a1519db..2cee005a 100644 --- a/internal/integrationtest/responses_test.go +++ b/internal/integrationtest/responses_test.go @@ -3,6 +3,7 @@ package integrationtest import ( "context" "encoding/json" + "fmt" "io" "net" "net/http" @@ -939,61 +940,64 @@ func TestResponsesInjectedTool(t *testing.T) { func TestResponsesModelThoughts(t *testing.T) { t.Parallel() - t.Run("reasoning captured with builtin tool", func(t *testing.T) { - t.Parallel() - - cases := []struct { - name string - fixture []byte - expectedToolCallID string - expectedThoughts []string - }{ - { - name: "single reasoning/blocking", - fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, - expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", - expectedThoughts: []string{"The user wants to add 3 and 5"}, - }, - { - name: "single reasoning/streaming", - fixture: fixtures.OaiResponsesStreamingBuiltinTool, - expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", - expectedThoughts: []string{"The user wants to add 3 and 5"}, - }, - { - name: "multiple reasoning items/blocking", - fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, - expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", - expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, - }, - { - name: "multiple reasoning items/streaming", - fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, - expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", - expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, - }, - } + cases := []struct { + name string + 
fixture []byte + expectedToolCallID string + expectedThoughts []string // nil means no tool usages expected at all + }{ + { + name: "single reasoning/blocking", + fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, + expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + expectedThoughts: []string{"The user wants to add 3 and 5"}, + }, + { + name: "single reasoning/streaming", + fixture: fixtures.OaiResponsesStreamingBuiltinTool, + expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + expectedThoughts: []string{"The user wants to add 3 and 5"}, + }, + { + name: "multiple reasoning items/blocking", + fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, + expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, + }, + { + name: "multiple reasoning items/streaming", + fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, + expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, + }, + { + name: "no thoughts without tool calls", + fixture: fixtures.OaiResponsesStreamingCodex, // This fixture contains reasoning, but it's not associated with tool calls. 
+ }, + } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() - ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) - t.Cleanup(cancel) + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) - fix := fixtures.Parse(t, tc.fixture) - upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) + fix := fixtures.Parse(t, tc.fixture) + upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) - bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) + bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) - resp := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, fix.Request()) - require.Equal(t, http.StatusOK, resp.StatusCode) + resp := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, fix.Request()) + require.Equal(t, http.StatusOK, resp.StatusCode) - _, err := io.ReadAll(resp.Body) - require.NoError(t, err) + _, err := io.ReadAll(resp.Body) + require.NoError(t, err) - // Verify tool usage was recorded with associated model thoughts. - toolUsages := bridgeServer.Recorder.RecordedToolUsages() + toolUsages := bridgeServer.Recorder.RecordedToolUsages() + if tc.expectedThoughts == nil { + require.Empty(t, toolUsages) + } else { require.Len(t, toolUsages, 1) require.Equal(t, "add", toolUsages[0].Tool) require.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) @@ -1002,34 +1006,9 @@ func TestResponsesModelThoughts(t *testing.T) { for i, expected := range tc.expectedThoughts { require.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) } - }) - } - }) - - t.Run("no thoughts without tool calls", func(t *testing.T) { - t.Parallel() - - ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) - t.Cleanup(cancel) - - // Use the simple fixture which has no tool calls — any reasoning - // should not be persisted since it can't be associated with a tool call. 
- fix := fixtures.Parse(t, fixtures.OaiResponsesStreamingCodex) - upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) - - bridgeServer := newBridgeTestServer(t, ctx, upstream.URL) - - resp := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, fix.Request()) - require.Equal(t, http.StatusOK, resp.StatusCode) - - _, err := io.ReadAll(resp.Body) - require.NoError(t, err) - - // No tool usages (and therefore no thoughts) should be recorded - // when there are no tool calls. - toolUsages := bridgeServer.Recorder.RecordedToolUsages() - require.Empty(t, toolUsages) - }) + } + }) + } } func requireResponsesError(t *testing.T, code int, message string, body []byte) { From 464670a5233d8c9c817683f785392cdf62ec86a8 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 12 Mar 2026 12:07:15 +0200 Subject: [PATCH 07/14] feat: capture commentary as model thoughts Signed-off-by: Danny Kopping --- fixtures/fixtures.go | 12 ++ .../blocking/commentary_builtin_tool.txtar | 139 +++++++++++++++++ .../summary_and_commentary_builtin_tool.txtar | 146 ++++++++++++++++++ .../streaming/commentary_builtin_tool.txtar | 80 ++++++++++ .../summary_and_commentary_builtin_tool.txtar | 94 +++++++++++ intercept/responses/base.go | 47 ++++-- internal/integrationtest/responses_test.go | 24 +++ 7 files changed, 530 insertions(+), 12 deletions(-) create mode 100644 fixtures/openai/responses/blocking/commentary_builtin_tool.txtar create mode 100644 fixtures/openai/responses/blocking/summary_and_commentary_builtin_tool.txtar create mode 100644 fixtures/openai/responses/streaming/commentary_builtin_tool.txtar create mode 100644 fixtures/openai/responses/streaming/summary_and_commentary_builtin_tool.txtar diff --git a/fixtures/fixtures.go b/fixtures/fixtures.go index cacf657c..06447a67 100644 --- a/fixtures/fixtures.go +++ b/fixtures/fixtures.go @@ -67,6 +67,12 @@ var ( //go:embed openai/responses/blocking/multi_reasoning_builtin_tool.txtar 
OaiResponsesBlockingMultiReasoningBuiltinTool []byte + //go:embed openai/responses/blocking/commentary_builtin_tool.txtar + OaiResponsesBlockingCommentaryBuiltinTool []byte + + //go:embed openai/responses/blocking/summary_and_commentary_builtin_tool.txtar + OaiResponsesBlockingSummaryAndCommentaryBuiltinTool []byte + //go:embed openai/responses/blocking/cached_input_tokens.txtar OaiResponsesBlockingCachedInputTokens []byte @@ -105,6 +111,12 @@ var ( //go:embed openai/responses/streaming/multi_reasoning_builtin_tool.txtar OaiResponsesStreamingMultiReasoningBuiltinTool []byte + //go:embed openai/responses/streaming/commentary_builtin_tool.txtar + OaiResponsesStreamingCommentaryBuiltinTool []byte + + //go:embed openai/responses/streaming/summary_and_commentary_builtin_tool.txtar + OaiResponsesStreamingSummaryAndCommentaryBuiltinTool []byte + //go:embed openai/responses/streaming/cached_input_tokens.txtar OaiResponsesStreamingCachedInputTokens []byte diff --git a/fixtures/openai/responses/blocking/commentary_builtin_tool.txtar b/fixtures/openai/responses/blocking/commentary_builtin_tool.txtar new file mode 100644 index 00000000..d0e83dd7 --- /dev/null +++ b/fixtures/openai/responses/blocking/commentary_builtin_tool.txtar @@ -0,0 +1,139 @@ +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Use the add function to calculate the sum." 
+ } + ], + "model": "gpt-5.4", + "stream": false, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- non-streaming -- +{ + "id": "resp_0aba2ac43dc240b30169b15720243c819ebb64977365d42cf5", + "object": "response", + "created_at": 1773229856, + "status": "completed", + "background": false, + "completed_at": 1773229861, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-5.4-2026-03-05", + "output": [ + { + "id": "rs_0aba2ac43dc240b30169b157208c88819e8238a91b5f7a919b", + "type": "reasoning", + "status": "completed", + "encrypted_content": "gAAAAA==", + "summary": [] + }, + { + "id": "msg_0aba2ac43dc240b30169b1572286d0819eb24b1d0f84c8fb3f", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "text": "Checking whether 3 + 5 is prime by calling the add function first." 
+ } + ], + "phase": "commentary", + "role": "assistant" + }, + { + "id": "fc_0aba2ac43dc240b30169b157255604819e8a108124efc1635c", + "type": "function_call", + "status": "completed", + "arguments": "{\"a\":3,\"b\":5}", + "call_id": "call_A8TkZmIcKtw2Zw952Wc5QVe7", + "name": "add" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": "xhigh", + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": false, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "low" + }, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "description": "Add two numbers together.", + "name": "add", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ], + "additionalProperties": false + }, + "strict": true + } + ], + "top_logprobs": 0, + "top_p": 0.98, + "truncation": "disabled", + "usage": { + "input_tokens": 58, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 30, + "output_tokens_details": { + "reasoning_tokens": 10 + }, + "total_tokens": 88 + }, + "user": null, + "metadata": {} +} diff --git a/fixtures/openai/responses/blocking/summary_and_commentary_builtin_tool.txtar b/fixtures/openai/responses/blocking/summary_and_commentary_builtin_tool.txtar new file mode 100644 index 00000000..15082c36 --- /dev/null +++ b/fixtures/openai/responses/blocking/summary_and_commentary_builtin_tool.txtar @@ -0,0 +1,146 @@ +Both a reasoning summary and a commentary message before a function_call. + +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Use the add function to calculate the sum." 
+ } + ], + "model": "gpt-5.4", + "stream": false, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- non-streaming -- +{ + "id": "resp_1bba3bc54ed351c41270c26831354d920fcc75088476e53de6", + "object": "response", + "created_at": 1773229900, + "status": "completed", + "background": false, + "completed_at": 1773229905, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-5.4-2026-03-05", + "output": [ + { + "id": "rs_1bba3bc54ed351c41270c26831908d920fcc75088476e53de6", + "type": "reasoning", + "status": "completed", + "encrypted_content": "gAAAAA==", + "summary": [ + { + "type": "summary_text", + "text": "I need to add 3 and 5 to check primality." + } + ] + }, + { + "id": "msg_1bba3bc54ed351c41270c26831a09d920fdd86199587f64ef7", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "text": "Let me calculate the sum first using the add function." 
+ } + ], + "phase": "commentary", + "role": "assistant" + }, + { + "id": "fc_1bba3bc54ed351c41270c26831b0ad920fee97200698074f08", + "type": "function_call", + "status": "completed", + "arguments": "{\"a\":3,\"b\":5}", + "call_id": "call_B9UjYX01Lvvv1XwjDsdmRW3f", + "name": "add" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": "xhigh", + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": false, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "low" + }, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "description": "Add two numbers together.", + "name": "add", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ], + "additionalProperties": false + }, + "strict": true + } + ], + "top_logprobs": 0, + "top_p": 0.98, + "truncation": "disabled", + "usage": { + "input_tokens": 58, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 35, + "output_tokens_details": { + "reasoning_tokens": 10 + }, + "total_tokens": 93 + }, + "user": null, + "metadata": {} +} diff --git a/fixtures/openai/responses/streaming/commentary_builtin_tool.txtar b/fixtures/openai/responses/streaming/commentary_builtin_tool.txtar new file mode 100644 index 00000000..2f090f62 --- /dev/null +++ b/fixtures/openai/responses/streaming/commentary_builtin_tool.txtar @@ -0,0 +1,80 @@ +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Use the add function to calculate the sum." 
+ } + ], + "model": "gpt-5.4", + "stream": true, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- streaming -- +event: response.created +data: {"type":"response.created","response":{"id":"resp_0aba2ac43dc240b30169b15720243c819ebb64977365d42cf5","object":"response","created_at":1773229856,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.4-2026-03-05","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"xhigh","summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"low"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + +event: response.in_progress +data: 
{"type":"response.in_progress","response":{"id":"resp_0aba2ac43dc240b30169b15720243c819ebb64977365d42cf5","object":"response","created_at":1773229856,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.4-2026-03-05","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"xhigh","summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"low"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"rs_0aba2ac43dc240b30169b157208c88819e8238a91b5f7a919b","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_0aba2ac43dc240b30169b157208c88819e8238a91b5f7a919b","type":"reasoning","status":"completed","encrypted_content":"gAAAAA==","summary":[]},"output_index":0,"sequence_number":3} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"msg_0aba2ac43dc240b30169b1572286d0819eb24b1d0f84c8fb3f","type":"message","status":"in_progress","content":[],"phase":"commentary","role":"assistant"},"output_index":1,"sequence_number":4} + +event: response.content_part.added +data: 
{"type":"response.content_part.added","item_id":"msg_0aba2ac43dc240b30169b1572286d0819eb24b1d0f84c8fb3f","output_index":1,"content_index":0,"part":{"type":"output_text","text":"","annotations":[]},"sequence_number":5} + +event: response.output_text.delta +data: {"type":"response.output_text.delta","item_id":"msg_0aba2ac43dc240b30169b1572286d0819eb24b1d0f84c8fb3f","output_index":1,"content_index":0,"delta":"Checking whether 3 + 5 is prime by calling the add function first.","sequence_number":6} + +event: response.output_text.done +data: {"type":"response.output_text.done","item_id":"msg_0aba2ac43dc240b30169b1572286d0819eb24b1d0f84c8fb3f","output_index":1,"content_index":0,"text":"Checking whether 3 + 5 is prime by calling the add function first.","sequence_number":7} + +event: response.content_part.done +data: {"type":"response.content_part.done","item_id":"msg_0aba2ac43dc240b30169b1572286d0819eb24b1d0f84c8fb3f","output_index":1,"content_index":0,"part":{"type":"output_text","text":"Checking whether 3 + 5 is prime by calling the add function first.","annotations":[]},"sequence_number":8} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"msg_0aba2ac43dc240b30169b1572286d0819eb24b1d0f84c8fb3f","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Checking whether 3 + 5 is prime by calling the add function first."}],"phase":"commentary","role":"assistant"},"output_index":1,"sequence_number":9} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"fc_0aba2ac43dc240b30169b157255604819e8a108124efc1635c","type":"function_call","status":"in_progress","arguments":"","call_id":"call_A8TkZmIcKtw2Zw952Wc5QVe7","name":"add"},"output_index":2,"sequence_number":10} + +event: response.function_call_arguments.delta +data: 
{"type":"response.function_call_arguments.delta","delta":"{\"a\":3,\"b\":5}","item_id":"fc_0aba2ac43dc240b30169b157255604819e8a108124efc1635c","output_index":2,"sequence_number":11} + +event: response.function_call_arguments.done +data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":3,\"b\":5}","item_id":"fc_0aba2ac43dc240b30169b157255604819e8a108124efc1635c","output_index":2,"sequence_number":12} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"fc_0aba2ac43dc240b30169b157255604819e8a108124efc1635c","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_A8TkZmIcKtw2Zw952Wc5QVe7","name":"add"},"output_index":2,"sequence_number":13} + +event: response.completed +data: {"type":"response.completed","response":{"id":"resp_0aba2ac43dc240b30169b15720243c819ebb64977365d42cf5","object":"response","created_at":1773229856,"status":"completed","background":false,"completed_at":1773229861,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.4-2026-03-05","output":[{"id":"rs_0aba2ac43dc240b30169b157208c88819e8238a91b5f7a919b","type":"reasoning","status":"completed","encrypted_content":"gAAAAA==","summary":[]},{"id":"msg_0aba2ac43dc240b30169b1572286d0819eb24b1d0f84c8fb3f","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Checking whether 3 + 5 is prime by calling the add function 
first."}],"phase":"commentary","role":"assistant"},{"id":"fc_0aba2ac43dc240b30169b157255604819e8a108124efc1635c","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_A8TkZmIcKtw2Zw952Wc5QVe7","name":"add"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"xhigh","summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"low"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":{"input_tokens":58,"input_tokens_details":{"cached_tokens":0},"output_tokens":30,"output_tokens_details":{"reasoning_tokens":10},"total_tokens":88},"user":null,"metadata":{}},"sequence_number":14} + diff --git a/fixtures/openai/responses/streaming/summary_and_commentary_builtin_tool.txtar b/fixtures/openai/responses/streaming/summary_and_commentary_builtin_tool.txtar new file mode 100644 index 00000000..172b0065 --- /dev/null +++ b/fixtures/openai/responses/streaming/summary_and_commentary_builtin_tool.txtar @@ -0,0 +1,94 @@ +Both a reasoning summary and a commentary message before a function_call. + +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Use the add function to calculate the sum." 
+ } + ], + "model": "gpt-5.4", + "stream": true, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- streaming -- +event: response.created +data: {"type":"response.created","response":{"id":"resp_1bba3bc54ed351c41270c26831354d920fcc75088476e53de6","object":"response","created_at":1773229900,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.4-2026-03-05","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"xhigh","summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"low"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + +event: response.in_progress +data: 
{"type":"response.in_progress","response":{"id":"resp_1bba3bc54ed351c41270c26831354d920fcc75088476e53de6","object":"response","created_at":1773229900,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.4-2026-03-05","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"xhigh","summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"low"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"rs_1bba3bc54ed351c41270c26831908d920fcc75088476e53de6","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} + +event: response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_1bba3bc54ed351c41270c26831908d920fcc75088476e53de6","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_1bba3bc54ed351c41270c26831908d920fcc75088476e53de6","output_index":0,"summary_index":0,"delta":"I need to add 3 and 5 to check primality.","sequence_number":4} + +event: response.reasoning_summary_text.done +data: 
{"type":"response.reasoning_summary_text.done","item_id":"rs_1bba3bc54ed351c41270c26831908d920fcc75088476e53de6","output_index":0,"summary_index":0,"text":"I need to add 3 and 5 to check primality.","sequence_number":5} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_1bba3bc54ed351c41270c26831908d920fcc75088476e53de6","output_index":0,"part":{"type":"summary_text","text":"I need to add 3 and 5 to check primality."},"summary_index":0,"sequence_number":6} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_1bba3bc54ed351c41270c26831908d920fcc75088476e53de6","type":"reasoning","status":"completed","encrypted_content":"gAAAAA==","summary":[{"type":"summary_text","text":"I need to add 3 and 5 to check primality."}]},"output_index":0,"sequence_number":7} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"msg_1bba3bc54ed351c41270c26831a09d920fdd86199587f64ef7","type":"message","status":"in_progress","content":[],"phase":"commentary","role":"assistant"},"output_index":1,"sequence_number":8} + +event: response.content_part.added +data: {"type":"response.content_part.added","item_id":"msg_1bba3bc54ed351c41270c26831a09d920fdd86199587f64ef7","output_index":1,"content_index":0,"part":{"type":"output_text","text":"","annotations":[]},"sequence_number":9} + +event: response.output_text.delta +data: {"type":"response.output_text.delta","item_id":"msg_1bba3bc54ed351c41270c26831a09d920fdd86199587f64ef7","output_index":1,"content_index":0,"delta":"Let me calculate the sum first using the add function.","sequence_number":10} + +event: response.output_text.done +data: {"type":"response.output_text.done","item_id":"msg_1bba3bc54ed351c41270c26831a09d920fdd86199587f64ef7","output_index":1,"content_index":0,"text":"Let me calculate the sum first using the add function.","sequence_number":11} + +event: response.content_part.done +data: 
{"type":"response.content_part.done","item_id":"msg_1bba3bc54ed351c41270c26831a09d920fdd86199587f64ef7","output_index":1,"content_index":0,"part":{"type":"output_text","text":"Let me calculate the sum first using the add function.","annotations":[]},"sequence_number":12} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"msg_1bba3bc54ed351c41270c26831a09d920fdd86199587f64ef7","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Let me calculate the sum first using the add function."}],"phase":"commentary","role":"assistant"},"output_index":1,"sequence_number":13} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"fc_1bba3bc54ed351c41270c26831b0ad920fee97200698074f08","type":"function_call","status":"in_progress","arguments":"","call_id":"call_B9UjYX01Lvvv1XwjDsdmRW3f","name":"add"},"output_index":2,"sequence_number":14} + +event: response.function_call_arguments.delta +data: {"type":"response.function_call_arguments.delta","delta":"{\"a\":3,\"b\":5}","item_id":"fc_1bba3bc54ed351c41270c26831b0ad920fee97200698074f08","output_index":2,"sequence_number":15} + +event: response.function_call_arguments.done +data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":3,\"b\":5}","item_id":"fc_1bba3bc54ed351c41270c26831b0ad920fee97200698074f08","output_index":2,"sequence_number":16} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"fc_1bba3bc54ed351c41270c26831b0ad920fee97200698074f08","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_B9UjYX01Lvvv1XwjDsdmRW3f","name":"add"},"output_index":2,"sequence_number":17} + +event: response.completed +data: 
{"type":"response.completed","response":{"id":"resp_1bba3bc54ed351c41270c26831354d920fcc75088476e53de6","object":"response","created_at":1773229900,"status":"completed","background":false,"completed_at":1773229905,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.4-2026-03-05","output":[{"id":"rs_1bba3bc54ed351c41270c26831908d920fcc75088476e53de6","type":"reasoning","status":"completed","encrypted_content":"gAAAAA==","summary":[{"type":"summary_text","text":"I need to add 3 and 5 to check primality."}]},{"id":"msg_1bba3bc54ed351c41270c26831a09d920fdd86199587f64ef7","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Let me calculate the sum first using the add function."}],"phase":"commentary","role":"assistant"},{"id":"fc_1bba3bc54ed351c41270c26831b0ad920fee97200698074f08","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_B9UjYX01Lvvv1XwjDsdmRW3f","name":"add"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"xhigh","summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"low"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":0.98,"truncation":"disabled","usage":{"input_tokens":58,"input_tokens_details":{"cached_tokens":0},"output_tokens":35,"output_tokens_details":{"reasoning_tokens":10},"total_tokens":93},"user":null,"metadata":{}},"sequence_number":18} + diff --git a/intercept/responses/base.go b/intercept/responses/base.go index 0a889bb5..c3550f30 100644 --- a/intercept/responses/base.go +++ 
b/intercept/responses/base.go @@ -333,8 +333,9 @@ func (i *responsesInterceptionBase) recordTokenUsage(ctx context.Context, respon } } -// extractModelThoughts extracts reasoning summary items from response output -// and converts them to ModelThoughtRecords for association with tool usage. +// extractModelThoughts extracts model thoughts from response output items. +// It captures both reasoning summary items and commentary messages (message +// output items with "phase": "commentary") as model thoughts. func (i *responsesInterceptionBase) extractModelThoughts(response *responses.Response) []*recorder.ModelThoughtRecord { if response == nil { return nil @@ -342,19 +343,41 @@ func (i *responsesInterceptionBase) extractModelThoughts(response *responses.Res var thoughts []*recorder.ModelThoughtRecord for _, item := range response.Output { - if item.Type != string(constant.ValueOf[constant.Reasoning]()) { - continue - } + switch item.Type { + case string(constant.ValueOf[constant.Reasoning]()): + reasoning := item.AsReasoning() + for _, summary := range reasoning.Summary { + if summary.Text == "" { + continue + } + thoughts = append(thoughts, &recorder.ModelThoughtRecord{ + Content: summary.Text, + CreatedAt: time.Now(), + }) + } - reasoning := item.AsReasoning() - for _, summary := range reasoning.Summary { - if summary.Text == "" { + case string(constant.ValueOf[constant.Message]()): + // The API sometimes returns commentary messages instead of reasoning + // summaries. These are assistant message output items with "phase": "commentary". + // The SDK doesn't expose a Phase field, so we extract it from raw JSON. 
+ raw := item.RawJSON() + if gjson.Get(raw, "role").String() != string(constant.ValueOf[constant.Assistant]()) || + gjson.Get(raw, "phase").String() != "commentary" { continue } - thoughts = append(thoughts, &recorder.ModelThoughtRecord{ - Content: summary.Text, - CreatedAt: time.Now(), - }) + msg := item.AsMessage() + for _, part := range msg.Content { + if part.Type != string(constant.ValueOf[constant.OutputText]()) { + continue + } + if part.Text == "" { + continue + } + thoughts = append(thoughts, &recorder.ModelThoughtRecord{ + Content: part.Text, + CreatedAt: time.Now(), + }) + } } } diff --git a/internal/integrationtest/responses_test.go b/internal/integrationtest/responses_test.go index 2cee005a..1aceaacf 100644 --- a/internal/integrationtest/responses_test.go +++ b/internal/integrationtest/responses_test.go @@ -970,6 +970,30 @@ func TestResponsesModelThoughts(t *testing.T) { expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, }, + { + name: "commentary/blocking", + fixture: fixtures.OaiResponsesBlockingCommentaryBuiltinTool, + expectedToolCallID: "call_A8TkZmIcKtw2Zw952Wc5QVe7", + expectedThoughts: []string{"Checking whether 3 + 5 is prime by calling the add function first."}, + }, + { + name: "commentary/streaming", + fixture: fixtures.OaiResponsesStreamingCommentaryBuiltinTool, + expectedToolCallID: "call_A8TkZmIcKtw2Zw952Wc5QVe7", + expectedThoughts: []string{"Checking whether 3 + 5 is prime by calling the add function first."}, + }, + { + name: "summary and commentary/blocking", + fixture: fixtures.OaiResponsesBlockingSummaryAndCommentaryBuiltinTool, + expectedToolCallID: "call_B9UjYX01Lvvv1XwjDsdmRW3f", + expectedThoughts: []string{"I need to add 3 and 5 to check primality.", "Let me calculate the sum first using the add function."}, + }, + { + name: "summary and commentary/streaming", + fixture: 
fixtures.OaiResponsesStreamingSummaryAndCommentaryBuiltinTool, + expectedToolCallID: "call_B9UjYX01Lvvv1XwjDsdmRW3f", + expectedThoughts: []string{"I need to add 3 and 5 to check primality.", "Let me calculate the sum first using the add function."}, + }, { name: "no thoughts without tool calls", fixture: fixtures.OaiResponsesStreamingCodex, // This fixture contains reasoning, but it's not associated with tool calls. From bf7d2bc5a91cef2b20ebc9e5784a13173b7f2b5b Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 12 Mar 2026 12:20:35 +0200 Subject: [PATCH 08/14] chore: cleaning up /v1/messages impl Signed-off-by: Danny Kopping --- intercept/messages/base.go | 20 ++++++++++++++++++++ intercept/messages/blocking.go | 14 +------------- intercept/messages/streaming.go | 23 ++++++++++------------- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/intercept/messages/base.go b/intercept/messages/base.go index 37522380..58ac23a9 100644 --- a/intercept/messages/base.go +++ b/intercept/messages/base.go @@ -168,6 +168,26 @@ func (i *interceptionBase) disableParallelToolCalls() { } } +// extractModelThoughts returns any thinking blocks that were returned in the response. +func (i *interceptionBase) extractModelThoughts(msg *anthropic.Message) []*recorder.ModelThoughtRecord { + if msg == nil { + return nil + } + + var thoughtRecords []*recorder.ModelThoughtRecord + for _, block := range msg.Content { + switch variant := block.AsAny().(type) { + case anthropic.ThinkingBlock: + thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ + Content: variant.Thinking, + CreatedAt: time.Now(), + }) + } + // anthropic.RedactedThinkingBlock also exists, but there's nothing useful we can capture. + } + return thoughtRecords +} + // IsSmallFastModel checks if the model is a small/fast model (Haiku 3.5). // These models are optimized for tasks like code autocomplete and other small, quick operations. 
// See `ANTHROPIC_SMALL_FAST_MODEL`: https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables diff --git a/intercept/messages/blocking.go b/intercept/messages/blocking.go index 4ba71874..e2bed379 100644 --- a/intercept/messages/blocking.go +++ b/intercept/messages/blocking.go @@ -136,19 +136,7 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req accumulateUsage(&cumulativeUsage, resp.Usage) // Capture any thinking blocks that were returned. - var thoughtRecords []*recorder.ModelThoughtRecord - for _, block := range resp.Content { - switch variant := block.AsAny().(type) { - case anthropic.ThinkingBlock: - thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ - Content: variant.Thinking, - CreatedAt: time.Now(), - }) - case anthropic.RedactedThinkingBlock: - // For redacted thinking, there's nothing useful we can capture. - continue - } - } + thoughtRecords := i.extractModelThoughts(resp) // Handle tool calls for non-streaming. var pendingToolCalls []anthropic.ToolUseBlock diff --git a/intercept/messages/streaming.go b/intercept/messages/streaming.go index 949401f9..6c2545c9 100644 --- a/intercept/messages/streaming.go +++ b/intercept/messages/streaming.go @@ -254,20 +254,9 @@ newStream: case string(constant.ValueOf[constant.MessageStop]()): // Capture any thinking blocks that were returned. - var thoughtRecords []*recorder.ModelThoughtRecord - for _, block := range message.Content { - switch variant := block.AsAny().(type) { - case anthropic.ThinkingBlock: - thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ - Content: variant.Thinking, - CreatedAt: time.Now(), - }) - case anthropic.RedactedThinkingBlock: - // For redacted thinking, there's nothing useful we can capture. 
- continue
- }
- }
+ thoughtRecords := i.extractModelThoughts(&message)
+
+ // Process injected tool
 if len(pendingToolCalls) > 0 {
 // Append the whole message from this stream as context since we'll be sending a new request with the tool results.
 messages.Messages = append(messages.Messages, message.ToParam())
@@ -322,8 +311,12 @@ newStream:
 InvocationError: err,
 ModelThoughts: thoughtRecords,
 })
+
 // Clear after first use to avoid duplicating across
 // multiple tool calls in the same message.
+ //
+ // This is not strictly needed for injected tools since we disable parallel tool calls,
+ // but just adding this here for defensiveness.
 thoughtRecords = nil

 if err != nil {
@@ -438,8 +431,12 @@ newStream:
 Injected: false,
 ModelThoughts: thoughtRecords,
 })
+
 // Clear after first use to avoid duplicating across
 // multiple tool calls in the same message.
+ //
+ // This effectively means that in the case of parallel tool calls
+ // the thoughts will only be associated to the first tool use which is fine.
 thoughtRecords = nil
 }
 }

From 424382d620816146e631d72eda0bb396b942d107 Mon Sep 17 00:00:00 2001
From: Danny Kopping
Date: Thu, 12 Mar 2026 12:39:31 +0200
Subject: [PATCH 09/14] chore: update comments

Signed-off-by: Danny Kopping
---
 intercept/messages/blocking.go        | 4 ++++
 intercept/responses/base.go           | 4 ++++
 intercept/responses/injected_tools.go | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/intercept/messages/blocking.go b/intercept/messages/blocking.go
index e2bed379..b32f9e8d 100644
--- a/intercept/messages/blocking.go
+++ b/intercept/messages/blocking.go
@@ -161,8 +161,12 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req
 Injected: false,
 ModelThoughts: thoughtRecords,
 })
+
 // Clear after first use to avoid duplicating across
 // multiple tool calls in the same message.
+ //
+ // This effectively means that in the case of parallel tool calls
+ // the thoughts will only be associated to the first tool use which is fine.
 thoughtRecords = nil
 }

diff --git a/intercept/responses/base.go b/intercept/responses/base.go
index c3550f30..e61bc86c 100644
--- a/intercept/responses/base.go
+++ b/intercept/responses/base.go
@@ -287,8 +287,12 @@ func (i *responsesInterceptionBase) recordNonInjectedToolUsage(ctx context.Conte
 }); err != nil {
 i.logger.Warn(ctx, "failed to record tool usage", slog.Error(err), slog.F("tool", item.Name))
 }
+
 // Clear after first use to avoid duplicating across
 // multiple tool calls in the same message.
+ //
+ // This effectively means that in the case of parallel tool calls
+ // the thoughts will only be associated to the first tool use which is fine.
 thoughtRecords = nil
 }
 }
diff --git a/intercept/responses/injected_tools.go b/intercept/responses/injected_tools.go
index db81941f..7d95db8f 100644
--- a/intercept/responses/injected_tools.go
+++ b/intercept/responses/injected_tools.go
@@ -115,8 +115,12 @@ func (i *responsesInterceptionBase) handleInjectedToolCalls(ctx context.Context,
 var results []responses.ResponseInputItemUnionParam
 for _, fc := range pending {
 results = append(results, i.invokeInjectedTool(ctx, response.ID, fc, thoughtRecords))
+
 // Clear after first use to avoid duplicating across
 // multiple tool calls in the same message.
+ //
+ // This is not strictly needed for injected tools since we disable parallel tool calls,
+ // but just adding this here for defensiveness.
thoughtRecords = nil } From 7511cc42e2dddd927a24ee9619b1909b4280f659 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 12 Mar 2026 14:09:13 +0200 Subject: [PATCH 10/14] chore: add tests for parallel tool calls Signed-off-by: Danny Kopping --- .../multi_thinking_builtin_tool.txtar | 16 ++ .../single_builtin_tool_parallel.txtar | 175 ++++++++++++++++++ fixtures/fixtures.go | 9 + .../single_builtin_tool_parallel.txtar | 140 ++++++++++++++ .../single_builtin_tool_parallel.txtar | 86 +++++++++ intercept/messages/blocking.go | 5 + intercept/messages/streaming.go | 7 +- intercept/responses/injected_tools.go | 5 +- internal/integrationtest/bridge_test.go | 79 ++++---- internal/integrationtest/responses_test.go | 95 +++++----- 10 files changed, 537 insertions(+), 80 deletions(-) create mode 100644 fixtures/anthropic/single_builtin_tool_parallel.txtar create mode 100644 fixtures/openai/responses/blocking/single_builtin_tool_parallel.txtar create mode 100644 fixtures/openai/responses/streaming/single_builtin_tool_parallel.txtar diff --git a/fixtures/anthropic/multi_thinking_builtin_tool.txtar b/fixtures/anthropic/multi_thinking_builtin_tool.txtar index 633d11d9..d27ad63f 100644 --- a/fixtures/anthropic/multi_thinking_builtin_tool.txtar +++ b/fixtures/anthropic/multi_thinking_builtin_tool.txtar @@ -5,6 +5,22 @@ This fixture has two thinking blocks before the tool_use block. 
{ "model": "claude-sonnet-4-20250514", "max_tokens": 1024, + "tools": [ + { + "name": "Read", + "description": "Read the contents of a file at the given path.", + "input_schema": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "The absolute path to the file to read" + } + }, + "required": ["file_path"] + } + } + ], "messages": [ { "role": "user", diff --git a/fixtures/anthropic/single_builtin_tool_parallel.txtar b/fixtures/anthropic/single_builtin_tool_parallel.txtar new file mode 100644 index 00000000..9c53ed2c --- /dev/null +++ b/fixtures/anthropic/single_builtin_tool_parallel.txtar @@ -0,0 +1,175 @@ +Claude Code has builtin tools to (e.g.) explore the filesystem. +This fixture has a single thinking block followed by two parallel tool_use blocks. +The thinking should only be attributed to the first tool_use. + +-- request -- +{ + "model": "claude-sonnet-4-20250514", + "max_tokens": 1024, + "tools": [ + { + "name": "Read", + "description": "Read the contents of a file at the given path.", + "input_schema": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "The absolute path to the file to read" + } + }, + "required": ["file_path"] + } + } + ], + "messages": [ + { + "role": "user", + "content": "read the foo and bar files" + } + ] +} + +-- streaming -- +event: message_start +data: {"type":"message_start","message":{"id":"msg_01ParallelToolStream","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":22,"cache_read_input_tokens":13993,"output_tokens":5,"service_tier":"standard"}} } + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"The user wants me to read 
two files: \"foo\" and \"bar\". I'll read both of them."}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ=="}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01ParallelFirst000000000","name":"Read","input":{}}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} } + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"file_path\": \"/tmp/blah/foo"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\"}"} } + +event: content_block_stop +data: {"type":"content_block_stop","index":1 } + +event: content_block_start +data: {"type":"content_block_start","index":2,"content_block":{"type":"tool_use","id":"toolu_01ParallelSecond00000000","name":"Read","input":{}}} + +event: content_block_delta +data: {"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":""} } + +event: content_block_delta +data: {"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"{\"file_path\": \"/tmp/blah/bar"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"\"}"} } + +event: content_block_stop +data: {"type":"content_block_stop","index":2 } + +event: message_delta +data: 
{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":72} } + +event: message_stop +data: {"type":"message_stop" } + + +-- non-streaming -- +{ + "id": "msg_01ParallelToolBlocking", + "container": { + "id": "", + "expires_at": "0001-01-01T00:00:00Z" + }, + "content": [ + { + "type": "thinking", + "thinking": "The user wants me to read two files: \"foo\" and \"bar\". I'll read both of them.", + "signature": "Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ==" + }, + { + "citations": null, + "text": "", + "type": "tool_use", + "id": "toolu_01ParallelBlockFirst0000", + "input": { + "file_path": "/tmp/blah/foo" + }, + "name": "Read", + "content": { + "OfWebSearchResultBlockArray": null, + "OfString": "", + "OfMCPToolResultBlockContent": null, + "error_code": "", + "type": "", + "content": null, + "return_code": 0, + "stderr": "", + "stdout": "" + }, + "tool_use_id": "", + "server_name": "", + "is_error": false, + "file_id": "", + "signature": "", + "thinking": "", + "data": "" + }, + { + "citations": null, + "text": "", + "type": "tool_use", + "id": "toolu_01ParallelBlockSecond000", + "input": { + "file_path": "/tmp/blah/bar" + }, + "name": "Read", + "content": { + "OfWebSearchResultBlockArray": null, + "OfString": "", + "OfMCPToolResultBlockContent": null, + "error_code": "", + "type": "", + "content": null, + "return_code": 0, + "stderr": "", + "stdout": "" + }, + "tool_use_id": "", + "server_name": "", + "is_error": false, + "file_id": "", + "signature": "", + "thinking": "", + "data": "" + } + ], + "model": "claude-sonnet-4-20250514", + "role": "assistant", + "stop_reason": "tool_use", + "stop_sequence": "", + "type": "message", + "usage": { + 
"cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 23490, + "input_tokens": 5, + "output_tokens": 95, + "server_tool_use": { + "web_search_requests": 0 + }, + "service_tier": "standard" + } +} diff --git a/fixtures/fixtures.go b/fixtures/fixtures.go index 06447a67..8aaeef15 100644 --- a/fixtures/fixtures.go +++ b/fixtures/fixtures.go @@ -18,6 +18,9 @@ var ( //go:embed anthropic/multi_thinking_builtin_tool.txtar AntMultiThinkingBuiltinTool []byte + //go:embed anthropic/single_builtin_tool_parallel.txtar + AntSingleBuiltinToolParallel []byte + //go:embed anthropic/single_injected_tool.txtar AntSingleInjectedTool []byte @@ -88,6 +91,9 @@ var ( //go:embed openai/responses/blocking/prev_response_id.txtar OaiResponsesBlockingPrevResponseID []byte + //go:embed openai/responses/blocking/single_builtin_tool_parallel.txtar + OaiResponsesBlockingSingleBuiltinToolParallel []byte + //go:embed openai/responses/blocking/single_injected_tool.txtar OaiResponsesBlockingSingleInjectedTool []byte @@ -132,6 +138,9 @@ var ( //go:embed openai/responses/streaming/prev_response_id.txtar OaiResponsesStreamingPrevResponseID []byte + //go:embed openai/responses/streaming/single_builtin_tool_parallel.txtar + OaiResponsesStreamingSingleBuiltinToolParallel []byte + //go:embed openai/responses/streaming/single_injected_tool.txtar OaiResponsesStreamingSingleInjectedTool []byte diff --git a/fixtures/openai/responses/blocking/single_builtin_tool_parallel.txtar b/fixtures/openai/responses/blocking/single_builtin_tool_parallel.txtar new file mode 100644 index 00000000..4be0d240 --- /dev/null +++ b/fixtures/openai/responses/blocking/single_builtin_tool_parallel.txtar @@ -0,0 +1,140 @@ +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Also add 10 + 20. Use the add function for both." 
+ } + ], + "model": "gpt-4.1", + "stream": false, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- non-streaming -- +{ + "id": "resp_parallel_blocking_001", + "object": "response", + "created_at": 1767875133, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1767875134, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "id": "rs_parallel_blocking_reasoning_001", + "type": "reasoning", + "status": "completed", + "summary": [ + { + "type": "summary_text", + "text": "The user wants two additions: 3+5 and 10+20. I'll call add for both." + } + ] + }, + { + "id": "fc_parallel_blocking_first_001", + "type": "function_call", + "status": "completed", + "arguments": "{\"a\":3,\"b\":5}", + "call_id": "call_ParallelBlockingFirst001", + "name": "add" + }, + { + "id": "fc_parallel_blocking_second_001", + "type": "function_call", + "status": "completed", + "arguments": "{\"a\":10,\"b\":20}", + "call_id": "call_ParallelBlockingSecond01", + "name": "add" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "description": "Add two numbers together.", + "name": "add", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + 
"required": [ + "a", + "b" + ], + "additionalProperties": false + }, + "strict": true + } + ], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 65, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 30, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 95 + }, + "user": null, + "metadata": {} +} diff --git a/fixtures/openai/responses/streaming/single_builtin_tool_parallel.txtar b/fixtures/openai/responses/streaming/single_builtin_tool_parallel.txtar new file mode 100644 index 00000000..0319cab0 --- /dev/null +++ b/fixtures/openai/responses/streaming/single_builtin_tool_parallel.txtar @@ -0,0 +1,86 @@ +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Also add 10 + 20. Use the add function for both." + } + ], + "model": "gpt-4.1", + "stream": true, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- streaming -- +event: response.created +data: {"type":"response.created","response":{"id":"resp_parallel_streaming_001","object":"response","created_at":1767875312,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers 
together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + +event: response.in_progress +data: {"type":"response.in_progress","response":{"id":"resp_parallel_streaming_001","object":"response","created_at":1767875312,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"rs_parallel_streaming_reasoning_001","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} + +event: response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_parallel_streaming_reasoning_001","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_parallel_streaming_reasoning_001","output_index":0,"summary_index":0,"delta":"The user 
wants two additions: 3+5 and 10+20. I'll call add for both.","sequence_number":4} + +event: response.reasoning_summary_text.done +data: {"type":"response.reasoning_summary_text.done","item_id":"rs_parallel_streaming_reasoning_001","output_index":0,"summary_index":0,"text":"The user wants two additions: 3+5 and 10+20. I'll call add for both.","sequence_number":5} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_parallel_streaming_reasoning_001","output_index":0,"part":{"type":"summary_text","text":"The user wants two additions: 3+5 and 10+20. I'll call add for both."},"summary_index":0,"sequence_number":6} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_parallel_streaming_reasoning_001","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants two additions: 3+5 and 10+20. I'll call add for both."}]},"output_index":0,"sequence_number":7} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"fc_parallel_streaming_first_001","type":"function_call","status":"in_progress","arguments":"","call_id":"call_ParallelStreamFirst001","name":"add"},"output_index":1,"sequence_number":8} + +event: response.function_call_arguments.delta +data: {"type":"response.function_call_arguments.delta","delta":"{\"a\":3,\"b\":5}","item_id":"fc_parallel_streaming_first_001","output_index":1,"sequence_number":9} + +event: response.function_call_arguments.done +data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":3,\"b\":5}","item_id":"fc_parallel_streaming_first_001","output_index":1,"sequence_number":10} + +event: response.output_item.done +data: 
{"type":"response.output_item.done","item":{"id":"fc_parallel_streaming_first_001","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_ParallelStreamFirst001","name":"add"},"output_index":1,"sequence_number":11} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"fc_parallel_streaming_second_001","type":"function_call","status":"in_progress","arguments":"","call_id":"call_ParallelStreamSecond01","name":"add"},"output_index":2,"sequence_number":12} + +event: response.function_call_arguments.delta +data: {"type":"response.function_call_arguments.delta","delta":"{\"a\":10,\"b\":20}","item_id":"fc_parallel_streaming_second_001","output_index":2,"sequence_number":13} + +event: response.function_call_arguments.done +data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":10,\"b\":20}","item_id":"fc_parallel_streaming_second_001","output_index":2,"sequence_number":14} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"fc_parallel_streaming_second_001","type":"function_call","status":"completed","arguments":"{\"a\":10,\"b\":20}","call_id":"call_ParallelStreamSecond01","name":"add"},"output_index":2,"sequence_number":15} + +event: response.completed +data: {"type":"response.completed","response":{"id":"resp_parallel_streaming_001","object":"response","created_at":1767875312,"status":"completed","background":false,"completed_at":1767875312,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[{"id":"rs_parallel_streaming_reasoning_001","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants two additions: 3+5 and 10+20. 
I'll call add for both."}]},{"id":"fc_parallel_streaming_first_001","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_ParallelStreamFirst001","name":"add"},{"id":"fc_parallel_streaming_second_001","type":"function_call","status":"completed","arguments":"{\"a\":10,\"b\":20}","call_id":"call_ParallelStreamSecond01","name":"add"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":65,"input_tokens_details":{"cached_tokens":0},"output_tokens":30,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":95},"user":null,"metadata":{}},"sequence_number":16} + diff --git a/intercept/messages/blocking.go b/intercept/messages/blocking.go index b32f9e8d..0c888d0a 100644 --- a/intercept/messages/blocking.go +++ b/intercept/messages/blocking.go @@ -208,8 +208,13 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req InvocationError: err, ModelThoughts: thoughtRecords, }) + // Clear after first use to avoid duplicating across // multiple tool calls in the same message. + // + // This is not strictly needed for injected tools since we + // disable parallel tool calls, but just adding this here + // for defensiveness. 
thoughtRecords = nil if err != nil { diff --git a/intercept/messages/streaming.go b/intercept/messages/streaming.go index 6c2545c9..878565c3 100644 --- a/intercept/messages/streaming.go +++ b/intercept/messages/streaming.go @@ -256,7 +256,7 @@ newStream: // Capture any thinking blocks that were returned. thoughtRecords := i.extractModelThoughts(&message) - // Process injected tool + // Process injected tools. if len(pendingToolCalls) > 0 { // Append the whole message from this stream as context since we'll be sending a new request with the tool results. messages.Messages = append(messages.Messages, message.ToParam()) @@ -315,8 +315,9 @@ newStream: // Clear after first use to avoid duplicating across // multiple tool calls in the same message. // - // This is not strictly need for injected tools since we disable parallel tool calls, - // but just adding this here for defensiveness. + // This is not strictly needed for injected tools since we + // disable parallel tool calls, but just adding this here + // for defensiveness. thoughtRecords = nil if err != nil { diff --git a/intercept/responses/injected_tools.go b/intercept/responses/injected_tools.go index 7d95db8f..9f813720 100644 --- a/intercept/responses/injected_tools.go +++ b/intercept/responses/injected_tools.go @@ -119,8 +119,9 @@ func (i *responsesInterceptionBase) handleInjectedToolCalls(ctx context.Context, // Clear after first use to avoid duplicating across // multiple tool calls in the same message. // - // This is not strictly need for injected tools since we disable parallel tool calls, - // but just adding this here for defensiveness. + // This is not strictly needed for injected tools since we + // disable parallel tool calls, but just adding this here + // for defensiveness. 
thoughtRecords = nil } diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index 6150173a..feb03c58 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -128,39 +128,46 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { t.Parallel() cases := []struct { - name string - streaming bool - fixture []byte - expectedToolCallID string - expectedThoughts []string // nil means no tool usages expected at all + name string + streaming bool + fixture []byte + expectedThoughts []string // nil means no tool usages expected at all }{ { - name: "single thinking block/streaming", - streaming: true, - fixture: fixtures.AntSingleBuiltinTool, - expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", - expectedThoughts: []string{"The user wants me to read"}, + name: "single thinking block/streaming", + streaming: true, + fixture: fixtures.AntSingleBuiltinTool, + expectedThoughts: []string{"The user wants me to read"}, + }, + { + name: "single thinking block/blocking", + streaming: false, + fixture: fixtures.AntSingleBuiltinTool, + expectedThoughts: []string{"The user wants me to read"}, + }, + { + name: "multiple thinking blocks/streaming", + streaming: true, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, }, { - name: "single thinking block/blocking", - streaming: false, - fixture: fixtures.AntSingleBuiltinTool, - expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", - expectedThoughts: []string{"The user wants me to read"}, + name: "multiple thinking blocks/blocking", + streaming: false, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, }, { - name: "multiple thinking blocks/streaming", - streaming: true, - fixture: fixtures.AntMultiThinkingBuiltinTool, - expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", - 
expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + name: "parallel tool calls/streaming", + streaming: true, + fixture: fixtures.AntSingleBuiltinToolParallel, + expectedThoughts: []string{"The user wants me to read two files"}, }, { - name: "multiple thinking blocks/blocking", - streaming: false, - fixture: fixtures.AntMultiThinkingBuiltinTool, - expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", - expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + name: "parallel tool calls/blocking", + streaming: false, + fixture: fixtures.AntSingleBuiltinToolParallel, + expectedThoughts: []string{"The user wants me to read two files"}, }, { name: "no thoughts without tool calls", @@ -197,14 +204,22 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { if tc.expectedThoughts == nil { assert.Empty(t, toolUsages) } else { - require.Len(t, toolUsages, 1) - assert.Equal(t, "Read", toolUsages[0].Tool) - assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) - - require.Len(t, toolUsages[0].ModelThoughts, len(tc.expectedThoughts)) - for i, expected := range tc.expectedThoughts { - assert.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) + require.NotEmpty(t, toolUsages) + + // Exactly one tool usage should have the expected thoughts; + // all others should have none. 
+ var withThoughts int + for _, tu := range toolUsages { + assert.Equal(t, "Read", tu.Tool) + if len(tu.ModelThoughts) > 0 { + withThoughts++ + require.Len(t, tu.ModelThoughts, len(tc.expectedThoughts)) + for i, expected := range tc.expectedThoughts { + assert.Contains(t, tu.ModelThoughts[i].Content, expected) + } + } } + assert.Equal(t, 1, withThoughts, "expected exactly one tool usage with model thoughts") } bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) diff --git a/internal/integrationtest/responses_test.go b/internal/integrationtest/responses_test.go index 1aceaacf..d120c3bb 100644 --- a/internal/integrationtest/responses_test.go +++ b/internal/integrationtest/responses_test.go @@ -941,58 +941,59 @@ func TestResponsesModelThoughts(t *testing.T) { t.Parallel() cases := []struct { - name string - fixture []byte - expectedToolCallID string - expectedThoughts []string // nil means no tool usages expected at all + name string + fixture []byte + expectedThoughts []string // nil means no tool usages expected at all }{ { - name: "single reasoning/blocking", - fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, - expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", - expectedThoughts: []string{"The user wants to add 3 and 5"}, + name: "single reasoning/blocking", + fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, + expectedThoughts: []string{"The user wants to add 3 and 5"}, }, { - name: "single reasoning/streaming", - fixture: fixtures.OaiResponsesStreamingBuiltinTool, - expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", - expectedThoughts: []string{"The user wants to add 3 and 5"}, + name: "single reasoning/streaming", + fixture: fixtures.OaiResponsesStreamingBuiltinTool, + expectedThoughts: []string{"The user wants to add 3 and 5"}, }, { - name: "multiple reasoning items/blocking", - fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, - expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", - expectedThoughts: []string{"The user wants 
to add 3 and 5", "After adding, I will check if the result is prime"}, + name: "multiple reasoning items/blocking", + fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, }, { - name: "multiple reasoning items/streaming", - fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, - expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", - expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, + name: "multiple reasoning items/streaming", + fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, }, { - name: "commentary/blocking", - fixture: fixtures.OaiResponsesBlockingCommentaryBuiltinTool, - expectedToolCallID: "call_A8TkZmIcKtw2Zw952Wc5QVe7", - expectedThoughts: []string{"Checking whether 3 + 5 is prime by calling the add function first."}, + name: "commentary/blocking", + fixture: fixtures.OaiResponsesBlockingCommentaryBuiltinTool, + expectedThoughts: []string{"Checking whether 3 + 5 is prime by calling the add function first."}, }, { - name: "commentary/streaming", - fixture: fixtures.OaiResponsesStreamingCommentaryBuiltinTool, - expectedToolCallID: "call_A8TkZmIcKtw2Zw952Wc5QVe7", - expectedThoughts: []string{"Checking whether 3 + 5 is prime by calling the add function first."}, + name: "commentary/streaming", + fixture: fixtures.OaiResponsesStreamingCommentaryBuiltinTool, + expectedThoughts: []string{"Checking whether 3 + 5 is prime by calling the add function first."}, }, { - name: "summary and commentary/blocking", - fixture: fixtures.OaiResponsesBlockingSummaryAndCommentaryBuiltinTool, - expectedToolCallID: "call_B9UjYX01Lvvv1XwjDsdmRW3f", - expectedThoughts: []string{"I need to add 3 and 5 to check primality.", "Let me calculate the sum 
first using the add function."}, + name: "summary and commentary/blocking", + fixture: fixtures.OaiResponsesBlockingSummaryAndCommentaryBuiltinTool, + expectedThoughts: []string{"I need to add 3 and 5 to check primality.", "Let me calculate the sum first using the add function."}, }, { - name: "summary and commentary/streaming", - fixture: fixtures.OaiResponsesStreamingSummaryAndCommentaryBuiltinTool, - expectedToolCallID: "call_B9UjYX01Lvvv1XwjDsdmRW3f", - expectedThoughts: []string{"I need to add 3 and 5 to check primality.", "Let me calculate the sum first using the add function."}, + name: "summary and commentary/streaming", + fixture: fixtures.OaiResponsesStreamingSummaryAndCommentaryBuiltinTool, + expectedThoughts: []string{"I need to add 3 and 5 to check primality.", "Let me calculate the sum first using the add function."}, + }, + { + name: "parallel tool calls/blocking", + fixture: fixtures.OaiResponsesBlockingSingleBuiltinToolParallel, + expectedThoughts: []string{"The user wants two additions"}, + }, + { + name: "parallel tool calls/streaming", + fixture: fixtures.OaiResponsesStreamingSingleBuiltinToolParallel, + expectedThoughts: []string{"The user wants two additions"}, }, { name: "no thoughts without tool calls", @@ -1022,14 +1023,22 @@ func TestResponsesModelThoughts(t *testing.T) { if tc.expectedThoughts == nil { require.Empty(t, toolUsages) } else { - require.Len(t, toolUsages, 1) - require.Equal(t, "add", toolUsages[0].Tool) - require.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) - - require.Len(t, toolUsages[0].ModelThoughts, len(tc.expectedThoughts)) - for i, expected := range tc.expectedThoughts { - require.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) + require.NotEmpty(t, toolUsages) + + // Exactly one tool usage should have the expected thoughts; + // all others should have none. 
+ var withThoughts int + for _, tu := range toolUsages { + require.Equal(t, "add", tu.Tool) + if len(tu.ModelThoughts) > 0 { + withThoughts++ + require.Len(t, tu.ModelThoughts, len(tc.expectedThoughts)) + for i, expected := range tc.expectedThoughts { + require.Contains(t, tu.ModelThoughts[i].Content, expected) + } + } } + require.Equal(t, 1, withThoughts, "expected exactly one tool usage with model thoughts") } }) } From befe9d94061533350ea45392b490d9c7cee8c3e0 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 12 Mar 2026 14:34:43 +0200 Subject: [PATCH 11/14] chore: capture source of thinking/reasoning Signed-off-by: Danny Kopping --- intercept/messages/base.go | 1 + intercept/responses/base.go | 2 + internal/integrationtest/bridge_test.go | 2 + internal/integrationtest/responses_test.go | 59 ++++++++++++++-------- 4 files changed, 44 insertions(+), 20 deletions(-) diff --git a/intercept/messages/base.go b/intercept/messages/base.go index 58ac23a9..b50de147 100644 --- a/intercept/messages/base.go +++ b/intercept/messages/base.go @@ -180,6 +180,7 @@ func (i *interceptionBase) extractModelThoughts(msg *anthropic.Message) []*recor case anthropic.ThinkingBlock: thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ Content: variant.Thinking, + Metadata: recorder.Metadata{"source": "thinking"}, CreatedAt: time.Now(), }) } diff --git a/intercept/responses/base.go b/intercept/responses/base.go index e61bc86c..12cddebf 100644 --- a/intercept/responses/base.go +++ b/intercept/responses/base.go @@ -356,6 +356,7 @@ func (i *responsesInterceptionBase) extractModelThoughts(response *responses.Res } thoughts = append(thoughts, &recorder.ModelThoughtRecord{ Content: summary.Text, + Metadata: recorder.Metadata{"source": "reasoning_summary"}, CreatedAt: time.Now(), }) } @@ -379,6 +380,7 @@ func (i *responsesInterceptionBase) extractModelThoughts(response *responses.Res } thoughts = append(thoughts, &recorder.ModelThoughtRecord{ Content: part.Text, + Metadata: 
recorder.Metadata{"source": "commentary"}, CreatedAt: time.Now(), }) } diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index feb03c58..4a424f32 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -216,6 +216,8 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { require.Len(t, tu.ModelThoughts, len(tc.expectedThoughts)) for i, expected := range tc.expectedThoughts { assert.Contains(t, tu.ModelThoughts[i].Content, expected) + assert.Equal(t, "thinking", tu.ModelThoughts[i].Metadata["source"], + "thought %d should have source \"thinking\"", i) } } } diff --git a/internal/integrationtest/responses_test.go b/internal/integrationtest/responses_test.go index d120c3bb..2cff3e63 100644 --- a/internal/integrationtest/responses_test.go +++ b/internal/integrationtest/responses_test.go @@ -940,60 +940,77 @@ func TestResponsesInjectedTool(t *testing.T) { func TestResponsesModelThoughts(t *testing.T) { t.Parallel() + type expectedThought struct { + content string + source string // "reasoning_summary" or "commentary" + } + cases := []struct { name string fixture []byte - expectedThoughts []string // nil means no tool usages expected at all + expectedThoughts []expectedThought // nil means no tool usages expected at all }{ { name: "single reasoning/blocking", fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, - expectedThoughts: []string{"The user wants to add 3 and 5"}, + expectedThoughts: []expectedThought{{content: "The user wants to add 3 and 5", source: "reasoning_summary"}}, }, { name: "single reasoning/streaming", fixture: fixtures.OaiResponsesStreamingBuiltinTool, - expectedThoughts: []string{"The user wants to add 3 and 5"}, + expectedThoughts: []expectedThought{{content: "The user wants to add 3 and 5", source: "reasoning_summary"}}, }, { - name: "multiple reasoning items/blocking", - fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, - 
expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, + name: "multiple reasoning items/blocking", + fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, + expectedThoughts: []expectedThought{ + {content: "The user wants to add 3 and 5", source: "reasoning_summary"}, + {content: "After adding, I will check if the result is prime", source: "reasoning_summary"}, + }, }, { - name: "multiple reasoning items/streaming", - fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, - expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, + name: "multiple reasoning items/streaming", + fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, + expectedThoughts: []expectedThought{ + {content: "The user wants to add 3 and 5", source: "reasoning_summary"}, + {content: "After adding, I will check if the result is prime", source: "reasoning_summary"}, + }, }, { name: "commentary/blocking", fixture: fixtures.OaiResponsesBlockingCommentaryBuiltinTool, - expectedThoughts: []string{"Checking whether 3 + 5 is prime by calling the add function first."}, + expectedThoughts: []expectedThought{{content: "Checking whether 3 + 5 is prime by calling the add function first.", source: "commentary"}}, }, { name: "commentary/streaming", fixture: fixtures.OaiResponsesStreamingCommentaryBuiltinTool, - expectedThoughts: []string{"Checking whether 3 + 5 is prime by calling the add function first."}, + expectedThoughts: []expectedThought{{content: "Checking whether 3 + 5 is prime by calling the add function first.", source: "commentary"}}, }, { - name: "summary and commentary/blocking", - fixture: fixtures.OaiResponsesBlockingSummaryAndCommentaryBuiltinTool, - expectedThoughts: []string{"I need to add 3 and 5 to check primality.", "Let me calculate the sum first using the add function."}, + name: "summary and commentary/blocking", + fixture: 
fixtures.OaiResponsesBlockingSummaryAndCommentaryBuiltinTool, + expectedThoughts: []expectedThought{ + {content: "I need to add 3 and 5 to check primality.", source: "reasoning_summary"}, + {content: "Let me calculate the sum first using the add function.", source: "commentary"}, + }, }, { - name: "summary and commentary/streaming", - fixture: fixtures.OaiResponsesStreamingSummaryAndCommentaryBuiltinTool, - expectedThoughts: []string{"I need to add 3 and 5 to check primality.", "Let me calculate the sum first using the add function."}, + name: "summary and commentary/streaming", + fixture: fixtures.OaiResponsesStreamingSummaryAndCommentaryBuiltinTool, + expectedThoughts: []expectedThought{ + {content: "I need to add 3 and 5 to check primality.", source: "reasoning_summary"}, + {content: "Let me calculate the sum first using the add function.", source: "commentary"}, + }, }, { name: "parallel tool calls/blocking", fixture: fixtures.OaiResponsesBlockingSingleBuiltinToolParallel, - expectedThoughts: []string{"The user wants two additions"}, + expectedThoughts: []expectedThought{{content: "The user wants two additions", source: "reasoning_summary"}}, }, { name: "parallel tool calls/streaming", fixture: fixtures.OaiResponsesStreamingSingleBuiltinToolParallel, - expectedThoughts: []string{"The user wants two additions"}, + expectedThoughts: []expectedThought{{content: "The user wants two additions", source: "reasoning_summary"}}, }, { name: "no thoughts without tool calls", @@ -1034,7 +1051,9 @@ func TestResponsesModelThoughts(t *testing.T) { withThoughts++ require.Len(t, tu.ModelThoughts, len(tc.expectedThoughts)) for i, expected := range tc.expectedThoughts { - require.Contains(t, tu.ModelThoughts[i].Content, expected) + require.Contains(t, tu.ModelThoughts[i].Content, expected.content) + require.Equal(t, expected.source, tu.ModelThoughts[i].Metadata["source"], + "thought %d should have source %q", i, expected.source) } } } From 7fbfb8e3983449217f9d4554ca31bf4bab98aafe 
Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 13 Mar 2026 17:24:30 +0200 Subject: [PATCH 12/14] chore: break association between thoughts and tools Signed-off-by: Danny Kopping --- api.go | 1 + intercept/messages/base.go | 8 ++-- intercept/messages/blocking.go | 27 ++++-------- intercept/messages/streaming.go | 25 +++-------- intercept/responses/base.go | 32 +++++++------- intercept/responses/blocking.go | 1 + intercept/responses/injected_tools.go | 18 ++------ intercept/responses/streaming.go | 2 + internal/integrationtest/bridge_test.go | 51 +++++++++++++--------- internal/integrationtest/responses_test.go | 43 +++++++++--------- internal/integrationtest/trace_test.go | 2 + internal/testutil/mock_recorder.go | 16 +++++++ recorder/recorder.go | 34 +++++++++++++++ recorder/types.go | 18 +++++--- 14 files changed, 160 insertions(+), 118 deletions(-) diff --git a/api.go b/api.go index acc789ef..e0486d77 100644 --- a/api.go +++ b/api.go @@ -30,6 +30,7 @@ type ( TokenUsageRecord = recorder.TokenUsageRecord PromptUsageRecord = recorder.PromptUsageRecord ToolUsageRecord = recorder.ToolUsageRecord + ModelThoughtRecord = recorder.ModelThoughtRecord Recorder = recorder.Recorder Metadata = recorder.Metadata diff --git a/intercept/messages/base.go b/intercept/messages/base.go index b50de147..f1a123ad 100644 --- a/intercept/messages/base.go +++ b/intercept/messages/base.go @@ -178,10 +178,12 @@ func (i *interceptionBase) extractModelThoughts(msg *anthropic.Message) []*recor for _, block := range msg.Content { switch variant := block.AsAny().(type) { case anthropic.ThinkingBlock: + if variant.Thinking == "" { + continue + } thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ - Content: variant.Thinking, - Metadata: recorder.Metadata{"source": "thinking"}, - CreatedAt: time.Now(), + Content: variant.Thinking, + Metadata: recorder.Metadata{"source": recorder.ThoughtSourceThinking}, }) } // anthropic.RedactedThinkingBlock also exists, but there's 
nothing useful we can capture. diff --git a/intercept/messages/blocking.go b/intercept/messages/blocking.go index 0c888d0a..6d2ed6f8 100644 --- a/intercept/messages/blocking.go +++ b/intercept/messages/blocking.go @@ -136,9 +136,15 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req accumulateUsage(&cumulativeUsage, resp.Usage) // Capture any thinking blocks that were returned. - thoughtRecords := i.extractModelThoughts(resp) + for _, t := range i.extractModelThoughts(resp) { + _ = i.recorder.RecordModelThought(ctx, &recorder.ModelThoughtRecord{ + InterceptionID: i.ID().String(), + Content: t.Content, + Metadata: t.Metadata, + }) + } - // Handle tool calls for non-streaming. + // Handle tool calls. var pendingToolCalls []anthropic.ToolUseBlock for _, c := range resp.Content { toolUse := c.AsToolUse() @@ -159,15 +165,7 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req Tool: toolUse.Name, Args: toolUse.Input, Injected: false, - ModelThoughts: thoughtRecords, }) - - // Clear after first use to avoid duplicating across - // multiple tool calls in the same message. - // - // This effectively means that in the case of parallel tool calls - // the thoughts will only be associated to the first tool use which is fine. - thoughtRecords = nil } // If no injected tool calls, we're done. @@ -206,17 +204,8 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req Args: tc.Input, Injected: true, InvocationError: err, - ModelThoughts: thoughtRecords, }) - // Clear after first use to avoid duplicating across - // multiple tool calls in the same message. - // - // This is not strictly needed for injected tools since we - // disable parallel tool calls, but just adding this here - // for defensiveness. 
- thoughtRecords = nil - if err != nil { // Always provide a tool_result even if the tool call failed messages.Messages = append(messages.Messages, diff --git a/intercept/messages/streaming.go b/intercept/messages/streaming.go index 878565c3..595cfe44 100644 --- a/intercept/messages/streaming.go +++ b/intercept/messages/streaming.go @@ -254,7 +254,13 @@ newStream: case string(constant.ValueOf[constant.MessageStop]()): // Capture any thinking blocks that were returned. - thoughtRecords := i.extractModelThoughts(&message) + for _, t := range i.extractModelThoughts(&message) { + _ = i.recorder.RecordModelThought(ctx, &recorder.ModelThoughtRecord{ + InterceptionID: i.ID().String(), + Content: t.Content, + Metadata: t.Metadata, + }) + } // Process injected tools. if len(pendingToolCalls) > 0 { @@ -309,17 +315,8 @@ newStream: Args: input, Injected: true, InvocationError: err, - ModelThoughts: thoughtRecords, }) - // Clear after first use to avoid duplicating across - // multiple tool calls in the same message. - // - // This is not strictly needed for injected tools since we - // disable parallel tool calls, but just adding this here - // for defensiveness. - thoughtRecords = nil - if err != nil { // Always provide a tool_result even if the tool call failed messages.Messages = append(messages.Messages, @@ -430,15 +427,7 @@ newStream: Tool: variant.Name, Args: variant.Input, Injected: false, - ModelThoughts: thoughtRecords, }) - - // Clear after first use to avoid duplicating across - // multiple tool calls in the same message. - // - // This effectively means that in the case of parallel tool calls - // the thoughts will only be associated to the first tool use which is fine. 
- thoughtRecords = nil } } } diff --git a/intercept/responses/base.go b/intercept/responses/base.go index 12cddebf..59a14b02 100644 --- a/intercept/responses/base.go +++ b/intercept/responses/base.go @@ -254,15 +254,22 @@ func (i *responsesInterceptionBase) recordUserPrompt(ctx context.Context, respon } } +func (i *responsesInterceptionBase) recordModelThoughts(ctx context.Context, response *responses.Response) { + for _, t := range i.extractModelThoughts(response) { + _ = i.recorder.RecordModelThought(ctx, &recorder.ModelThoughtRecord{ + InterceptionID: i.ID().String(), + Content: t.Content, + Metadata: t.Metadata, + }) + } +} + func (i *responsesInterceptionBase) recordNonInjectedToolUsage(ctx context.Context, response *responses.Response) { if response == nil { i.logger.Warn(ctx, "got empty response, skipping tool usage recording") return } - // Capture any reasoning items from the response output as model thoughts. - thoughtRecords := i.extractModelThoughts(response) - for _, item := range response.Output { var args recorder.ToolArgs @@ -283,17 +290,9 @@ func (i *responsesInterceptionBase) recordNonInjectedToolUsage(ctx context.Conte Tool: item.Name, Args: args, Injected: false, - ModelThoughts: thoughtRecords, }); err != nil { i.logger.Warn(ctx, "failed to record tool usage", slog.Error(err), slog.F("tool", item.Name)) } - - // Clear after first use to avoid duplicating across - // multiple tool calls in the same message. - // - // This effectively means that in the case of parallel tool calls - // the thoughts will only be associated to the first tool use which is fine. 
- thoughtRecords = nil } } @@ -355,9 +354,8 @@ func (i *responsesInterceptionBase) extractModelThoughts(response *responses.Res continue } thoughts = append(thoughts, &recorder.ModelThoughtRecord{ - Content: summary.Text, - Metadata: recorder.Metadata{"source": "reasoning_summary"}, - CreatedAt: time.Now(), + Content: summary.Text, + Metadata: recorder.Metadata{"source": recorder.ThoughtSourceReasoningSummary}, }) } @@ -365,6 +363,7 @@ func (i *responsesInterceptionBase) extractModelThoughts(response *responses.Res // The API sometimes returns commentary messages instead of reasoning // summaries. These are assistant message output items with "phase": "commentary". // The SDK doesn't expose a Phase field, so we extract it from raw JSON. + // TODO: revisit when the OpenAI SDK adds a proper Phase field. raw := item.RawJSON() if gjson.Get(raw, "role").String() != string(constant.ValueOf[constant.Assistant]()) || gjson.Get(raw, "phase").String() != "commentary" { @@ -379,9 +378,8 @@ func (i *responsesInterceptionBase) extractModelThoughts(response *responses.Res continue } thoughts = append(thoughts, &recorder.ModelThoughtRecord{ - Content: part.Text, - Metadata: recorder.Metadata{"source": "commentary"}, - CreatedAt: time.Now(), + Content: part.Text, + Metadata: recorder.Metadata{"source": recorder.ThoughtSourceCommentary}, }) } } diff --git a/intercept/responses/blocking.go b/intercept/responses/blocking.go index 0c11a541..48491e43 100644 --- a/intercept/responses/blocking.go +++ b/intercept/responses/blocking.go @@ -94,6 +94,7 @@ func (i *BlockingResponsesInterceptor) ProcessRequest(w http.ResponseWriter, r * } i.recordTokenUsage(ctx, response) + i.recordModelThoughts(ctx, response) // Check if there any injected tools to invoke. 
pending := i.getPendingInjectedToolCalls(response) diff --git a/intercept/responses/injected_tools.go b/intercept/responses/injected_tools.go index 9f813720..e3720230 100644 --- a/intercept/responses/injected_tools.go +++ b/intercept/responses/injected_tools.go @@ -109,20 +109,9 @@ func (i *responsesInterceptionBase) handleInjectedToolCalls(ctx context.Context, return nil, nil } - // Capture any reasoning items from the response output as model thoughts. - thoughtRecords := i.extractModelThoughts(response) - var results []responses.ResponseInputItemUnionParam for _, fc := range pending { - results = append(results, i.invokeInjectedTool(ctx, response.ID, fc, thoughtRecords)) - - // Clear after first use to avoid duplicating across - // multiple tool calls in the same message. - // - // This is not strictly needed for injected tools since we - // disable parallel tool calls, but just adding this here - // for defensiveness. - thoughtRecords = nil + results = append(results, i.invokeInjectedTool(ctx, response.ID, fc)) } return results, nil @@ -182,7 +171,7 @@ func (i *responsesInterceptionBase) prepareRequestForAgenticLoop(ctx context.Con return nil } -// getPendingInjectedToolCalls extracts function calls from the response that are managed by MCP proxy +// getPendingInjectedToolCalls extracts function calls from the response that are managed by MCP proxy. 
func (i *responsesInterceptionBase) getPendingInjectedToolCalls(response *responses.Response) []responses.ResponseFunctionToolCall { var calls []responses.ResponseFunctionToolCall @@ -207,7 +196,7 @@ func (i *responsesInterceptionBase) getPendingInjectedToolCalls(response *respon return calls } -func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, responseID string, fc responses.ResponseFunctionToolCall, thoughtRecords []*recorder.ModelThoughtRecord) responses.ResponseInputItemUnionParam { +func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, responseID string, fc responses.ResponseFunctionToolCall) responses.ResponseInputItemUnionParam { tool := i.mcpProxy.GetTool(fc.Name) if tool == nil { return responses.ResponseInputItemParamOfFunctionCallOutput(fc.CallID, fmt.Sprintf("error: unknown injected function %q", fc.ID)) @@ -224,7 +213,6 @@ func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, resp Args: args, Injected: true, InvocationError: err, - ModelThoughts: thoughtRecords, }) var output string diff --git a/intercept/responses/streaming.go b/intercept/responses/streaming.go index 38f5771b..32ee1f02 100644 --- a/intercept/responses/streaming.go +++ b/intercept/responses/streaming.go @@ -172,6 +172,8 @@ func (i *StreamingResponsesInterceptor) ProcessRequest(w http.ResponseWriter, r // Record token usage for each inner loop iteration i.recordTokenUsage(ctx, completedResponse) } + + i.recordModelThoughts(ctx, completedResponse) } if promptFound { diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index 4a424f32..9ceafe0d 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -131,7 +131,7 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { name string streaming bool fixture []byte - expectedThoughts []string // nil means no tool usages expected at all + expectedThoughts []string // nil means no model 
thoughts expected }{ { name: "single thinking block/streaming", @@ -170,9 +170,16 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { expectedThoughts: []string{"The user wants me to read two files"}, }, { - name: "no thoughts without tool calls", - streaming: true, - fixture: fixtures.AntSimple, // This fixture contains thoughts, but they're not associated with tool calls. + name: "thoughts without tool calls/streaming", + streaming: true, + fixture: fixtures.AntSimple, + expectedThoughts: []string{"This is a classic philosophical question about medieval scholasticism"}, + }, + { + name: "thoughts without tool calls/blocking", + streaming: false, + fixture: fixtures.AntSimple, + expectedThoughts: []string{"This is a classic philosophical question about medieval scholasticism"}, }, } @@ -200,28 +207,30 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { assert.Contains(t, sp.AllEvents(), "message_stop") } - toolUsages := bridgeServer.Recorder.RecordedToolUsages() + interceptions := bridgeServer.Recorder.RecordedInterceptions() + require.GreaterOrEqual(t, len(interceptions), 1) + + thoughts := bridgeServer.Recorder.RecordedModelThoughts() if tc.expectedThoughts == nil { - assert.Empty(t, toolUsages) + assert.Empty(t, thoughts) } else { - require.NotEmpty(t, toolUsages) - - // Exactly one tool usage should have the expected thoughts; - // all others should have none. 
- var withThoughts int - for _, tu := range toolUsages { - assert.Equal(t, "Read", tu.Tool) - if len(tu.ModelThoughts) > 0 { - withThoughts++ - require.Len(t, tu.ModelThoughts, len(tc.expectedThoughts)) - for i, expected := range tc.expectedThoughts { - assert.Contains(t, tu.ModelThoughts[i].Content, expected) - assert.Equal(t, "thinking", tu.ModelThoughts[i].Metadata["source"], - "thought %d should have source \"thinking\"", i) + require.Len(t, thoughts, len(tc.expectedThoughts), "unexpected number of model thoughts") + + // We can't guarantee the order of model thoughts since they're recorded separately, so + // we have to scan all thoughts for a match. + + for _, expected := range tc.expectedThoughts { + var matched *aibridge.ModelThoughtRecord + for _, thought := range thoughts { + if strings.Contains(thought.Content, expected) { + matched = thought } } + + require.NotNil(t, matched, "could not find thought matching %q", expected) + require.Equal(t, interceptions[0].ID, matched.InterceptionID) + require.Equal(t, "thinking", matched.Metadata["source"]) } - assert.Equal(t, 1, withThoughts, "expected exactly one tool usage with model thoughts") } bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) diff --git a/internal/integrationtest/responses_test.go b/internal/integrationtest/responses_test.go index 2cff3e63..358175a9 100644 --- a/internal/integrationtest/responses_test.go +++ b/internal/integrationtest/responses_test.go @@ -10,6 +10,7 @@ import ( "net/http/httptest" "slices" "strconv" + "strings" "sync" "testing" "time" @@ -374,7 +375,6 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) { require.Len(t, recordedTools, 1) recordedTools[0].InterceptionID = tc.expectToolRecorded.InterceptionID // ignore interception id (interception id is not constant and response doesn't contain it) recordedTools[0].CreatedAt = tc.expectToolRecorded.CreatedAt // ignore time - recordedTools[0].ModelThoughts = tc.expectToolRecorded.ModelThoughts // ignore model thoughts 
(tested separately) require.Equal(t, tc.expectToolRecorded, recordedTools[0]) } else { require.Empty(t, recordedTools) @@ -1013,8 +1013,9 @@ func TestResponsesModelThoughts(t *testing.T) { expectedThoughts: []expectedThought{{content: "The user wants two additions", source: "reasoning_summary"}}, }, { - name: "no thoughts without tool calls", - fixture: fixtures.OaiResponsesStreamingCodex, // This fixture contains reasoning, but it's not associated with tool calls. + name: "thoughts without tool calls", + fixture: fixtures.OaiResponsesStreamingCodex, // This fixture contains reasoning, but it's not associated with tool calls. + expectedThoughts: []expectedThought{{content: "Preparing simple response", source: "reasoning_summary"}}, }, } @@ -1036,28 +1037,30 @@ func TestResponsesModelThoughts(t *testing.T) { _, err := io.ReadAll(resp.Body) require.NoError(t, err) - toolUsages := bridgeServer.Recorder.RecordedToolUsages() + interceptions := bridgeServer.Recorder.RecordedInterceptions() + require.GreaterOrEqual(t, len(interceptions), 1) + + thoughts := bridgeServer.Recorder.RecordedModelThoughts() if tc.expectedThoughts == nil { - require.Empty(t, toolUsages) + assert.Empty(t, thoughts) } else { - require.NotEmpty(t, toolUsages) - - // Exactly one tool usage should have the expected thoughts; - // all others should have none. 
- var withThoughts int - for _, tu := range toolUsages { - require.Equal(t, "add", tu.Tool) - if len(tu.ModelThoughts) > 0 { - withThoughts++ - require.Len(t, tu.ModelThoughts, len(tc.expectedThoughts)) - for i, expected := range tc.expectedThoughts { - require.Contains(t, tu.ModelThoughts[i].Content, expected.content) - require.Equal(t, expected.source, tu.ModelThoughts[i].Metadata["source"], - "thought %d should have source %q", i, expected.source) + require.Len(t, thoughts, len(tc.expectedThoughts), "unexpected number of model thoughts") + + // We can't guarantee the order of model thoughts since they're recorded separately, so + // we have to scan all thoughts for a match. + + for _, expected := range tc.expectedThoughts { + var matched *aibridge.ModelThoughtRecord + for _, thought := range thoughts { + if strings.Contains(thought.Content, expected.content) { + matched = thought } } + + require.NotNil(t, matched, "could not find thought matching %q", expected) + require.Equal(t, interceptions[0].ID, matched.InterceptionID) + require.Equal(t, expected.source, matched.Metadata["source"]) } - require.Equal(t, 1, withThoughts, "expected exactly one tool usage with model thoughts") } }) } diff --git a/internal/integrationtest/trace_test.go b/internal/integrationtest/trace_test.go index 88bec31c..bdfb7f7f 100644 --- a/internal/integrationtest/trace_test.go +++ b/internal/integrationtest/trace_test.go @@ -51,6 +51,7 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 1, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 1, codes.Unset}, {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } @@ -63,6 +64,7 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 2, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 1, codes.Unset}, 
{"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } diff --git a/internal/testutil/mock_recorder.go b/internal/testutil/mock_recorder.go index 09bcac39..5cd4420f 100644 --- a/internal/testutil/mock_recorder.go +++ b/internal/testutil/mock_recorder.go @@ -20,6 +20,7 @@ type MockRecorder struct { tokenUsages []*recorder.TokenUsageRecord userPrompts []*recorder.PromptUsageRecord toolUsages []*recorder.ToolUsageRecord + modelThoughts []*recorder.ModelThoughtRecord interceptionsEnd map[string]*recorder.InterceptionRecordEnded } @@ -64,6 +65,13 @@ func (m *MockRecorder) RecordToolUsage(ctx context.Context, req *recorder.ToolUs return nil } +func (m *MockRecorder) RecordModelThought(ctx context.Context, req *recorder.ModelThoughtRecord) error { + m.mu.Lock() + defer m.mu.Unlock() + m.modelThoughts = append(m.modelThoughts, req) + return nil +} + // RecordedTokenUsages returns a copy of recorded token usages in a thread-safe manner. // Note: This is a shallow clone - the slice is copied but the pointers reference the // same underlying records. This is sufficient for our test assertions which only read @@ -112,6 +120,14 @@ func (m *MockRecorder) RecordedToolUsages() []*recorder.ToolUsageRecord { return slices.Clone(m.toolUsages) } +// RecordedModelThoughts returns a copy of recorded model thoughts in a thread-safe manner. +// Note: This is a shallow clone (see RecordedTokenUsages for details). +func (m *MockRecorder) RecordedModelThoughts() []*recorder.ModelThoughtRecord { + m.mu.Lock() + defer m.mu.Unlock() + return slices.Clone(m.modelThoughts) +} + // RecordedInterceptions returns a copy of recorded interceptions in a thread-safe manner. // Note: This is a shallow clone (see RecordedTokenUsages for details). 
func (m *MockRecorder) RecordedInterceptions() []*recorder.InterceptionRecord { diff --git a/recorder/recorder.go b/recorder/recorder.go index 6e37b632..c4f427c5 100644 --- a/recorder/recorder.go +++ b/recorder/recorder.go @@ -116,6 +116,24 @@ func (r *RecorderWrapper) RecordToolUsage(ctx context.Context, req *ToolUsageRec return err } +func (r *RecorderWrapper) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) (outErr error) { + ctx, span := r.tracer.Start(ctx, "Intercept.RecordModelThought", trace.WithAttributes(tracing.InterceptionAttributesFromContext(ctx)...)) + defer tracing.EndSpanErr(span, &outErr) + + client, err := r.clientFn() + if err != nil { + return fmt.Errorf("acquire client: %w", err) + } + + req.CreatedAt = time.Now() + if err = client.RecordModelThought(ctx, req); err == nil { + return nil + } + + r.logger.Warn(ctx, "failed to record model thought", slog.Error(err), slog.F("interception_id", req.InterceptionID)) + return err +} + func NewRecorder(logger slog.Logger, tracer trace.Tracer, clientFn func() (Recorder, error)) *RecorderWrapper { return &RecorderWrapper{ logger: logger, @@ -259,6 +277,22 @@ func (a *AsyncRecorder) RecordToolUsage(ctx context.Context, req *ToolUsageRecor return nil // Caller is not interested in error. } +func (a *AsyncRecorder) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error { + a.wg.Add(1) + go func() { + defer a.wg.Done() + timedCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), a.timeout) + defer cancel() + + err := a.wrapped.RecordModelThought(timedCtx, req) + if err != nil { + a.logger.Warn(timedCtx, "failed to record model thought", slog.F("type", "model_thought"), slog.Error(err), slog.F("payload", req)) + } + }() + + return nil // Caller is not interested in error. 
+} + func (a *AsyncRecorder) Wait() { a.wg.Wait() } diff --git a/recorder/types.go b/recorder/types.go index d3cbaf73..20e735f4 100644 --- a/recorder/types.go +++ b/recorder/types.go @@ -18,8 +18,9 @@ type Recorder interface { // RecordPromptUsage records the prompts used in an interception with an upstream AI provider. RecordPromptUsage(ctx context.Context, req *PromptUsageRecord) error // RecordToolUsage records the tools used in an interception with an upstream AI provider. - // Any associated model thoughts should be included in the ToolUsageRecord. RecordToolUsage(ctx context.Context, req *ToolUsageRecord) error + // RecordModelThought records model thoughts produced in an interception with an upstream AI provider. + RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error } type ToolArgs any @@ -73,11 +74,18 @@ type ToolUsageRecord struct { InvocationError error Metadata Metadata CreatedAt time.Time - ModelThoughts []*ModelThoughtRecord } +// Model thought source constants. 
+const ( + ThoughtSourceThinking = "thinking" + ThoughtSourceReasoningSummary = "reasoning_summary" + ThoughtSourceCommentary = "commentary" +) + type ModelThoughtRecord struct { - Content string - Metadata Metadata - CreatedAt time.Time + InterceptionID string + Content string + Metadata Metadata + CreatedAt time.Time } From 33cec2a0feb47178b478a4f68de27c4b5775d5ff Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 16 Mar 2026 15:35:51 +0200 Subject: [PATCH 13/14] chore: expand trace testing Signed-off-by: Danny Kopping --- internal/integrationtest/trace_test.go | 85 +++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 7 deletions(-) diff --git a/internal/integrationtest/trace_test.go b/internal/integrationtest/trace_test.go index bdfb7f7f..e9b27d64 100644 --- a/internal/integrationtest/trace_test.go +++ b/internal/integrationtest/trace_test.go @@ -70,36 +70,70 @@ func TestTraceAnthropic(t *testing.T) { cases := []struct { name string + fixture []byte streaming bool bedrock bool expect []expectTrace }{ { - name: "trace_anthr_non_streaming", - expect: expectNonStreaming, + name: "trace_anthr_non_streaming", + expect: expectNonStreaming, + fixture: fixtures.AntSingleBuiltinTool, }, { name: "trace_bedrock_non_streaming", bedrock: true, expect: expectNonStreaming, + fixture: fixtures.AntSingleBuiltinTool, }, { name: "trace_anthr_streaming", streaming: true, expect: expectStreaming, + fixture: fixtures.AntSingleBuiltinTool, }, { name: "trace_bedrock_streaming", streaming: true, bedrock: true, expect: expectStreaming, + fixture: fixtures.AntSingleBuiltinTool, + }, + { + name: "trace_multi_thinking_non_streaming", + fixture: fixtures.AntMultiThinkingBuiltinTool, + expect: []expectTrace{ + {"Intercept", 1, codes.Unset}, + {"Intercept.CreateInterceptor", 1, codes.Unset}, + {"Intercept.RecordInterception", 1, codes.Unset}, + {"Intercept.ProcessRequest", 1, codes.Unset}, + {"Intercept.RecordInterceptionEnded", 1, codes.Unset}, + {"Intercept.RecordPromptUsage", 
1, codes.Unset}, + {"Intercept.RecordTokenUsage", 1, codes.Unset}, + {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 2, codes.Unset}, + {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, + }, + }, + { + name: "trace_multi_thinking_streaming", + fixture: fixtures.AntMultiThinkingBuiltinTool, + streaming: true, + expect: []expectTrace{ + {"Intercept", 1, codes.Unset}, + {"Intercept.CreateInterceptor", 1, codes.Unset}, + {"Intercept.RecordInterception", 1, codes.Unset}, + {"Intercept.ProcessRequest", 1, codes.Unset}, + {"Intercept.RecordInterceptionEnded", 1, codes.Unset}, + {"Intercept.RecordPromptUsage", 1, codes.Unset}, + {"Intercept.RecordTokenUsage", 2, codes.Unset}, + {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 2, codes.Unset}, + {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, + }, }, } - fix := fixtures.Parse(t, fixtures.AntSingleBuiltinTool) - - fixtureReqBody := fix.Request() - for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) @@ -107,6 +141,7 @@ func TestTraceAnthropic(t *testing.T) { sr, tracer := setupTracer(t) + fix := fixtures.Parse(t, tc.fixture) upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) opts := []bridgeOption{ @@ -117,7 +152,7 @@ func TestTraceAnthropic(t *testing.T) { } bridgeServer := newBridgeTestServer(t, ctx, upstream.URL, opts...) 
- reqBody, err := sjson.SetBytes(fixtureReqBody, "stream", tc.streaming) + reqBody, err := sjson.SetBytes(fix.Request(), "stream", tc.streaming) require.NoError(t, err) resp := bridgeServer.makeRequest(t, http.MethodPost, pathAnthropicMessages, reqBody) require.Equal(t, http.StatusOK, resp.StatusCode) @@ -453,6 +488,42 @@ func TestTraceOpenAI(t *testing.T) { {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, }, }, + { + name: "trace_openai_responses_streaming_with_reasoning", + fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, + streaming: true, + path: pathOpenAIResponses, + expect: []expectTrace{ + {"Intercept", 1, codes.Unset}, + {"Intercept.CreateInterceptor", 1, codes.Unset}, + {"Intercept.RecordInterception", 1, codes.Unset}, + {"Intercept.ProcessRequest", 1, codes.Unset}, + {"Intercept.RecordInterceptionEnded", 1, codes.Unset}, + {"Intercept.RecordPromptUsage", 1, codes.Unset}, + {"Intercept.RecordTokenUsage", 1, codes.Unset}, + {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 2, codes.Unset}, + {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, + }, + }, + { + name: "trace_openai_responses_blocking_with_reasoning", + fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, + streaming: false, + path: pathOpenAIResponses, + expect: []expectTrace{ + {"Intercept", 1, codes.Unset}, + {"Intercept.CreateInterceptor", 1, codes.Unset}, + {"Intercept.RecordInterception", 1, codes.Unset}, + {"Intercept.ProcessRequest", 1, codes.Unset}, + {"Intercept.RecordInterceptionEnded", 1, codes.Unset}, + {"Intercept.RecordPromptUsage", 1, codes.Unset}, + {"Intercept.RecordTokenUsage", 1, codes.Unset}, + {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 2, codes.Unset}, + {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, + }, + }, } for _, tc := range cases { From 92078562c87329b4e51f327230b245cccca75265 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 16 Mar 2026 
16:56:41 +0200 Subject: [PATCH 14/14] chore: refactor model thought assertions into common func Signed-off-by: Danny Kopping --- internal/integrationtest/bridge_test.go | 69 +++++++++----------- internal/integrationtest/helpers.go | 10 +++ internal/integrationtest/responses_test.go | 73 +++++++--------------- internal/testutil/mock_recorder.go | 26 ++++++++ 4 files changed, 86 insertions(+), 92 deletions(-) diff --git a/internal/integrationtest/bridge_test.go b/internal/integrationtest/bridge_test.go index 9ceafe0d..a2a746e3 100644 --- a/internal/integrationtest/bridge_test.go +++ b/internal/integrationtest/bridge_test.go @@ -131,55 +131,67 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { name string streaming bool fixture []byte - expectedThoughts []string // nil means no model thoughts expected + expectedThoughts []recorder.ModelThoughtRecord // nil means no model thoughts expected }{ { name: "single thinking block/streaming", streaming: true, fixture: fixtures.AntSingleBuiltinTool, - expectedThoughts: []string{"The user wants me to read"}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("The user wants me to read", recorder.ThoughtSourceThinking)}, }, { name: "single thinking block/blocking", streaming: false, fixture: fixtures.AntSingleBuiltinTool, - expectedThoughts: []string{"The user wants me to read"}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("The user wants me to read", recorder.ThoughtSourceThinking)}, }, { - name: "multiple thinking blocks/streaming", - streaming: true, - fixture: fixtures.AntMultiThinkingBuiltinTool, - expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + name: "multiple thinking blocks/streaming", + streaming: true, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedThoughts: []recorder.ModelThoughtRecord{ + newModelThought("The user wants me to read", recorder.ThoughtSourceThinking), + newModelThought("I should use the Read tool", 
recorder.ThoughtSourceThinking), + }, }, { - name: "multiple thinking blocks/blocking", - streaming: false, - fixture: fixtures.AntMultiThinkingBuiltinTool, - expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + name: "multiple thinking blocks/blocking", + streaming: false, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedThoughts: []recorder.ModelThoughtRecord{ + newModelThought("The user wants me to read", recorder.ThoughtSourceThinking), + newModelThought("I should use the Read tool", recorder.ThoughtSourceThinking), + }, }, { name: "parallel tool calls/streaming", streaming: true, fixture: fixtures.AntSingleBuiltinToolParallel, - expectedThoughts: []string{"The user wants me to read two files"}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("The user wants me to read two files", recorder.ThoughtSourceThinking)}, }, { name: "parallel tool calls/blocking", streaming: false, fixture: fixtures.AntSingleBuiltinToolParallel, - expectedThoughts: []string{"The user wants me to read two files"}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("The user wants me to read two files", recorder.ThoughtSourceThinking)}, }, { name: "thoughts without tool calls/streaming", streaming: true, fixture: fixtures.AntSimple, - expectedThoughts: []string{"This is a classic philosophical question about medieval scholasticism"}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("This is a classic philosophical question about medieval scholasticism", recorder.ThoughtSourceThinking)}, }, { name: "thoughts without tool calls/blocking", streaming: false, fixture: fixtures.AntSimple, - expectedThoughts: []string{"This is a classic philosophical question about medieval scholasticism"}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("This is a classic philosophical question about medieval scholasticism", recorder.ThoughtSourceThinking)}, + }, + { + name: "no thoughts captured", 
+ streaming: false, + fixture: fixtures.AntSingleInjectedTool, + expectedThoughts: nil, }, } @@ -207,32 +219,7 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { assert.Contains(t, sp.AllEvents(), "message_stop") } - interceptions := bridgeServer.Recorder.RecordedInterceptions() - require.GreaterOrEqual(t, len(interceptions), 1) - - thoughts := bridgeServer.Recorder.RecordedModelThoughts() - if tc.expectedThoughts == nil { - assert.Empty(t, thoughts) - } else { - require.Len(t, thoughts, len(tc.expectedThoughts), "unexpected number of model thoughts") - - // We can't guarantee the order of model thoughts since they're recorded separately, so - // we have to scan all thoughts for a match. - - for _, expected := range tc.expectedThoughts { - var matched *aibridge.ModelThoughtRecord - for _, thought := range thoughts { - if strings.Contains(thought.Content, expected) { - matched = thought - } - } - - require.NotNil(t, matched, "could not find thought matching %q", expected) - require.Equal(t, interceptions[0].ID, matched.InterceptionID) - require.Equal(t, "thinking", matched.Metadata["source"]) - } - } - + bridgeServer.Recorder.VerifyModelThoughtsRecorded(t, tc.expectedThoughts) bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) }) } diff --git a/internal/integrationtest/helpers.go b/internal/integrationtest/helpers.go index 84bd64d5..038e6335 100644 --- a/internal/integrationtest/helpers.go +++ b/internal/integrationtest/helpers.go @@ -6,6 +6,7 @@ import ( "cdr.dev/slog/v3" "cdr.dev/slog/v3/sloggers/slogtest" "github.com/coder/aibridge/config" + "github.com/coder/aibridge/recorder" ) // anthropicCfg creates a minimal Anthropic config for testing. 
@@ -53,3 +54,12 @@ func newLogger(t *testing.T) slog.Logger { t.Helper() return slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) } + +func newModelThought(content, source string) recorder.ModelThoughtRecord { + return recorder.ModelThoughtRecord{ + Content: content, + Metadata: recorder.Metadata{ + "source": source, + }, + } +} diff --git a/internal/integrationtest/responses_test.go b/internal/integrationtest/responses_test.go index 358175a9..43e09023 100644 --- a/internal/integrationtest/responses_test.go +++ b/internal/integrationtest/responses_test.go @@ -10,7 +10,6 @@ import ( "net/http/httptest" "slices" "strconv" - "strings" "sync" "testing" "time" @@ -940,82 +939,77 @@ func TestResponsesInjectedTool(t *testing.T) { func TestResponsesModelThoughts(t *testing.T) { t.Parallel() - type expectedThought struct { - content string - source string // "reasoning_summary" or "commentary" - } - cases := []struct { name string fixture []byte - expectedThoughts []expectedThought // nil means no tool usages expected at all + expectedThoughts []recorder.ModelThoughtRecord // nil means no tool usages expected at all }{ { name: "single reasoning/blocking", fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, - expectedThoughts: []expectedThought{{content: "The user wants to add 3 and 5", source: "reasoning_summary"}}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("The user wants to add 3 and 5", recorder.ThoughtSourceReasoningSummary)}, }, { name: "single reasoning/streaming", fixture: fixtures.OaiResponsesStreamingBuiltinTool, - expectedThoughts: []expectedThought{{content: "The user wants to add 3 and 5", source: "reasoning_summary"}}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("The user wants to add 3 and 5", recorder.ThoughtSourceReasoningSummary)}, }, { name: "multiple reasoning items/blocking", fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, - expectedThoughts: []expectedThought{ - {content: 
"The user wants to add 3 and 5", source: "reasoning_summary"}, - {content: "After adding, I will check if the result is prime", source: "reasoning_summary"}, + expectedThoughts: []recorder.ModelThoughtRecord{ + newModelThought("The user wants to add 3 and 5", recorder.ThoughtSourceReasoningSummary), + newModelThought("After adding, I will check if the result is prime", recorder.ThoughtSourceReasoningSummary), }, }, { name: "multiple reasoning items/streaming", fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, - expectedThoughts: []expectedThought{ - {content: "The user wants to add 3 and 5", source: "reasoning_summary"}, - {content: "After adding, I will check if the result is prime", source: "reasoning_summary"}, + expectedThoughts: []recorder.ModelThoughtRecord{ + newModelThought("The user wants to add 3 and 5", recorder.ThoughtSourceReasoningSummary), + newModelThought("After adding, I will check if the result is prime", recorder.ThoughtSourceReasoningSummary), }, }, { name: "commentary/blocking", fixture: fixtures.OaiResponsesBlockingCommentaryBuiltinTool, - expectedThoughts: []expectedThought{{content: "Checking whether 3 + 5 is prime by calling the add function first.", source: "commentary"}}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("Checking whether 3 + 5 is prime by calling the add function first.", recorder.ThoughtSourceCommentary)}, }, { name: "commentary/streaming", fixture: fixtures.OaiResponsesStreamingCommentaryBuiltinTool, - expectedThoughts: []expectedThought{{content: "Checking whether 3 + 5 is prime by calling the add function first.", source: "commentary"}}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("Checking whether 3 + 5 is prime by calling the add function first.", recorder.ThoughtSourceCommentary)}, }, { name: "summary and commentary/blocking", fixture: fixtures.OaiResponsesBlockingSummaryAndCommentaryBuiltinTool, - expectedThoughts: []expectedThought{ - {content: "I need to 
add 3 and 5 to check primality.", source: "reasoning_summary"}, - {content: "Let me calculate the sum first using the add function.", source: "commentary"}, + expectedThoughts: []recorder.ModelThoughtRecord{ + newModelThought("I need to add 3 and 5 to check primality.", recorder.ThoughtSourceReasoningSummary), + newModelThought("Let me calculate the sum first using the add function.", recorder.ThoughtSourceCommentary), }, }, { name: "summary and commentary/streaming", fixture: fixtures.OaiResponsesStreamingSummaryAndCommentaryBuiltinTool, - expectedThoughts: []expectedThought{ - {content: "I need to add 3 and 5 to check primality.", source: "reasoning_summary"}, - {content: "Let me calculate the sum first using the add function.", source: "commentary"}, + expectedThoughts: []recorder.ModelThoughtRecord{ + newModelThought("I need to add 3 and 5 to check primality.", recorder.ThoughtSourceReasoningSummary), + newModelThought("Let me calculate the sum first using the add function.", recorder.ThoughtSourceCommentary), }, }, { name: "parallel tool calls/blocking", fixture: fixtures.OaiResponsesBlockingSingleBuiltinToolParallel, - expectedThoughts: []expectedThought{{content: "The user wants two additions", source: "reasoning_summary"}}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("The user wants two additions", recorder.ThoughtSourceReasoningSummary)}, }, { name: "parallel tool calls/streaming", fixture: fixtures.OaiResponsesStreamingSingleBuiltinToolParallel, - expectedThoughts: []expectedThought{{content: "The user wants two additions", source: "reasoning_summary"}}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("The user wants two additions", recorder.ThoughtSourceReasoningSummary)}, }, { name: "thoughts without tool calls", fixture: fixtures.OaiResponsesStreamingCodex, // This fixture contains reasoning, but it's not associated with tool calls. 
- expectedThoughts: []expectedThought{{content: "Preparing simple response", source: "reasoning_summary"}}, + expectedThoughts: []recorder.ModelThoughtRecord{newModelThought("Preparing simple response", recorder.ThoughtSourceReasoningSummary)}, }, } @@ -1037,31 +1031,8 @@ func TestResponsesModelThoughts(t *testing.T) { _, err := io.ReadAll(resp.Body) require.NoError(t, err) - interceptions := bridgeServer.Recorder.RecordedInterceptions() - require.GreaterOrEqual(t, len(interceptions), 1) - - thoughts := bridgeServer.Recorder.RecordedModelThoughts() - if tc.expectedThoughts == nil { - assert.Empty(t, thoughts) - } else { - require.Len(t, thoughts, len(tc.expectedThoughts), "unexpected number of model thoughts") - - // We can't guarantee the order of model thoughts since they're recorded separately, so - // we have to scan all thoughts for a match. - - for _, expected := range tc.expectedThoughts { - var matched *aibridge.ModelThoughtRecord - for _, thought := range thoughts { - if strings.Contains(thought.Content, expected.content) { - matched = thought - } - } - - require.NotNil(t, matched, "could not find thought matching %q", expected) - require.Equal(t, interceptions[0].ID, matched.InterceptionID) - require.Equal(t, expected.source, matched.Metadata["source"]) - } - } + bridgeServer.Recorder.VerifyModelThoughtsRecorded(t, tc.expectedThoughts) + bridgeServer.Recorder.VerifyAllInterceptionsEnded(t) }) } } diff --git a/internal/testutil/mock_recorder.go b/internal/testutil/mock_recorder.go index 5cd4420f..991d0904 100644 --- a/internal/testutil/mock_recorder.go +++ b/internal/testutil/mock_recorder.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "slices" + "strings" "sync" "testing" @@ -163,3 +164,28 @@ func (m *MockRecorder) VerifyAllInterceptionsEnded(t *testing.T) { require.Containsf(t, m.interceptionsEnd, intc.ID, "interception with id: %v has not been ended", intc.ID) } } + +func (m *MockRecorder) VerifyModelThoughtsRecorded(t *testing.T, expected 
[]recorder.ModelThoughtRecord) { + thoughts := m.RecordedModelThoughts() + if expected == nil { + require.Empty(t, thoughts) + return + } + + require.Len(t, thoughts, len(expected), "unexpected number of model thoughts") + + // We can't guarantee the order of model thoughts since they're recorded separately, so + // we have to scan all thoughts for a match. + + for _, exp := range expected { + var matched *recorder.ModelThoughtRecord + for _, thought := range thoughts { + if strings.Contains(thought.Content, exp.Content) { + matched = thought + } + } + + require.NotNil(t, matched, "could not find thought matching %q", exp.Content) + require.EqualValues(t, exp.Metadata, matched.Metadata) + } +}