1 change: 1 addition & 0 deletions packages/types/src/provider-settings.ts
@@ -250,6 +250,7 @@ const openAiSchema = baseProviderSettingsSchema.extend({
openAiStreamingEnabled: z.boolean().optional(),
openAiHostHeader: z.string().optional(), // Keep temporarily for backward compatibility during migration.
openAiHeaders: z.record(z.string(), z.string()).optional(),
openAiStrictToolMessageOrdering: z.boolean().optional(), // For providers that don't allow user messages after tool messages.
})

const ollamaSchema = baseProviderSettingsSchema.extend({
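Because the new field is optional, previously saved configurations keep validating, and the feature stays off unless explicitly enabled. A minimal sketch of that pattern, using a hypothetical sketchSchema in place of the real openAiSchema:

import { z } from "zod"

// Hypothetical stand-in mirroring the schema change above.
const sketchSchema = z.object({
	openAiStrictToolMessageOrdering: z.boolean().optional(),
})

// Older configs without the key still parse; the field is simply undefined...
const legacy = sketchSchema.parse({})
console.log(legacy.openAiStrictToolMessageOrdering) // undefined

// ...and the read site defaults it off, matching `?? false` in openai.ts below.
const strict = legacy.openAiStrictToolMessageOrdering ?? false
console.log(strict) // false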
28 changes: 22 additions & 6 deletions src/api/providers/openai.ts
@@ -102,11 +102,18 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
content: systemPrompt,
}

// When strict tool message ordering is enabled, merge text content after tool_results
// into the last tool message instead of creating a separate user message.
// This is required for providers like NVIDIA NIM that don't allow user messages after tool messages.
const strictToolMessageOrdering = this.options.openAiStrictToolMessageOrdering ?? false

if (this.options.openAiStreamingEnabled ?? true) {
let convertedMessages

if (deepseekReasoner) {
convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages], {
mergeToolResultText: strictToolMessageOrdering,
})
} else {
if (modelInfo.supportsPromptCache) {
systemMessage = {
@@ -122,7 +129,10 @@
}
}

convertedMessages = [systemMessage, ...convertToOpenAiMessages(messages)]
convertedMessages = [
systemMessage,
...convertToOpenAiMessages(messages, { mergeToolResultText: strictToolMessageOrdering }),
]

if (modelInfo.supportsPromptCache) {
// Note: the following logic is copied from openrouter:
@@ -227,8 +237,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: modelId,
messages: deepseekReasoner
? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
: [systemMessage, ...convertToOpenAiMessages(messages)],
? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages], {
mergeToolResultText: strictToolMessageOrdering,
})
: [
systemMessage,
...convertToOpenAiMessages(messages, { mergeToolResultText: strictToolMessageOrdering }),
],
...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }),
...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }),
...(metadata?.toolProtocol === "native" && {
@@ -338,6 +353,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
): ApiStream {
const modelInfo = this.getModel().info
const methodIsAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)
const strictToolMessageOrdering = this.options.openAiStrictToolMessageOrdering ?? false

if (this.options.openAiStreamingEnabled ?? true) {
const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl)
@@ -349,7 +365,7 @@
role: "developer",
content: `Formatting re-enabled\n${systemPrompt}`,
},
...convertToOpenAiMessages(messages),
...convertToOpenAiMessages(messages, { mergeToolResultText: strictToolMessageOrdering }),
],
stream: true,
...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
@@ -386,7 +402,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
role: "developer",
content: `Formatting re-enabled\n${systemPrompt}`,
},
...convertToOpenAiMessages(messages),
...convertToOpenAiMessages(messages, { mergeToolResultText: strictToolMessageOrdering }),
],
reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined,
temperature: undefined,
172 changes: 170 additions & 2 deletions src/api/transform/__tests__/openai-format.spec.ts
@@ -319,7 +319,7 @@ describe("convertToOpenAiMessages", () => {
)
})

it("should NOT merge text when images are present (fall back to user message)", () => {
it("should merge text and send images separately when mergeToolResultText is true", () => {
const anthropicMessages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
@@ -329,6 +329,10 @@
tool_use_id: "tool-123",
content: "Tool result content",
},
{
type: "text",
text: "<environment_details>Context info</environment_details>",
},
{
type: "image",
source: {
@@ -343,9 +347,57 @@

const openAiMessages = convertToOpenAiMessages(anthropicMessages, { mergeToolResultText: true })

// Should produce a tool message AND a user message (because image is present)
// Should produce a tool message with merged text AND a user message for the image
expect(openAiMessages).toHaveLength(2)

// First message: tool message with merged text
const toolMessage = openAiMessages[0] as OpenAI.Chat.ChatCompletionToolMessageParam
expect(toolMessage.role).toBe("tool")
expect(toolMessage.tool_call_id).toBe("tool-123")
expect(toolMessage.content).toBe(
"Tool result content\n\n<environment_details>Context info</environment_details>",
)

// Second message: user message with only the image
expect(openAiMessages[1].role).toBe("user")
const userContent = openAiMessages[1].content as Array<{ type: string; image_url?: { url: string } }>
expect(Array.isArray(userContent)).toBe(true)
expect(userContent).toHaveLength(1)
expect(userContent[0].type).toBe("image_url")
expect(userContent[0].image_url?.url).toBe("data:image/png;base64,base64data")
})

it("should send only images as user message when no text content exists with mergeToolResultText", () => {
const anthropicMessages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{
type: "tool_result",
tool_use_id: "tool-123",
content: "Tool result content",
},
{
type: "image",
source: {
type: "base64",
media_type: "image/png",
data: "base64data",
},
},
],
},
]

const openAiMessages = convertToOpenAiMessages(anthropicMessages, { mergeToolResultText: true })

// Should produce a tool message AND a user message (only image, no text to merge)
expect(openAiMessages).toHaveLength(2)
expect((openAiMessages[0] as OpenAI.Chat.ChatCompletionToolMessageParam).role).toBe("tool")
// Tool message content should NOT be modified since there's no text to merge
expect((openAiMessages[0] as OpenAI.Chat.ChatCompletionToolMessageParam).content).toBe(
"Tool result content",
)
expect(openAiMessages[1].role).toBe("user")
})

@@ -430,6 +482,122 @@ describe("convertToOpenAiMessages", () => {
expect(openAiMessages).toHaveLength(1)
expect(openAiMessages[0].role).toBe("user")
})

it("should merge text into tool messages for multiple tool calls across conversation turns", () => {
// This test simulates a full conversation with multiple tool_result + environment_details messages
// to ensure mergeToolResultText works correctly for ALL tool_result messages, not just the first one
// Regression test for: "The fix works for the first message but after the first response the text content is NOT merged"
const anthropicMessages: Anthropic.Messages.MessageParam[] = [
// Initial user message (no tool_results)
{
role: "user",
content: [
{ type: "text", text: "Create a file for me" },
{ type: "text", text: "<environment_details>Context 1</environment_details>" },
],
},
// Assistant uses first tool
{
role: "assistant",
content: [
{ type: "text", text: "I'll create the file for you." },
{
type: "tool_use",
id: "call_1",
name: "write_file",
input: { path: "test.txt", content: "hello" },
},
],
},
// First tool result + environment_details
{
role: "user",
content: [
{ type: "tool_result", tool_use_id: "call_1", content: "File created successfully" },
{ type: "text", text: "<environment_details>Context 2</environment_details>" },
],
},
// Assistant uses second tool
{
role: "assistant",
content: [
{ type: "text", text: "Now I'll read the file to verify." },
{ type: "tool_use", id: "call_2", name: "read_file", input: { path: "test.txt" } },
],
},
// Second tool result + environment_details (this is where the bug was reported)
{
role: "user",
content: [
{ type: "tool_result", tool_use_id: "call_2", content: "File content: hello" },
{ type: "text", text: "<environment_details>Context 3</environment_details>" },
],
},
]

const openAiMessages = convertToOpenAiMessages(anthropicMessages, { mergeToolResultText: true })

// Expected structure:
// 1. User message (initial, no tool_results - text should remain as user message)
// 2. Assistant message with tool_calls
// 3. Tool message with merged text (first tool_result)
// 4. Assistant message with tool_calls
// 5. Tool message with merged text (second tool_result)
expect(openAiMessages).toHaveLength(5)

// First message should be a user message (no tool_results to merge into)
expect(openAiMessages[0].role).toBe("user")

// Second message should be assistant with tool_calls
expect(openAiMessages[1].role).toBe("assistant")
expect((openAiMessages[1] as OpenAI.Chat.ChatCompletionAssistantMessageParam).tool_calls).toHaveLength(1)

// Third message should be tool message with merged environment_details
const firstToolMsg = openAiMessages[2] as OpenAI.Chat.ChatCompletionToolMessageParam
expect(firstToolMsg.role).toBe("tool")
expect(firstToolMsg.tool_call_id).toBe("call_1")
expect(firstToolMsg.content).toContain("File created successfully")
expect(firstToolMsg.content).toContain("<environment_details>Context 2</environment_details>")

// Fourth message should be assistant with tool_calls
expect(openAiMessages[3].role).toBe("assistant")
expect((openAiMessages[3] as OpenAI.Chat.ChatCompletionAssistantMessageParam).tool_calls).toHaveLength(1)

// Fifth message should be tool message with merged environment_details (THE BUG FIX)
const secondToolMsg = openAiMessages[4] as OpenAI.Chat.ChatCompletionToolMessageParam
expect(secondToolMsg.role).toBe("tool")
expect(secondToolMsg.tool_call_id).toBe("call_2")
expect(secondToolMsg.content).toContain("File content: hello")
expect(secondToolMsg.content).toContain("<environment_details>Context 3</environment_details>")
})

it("should NOT create user messages after tool messages when mergeToolResultText is true", () => {
// This test specifically verifies that the "user after tool" error is avoided
const anthropicMessages: Anthropic.Messages.MessageParam[] = [
{
role: "assistant",
content: [{ type: "tool_use", id: "tool_1", name: "read_file", input: { path: "test.ts" } }],
},
{
role: "user",
content: [
{ type: "tool_result", tool_use_id: "tool_1", content: "File contents" },
{ type: "text", text: "<environment_details>Some context</environment_details>" },
],
},
]

const openAiMessages = convertToOpenAiMessages(anthropicMessages, { mergeToolResultText: true })

// Should produce assistant + tool (no user message)
expect(openAiMessages).toHaveLength(2)
expect(openAiMessages[0].role).toBe("assistant")
expect(openAiMessages[1].role).toBe("tool")
// The text should be merged into the tool message, NOT as a separate user message
expect((openAiMessages[1] as OpenAI.Chat.ChatCompletionToolMessageParam).content).toContain(
"<environment_details>Some context</environment_details>",
)
})
})

describe("reasoning_details transformation", () => {
46 changes: 31 additions & 15 deletions src/api/transform/openai-format.ts
@@ -138,24 +138,40 @@ export function convertToOpenAiMessages(

// Process non-tool messages
if (nonToolMessages.length > 0) {
// Check if we should merge text into the last tool message
// This is critical for reasoning/thinking models where a user message
// after tool results causes the model to drop all previous reasoning_content
const hasOnlyTextContent = nonToolMessages.every((part) => part.type === "text")
const hasToolMessages = toolMessages.length > 0
const shouldMergeIntoToolMessage =
options?.mergeToolResultText && hasToolMessages && hasOnlyTextContent

if (shouldMergeIntoToolMessage) {
if (options?.mergeToolResultText && hasToolMessages) {
// When mergeToolResultText is enabled, separate text and images
// Merge text into the last tool message, and send images separately
// This is critical for providers like NVIDIA NIM that don't allow user messages after tool messages
const textMessages = nonToolMessages.filter(
(part) => part.type === "text",
) as Anthropic.TextBlockParam[]
const imageMessages = nonToolMessages.filter(
(part) => part.type === "image",
) as Anthropic.ImageBlockParam[]

// Merge text content into the last tool message
const lastToolMessage = openAiMessages[
openAiMessages.length - 1
] as OpenAI.Chat.ChatCompletionToolMessageParam
if (lastToolMessage?.role === "tool") {
const additionalText = nonToolMessages
.map((part) => (part as Anthropic.TextBlockParam).text)
.join("\n")
lastToolMessage.content = `${lastToolMessage.content}\n\n${additionalText}`
if (textMessages.length > 0) {
const lastToolMessage = openAiMessages[
openAiMessages.length - 1
] as OpenAI.Chat.ChatCompletionToolMessageParam
if (lastToolMessage?.role === "tool") {
const additionalText = textMessages.map((part) => part.text).join("\n")
lastToolMessage.content = `${lastToolMessage.content}\n\n${additionalText}`
}
}

// Send images as a separate user message if any
// Note: Images must still be sent as user messages since tool messages don't support images
if (imageMessages.length > 0) {
openAiMessages.push({
role: "user",
content: imageMessages.map((part) => ({
type: "image_url",
image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
})),
})
}
} else {
// Standard behavior: add user message with text/image content
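Taken together with the tests above, the transform's contract fits in a few lines. A minimal usage sketch (the import path is illustrative; the behavior matches the merged-text tests in this PR):

import type { Anthropic } from "@anthropic-ai/sdk"
import { convertToOpenAiMessages } from "../openai-format" // illustrative path

// With mergeToolResultText enabled, the trailing text block rides along inside
// the tool message instead of becoming a separate user message.
const messages: Anthropic.Messages.MessageParam[] = [
	{
		role: "user",
		content: [
			{ type: "tool_result", tool_use_id: "call_1", content: "ok" },
			{ type: "text", text: "<environment_details>ctx</environment_details>" },
		],
	},
]

const result = convertToOpenAiMessages(messages, { mergeToolResultText: true })
// result is a single-element array:
//   [{ role: "tool", tool_call_id: "call_1",
//      content: "ok\n\n<environment_details>ctx</environment_details>" }]
// No trailing user message, so strict providers such as NVIDIA NIM accept the sequence.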
@@ -158,6 +158,16 @@ export const OpenAICompatible = ({
onChange={handleInputChange("openAiStreamingEnabled", noTransform)}>
{t("settings:modelInfo.enableStreaming")}
</Checkbox>
<div>
<Checkbox
checked={apiConfiguration?.openAiStrictToolMessageOrdering ?? false}
onChange={handleInputChange("openAiStrictToolMessageOrdering", noTransform)}>
{t("settings:providers.openAiStrictToolMessageOrdering.label")}
</Checkbox>
<div className="text-sm text-vscode-descriptionForeground ml-6">
{t("settings:providers.openAiStrictToolMessageOrdering.description")}
</div>
</div>
<div>
<Checkbox
checked={apiConfiguration?.includeMaxTokens ?? true}
4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/ca/settings.json

(Generated file; diff not rendered.)

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/de/settings.json

(Generated file; diff not rendered.)

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/en/settings.json
@@ -498,6 +498,10 @@
},
"resetDefaults": "Reset to Defaults"
},
"openAiStrictToolMessageOrdering": {
"label": "Strict tool message ordering",
"description": "Enable for providers like NVIDIA NIM and Devstral that require strict message ordering. When enabled, text content after tool results is merged into the last tool message instead of creating a separate user message."
},
Comment on lines +501 to +504 (Contributor Author):

This addition creates duplicate keys in the JSON file. There are older flat-format entries at lines 539-541 ("openAiStrictToolMessageOrdering": "Strict tool message ordering" and "openAiStrictToolMessageOrderingDescription": "...") that were not removed. Since JSON parsers typically keep the last value for duplicate keys, the flat entry at line 539 takes precedence, causing t("settings:providers.openAiStrictToolMessageOrdering.label") to return undefined. Remove the old flat-format entries at lines 539-541.


"rateLimitSeconds": {
"label": "Rate limit",
"description": "Minimum time between API requests."
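The duplicate-key hazard flagged in the review comment above is easy to reproduce in isolation. A small sketch (values shortened for illustration) of why the flat entry shadows the nested one:

// JSON.parse keeps the LAST duplicate key it encounters.
const raw = `{
	"openAiStrictToolMessageOrdering": { "label": "Strict tool message ordering" },
	"openAiStrictToolMessageOrdering": "Strict tool message ordering"
}`

const parsed = JSON.parse(raw)
console.log(parsed.openAiStrictToolMessageOrdering) // "Strict tool message ordering" — the flat string wins
// The ".label" lookup now resolves against a string, not an object,
// which is why the translated label comes back undefined per the comment above.
console.log(parsed.openAiStrictToolMessageOrdering.label) // undefined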