From 17321d50428dc5909d738719cbc4b29930093aef Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Wed, 21 Jan 2026 15:35:33 +0000
Subject: [PATCH] feat: adjust Z.ai temperature to 1 and add top_p=0.95

- Update ZAI_DEFAULT_TEMPERATURE from 0.6 to 1
- Add top_p parameter set to 0.95 for all Z.ai API calls
- Update both thinking and non-thinking model implementations
- Update test expectations to include top_p parameter

Fixes COM-538
---
 packages/types/src/providers/zai.ts     |  2 +-
 src/api/providers/__tests__/zai.spec.ts |  1 +
 src/api/providers/zai.ts                | 47 +++++++++++++++++++++++--
 3 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts
index e9fe7f9bfb6..82731c4a743 100644
--- a/packages/types/src/providers/zai.ts
+++ b/packages/types/src/providers/zai.ts
@@ -223,7 +223,7 @@ export const mainlandZAiModels = {
 	},
 } as const satisfies Record
 
-export const ZAI_DEFAULT_TEMPERATURE = 0.6
+export const ZAI_DEFAULT_TEMPERATURE = 1
 
 export const zaiApiLineConfigs = {
 	international_coding: {
diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts
index 34323b108d3..ce51486242b 100644
--- a/src/api/providers/__tests__/zai.spec.ts
+++ b/src/api/providers/__tests__/zai.spec.ts
@@ -395,6 +395,7 @@ describe("ZAiHandler", () => {
 					model: modelId,
 					max_tokens: expectedMaxTokens,
 					temperature: ZAI_DEFAULT_TEMPERATURE,
+					top_p: 0.95,
 					messages: expect.arrayContaining([{ role: "system", content: systemPrompt }]),
 					stream: true,
 					stream_options: { include_usage: true },
diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts
index 15a7b47b7c9..14d57bb5bb6 100644
--- a/src/api/providers/zai.ts
+++ b/src/api/providers/zai.ts
@@ -40,7 +40,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider {
 	}
 
 	/**
-	 * Override createStream to handle GLM-4.7's thinking mode.
+	 * Override createStream to handle GLM-4.7's thinking mode and add top_p parameter.
 	 * GLM-4.7 has thinking enabled by default in the API, so we need to
 	 * explicitly send { type: "disabled" } when the user turns off reasoning.
 	 */
@@ -64,8 +64,48 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider {
 			return this.createStreamWithThinking(systemPrompt, messages, metadata, useReasoning)
 		}
 
-		// For non-thinking models, use the default behavior
-		return super.createStream(systemPrompt, messages, metadata, requestOptions)
+		// For non-thinking models, use the default behavior with added top_p
+		return this.createStreamWithTopP(systemPrompt, messages, metadata, requestOptions)
+	}
+
+	/**
+	 * Creates a stream with top_p parameter for non-thinking models
+	 */
+	private createStreamWithTopP(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+		requestOptions?: OpenAI.RequestOptions,
+	) {
+		const { id: model, info } = this.getModel()
+
+		const max_tokens =
+			getModelMaxOutputTokens({
+				modelId: model,
+				model: info,
+				settings: this.options,
+				format: "openai",
+			}) ?? undefined
+
+		const temperature = this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature
+
+		const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
+			model,
+			max_tokens,
+			temperature,
+			top_p: 0.95,
+			messages: [
+				{ role: "system", content: systemPrompt },
+				...convertToZAiFormat(messages, { mergeToolResultText: true }),
+			],
+			stream: true,
+			stream_options: { include_usage: true },
+			tools: this.convertToolsForOpenAI(metadata?.tools),
+			tool_choice: metadata?.tool_choice,
+			parallel_tool_calls: metadata?.parallelToolCalls ?? false,
+		}
+
+		return this.client.chat.completions.create(params, requestOptions)
 	}
 
 	/**
@@ -96,6 +136,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider {
 			model,
 			max_tokens,
 			temperature,
+			top_p: 0.95,
 			messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
 			stream: true,
 			stream_options: { include_usage: true },
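
Reviewer note: below is a minimal, standalone TypeScript sketch of the sampling behaviour this patch aims for, not code from the patch itself. The resolveZAiSamplingParams helper and the option/model-info shapes are hypothetical; the sketch assumes the handler's defaultTemperature resolves to ZAI_DEFAULT_TEMPERATURE (as the updated test expectation suggests) and shows how the temperature fallback chain combines with the fixed top_p of 0.95.

// Illustrative sketch only; resolveZAiSamplingParams is a hypothetical helper, not part of this patch.
interface ZAiSamplingOptions {
	modelTemperature?: number // user override from provider settings
}

interface ZAiModelInfo {
	defaultTemperature?: number // per-model default, if the model defines one
}

const ZAI_DEFAULT_TEMPERATURE = 1 // provider-wide fallback after this patch (was 0.6)

function resolveZAiSamplingParams(options: ZAiSamplingOptions, info: ZAiModelInfo) {
	// Same fallback chain as the patched createStream* methods:
	// user setting -> model default -> provider default.
	const temperature = options.modelTemperature ?? info.defaultTemperature ?? ZAI_DEFAULT_TEMPERATURE
	// top_p is pinned to 0.95 for every Z.ai request, thinking or not.
	return { temperature, top_p: 0.95 }
}

// Example: no user override and no model default -> { temperature: 1, top_p: 0.95 }
console.log(resolveZAiSamplingParams({}, {}))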