From 3932ec16eb3455faa75f7a9dd0e94badeb4eeeec Mon Sep 17 00:00:00 2001
From: erdemgoksel <erdemgoksel@MAU-BILISIM42>
Date: Wed, 21 Jan 2026 11:44:36 +0300
Subject: [PATCH] feat: Update Z.AI models with new variants and pricing

- Add GLM-4.7-Flash (free) and GLM-4.7-FlashX models
- Add GLM-4.6V, GLM-4.6V-Flash (free), and GLM-4.6V-FlashX vision models
- Update GLM-4.6V pricing (input/output — international: $0.3/$0.9, mainland: $0.15/$0.45)
- All new models support prompt caching
- GLM-4.7 variants: 200k context window (200,000 international, 204,800 mainland)
- GLM-4.6V variants: 131k context window with vision support
---
 packages/types/src/providers/zai.ts | 120 ++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)

diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts
index e9fe7f9bfb..41a6a808ca 100644
--- a/packages/types/src/providers/zai.ts
+++ b/packages/types/src/providers/zai.ts
@@ -81,6 +81,18 @@ export const internationalZAiModels = {
 		description:
 			"GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.",
 	},
+	"glm-4.6v": {
+		maxTokens: 16_384,
+		contextWindow: 131_072,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0.3,
+		outputPrice: 0.9,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.05,
+		description:
+			"GLM-4.6V is an advanced multimodal vision model with improved performance and cost-efficiency for visual understanding tasks.",
+	},
 	"glm-4.6": {
 		maxTokens: 16_384,
 		contextWindow: 200_000,
@@ -108,6 +120,54 @@ export const internationalZAiModels = {
 		description:
 			"GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. It provides enhanced reasoning for complex tasks while maintaining fast response times.",
 	},
+	"glm-4.7-flash": {
+		maxTokens: 16_384,
+		contextWindow: 200_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description:
+			"GLM-4.7-Flash is a free, high-speed variant of GLM-4.7 offering fast responses for reasoning and coding tasks.",
+	},
+	"glm-4.7-flashx": {
+		maxTokens: 16_384,
+		contextWindow: 200_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0.07,
+		outputPrice: 0.4,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.01,
+		description:
+			"GLM-4.7-FlashX is an ultra-fast variant of GLM-4.7 with exceptional speed and cost-effectiveness for high-throughput applications.",
+	},
+	"glm-4.6v-flash": {
+		maxTokens: 16_384,
+		contextWindow: 131_072,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description:
+			"GLM-4.6V-Flash is a free, high-speed multimodal vision model for rapid image understanding and visual reasoning tasks.",
+	},
+	"glm-4.6v-flashx": {
+		maxTokens: 16_384,
+		contextWindow: 131_072,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0.04,
+		outputPrice: 0.4,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.004,
+		description:
+			"GLM-4.6V-FlashX is an ultra-fast multimodal vision model optimized for high-speed visual processing at low cost.",
+	},
 	"glm-4-32b-0414-128k": {
 		maxTokens: 16_384,
 		contextWindow: 131_072,
@@ -221,6 +281,66 @@ export const mainlandZAiModels = {
 		description:
 			"GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. It provides enhanced reasoning for complex tasks while maintaining fast response times.",
 	},
+	"glm-4.7-flash": {
+		maxTokens: 16_384,
+		contextWindow: 204_800,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description:
+			"GLM-4.7-Flash is a free, high-speed variant of GLM-4.7 offering fast responses for reasoning and coding tasks.",
+	},
+	"glm-4.7-flashx": {
+		maxTokens: 16_384,
+		contextWindow: 204_800,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0.035,
+		outputPrice: 0.2,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.005,
+		description:
+			"GLM-4.7-FlashX is an ultra-fast variant of GLM-4.7 with exceptional speed and cost-effectiveness for high-throughput applications.",
+	},
+	"glm-4.6v": {
+		maxTokens: 16_384,
+		contextWindow: 131_072,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0.15,
+		outputPrice: 0.45,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.025,
+		description:
+			"GLM-4.6V is an advanced multimodal vision model with improved performance and cost-efficiency for visual understanding tasks.",
+	},
+	"glm-4.6v-flash": {
+		maxTokens: 16_384,
+		contextWindow: 131_072,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description:
+			"GLM-4.6V-Flash is a free, high-speed multimodal vision model for rapid image understanding and visual reasoning tasks.",
+	},
+	"glm-4.6v-flashx": {
+		maxTokens: 16_384,
+		contextWindow: 131_072,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0.02,
+		outputPrice: 0.2,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.002,
+		description:
+			"GLM-4.6V-FlashX is an ultra-fast multimodal vision model optimized for high-speed visual processing at low cost.",
+	},
 } as const satisfies Record<string, ModelInfo>
 
 export const ZAI_DEFAULT_TEMPERATURE = 0.6