Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/opencode/src/flag/flag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ export namespace Flag {
export const OPENCODE_EXPERIMENTAL_MARKDOWN = !falsy("OPENCODE_EXPERIMENTAL_MARKDOWN")
export const OPENCODE_MODELS_URL = process.env["OPENCODE_MODELS_URL"]
export const OPENCODE_MODELS_PATH = process.env["OPENCODE_MODELS_PATH"]
export const OPENCODE_READ_MAX_ATTACHMENT_BYTES = number("OPENCODE_READ_MAX_ATTACHMENT_BYTES")
export const OPENCODE_DISABLE_EMBEDDED_WEB_UI = truthy("OPENCODE_DISABLE_EMBEDDED_WEB_UI")
export const OPENCODE_DB = process.env["OPENCODE_DB"]
export const OPENCODE_DISABLE_CHANNEL_DB = truthy("OPENCODE_DISABLE_CHANNEL_DB")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,22 @@ export function convertToOpenAICompatibleChatMessages(prompt: LanguageModelV3Pro
},
...partMetadata,
}
} else {
throw new UnsupportedFunctionalityError({
functionality: `file part media type ${part.mediaType}`,
})
}
if (part.mediaType.startsWith("video/")) {
return {
type: "video_url",
video_url: {
url:
part.data instanceof URL
? part.data.toString()
: `data:${part.mediaType};base64,${convertToBase64(part.data)}`,
},
...partMetadata,
}
}
throw new UnsupportedFunctionalityError({
functionality: `file part media type ${part.mediaType}`,
})
}
}
}),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,21 @@ export interface OpenAICompatibleUserMessage extends JsonRecord<OpenAICompatible
content: string | Array<OpenAICompatibleContentPart>
}

export type OpenAICompatibleContentPart = OpenAICompatibleContentPartText | OpenAICompatibleContentPartImage
// Union of every content-part shape allowed inside a user message's content array.
export type OpenAICompatibleContentPart =
  | OpenAICompatibleContentPartText
  | OpenAICompatibleContentPartImage
  | OpenAICompatibleContentPartVideo

// Image content part; `url` is either an http(s) URL or a base64 data: URI.
export interface OpenAICompatibleContentPartImage extends JsonRecord {
  type: "image_url"
  image_url: { url: string }
}

// Video content part; mirrors the image part shape (http(s) URL or data: URI).
export interface OpenAICompatibleContentPartVideo extends JsonRecord {
  type: "video_url"
  video_url: { url: string }
}

export interface OpenAICompatibleContentPartText extends JsonRecord {
type: "text"
text: string
Expand Down
2 changes: 1 addition & 1 deletion packages/opencode/src/session/compaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ When constructing the summary, try to stick to this template:
})
const text =
(input.overflow
? "The previous request exceeded the provider's size limit due to large media attachments. The conversation was compacted and media files were removed from context. If the user was asking about attached images or files, explain that the attachments were too large to process and suggest they try again with smaller or fewer files.\n\n"
? "The previous request exceeded the provider's size limit due to large media attachments. The conversation was compacted and media files were removed from context. If the user was asking about attached media or files, explain that the attachments were too large to process and suggest they try again with smaller or fewer files.\n\n"
: "") +
"Continue if you have next steps, or stop and ask for clarification if you are unsure how to proceed."
yield* session.updatePart({
Expand Down
99 changes: 67 additions & 32 deletions packages/opencode/src/session/message-v2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,46 @@ interface FetchDecompressionError extends Error {
}

export namespace MessageV2 {
export const SYNTHETIC_ATTACHMENT_PROMPT = "Attached image(s) from tool result:"
export const SYNTHETIC_ATTACHMENT_PROMPT = "Attached media file(s) from tool result:"

export function isMedia(mime: string) {
return mime.startsWith("image/") || mime === "application/pdf"
return (
mime.startsWith("image/") ||
mime.startsWith("video/") ||
mime.startsWith("audio/") ||
mime === "application/pdf"
)
}

type Modality = Exclude<keyof Provider.Model["capabilities"]["input"], "text">

function modality(mime: string): Modality | undefined {
if (mime.startsWith("image/")) return "image"
if (mime.startsWith("video/")) return "video"
if (mime.startsWith("audio/")) return "audio"
if (mime === "application/pdf") return "pdf"
return undefined
}

function accepts(model: Provider.Model, mime: string) {
const kind = modality(mime)
return kind ? model.capabilities.input[kind] : false
}

function toolable(model: Provider.Model, mime: string) {
const kind = modality(mime)
if (!kind) return false
if (!accepts(model, mime)) return false
if (kind === "audio" || kind === "video") return false
if (model.api.npm === "@ai-sdk/anthropic") return true
if (model.api.npm === "@ai-sdk/openai") return true
if (model.api.npm === "@ai-sdk/amazon-bedrock") return kind === "image"
if (model.api.npm === "@ai-sdk/google-vertex/anthropic") return true
if (model.api.npm === "@ai-sdk/google") {
const id = model.api.id.toLowerCase()
return id.includes("gemini-3") && !id.includes("gemini-2")
}
return false
}

export const OutputLengthError = NamedError.create("MessageOutputLengthError", z.object({}))
Expand Down Expand Up @@ -589,26 +625,6 @@ export namespace MessageV2 {
) {
const result: UIMessage[] = []
const toolNames = new Set<string>()
// Track media from tool results that need to be injected as user messages
// for providers that don't support media in tool results.
//
// OpenAI-compatible APIs only support string content in tool results, so we need
// to extract media and inject as user messages. Other SDKs (anthropic, google,
// bedrock) handle type: "content" with media parts natively.
//
// Only apply this workaround if the model actually supports image input -
// otherwise there's no point extracting images.
const supportsMediaInToolResults = (() => {
if (model.api.npm === "@ai-sdk/anthropic") return true
if (model.api.npm === "@ai-sdk/openai") return true
if (model.api.npm === "@ai-sdk/amazon-bedrock") return true
if (model.api.npm === "@ai-sdk/google-vertex/anthropic") return true
if (model.api.npm === "@ai-sdk/google") {
const id = model.api.id.toLowerCase()
return id.includes("gemini-3") && !id.includes("gemini-2")
}
return false
})()

const toModelOutput = (options: { toolCallId: string; input: unknown; output: unknown }) => {
const output = options.output
Expand Down Expand Up @@ -694,7 +710,8 @@ export namespace MessageV2 {

if (msg.info.role === "assistant") {
const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
const media: Array<{ mime: string; url: string }> = []
const media: Array<{ mime: string; url: string; filename?: string }> = []
const errors: string[] = []

if (
msg.info.error &&
Expand Down Expand Up @@ -727,14 +744,27 @@ export namespace MessageV2 {
const outputText = part.state.time.compacted ? "[Old tool result content cleared]" : part.state.output
const attachments = part.state.time.compacted || options?.stripMedia ? [] : (part.state.attachments ?? [])

// For providers that don't support media in tool results, extract media files
// (images, PDFs) to be sent as a separate user message
const mediaAttachments = attachments.filter((a) => isMedia(a.mime))
const nonMediaAttachments = attachments.filter((a) => !isMedia(a.mime))
if (!supportsMediaInToolResults && mediaAttachments.length > 0) {
media.push(...mediaAttachments)
// Some providers only support specific media types in tool results.
// Route the rest through a user message where model modality support is explicit.
const finalAttachments = attachments.filter((a) => !isMedia(a.mime) || toolable(model, a.mime))
const userAttachments = attachments.filter(
(a) => isMedia(a.mime) && accepts(model, a.mime) && !toolable(model, a.mime),
)
const badAttachments = attachments.filter((a) => isMedia(a.mime) && !accepts(model, a.mime))
if (userAttachments.length > 0) {
media.push(...userAttachments)
}
if (badAttachments.length > 0) {
errors.push(
...badAttachments.map((a) => {
const kind = modality(a.mime)
const name = a.filename ? `"${a.filename}"` : (kind ?? a.mime)
return kind
? `ERROR: Cannot read ${name} (this model does not support ${kind} input). Inform the user.`
: `ERROR: Cannot read ${name}. Inform the user.`
}),
)
}
const finalAttachments = supportsMediaInToolResults ? attachments : nonMediaAttachments

const output =
finalAttachments.length > 0
Expand Down Expand Up @@ -802,8 +832,8 @@ export namespace MessageV2 {
if (assistantMessage.parts.length > 0) {
result.push(assistantMessage)
// Inject pending media as a user message for providers that don't support
// media (images, PDFs) in tool results
if (media.length > 0) {
// media in tool results.
if (media.length > 0 || errors.length > 0) {
result.push({
id: MessageID.ascending(),
role: "user",
Expand All @@ -812,10 +842,15 @@ export namespace MessageV2 {
type: "text" as const,
text: SYNTHETIC_ATTACHMENT_PROMPT,
},
...errors.map((text) => ({
type: "text" as const,
text,
})),
...media.map((attachment) => ({
type: "file" as const,
url: attachment.url,
mediaType: attachment.mime,
filename: attachment.filename,
})),
],
})
Expand Down
34 changes: 30 additions & 4 deletions packages/opencode/src/tool/read.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@ import DESCRIPTION from "./read.txt"
import { Instance } from "../project/instance"
import { assertExternalDirectoryEffect } from "./external-directory"
import { Instruction } from "../session/instruction"
import { Flag } from "../flag/flag"

const DEFAULT_READ_LIMIT = 2000
const MAX_LINE_LENGTH = 2000
const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`
const MAX_BYTES = 50 * 1024
const MAX_BYTES_LABEL = `${MAX_BYTES / 1024} KB`
const MAX_ATTACHMENT_BYTES = 256 * 1024 * 1024

const parameters = z.object({
filePath: z.string().describe("The absolute path to the file or directory to read"),
Expand Down Expand Up @@ -146,10 +148,19 @@ export const ReadTool = Tool.define(
const loaded = yield* instruction.resolve(ctx.messages, filepath, ctx.messageID)

const mime = AppFileSystem.mimeType(filepath)
const isImage = mime.startsWith("image/") && mime !== "image/svg+xml" && mime !== "image/vnd.fastbidsheet"
const isPdf = mime === "application/pdf"
if (isImage || isPdf) {
const msg = `${isImage ? "Image" : "PDF"} read successfully`
const attach = attachable(mime)
if (attach) {
const bytes = Number(stat.size)
const max = Flag.OPENCODE_READ_MAX_ATTACHMENT_BYTES ?? MAX_ATTACHMENT_BYTES
if (bytes > max) {
return yield* Effect.fail(
new Error(
`Cannot attach ${attach} file larger than ${format(max)}: ${filepath} (${format(bytes)})`,
),
)
}

const msg = `${attach[0]!.toUpperCase()}${attach.slice(1)} read successfully`
return {
title,
output: msg,
Expand Down Expand Up @@ -265,6 +276,21 @@ async function lines(filepath: string, opts: { limit: number; offset: number })
return { raw, count, cut, more, offset: opts.offset }
}

// Maps a mime type to the attachment label used in the tool's success/error
// messages, or undefined when the file cannot be returned as an attachment.
// SVG and FastBidSheet are excluded because they are image/* text formats the
// models cannot render. "PDF" keeps its casing; the caller only uppercases the
// first character of the label.
function attachable(mime: string): "image" | "PDF" | "video" | "audio" | undefined {
  if (mime === "application/pdf") return "PDF"
  if (mime.startsWith("video/")) return "video"
  if (mime.startsWith("audio/")) return "audio"
  const textual = mime === "image/svg+xml" || mime === "image/vnd.fastbidsheet"
  if (mime.startsWith("image/") && !textual) return "image"
  return undefined
}

// Renders a byte count as a human-readable size: exact bytes below 1 KB,
// otherwise one decimal place in the largest unit up to GB (values >= 1024 GB
// still render in GB, e.g. "1024.0 GB").
function format(bytes: number) {
  if (bytes < 1024) return `${bytes} B`
  let value = bytes / 1024
  for (const unit of ["KB", "MB"]) {
    if (value < 1024) return `${value.toFixed(1)} ${unit}`
    value /= 1024
  }
  return `${value.toFixed(1)} GB`
}

async function isBinaryFile(filepath: string, fileSize: number): Promise<boolean> {
const ext = path.extname(filepath).toLowerCase()
// binary check for common non-text extensions
Expand Down
3 changes: 2 additions & 1 deletion packages/opencode/src/tool/read.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ Usage:
- Any line longer than 2000 characters is truncated.
- Call this tool in parallel when you know there are multiple files you want to read.
- Avoid tiny repeated slices (30 line chunks). If you need more context, read a larger window.
- This tool can read image files and PDFs and return them as file attachments.
- This tool can read images, PDFs, audio, and video files and return them as file attachments when the file type and size are supported.
- Media files larger than the attachment limit fail with an explicit error instead of being returned as text or truncated base64. The default limit is 256 MB and can be changed with the OPENCODE_READ_MAX_ATTACHMENT_BYTES environment variable.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { convertToOpenAICompatibleChatMessages as convertToCopilotMessages } from "@/provider/sdk/copilot/chat/convert-to-openai-compatible-chat-messages"
import { describe, test, expect } from "bun:test"
import { UnsupportedFunctionalityError } from "@ai-sdk/provider"

describe("system messages", () => {
test("should convert system message content to string", () => {
Expand Down Expand Up @@ -116,6 +117,79 @@ describe("user messages", () => {
])
})

  // Binary video data must be inlined as a base64 data: URI in a video_url part.
  test("should convert messages with video parts from Uint8Array", () => {
    const result = convertToCopilotMessages([
      {
        role: "user",
        content: [
          { type: "text", text: "Watch this" },
          {
            type: "file",
            data: new Uint8Array([0, 1, 2, 3]),
            mediaType: "video/mp4",
          },
        ],
      },
    ])

    expect(result).toEqual([
      {
        role: "user",
        content: [
          { type: "text", text: "Watch this" },
          {
            // "AAECAw==" is base64 of the bytes [0, 1, 2, 3] above.
            type: "video_url",
            video_url: { url: "data:video/mp4;base64,AAECAw==" },
          },
        ],
      },
    ])
  })

  // URL-backed video data must pass through unchanged (no base64 encoding).
  test("should handle URL-based videos", () => {
    const result = convertToCopilotMessages([
      {
        role: "user",
        content: [
          {
            type: "file",
            data: new URL("https://example.com/clip.mp4"),
            mediaType: "video/mp4",
          },
        ],
      },
    ])

    expect(result).toEqual([
      {
        role: "user",
        content: [
          {
            type: "video_url",
            video_url: { url: "https://example.com/clip.mp4" },
          },
        ],
      },
    ])
  })

  // File parts whose media type is neither image/* nor video/* must raise
  // UnsupportedFunctionalityError rather than being silently dropped.
  test("should reject unsupported file parts", () => {
    expect(() =>
      convertToCopilotMessages([
        {
          role: "user",
          content: [
            {
              type: "file",
              data: new Uint8Array([0, 1, 2, 3]),
              mediaType: "application/octet-stream",
            },
          ],
        },
      ]),
    ).toThrow(UnsupportedFunctionalityError)
  })

test("should handle multiple text parts without flattening", () => {
const result = convertToCopilotMessages([
{
Expand Down
12 changes: 12 additions & 0 deletions packages/opencode/test/session/compaction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,15 @@ describe("session.compaction.process", () => {
filename: "cat.png",
url: "https://example.com/cat.png",
})
await Session.updatePart({
id: PartID.ascending(),
messageID: replay.id,
sessionID: session.id,
type: "file",
mime: "video/mp4",
filename: "clip.mp4",
url: "https://example.com/clip.mp4",
})
const msg = await user(session.id, "current")
const rt = runtime("continue", Plugin.defaultLayer, wide())
try {
Expand All @@ -702,6 +711,9 @@ describe("session.compaction.process", () => {
expect(
last?.parts.some((part) => part.type === "text" && part.text.includes("Attached image/png: cat.png")),
).toBe(true)
expect(
last?.parts.some((part) => part.type === "text" && part.text.includes("Attached video/mp4: clip.mp4")),
).toBe(true)
} finally {
await rt.dispose()
}
Expand Down
Loading
Loading