Skip to content

Commit 98f37ce

Browse files
committed
Add media attachments to read tool
1 parent 7230cd2 commit 98f37ce

File tree

8 files changed

+226
-12
lines changed

8 files changed

+226
-12
lines changed

packages/opencode/src/flag/flag.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ export namespace Flag {
7878
export const OPENCODE_EXPERIMENTAL_MARKDOWN = !falsy("OPENCODE_EXPERIMENTAL_MARKDOWN")
7979
export const OPENCODE_MODELS_URL = process.env["OPENCODE_MODELS_URL"]
8080
export const OPENCODE_MODELS_PATH = process.env["OPENCODE_MODELS_PATH"]
81+
export const OPENCODE_READ_MAX_ATTACHMENT_BYTES = number("OPENCODE_READ_MAX_ATTACHMENT_BYTES")
8182
export const OPENCODE_DISABLE_EMBEDDED_WEB_UI = truthy("OPENCODE_DISABLE_EMBEDDED_WEB_UI")
8283
export const OPENCODE_DB = process.env["OPENCODE_DB"]
8384
export const OPENCODE_DISABLE_CHANNEL_DB = truthy("OPENCODE_DISABLE_CHANNEL_DB")

packages/opencode/src/session/compaction.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ When constructing the summary, try to stick to this template:
320320
})
321321
const text =
322322
(input.overflow
323-
? "The previous request exceeded the provider's size limit due to large media attachments. The conversation was compacted and media files were removed from context. If the user was asking about attached images or files, explain that the attachments were too large to process and suggest they try again with smaller or fewer files.\n\n"
323+
? "The previous request exceeded the provider's size limit due to large media attachments. The conversation was compacted and media files were removed from context. If the user was asking about attached media or files, explain that the attachments were too large to process and suggest they try again with smaller or fewer files.\n\n"
324324
: "") +
325325
"Continue if you have next steps, or stop and ask for clarification if you are unsure how to proceed."
326326
yield* session.updatePart({

packages/opencode/src/session/message-v2.ts

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,15 @@ interface FetchDecompressionError extends Error {
2525
}
2626

2727
export namespace MessageV2 {
28-
export const SYNTHETIC_ATTACHMENT_PROMPT = "Attached image(s) from tool result:"
28+
export const SYNTHETIC_ATTACHMENT_PROMPT = "Attached media file(s) from tool result:"
2929

3030
export function isMedia(mime: string) {
31-
return mime.startsWith("image/") || mime === "application/pdf"
31+
return (
32+
mime.startsWith("image/") ||
33+
mime.startsWith("video/") ||
34+
mime.startsWith("audio/") ||
35+
mime === "application/pdf"
36+
)
3237
}
3338

3439
export const OutputLengthError = NamedError.create("MessageOutputLengthError", z.object({}))
@@ -596,8 +601,7 @@ export namespace MessageV2 {
596601
// to extract media and inject as user messages. Other SDKs (anthropic, google,
597602
// bedrock) handle type: "content" with media parts natively.
598603
//
599-
// Only apply this workaround if the model actually supports image input -
600-
// otherwise there's no point extracting images.
604+
// Apply this workaround only for adapters that cannot carry media in tool results.
601605
const supportsMediaInToolResults = (() => {
602606
if (model.api.npm === "@ai-sdk/anthropic") return true
603607
if (model.api.npm === "@ai-sdk/openai") return true
@@ -728,7 +732,7 @@ export namespace MessageV2 {
728732
const attachments = part.state.time.compacted || options?.stripMedia ? [] : (part.state.attachments ?? [])
729733

730734
// For providers that don't support media in tool results, extract media files
731-
// (images, PDFs) to be sent as a separate user message
735+
// to be sent as a separate user message.
732736
const mediaAttachments = attachments.filter((a) => isMedia(a.mime))
733737
const nonMediaAttachments = attachments.filter((a) => !isMedia(a.mime))
734738
if (!supportsMediaInToolResults && mediaAttachments.length > 0) {
@@ -802,7 +806,7 @@ export namespace MessageV2 {
802806
if (assistantMessage.parts.length > 0) {
803807
result.push(assistantMessage)
804808
// Inject pending media as a user message for providers that don't support
805-
// media (images, PDFs) in tool results
809+
// media in tool results.
806810
if (media.length > 0) {
807811
result.push({
808812
id: MessageID.ascending(),

packages/opencode/src/tool/read.ts

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@ import DESCRIPTION from "./read.txt"
1212
import { Instance } from "../project/instance"
1313
import { assertExternalDirectoryEffect } from "./external-directory"
1414
import { Instruction } from "../session/instruction"
15+
import { Flag } from "../flag/flag"
1516

1617
const DEFAULT_READ_LIMIT = 2000
1718
const MAX_LINE_LENGTH = 2000
1819
const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`
1920
const MAX_BYTES = 50 * 1024
2021
const MAX_BYTES_LABEL = `${MAX_BYTES / 1024} KB`
22+
const MAX_ATTACHMENT_BYTES = 256 * 1024 * 1024
2123

2224
const parameters = z.object({
2325
filePath: z.string().describe("The absolute path to the file or directory to read"),
@@ -146,10 +148,19 @@ export const ReadTool = Tool.define(
146148
const loaded = yield* instruction.resolve(ctx.messages, filepath, ctx.messageID)
147149

148150
const mime = AppFileSystem.mimeType(filepath)
149-
const isImage = mime.startsWith("image/") && mime !== "image/svg+xml" && mime !== "image/vnd.fastbidsheet"
150-
const isPdf = mime === "application/pdf"
151-
if (isImage || isPdf) {
152-
const msg = `${isImage ? "Image" : "PDF"} read successfully`
151+
const attach = attachable(mime)
152+
if (attach) {
153+
const bytes = Number(stat.size)
154+
const max = Flag.OPENCODE_READ_MAX_ATTACHMENT_BYTES ?? MAX_ATTACHMENT_BYTES
155+
if (bytes > max) {
156+
return yield* Effect.fail(
157+
new Error(
158+
`Cannot attach ${attach} file larger than ${format(max)}: ${filepath} (${format(bytes)})`,
159+
),
160+
)
161+
}
162+
163+
const msg = `${attach[0]!.toUpperCase()}${attach.slice(1)} read successfully`
153164
return {
154165
title,
155166
output: msg,
@@ -265,6 +276,21 @@ async function lines(filepath: string, opts: { limit: number; offset: number })
265276
return { raw, count, cut, more, offset: opts.offset }
266277
}
267278

279+
/**
 * Classify a MIME type as a kind of file the read tool returns as an attachment.
 *
 * The type is normalised before matching: media types are case-insensitive and may
 * carry parameters (e.g. `image/svg+xml; charset=utf-8`), so parameters are stripped
 * and the remainder is trimmed and lowercased. Without this, a parameterised SVG type
 * would slip past the SVG exclusion below and be treated as an image.
 *
 * @param mime MIME type string, e.g. the result of a file-extension lookup.
 * @returns A label for the attachment kind ("image", "PDF", "video", "audio"), or
 *   `undefined` when the file should not be attached. The label is interpolated into
 *   user-facing messages, which is why "PDF" is upper-case.
 */
function attachable(mime: string): "image" | "PDF" | "video" | "audio" | undefined {
  const type = (mime.split(";", 1)[0] ?? "").trim().toLowerCase()
  if (type === "application/pdf") return "PDF"
  if (type.startsWith("image/")) {
    // These two image/* types are deliberately not attached as images. The
    // vnd.fastbidsheet type collides with `.fbs` FlatBuffers schema files, which are text;
    // SVG is presumably excluded because it is XML text as well — confirm with callers.
    if (type === "image/svg+xml" || type === "image/vnd.fastbidsheet") return undefined
    return "image"
  }
  if (type.startsWith("video/")) return "video"
  if (type.startsWith("audio/")) return "audio"
  return undefined
}
286+
287+
/**
 * Render a byte count as a short human-readable size using 1024-based units.
 *
 * Values below 1024 are printed verbatim with a "B" suffix. Larger values are printed
 * with one decimal place in KB, MB, or GB (GB is the largest unit, so very large values
 * stay in GB).
 *
 * @param bytes Size in bytes.
 * @returns A string such as "4 B", "1.5 KB", "256.0 MB", or "2.0 GB".
 */
function format(bytes: number) {
  if (bytes < 1024) return `${bytes} B`
  const units = ["KB", "MB", "GB"]
  let value = bytes / 1024
  let index = 0
  // Decide on the unit using the value as it will be *printed*: a size just under a unit
  // boundary (e.g. 1024 * 1024 - 1 bytes) rounds to "1024.0", which should be shown as
  // "1.0" of the next unit rather than "1024.0 KB".
  while (index < units.length - 1 && Number(value.toFixed(1)) >= 1024) {
    value /= 1024
    index++
  }
  return `${value.toFixed(1)} ${units[index]}`
}
293+
268294
async function isBinaryFile(filepath: string, fileSize: number): Promise<boolean> {
269295
const ext = path.extname(filepath).toLowerCase()
270296
// binary check for common non-text extensions

packages/opencode/src/tool/read.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ Usage:
1111
- Any line longer than 2000 characters is truncated.
1212
- Call this tool in parallel when you know there are multiple files you want to read.
1313
- Avoid tiny repeated slices (30 line chunks). If you need more context, read a larger window.
14-
- This tool can read image files and PDFs and return them as file attachments.
14+
- This tool can read images, PDFs, audio, and video files and return them as file attachments when the file type and size are supported.
15+
- Oversized binary attachments fail explicitly instead of being returned as text or truncated base64. The default attachment limit is 256 MB and can be changed with OPENCODE_READ_MAX_ATTACHMENT_BYTES.

packages/opencode/test/session/compaction.test.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,15 @@ describe("session.compaction.process", () => {
678678
filename: "cat.png",
679679
url: "https://example.com/cat.png",
680680
})
681+
await Session.updatePart({
682+
id: PartID.ascending(),
683+
messageID: replay.id,
684+
sessionID: session.id,
685+
type: "file",
686+
mime: "video/mp4",
687+
filename: "clip.mp4",
688+
url: "https://example.com/clip.mp4",
689+
})
681690
const msg = await user(session.id, "current")
682691
const rt = runtime("continue", Plugin.defaultLayer, wide())
683692
try {
@@ -702,6 +711,9 @@ describe("session.compaction.process", () => {
702711
expect(
703712
last?.parts.some((part) => part.type === "text" && part.text.includes("Attached image/png: cat.png")),
704713
).toBe(true)
714+
expect(
715+
last?.parts.some((part) => part.type === "text" && part.text.includes("Attached video/mp4: clip.mp4")),
716+
).toBe(true)
705717
} finally {
706718
await rt.dispose()
707719
}

packages/opencode/test/session/message-v2.test.ts

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,16 @@ function basePart(messageID: string, id: string) {
107107
}
108108
}
109109

110+
describe("session.message-v2.isMedia", () => {
111+
test("classifies images, PDFs, video, and audio as media", () => {
112+
expect(MessageV2.isMedia("image/png")).toBe(true)
113+
expect(MessageV2.isMedia("application/pdf")).toBe(true)
114+
expect(MessageV2.isMedia("video/mp4")).toBe(true)
115+
expect(MessageV2.isMedia("audio/mpeg")).toBe(true)
116+
expect(MessageV2.isMedia("text/plain")).toBe(false)
117+
})
118+
})
119+
110120
describe("session.message-v2.toModelMessage", () => {
111121
test("filters out messages with no parts", async () => {
112122
const input: MessageV2.WithParts[] = [
@@ -359,6 +369,101 @@ describe("session.message-v2.toModelMessage", () => {
359369
])
360370
})
361371

372+
test("injects media tool attachments for providers without media tool results", async () => {
373+
const userID = "m-user"
374+
const assistantID = "m-assistant"
375+
const next = {
376+
...model,
377+
api: {
378+
...model.api,
379+
npm: "@ai-sdk/openai-compatible",
380+
},
381+
} as Provider.Model
382+
383+
const input: MessageV2.WithParts[] = [
384+
{
385+
info: userInfo(userID),
386+
parts: [
387+
{
388+
...basePart(userID, "u1"),
389+
type: "text",
390+
text: "read video",
391+
},
392+
] as MessageV2.Part[],
393+
},
394+
{
395+
info: assistantInfo(assistantID, userID),
396+
parts: [
397+
{
398+
...basePart(assistantID, "a1"),
399+
type: "tool",
400+
callID: "call-1",
401+
tool: "read",
402+
state: {
403+
status: "completed",
404+
input: { filePath: "/tmp/clip.mp4" },
405+
output: "Video read successfully",
406+
title: "clip.mp4",
407+
metadata: {},
408+
time: { start: 0, end: 1 },
409+
attachments: [
410+
{
411+
...basePart(assistantID, "file-1"),
412+
type: "file",
413+
mime: "video/mp4",
414+
filename: "clip.mp4",
415+
url: "data:video/mp4;base64,Zm9v",
416+
},
417+
],
418+
},
419+
},
420+
] as MessageV2.Part[],
421+
},
422+
]
423+
424+
expect(await MessageV2.toModelMessages(input, next)).toStrictEqual([
425+
{
426+
role: "user",
427+
content: [{ type: "text", text: "read video" }],
428+
},
429+
{
430+
role: "assistant",
431+
content: [
432+
{
433+
type: "tool-call",
434+
toolCallId: "call-1",
435+
toolName: "read",
436+
input: { filePath: "/tmp/clip.mp4" },
437+
providerExecuted: undefined,
438+
},
439+
],
440+
},
441+
{
442+
role: "tool",
443+
content: [
444+
{
445+
type: "tool-result",
446+
toolCallId: "call-1",
447+
toolName: "read",
448+
output: { type: "text", value: "Video read successfully" },
449+
},
450+
],
451+
},
452+
{
453+
role: "user",
454+
content: [
455+
{ type: "text", text: MessageV2.SYNTHETIC_ATTACHMENT_PROMPT },
456+
{
457+
type: "file",
458+
data: "data:video/mp4;base64,Zm9v",
459+
filename: undefined,
460+
mediaType: "video/mp4",
461+
},
462+
],
463+
},
464+
])
465+
})
466+
362467
test("omits provider metadata when assistant model differs", async () => {
363468
const userID = "m-user"
364469
const assistantID = "m-assistant"

packages/opencode/test/tool/read.test.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import { afterEach, describe, expect } from "bun:test"
22
import { Cause, Effect, Exit, Layer } from "effect"
3+
import { truncate as resize } from "fs/promises"
34
import path from "path"
45
import { Agent } from "../../src/agent/agent"
56
import * as CrossSpawnSpawner from "../../src/effect/cross-spawn-spawner"
67
import { AppFileSystem } from "../../src/filesystem"
78
import { FileTime } from "../../src/file/time"
9+
import { Flag } from "../../src/flag/flag"
810
import { LSP } from "../../src/lsp"
911
import { Permission } from "../../src/permission"
1012
import { Instance } from "../../src/project/instance"
@@ -409,6 +411,69 @@ describe("tool.read truncation", () => {
409411
}),
410412
)
411413

414+
it.live("attaches PDFs, video, and audio as file attachments", () =>
415+
Effect.gen(function* () {
416+
const dir = yield* tmpdirScoped()
417+
const cases = [
418+
{ file: "doc.pdf", mime: "application/pdf", msg: "PDF read successfully" },
419+
{ file: "clip.mp4", mime: "video/mp4", msg: "Video read successfully" },
420+
{ file: "clip.webm", mime: "video/webm", msg: "Video read successfully" },
421+
{ file: "sound.mp3", mime: "audio/mpeg", msg: "Audio read successfully" },
422+
]
423+
424+
yield* Effect.forEach(
425+
cases,
426+
(item) =>
427+
Effect.gen(function* () {
428+
yield* put(path.join(dir, item.file), "media")
429+
const result = yield* exec(dir, { filePath: path.join(dir, item.file) })
430+
431+
expect(result.output).toBe(item.msg)
432+
expect(result.metadata.preview).toBe(item.msg)
433+
expect(result.metadata.truncated).toBe(false)
434+
expect(result.attachments).toBeDefined()
435+
expect(result.attachments?.[0].type).toBe("file")
436+
expect(result.attachments?.[0].mime).toBe(item.mime)
437+
expect(result.attachments?.[0].url).toStartWith(`data:${item.mime};base64,`)
438+
}),
439+
{ concurrency: "unbounded" },
440+
)
441+
}),
442+
)
443+
444+
// Checks that a media file one byte over the default 256 MiB attachment limit is
// rejected with an explicit error naming the limit and the offending path.
it.live("rejects oversized media attachments before reading content", () =>
445+
Effect.gen(function* () {
446+
const dir = yield* tmpdirScoped()
447+
const file = path.join(dir, "large.mp4")
448+
// Create an empty file, then extend it with truncate() so its reported size exceeds
// the limit without the test having to write 256 MiB of real data.
yield* put(file, "")
449+
yield* Effect.promise(() => resize(file, 256 * 1024 * 1024 + 1))
450+
451+
// NOTE(review): `fail` is a helper defined elsewhere in this test file; presumably it
// runs the read tool and yields the resulting error — confirm against its definition.
const err = yield* fail(dir, { filePath: file })
452+
expect(err.message).toContain("Cannot attach video file larger than 256.0 MB")
453+
expect(err.message).toContain(file)
454+
}),
455+
)
456+
457+
// Checks that OPENCODE_READ_MAX_ATTACHMENT_BYTES overrides the default attachment limit.
//
// The flag override is installed and restored through Effect.acquireUseRelease rather
// than a JavaScript try/finally inside Effect.gen: when a yielded effect fails, the
// generator is not resumed, so a `finally` block would be skipped and the 4-byte limit
// would leak into later tests. The release step runs on success, failure, or interruption.
it.live("uses OPENCODE_READ_MAX_ATTACHMENT_BYTES for media attachment limit", () =>
  Effect.acquireUseRelease(
    // Acquire: remember the current flag value, then install a tiny limit.
    Effect.sync(() => {
      const prev = Flag.OPENCODE_READ_MAX_ATTACHMENT_BYTES
      // @ts-expect-error tests can override static env flags
      Flag.OPENCODE_READ_MAX_ATTACHMENT_BYTES = 4
      return prev
    }),
    // Use: a 5-byte media file must now be rejected against the 4-byte limit.
    () =>
      Effect.gen(function* () {
        const dir = yield* tmpdirScoped()
        const file = path.join(dir, "small.mp4")
        yield* put(file, "media")

        const err = yield* fail(dir, { filePath: file })
        expect(err.message).toContain("Cannot attach video file larger than 4 B")
        expect(err.message).toContain("(5 B)")
      }),
    // Release: always put the previous flag value back.
    (prev) =>
      Effect.sync(() => {
        // @ts-expect-error tests can override static env flags
        Flag.OPENCODE_READ_MAX_ATTACHMENT_BYTES = prev
      }),
  ),
)
476+
412477
it.live(".fbs files (FlatBuffers schema) are read as text, not images", () =>
413478
Effect.gen(function* () {
414479
const dir = yield* tmpdirScoped()

0 commit comments

Comments
 (0)