From a3c1a022937d7d7c243c36f1308cb042306c9274 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 29 Mar 2026 02:34:08 +0000
Subject: [PATCH 1/2] feat(core): preserve multimodal content blocks in
 provider responses

Update Claude and Pi providers to preserve non-text content blocks
(images) in Message.content instead of discarding them via
extractTextContent(). This enables multimodal content to flow from
provider response through to evaluators.

Changes:
- Create shared claude-content.ts with toContentArray() and
  extractTextContent() used by all 3 Claude providers
- Update claude-cli, claude-sdk, claude providers to use
  structuredContent ?? textContent pattern
- Add toPiContentArray() to pi-utils.ts for Pi provider
- Update pi-coding-agent convertAgentMessage() to preserve
  structured content
- Add 23 unit tests covering content preservation, backward
  compat, and end-to-end multimodal flow

Text-only responses still produce plain strings (no unnecessary
wrapping). extractTextContent() remains available for backward
compatibility.

Closes #818

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../src/evaluation/providers/claude-cli.ts    |  68 +----
 .../evaluation/providers/claude-content.ts    |  94 ++++++
 .../src/evaluation/providers/claude-sdk.ts    |  28 +-
 .../core/src/evaluation/providers/claude.ts   |  28 +-
 .../evaluation/providers/pi-coding-agent.ts   |   5 +-
 .../core/src/evaluation/providers/pi-utils.ts |  50 +++
 .../providers/content-preserve.test.ts        | 288 ++++++++++++++++++
 7 files changed, 442 insertions(+), 119 deletions(-)
 create mode 100644 packages/core/src/evaluation/providers/claude-content.ts
 create mode 100644 packages/core/test/evaluation/providers/content-preserve.test.ts

diff --git a/packages/core/src/evaluation/providers/claude-cli.ts b/packages/core/src/evaluation/providers/claude-cli.ts
index d400c2069..1699810dd 100644
--- a/packages/core/src/evaluation/providers/claude-cli.ts
+++ b/packages/core/src/evaluation/providers/claude-cli.ts
@@ -5,7 +5,7 @@ import type { WriteStream } from 'node:fs';
 import { mkdir } from 'node:fs/promises';
 import path from 'node:path';
 
-import type { Content } from '../content.js';
+import { extractTextContent, toContentArray } from './claude-content.js';
 import { recordClaudeLogEntry } from './claude-log-tracker.js';
 import { buildPromptDocument, normalizeInputFiles } from './preread.js';
 import type { ClaudeResolvedConfig } from './targets.js';
@@ -479,72 +479,6 @@ function summarizeEvent(event: Record<string, unknown>): string | undefined {
   }
 }
 
-/**
- * Convert Claude's content array to Content[] preserving non-text blocks.
- * Returns undefined if content is a plain string or has only text blocks
- * (no benefit over the simpler string representation).
- */
-function toContentArray(content: unknown): Content[] | undefined {
-  if (!Array.isArray(content)) return undefined;
-
-  let hasNonText = false;
-  const blocks: Content[] = [];
-
-  for (const part of content) {
-    if (!part || typeof part !== 'object') continue;
-    const p = part as Record<string, unknown>;
-
-    if (p.type === 'text' && typeof p.text === 'string') {
-      blocks.push({ type: 'text', text: p.text });
-    } else if (p.type === 'image' && typeof p.source === 'object' && p.source !== null) {
-      const src = p.source as Record<string, unknown>;
-      const mediaType =
-        typeof p.media_type === 'string'
-          ? p.media_type
-          : typeof src.media_type === 'string'
-            ? src.media_type
-            : 'application/octet-stream';
-      const data =
-        typeof src.data === 'string'
-          ? `data:${mediaType};base64,${src.data}`
-          : typeof p.url === 'string'
-            ? (p.url as string)
-            : '';
-      blocks.push({ type: 'image', media_type: mediaType, source: data });
-      hasNonText = true;
-    } else if (p.type === 'tool_use') {
-      // tool_use blocks are handled separately as ToolCall — skip
-    } else if (p.type === 'tool_result') {
-      // tool_result blocks are not user content — skip
-    }
-  }
-
-  return hasNonText && blocks.length > 0 ? blocks : undefined;
-}
-
-/**
- * Extract text content from Claude's content array format.
- */
-function extractTextContent(content: unknown): string | undefined {
-  if (typeof content === 'string') {
-    return content;
-  }
-  if (!Array.isArray(content)) {
-    return undefined;
-  }
-  const textParts: string[] = [];
-  for (const part of content) {
-    if (!part || typeof part !== 'object') {
-      continue;
-    }
-    const p = part as Record<string, unknown>;
-    if (p.type === 'text' && typeof p.text === 'string') {
-      textParts.push(p.text);
-    }
-  }
-  return textParts.length > 0 ? textParts.join('\n') : undefined;
-}
-
 /**
  * Extract tool calls from Claude's content array format.
  */
diff --git a/packages/core/src/evaluation/providers/claude-content.ts b/packages/core/src/evaluation/providers/claude-content.ts
new file mode 100644
index 000000000..889029fc9
--- /dev/null
+++ b/packages/core/src/evaluation/providers/claude-content.ts
@@ -0,0 +1,94 @@
+/**
+ * Shared content-mapping utilities for Claude-based providers.
+ *
+ * Converts Claude's raw content array format (Anthropic API) into the AgentV
+ * Content[] union so that non-text blocks (images) flow through the pipeline
+ * without lossy flattening.
+ *
+ * Used by: claude-cli, claude-sdk, claude (legacy).
+ *
+ * ## Claude content format
+ *
+ * Claude responses use:
+ * ```json
+ * { "content": [
+ *     { "type": "text", "text": "..." },
+ *     { "type": "image", "source": { "type": "base64", "media_type": "image/png", "data": "..." } },
+ *     { "type": "tool_use", "name": "...", "input": {...}, "id": "..." }
+ * ]}
+ * ```
+ *
+ * `toContentArray` maps text and image blocks to `Content[]`.
+ * `tool_use` blocks are surfaced separately as `ToolCall`; `tool_result` blocks are skipped.
+ */
+
+import type { Content } from '../content.js';
+
+/**
+ * Convert Claude's raw content array to `Content[]`, preserving non-text blocks.
+ *
+ * Text blocks are copied as-is. Image blocks become `{ type: 'image', media_type, source }`
+ * where `source` is a base64 data URI (from `source.data`) or a URL (from `source.url`,
+ * falling back to a top-level `url`). Images with no usable source are dropped.
+ *
+ * Returns `undefined` when the content is a plain string or contains no image
+ * blocks — callers should fall back to the text-only string representation.
+ */
+export function toContentArray(content: unknown): Content[] | undefined {
+  if (!Array.isArray(content)) return undefined;
+
+  let hasNonText = false;
+  const blocks: Content[] = [];
+
+  for (const part of content) {
+    if (!part || typeof part !== 'object') continue;
+    const p = part as Record<string, unknown>;
+
+    if (p.type === 'text' && typeof p.text === 'string') {
+      blocks.push({ type: 'text', text: p.text });
+    } else if (p.type === 'image' && typeof p.source === 'object' && p.source !== null) {
+      const src = p.source as Record<string, unknown>;
+      const mediaType =
+        typeof p.media_type === 'string'
+          ? p.media_type
+          : typeof src.media_type === 'string'
+            ? src.media_type
+            : 'application/octet-stream';
+      // URL sources carry the link on `source.url`; a top-level `url` is also accepted.
+      const url = typeof src.url === 'string' ? src.url : typeof p.url === 'string' ? p.url : '';
+      const data = typeof src.data === 'string' ? `data:${mediaType};base64,${src.data}` : url;
+      // An image with neither data nor URL carries no information — drop it.
+      if (!data) continue;
+      blocks.push({ type: 'image', media_type: mediaType, source: data });
+      hasNonText = true;
+    }
+    // tool_use blocks are handled separately as ToolCall; tool_result blocks are
+    // not user content — both are skipped, as is any other unrecognized block type.
+  }
+
+  return hasNonText && blocks.length > 0 ? blocks : undefined;
+}
+
+/**
+ * Pull the plain text out of a Claude content value.
+ *
+ * A string is returned unchanged. For an array, the `text` of every
+ * `type: 'text'` block is joined with newlines; all other blocks are ignored.
+ * Yields `undefined` for any other input or when no text block is present.
+ */
+export function extractTextContent(content: unknown): string | undefined {
+  if (typeof content === 'string') return content;
+  if (!Array.isArray(content)) return undefined;
+
+  const pieces: string[] = [];
+  for (const entry of content) {
+    const block =
+      entry && typeof entry === 'object' ? (entry as Record<string, unknown>) : undefined;
+    if (block?.type === 'text' && typeof block.text === 'string') {
+      pieces.push(block.text);
+    }
+  }
+
+  if (pieces.length === 0) return undefined;
+  return pieces.join('\n');
+}
diff --git a/packages/core/src/evaluation/providers/claude-sdk.ts b/packages/core/src/evaluation/providers/claude-sdk.ts
index aab8cc16b..6e8985fa4 100644
--- a/packages/core/src/evaluation/providers/claude-sdk.ts
+++ b/packages/core/src/evaluation/providers/claude-sdk.ts
@@ -4,6 +4,7 @@ import type { WriteStream } from 'node:fs';
 import { mkdir } from 'node:fs/promises';
 import path from 'node:path';
 
+import { extractTextContent, toContentArray } from './claude-content.js';
 import { recordClaudeLogEntry } from './claude-log-tracker.js';
 import { buildPromptDocument, normalizeInputFiles } from './preread.js';
 import type { ClaudeResolvedConfig } from './targets.js';
@@ -139,12 +140,13 @@ export class ClaudeSdkProvider implements Provider {
             if (betaMessage && typeof betaMessage === 'object') {
               const msg = betaMessage as Record<string, unknown>;
               const content = msg.content;
+              const structuredContent = toContentArray(content);
               const textContent = extractTextContent(content);
               const toolCalls = extractToolCalls(content);
 
               const outputMsg: Message = {
                 role: 'assistant',
-                content: textContent,
+                content: structuredContent ?? textContent,
                 toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
               };
               output.push(outputMsg);
@@ -280,30 +282,6 @@ export class ClaudeSdkProvider implements Provider {
   }
 }
 
-/**
- * Extract text content from Claude's content array format.
- * Claude uses: content: [{ type: "text", text: "..." }, ...]
- */
-function extractTextContent(content: unknown): string | undefined {
-  if (typeof content === 'string') {
-    return content;
-  }
-  if (!Array.isArray(content)) {
-    return undefined;
-  }
-  const textParts: string[] = [];
-  for (const part of content) {
-    if (!part || typeof part !== 'object') {
-      continue;
-    }
-    const p = part as Record<string, unknown>;
-    if (p.type === 'text' && typeof p.text === 'string') {
-      textParts.push(p.text);
-    }
-  }
-  return textParts.length > 0 ? textParts.join('\n') : undefined;
-}
-
 /**
  * Extract tool calls from Claude's content array format.
  * Claude uses: content: [{ type: "tool_use", name: "...", input: {...}, id: "..." }, ...]
diff --git a/packages/core/src/evaluation/providers/claude.ts b/packages/core/src/evaluation/providers/claude.ts
index 62382a604..2ac222e4f 100644
--- a/packages/core/src/evaluation/providers/claude.ts
+++ b/packages/core/src/evaluation/providers/claude.ts
@@ -4,6 +4,7 @@ import type { WriteStream } from 'node:fs';
 import { mkdir } from 'node:fs/promises';
 import path from 'node:path';
 
+import { extractTextContent, toContentArray } from './claude-content.js';
 import { recordClaudeLogEntry } from './claude-log-tracker.js';
 import { buildPromptDocument, normalizeInputFiles } from './preread.js';
 import type { ClaudeResolvedConfig } from './targets.js';
@@ -139,12 +140,13 @@ export class ClaudeProvider implements Provider {
             if (betaMessage && typeof betaMessage === 'object') {
               const msg = betaMessage as Record<string, unknown>;
               const content = msg.content;
+              const structuredContent = toContentArray(content);
               const textContent = extractTextContent(content);
               const toolCalls = extractToolCalls(content);
 
               const outputMsg: Message = {
                 role: 'assistant',
-                content: textContent,
+                content: structuredContent ?? textContent,
                 toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
               };
               output.push(outputMsg);
@@ -278,30 +280,6 @@ export class ClaudeProvider implements Provider {
   }
 }
 
-/**
- * Extract text content from Claude's content array format.
- * Claude uses: content: [{ type: "text", text: "..." }, ...]
- */
-function extractTextContent(content: unknown): string | undefined {
-  if (typeof content === 'string') {
-    return content;
-  }
-  if (!Array.isArray(content)) {
-    return undefined;
-  }
-  const textParts: string[] = [];
-  for (const part of content) {
-    if (!part || typeof part !== 'object') {
-      continue;
-    }
-    const p = part as Record<string, unknown>;
-    if (p.type === 'text' && typeof p.text === 'string') {
-      textParts.push(p.text);
-    }
-  }
-  return textParts.length > 0 ? textParts.join('\n') : undefined;
-}
-
 /**
  * Extract tool calls from Claude's content array format.
  * Claude uses: content: [{ type: "tool_use", name: "...", input: {...}, id: "..." }, ...]
diff --git a/packages/core/src/evaluation/providers/pi-coding-agent.ts b/packages/core/src/evaluation/providers/pi-coding-agent.ts
index 1c92f92cd..3e4691bd0 100644
--- a/packages/core/src/evaluation/providers/pi-coding-agent.ts
+++ b/packages/core/src/evaluation/providers/pi-coding-agent.ts
@@ -18,7 +18,7 @@ import { createInterface } from 'node:readline';
 import { fileURLToPath } from 'node:url';
 
 import { recordPiLogEntry } from './pi-log-tracker.js';
-import { extractPiTextContent, toFiniteNumber } from './pi-utils.js';
+import { extractPiTextContent, toFiniteNumber, toPiContentArray } from './pi-utils.js';
 import { normalizeInputFiles } from './preread.js';
 import type { PiCodingAgentResolvedConfig } from './targets.js';
 import type {
@@ -564,7 +564,8 @@ function convertAgentMessage(
 
   const msg = message as Record<string, unknown>;
   const role = typeof msg.role === 'string' ? msg.role : 'unknown';
-  const content = extractPiTextContent(msg.content);
+  const structuredContent = toPiContentArray(msg.content);
+  const content = structuredContent ?? extractPiTextContent(msg.content);
   const toolCalls = extractToolCalls(msg.content, toolTrackers, completedToolResults);
   const startTimeVal =
     typeof msg.timestamp === 'number'
diff --git a/packages/core/src/evaluation/providers/pi-utils.ts b/packages/core/src/evaluation/providers/pi-utils.ts
index 058720870..3ea78d3d8 100644
--- a/packages/core/src/evaluation/providers/pi-utils.ts
+++ b/packages/core/src/evaluation/providers/pi-utils.ts
@@ -5,6 +5,8 @@
  * and safe numeric conversions.
  */
 
+import type { Content } from '../content.js';
+
 /**
  * Extract text content from Pi's content array format.
  * Pi uses: content: [{ type: "text", text: "..." }, ...]
@@ -32,6 +34,54 @@ export function extractPiTextContent(content: unknown): string | undefined {
   return textParts.length > 0 ? textParts.join('\n') : undefined;
 }
 
+/**
+ * Convert Pi's content array to `Content[]`, preserving image blocks.
+ *
+ * Images use a base64 data URI built from `source.data`, else the top-level
+ * `url`; images with neither are dropped. `media_type` comes from the block,
+ * then from `source.media_type`, then defaults to `application/octet-stream`.
+ * Returns `undefined` for non-array input or when no image block is kept.
+ */
+export function toPiContentArray(content: unknown): Content[] | undefined {
+  if (!Array.isArray(content)) return undefined;
+
+  let hasNonText = false;
+  const blocks: Content[] = [];
+
+  for (const part of content) {
+    if (!part || typeof part !== 'object') continue;
+    const p = part as Record<string, unknown>;
+
+    if (p.type === 'text' && typeof p.text === 'string') {
+      blocks.push({ type: 'text', text: p.text });
+    } else if (p.type === 'image') {
+      const src: Record<string, unknown> =
+        typeof p.source === 'object' && p.source !== null
+          ? (p.source as Record<string, unknown>)
+          : {};
+      const srcMediaType = typeof src.media_type === 'string' ? src.media_type : undefined;
+      // Fall back to the nested media type so the block is not mislabeled as octet-stream.
+      const mediaType =
+        typeof p.media_type === 'string'
+          ? p.media_type
+          : (srcMediaType ?? 'application/octet-stream');
+      let source = '';
+      if (typeof src.data === 'string') {
+        source = `data:${srcMediaType ?? mediaType};base64,${src.data}`;
+      } else if (typeof p.url === 'string') {
+        source = p.url;
+      }
+      if (source) {
+        blocks.push({ type: 'image', media_type: mediaType, source });
+        hasNonText = true;
+      }
+    }
+    // tool_use / tool_result (and unknown block types) are handled elsewhere — skip.
+  }
+
+  return hasNonText && blocks.length > 0 ? blocks : undefined;
+}
+
 /**
  * Safely convert an unknown value to a finite number, or undefined.
  */
diff --git a/packages/core/test/evaluation/providers/content-preserve.test.ts b/packages/core/test/evaluation/providers/content-preserve.test.ts
new file mode 100644
index 000000000..626ed0e35
--- /dev/null
+++ b/packages/core/test/evaluation/providers/content-preserve.test.ts
@@ -0,0 +1,288 @@
+import { describe, expect, it } from 'vitest';
+
+import { getTextContent } from '../../../src/evaluation/content.js';
+import {
+  extractTextContent,
+  toContentArray,
+} from '../../../src/evaluation/providers/claude-content.js';
+import {
+  extractPiTextContent,
+  toPiContentArray,
+} from '../../../src/evaluation/providers/pi-utils.js';
+import type { Content } from '../../../src/evaluation/content.js';
+import type { Message } from '../../../src/evaluation/providers/types.js';
+
+// ---------------------------------------------------------------------------
+// toContentArray (Claude)
+// ---------------------------------------------------------------------------
+describe('toContentArray', () => {
+  it('returns undefined for non-array input', () => {
+    expect(toContentArray('plain string')).toBeUndefined();
+    expect(toContentArray(42)).toBeUndefined();
+    expect(toContentArray(null)).toBeUndefined();
+    expect(toContentArray(undefined)).toBeUndefined();
+  });
+
+  it('returns undefined when content has only text blocks', () => {
+    const content = [
+      { type: 'text', text: 'hello' },
+      { type: 'text', text: 'world' },
+    ];
+    expect(toContentArray(content)).toBeUndefined();
+  });
+
+  it('preserves image + text with base64 data', () => {
+    const content = [
+      { type: 'text', text: 'Here is an image:' },
+      {
+        type: 'image',
+        source: { type: 'base64', media_type: 'image/png', data: 'abc123' },
+      },
+    ];
+    const result = toContentArray(content);
+    expect(result).toBeDefined();
+    expect(result).toHaveLength(2);
+    expect(result![0]).toEqual({ type: 'text', text: 'Here is an image:' });
+    expect(result![1]).toEqual({
+      type: 'image',
+      media_type: 'image/png',
+      source: 'data:image/png;base64,abc123',
+    });
+  });
+
+  it('handles url images', () => {
+    const content = [
+      {
+        type: 'image',
+        url: 'https://example.com/img.png',
+        source: { type: 'url' },
+        media_type: 'image/png',
+      },
+    ];
+    const result = toContentArray(content);
+    expect(result).toBeDefined();
+    expect(result![0]).toEqual({
+      type: 'image',
+      media_type: 'image/png',
+      source: 'https://example.com/img.png',
+    });
+  });
+
+  it('skips tool_use and tool_result blocks', () => {
+    const content = [
+      { type: 'text', text: 'hi' },
+      { type: 'tool_use', name: 'bash', input: { cmd: 'ls' }, id: 't1' },
+      { type: 'tool_result', tool_use_id: 't1', content: 'ok' },
+      {
+        type: 'image',
+        source: { data: 'AAAA', media_type: 'image/jpeg' },
+      },
+    ];
+    const result = toContentArray(content);
+    expect(result).toBeDefined();
+    expect(result).toHaveLength(2);
+    expect(result![0]).toEqual({ type: 'text', text: 'hi' });
+    expect(result![1].type).toBe('image');
+  });
+
+  it('handles invalid parts gracefully', () => {
+    const content = [null, undefined, 42, 'string', { type: 'text', text: 'ok' }];
+    // only text → undefined (no non-text blocks)
+    expect(toContentArray(content)).toBeUndefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// extractTextContent (Claude)
+// ---------------------------------------------------------------------------
+describe('extractTextContent', () => {
+  it('passes through a plain string', () => {
+    expect(extractTextContent('hello')).toBe('hello');
+  });
+
+  it('returns undefined for non-array non-string', () => {
+    expect(extractTextContent(42)).toBeUndefined();
+    expect(extractTextContent(null)).toBeUndefined();
+    expect(extractTextContent(undefined)).toBeUndefined();
+    expect(extractTextContent({})).toBeUndefined();
+  });
+
+  it('extracts text from content array', () => {
+    const content = [
+      { type: 'text', text: 'hello' },
+      { type: 'text', text: 'world' },
+    ];
+    expect(extractTextContent(content)).toBe('hello\nworld');
+  });
+
+  it('skips non-text blocks', () => {
+    const content = [
+      { type: 'text', text: 'hello' },
+      { type: 'image', source: { data: 'abc' } },
+      { type: 'tool_use', name: 'bash' },
+    ];
+    expect(extractTextContent(content)).toBe('hello');
+  });
+
+  it('returns undefined for empty array', () => {
+    expect(extractTextContent([])).toBeUndefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// toPiContentArray
+// ---------------------------------------------------------------------------
+describe('toPiContentArray', () => {
+  it('returns undefined for non-array input', () => {
+    expect(toPiContentArray('plain string')).toBeUndefined();
+    expect(toPiContentArray(42)).toBeUndefined();
+    expect(toPiContentArray(null)).toBeUndefined();
+  });
+
+  it('returns undefined when content has only text blocks', () => {
+    const content = [
+      { type: 'text', text: 'hello' },
+      { type: 'text', text: 'world' },
+    ];
+    expect(toPiContentArray(content)).toBeUndefined();
+  });
+
+  it('preserves image + text with base64 source', () => {
+    const content = [
+      { type: 'text', text: 'Here is an image:' },
+      {
+        type: 'image',
+        media_type: 'image/png',
+        source: { data: 'abc123', media_type: 'image/png' },
+      },
+    ];
+    const result = toPiContentArray(content);
+    expect(result).toBeDefined();
+    expect(result).toHaveLength(2);
+    expect(result![0]).toEqual({ type: 'text', text: 'Here is an image:' });
+    expect(result![1]).toEqual({
+      type: 'image',
+      media_type: 'image/png',
+      source: 'data:image/png;base64,abc123',
+    });
+  });
+
+  it('handles url images', () => {
+    const content = [
+      {
+        type: 'image',
+        url: 'https://example.com/img.png',
+        media_type: 'image/png',
+      },
+    ];
+    const result = toPiContentArray(content);
+    expect(result).toBeDefined();
+    expect(result![0]).toEqual({
+      type: 'image',
+      media_type: 'image/png',
+      source: 'https://example.com/img.png',
+    });
+  });
+
+  it('skips tool_use and tool_result blocks', () => {
+    const content = [
+      { type: 'text', text: 'hi' },
+      { type: 'tool_use', name: 'bash' },
+      { type: 'tool_result', content: 'ok' },
+      {
+        type: 'image',
+        media_type: 'image/jpeg',
+        source: { data: 'AAAA', media_type: 'image/jpeg' },
+      },
+    ];
+    const result = toPiContentArray(content);
+    expect(result).toBeDefined();
+    expect(result).toHaveLength(2);
+    expect(result![0]).toEqual({ type: 'text', text: 'hi' });
+    expect(result![1].type).toBe('image');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// extractPiTextContent (backward compat)
+// ---------------------------------------------------------------------------
+describe('extractPiTextContent', () => {
+  it('passes through a plain string', () => {
+    expect(extractPiTextContent('hello')).toBe('hello');
+  });
+
+  it('extracts text from content array', () => {
+    const content = [
+      { type: 'text', text: 'hello' },
+      { type: 'text', text: 'world' },
+    ];
+    expect(extractPiTextContent(content)).toBe('hello\nworld');
+  });
+
+  it('returns undefined for non-array non-string', () => {
+    expect(extractPiTextContent(42)).toBeUndefined();
+    expect(extractPiTextContent(null)).toBeUndefined();
+  });
+
+  it('returns undefined for empty array', () => {
+    expect(extractPiTextContent([])).toBeUndefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// End-to-end: Content[] interop
+// ---------------------------------------------------------------------------
+describe('End-to-end content preservation', () => {
+  it('Content[] is compatible with getTextContent', () => {
+    const blocks: Content[] = [
+      { type: 'text', text: 'hello' },
+      { type: 'image', media_type: 'image/png', source: 'data:image/png;base64,abc' },
+      { type: 'text', text: 'world' },
+    ];
+    expect(getTextContent(blocks)).toBe('hello\nworld');
+  });
+
+  it('image block survives into Message.content', () => {
+    const rawClaudeContent = [
+      { type: 'text', text: 'Look at this:' },
+      {
+        type: 'image',
+        source: { type: 'base64', media_type: 'image/png', data: 'DEADBEEF' },
+      },
+    ];
+
+    const structuredContent = toContentArray(rawClaudeContent);
+    const textContent = extractTextContent(rawClaudeContent);
+
+    const msg: Message = {
+      role: 'assistant',
+      content: structuredContent ?? textContent,
+    };
+
+    // content should be Content[] (not flattened to string)
+    expect(Array.isArray(msg.content)).toBe(true);
+    const blocks = msg.content as Content[];
+    expect(blocks).toHaveLength(2);
+    expect(blocks[0]).toEqual({ type: 'text', text: 'Look at this:' });
+    expect(blocks[1].type).toBe('image');
+    expect((blocks[1] as { source: string }).source).toContain('base64,DEADBEEF');
+  });
+
+  it('text-only content falls back to string', () => {
+    const rawClaudeContent = [
+      { type: 'text', text: 'Just text' },
+    ];
+
+    const structuredContent = toContentArray(rawClaudeContent);
+    const textContent = extractTextContent(rawClaudeContent);
+
+    const msg: Message = {
+      role: 'assistant',
+      content: structuredContent ?? textContent,
+    };
+
+    // text-only → toContentArray returns undefined → falls back to string
+    expect(typeof msg.content).toBe('string');
+    expect(msg.content).toBe('Just text');
+  });
+});

From 87cce5b3f73eafb9da822a30e27184395e796cad Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 29 Mar 2026 03:07:11 +0000
Subject: [PATCH 2/2] =?UTF-8?q?feat(eval):=20code=20grader=20multimodal=20?=
 =?UTF-8?q?=E2=80=94=20structured=20Content=20in=20CodeGraderInput?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add ContentTextSchema, ContentImageSchema, ContentFileSchema, ContentSchema
  as Zod discriminated union in packages/eval/src/schemas.ts
- Update MessageSchema.content to accept string | Content[] (typed blocks)
- Add materializeContentForGrader() in code-evaluator.ts:
  - Data URI images decoded and written to temp files (path, not base64)
  - Non-URI images pass source through as path field
  - Text/file blocks unchanged; string content unchanged
- Lazy temp dir creation for image files, cleaned up in finally block
- Export Content schemas and types from @agentv/eval
- Add comprehensive unit tests for schema validation and materialization
- Add integration tests for CodeEvaluator with multimodal output

Closes #821

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../evaluation/evaluators/code-evaluator.ts   | 109 +++++-
 .../code-evaluator-multimodal.test.ts         | 338 ++++++++++++++++++
 packages/eval/src/index.ts                    |   8 +
 packages/eval/src/schemas.ts                  |  60 +++-
 packages/eval/test/define-code-grader.test.ts | 162 +++++++++
 5 files changed, 673 insertions(+), 4 deletions(-)
 create mode 100644 packages/core/test/evaluation/code-evaluator-multimodal.test.ts

diff --git a/packages/core/src/evaluation/evaluators/code-evaluator.ts b/packages/core/src/evaluation/evaluators/code-evaluator.ts
index c2410924b..980816731 100644
--- a/packages/core/src/evaluation/evaluators/code-evaluator.ts
+++ b/packages/core/src/evaluation/evaluators/code-evaluator.ts
@@ -9,6 +9,7 @@ import {
   createTargetProxy,
 } from '../../runtime/target-proxy.js';
 import { toSnakeCaseDeep } from '../case-conversion.js';
+import { type ContentImage, isContentArray } from '../content.js';
 import type { AssertionEntry, JsonObject, TargetAccessConfig } from '../types.js';
 import { clampScore, isNonEmptyString, parseJsonSafe, scoreToVerdict } from './scoring.js';
 import type { EvaluationContext, EvaluationScore, Evaluator } from './types.js';
@@ -16,6 +17,83 @@ import type { EvaluationContext, EvaluationScore, Evaluator } from './types.js';
 /** Threshold in bytes above which output is written to a temp file instead of inlined. */
 const FILE_BACKED_OUTPUT_THRESHOLD = 50_000;
 
+/** Regex matching `data:<mediaType>;base64,<data>` URIs. */
+const DATA_URI_RE = /^data:([^;]+);base64,(.+)$/s;
+
+/** Write `data` to a new file; resolves `false` (without overwriting) if the path exists. */
+async function writeNewFile(filePath: string, data: Buffer): Promise<boolean> {
+  try {
+    await writeFile(filePath, data, { flag: 'wx' });
+    return true;
+  } catch (error) {
+    if ((error as { code?: unknown } | null)?.code === 'EEXIST') return false;
+    throw error;
+  }
+}
+
+/**
+ * Convert ContentImage blocks in message arrays for code grader consumption.
+ *
+ * - Data URI images (`data:image/png;base64,...`) → decoded, written to temp file, replaced with file path.
+ * - Non-URI images (already a path or URL) → `source` carried through as `path`.
+ * - ContentText, ContentFile blocks and plain string content → passed through unchanged.
+ *
+ * Temp files are named `img-<n>.<ext>` and never overwrite an existing file, so several calls
+ * may share one work dir; `<ext>` is the alphanumeric media subtype (`jpeg` → `jpg`) or `bin`.
+ * Returns the original array when no image blocks exist (zero-copy fast path).
+ */
+export async function materializeContentForGrader(
+  messages: readonly Record<string, unknown>[] | null | undefined,
+  getWorkDir: () => Promise<string>,
+): Promise<readonly Record<string, unknown>[] | null> {
+  if (!messages || messages.length === 0) return messages ?? null;
+
+  // Fast path: skip if no image blocks exist
+  const hasImage = (content: unknown): boolean =>
+    isContentArray(content) && content.some((block) => block.type === 'image');
+  if (!messages.some((msg) => hasImage(msg.content))) return messages;
+
+  let counter = 0;
+  const result: Record<string, unknown>[] = [];
+
+  for (const msg of messages) {
+    const content = msg.content;
+    if (!isContentArray(content) || !hasImage(content)) {
+      result.push(msg);
+      continue;
+    }
+
+    const blocks: Record<string, unknown>[] = [];
+    for (const block of content) {
+      if (block.type !== 'image') {
+        blocks.push({ ...block });
+        continue;
+      }
+      const img = block as ContentImage;
+      const match = DATA_URI_RE.exec(img.source);
+      if (!match) {
+        // Already a path or URL → carry through as path
+        blocks.push({ type: 'image', media_type: img.media_type, path: img.source });
+        continue;
+      }
+      const [, mediaType, base64Data] = match;
+      const subtype = /^[a-z0-9]+/i.exec(mediaType.split('/')[1] ?? '')?.[0].toLowerCase();
+      const ext = subtype === 'jpeg' ? 'jpg' : (subtype ?? 'bin');
+      const dir = await getWorkDir();
+      const bytes = Buffer.from(base64Data, 'base64');
+      let filePath = join(dir, `img-${counter++}.${ext}`);
+      while (!(await writeNewFile(filePath, bytes))) {
+        filePath = join(dir, `img-${counter++}.${ext}`);
+      }
+      blocks.push({ type: 'image', media_type: img.media_type, path: filePath });
+    }
+
+    result.push({ ...msg, content: blocks });
+  }
+
+  return result;
+}
+
 export interface CodeEvaluatorOptions {
   readonly command: readonly string[];
   /** @deprecated Use `command` instead */
@@ -46,8 +124,23 @@ export class CodeEvaluator implements Evaluator {
   }
 
   async evaluate(context: EvaluationContext): Promise<EvaluationScore> {
+    // Lazy temp dir for materialized image files
+    let imageTmpDir: string | undefined;
+    const getImageDir = async () => {
+      if (!imageTmpDir) {
+        imageTmpDir = await mkdtemp(join(tmpdir(), 'agentv-img-'));
+      }
+      return imageTmpDir;
+    };
+
+    // Materialize multimodal content (data URIs → temp files, source → path)
+    const materializedOutput = await materializeContentForGrader(
+      context.output as readonly Record<string, unknown>[] | undefined,
+      getImageDir,
+    );
+
     // Determine whether to use file-backed output for large payloads
-    let outputForPayload = context.output ?? null;
+    let outputForPayload: readonly Record<string, unknown>[] | null = materializedOutput;
     let outputPath: string | undefined;
 
     if (outputForPayload) {
@@ -63,11 +156,17 @@ export class CodeEvaluator implements Evaluator {
     // Build payload (camelCase internally, converted to snake_case for graders)
     const payload = {
       criteria: context.evalCase.criteria,
-      expectedOutput: context.evalCase.expected_output,
+      expectedOutput: await materializeContentForGrader(
+        context.evalCase.expected_output as readonly Record<string, unknown>[],
+        getImageDir,
+      ),
       output: outputForPayload,
       outputPath,
       inputFiles: context.evalCase.file_paths,
-      input: context.evalCase.input,
+      input: await materializeContentForGrader(
+        context.evalCase.input as readonly Record<string, unknown>[],
+        getImageDir,
+      ),
       trace: context.trace ?? null,
       tokenUsage: context.tokenUsage ?? null,
       costUsd: context.costUsd ?? null,
@@ -196,6 +295,10 @@ export class CodeEvaluator implements Evaluator {
       if (outputPath) {
         await rm(dirname(outputPath), { recursive: true, force: true }).catch(() => {});
       }
+      // Clean up temp dir for materialized images
+      if (imageTmpDir) {
+        await rm(imageTmpDir, { recursive: true, force: true }).catch(() => {});
+      }
     }
   }
 }
diff --git a/packages/core/test/evaluation/code-evaluator-multimodal.test.ts b/packages/core/test/evaluation/code-evaluator-multimodal.test.ts
new file mode 100644
index 000000000..29f14b75b
--- /dev/null
+++ b/packages/core/test/evaluation/code-evaluator-multimodal.test.ts
@@ -0,0 +1,338 @@
+import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
+import { existsSync, readFileSync, readdirSync } from 'node:fs';
+import { mkdtemp, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+
+import { materializeContentForGrader } from '../../src/evaluation/evaluators/code-evaluator.js';
+import { CodeEvaluator } from '../../src/evaluation/evaluators/code-evaluator.js';
+import type { EvalTest } from '../../src/evaluation/types.js';
+
+const baseTestCase: EvalTest = {
+  id: 'case-mm',
+  dataset: 'test-dataset',
+  question: 'Test question',
+  input: [{ role: 'user', content: 'Describe this image' }],
+  expected_output: [],
+  reference_answer: 'A chart',
+  file_paths: [],
+  criteria: 'Describes the image correctly',
+  evaluator: 'code-grader',
+};
+
+/** Encode a string (as UTF-8 bytes) into a base64 `data:` URI with the given media type. */
+function toDataUri(mediaType: string, data: string): string {
+  return `data:${mediaType};base64,${Buffer.from(data).toString('base64')}`;
+}
+
+/** Write a grader script to `dir` that echoes its stdin payload back as `details.payload`. */
+async function createPayloadEchoGrader(dir: string): Promise<readonly string[]> {
+  const script = join(dir, 'echo-grader.js');
+  await writeFile(
+    script,
+    `const input = require('fs').readFileSync(0, 'utf8');
+const payload = JSON.parse(input);
+console.log(JSON.stringify({
+  score: 1.0,
+  assertions: [{ text: 'ok', passed: true }],
+  details: { payload },
+}));
+`,
+    'utf8',
+  );
+  return [process.execPath, script];
+}
+
+describe('materializeContentForGrader', () => {
+  let tmpDir: string;
+
+  beforeEach(async () => {
+    tmpDir = await mkdtemp(join(tmpdir(), 'materialize-test-'));
+  });
+
+  afterEach(async () => {
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
+  const getWorkDir = () => Promise.resolve(tmpDir);
+
+  it('returns null for null input', async () => {
+    const result = await materializeContentForGrader(null, getWorkDir);
+    expect(result).toBeNull();
+  });
+
+  it('returns null for undefined input', async () => {
+    const result = await materializeContentForGrader(undefined, getWorkDir);
+    expect(result).toBeNull();
+  });
+
+  it('passes through text-only messages unchanged', async () => {
+    const messages = [
+      { role: 'user', content: 'Hello' },
+      { role: 'assistant', content: 'Hi there' },
+    ] as Record<string, unknown>[];
+
+    const result = await materializeContentForGrader(messages, getWorkDir);
+    expect(result).toBe(messages); // Same reference — zero-copy
+  });
+
+  it('passes through Content[] with only text blocks unchanged', async () => {
+    const messages = [
+      {
+        role: 'assistant',
+        content: [
+          { type: 'text', text: 'paragraph 1' },
+          { type: 'text', text: 'paragraph 2' },
+        ],
+      },
+    ] as Record<string, unknown>[];
+
+    const result = await materializeContentForGrader(messages, getWorkDir);
+    expect(result).toBe(messages); // Same reference — no images
+  });
+
+  it('converts ContentImage data URI to temp file path', async () => {
+    const imageData = 'fake-png-data-for-testing';
+    const dataUri = toDataUri('image/png', imageData);
+
+    const messages = [
+      {
+        role: 'assistant',
+        content: [
+          { type: 'text', text: 'Here is a chart:' },
+          { type: 'image', media_type: 'image/png', source: dataUri },
+        ],
+      },
+    ] as Record<string, unknown>[];
+
+    const result = await materializeContentForGrader(messages, getWorkDir);
+    expect(result).not.toBe(messages); // New array — content was transformed
+
+    const content = (result?.[0] as Record<string, unknown>).content as Record<string, unknown>[];
+    expect(content).toHaveLength(2);
+
+    // Text block preserved
+    expect(content[0]).toEqual({ type: 'text', text: 'Here is a chart:' });
+
+    // Image block converted to path
+    const imgBlock = content[1];
+    expect(imgBlock.type).toBe('image');
+    expect(imgBlock.media_type).toBe('image/png');
+    expect(typeof imgBlock.path).toBe('string');
+    expect(imgBlock.path).toContain('img-0.png');
+    expect(imgBlock).not.toHaveProperty('source');
+
+    // Verify file was written with correct content
+    const filePath = imgBlock.path as string;
+    expect(existsSync(filePath)).toBe(true);
+    const fileContent = readFileSync(filePath);
+    expect(fileContent.toString()).toBe(imageData);
+  });
+
+  it('converts ContentImage path/URL source to path field', async () => {
+    const messages = [
+      {
+        role: 'assistant',
+        content: [
+          { type: 'text', text: 'Chart:' },
+          { type: 'image', media_type: 'image/png', source: '/workspace/chart.png' },
+        ],
+      },
+    ] as Record<string, unknown>[];
+
+    const result = await materializeContentForGrader(messages, getWorkDir);
+    const content = (result?.[0] as Record<string, unknown>).content as Record<string, unknown>[];
+    const imgBlock = content[1];
+
+    expect(imgBlock.type).toBe('image');
+    expect(imgBlock.media_type).toBe('image/png');
+    expect(imgBlock.path).toBe('/workspace/chart.png');
+    expect(imgBlock).not.toHaveProperty('source');
+  });
+
+  it('handles JPEG media type extension correctly', async () => {
+    const dataUri = toDataUri('image/jpeg', 'fake-jpeg');
+    const messages = [
+      {
+        role: 'assistant',
+        content: [{ type: 'image', media_type: 'image/jpeg', source: dataUri }],
+      },
+    ] as Record<string, unknown>[];
+
+    const result = await materializeContentForGrader(messages, getWorkDir);
+    const content = (result?.[0] as Record<string, unknown>).content as Record<string, unknown>[];
+    expect(content[0].path as string).toContain('.jpg');
+  });
+
+  it('preserves non-content message fields', async () => {
+    const dataUri = toDataUri('image/png', 'data');
+    const messages = [
+      {
+        role: 'assistant',
+        content: [{ type: 'image', media_type: 'image/png', source: dataUri }],
+        toolCalls: [{ tool: 'screenshot', input: {} }],
+        metadata: { provider: 'test' },
+      },
+    ] as Record<string, unknown>[];
+
+    const result = await materializeContentForGrader(messages, getWorkDir);
+    const msg = result?.[0] as Record<string, unknown>;
+    expect(msg.role).toBe('assistant');
+    expect(msg.toolCalls).toEqual([{ tool: 'screenshot', input: {} }]);
+    expect(msg.metadata).toEqual({ provider: 'test' });
+  });
+
+  it('handles multiple images across multiple messages', async () => {
+    const uri1 = toDataUri('image/png', 'image1');
+    const uri2 = toDataUri('image/webp', 'image2');
+
+    const messages = [
+      {
+        role: 'assistant',
+        content: [
+          { type: 'text', text: 'First chart:' },
+          { type: 'image', media_type: 'image/png', source: uri1 },
+        ],
+      },
+      {
+        role: 'assistant',
+        content: [
+          { type: 'text', text: 'Second chart:' },
+          { type: 'image', media_type: 'image/webp', source: uri2 },
+        ],
+      },
+    ] as Record<string, unknown>[];
+
+    const result = await materializeContentForGrader(messages, getWorkDir);
+    expect(result).toHaveLength(2);
+
+    const content0 = (result?.[0] as Record<string, unknown>).content as Record<string, unknown>[];
+    const content1 = (result?.[1] as Record<string, unknown>).content as Record<string, unknown>[];
+
+    expect(content0[1].path as string).toContain('img-0.png');
+    expect(content1[1].path as string).toContain('img-1.webp');
+
+    // Both files exist
+    expect(existsSync(content0[1].path as string)).toBe(true);
+    expect(existsSync(content1[1].path as string)).toBe(true);
+  });
+
+  it('preserves ContentFile blocks unchanged', async () => {
+    const dataUri = toDataUri('image/png', 'data');
+    const messages = [
+      {
+        role: 'assistant',
+        content: [
+          { type: 'file', media_type: 'text/csv', path: '/workspace/data.csv' },
+          { type: 'image', media_type: 'image/png', source: dataUri },
+        ],
+      },
+    ] as Record<string, unknown>[];
+
+    const result = await materializeContentForGrader(messages, getWorkDir);
+    const content = (result?.[0] as Record<string, unknown>).content as Record<string, unknown>[];
+
+    // File block preserved exactly
+    expect(content[0]).toEqual({
+      type: 'file',
+      media_type: 'text/csv',
+      path: '/workspace/data.csv',
+    });
+    // Image block converted
+    expect(content[1].type).toBe('image');
+    expect(typeof content[1].path).toBe('string');
+  });
+});
+
+describe('CodeEvaluator multimodal integration', () => {
+  let tmpDir: string;
+
+  beforeEach(async () => {
+    tmpDir = await mkdtemp(join(tmpdir(), 'code-eval-mm-'));
+  });
+
+  afterEach(async () => {
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
+  it('sends text-only output unchanged to grader', async () => {
+    const command = await createPayloadEchoGrader(tmpDir);
+    const output = [{ role: 'assistant' as const, content: 'Hello world' }];
+
+    const evaluator = new CodeEvaluator({ command });
+    const result = await evaluator.evaluate({
+      evalCase: baseTestCase,
+      candidate: 'answer',
+      output,
+    });
+
+    expect(result.score).toBe(1.0);
+    const details = result.details as Record<string, unknown>;
+    const payload = details.payload as Record<string, unknown>;
+    const outputMsgs = payload.output as Record<string, unknown>[];
+    expect(outputMsgs[0].content).toBe('Hello world');
+  });
+
+  it('materializes image data URIs in output for grader', async () => {
+    const command = await createPayloadEchoGrader(tmpDir);
+    const imageData = 'test-image-bytes';
+    const dataUri = toDataUri('image/png', imageData);
+
+    const output = [
+      {
+        role: 'assistant' as const,
+        content: [
+          { type: 'text' as const, text: 'Generated chart:' },
+          { type: 'image' as const, media_type: 'image/png', source: dataUri },
+        ],
+      },
+    ];
+
+    const evaluator = new CodeEvaluator({ command });
+    const result = await evaluator.evaluate({
+      evalCase: baseTestCase,
+      candidate: 'answer',
+      output,
+    });
+
+    expect(result.score).toBe(1.0);
+
+    // Verify the grader received the payload with image paths (not data URIs)
+    const details = result.details as Record<string, unknown>;
+    const payload = details.payload as Record<string, unknown>;
+    const outputMsgs = payload.output as Record<string, unknown>[];
+    const content = outputMsgs[0].content as Record<string, unknown>[];
+
+    // Text block preserved
+    expect(content[0]).toEqual({ type: 'text', text: 'Generated chart:' });
+
+    // Image block has path, not source
+    expect(content[1].type).toBe('image');
+    expect(content[1].media_type).toBe('image/png');
+    expect(typeof content[1].path).toBe('string');
+    expect(content[1]).not.toHaveProperty('source');
+  });
+
+  it('cleans up materialized image temp files after grading', async () => {
+    const command = await createPayloadEchoGrader(tmpDir);
+    const dataUri = toDataUri('image/png', 'cleanup-test');
+    const output = [
+      {
+        role: 'assistant' as const,
+        content: [{ type: 'image' as const, media_type: 'image/png', source: dataUri }],
+      },
+    ];
+    const evaluator = new CodeEvaluator({ command });
+    const result = await evaluator.evaluate({
+      evalCase: baseTestCase,
+      candidate: 'answer',
+      output,
+    });
+    // The echoed payload names the exact temp file, so this holds even with concurrent tests.
+    const payload = (result.details as Record<string, unknown>).payload as Record<string, unknown>;
+    const [message] = payload.output as Record<string, unknown>[];
+    const imagePath = (message.content as Record<string, unknown>[])[0].path as string;
+    const tmpEntries = readdirSync(tmpdir());
+    expect(tmpEntries.some((d) => imagePath.startsWith(join(tmpdir(), d, 'img-')))).toBe(false);
+  });
+});
diff --git a/packages/eval/src/index.ts b/packages/eval/src/index.ts
index c814b698d..2eff5be90 100644
--- a/packages/eval/src/index.ts
+++ b/packages/eval/src/index.ts
@@ -43,6 +43,10 @@ export {
   ToolCallSchema,
   TokenUsageSchema,
   PromptTemplateInputSchema,
+  ContentTextSchema,
+  ContentImageSchema,
+  ContentFileSchema,
+  ContentSchema,
   type CodeGraderInput,
   type CodeGraderResult,
   type TraceSummary,
@@ -50,6 +54,10 @@ export {
   type ToolCall,
   type TokenUsage,
   type PromptTemplateInput,
+  type ContentText,
+  type ContentImage,
+  type ContentFile,
+  type Content,
 } from './schemas.js';
 
 // Re-export target client
diff --git a/packages/eval/src/schemas.ts b/packages/eval/src/schemas.ts
index 3385ac5dd..658ae1bde 100644
--- a/packages/eval/src/schemas.ts
+++ b/packages/eval/src/schemas.ts
@@ -1,6 +1,22 @@
 /**
  * Zod schemas for code grader input/output validation.
  * Provides both compile-time types and runtime validation.
+ *
+ * ## Content model
+ *
+ * `Message.content` accepts `string | Content[]`:
+ * - `string` — backward-compatible plain text (most common case)
+ * - `Content[]` — typed content blocks for multimodal messages
+ *
+ * Content variants:
+ * - `ContentText`  — `{ type: 'text', text: string }`
+ * - `ContentImage` — `{ type: 'image', media_type: string, path: string }` (file path, not base64)
+ * - `ContentFile`  — `{ type: 'file', media_type: string, path: string }`
+ *
+ * To add a new content variant:
+ * 1. Define a new Zod schema with a unique `type` literal
+ * 2. Add it to `ContentSchema` discriminated union
+ * 3. Re-export from `index.ts`
  */
 import { z } from 'zod';
 
@@ -37,12 +53,49 @@ export const ToolCallSchema = z.object({
   durationMs: z.number().optional(),
 });
 
+// ---------------------------------------------------------------------------
+// Content block schemas (discriminated union on `type`)
+// ---------------------------------------------------------------------------
+
+/** Text content block. */
+export const ContentTextSchema = z.object({
+  type: z.literal('text'),
+  text: z.string(),
+});
+
+/**
+ * Image content block.
+ * `path` is a filesystem path (or a URL passed through unchanged) — never inline base64.
+ */
+export const ContentImageSchema = z.object({
+  type: z.literal('image'),
+  media_type: z.string(),
+  path: z.string(),
+});
+
+/** File content block. */
+export const ContentFileSchema = z.object({
+  type: z.literal('file'),
+  media_type: z.string(),
+  path: z.string(),
+});
+
+/** Discriminated union (keyed on `type`) of the text, image, and file content blocks. */
+export const ContentSchema = z.discriminatedUnion('type', [
+  ContentTextSchema,
+  ContentImageSchema,
+  ContentFileSchema,
+]);
+
 /**
  * Unified message schema for input, expected, and output messages.
+ *
+ * `content` is either a plain string or a `Content[]` array of typed blocks.
+ * Use `getTextContent()` from `@agentv/core` to extract plain text from either form.
  */
 export const MessageSchema = z.object({
   role: z.enum(['assistant', 'user', 'system', 'tool']),
-  content: z.union([z.string(), z.record(z.unknown()), z.array(z.record(z.unknown()))]).optional(),
+  content: z.union([z.string(), z.array(ContentSchema)]).optional(),
   toolCalls: z.array(ToolCallSchema).optional(),
   name: z.string().optional(),
   startTime: z.string().optional(),
@@ -106,6 +159,11 @@ export type Message = z.infer<typeof MessageSchema>;
 export type ToolCall = z.infer<typeof ToolCallSchema>;
 export type TokenUsage = z.infer<typeof TokenUsageSchema>;
 
+export type ContentText = z.infer<typeof ContentTextSchema>;
+export type ContentImage = z.infer<typeof ContentImageSchema>;
+export type ContentFile = z.infer<typeof ContentFileSchema>;
+export type Content = z.infer<typeof ContentSchema>;
+
 /**
  * Prompt template input schema (camelCase, converted from snake_case wire format).
  * Uses the same schema as CodeGraderInput since the orchestrator sends identical payloads.
diff --git a/packages/eval/test/define-code-grader.test.ts b/packages/eval/test/define-code-grader.test.ts
index 67a77e878..2db1be399 100644
--- a/packages/eval/test/define-code-grader.test.ts
+++ b/packages/eval/test/define-code-grader.test.ts
@@ -7,8 +7,132 @@ import {
   // Backward-compat aliases
   CodeJudgeInputSchema,
   CodeJudgeResultSchema,
+  ContentFileSchema,
+  ContentImageSchema,
+  ContentSchema,
+  ContentTextSchema,
+  MessageSchema,
 } from '../src/schemas.js';
 
+// ---------------------------------------------------------------------------
+// Content schemas
+// ---------------------------------------------------------------------------
+
+describe('ContentSchema', () => {
+  it('parses ContentText', () => {
+    const result = ContentTextSchema.parse({ type: 'text', text: 'hello' });
+    expect(result).toEqual({ type: 'text', text: 'hello' });
+  });
+
+  it('parses ContentImage with path', () => {
+    const result = ContentImageSchema.parse({
+      type: 'image',
+      media_type: 'image/png',
+      path: '/workspace/chart.png',
+    });
+    expect(result).toEqual({
+      type: 'image',
+      media_type: 'image/png',
+      path: '/workspace/chart.png',
+    });
+  });
+
+  it('parses ContentFile', () => {
+    const result = ContentFileSchema.parse({
+      type: 'file',
+      media_type: 'text/csv',
+      path: '/workspace/data.csv',
+    });
+    expect(result).toEqual({ type: 'file', media_type: 'text/csv', path: '/workspace/data.csv' });
+  });
+
+  it('discriminated union resolves correct variant', () => {
+    const text = ContentSchema.parse({ type: 'text', text: 'hi' });
+    expect(text.type).toBe('text');
+
+    const image = ContentSchema.parse({
+      type: 'image',
+      media_type: 'image/jpeg',
+      path: '/img.jpg',
+    });
+    expect(image.type).toBe('image');
+
+    const file = ContentSchema.parse({
+      type: 'file',
+      media_type: 'application/pdf',
+      path: '/doc.pdf',
+    });
+    expect(file.type).toBe('file');
+  });
+
+  it('rejects unknown content type', () => {
+    expect(() => ContentSchema.parse({ type: 'audio', data: '...' })).toThrow();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// MessageSchema with Content[]
+// ---------------------------------------------------------------------------
+
+describe('MessageSchema content variants', () => {
+  it('accepts string content (backward compat)', () => {
+    const msg = MessageSchema.parse({ role: 'assistant', content: 'Hello' });
+    expect(msg.content).toBe('Hello');
+  });
+
+  it('accepts Content[] with text blocks', () => {
+    const msg = MessageSchema.parse({
+      role: 'assistant',
+      content: [
+        { type: 'text', text: 'paragraph 1' },
+        { type: 'text', text: 'paragraph 2' },
+      ],
+    });
+    expect(Array.isArray(msg.content)).toBe(true);
+    expect(msg.content as unknown[]).toHaveLength(2);
+  });
+
+  it('accepts Content[] with image blocks', () => {
+    const msg = MessageSchema.parse({
+      role: 'assistant',
+      content: [
+        { type: 'text', text: 'Chart:' },
+        { type: 'image', media_type: 'image/png', path: '/chart.png' },
+      ],
+    });
+    const content = msg.content as { type: string }[];
+    expect(content[1].type).toBe('image');
+  });
+
+  it('accepts Content[] with file blocks', () => {
+    const msg = MessageSchema.parse({
+      role: 'assistant',
+      content: [{ type: 'file', media_type: 'text/csv', path: '/data.csv' }],
+    });
+    const content = msg.content as { type: string }[];
+    expect(content[0].type).toBe('file');
+  });
+
+  it('accepts mixed Content[] (text + image + file)', () => {
+    const msg = MessageSchema.parse({
+      role: 'assistant',
+      content: [
+        { type: 'text', text: 'Analysis results:' },
+        { type: 'image', media_type: 'image/png', path: '/chart.png' },
+        { type: 'file', media_type: 'text/csv', path: '/data.csv' },
+      ],
+    });
+    const content = msg.content as { type: string }[];
+    expect(content).toHaveLength(3);
+    expect(content.map((c) => c.type)).toEqual(['text', 'image', 'file']);
+  });
+
+  it('accepts undefined content', () => {
+    const msg = MessageSchema.parse({ role: 'tool' });
+    expect(msg.content).toBeUndefined();
+  });
+});
+
 describe('CodeGraderInputSchema', () => {
   const validInput = {
     criteria: 'The answer should be 4',
@@ -68,6 +192,44 @@ describe('CodeGraderInputSchema', () => {
     const result = CodeGraderInputSchema.parse(inputWithOutput);
     expect(result.output?.[0].toolCalls?.[0].tool).toBe('read');
   });
+
+  it('accepts output with Content[] containing image blocks', () => {
+    const inputWithImages = {
+      ...validInput,
+      output: [
+        {
+          role: 'assistant',
+          content: [
+            { type: 'text', text: 'Generated chart:' },
+            { type: 'image', media_type: 'image/png', path: '/workspace/chart.png' },
+          ],
+        },
+      ],
+    };
+    const result = CodeGraderInputSchema.parse(inputWithImages);
+    const content = result.output?.[0].content as { type: string; path?: string }[];
+    expect(content).toHaveLength(2);
+    expect(content[1].type).toBe('image');
+    expect(content[1].path).toBe('/workspace/chart.png');
+  });
+
+  it('accepts input with Content[] messages', () => {
+    const inputWithContentArray = {
+      ...validInput,
+      input: [
+        {
+          role: 'user',
+          content: [
+            { type: 'text', text: 'Describe this image:' },
+            { type: 'image', media_type: 'image/jpeg', path: '/workspace/photo.jpg' },
+          ],
+        },
+      ],
+    };
+    const result = CodeGraderInputSchema.parse(inputWithContentArray);
+    const content = result.input[0].content as { type: string }[];
+    expect(content).toHaveLength(2);
+  });
 });
 
 describe('CodeGraderResultSchema', () => {