Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions packages/core/src/evaluation/content.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/**
* Multimodal content types for the AgentV pipeline.
*
* Models structured content blocks (text, images, files) that flow end-to-end
* without lossy flattening. Modeled after Inspect AI's discriminated union approach.
*
* ## Content model
*
* `Message.content` accepts `string | Content[]`:
* - `string` — backward-compatible plain text (most common case)
* - `Content[]` — array of typed content blocks for multimodal messages
*
* Binary data (images, files) is referenced by URL/base64 string or filesystem
* path — never raw bytes. This keeps payloads serializable and lets code graders
* access files via path without decoding.
*
* ## How to extend
*
* To add a new content variant (e.g., `ContentAudio`):
* 1. Define the interface with a unique `type` discriminant
* 2. Add it to the `Content` union
* 3. Update `getTextContent()` if the new type has extractable text
* 4. Update `isContent()` type guard with the new type string
*/

// ---------------------------------------------------------------------------
// Content block types
// ---------------------------------------------------------------------------

/**
* A text content block.
* This is the only variant whose payload `getTextContent()` extracts.
*/
export interface ContentText {
/** Discriminant identifying this block as text. */
readonly type: 'text';
/** The text carried by this block. */
readonly text: string;
}

/**
* An image content block.
* `source` is a URL, data URI (base64), or filesystem path.
* The image is always referenced by string — never embedded as raw bytes.
*/
export interface ContentImage {
/** Discriminant identifying this block as an image. */
readonly type: 'image';
/** Media type of the image — presumably a MIME type such as `image/png`; confirm against producers. */
readonly media_type: string;
/** URL, data URI, or filesystem path locating the image data. */
readonly source: string;
}

/**
* A file content block.
* `path` is a filesystem path or URL referencing the file.
* The file is always referenced by string — never embedded as raw bytes.
*/
export interface ContentFile {
/** Discriminant identifying this block as a file. */
readonly type: 'file';
/** Media type of the file — presumably a MIME type such as `application/pdf`; confirm against producers. */
readonly media_type: string;
/** Filesystem path or URL locating the file. */
readonly path: string;
}

/**
* Discriminated union of all content block types.
* Narrow on the `type` field (`'text'`, `'image'`, `'file'`) to reach variant-specific fields.
*/
export type Content = ContentText | ContentImage | ContentFile;

// ---------------------------------------------------------------------------
// Type guards
// ---------------------------------------------------------------------------

/** `type` discriminants accepted by `isContent()`. */
const CONTENT_TYPES = new Set<string>(['text', 'image', 'file']);

/**
 * String-valued fields each content variant must carry, keyed by discriminant.
 * A discriminant listed in `CONTENT_TYPES` without an entry here is accepted
 * on its `type` alone.
 */
const REQUIRED_STRING_FIELDS: Readonly<Record<string, readonly string[]>> = {
  text: ['text'],
  image: ['media_type', 'source'],
  file: ['media_type', 'path'],
};

/**
 * Check whether a value is a valid `Content` block.
 *
 * A value qualifies when it is a non-null object whose `type` is a known
 * discriminant AND whose variant-specific fields are all strings. Checking the
 * fields (not just `type`) keeps the predicate honest: after narrowing,
 * `block.text` / `block.source` / `block.path` really are strings.
 *
 * @param value - Any value, typically parsed from untrusted JSON.
 * @returns `true` if `value` can safely be treated as a `Content` block.
 */
export function isContent(value: unknown): value is Content {
  if (value === null || typeof value !== 'object') return false;

  const v = value as Record<string, unknown>;
  if (typeof v.type !== 'string' || !CONTENT_TYPES.has(v.type)) return false;

  // The Set check above guarantees `v.type` is one of our own keys, so this
  // lookup can never hit an inherited Object.prototype property.
  const requiredFields = REQUIRED_STRING_FIELDS[v.type] ?? [];
  return requiredFields.every((field) => typeof v[field] === 'string');
}

/**
 * Check whether a value is a non-empty array in which every element is a
 * valid `Content` block.
 *
 * An empty array is rejected, so callers can treat a positive result as
 * "there is structured content here".
 */
export function isContentArray(value: unknown): value is Content[] {
  if (!Array.isArray(value)) return false;
  if (value.length === 0) return false;
  return value.every((item) => isContent(item));
}

// ---------------------------------------------------------------------------
// Accessors
// ---------------------------------------------------------------------------

/**
 * Extract plain text from `string | Content[]`.
 *
 * - If `content` is a string, returns it directly.
 * - If `content` is a `Content[]`, concatenates all `ContentText.text` values
 *   (separated by newlines) and returns the result. Non-text blocks are ignored.
 * - Returns `''` for `undefined`/`null`/unrecognized shapes. Malformed array
 *   elements (non-objects, or text blocks whose `text` is not a string) are
 *   skipped rather than throwing or contributing empty segments.
 *
 * This is a **non-destructive** accessor — the original `Content[]` is preserved.
 *
 * @param content - Message content in either supported representation.
 * @returns The text portion of `content`, or `''` when there is none.
 */
export function getTextContent(content: string | Content[] | undefined | null): string {
  if (content == null) return '';
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const parts: string[] = [];
  // Elements are treated as unknown: this function is exported and may be fed
  // data that only claims to be Content[] (e.g. parsed JSON), so each block is
  // validated before its fields are read.
  for (const block of content as readonly unknown[]) {
    if (block === null || typeof block !== 'object') continue;
    const { type, text } = block as { type?: unknown; text?: unknown };
    if (type === 'text' && typeof text === 'string') {
      parts.push(text);
    }
  }
  return parts.join('\n');
}
47 changes: 46 additions & 1 deletion packages/core/src/evaluation/providers/claude-cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type { WriteStream } from 'node:fs';
import { mkdir } from 'node:fs/promises';
import path from 'node:path';

import type { Content } from '../content.js';
import { recordClaudeLogEntry } from './claude-log-tracker.js';
import { buildPromptDocument, normalizeInputFiles } from './preread.js';
import type { ClaudeResolvedConfig } from './targets.js';
Expand Down Expand Up @@ -78,12 +79,13 @@ export class ClaudeCliProvider implements Provider {
if (betaMessage && typeof betaMessage === 'object') {
const msg = betaMessage as Record<string, unknown>;
const content = msg.content;
const structuredContent = toContentArray(content);
const textContent = extractTextContent(content);
const toolCalls = extractToolCalls(content);

const outputMsg: Message = {
role: 'assistant',
content: textContent,
content: structuredContent ?? textContent,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
};
output.push(outputMsg);
Expand Down Expand Up @@ -477,6 +479,49 @@ function summarizeEvent(event: Record<string, unknown>): string | undefined {
}
}

/**
 * Convert Claude's content array to `Content[]`, preserving non-text blocks.
 *
 * Returns `undefined` if `content` is not an array or contains no image
 * blocks — in that case the simpler string representation loses nothing.
 *
 * Image handling:
 * - `media_type` is read from the block itself, then from `source.media_type`,
 *   defaulting to `application/octet-stream`.
 * - `source` becomes a data URI when `source.data` (base64) is present;
 *   otherwise the URL from `source.url`, then the block-level `url`;
 *   otherwise `''`.
 *
 * `tool_use` and `tool_result` blocks are intentionally dropped: tool calls
 * are surfaced separately as `ToolCall`s and tool results are not user content.
 *
 * @param content - The raw `content` field of a Claude message.
 * @returns Structured blocks when at least one image is present, else `undefined`.
 */
function toContentArray(content: unknown): Content[] | undefined {
  if (!Array.isArray(content)) return undefined;

  let hasNonText = false;
  const blocks: Content[] = [];

  for (const part of content) {
    if (!part || typeof part !== 'object') continue;
    const p = part as Record<string, unknown>;

    if (p.type === 'text' && typeof p.text === 'string') {
      blocks.push({ type: 'text', text: p.text });
      continue;
    }

    if (p.type === 'image' && typeof p.source === 'object' && p.source !== null) {
      const src = p.source as Record<string, unknown>;

      let mediaType = 'application/octet-stream';
      if (typeof p.media_type === 'string') {
        mediaType = p.media_type;
      } else if (typeof src.media_type === 'string') {
        mediaType = src.media_type;
      }

      let source = '';
      if (typeof src.data === 'string') {
        source = `data:${mediaType};base64,${src.data}`;
      } else if (typeof src.url === 'string') {
        // URL-sourced images carry the URL inside `source`, not on the block.
        source = src.url;
      } else if (typeof p.url === 'string') {
        // Fallback kept for payloads that put `url` on the block itself.
        source = p.url;
      }

      blocks.push({ type: 'image', media_type: mediaType, source });
      hasNonText = true;
    }
    // Every other block type (tool_use, tool_result, unknown) is skipped.
  }

  // hasNonText implies blocks is non-empty, so no separate length check is needed.
  return hasNonText ? blocks : undefined;
}

/**
* Extract text content from Claude's content array format.
*/
Expand Down
8 changes: 7 additions & 1 deletion packages/core/src/evaluation/providers/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import { promisify } from 'node:util';

import { z } from 'zod';

import type { Content } from '../content.js';
import { isContentArray } from '../content.js';
import { readTextFile } from '../file-utils.js';
import type { CliResolvedConfig } from './targets.js';
import type {
Expand Down Expand Up @@ -124,7 +126,11 @@ function convertMessages(
return messages.map((msg) => ({
role: msg.role,
name: msg.name,
content: msg.content,
content: isContentArray(msg.content)
? (msg.content as Content[])
: typeof msg.content === 'string'
? msg.content
: undefined,
toolCalls: msg.tool_calls?.map((tc) => ({
tool: tc.tool,
input: tc.input,
Expand Down
2 changes: 2 additions & 0 deletions packages/core/src/evaluation/providers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ export type {
ToolCall,
} from './types.js';

export { extractLastAssistantContent } from './types.js';

export type {
AgentVResolvedConfig,
AnthropicResolvedConfig,
Expand Down
11 changes: 9 additions & 2 deletions packages/core/src/evaluation/providers/types.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import type { Content } from '../content.js';
import { getTextContent, isContentArray } from '../content.js';
import type { JsonObject } from '../types.js';

export type ChatMessageRole = 'system' | 'user' | 'assistant' | 'tool' | 'function';
Expand Down Expand Up @@ -169,8 +171,8 @@ export interface Message {
readonly role: string;
/** Optional name for the message sender */
readonly name?: string;
/** Message content */
readonly content?: unknown;
/** Message content — plain string or structured content blocks for multimodal data. */
readonly content?: string | Content[];
/** Tool calls made in this message */
readonly toolCalls?: readonly ToolCall[];
/** ISO 8601 timestamp when the message started */
Expand Down Expand Up @@ -222,6 +224,8 @@ export interface ProviderResponse {
/**
* Extract the content from the last assistant message in an output message array.
* Returns empty string if no assistant message found.
*
* Handles both plain-string content and Content[] (extracts text blocks).
*/
export function extractLastAssistantContent(messages: readonly Message[] | undefined): string {
if (!messages || messages.length === 0) {
Expand All @@ -235,6 +239,9 @@ export function extractLastAssistantContent(messages: readonly Message[] | undef
if (typeof msg.content === 'string') {
return msg.content;
}
if (isContentArray(msg.content)) {
return getTextContent(msg.content);
}
return JSON.stringify(msg.content);
}
}
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
export * from './evaluation/content.js';
export * from './evaluation/types.js';
export * from './evaluation/trace.js';
export * from './evaluation/yaml-parser.js';
Expand Down
Loading
Loading