Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 72 additions & 43 deletions packages/core/src/tracing/ai/messageTruncation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,20 @@ type ContentMessage = {
content: string;
};

/**
* One block inside OpenAI / Anthropic `content: [...]` arrays (text, image_url, etc.).
*/
type ContentArrayBlock = {
// Any additional fields carried by the block (e.g. `text`, `url`) are accepted but left untyped.
[key: string]: unknown;
// Names the kind of block, e.g. 'text' or 'image_url'.
type: string;
};

/**
* Message format used by OpenAI and Anthropic APIs for media.
*/
type ContentArrayMessage = {
[key: string]: unknown;
content: {
[key: string]: unknown;
type: string;
}[];
content: ContentArrayBlock[];
};

/**
Expand All @@ -47,6 +52,11 @@ type MediaPart = {
content: string;
};

/**
* One element of an array-based message: OpenAI/Anthropic `content[]` or Google `parts`.
*/
type ArrayMessageItem = TextPart | MediaPart | ContentArrayBlock;

/**
* Calculate the UTF-8 byte length of a string.
*/
Expand Down Expand Up @@ -95,31 +105,33 @@ function truncateTextByBytes(text: string, maxBytes: number): string {
}

/**
* Extract text content from a Google GenAI message part.
* Parts are either plain strings or objects with a text property.
* Extract text content from a message item.
* Handles plain strings and objects with a text property.
*
* @returns The text content
*/
function getPartText(part: TextPart | MediaPart): string {
if (typeof part === 'string') {
return part;
function getItemText(item: ArrayMessageItem): string {
if (typeof item === 'string') {
return item;
}
if ('text' in item && typeof item.text === 'string') {
return item.text;
}
if ('text' in part) return part.text;
return '';
}

/**
* Create a new part with updated text content while preserving the original structure.
* Create a new item with updated text content while preserving the original structure.
*
* @param part - Original part (string or object)
* @param item - Original item (string or object)
* @param text - New text content
* @returns New part with updated text
* @returns New item with updated text
*/
function withPartText(part: TextPart | MediaPart, text: string): TextPart {
if (typeof part === 'string') {
function withItemText(item: ArrayMessageItem, text: string): ArrayMessageItem {
if (typeof item === 'string') {
return text;
}
return { ...part, text };
return { ...item, text };
}

/**
Expand Down Expand Up @@ -176,56 +188,78 @@ function truncateContentMessage(message: ContentMessage, maxBytes: number): unkn
}

/**
* Truncate a message with `parts: [...]` format (Google GenAI).
* Keeps as many complete parts as possible, only truncating the first part if needed.
* Extracts the array items and their key from an array-based message.
* Returns `null` key if neither `parts` nor `content` is a valid array.
*/
function getArrayItems(message: PartsMessage | ContentArrayMessage): {
  key: 'parts' | 'content' | null;
  items: ArrayMessageItem[];
} {
  // `parts` wins when both `parts` and `content` hold arrays.
  if ('parts' in message && Array.isArray(message.parts)) {
    return { key: 'parts', items: message.parts };
  }

  // Otherwise fall back to `content`; if that is not an array either,
  // signal "nothing to truncate" with a null key and an empty item list.
  return 'content' in message && Array.isArray(message.content)
    ? { key: 'content', items: message.content }
    : { key: null, items: [] };
}

/**
* Truncate a message with an array-based format.
* Handles both `parts: [...]` (Google GenAI) and `content: [...]` (OpenAI/Anthropic multimodal).
* Keeps as many complete items as possible, only truncating the first item if needed.
*
* @param message - Message with parts array
* @param message - Message with parts or content array
* @param maxBytes - Maximum byte limit
* @returns Array with truncated message, or empty array if it doesn't fit
*/
function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[] {
const { parts } = message;
function truncateArrayMessage(message: PartsMessage | ContentArrayMessage, maxBytes: number): unknown[] {
const { key, items } = getArrayItems(message);

// Calculate overhead by creating empty text parts
const emptyParts = parts.map(part => withPartText(part, ''));
const overhead = jsonBytes({ ...message, parts: emptyParts });
if (key === null || items.length === 0) {
return [];
}

// Calculate overhead by creating empty text items
const emptyItems = items.map(item => withItemText(item, ''));
const overhead = jsonBytes({ ...message, [key]: emptyItems });
let remainingBytes = maxBytes - overhead;

if (remainingBytes <= 0) {
return [];
}

// Include parts until we run out of space
const includedParts: (TextPart | MediaPart)[] = [];
// Include items until we run out of space
const includedItems: ArrayMessageItem[] = [];

for (const part of parts) {
const text = getPartText(part);
for (const item of items) {
const text = getItemText(item);
const textSize = utf8Bytes(text);

if (textSize <= remainingBytes) {
// Part fits: include it as-is
includedParts.push(part);
// Item fits: include it as-is
includedItems.push(item);
remainingBytes -= textSize;
} else if (includedParts.length === 0) {
// First part doesn't fit: truncate it
} else if (includedItems.length === 0) {
// First item doesn't fit: truncate it
const truncated = truncateTextByBytes(text, remainingBytes);
if (truncated) {
includedParts.push(withPartText(part, truncated));
includedItems.push(withItemText(item, truncated));
}
break;
} else {
// Subsequent part doesn't fit: stop here
// Subsequent item doesn't fit: stop here
break;
}
}

/* c8 ignore start
* for type safety only, algorithm guarantees SOME text included */
if (includedParts.length <= 0) {
if (includedItems.length <= 0) {
return [];
} else {
/* c8 ignore stop */
return [{ ...message, parts: includedParts }];
return [{ ...message, [key]: includedItems }];
}
}

Expand Down Expand Up @@ -258,13 +292,8 @@ function truncateSingleMessage(message: unknown, maxBytes: number): unknown[] {
return truncateContentMessage(message, maxBytes);
}

if (isContentArrayMessage(message)) {
// Content array messages are returned as-is without truncation
return [message];
}

if (isPartsMessage(message)) {
return truncatePartsMessage(message, maxBytes);
if (isContentArrayMessage(message) || isPartsMessage(message)) {
return truncateArrayMessage(message, maxBytes);
}

// Unknown message format: cannot truncate safely
Expand Down
61 changes: 61 additions & 0 deletions packages/core/test/lib/tracing/ai-message-truncation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -547,5 +547,66 @@ describe('message truncation utilities', () => {
},
]);
});

// A single oversized text block in a `content: [...]` message should be cut down
// rather than the whole message being returned untouched or dropped.
it('truncates content array message when first text item does not fit', () => {
const messages = [
{
role: 'user',
content: [{ type: 'text', text: `2 ${humongous}` }],
},
];
const result = truncateGenAiMessages(messages);
// Expected number of characters of text that survive truncation:
//   20_000  - presumably the default byte limit used by truncateGenAiMessages (TODO confirm)
//   2       - presumably the brackets of the enclosing messages array (TODO confirm)
//   JSON... - size of the message with its text emptied, i.e. the non-text overhead
// NOTE(review): `.length` / `.substring` count UTF-16 code units, so this assumes
// `humongous` contains only single-byte characters - confirm against its definition.
const truncLen =
20_000 -
2 -
JSON.stringify({
role: 'user',
content: [{ type: 'text', text: '' }],
}).length;
expect(result).toStrictEqual([
{
role: 'user',
content: [{ type: 'text', text: `2 ${humongous}`.substring(0, truncLen) }],
},
]);
});

// When several items are present, leading items that fit are kept unchanged and
// the remaining items are dropped rather than truncated.
it('drops subsequent content array items that do not fit', () => {
const messages = [
{
role: 'assistant',
content: [
{ type: 'text', text: `1 ${big}` },
// Non-text block: it has no `text` field, so it adds nothing to the text byte count.
{ type: 'image_url', url: 'https://example.com/img.png' },
{ type: 'text', text: `2 ${big}` },
{ type: 'text', text: `3 ${big}` },
// NOTE(review): presumably `giant` is large enough that item 4 exceeds the remaining
// budget - confirm against the fixture definitions of `big` and `giant`.
{ type: 'text', text: `4 ${giant}` },
{ type: 'text', text: `5 ${giant}` },
],
},
];
const result = truncateGenAiMessages(messages);
// Items 1-3 and the image block survive as-is; items 4 and 5 are expected to be absent.
expect(result).toStrictEqual([
{
role: 'assistant',
content: [
{ type: 'text', text: `1 ${big}` },
{ type: 'image_url', url: 'https://example.com/img.png' },
{ type: 'text', text: `2 ${big}` },
{ type: 'text', text: `3 ${big}` },
],
},
]);
});

// If the message is too large even with all item text emptied, nothing can be
// salvaged by truncating text, so the message is expected to be dropped entirely.
it('drops content array message if overhead is too large', () => {
const messages = [
{
// NOTE(review): presumably `humongous` alone exceeds the byte limit, which makes the
// non-text overhead too large - confirm against the fixture definition.
some_other_field: humongous,
content: [{ type: 'text', text: 'hello' }],
},
];
expect(truncateGenAiMessages(messages)).toStrictEqual([]);
});
});
});
Loading