Commit 1b2eea3

feat(core): simplify truncation logic to only keep the newest message (#18906)
**Changes**

- Simplify AI message truncation to always keep only the last message and truncate it if needed. Previously we dropped the oldest messages one by one until we fit within the limit.
- Set the embeddings input as a separate attribute, `gen_ai.embeddings.input`, and do not truncate it.
- Add support for truncating plain string arrays. We may not need this at the moment, since we no longer truncate embeddings, but it shouldn't hurt.

**Test Updates**

- Updated all node integration and unit tests to work with the new logic.
- Extended the node integration suite to always test the two main scenarios: keep only the last message, and either truncate it if it is large or leave it as is if it is small.
- Removed the embeddings truncation tests and instead added a new embeddings scenario that checks that all embeddings inputs are kept (even when an array with multiple entries is passed).

Closes #18916
1 parent e29ac20 commit 1b2eea3

19 files changed (+353, -264 lines)
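To make the new behavior concrete before the diffs: below is a minimal TypeScript sketch of the keep-newest rule, assuming a flat `{ role, content }` message shape and the ~20KB limit the test comments mention. Names, types, and the constant are illustrative, not the SDK's actual implementation.

```ts
// Hypothetical sketch of the simplified truncation, not the SDK's real code.
type GenAiMessage = { role: string; content: string };

const MAX_BYTES = 20_000; // assumed limit, taken from the test comments (~20KB)

// Keep only the newest message; if it is still too large, crop its content.
// (The old logic instead dropped the oldest messages one by one until the
// remainder fit within the limit.)
function truncateMessages(messages: GenAiMessage[]): GenAiMessage[] {
  const last = messages[messages.length - 1];
  if (!last) return [];
  // Character count used as a rough byte proxy; the test content is ASCII.
  return [{ ...last, content: last.content.slice(0, MAX_BYTES) }];
}

// The same keep-newest rule for plain string arrays, which the commit says
// is now supported as well.
function truncateStringArray(values: string[]): string[] {
  const last = values[values.length - 1];
  return last === undefined ? [] : [last.slice(0, MAX_BYTES)];
}
```

This is why the scenarios below build the last message as `'C'.repeat(25000) + 'D'.repeat(25000)`: the crop point lands inside the C's, so a correctly truncated attribute contains only C's and no D's.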

dev-packages/node-integration-tests/suites/tracing/anthropic/scenario-media-truncation.mjs

Lines changed: 5 additions & 4 deletions
@@ -49,10 +49,15 @@ async function run() {
   const client = instrumentAnthropicAiClient(mockClient);

   // Send the image showing the number 3
+  // Put the image in the last message so it doesn't get dropped
   await client.messages.create({
     model: 'claude-3-haiku-20240307',
     max_tokens: 1024,
     messages: [
+      {
+        role: 'user',
+        content: 'what number is this?',
+      },
       {
         role: 'user',
         content: [
@@ -66,10 +71,6 @@ async function run() {
           },
         ],
       },
-      {
-        role: 'user',
-        content: 'what number is this?',
-      },
     ],
     temperature: 0.7,
   });

dev-packages/node-integration-tests/suites/tracing/anthropic/scenario-message-truncation.mjs

Lines changed: 17 additions & 4 deletions
@@ -48,12 +48,11 @@ async function run() {

   const client = instrumentAnthropicAiClient(mockClient);

-  // Create 3 large messages where:
-  // - First 2 messages are very large (will be dropped)
-  // - Last message is large but will be truncated to fit within the 20KB limit
+  // Test 1: Given an array of messages only the last message should be kept
+  // The last message should be truncated to fit within the 20KB limit
   const largeContent1 = 'A'.repeat(15000); // ~15KB
   const largeContent2 = 'B'.repeat(15000); // ~15KB
-  const largeContent3 = 'C'.repeat(25000); // ~25KB (will be truncated)
+  const largeContent3 = 'C'.repeat(25000) + 'D'.repeat(25000); // ~50KB (will be truncated, only C's remain)

   await client.messages.create({
     model: 'claude-3-haiku-20240307',
@@ -65,6 +64,20 @@ async function run() {
     ],
     temperature: 0.7,
   });
+
+  // Test 2: Given an array of messages only the last message should be kept
+  // The last message is small, so it should be kept intact
+  const smallContent = 'This is a small message that fits within the limit';
+  await client.messages.create({
+    model: 'claude-3-haiku-20240307',
+    max_tokens: 100,
+    messages: [
+      { role: 'user', content: largeContent1 },
+      { role: 'assistant', content: largeContent2 },
+      { role: 'user', content: smallContent },
+    ],
+    temperature: 0.7,
+  });
 });
 }
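Spelling out the Test 1 expectation: only `largeContent3` survives the keep-newest rule, and its `'D'` tail falls past the limit. A sketch of the asserted attribute, where the exact crop length is an assumption (the real cut depends on the serialized size):

```ts
// Illustrative shape of 'gen_ai.request.messages' after Test 1; only the
// last message remains and only its 'C' prefix survives the crop.
const expectedMessagesAttribute = JSON.stringify([
  { role: 'user', content: 'C'.repeat(20_000) }, // approximate crop length
]);
```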

dev-packages/node-integration-tests/suites/tracing/anthropic/test.ts

Lines changed: 21 additions & 6 deletions
@@ -86,8 +86,7 @@ describe('Anthropic integration', () => {
         data: expect.objectContaining({
           'gen_ai.operation.name': 'messages',
           'gen_ai.request.max_tokens': 100,
-          'gen_ai.request.messages':
-            '[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"What is the capital of France?"}]',
+          'gen_ai.request.messages': '[{"role":"user","content":"What is the capital of France?"}]',
           'gen_ai.request.model': 'claude-3-haiku-20240307',
           'gen_ai.request.temperature': 0.7,
           'gen_ai.response.id': 'msg_mock123',
@@ -638,6 +637,7 @@ describe('Anthropic integration', () => {
       transaction: {
         transaction: 'main',
         spans: expect.arrayContaining([
+          // First call: Last message is large and gets truncated (only C's remain, D's are cropped)
           expect.objectContaining({
             data: expect.objectContaining({
               'gen_ai.operation.name': 'messages',
@@ -653,6 +653,24 @@ describe('Anthropic integration', () => {
             origin: 'auto.ai.anthropic',
             status: 'ok',
           }),
+          // Second call: Last message is small and kept without truncation
+          expect.objectContaining({
+            data: expect.objectContaining({
+              'gen_ai.operation.name': 'messages',
+              'sentry.op': 'gen_ai.messages',
+              'sentry.origin': 'auto.ai.anthropic',
+              'gen_ai.system': 'anthropic',
+              'gen_ai.request.model': 'claude-3-haiku-20240307',
+              // Small message should be kept intact
+              'gen_ai.request.messages': JSON.stringify([
+                { role: 'user', content: 'This is a small message that fits within the limit' },
+              ]),
+            }),
+            description: 'messages claude-3-haiku-20240307',
+            op: 'gen_ai.messages',
+            origin: 'auto.ai.anthropic',
+            status: 'ok',
+          }),
         ]),
       },
     })
@@ -677,6 +695,7 @@ describe('Anthropic integration', () => {
           'sentry.origin': 'auto.ai.anthropic',
           'gen_ai.system': 'anthropic',
           'gen_ai.request.model': 'claude-3-haiku-20240307',
+          // Only the last message (with filtered media) should be kept
           'gen_ai.request.messages': JSON.stringify([
            {
              role: 'user',
@@ -691,10 +710,6 @@ describe('Anthropic integration', () => {
              },
            ],
          },
-         {
-           role: 'user',
-           content: 'what number is this?',
-         },
       ]),
     }),
     description: 'messages claude-3-haiku-20240307',

dev-packages/node-integration-tests/suites/tracing/google-genai/scenario-message-truncation.mjs

Lines changed: 20 additions & 4 deletions
@@ -43,12 +43,11 @@ async function run() {

   const client = instrumentGoogleGenAIClient(mockClient);

-  // Create 3 large messages where:
-  // - First 2 messages are very large (will be dropped)
-  // - Last message is large but will be truncated to fit within the 20KB limit
+  // Test 1: Given an array of messages only the last message should be kept
+  // The last message should be truncated to fit within the 20KB limit
   const largeContent1 = 'A'.repeat(15000); // ~15KB
   const largeContent2 = 'B'.repeat(15000); // ~15KB
-  const largeContent3 = 'C'.repeat(25000); // ~25KB (will be truncated)
+  const largeContent3 = 'C'.repeat(25000) + 'D'.repeat(25000); // ~50KB (will be truncated, only C's remain)

   await client.models.generateContent({
     model: 'gemini-1.5-flash',
@@ -63,6 +62,23 @@ async function run() {
       { role: 'user', parts: [{ text: largeContent3 }] },
     ],
   });
+
+  // Test 2: Given an array of messages only the last message should be kept
+  // The last message is small, so it should be kept intact
+  const smallContent = 'This is a small message that fits within the limit';
+  await client.models.generateContent({
+    model: 'gemini-1.5-flash',
+    config: {
+      temperature: 0.7,
+      topP: 0.9,
+      maxOutputTokens: 100,
+    },
+    contents: [
+      { role: 'user', parts: [{ text: largeContent1 }] },
+      { role: 'model', parts: [{ text: largeContent2 }] },
+      { role: 'user', parts: [{ text: smallContent }] },
+    ],
+  });
 });
 }

dev-packages/node-integration-tests/suites/tracing/google-genai/test.ts

Lines changed: 23 additions & 3 deletions
@@ -94,9 +94,7 @@ describe('Google GenAI integration', () => {
           'gen_ai.request.temperature': 0.8,
           'gen_ai.request.top_p': 0.9,
           'gen_ai.request.max_tokens': 150,
-          'gen_ai.request.messages': expect.stringMatching(
-            /\[\{"role":"system","content":"You are a friendly robot who likes to be funny."\},/,
-          ), // Should include history when recordInputs: true
+          'gen_ai.request.messages': '[{"role":"user","parts":[{"text":"Hello, how are you?"}]}]',
         }),
         description: 'chat gemini-1.5-pro create',
         op: 'gen_ai.chat',
@@ -504,6 +502,7 @@ describe('Google GenAI integration', () => {
       transaction: {
         transaction: 'main',
         spans: expect.arrayContaining([
+          // First call: Last message is large and gets truncated (only C's remain, D's are cropped)
          expect.objectContaining({
            data: expect.objectContaining({
              'gen_ai.operation.name': 'models',
@@ -521,6 +520,27 @@ describe('Google GenAI integration', () => {
            origin: 'auto.ai.google_genai',
            status: 'ok',
          }),
+          // Second call: Last message is small and kept without truncation
+          expect.objectContaining({
+            data: expect.objectContaining({
+              'gen_ai.operation.name': 'models',
+              'sentry.op': 'gen_ai.models',
+              'sentry.origin': 'auto.ai.google_genai',
+              'gen_ai.system': 'google_genai',
+              'gen_ai.request.model': 'gemini-1.5-flash',
+              // Small message should be kept intact
+              'gen_ai.request.messages': JSON.stringify([
+                {
+                  role: 'user',
+                  parts: [{ text: 'This is a small message that fits within the limit' }],
+                },
+              ]),
+            }),
+            description: 'models gemini-1.5-flash',
+            op: 'gen_ai.models',
+            origin: 'auto.ai.google_genai',
+            status: 'ok',
+          }),
         ]),
       },
     })

dev-packages/node-integration-tests/suites/tracing/langchain/scenario-message-truncation.mjs

Lines changed: 14 additions & 4 deletions
@@ -51,17 +51,27 @@ async function run() {

   const largeContent1 = 'A'.repeat(15000); // ~15KB
   const largeContent2 = 'B'.repeat(15000); // ~15KB
-  const largeContent3 = 'C'.repeat(25000); // ~25KB (will be truncated)
+  const largeContent3 = 'C'.repeat(25000) + 'D'.repeat(25000); // ~50KB (will be truncated, only C's remain)

-  // Create one very large string that gets truncated to only include Cs
-  await model.invoke(largeContent3 + largeContent2);
+  // Test 1: Create one very large string that gets truncated to only include Cs
+  await model.invoke(largeContent3);

-  // Create an array of messages that gets truncated to only include the last message (result should again contain only Cs)
+  // Test 2: Create an array of messages that gets truncated to only include the last message
+  // The last message should be truncated to fit within the 20KB limit (result should again contain only Cs)
   await model.invoke([
     { role: 'system', content: largeContent1 },
     { role: 'user', content: largeContent2 },
     { role: 'user', content: largeContent3 },
   ]);
+
+  // Test 3: Given an array of messages only the last message should be kept
+  // The last message is small, so it should be kept intact
+  const smallContent = 'This is a small message that fits within the limit';
+  await model.invoke([
+    { role: 'system', content: largeContent1 },
+    { role: 'user', content: largeContent2 },
+    { role: 'user', content: smallContent },
+  ]);
 });

 await Sentry.flush(2000);

dev-packages/node-integration-tests/suites/tracing/langchain/test.ts

Lines changed: 20 additions & 0 deletions
@@ -198,6 +198,7 @@ describe('LangChain integration', () => {
   const EXPECTED_TRANSACTION_MESSAGE_TRUNCATION = {
     transaction: 'main',
     spans: expect.arrayContaining([
+      // First call: String input truncated (only C's remain, D's are cropped)
       expect.objectContaining({
         data: expect.objectContaining({
           'gen_ai.operation.name': 'chat',
@@ -213,6 +214,7 @@ describe('LangChain integration', () => {
         origin: 'auto.ai.langchain',
         status: 'ok',
       }),
+      // Second call: Array input, last message truncated (only C's remain, D's are cropped)
       expect.objectContaining({
         data: expect.objectContaining({
           'gen_ai.operation.name': 'chat',
@@ -228,6 +230,24 @@ describe('LangChain integration', () => {
         origin: 'auto.ai.langchain',
         status: 'ok',
       }),
+      // Third call: Last message is small and kept without truncation
+      expect.objectContaining({
+        data: expect.objectContaining({
+          'gen_ai.operation.name': 'chat',
+          'sentry.op': 'gen_ai.chat',
+          'sentry.origin': 'auto.ai.langchain',
+          'gen_ai.system': 'anthropic',
+          'gen_ai.request.model': 'claude-3-5-sonnet-20241022',
+          // Small message should be kept intact
+          'gen_ai.request.messages': JSON.stringify([
+            { role: 'user', content: 'This is a small message that fits within the limit' },
+          ]),
+        }),
+        description: 'chat claude-3-5-sonnet-20241022',
+        op: 'gen_ai.chat',
+        origin: 'auto.ai.langchain',
+        status: 'ok',
+      }),
     ]),
   };

dev-packages/node-integration-tests/suites/tracing/langchain/v1/scenario-message-truncation.mjs

Lines changed: 14 additions & 4 deletions
@@ -51,17 +51,27 @@ async function run() {

   const largeContent1 = 'A'.repeat(15000); // ~15KB
   const largeContent2 = 'B'.repeat(15000); // ~15KB
-  const largeContent3 = 'C'.repeat(25000); // ~25KB (will be truncated)
+  const largeContent3 = 'C'.repeat(25000) + 'D'.repeat(25000); // ~50KB (will be truncated, only C's remain)

-  // Create one very large string that gets truncated to only include Cs
-  await model.invoke(largeContent3 + largeContent2);
+  // Test 1: Create one very large string that gets truncated to only include Cs
+  await model.invoke(largeContent3);

-  // Create an array of messages that gets truncated to only include the last message (result should again contain only Cs)
+  // Test 2: Create an array of messages that gets truncated to only include the last message
+  // The last message should be truncated to fit within the 20KB limit (result should again contain only Cs)
   await model.invoke([
     { role: 'system', content: largeContent1 },
     { role: 'user', content: largeContent2 },
     { role: 'user', content: largeContent3 },
   ]);
+
+  // Test 3: Given an array of messages only the last message should be kept
+  // The last message is small, so it should be kept intact
+  const smallContent = 'This is a small message that fits within the limit';
+  await model.invoke([
+    { role: 'system', content: largeContent1 },
+    { role: 'user', content: largeContent2 },
+    { role: 'user', content: smallContent },
+  ]);
 });

 await Sentry.flush(2000);

dev-packages/node-integration-tests/suites/tracing/langchain/v1/test.ts

Lines changed: 20 additions & 0 deletions
@@ -241,6 +241,7 @@ conditionalTest({ min: 20 })('LangChain integration (v1)', () => {
   const EXPECTED_TRANSACTION_MESSAGE_TRUNCATION = {
     transaction: 'main',
     spans: expect.arrayContaining([
+      // First call: String input truncated (only C's remain, D's are cropped)
       expect.objectContaining({
         data: expect.objectContaining({
           'gen_ai.operation.name': 'chat',
@@ -256,6 +257,7 @@ conditionalTest({ min: 20 })('LangChain integration (v1)', () => {
         origin: 'auto.ai.langchain',
         status: 'ok',
       }),
+      // Second call: Array input, last message truncated (only C's remain, D's are cropped)
       expect.objectContaining({
         data: expect.objectContaining({
           'gen_ai.operation.name': 'chat',
@@ -271,6 +273,24 @@ conditionalTest({ min: 20 })('LangChain integration (v1)', () => {
         origin: 'auto.ai.langchain',
         status: 'ok',
       }),
+      // Third call: Last message is small and kept without truncation
+      expect.objectContaining({
+        data: expect.objectContaining({
+          'gen_ai.operation.name': 'chat',
+          'sentry.op': 'gen_ai.chat',
+          'sentry.origin': 'auto.ai.langchain',
+          'gen_ai.system': 'anthropic',
+          'gen_ai.request.model': 'claude-3-5-sonnet-20241022',
+          // Small message should be kept intact
+          'gen_ai.request.messages': JSON.stringify([
+            { role: 'user', content: 'This is a small message that fits within the limit' },
+          ]),
+        }),
+        description: 'chat claude-3-5-sonnet-20241022',
+        op: 'gen_ai.chat',
+        origin: 'auto.ai.langchain',
+        status: 'ok',
+      }),
     ]),
   };

dev-packages/node-integration-tests/suites/tracing/openai/scenario-embeddings.mjs

Lines changed: 6 additions & 0 deletions
@@ -67,6 +67,12 @@ async function run() {
   } catch {
     // Error is expected and handled
   }
+
+  // Third test: embeddings API with multiple inputs
+  await client.embeddings.create({
+    input: ['First input text', 'Second input text', 'Third input text'],
+    model: 'text-embedding-3-small',
+  });
 });

 server.close();