From 610c1e93be3e64c89f6f7b91b71ecf9176bacc91 Mon Sep 17 00:00:00 2001 From: Jaganpro Date: Mon, 9 Mar 2026 14:36:27 -0400 Subject: [PATCH 1/2] feat(run-eval): whitelist state/setupSessionContext, translate contextVariables, preserve outputs - Whitelist `state`, `setupSessionContext`, and `context_variables` in evalNormalizer's VALID_AGENT_FIELDS for agent.create_session so the normalizer no longer strips fields needed for auth bypass and session context injection. - Translate YAML TestSpec `contextVariables` into `context_variables` on the agent.create_session step in yamlSpecTranslator, enabling YAML specs to inject context variables without raw JSON payloads. - Include `outputs[]` array in RunEvalResult's --json output so CI pipelines retain agent responses, topic routing, and planner state for debugging. --- src/commands/agent/test/run-eval.ts | 5 +-- src/evalNormalizer.ts | 12 ++++++- src/yamlSpecTranslator.ts | 12 +++++-- test/evalNormalizer.test.ts | 54 +++++++++++++++++++++++++++++ test/yamlSpecTranslator.test.ts | 45 ++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 5 deletions(-) diff --git a/src/commands/agent/test/run-eval.ts b/src/commands/agent/test/run-eval.ts index 0c176f47..f364c434 100644 --- a/src/commands/agent/test/run-eval.ts +++ b/src/commands/agent/test/run-eval.ts @@ -26,7 +26,7 @@ Messages.importMessagesDirectoryFromMetaUrl(import.meta.url); const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.run-eval'); export type RunEvalResult = { - tests: Array<{ id: string; status: string; evaluations: unknown[] }>; + tests: Array<{ id: string; status: string; evaluations: unknown[]; outputs: unknown[] }>; summary: { passed: number; failed: number; scored: number; errors: number }; }; @@ -122,7 +122,7 @@ function buildResultSummary(mergedResponse: EvalApiResponse): { testSummaries: RunEvalResult['tests']; } { const summary = { passed: 0, failed: 0, scored: 0, errors: 0 }; - const testSummaries: Array<{ id: string; 
status: string; evaluations: unknown[] }> = []; + const testSummaries: Array<{ id: string; status: string; evaluations: unknown[]; outputs: unknown[] }> = []; for (const testResult of mergedResponse.results ?? []) { const tr = testResult as Record<string, unknown>; @@ -143,6 +143,7 @@ function buildResultSummary(mergedResponse: EvalApiResponse): { id: testId, status: failed > 0 || testErrors.length > 0 ? 'failed' : 'passed', evaluations: evalResults, + outputs: (tr.outputs as unknown[]) ?? [], }); } diff --git a/src/evalNormalizer.ts b/src/evalNormalizer.ts index 7d2a1921..f9901cd1 100644 --- a/src/evalNormalizer.ts +++ b/src/evalNormalizer.ts @@ -73,7 +73,17 @@ const ASSERTION_VALID_FIELDS = new Set([ ]); const VALID_AGENT_FIELDS: Record<string, Set<string>> = { - 'agent.create_session': new Set(['type', 'id', 'agent_id', 'agent_version_id', 'use_agent_api', 'planner_id']), + 'agent.create_session': new Set([ + 'type', + 'id', + 'agent_id', + 'agent_version_id', + 'use_agent_api', + 'planner_id', + 'state', + 'setupSessionContext', + 'context_variables', + ]), 'agent.send_message': new Set(['type', 'id', 'session_id', 'utterance']), 'agent.get_state': new Set(['type', 'id', 'session_id']), }; diff --git a/src/yamlSpecTranslator.ts b/src/yamlSpecTranslator.ts index 6c3fb862..efbbdf84 100644 --- a/src/yamlSpecTranslator.ts +++ b/src/yamlSpecTranslator.ts @@ -101,11 +101,19 @@ export function translateTestCase(testCase: TestCase, index: number, specName?: const steps: EvalStep[] = []; // 1. agent.create_session - steps.push({ + const createSessionStep: EvalStep = { type: 'agent.create_session', id: 'cs', use_agent_api: true, - }); + }; + + if (testCase.contextVariables && testCase.contextVariables.length > 0) { + createSessionStep.context_variables = Object.fromEntries( + testCase.contextVariables.map((cv) => [cv.name, cv.value]) + ); + } + + steps.push(createSessionStep); // 2.
Conversation history — only user messages become send_message steps let historyIdx = 0; diff --git a/test/evalNormalizer.test.ts b/test/evalNormalizer.test.ts index 7c15e352..3ddab444 100644 --- a/test/evalNormalizer.test.ts +++ b/test/evalNormalizer.test.ts @@ -337,6 +337,60 @@ describe('evalNormalizer', () => { expect(result[0]).to.have.property('generated_output', 'test'); }); + it('should preserve state field on agent.create_session', () => { + const steps: EvalStep[] = [ + { + type: 'agent.create_session', + id: 's1', + planner_id: 'p1', + state: { + state: { + plannerType: 'Atlas', + sessionContext: {}, + conversationHistory: [], + lastExecution: {}, + }, + }, + }, + ]; + const result = stripUnrecognizedFields(steps); + expect(result[0]).to.have.property('state'); + expect((result[0] as Record<string, unknown>).state).to.deep.equal(steps[0].state); + }); + + it('should preserve setupSessionContext on agent.create_session', () => { + const steps: EvalStep[] = [ + { + type: 'agent.create_session', + id: 's1', + planner_id: 'p1', + setupSessionContext: { tags: { botId: '0Xx123', botVersionId: '0X9456' } }, + }, + ]; + const result = stripUnrecognizedFields(steps); + expect(result[0]).to.have.property('setupSessionContext'); + expect((result[0] as Record<string, unknown>).setupSessionContext).to.deep.equal({ + tags: { botId: '0Xx123', botVersionId: '0X9456' }, + }); + }); + + it('should preserve context_variables on agent.create_session', () => { + const steps: EvalStep[] = [ + { + type: 'agent.create_session', + id: 's1', + use_agent_api: true, + context_variables: { RoutableId: '0Mw123', CaseId: '500456' }, + }, + ]; + const result = stripUnrecognizedFields(steps); + expect(result[0]).to.have.property('context_variables'); + expect((result[0] as Record<string, unknown>).context_variables).to.deep.equal({ + RoutableId: '0Mw123', + CaseId: '500456', + }); + }); + it('should not strip fields from unknown types', () => { const steps: EvalStep[] = [{ type: 'evaluator.future_type', id: 'e1', custom_field: 'keep' }];
const result = stripUnrecognizedFields(steps); diff --git a/test/yamlSpecTranslator.test.ts b/test/yamlSpecTranslator.test.ts index 8b2776f0..612f22ef 100644 --- a/test/yamlSpecTranslator.test.ts +++ b/test/yamlSpecTranslator.test.ts @@ -622,6 +622,51 @@ testCases: [] expect(result.id).to.equal('My_Spec_case_2'); }); + it('injects context_variables when contextVariables present', () => { + const tc: TestCase = { + utterance: 'Help with my camera', + expectedTopic: 'Product_Help', + expectedActions: undefined, + expectedOutcome: undefined, + contextVariables: [ + { name: 'RoutableId', value: '0Mw123' }, + { name: 'CaseId', value: '500456' }, + ], + }; + const result = translateTestCase(tc, 0); + const cs = result.steps.find((s) => s.type === 'agent.create_session'); + expect(cs).to.have.property('context_variables'); + expect((cs as Record<string, unknown>).context_variables).to.deep.equal({ + RoutableId: '0Mw123', + CaseId: '500456', + }); + }); + + it('does not add context_variables when contextVariables absent', () => { + const tc: TestCase = { + utterance: 'Hello', + expectedTopic: undefined, + expectedActions: undefined, + expectedOutcome: undefined, + }; + const result = translateTestCase(tc, 0); + const cs = result.steps.find((s) => s.type === 'agent.create_session'); + expect(cs).to.not.have.property('context_variables'); + }); + + it('does not add context_variables when contextVariables is empty', () => { + const tc: TestCase = { + utterance: 'Hello', + expectedTopic: undefined, + expectedActions: undefined, + expectedOutcome: undefined, + contextVariables: [], + }; + const result = translateTestCase(tc, 0); + const cs = result.steps.find((s) => s.type === 'agent.create_session'); + expect(cs).to.not.have.property('context_variables'); + }); + it('sets use_agent_api true on create_session', () => { const tc: TestCase = { utterance: 'Hello', From bb452518e93c7f63b76a2574f6cbce843ff05d53 Mon Sep 17 00:00:00 2001 From: Jaganpro Date: Mon, 9 Mar 2026 14:42:42 -0400 Subject:
[PATCH 2/2] chore: regenerate JSON schemas after RunEvalResult type change --- schemas/agent-test-run__eval.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/schemas/agent-test-run__eval.json b/schemas/agent-test-run__eval.json index 351919a2..f4094406 100644 --- a/schemas/agent-test-run__eval.json +++ b/schemas/agent-test-run__eval.json @@ -19,9 +19,13 @@ "evaluations": { "type": "array", "items": {} + }, + "outputs": { + "type": "array", + "items": {} } }, - "required": ["id", "status", "evaluations"], + "required": ["id", "status", "evaluations", "outputs"], "additionalProperties": false } },