From 610c1e93be3e64c89f6f7b91b71ecf9176bacc91 Mon Sep 17 00:00:00 2001
From: Jaganpro <Jag87@outlook.com>
Date: Mon, 9 Mar 2026 14:36:27 -0400
Subject: [PATCH 1/2] feat(run-eval): whitelist state/setupSessionContext,
 translate contextVariables, preserve outputs

- Whitelist `state`, `setupSessionContext`, and `context_variables` in
  evalNormalizer's VALID_AGENT_FIELDS for agent.create_session so the
  normalizer no longer strips fields needed for auth bypass and session
  context injection.

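- Translate each YAML test case's `contextVariables` list of name/value
  pairs into a `context_variables` object on the agent.create_session
  step in yamlSpecTranslator, enabling YAML specs to inject context
  variables without raw JSON payloads. A missing or empty list adds no
  `context_variables` field.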

- Include an `outputs[]` array on each test entry of RunEvalResult's
  --json output (defaulting to `[]` when the API result has none) so CI
  pipelines retain agent responses, topic routing, and planner state
  for debugging.
---
 src/commands/agent/test/run-eval.ts |  5 +--
 src/evalNormalizer.ts               | 12 ++++++-
 src/yamlSpecTranslator.ts           | 12 +++++--
 test/evalNormalizer.test.ts         | 54 +++++++++++++++++++++++++++++
 test/yamlSpecTranslator.test.ts     | 45 ++++++++++++++++++++++++
 5 files changed, 123 insertions(+), 5 deletions(-)
diff --git a/src/commands/agent/test/run-eval.ts b/src/commands/agent/test/run-eval.ts
index 0c176f47..f364c434 100644
--- a/src/commands/agent/test/run-eval.ts
+++ b/src/commands/agent/test/run-eval.ts
@@ -26,7 +26,7 @@ Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
 const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.run-eval');
 
 export type RunEvalResult = {
-  tests: Array<{ id: string; status: string; evaluations: unknown[] }>;
+  tests: Array<{ id: string; status: string; evaluations: unknown[]; outputs: unknown[] }>;
   summary: { passed: number; failed: number; scored: number; errors: number };
 };
 
@@ -122,7 +122,7 @@ function buildResultSummary(mergedResponse: EvalApiResponse): {
   testSummaries: RunEvalResult['tests'];
 } {
   const summary = { passed: 0, failed: 0, scored: 0, errors: 0 };
-  const testSummaries: Array<{ id: string; status: string; evaluations: unknown[] }> = [];
+  const testSummaries: Array<{ id: string; status: string; evaluations: unknown[]; outputs: unknown[] }> = [];
 
   for (const testResult of mergedResponse.results ?? []) {
     const tr = testResult as Record<string, unknown>;
@@ -143,6 +143,7 @@ function buildResultSummary(mergedResponse: EvalApiResponse): {
       id: testId,
       status: failed > 0 || testErrors.length > 0 ? 'failed' : 'passed',
       evaluations: evalResults,
+      outputs: (tr.outputs as unknown[]) ?? [],
     });
   }
 
diff --git a/src/evalNormalizer.ts b/src/evalNormalizer.ts
index 7d2a1921..f9901cd1 100644
--- a/src/evalNormalizer.ts
+++ b/src/evalNormalizer.ts
@@ -73,7 +73,17 @@ const ASSERTION_VALID_FIELDS = new Set([
 ]);
 
 const VALID_AGENT_FIELDS: Record<string, Set<string>> = {
-  'agent.create_session': new Set(['type', 'id', 'agent_id', 'agent_version_id', 'use_agent_api', 'planner_id']),
+  'agent.create_session': new Set([
+    'type',
+    'id',
+    'agent_id',
+    'agent_version_id',
+    'use_agent_api',
+    'planner_id',
+    'state',
+    'setupSessionContext',
+    'context_variables',
+  ]),
   'agent.send_message': new Set(['type', 'id', 'session_id', 'utterance']),
   'agent.get_state': new Set(['type', 'id', 'session_id']),
 };
diff --git a/src/yamlSpecTranslator.ts b/src/yamlSpecTranslator.ts
index 6c3fb862..efbbdf84 100644
--- a/src/yamlSpecTranslator.ts
+++ b/src/yamlSpecTranslator.ts
@@ -101,11 +101,19 @@ export function translateTestCase(testCase: TestCase, index: number, specName?:
   const steps: EvalStep[] = [];
 
   // 1. agent.create_session
-  steps.push({
+  const createSessionStep: EvalStep = {
     type: 'agent.create_session',
     id: 'cs',
     use_agent_api: true,
-  });
+  };
+
+  if (testCase.contextVariables && testCase.contextVariables.length > 0) {
+    createSessionStep.context_variables = Object.fromEntries(
+      testCase.contextVariables.map((cv) => [cv.name, cv.value])
+    );
+  }
+
+  steps.push(createSessionStep);
 
   // 2. Conversation history — only user messages become send_message steps
   let historyIdx = 0;
diff --git a/test/evalNormalizer.test.ts b/test/evalNormalizer.test.ts
index 7c15e352..3ddab444 100644
--- a/test/evalNormalizer.test.ts
+++ b/test/evalNormalizer.test.ts
@@ -337,6 +337,60 @@ describe('evalNormalizer', () => {
       expect(result[0]).to.have.property('generated_output', 'test');
     });
 
+    it('should preserve state field on agent.create_session', () => {
+      const steps: EvalStep[] = [
+        {
+          type: 'agent.create_session',
+          id: 's1',
+          planner_id: 'p1',
+          state: {
+            state: {
+              plannerType: 'Atlas',
+              sessionContext: {},
+              conversationHistory: [],
+              lastExecution: {},
+            },
+          },
+        },
+      ];
+      const result = stripUnrecognizedFields(steps);
+      expect(result[0]).to.have.property('state');
+      expect((result[0] as Record<string, unknown>).state).to.deep.equal(steps[0].state);
+    });
+
+    it('should preserve setupSessionContext on agent.create_session', () => {
+      const steps: EvalStep[] = [
+        {
+          type: 'agent.create_session',
+          id: 's1',
+          planner_id: 'p1',
+          setupSessionContext: { tags: { botId: '0Xx123', botVersionId: '0X9456' } },
+        },
+      ];
+      const result = stripUnrecognizedFields(steps);
+      expect(result[0]).to.have.property('setupSessionContext');
+      expect((result[0] as Record<string, unknown>).setupSessionContext).to.deep.equal({
+        tags: { botId: '0Xx123', botVersionId: '0X9456' },
+      });
+    });
+
+    it('should preserve context_variables on agent.create_session', () => {
+      const steps: EvalStep[] = [
+        {
+          type: 'agent.create_session',
+          id: 's1',
+          use_agent_api: true,
+          context_variables: { RoutableId: '0Mw123', CaseId: '500456' },
+        },
+      ];
+      const result = stripUnrecognizedFields(steps);
+      expect(result[0]).to.have.property('context_variables');
+      expect((result[0] as Record<string, unknown>).context_variables).to.deep.equal({
+        RoutableId: '0Mw123',
+        CaseId: '500456',
+      });
+    });
+
     it('should not strip fields from unknown types', () => {
       const steps: EvalStep[] = [{ type: 'evaluator.future_type', id: 'e1', custom_field: 'keep' }];
       const result = stripUnrecognizedFields(steps);
diff --git a/test/yamlSpecTranslator.test.ts b/test/yamlSpecTranslator.test.ts
index 8b2776f0..612f22ef 100644
--- a/test/yamlSpecTranslator.test.ts
+++ b/test/yamlSpecTranslator.test.ts
@@ -622,6 +622,51 @@ testCases: []
       expect(result.id).to.equal('My_Spec_case_2');
     });
 
+    it('injects context_variables when contextVariables present', () => {
+      const tc: TestCase = {
+        utterance: 'Help with my camera',
+        expectedTopic: 'Product_Help',
+        expectedActions: undefined,
+        expectedOutcome: undefined,
+        contextVariables: [
+          { name: 'RoutableId', value: '0Mw123' },
+          { name: 'CaseId', value: '500456' },
+        ],
+      };
+      const result = translateTestCase(tc, 0);
+      const cs = result.steps.find((s) => s.type === 'agent.create_session');
+      expect(cs).to.have.property('context_variables');
+      expect((cs as Record<string, unknown>).context_variables).to.deep.equal({
+        RoutableId: '0Mw123',
+        CaseId: '500456',
+      });
+    });
+
+    it('does not add context_variables when contextVariables absent', () => {
+      const tc: TestCase = {
+        utterance: 'Hello',
+        expectedTopic: undefined,
+        expectedActions: undefined,
+        expectedOutcome: undefined,
+      };
+      const result = translateTestCase(tc, 0);
+      const cs = result.steps.find((s) => s.type === 'agent.create_session');
+      expect(cs).to.not.have.property('context_variables');
+    });
+
+    it('does not add context_variables when contextVariables is empty', () => {
+      const tc: TestCase = {
+        utterance: 'Hello',
+        expectedTopic: undefined,
+        expectedActions: undefined,
+        expectedOutcome: undefined,
+        contextVariables: [],
+      };
+      const result = translateTestCase(tc, 0);
+      const cs = result.steps.find((s) => s.type === 'agent.create_session');
+      expect(cs).to.not.have.property('context_variables');
+    });
+
     it('sets use_agent_api true on create_session', () => {
       const tc: TestCase = {
         utterance: 'Hello',

From bb452518e93c7f63b76a2574f6cbce843ff05d53 Mon Sep 17 00:00:00 2001
From: Jaganpro <Jag87@outlook.com>
Date: Mon, 9 Mar 2026 14:42:42 -0400
Subject: [PATCH 2/2] chore: regenerate JSON schemas after RunEvalResult type
 change

---
 schemas/agent-test-run__eval.json | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/schemas/agent-test-run__eval.json b/schemas/agent-test-run__eval.json
index 351919a2..f4094406 100644
--- a/schemas/agent-test-run__eval.json
+++ b/schemas/agent-test-run__eval.json
@@ -19,9 +19,13 @@
               "evaluations": {
                 "type": "array",
                 "items": {}
+              },
+              "outputs": {
+                "type": "array",
+                "items": {}
               }
             },
-            "required": ["id", "status", "evaluations"],
+            "required": ["id", "status", "evaluations", "outputs"],
             "additionalProperties": false
           }
         },