Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion schemas/agent-test-run__eval.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@
"evaluations": {
"type": "array",
"items": {}
},
"outputs": {
"type": "array",
"items": {}
}
},
"required": ["id", "status", "evaluations"],
"required": ["id", "status", "evaluations", "outputs"],
"additionalProperties": false
}
},
Expand Down
5 changes: 3 additions & 2 deletions src/commands/agent/test/run-eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.run-eval');

export type RunEvalResult = {
tests: Array<{ id: string; status: string; evaluations: unknown[] }>;
tests: Array<{ id: string; status: string; evaluations: unknown[]; outputs: unknown[] }>;
summary: { passed: number; failed: number; scored: number; errors: number };
};

Expand Down Expand Up @@ -122,7 +122,7 @@ function buildResultSummary(mergedResponse: EvalApiResponse): {
testSummaries: RunEvalResult['tests'];
} {
const summary = { passed: 0, failed: 0, scored: 0, errors: 0 };
const testSummaries: Array<{ id: string; status: string; evaluations: unknown[] }> = [];
const testSummaries: Array<{ id: string; status: string; evaluations: unknown[]; outputs: unknown[] }> = [];

for (const testResult of mergedResponse.results ?? []) {
const tr = testResult as Record<string, unknown>;
Expand All @@ -143,6 +143,7 @@ function buildResultSummary(mergedResponse: EvalApiResponse): {
id: testId,
status: failed > 0 || testErrors.length > 0 ? 'failed' : 'passed',
evaluations: evalResults,
outputs: (tr.outputs as unknown[]) ?? [],
});
}

Expand Down
12 changes: 11 additions & 1 deletion src/evalNormalizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,17 @@ const ASSERTION_VALID_FIELDS = new Set([
]);

const VALID_AGENT_FIELDS: Record<string, Set<string>> = {
'agent.create_session': new Set(['type', 'id', 'agent_id', 'agent_version_id', 'use_agent_api', 'planner_id']),
'agent.create_session': new Set([
'type',
'id',
'agent_id',
'agent_version_id',
'use_agent_api',
'planner_id',
'state',
'setupSessionContext',
'context_variables',
]),
'agent.send_message': new Set(['type', 'id', 'session_id', 'utterance']),
'agent.get_state': new Set(['type', 'id', 'session_id']),
};
Expand Down
12 changes: 10 additions & 2 deletions src/yamlSpecTranslator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,19 @@ export function translateTestCase(testCase: TestCase, index: number, specName?:
const steps: EvalStep[] = [];

// 1. agent.create_session
steps.push({
const createSessionStep: EvalStep = {
type: 'agent.create_session',
id: 'cs',
use_agent_api: true,
});
};

if (testCase.contextVariables && testCase.contextVariables.length > 0) {
createSessionStep.context_variables = Object.fromEntries(
testCase.contextVariables.map((cv) => [cv.name, cv.value])
);
}

steps.push(createSessionStep);

// 2. Conversation history — only user messages become send_message steps
let historyIdx = 0;
Expand Down
54 changes: 54 additions & 0 deletions test/evalNormalizer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,60 @@ describe('evalNormalizer', () => {
expect(result[0]).to.have.property('generated_output', 'test');
});

// A create_session step carrying a nested `state` payload must come back
// from stripUnrecognizedFields with that payload still attached and equal
// to what was passed in.
it('should preserve state field on agent.create_session', () => {
  const input: EvalStep[] = [
    {
      type: 'agent.create_session',
      id: 's1',
      planner_id: 'p1',
      state: {
        state: {
          plannerType: 'Atlas',
          sessionContext: {},
          conversationHistory: [],
          lastExecution: {},
        },
      },
    },
  ];

  const normalized = stripUnrecognizedFields(input);
  const firstStep = normalized[0];

  expect(firstStep).to.have.property('state');
  // Compare against the input step's own `state` value, not a re-typed copy.
  expect((firstStep as Record<string, unknown>).state).to.deep.equal(input[0].state);
});

// `setupSessionContext` on a create_session step must pass through
// stripUnrecognizedFields with its nested tags intact.
it('should preserve setupSessionContext on agent.create_session', () => {
  const input: EvalStep[] = [
    {
      type: 'agent.create_session',
      id: 's1',
      planner_id: 'p1',
      setupSessionContext: { tags: { botId: '0Xx123', botVersionId: '0X9456' } },
    },
  ];
  // Expected value is spelled out independently of the input literal.
  const expectedContext = {
    tags: { botId: '0Xx123', botVersionId: '0X9456' },
  };

  const normalized = stripUnrecognizedFields(input);
  const firstStep = normalized[0];

  expect(firstStep).to.have.property('setupSessionContext');
  expect((firstStep as Record<string, unknown>).setupSessionContext).to.deep.equal(expectedContext);
});

// `context_variables` on a create_session step must pass through
// stripUnrecognizedFields with every name/value pair intact.
it('should preserve context_variables on agent.create_session', () => {
  const input: EvalStep[] = [
    {
      type: 'agent.create_session',
      id: 's1',
      use_agent_api: true,
      context_variables: { RoutableId: '0Mw123', CaseId: '500456' },
    },
  ];
  // Expected value is spelled out independently of the input literal.
  const expectedVariables = {
    RoutableId: '0Mw123',
    CaseId: '500456',
  };

  const normalized = stripUnrecognizedFields(input);
  const firstStep = normalized[0];

  expect(firstStep).to.have.property('context_variables');
  expect((firstStep as Record<string, unknown>).context_variables).to.deep.equal(expectedVariables);
});

it('should not strip fields from unknown types', () => {
const steps: EvalStep[] = [{ type: 'evaluator.future_type', id: 'e1', custom_field: 'keep' }];
const result = stripUnrecognizedFields(steps);
Expand Down
45 changes: 45 additions & 0 deletions test/yamlSpecTranslator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,51 @@ testCases: []
expect(result.id).to.equal('My_Spec_case_2');
});

// A test case that lists contextVariables as { name, value } pairs should
// yield a create_session step whose `context_variables` is the matching
// name -> value object.
it('injects context_variables when contextVariables present', () => {
  const testCase: TestCase = {
    utterance: 'Help with my camera',
    expectedTopic: 'Product_Help',
    expectedActions: undefined,
    expectedOutcome: undefined,
    contextVariables: [
      { name: 'RoutableId', value: '0Mw123' },
      { name: 'CaseId', value: '500456' },
    ],
  };
  const expectedVariables = {
    RoutableId: '0Mw123',
    CaseId: '500456',
  };

  const translated = translateTestCase(testCase, 0);
  const createSession = translated.steps.find((step) => step.type === 'agent.create_session');

  expect(createSession).to.have.property('context_variables');
  expect((createSession as Record<string, unknown>).context_variables).to.deep.equal(expectedVariables);
});

// With no contextVariables key on the test case, the create_session step
// must not carry a `context_variables` property at all.
it('does not add context_variables when contextVariables absent', () => {
  const testCase: TestCase = {
    utterance: 'Hello',
    expectedTopic: undefined,
    expectedActions: undefined,
    expectedOutcome: undefined,
  };

  const translated = translateTestCase(testCase, 0);
  const createSession = translated.steps.find((step) => step.type === 'agent.create_session');

  expect(createSession).to.not.have.property('context_variables');
});

// An explicitly empty contextVariables array is treated like an absent one:
// the create_session step must not carry a `context_variables` property.
it('does not add context_variables when contextVariables is empty', () => {
  const testCase: TestCase = {
    utterance: 'Hello',
    expectedTopic: undefined,
    expectedActions: undefined,
    expectedOutcome: undefined,
    contextVariables: [],
  };

  const translated = translateTestCase(testCase, 0);
  const createSession = translated.steps.find((step) => step.type === 'agent.create_session');

  expect(createSession).to.not.have.property('context_variables');
});

it('sets use_agent_api true on create_session', () => {
const tc: TestCase = {
utterance: 'Hello',
Expand Down
Loading