Merge pull request #48 from salesforcecli/mdonnalley/improve-gen-testset

mdonnalley · web-flow · commit 0161e366bba6 · 2024-12-18T15:07:34.000-07:00
fix: require all expectations
diff --git a/README.md b/README.md
@@ -111,7 +111,7 @@ EXAMPLES
     $ sf agent create --name CustomerSupportAgent --spec ./config/agentSpec.json --target-org my-org
 ```
 
-_See code: [src/commands/agent/create.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/create.ts)_
+_See code: [src/commands/agent/create.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/create.ts)_
 
 ## `sf agent generate definition`
 
@@ -136,7 +136,7 @@ EXAMPLES
   $ sf agent generate definition
 ```
 
-_See code: [src/commands/agent/generate/definition.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/generate/definition.ts)_
+_See code: [src/commands/agent/generate/definition.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/generate/definition.ts)_
 
 ## `sf agent generate spec`
 
@@ -197,7 +197,7 @@ EXAMPLES
     $ sf agent generate spec --output-dir specs --target-org my-org
 ```
 
-_See code: [src/commands/agent/generate/spec.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/generate/spec.ts)_
+_See code: [src/commands/agent/generate/spec.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/generate/spec.ts)_
 
 ## `sf agent generate testset`
 
@@ -220,7 +220,7 @@ EXAMPLES
   $ sf agent generate testset
 ```
 
-_See code: [src/commands/agent/generate/testset.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/generate/testset.ts)_
+_See code: [src/commands/agent/generate/testset.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/generate/testset.ts)_
 
 ## `sf agent preview`
 
@@ -255,7 +255,7 @@ FLAG DESCRIPTIONS
     the API name of the agent? (TBD based on agents library)
 ```
 
-_See code: [src/commands/agent/preview.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/preview.ts)_
+_See code: [src/commands/agent/preview.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/preview.ts)_
 
 ## `sf agent test cancel`
 
@@ -292,7 +292,7 @@ EXAMPLES
     $ sf agent test cancel --job-id 4KBfake0000003F4AQ --target-org my-org
 ```
 
-_See code: [src/commands/agent/test/cancel.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/test/cancel.ts)_
+_See code: [src/commands/agent/test/cancel.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/test/cancel.ts)_
 
 ## `sf agent test results`
 
@@ -348,7 +348,7 @@ FLAG DESCRIPTIONS
     test results aren't written.
 ```
 
-_See code: [src/commands/agent/test/results.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/test/results.ts)_
+_See code: [src/commands/agent/test/results.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/test/results.ts)_
 
 ## `sf agent test resume`
 
@@ -411,7 +411,7 @@ FLAG DESCRIPTIONS
     test results aren't written.
 ```
 
-_See code: [src/commands/agent/test/resume.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/test/resume.ts)_
+_See code: [src/commands/agent/test/resume.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/test/resume.ts)_
 
 ## `sf agent test run`
 
@@ -474,6 +474,6 @@ FLAG DESCRIPTIONS
     test results aren't written.
 ```
 
-_See code: [src/commands/agent/test/run.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/test/run.ts)_
+_See code: [src/commands/agent/test/run.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/test/run.ts)_
 
 <!-- commandsstop -->
diff --git a/package.json b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@salesforce/plugin-agent",
   "description": "Commands to interact with Salesforce agents",
-  "version": "1.7.1",
+  "version": "1.7.2-dev.1",
   "author": "Salesforce",
   "bugs": "https://github.com/forcedotcom/cli/issues",
   "dependencies": {
@@ -11,7 +11,7 @@
     "@inquirer/select": "^4.0.1",
     "@oclif/core": "^4",
     "@oclif/multi-stage-output": "^0.7.12",
-    "@salesforce/agents": "^0.5.1",
+    "@salesforce/agents": "^0.5.2",
     "@salesforce/core": "^8.8.0",
     "@salesforce/kit": "^3.2.1",
     "@salesforce/sf-plugins-core": "^12.1.0",
diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json
@@ -56,6 +56,9 @@
         "number": {
           "type": "string"
         },
+        "utterance": {
+          "type": "string"
+        },
         "startTime": {
           "type": "string"
         },
@@ -151,7 +154,7 @@
           }
         }
       },
-      "required": ["status", "number", "startTime", "generatedData", "expectationResults"],
+      "required": ["status", "number", "utterance", "startTime", "generatedData", "expectationResults"],
       "additionalProperties": false
     }
   }
diff --git a/src/commands/agent/generate/testset.ts b/src/commands/agent/generate/testset.ts
@@ -9,19 +9,17 @@ import { mkdir, writeFile } from 'node:fs/promises';
 import { SfCommand } from '@salesforce/sf-plugins-core';
 import { Messages } from '@salesforce/core';
 import input from '@inquirer/input';
-import select from '@inquirer/select';
 import confirm from '@inquirer/confirm';
 import { theme } from '../../../inquirer-theme.js';
 
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
 const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.testset');
 
-type ExpectationType = 'topic_sequence_match' | 'action_sequence_match' | 'bot_response_rating';
-
 export type TestSetInputs = {
   utterance: string;
-  expectationType: ExpectationType;
-  expectedValue: string;
+  actionSequenceExpectedValue: string;
+  botRatingExpectedValue: string;
+  topicSequenceExpectedValue: string;
 };
 
 async function promptForTestCase(): Promise<TestSetInputs> {
@@ -31,21 +29,33 @@ async function promptForTestCase(): Promise<TestSetInputs> {
     theme,
   });
 
-  const expectationType = await select<ExpectationType>({
-    message: 'What type of expectation would you like to test for the utterance?',
-    choices: ['topic_sequence_match', 'action_sequence_match', 'bot_response_rating'],
+  const topicSequenceExpectedValue = await input({
+    message: 'What is the expected value for the topic expectation?',
+    validate: (d: string): boolean | string => {
+      if (!d.length) {
+        return 'expected value cannot be empty';
+      }
+      return true;
+    },
     theme,
   });
 
-  const expectedValue = await input({
-    message: 'What is the expected value for the expectation?',
+  const actionSequenceExpectedValue = await input({
+    message: 'What is the expected value for the action expectation?',
     validate: (d: string): boolean | string => {
       if (!d.length) {
         return 'expected value cannot be empty';
       }
+      return true;
+    },
+    theme,
+  });
 
-      if (expectationType === 'action_sequence_match') {
-        return d.split(',').length > 1 || 'expected value must be a comma-separated list of actions';
+  const botRatingExpectedValue = await input({
+    message: 'What is the expected value for the bot rating expectation?',
+    validate: (d: string): boolean | string => {
+      if (!d.length) {
+        return 'expected value cannot be empty';
       }
 
       return true;
@@ -55,31 +65,36 @@ async function promptForTestCase(): Promise<TestSetInputs> {
 
   return {
     utterance,
-    expectationType,
-    expectedValue,
+    actionSequenceExpectedValue,
+    botRatingExpectedValue,
+    topicSequenceExpectedValue,
   };
 }
 
 export function constructTestSetXML(testCases: TestSetInputs[]): string {
   const tab = '  ';
   let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<AiEvaluationTestSet>\n${tab}<subjectType>AGENT</subjectType>\n`;
   testCases.forEach((testCase, i) => {
-    const expectedValue =
-      testCase.expectationType === 'action_sequence_match'
-        ? `[${testCase.expectedValue
-            .split(',')
-            .map((v) => `"${v}"`)
-            .join(',')}]`
-        : testCase.expectedValue;
     xml += `  <testCase>
     <number>${i + 1}</number>
     <inputs>
       <utterance>${testCase.utterance}</utterance>
     </inputs>
     <expectations>
       <expectation>
-        <name>${testCase.expectationType}</name>
-        <expectedValue>${expectedValue}</expectedValue>
+        <name>topic_sequence_match</name>
+        <expectedValue>${testCase.topicSequenceExpectedValue}</expectedValue>
+      </expectation>
+      <expectation>
+        <name>action_sequence_match</name>
+        <expectedValue>${`[${testCase.actionSequenceExpectedValue
+          .split(',')
+          .map((v) => `"${v}"`)
+          .join(',')}]`}</expectedValue>
+      </expectation>
+      <expectation>
+        <name>bot_response_rating</name>
+        <expectedValue>${testCase.botRatingExpectedValue}</expectedValue>
       </expectation>
     </expectations>
   </testCase>\n`;
diff --git a/src/testStages.ts b/src/testStages.ts
@@ -43,7 +43,7 @@ export class TestStages {
         {
           stage: 'Polling for Test Results',
           type: 'dynamic-key-value',
-          label: 'Completed Tests',
+          label: 'Completed Test Cases',
           get: (data): string | undefined =>
             data?.totalTestCases && data?.passingTestCases && data?.failingTestCases
               ? `${data?.passingTestCases + data?.failingTestCases}/${data?.totalTestCases}`
@@ -52,13 +52,13 @@ export class TestStages {
         {
           stage: 'Polling for Test Results',
           type: 'dynamic-key-value',
-          label: 'Passing Tests',
+          label: 'Passing Test Cases',
           get: (data): string | undefined => data?.passingTestCases?.toString(),
         },
         {
           stage: 'Polling for Test Results',
           type: 'dynamic-key-value',
-          label: 'Failing Tests',
+          label: 'Failing Test Cases',
           get: (data): string | undefined => data?.failingTestCases?.toString(),
         },
       ],
diff --git a/test/commands/agent/generate/testset.test.ts b/test/commands/agent/generate/testset.test.ts
@@ -12,18 +12,21 @@ describe('constructTestSetXML', () => {
     const testCases = [
       {
         utterance: 'hello',
-        expectationType: 'topic_sequence_match',
-        expectedValue: 'greeting',
+        actionSequenceExpectedValue: 'foo,bar',
+        botRatingExpectedValue: 'baz',
+        topicSequenceExpectedValue: 'qux',
       },
       {
         utterance: 'goodbye',
-        expectationType: 'action_sequence_match',
-        expectedValue: 'farewell,seeya',
+        actionSequenceExpectedValue: 'foo,bar',
+        botRatingExpectedValue: 'baz',
+        topicSequenceExpectedValue: 'qux',
       },
       {
         utterance: 'how are you',
-        expectationType: 'bot_response_rating',
-        expectedValue: '.5',
+        actionSequenceExpectedValue: 'foo,bar',
+        botRatingExpectedValue: 'baz',
+        topicSequenceExpectedValue: 'qux',
       },
     ] satisfies TestSetInputs[];
 
@@ -40,7 +43,15 @@ describe('constructTestSetXML', () => {
     <expectations>
       <expectation>
         <name>topic_sequence_match</name>
-        <expectedValue>greeting</expectedValue>
+        <expectedValue>qux</expectedValue>
+      </expectation>
+      <expectation>
+        <name>action_sequence_match</name>
+        <expectedValue>["foo","bar"]</expectedValue>
+      </expectation>
+      <expectation>
+        <name>bot_response_rating</name>
+        <expectedValue>baz</expectedValue>
       </expectation>
     </expectations>
   </testCase>
@@ -50,9 +61,17 @@ describe('constructTestSetXML', () => {
       <utterance>goodbye</utterance>
     </inputs>
     <expectations>
+      <expectation>
+        <name>topic_sequence_match</name>
+        <expectedValue>qux</expectedValue>
+      </expectation>
       <expectation>
         <name>action_sequence_match</name>
-        <expectedValue>["farewell","seeya"]</expectedValue>
+        <expectedValue>["foo","bar"]</expectedValue>
+      </expectation>
+      <expectation>
+        <name>bot_response_rating</name>
+        <expectedValue>baz</expectedValue>
       </expectation>
     </expectations>
   </testCase>
@@ -62,9 +81,17 @@ describe('constructTestSetXML', () => {
       <utterance>how are you</utterance>
     </inputs>
     <expectations>
+      <expectation>
+        <name>topic_sequence_match</name>
+        <expectedValue>qux</expectedValue>
+      </expectation>
+      <expectation>
+        <name>action_sequence_match</name>
+        <expectedValue>["foo","bar"]</expectedValue>
+      </expectation>
       <expectation>
         <name>bot_response_rating</name>
-        <expectedValue>.5</expectedValue>
+        <expectedValue>baz</expectedValue>
       </expectation>
     </expectations>
   </testCase>
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json
diff --git a/yarn.lock b/yarn.lock

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -56,6 +56,9 @@` |
| `56` | `56` | `"number": {` |
| `57` | `57` | `"type": "string"` |
| `58` | `58` | `},` |
|  | `59` | `+ "utterance": {` |
|  | `60` | `+ "type": "string"` |
|  | `61` | `+ },` |
| `59` | `62` | `"startTime": {` |
| `60` | `63` | `"type": "string"` |
| `61` | `64` | `},` |
|  |  | `@@ -151,7 +154,7 @@` |
| `151` | `154` | `}` |
| `152` | `155` | `}` |
| `153` | `156` | `},` |
| `154` |  | `- "required": ["status", "number", "startTime", "generatedData", "expectationResults"],` |
|  | `157` | `+ "required": ["status", "number", "utterance", "startTime", "generatedData", "expectationResults"],` |
| `155` | `158` | `"additionalProperties": false` |
| `156` | `159` | `}` |
| `157` | `160` | `}` |