diff --git a/docs/mcp.md b/docs/mcp.md index e24a348..d90fddc 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -1401,7 +1401,7 @@ Triggers a Provar Automation test run using the currently loaded properties file | --------- | -------- | -------- | ------------------------------------------------------------------------ | | `flags` | string[] | no | Raw CLI flags to forward (e.g. `["--project-path", "/path/to/project"]`) | -**Output** — `{ requestId, exitCode, stdout, stderr[, output_lines_suppressed][, steps][, details.warning] }` +**Output** — `{ requestId, exitCode, stdout, stderr[, output_lines_suppressed][, steps][, details.warning][, warnings] }` The `stdout` field is filtered before returning: Java schema-validator lines (`com.networknt.schema.*`) and stale logger-lock `SEVERE` warnings are stripped. If any lines were suppressed, `output_lines_suppressed` contains the count. @@ -1410,20 +1410,32 @@ After each run, the tool scans the results directory for JUnit XML files and add ```json "steps": [ { "testItemId": "1", "title": "TC-Login-001-LoginAndVerify.testcase", "status": "pass" }, - { "testItemId": "2", "title": "TC-Login-002-ForgotPassword.testcase", "status": "fail", "errorMessage": "TimeoutException: page did not load", "error_category": "TIMEOUT", "retryable": true } + { "testItemId": "2", "title": "TC-Login-002-ForgotPassword.testcase", "status": "fail", "errorMessage": "TimeoutException: page did not load", + "error_category": "TIMEOUT", "retryable": true } ] -``` -Each entry represents one test case. `status` is `"pass"`, `"fail"`, or `"skip"`. If the results directory cannot be located or contains no JUnit XML, `details.warning` explains why and `steps` is absent. +Each entry represents one test case. status is "pass", "fail", or "skip". If the results directory cannot be located or contains no JUnit XML, +details.warning explains why and steps is absent. Failed steps may include two optional classification fields: -- `error_category` — one of `INFRASTRUCTURE`, `ASSERTION`, `LOCATOR`, `TIMEOUT`, `OTHER`, set when the failure text matches a known pattern. -- `retryable` — `true` when `error_category` is `INFRASTRUCTURE` or `TIMEOUT` (transient causes), `false` for `ASSERTION`/`LOCATOR`/`OTHER`. Absent when no pattern matched. +- error_category — one of INFRASTRUCTURE, ASSERTION, LOCATOR, TIMEOUT, OTHER, set when the failure text matches a known pattern. +- retryable — true when error_category is INFRASTRUCTURE or TIMEOUT (transient causes), false for ASSERTION/LOCATOR/OTHER. Absent when no +pattern matched. -**Error codes:** `AUTOMATION_TESTRUN_FAILED`, `SF_NOT_FOUND` +Zero-tests guard (RUN-001): when the sf command exits 0, the results directory was located, and at least one JUnit XML file parsed successfully +but contains zero executed test cases, the response includes a warnings[] array containing a RUN-001 (#warning-codes) message. This is almost +always a typo such as testCase vs testCases (or some other unknown key) in provardx-properties.json — the run silently selected nothing. The +warning is additive and never flips exitCode or sets isError; the failure surface remains driven by the underlying sf exit code. ---- +▎ Why RUN-001 stays silent when no JUnit data is available: if the results directory cannot be located, contains no XML files, or every XML file +▎ fails to parse, the tool genuinely has no data on which to assert "zero tests ran" — the absence of parsed results is just "we don't know +▎ what ran". In those cases the response carries details.warning (explaining why structured step data is missing) and RUN-001 is suppressed to +▎ avoid misdirecting the agent toward a typo when the real issue is a missing/unreadable results dir. + +Error codes: AUTOMATION_TESTRUN_FAILED, SF_NOT_FOUND +Warning codes: RUN-001 (zero tests executed despite success) +``` ### `provar_automation_compile` diff --git a/src/mcp/tools/antTools.ts b/src/mcp/tools/antTools.ts index 92d5354..69a4c1b 100644 --- a/src/mcp/tools/antTools.ts +++ b/src/mcp/tools/antTools.ts @@ -1024,6 +1024,12 @@ export function isStepRetryable(category: JUnitErrorCategory | undefined): boole export interface JUnitParseResult { steps: JUnitStepResult[]; warning?: string; + /** + * True iff at least one JUnit XML file was located AND parsed without throwing. + * Distinguishes "we have data and the test selector matched zero cases" (legit RUN-001 signal) + * from "we have no data because nothing parsed" (insufficient info — must stay silent). + */ + parsedAny: boolean; } function extractFailureText(el: unknown): string | undefined { @@ -1112,7 +1118,7 @@ function findXmlFiles(dir: string): string[] { */ export function parseJUnitResults(resultsDir: string): JUnitParseResult { if (!fs.existsSync(resultsDir)) { - return { steps: [], warning: `Results directory not found: ${resultsDir}` }; + return { steps: [], warning: `Results directory not found: ${resultsDir}`, parsedAny: false }; } const xmlFiles = findXmlFiles(resultsDir); @@ -1120,6 +1126,7 @@ export function parseJUnitResults(resultsDir: string): JUnitParseResult { return { steps: [], warning: 'No JUnit XML files found in results directory — structured step output unavailable.', + parsedAny: false, }; } @@ -1152,12 +1159,17 @@ export function parseJUnitResults(resultsDir: string): JUnitParseResult { return { steps: [], warning: 'JUnit XML files found but could not be parsed — structured step output unavailable.', + parsedAny: false, }; } if (allSteps.length === 0) { + // We did parse at least one file; the file just had zero entries (or none we could + // recognise as steps). This is the legitimate "selector matched nothing" signal that RUN-001 + // is built to catch. return { steps: [], warning: 'JUnit XML found but no test steps could be extracted — files may not be standard JUnit format.', + parsedAny: true, }; } @@ -1165,7 +1177,7 @@ export function parseJUnitResults(resultsDir: string): JUnitParseResult { parseFailures > 0 ? `${parseFailures} JUnit XML file(s) could not be parsed — step data may be incomplete.` : undefined; - return { steps: allSteps, warning }; + return { steps: allSteps, warning, parsedAny: true }; } // ── Registration ────────────────────────────────────────────────────────────── diff --git a/src/mcp/tools/automationTools.ts b/src/mcp/tools/automationTools.ts index 7b42c4c..ed15db9 100644 --- a/src/mcp/tools/automationTools.ts +++ b/src/mcp/tools/automationTools.ts @@ -15,6 +15,7 @@ import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; import type { ServerConfig } from '../server.js'; import { assertPathAllowed, PathPolicyError } from '../security/pathPolicy.js'; +import { WARNING_CODES, formatWarning } from '../utils/warningCodes.js'; import { parseJUnitResults } from './antTools.js'; import { runSfCommand } from './sfSpawn.js'; import { desc } from './descHelper.js'; @@ -226,6 +227,38 @@ function readResultsPathFromSfConfig(config: ServerConfig): string | null { // ── Tool: provar_automation_testrun ─────────────────────────────────────────── +/** + * JUnit introspection for the testrun response. Returns enough structure that + * downstream warning emitters (RUN-001 zero-tests, future JUNIT-001 expected-vs- + * actual mismatch) can read a single object instead of re-parsing. + */ +type JUnitIntrospection = { + steps: ReturnType['steps']; + stepCount: number; + parseWarning: string | undefined; + resultsPathResolved: boolean; + /** + * True iff at least one JUnit XML file was located AND parsed without throwing. + * Gates RUN-001: a `stepCount === 0` only means "zero tests executed" when we know we + * actually have parseable data. With `parsedAny === false` the count is "we don't know", + * which must stay silent (details.warning already covers it). + */ + parsedAny: boolean; +}; + +function introspectJUnit(config: ServerConfig): JUnitIntrospection { + const resultsPath = readResultsPathFromSfConfig(config); + if (!resultsPath) { + return { steps: [], stepCount: 0, parseWarning: undefined, resultsPathResolved: false, parsedAny: false }; + } + const { steps, warning, parsedAny } = parseJUnitResults(resultsPath); + return { steps, stepCount: steps.length, parseWarning: warning, resultsPathResolved: true, parsedAny }; +} + +const ZERO_TESTS_MESSAGE = + 'Test run exited successfully but zero tests were executed. ' + + 'Check the testCase / testCases (note spelling) field in provardx-properties.json.'; + export function registerAutomationTestRun(server: McpServer, config: ServerConfig): void { server.registerTool( 'provar_automation_testrun', @@ -240,11 +273,12 @@ export function registerAutomationTestRun(server: McpServer, config: ServerConfi 'For grid/CI execution via Provar Quality Hub instead of running locally, use provar_qualityhub_testrun.', 'Output buffer: a 50 MB maxBuffer is set so ENOBUFS on verbose Provar runs is now rare.', 'If ENOBUFS still occurs (extremely verbose logging), run `sf provar automation test run --json` directly in the terminal and pipe or tail the output instead of retrying this tool.', + 'Zero-tests guard: if the sf exit code is 0, the results directory was located, and at least one JUnit XML file parsed successfully but contains zero executed tests, a RUN-001 warning is added to `warnings[]` — usually a typo such as `testCase` vs `testCases` in provardx-properties.json. When no JUnit data is available (dir missing or all XML unparseable), `details.warning` is set instead and RUN-001 stays silent.', 'Typical local AI loop: config.load → compile → testrun → inspect results.', 'Each failed step in `steps[]` may include optional error_category (INFRASTRUCTURE|ASSERTION|LOCATOR|TIMEOUT|OTHER)', 'and retryable (boolean) fields when the failure text matches a known pattern — use these to drive automated retry policy.', ].join(' '), - 'Run local Provar tests via sf CLI; requires config_load first.' + 'Run local Provar tests via sf CLI; requires config_load first. Surfaces RUN-001 on zero-tests-executed.' ), inputSchema: { flags: z @@ -276,11 +310,10 @@ export function registerAutomationTestRun(server: McpServer, config: ServerConfi const result = runSfCommand(['provar', 'automation', 'test', 'run', ...flags], sf_path); const { filtered, suppressed } = filterTestRunOutput(result.stdout); - // Attempt to enrich the response with structured step data from JUnit XML - const resultsPath = readResultsPathFromSfConfig(config); - const { steps, warning: junitWarning } = resultsPath - ? parseJUnitResults(resultsPath) - : { steps: [], warning: undefined }; + // Enrich the response with structured step data + warning hooks from JUnit XML. + // Single introspection call keeps the wiring extensible (e.g. future JUNIT-001 + // expected-vs-actual mismatch can read stepCount from the same struct). + const junit = introspectJUnit(config); if (result.exitCode !== 0) { const { filtered: filteredErr, suppressed: suppressedErr } = filterTestRunOutput( @@ -290,11 +323,11 @@ export function registerAutomationTestRun(server: McpServer, config: ServerConfi ...makeError('AUTOMATION_TESTRUN_FAILED', filteredErr, requestId), ...(suppressedErr > 0 ? { output_lines_suppressed: suppressedErr } : {}), }; - if (steps.length > 0) errBody['steps'] = steps; - if (!resultsPath || junitWarning) { + if (junit.steps.length > 0) errBody['steps'] = junit.steps; + if (!junit.resultsPathResolved || junit.parseWarning) { errBody['details'] = { warning: - junitWarning ?? + junit.parseWarning ?? 'Could not locate results directory — step-level output unavailable. Run provar_automation_config_load first.', }; } @@ -308,8 +341,27 @@ export function registerAutomationTestRun(server: McpServer, config: ServerConfi stderr: result.stderr, }; if (suppressed > 0) response['output_lines_suppressed'] = suppressed; - if (steps.length > 0) response['steps'] = steps; - if (junitWarning) response['details'] = { warning: junitWarning }; + if (junit.steps.length > 0) response['steps'] = junit.steps; + if (junit.parseWarning) response['details'] = { warning: junit.parseWarning }; + + // RUN-001: sf reported success but zero tests actually executed. + // Almost always a typo in the testCase / testCases field of provardx-properties.json. + // Only fires when: + // 1. The results dir was located (resultsPathResolved), AND + // 2. At least one JUnit XML file was successfully parsed (parsedAny). + // Without (2) `stepCount === 0` just means "we don't have parseable data" — not + // "zero tests ran" — and the agent would be misdirected toward a typo when the + // real issue is a missing/unparseable results dir. That case is already surfaced + // via `details.warning` from the parse layer. With parsedAny === true and zero + // extracted steps, we know the selector genuinely matched nothing. + if (junit.resultsPathResolved && junit.parsedAny && junit.stepCount === 0) { + const warningStr = formatWarning(WARNING_CODES.RUN_001, ZERO_TESTS_MESSAGE); + // Append rather than overwrite so future warning emitters (e.g. JUNIT-001 mismatch + // in PDX-491) can coexist on the same response without stepping on each other. + const existing = response['warnings'] as string[] | undefined; + response['warnings'] = existing ? existing.concat(warningStr) : [warningStr]; + } + return { content: [{ type: 'text' as const, text: JSON.stringify(response) }], structuredContent: response }; } catch (err) { return handleSpawnError(err, requestId, 'provar_automation_testrun'); diff --git a/test/unit/mcp/antTools.test.ts b/test/unit/mcp/antTools.test.ts index f2fd636..3f02709 100644 --- a/test/unit/mcp/antTools.test.ts +++ b/test/unit/mcp/antTools.test.ts @@ -800,12 +800,14 @@ describe('parseJUnitResults', () => { const result = parseJUnitResults(path.join(junitTmpDir, 'nonexistent')); assert.deepEqual(result.steps, []); assert.ok(result.warning?.includes('not found')); + assert.equal(result.parsedAny, false, 'parsedAny must be false when dir is missing'); }); it('returns warning when directory contains no XML files', () => { const result = parseJUnitResults(junitTmpDir); assert.deepEqual(result.steps, []); assert.ok(result.warning?.includes('No JUnit XML')); + assert.equal(result.parsedAny, false, 'parsedAny must be false when no XML files exist'); }); it('extracts steps from a bare JUnit file', () => { @@ -818,6 +820,7 @@ describe('parseJUnitResults', () => { assert.equal(result.steps[1].status, 'fail'); assert.ok(result.steps[1].errorMessage?.includes('Element not found')); assert.equal(result.warning, undefined); + assert.equal(result.parsedAny, true, 'parsedAny must be true when at least one file parsed'); }); it('extracts steps from a wrapper JUnit file', () => { @@ -836,6 +839,18 @@ describe('parseJUnitResults', () => { const result = parseJUnitResults(junitTmpDir); assert.deepEqual(result.steps, []); assert.ok((result.warning?.length ?? 0) > 0); + // parsedAny must be TRUE here: the file was readable and parsed, it just has zero + // entries. This is the legitimate RUN-001 signal — distinct from "we have + // no data at all". + assert.equal(result.parsedAny, true, 'parsedAny must be true when XML parsed but had no steps'); + }); + + it('returns parsedAny=false when all XML files fail to parse', () => { + fs.writeFileSync(path.join(junitTmpDir, 'broken.xml'), ' { diff --git a/test/unit/mcp/automationTools.test.ts b/test/unit/mcp/automationTools.test.ts index 2a9c3c3..2f63ab9 100644 --- a/test/unit/mcp/automationTools.test.ts +++ b/test/unit/mcp/automationTools.test.ts @@ -20,6 +20,7 @@ import { filterTestRunOutput, setSfResultsPathForTesting, } from '../../../src/mcp/tools/automationTools.js'; +import { WARNING_CODES, formatWarning } from '../../../src/mcp/utils/warningCodes.js'; // ── Minimal mock server ─────────────────────────────────────────────────────── @@ -198,7 +199,10 @@ describe('automationTools', () => { } }); - it('omits steps[] and adds details.warning when results dir has no XML', () => { + it('omits steps[] and adds details.warning when results dir has no XML — RUN-001 stays silent', () => { + // PDX-486 follow-up: when the results dir is resolvable but contains zero XML files, + // we have no data on which to claim "zero tests executed". parsedAny is false, so + // RUN-001 must NOT fire; the missing-data case is surfaced via details.warning only. const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'junit-empty-')); setSfResultsPathForTesting(tmpDir); @@ -210,15 +214,22 @@ describe('automationTools', () => { assert.ok(body.details, 'details should be present'); const details = body.details as Record; assert.ok(typeof details.warning === 'string', 'details.warning should be a string'); + assert.equal(body.warnings, undefined, 'RUN-001 must NOT fire when no XML data is parseable'); } finally { setSfResultsPathForTesting(undefined); fs.rmSync(tmpDir, { recursive: true, force: true }); } }); - it('omits steps[] and adds details.warning when results dir contains malformed XML', () => { + it('omits steps[] and adds details.warning when results dir contains malformed XML — RUN-001 stays silent', () => { + // PDX-486 follow-up: if every XML file in the results dir fails to parse, parsedAny + // is false. RUN-001 must stay silent — the agent should be steered toward "why is + // the JUnit output malformed?" (via details.warning), not toward a properties typo. const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'junit-bad-')); - fs.writeFileSync(path.join(tmpDir, 'results.xml'), '', 'utf-8'); + // fast-xml-parser is permissive about *some* malformed text — this input shape (truncated + // before any matchable close tag) is one that genuinely throws under the strict isArray + // config used in antTools.parseJUnitResults, exercising the parsedAny=false branch. + fs.writeFileSync(path.join(tmpDir, 'results.xml'), ' { const body = parseBody(result); assert.equal(body.steps, undefined, 'steps should be absent when XML is malformed'); assert.ok(body.details, 'details should be present with warning'); + const details = body.details as Record; + assert.ok(typeof details.warning === 'string', 'details.warning should describe the parse failure'); + assert.equal(body.warnings, undefined, 'RUN-001 must NOT fire when XML data is unparseable'); } finally { setSfResultsPathForTesting(undefined); fs.rmSync(tmpDir, { recursive: true, force: true }); } }); + + // ── RUN-001 zero-tests guard (PDX-486) ────────────────────────────────── + + describe('RUN-001 zero-tests-executed warning', () => { + const expectedWarning = formatWarning( + WARNING_CODES.RUN_001, + 'Test run exited successfully but zero tests were executed. ' + + 'Check the testCase / testCases (note spelling) field in provardx-properties.json.' + ); + + it('fires when exit is 0 and JUnit step count is 0 (empty )', () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'junit-zero-')); + // Valid JUnit XML but containing zero elements — the canonical + // signal that the test selector (e.g. a misspelled testCase field) matched nothing. + const emptySuiteXml = ` +`; + fs.writeFileSync(path.join(tmpDir, 'results.xml'), emptySuiteXml, 'utf-8'); + setSfResultsPathForTesting(tmpDir); + + try { + spawnStub.returns(makeSpawnResult('tests done', '', 0)); + const result = server.call('provar_automation_testrun', { flags: [] }); + assert.ok(!isError(result), 'tool should still report success — RUN-001 is additive'); + const body = parseBody(result); + assert.equal(body.exitCode, 0); + assert.ok(Array.isArray(body.warnings), 'warnings[] should be present'); + const warnings = body.warnings as string[]; + assert.equal(warnings.length, 1); + assert.equal(warnings[0], expectedWarning, 'warning string must match formatWarning output exactly'); + // Sanity-check the format envelope without inline strings: + assert.ok(warnings[0].startsWith(`WARNING [${WARNING_CODES.RUN_001}]:`)); + } finally { + setSfResultsPathForTesting(undefined); + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('does NOT fire when exit is 0 and step count >= 1', () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'junit-some-')); + const oneCaseXml = ` + + +`; + fs.writeFileSync(path.join(tmpDir, 'results.xml'), oneCaseXml, 'utf-8'); + setSfResultsPathForTesting(tmpDir); + + try { + spawnStub.returns(makeSpawnResult('tests done', '', 0)); + const result = server.call('provar_automation_testrun', { flags: [] }); + const body = parseBody(result); + assert.equal(body.exitCode, 0); + const steps = body.steps as unknown[] | undefined; + assert.ok(steps && steps.length === 1, 'steps should reflect the one parsed testcase'); + assert.equal(body.warnings, undefined, 'warnings[] must be absent when at least one test ran'); + } finally { + setSfResultsPathForTesting(undefined); + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('does NOT fire when exit is non-zero (genuine failure path)', () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'junit-fail-')); + // Even if zero steps were extracted, a non-zero exit is a real failure, not a typo. + const emptySuiteXml = ` +`; + fs.writeFileSync(path.join(tmpDir, 'results.xml'), emptySuiteXml, 'utf-8'); + setSfResultsPathForTesting(tmpDir); + + try { + spawnStub.returns(makeSpawnResult('', 'compilation error', 1)); + const result = server.call('provar_automation_testrun', { flags: [] }); + assert.ok(isError(result), 'should report error on non-zero exit'); + const body = parseBody(result); + assert.equal(body.error_code, 'AUTOMATION_TESTRUN_FAILED'); + // Failure path uses the error body shape — RUN-001 lives on the success-path response only. + assert.equal(body.warnings, undefined, 'warnings[] must not be set on the failure path'); + } finally { + setSfResultsPathForTesting(undefined); + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('does NOT fire when results dir cannot be located (stay silent — config_load issue, not a typo)', () => { + // Explicitly disable the results path override so introspection returns "not resolved". + // RUN-001 must stay silent here so the agent isn't misdirected toward a typo when the + // real issue is "config_load was never called" (or the results dir is outside allowed paths). + // The existing failure-path details.warning channel covers the config_load case on errors; + // on the success path with no resolvable results dir we simply don't emit RUN-001 — silence + // is correct because we genuinely don't know whether tests ran or not. + setSfResultsPathForTesting(null); + + try { + spawnStub.returns(makeSpawnResult('tests done', '', 0)); + const result = server.call('provar_automation_testrun', { flags: [] }); + const body = parseBody(result); + assert.equal(body.exitCode, 0); + assert.equal(body.warnings, undefined, 'no RUN-001 when results dir is unresolved'); + } finally { + setSfResultsPathForTesting(undefined); + } + }); + }); }); // ── provar_automation_compile ─────────────────────────────────────────────