From 061f140b470d2c6733e41477eb8e9e5346d3eb57 Mon Sep 17 00:00:00 2001 From: sarthak Date: Tue, 28 Apr 2026 18:43:51 +0530 Subject: [PATCH 1/2] feat: Implement validation alignment for Column Management, Run Prompt, Eval, and Summary APIs - Added validation alignment plan for Column Management APIs, addressing security gaps and backend validation inconsistencies. - Created new MCP tools for Run Prompt APIs, enhancing validation and organization filtering. - Established shared validation utilities for Eval APIs, fixing organization filtering issues and ensuring consistent validation across layers. - Implemented organization filtering and bug fixes in Summary APIs to prevent cross-org data leaks and improve error handling. - Introduced centralized API base URL configuration for frontend integration. --- src/components/Sidebar.astro | 12 +- src/lib/api-navigation.ts | 15 -- src/lib/navigation.ts | 3 - src/lib/redirects.ts | 1 - .../docs/api/run-tests/addevalconfigs.mdx | 61 ++++++-- .../api/run-tests/compareevalsummaries.mdx | 35 ++++- .../docs/api/run-tests/createruntest.mdx | 67 +++++++-- .../docs/api/run-tests/deleteevalconfig.mdx | 11 +- .../api/run-tests/deletetestexecutions.mdx | 12 +- .../docs/api/run-tests/executeruntest.mdx | 12 +- .../docs/api/run-tests/getcallexecutions.mdx | 133 ------------------ .../docs/api/run-tests/getevalsummary.mdx | 8 +- .../api/run-tests/reruntestexecutions.mdx | 14 +- .../run-tests/runnewevalsontestexecution.mdx | 35 +++-- .../docs/api/run-tests/updateevalconfig.mdx | 55 ++++++-- .../api/run-tests/updatetestcomponents.mdx | 69 --------- src/pages/docs/api/scenarios/addcolumns.mdx | 53 +++++-- .../api/scenarios/addemptyrowstodataset.mdx | 61 -------- .../api/scenarios/addscenariorowswithai.mdx | 37 +++-- .../docs/api/scenarios/createscenario.mdx | 126 +++++++++++------ .../docs/api/scenarios/deletescenario.mdx | 12 +- src/pages/docs/api/scenarios/editscenario.mdx | 39 +++-- src/pages/docs/api/scenarios/getscenario.mdx | 42 ++++-- 
.../docs/api/scenarios/listscenarios.mdx | 94 ++++++++++--- .../docs/api/test-executions/reruncalls.mdx | 14 +- 25 files changed, 551 insertions(+), 470 deletions(-) delete mode 100644 src/pages/docs/api/run-tests/getcallexecutions.mdx delete mode 100644 src/pages/docs/api/run-tests/updatetestcomponents.mdx delete mode 100644 src/pages/docs/api/scenarios/addemptyrowstodataset.mdx diff --git a/src/components/Sidebar.astro b/src/components/Sidebar.astro index 7f30287a..d0e11e5b 100644 --- a/src/components/Sidebar.astro +++ b/src/components/Sidebar.astro @@ -94,18 +94,18 @@ const isApiTab = activeTab?.tab === 'API'; function inferApiMethod(title: string): { method: string; css: string } | null { const t = title.toLowerCase(); + if (/\b(delete|remove)\b/.test(t)) { + return { method: 'DELETE', css: 'api-method-delete' }; + } + if (/\b(update|edit|apply|restore)\b/.test(t)) { + return { method: 'PATCH', css: 'api-method-patch' }; + } if (/\b(list|get|retrieve|health|find|export|progress|analytics|agreement|compare|stats|summary|voices|tts)\b/.test(t)) { return { method: 'GET', css: 'api-method-get' }; } if (/\b(create|add|generate|execute|submit|assign|bulk|complete|skip|release|pause|unpause|check|upload|start|duplicate|fetch|run|rerun|cancel|clone|merge)\b/.test(t)) { return { method: 'POST', css: 'api-method-post' }; } - if (/\b(delete|remove)\b/.test(t)) { - return { method: 'DEL', css: 'api-method-delete' }; - } - if (/\b(update|edit|apply|restore)\b/.test(t)) { - return { method: 'PATCH', css: 'api-method-patch' }; - } return null; } --- diff --git a/src/lib/api-navigation.ts b/src/lib/api-navigation.ts index 59ac2f60..21f2fefb 100644 --- a/src/lib/api-navigation.ts +++ b/src/lib/api-navigation.ts @@ -61,11 +61,6 @@ export const apiNavigation: ApiNavGroup[] = [ "title": "Add columns to a scenario", "href": "/docs/api/scenarios/addcolumns", "method": "POST" - }, - { - "title": "Add empty rows to a scenario", - "href": "/docs/api/scenarios/addemptyrowstodataset", - 
"method": "POST" } ] }, @@ -187,11 +182,6 @@ export const apiNavigation: ApiNavGroup[] = [ "href": "/docs/api/run-tests/executeruntest", "method": "POST" }, - { - "title": "Update test run components", - "href": "/docs/api/run-tests/updatetestcomponents", - "method": "PATCH" - }, { "title": "Get test executions", "href": "/docs/api/run-tests/gettestexecutions", @@ -202,11 +192,6 @@ export const apiNavigation: ApiNavGroup[] = [ "href": "/docs/api/run-tests/gettestscenarios", "method": "GET" }, - { - "title": "Get call executions for a test run", - "href": "/docs/api/run-tests/getcallexecutions", - "method": "GET" - }, { "title": "Get evaluation summary", "href": "/docs/api/run-tests/getevalsummary", diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts index bbd60ee7..21957c78 100644 --- a/src/lib/navigation.ts +++ b/src/lib/navigation.ts @@ -1015,7 +1015,6 @@ export const tabNavigation: NavTab[] = [ { title: 'Delete Scenario', href: '/docs/api/scenarios/deletescenario' }, { title: 'Add Rows with AI', href: '/docs/api/scenarios/addscenariorowswithai' }, { title: 'Add Columns', href: '/docs/api/scenarios/addcolumns' }, - { title: 'Add Empty Rows', href: '/docs/api/scenarios/addemptyrowstodataset' }, ] }, { @@ -1056,10 +1055,8 @@ export const tabNavigation: NavTab[] = [ { title: 'Get Test Run Details', href: '/docs/api/run-tests/getruntestdetails' }, { title: 'Delete Test Run', href: '/docs/api/run-tests/deleteruntest' }, { title: 'Execute Run Test', href: '/docs/api/run-tests/executeruntest' }, - { title: 'Update Components', href: '/docs/api/run-tests/updatetestcomponents' }, { title: 'Get Test Executions', href: '/docs/api/run-tests/gettestexecutions' }, { title: 'Get Test Scenarios', href: '/docs/api/run-tests/gettestscenarios' }, - { title: 'Get Call Executions', href: '/docs/api/run-tests/getcallexecutions' }, { title: 'Get Eval Summary', href: '/docs/api/run-tests/getevalsummary' }, { title: 'Compare Eval Summaries', href: 
'/docs/api/run-tests/compareevalsummaries' }, { title: 'Add Eval Configs', href: '/docs/api/run-tests/addevalconfigs' }, diff --git a/src/lib/redirects.ts b/src/lib/redirects.ts index 4bcb505d..1b4aba6a 100644 --- a/src/lib/redirects.ts +++ b/src/lib/redirects.ts @@ -17,7 +17,6 @@ export const redirectMap: Record = { '/api-reference/prompt-workbench/get-prompt-version-by-name': '/docs/api', '/api-reference/run-tests/create-a-new-test-run': '/docs/api/run-tests/createruntest', '/api-reference/run-tests/execute-a-test-run': '/docs/api/run-tests/executeruntest', - '/api-reference/scenarios/add-empty-rows-to-a-scenario': '/docs/api/scenarios/addemptyrowstodataset', '/api-reference/scenarios/add-rows-to-a-scenario-using-ai': '/docs/api/scenarios/addscenariorowswithai', '/api-reference/scenarios/edit-a-scenario': '/docs/api/scenarios/editscenario', '/api-reference/scenarios/generate-or-create-a-scenario': '/docs/api/scenarios/createscenario', diff --git a/src/pages/docs/api/run-tests/addevalconfigs.mdx b/src/pages/docs/api/run-tests/addevalconfigs.mdx index bba830e2..90fed7d2 100644 --- a/src/pages/docs/api/run-tests/addevalconfigs.mdx +++ b/src/pages/docs/api/run-tests/addevalconfigs.mdx @@ -10,10 +10,27 @@ description: "Adds evaluation configurations to a test run." 
parameters={[ {"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run to add evaluation configurations to.", "type": "string"} ]} - requestBody={{"evaluationsConfig": [{"templateId": "your-template-id", "name": "My Eval Config", "config": {}, "mapping": {}, "errorLocalizer": false, "model": "turing_large"}]}} - responseExample={{"message": "Evaluation configs added successfully"}} - responseStatus={200} - responseStatusText="OK" + requestBody={{"evaluations_config": [{"template_id": "your-template-id", "name": "My Eval Config", "config": {}, "mapping": {}, "filters": {}, "error_localizer": false, "model": "turing_large"}]}} + responseExample={{ + message: "Successfully added 1 evaluation config(s) to run test", + created_eval_configs: [ + { + id: "ec1b2c3d-e5f6-7890-abcd-ef1234567890", + name: "My Eval Config", + config: {}, + mapping: {}, + filters: {}, + error_localizer: false, + model: "turing_large", + status: null, + eval_group: null, + template_id: "your-template-uuid" + } + ], + run_test_id: "f7a8b9c0-d1e2-3456-789a-bcdef0123456" + }} + responseStatus={201} + responseStatusText="Created" /> @@ -32,12 +49,12 @@ description: "Adds evaluation configurations to a test run." - + Array of evaluation configuration objects. Each object supports the following fields: - - **`templateId`** (string, UUID, required) -- UUID of the evaluation template to use. + - **`template_id`** (string, UUID, required) -- UUID of the evaluation template to use. - - **`name`** (string, required) -- Name for this evaluation configuration. Must be unique within the test run. + - **`name`** (string, optional) -- Name for this evaluation configuration. Defaults to `Eval-` if omitted. Must be unique within the test run. - **`config`** (object, optional) -- Template-specific configuration parameters. @@ -45,27 +62,47 @@ description: "Adds evaluation configurations to a test run." 
- **`filters`** (object, optional) -- Filter criteria to restrict which test results are evaluated. - - **`errorLocalizer`** (boolean, optional) -- Enables granular error localization on evaluation failures. Defaults to `false`. + - **`error_localizer`** (boolean, optional) -- Enables granular error localization on evaluation failures. Defaults to `false`. - **`model`** (string, optional) -- Model to use for running this evaluation. - - Confirmation of successful addition. + + Confirmation message indicating how many evaluation configs were added. + + Array of created evaluation configuration objects. Each object contains: `id`, `name`, `config`, `mapping`, `filters`, `error_localizer`, `model`, `status`, `eval_group`, and `template_id`. + + UUID of the parent test run. + Non-fatal issues encountered while processing individual configs. Only present if partial failures occurred. - Invalid or missing fields such as a non-existent `templateId`, duplicate `name`, or malformed `config`/`mapping`. + Validation error. Common causes: empty `evaluations_config`, duplicate `name` within request, name already exists in test run, non-existent `template_id`. + ```json + { + "evaluations_config": ["Duplicate eval name 'My Eval Config' found in the request. Each evaluation config must have a unique name."] + } + ``` + Or for existing name conflict: + ```json + {"error": "An evaluation config with the name 'My Eval Config' already exists in this run test. Please use a different name."} + ``` Missing or invalid `X-Api-Key` or `X-Secret-Key` headers. No test run found with the specified `run_test_id`. + ```json + {"detail": "No RunTest matches the given query."} + ``` - Unexpected server error. Retry later or contact support. + Unexpected server error. 
+ ```json + {"error": "Failed to add evaluation configs: "} + ``` diff --git a/src/pages/docs/api/run-tests/compareevalsummaries.mdx b/src/pages/docs/api/run-tests/compareevalsummaries.mdx index 1c89ebb9..427e600d 100644 --- a/src/pages/docs/api/run-tests/compareevalsummaries.mdx +++ b/src/pages/docs/api/run-tests/compareevalsummaries.mdx @@ -11,7 +11,10 @@ description: "Compares evaluation summaries across multiple test executions." {"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run containing the executions to compare.", "type": "string"}, {"name": "execution_ids", "in": "query", "required": false, "description": "JSON-encoded array of test execution UUIDs to compare.", "type": "string"} ]} - responseExample={{"execution-uuid-1": {"evaluations": [{"name": "Tone Check", "average_score": 0.85}]}, "execution-uuid-2": {"evaluations": [{"name": "Tone Check", "average_score": 0.92}]}}} + responseExample={{ + "execution-uuid-1": [{"name": "Tone Check", "average_score": 0.85, "total_runs": 10, "passed": 8, "failed": 2}], + "execution-uuid-2": [{"name": "Tone Check", "average_score": 0.92, "total_runs": 10, "passed": 9, "failed": 1}] + }} responseStatus={200} responseStatusText="OK" /> @@ -33,25 +36,47 @@ description: "Compares evaluation summaries across multiple test executions." - JSON-encoded array of test execution UUIDs to compare. Must be URL-encoded. + JSON-encoded array of test execution UUIDs to compare. Must be URL-encoded. Example: `["uuid1","uuid2"]`. - Dictionary keyed by execution ID, each mapping to its evaluation summary metrics. + + Dictionary keyed by execution UUID. Each value is an array of evaluation summary objects for that execution. + + + Name of the evaluation configuration. + Average score across all evaluated calls. + Total evaluation runs for this config. + Number of passing evaluations. + Number of failing evaluations. + - Missing, malformed, or invalid `execution_ids` parameter. 
+ Missing, malformed, or empty `execution_ids` parameter. + ```json + {"execution_ids": ["execution_ids must be valid JSON"]} + ``` + Or when empty: + ```json + {"execution_ids": ["execution_ids list is required"]} + ``` Missing or invalid `X-Api-Key` or `X-Secret-Key` headers. No test run found with the specified `run_test_id`. + ```json + {"error": "RunTest not found."} + ``` - Unexpected server error. Retry later or contact support. + Unexpected server error. + ```json + {"error": "Unable to fetch eval summary"} + ``` diff --git a/src/pages/docs/api/run-tests/createruntest.mdx b/src/pages/docs/api/run-tests/createruntest.mdx index c8587dc8..f617407c 100644 --- a/src/pages/docs/api/run-tests/createruntest.mdx +++ b/src/pages/docs/api/run-tests/createruntest.mdx @@ -7,8 +7,8 @@ description: "Creates a new test run." method="POST" endpoint="/simulate/run-tests/create/" baseUrl="https://api.futureagi.com" - requestBody={{"name": "your-name", "description": "your-description", "scenarioIds": [], "agentDefinitionId": "your-agentDefinitionId", "agentVersion": "your-agentVersion", "evalConfigIds": [], "evaluationsConfig": [], "datasetRowIds": [], "enableToolEvaluation": true}} - responseExample={{"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "your-name", "description": "your-description", "agent_definition": "your-agentDefinitionId", "scenarios": [], "enable_tool_evaluation": true, "created_at": "2026-04-04T12:00:00Z", "updated_at": "2026-04-04T12:00:00Z"}} + requestBody={{"name": "your-name", "description": "your-description", "scenario_ids": [], "agent_definition_id": "your-agent-definition-id", "eval_config_ids": [], "evaluations_config": [], "dataset_row_ids": [], "enable_tool_evaluation": true}} + responseExample={{"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "your-name", "description": "your-description", "agent_definition": "your-agent-definition-id", "agent_version": null, "agent_definition_detail": null, "source_type": "agent_definition", 
"source_type_display": "Agent Definition", "scenarios": [], "scenarios_detail": [], "dataset_row_ids": [], "simulator_agent": null, "simulator_agent_detail": null, "simulate_eval_configs": [], "simulate_eval_configs_detail": [], "evals_detail": [], "organization": "org-uuid", "enable_tool_evaluation": true, "created_at": "2026-04-04T12:00:00Z", "updated_at": "2026-04-04T12:00:00Z", "last_run_at": null, "deleted": false, "deleted_at": null}} responseStatus={201} responseStatusText="Created" /> @@ -29,27 +29,27 @@ description: "Creates a new test run." Optional free-text description of the test run. - + Array of scenario UUIDs to execute against. Must contain at least one valid scenario ID. - + UUID of the agent definition to evaluate. - - UUID of a specific agent version to test against. Defaults to the currently active version if omitted. - - + Array of existing evaluation configuration UUIDs to associate with this test run. - + Array of inline evaluation configuration objects to create and associate. Each object must include `template_id`, `name`, `config`, and `mapping`. - + Array of dataset row UUIDs to restrict execution to specific data entries. If omitted, all rows are included. - + When `true`, evaluates correctness of tool calls made by the agent. Defaults to `false`. + + Optional UUID of a session to replay. When provided, execution replays the specified session. + @@ -65,9 +65,45 @@ description: "Creates a new test run." UUID of the associated agent definition. + + UUID of the specific agent version, or `null` if using the active version. + + + Detailed agent definition object, or `null`. + + + Source type identifier (e.g. `"agent_definition"`). + + + Human-readable source type label (e.g. `"Agent Definition"`). + Array of linked scenario UUIDs. + + Array of detailed scenario objects. + + + Array of dataset row UUIDs associated with this test run. + + + UUID of the simulator agent, or `null`. + + + Detailed simulator agent object, or `null`. 
+ + + Array of evaluation configuration UUIDs. + + + Array of detailed evaluation configuration objects. + + + Array of detailed evaluation result objects. + + + UUID of the owning organization. + Whether tool evaluation is enabled. @@ -77,6 +113,15 @@ description: "Creates a new test run." ISO 8601 last-modified timestamp. + + ISO 8601 timestamp of the most recent execution, or `null`. + + + Whether the test run has been soft-deleted. + + + ISO 8601 timestamp of soft-deletion, or `null`. + diff --git a/src/pages/docs/api/run-tests/deleteevalconfig.mdx b/src/pages/docs/api/run-tests/deleteevalconfig.mdx index ed13dcfe..2c58cf38 100644 --- a/src/pages/docs/api/run-tests/deleteevalconfig.mdx +++ b/src/pages/docs/api/run-tests/deleteevalconfig.mdx @@ -41,14 +41,23 @@ description: "Deletes an evaluation configuration from a test run." Cannot delete the last remaining evaluation configuration in the test run. + ```json + {"error": "Cannot delete the last evaluation config. At least one evaluation config must remain."} + ``` Missing or invalid `X-Api-Key` or `X-Secret-Key` headers. Test run or evaluation configuration not found. + ```json + {"error": "Evaluation config not found"} + ``` - Unexpected server error. Retry later or contact support. + Unexpected server error. + ```json + {"error": "Failed to delete evaluation config: "} + ``` diff --git a/src/pages/docs/api/run-tests/deletetestexecutions.mdx b/src/pages/docs/api/run-tests/deletetestexecutions.mdx index 6855a0c9..195e47a6 100644 --- a/src/pages/docs/api/run-tests/deletetestexecutions.mdx +++ b/src/pages/docs/api/run-tests/deletetestexecutions.mdx @@ -10,7 +10,7 @@ description: "Bulk-deletes test executions from a test run." 
parameters={[ {"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run from which to delete test executions.", "type": "string"} ]} - requestBody={{"testExecutionIds": ["execution-uuid-1", "execution-uuid-2"], "selectAll": false}} + requestBody={{"test_execution_ids": ["execution-uuid-1", "execution-uuid-2"], "select_all": false}} responseExample={{"message": "Successfully deleted 2 test execution(s).", "runTestId": "run-test-uuid", "deletedCount": 2, "deletedIds": ["execution-uuid-1", "execution-uuid-2"]}} responseStatus={200} responseStatusText="OK" @@ -32,11 +32,11 @@ description: "Bulk-deletes test executions from a test run." - - Array of test execution UUIDs to delete. Required when `selectAll` is `false`. Executions in `RUNNING`, `PENDING`, or `CANCELLING` status cannot be deleted. + + Array of test execution UUIDs to delete. Required when `select_all` is `false`. Executions in `RUNNING`, `PENDING`, or `CANCELLING` status cannot be deleted. - - When `true`, deletes all eligible executions, ignoring `testExecutionIds`. Defaults to `false`. + + When `true`, deletes all eligible executions, ignoring `test_execution_ids`. Defaults to `false`. @@ -49,7 +49,7 @@ description: "Bulk-deletes test executions from a test run." - Invalid request, empty `testExecutionIds`, or targeted executions are still running/pending/cancelling. + Invalid request, empty `test_execution_ids`, or targeted executions are still running/pending/cancelling. Missing or invalid `X-Api-Key` or `X-Secret-Key` headers. diff --git a/src/pages/docs/api/run-tests/executeruntest.mdx b/src/pages/docs/api/run-tests/executeruntest.mdx index 3a3b5042..0a0544e5 100644 --- a/src/pages/docs/api/run-tests/executeruntest.mdx +++ b/src/pages/docs/api/run-tests/executeruntest.mdx @@ -10,7 +10,7 @@ description: "Executes a test run." 
parameters={[ {"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run to execute.", "type": "string"} ]} - requestBody={{"selectAll": true, "scenarioIds": [], "simulatorId": "your-simulatorId"}} + requestBody={{"select_all": true, "scenario_ids": [], "simulator_id": "your-simulator-id"}} responseExample={{"message": "Test execution started successfully", "execution_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "run_test_id": "b2c3d4e5-f6a7-8901-bcde-f12345678901", "status": "PENDING", "total_scenarios": 5, "total_calls": 25, "scenario_ids": []}} responseStatus={200} responseStatusText="OK" @@ -32,13 +32,13 @@ description: "Executes a test run." - - When `true`, all scenarios run except those in `scenarioIds` (exclusion mode). When `false`, only those in `scenarioIds` run (inclusion mode). + + When `true`, all scenarios run except those in `scenario_ids` (exclusion mode). When `false`, only those in `scenario_ids` run (inclusion mode). - - Array of scenario UUIDs to include or exclude based on `selectAll`. If empty, all scenarios run. + + Array of scenario UUIDs to include or exclude based on `select_all`. If empty, all scenarios run. - + UUID of a simulator agent to use. Defaults to the test run or organization default if omitted. diff --git a/src/pages/docs/api/run-tests/getcallexecutions.mdx b/src/pages/docs/api/run-tests/getcallexecutions.mdx deleted file mode 100644 index a489c106..00000000 --- a/src/pages/docs/api/run-tests/getcallexecutions.mdx +++ /dev/null @@ -1,133 +0,0 @@ ---- -title: "Get call executions for a test run" -description: "Lists call executions for a test run." ---- - - - - - - Your Future AGI API key used to authenticate requests. You can find and manage your API keys in the [Dashboard](https://app.futureagi.com) under Settings. - - - Your Future AGI secret key, used alongside the API key for request authentication. This is generated when you create an API key in the [Dashboard](https://app.futureagi.com). 
- - - - - - UUID of the test run whose call executions to retrieve. - - - - - - Case-insensitive partial match on phone number or scenario name. - - - Filter by call status. - - - Number of records per page. Defaults to `10`. Must be a positive integer. - - - Page number to retrieve. Defaults to `1`. - - - - - - Total matching call executions across all pages. - - - URL to the next page, or `null` if on the last page. - - - URL to the previous page, or `null` if on the first page. - - - Array of call execution objects for the current page. - - - - UUID of the call execution. - - - Session identifier for external correlation, or `null`. - - - Current call status. - - - Call duration in seconds, or `null` if not completed. - - - ISO 8601 call start timestamp, or `null` if not started. - - - Ordered conversation transcript. Empty array if unavailable. - - - Object with `id` and `name` of the source scenario, or `null` if deleted. - - - Aggregate evaluation score (0-100), or `null` if not yet computed. - - - Raw outputs from each evaluation metric, or `null` if not processed. - - - Computed evaluation metric values by metric name, or `null`. - - - Simulated customer name from scenario data, or `null`. - - - Auto-generated call summary, or `null`. - - - Reason the call ended (e.g., `"completed"`, `"timeout"`, `"error"`), or `null`. - - - Average agent response latency in seconds, or `null`. - - - UUID of the source scenario, or `null`. - - - Whether this is a preserved historical snapshot. - - - Type of rerun, or `null` for original executions. - - - ISO 8601 creation timestamp. - - - - - - - Missing or invalid `X-Api-Key` or `X-Secret-Key` headers. - - - No test run found with the specified `run_test_id`. - - - Unexpected server error. Contact support if it persists. 
- - diff --git a/src/pages/docs/api/run-tests/getevalsummary.mdx b/src/pages/docs/api/run-tests/getevalsummary.mdx index eec23def..6179ee3b 100644 --- a/src/pages/docs/api/run-tests/getevalsummary.mdx +++ b/src/pages/docs/api/run-tests/getevalsummary.mdx @@ -66,8 +66,14 @@ description: "Returns the evaluation summary for a test run." No test run found with the specified `run_test_id`. + ```json + {"error": "RunTest not found."} + ``` - Unexpected server error. Contact support if it persists. + Unexpected server error. + ```json + {"error": "Unable to fetch eval summary"} + ``` diff --git a/src/pages/docs/api/run-tests/reruntestexecutions.mdx b/src/pages/docs/api/run-tests/reruntestexecutions.mdx index 5498de5e..f8099b89 100644 --- a/src/pages/docs/api/run-tests/reruntestexecutions.mdx +++ b/src/pages/docs/api/run-tests/reruntestexecutions.mdx @@ -10,7 +10,7 @@ description: "Reruns test executions." parameters={[ {"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run containing the executions to rerun.", "type": "string"} ]} - requestBody={{"rerunType": "eval_only", "testExecutionIds": ["execution-uuid-1"], "selectAll": false}} + requestBody={{"rerun_type": "eval_only", "test_execution_ids": ["execution-uuid-1"], "select_all": false}} responseExample={{"message": "Rerun initiated successfully"}} responseStatus={200} responseStatusText="OK" @@ -32,14 +32,14 @@ description: "Reruns test executions." - + Type of rerun. `eval_only` re-runs evaluations on existing call data. `call_and_eval` re-executes calls and evaluations from scratch. - - Array of test execution UUIDs to rerun. Required when `selectAll` is `false`. + + Array of test execution UUIDs to rerun. Required when `select_all` is `false`. - - When `true`, reruns all executions, ignoring `testExecutionIds`. Defaults to `false`. + + When `true`, reruns all executions, ignoring `test_execution_ids`. Defaults to `false`. @@ -49,7 +49,7 @@ description: "Reruns test executions." 
- Invalid or missing `rerunType`, or no executions specified. + Invalid or missing `rerun_type`, or no executions specified. Missing or invalid `X-Api-Key` or `X-Secret-Key` headers. diff --git a/src/pages/docs/api/run-tests/runnewevalsontestexecution.mdx b/src/pages/docs/api/run-tests/runnewevalsontestexecution.mdx index 72d0396d..5ef9624b 100644 --- a/src/pages/docs/api/run-tests/runnewevalsontestexecution.mdx +++ b/src/pages/docs/api/run-tests/runnewevalsontestexecution.mdx @@ -10,7 +10,7 @@ description: "Runs new evaluations on completed test executions." parameters={[ {"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run containing the executions to evaluate.", "type": "string"} ]} - requestBody={{"testExecutionIds": ["execution-uuid-1"], "selectAll": false, "evalConfigIds": ["eval-config-uuid-1"], "enableToolEvaluation": false}} + requestBody={{"test_execution_ids": ["execution-uuid-1"], "select_all": false, "eval_config_ids": ["eval-config-uuid-1"], "enable_tool_evaluation": false}} responseExample={{"message": "Evaluations started successfully", "run_test_id": "run-test-uuid", "call_execution_count": 5}} responseStatus={200} responseStatusText="OK" @@ -32,16 +32,16 @@ description: "Runs new evaluations on completed test executions." - - Array of test execution UUIDs to evaluate. Required when `selectAll` is `false`. Only `COMPLETED` executions are eligible. + + Array of test execution UUIDs to evaluate. Required when `select_all` is `false`. Only `COMPLETED` executions are eligible. - - When `true`, evaluates all completed executions, ignoring `testExecutionIds`. Defaults to `false`. + + When `true`, evaluates all completed executions, ignoring `test_execution_ids`. Defaults to `false`. - + Array of evaluation configuration UUIDs to run on the selected executions. - + When `true`, also evaluates tool usage by the agent. Defaults to `false`. 
@@ -54,15 +54,32 @@ description: "Runs new evaluations on completed test executions." - Missing or empty `evalConfigIds`, no executions specified, or no completed executions found. + Validation error. Common causes: missing `eval_config_ids`, neither `select_all` nor `test_execution_ids` provided, no completed executions found. + ```json + {"error": "Either 'select_all' must be True or 'test_execution_ids' must be provided"} + ``` + Or when no completed executions exist: + ```json + {"error": "No test executions found to run evaluations on."} + ``` + Or when executions are not completed: + ```json + {"error": "Only test executions with COMPLETED status can have new evaluations run on them."} + ``` Missing or invalid `X-Api-Key` or `X-Secret-Key` headers. No test run found with the specified `run_test_id`. + ```json + {"detail": "No RunTest matches the given query."} + ``` - Unexpected server error. Retry later or contact support. + Unexpected server error. + ```json + {"error": "Failed to run evaluations: "} + ``` diff --git a/src/pages/docs/api/run-tests/updateevalconfig.mdx b/src/pages/docs/api/run-tests/updateevalconfig.mdx index f357947c..18415384 100644 --- a/src/pages/docs/api/run-tests/updateevalconfig.mdx +++ b/src/pages/docs/api/run-tests/updateevalconfig.mdx @@ -1,18 +1,29 @@ --- title: "Update evaluation configuration" -description: "Updates an evaluation configuration for a test run." +description: "Updates an evaluation configuration for a test run, optionally triggering a rerun." --- @@ -45,38 +56,58 @@ description: "Updates an evaluation configuration for a test run." Model to use for evaluations. - + Enable granular error localization in evaluation results. - - UUID of a knowledge base to use for grounding. + + UUID of a knowledge base to use for grounding. Pass `null` to clear. - Updated name for the evaluation configuration. + Updated name for the evaluation configuration. Cannot be blank. - When `true`, triggers an immediate rerun after updating. 
Defaults to `false`. + When `true`, triggers an immediate rerun after updating. Defaults to `false`. Requires `test_execution_id` when set to `true`. - + UUID of the test execution to rerun against. Required when `run` is `true`. - Confirmation message. + Confirmation of successful update. + UUID of the updated evaluation config. + UUID of the parent test run. + UUID of the test execution that was rerun. Only present when `run=true`. + Number of call executions queued for re-evaluation. Only present when `run=true`. + Additional context about parallel task spawning. Only present when `run=true`. - Invalid data or missing `testExecutionId` when `run` is `true`. + Validation error. The response includes a `details` object with per-field errors. + ```json + { + "test_execution_id": ["test_execution_id is required when run is true"] + } + ``` + Or when the test execution has an incompatible status: + ```json + {"error": "Only test executions with COMPLETED, CANCELLED, or FAILED status can have evaluations rerun"} + ``` Invalid or missing API credentials. Test run or evaluation configuration not found. + ```json + {"detail": "No RunTest matches the given query."} + ``` Unexpected server error. + ```json + {"error": "Failed to update evaluation config: "} + ``` diff --git a/src/pages/docs/api/run-tests/updatetestcomponents.mdx b/src/pages/docs/api/run-tests/updatetestcomponents.mdx deleted file mode 100644 index faa7b26a..00000000 --- a/src/pages/docs/api/run-tests/updatetestcomponents.mdx +++ /dev/null @@ -1,69 +0,0 @@ ---- -title: "Update test run components" -description: "Updates components of a test run." ---- - - - - - - Your Future AGI API key used to authenticate requests. You can find and manage your API keys in the [Dashboard](https://app.futureagi.com) under Settings. - - - Your Future AGI secret key, used alongside the API key for request authentication. This is generated when you create an API key in the [Dashboard](https://app.futureagi.com). 
- - - - - - The test run ID. - - - - - - UUID of the new agent definition. - - - UUID of a specific agent version. Defaults to the active version if omitted. - - - UUID of the simulator agent. - - - Array of scenario UUIDs. Replaces the entire set. - - - Enable tool call evaluation. Requires `api_key` and `assistant_id` on the agent. Defaults to `false`. - - - - - Updated test run object. - - - - - Invalid data or missing prerequisites for tool evaluation. - - - Invalid or missing API credentials. - - - Test run not found. - - - Unexpected server error. - - diff --git a/src/pages/docs/api/scenarios/addcolumns.mdx b/src/pages/docs/api/scenarios/addcolumns.mdx index 4a573518..ab2267e8 100644 --- a/src/pages/docs/api/scenarios/addcolumns.mdx +++ b/src/pages/docs/api/scenarios/addcolumns.mdx @@ -1,6 +1,6 @@ --- title: "Add Columns to Scenario" -description: "Adds new AI-generated columns to a scenario's dataset." +description: "Adds new AI-generated columns to a scenario's dataset. Returns 202 Accepted and runs asynchronously." --- - The scenario ID. The dataset must have at least one row. + The scenario ID. The scenario must have an associated dataset with at least one row. - Column definitions to add (1--10 per request). Names must be unique in the dataset. Each column: `name` (string, max 50 chars), `dataType` (`text`, `boolean`, `integer`, `float`, `json`, `array`, `image`, `images`, `datetime`, `audio`, `document`, `others`, `persona`), `description` (string, max 200 chars, guides AI generation). + Column definitions to add. Min 1, max 10 per request. Column names must be unique within the request and must not already exist in the dataset. + + Each column object: + + + Column name. Max 50 characters. Cannot be blank or whitespace-only. Must be unique within the request and not already present in the dataset. + + + Column data type. 
One of: `text`, `boolean`, `integer`, `float`, `json`, `array`, `image`, `images`, `datetime`, `audio`, `document`, `others`, `persona`. + + + Column description. Max 200 characters. Guides the AI when generating values. + + Confirmation that column generation has started. UUID of the scenario. UUID of the underlying dataset. - Names of the columns being generated. + Names of the columns being generated. - Invalid columns: missing fields, duplicates, exceeds 10-column limit, or dataset has no rows. + Invalid request or dataset state. The response includes an `error` message and (for field errors) a `details` object. + ```json + { + "columns": "Column 'expected_outcome' already exists in the dataset." + } + ``` + Or: + ```json + { + "columns": "Duplicate column name(s): difficulty_level" + } + ``` + Common causes: + - No associated dataset + - Dataset has no rows + - Column name already exists in the dataset + - Duplicate column names within the request + - More than 10 columns submitted + - Invalid `data_type` value Invalid or missing API credentials. - Scenario not found. + Scenario not found or does not belong to your organization. + ```json + {"error": "Scenario not found."} + ``` Unexpected server error. + ```json + {"error": "Failed to add columns: "} + ``` diff --git a/src/pages/docs/api/scenarios/addemptyrowstodataset.mdx b/src/pages/docs/api/scenarios/addemptyrowstodataset.mdx deleted file mode 100644 index 7898a134..00000000 --- a/src/pages/docs/api/scenarios/addemptyrowstodataset.mdx +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: "Add Empty Rows to Scenario" -description: "Adds empty rows to a scenario's dataset." ---- - - - - - - Your Future AGI API key used to authenticate requests. You can find and manage your API keys in the [Dashboard](https://app.futureagi.com) under Settings. - - - Your Future AGI secret key, used alongside the API key for request authentication. 
This is generated when you create an API key in the [Dashboard](https://app.futureagi.com). - - - - - - The dataset ID. - - - - - - Number of empty rows to add. Must be a positive integer. - - - - - `true` on success. - Confirmation message. - - - - - Invalid `num_rows` value or dataset not found. - - - Invalid or missing API credentials. - - - Dataset not found. - - - Unexpected server error. - - diff --git a/src/pages/docs/api/scenarios/addscenariorowswithai.mdx b/src/pages/docs/api/scenarios/addscenariorowswithai.mdx index 1369b86f..6fcb8466 100644 --- a/src/pages/docs/api/scenarios/addscenariorowswithai.mdx +++ b/src/pages/docs/api/scenarios/addscenariorowswithai.mdx @@ -1,6 +1,6 @@ --- title: "Add Rows to Scenario with AI" -description: "Generates and adds new rows to a scenario's dataset using AI." +description: "Generates and adds new rows to a scenario's dataset using AI. Returns 202 Accepted and runs asynchronously." --- - The scenario ID. Must have an associated dataset. + The scenario ID. The scenario must have an associated dataset. - - Number of rows to generate. Range: 10--100. + + Number of rows to generate. Range: 10–20000. @@ -51,20 +51,39 @@ description: "Generates and adds new rows to a scenario's dataset using AI." Confirmation that row generation has started. UUID of the scenario. UUID of the underlying dataset. - Number of rows being generated. + Number of rows being generated. - No associated dataset or `numRows` outside 10--100. + Invalid request or scenario state. The response includes an `error` message. + ```json + {"error": "Scenario does not have an associated dataset."} + ``` + Or for validation failures: + ```json + { + "error": "…", + "details": { + "num_rows": ["Number of rows must be at least 10."] + } + } + ``` + Common causes: no associated dataset, `num_rows` below 10 or above 20000. Invalid or missing API credentials. - Scenario not found. + Scenario not found or does not belong to your organization. 
+ ```json + {"error": "Scenario not found."} + ``` Unexpected server error. + ```json + {"error": "Failed to add rows: "} + ``` diff --git a/src/pages/docs/api/scenarios/createscenario.mdx b/src/pages/docs/api/scenarios/createscenario.mdx index e7485fe7..8328b531 100644 --- a/src/pages/docs/api/scenarios/createscenario.mdx +++ b/src/pages/docs/api/scenarios/createscenario.mdx @@ -1,6 +1,6 @@ --- title: "Create Scenario" -description: "Creates a new scenario from a dataset, script, or conversation graph." +description: "Creates a new scenario from a dataset, script, or conversation graph. Returns 202 Accepted and enqueues generation asynchronously." --- - Name for the scenario. Max 255 characters. + Name for the scenario. Max 255 characters. Cannot be blank or whitespace-only. + + + + Scenario type: `"dataset"` (default), `"script"`, or `"graph"`. + + + + Source for AI-powered generation: `"agent_definition"` (default) or `"prompt"`. + When `"prompt"`, both `prompt_template_id` and `prompt_version_id` are required. Optional description of the scenario. - - Scenario type: `"dataset"` (default), `"script"`, or `"graph"`. + + UUID of the source dataset. **Required** when `kind` is `"dataset"`. + + + + URL of the call script file. **Required** when `kind` is `"script"`. - - UUID of the agent definition to test. Required when `generateGraph` is `true` or `sourceType` is `"agent_definition"`. + + UUID of the agent definition to test. Required when `generate_graph` is `true` and `source_type` is `"agent_definition"`. - + UUID of a specific agent version. Defaults to the latest version. - - UUID of the source dataset. Required when `kind` is `"dataset"`. + + Auto-generate a conversation graph from the agent definition or prompt template. Default: `false`. - - URL of the script file. Required when `kind` is `"script"`. + + Conversation graph data. **Required** when `kind` is `"graph"` and `generate_graph` is `false`. - - Number of test case rows to generate. Range: 10--100. 
Default: 20. + + Number of test case rows to generate. Range: 10–20000. Default: `20`. - + Automatically assign diverse personas to generated test cases. Default: `false`. - - Conversation graph defining the simulated flow. Required when `kind` is `"graph"` and `generateGraph` is `false`. + + List of persona UUIDs to include in the scenario. - - Auto-generate a conversation graph from the agent definition. Requires `agentDefinitionId`. Default: `false`. + + Custom column definitions (max 10). No duplicate names allowed. + Each column must have: + - `name` (string, max 50 chars) + - `data_type` (one of: `text`, `boolean`, `integer`, `float`, `json`, `array`, `image`, `images`, `datetime`, `audio`, `document`, `others`, `persona`) + - `description` (string, max 200 chars) - - List of persona UUIDs to include in the scenario. + + UUID of the prompt template. **Required** when `source_type` is `"prompt"`. - - Source for AI-powered generation: `"agent_definition"` (default) or `"prompt"`. `"prompt"` requires `promptTemplateId` and `promptVersionId`. + + UUID of the prompt version. **Required** when `source_type` is `"prompt"`. Must belong to `prompt_template_id`. - - UUID of the prompt template. Required when `sourceType` is `"prompt"`. + + Additional instruction to steer AI scenario generation. - - UUID of the prompt version. Required when `sourceType` is `"prompt"`. Must belong to `promptTemplateId`. + + Voice provider for simulator agent. Default: `"elevenlabs"`. - - Custom column definitions (max 10). Each column: `name` (string, max 50 chars, unique), `dataType` (`text`, `boolean`, `integer`, `float`, `json`, `array`, `image`, `images`, `datetime`, `audio`, `document`, `others`, `persona`), `description` (string, max 200 chars). + + Voice name for simulator agent. Default: `"marissa"`. - - Additional instruction to steer AI scenario generation. + + LLM model for simulator agent. Default: `"gpt-4"`. - Confirmation that scenario creation has been queued. 
- Created scenario with `id` and `name`. - Processing status. Initially `"processing"`, then `"completed"`. + Confirmation that scenario creation has been queued (e.g. `"Dataset scenario creation started"`). + Created scenario object (full `ScenarioSchema` — see [List Scenarios](/docs/api/scenarios/listscenarios) for field reference). + Always `"processing"` on initial response. Poll [Get Scenario](/docs/api/scenarios/getscenario) for the final status. - Missing or invalid fields such as blank `name`, missing `datasetId`/`scriptUrl`, or invalid custom columns. + Validation error. The response includes an `error` message and a `details` object with per-field errors. + ```json + { + "error": "Invalid data", + "details": { + "dataset_id": ["dataset_id is required for dataset kind."], + "custom_columns": ["Duplicate column name(s): col_name"] + } + } + ``` + Common causes: + - `name` is blank or whitespace-only + - `dataset_id` missing when `kind="dataset"` + - `script_url` missing when `kind="script"` + - `graph` and `generate_graph` both absent when `kind="graph"` + - `prompt_template_id` or `prompt_version_id` missing when `source_type="prompt"` + - Duplicate column names in `custom_columns` + - Persona column in source dataset has wrong `data_type` Invalid or missing API credentials. Unexpected server error. + ```json + {"error": "Failed to create scenario: "} + ``` diff --git a/src/pages/docs/api/scenarios/deletescenario.mdx b/src/pages/docs/api/scenarios/deletescenario.mdx index da47d376..01b3269d 100644 --- a/src/pages/docs/api/scenarios/deletescenario.mdx +++ b/src/pages/docs/api/scenarios/deletescenario.mdx @@ -1,6 +1,6 @@ --- title: "Delete Scenario" -description: "Soft-deletes a scenario by marking it as deleted." +description: "Soft-deletes a scenario by marking it as deleted. The scenario is not removed from the database." --- - Confirmation of successful deletion. + Confirmation of successful deletion: `"Scenario deleted successfully"`. 
@@ -39,9 +39,15 @@ description: "Soft-deletes a scenario by marking it as deleted." Invalid or missing API credentials. - Scenario not found. + Scenario not found or does not belong to your organization. + ```json + {"error": "Scenario not found."} + ``` Unexpected server error. + ```json + {"error": "Failed to delete scenario: "} + ``` diff --git a/src/pages/docs/api/scenarios/editscenario.mdx b/src/pages/docs/api/scenarios/editscenario.mdx index 3ee16087..585707f6 100644 --- a/src/pages/docs/api/scenarios/editscenario.mdx +++ b/src/pages/docs/api/scenarios/editscenario.mdx @@ -1,6 +1,6 @@ --- title: "Edit Scenario" -description: "Updates a scenario's name, description, graph, or simulator prompt." +description: "Updates a scenario's name, description, graph data, or simulator agent prompt." --- - Updated scenario name. Max 255 characters, cannot be blank. + Updated scenario name. Max 255 characters. Cannot be blank or whitespace-only. @@ -46,30 +53,44 @@ description: "Updates a scenario's name, description, graph, or simulator prompt - Updated conversation graph structure. + Updated conversation graph structure. Replaces the active `ScenarioGraph.graph_config.graph_data`. If no active graph exists, a new one is created. - Updated simulator agent prompt. Supports `{{persona}}` and `{{situation}}` template variables. + Updated simulator agent prompt text. Replaces the `simulator_agent.prompt` field. - Confirmation of successful update. - Updated scenario object. + Confirmation of successful update: `"Scenario updated successfully"`. + Updated scenario object (full `ScenarioSchema` — see [List Scenarios](/docs/api/scenarios/listscenarios) for field reference). - Invalid request body, such as an empty `name`. + Validation error. The response includes an `error` message and a `details` object with per-field errors. + ```json + { + "error": "…", + "details": { + "name": ["Name cannot be empty or just whitespace."] + } + } + ``` Invalid or missing API credentials. 
- Scenario not found. + Scenario not found or does not belong to your organization. + ```json + {"error": "Scenario not found."} + ``` Unexpected server error. + ```json + {"error": "Failed to update scenario: "} + ``` diff --git a/src/pages/docs/api/scenarios/getscenario.mdx b/src/pages/docs/api/scenarios/getscenario.mdx index 9f19d10b..4282c4dd 100644 --- a/src/pages/docs/api/scenarios/getscenario.mdx +++ b/src/pages/docs/api/scenarios/getscenario.mdx @@ -14,12 +14,16 @@ description: "Retrieves a scenario by ID, including its graph, prompts, and data description: "Tests billing questions", source: "dataset", scenario_type: "dataset", - dataset_id: "uuid-1", + dataset_id: "d4e5f6a7-b8c9-0123-def0-123456789abc", + organization: "org-uuid", + dataset: "d4e5f6a7-b8c9-0123-def0-123456789abc", agent_type: "voice", - status: "completed", + status: "Completed", graph: {}, - prompts: [{ role: "system", content: "You are simulating a customer..." }], + prompts: [{ role: "system", content: "You are simulating a customer calling about a billing issue." }], dataset_rows: 20, + deleted: false, + deleted_at: null, created_at: "2026-03-15T10:30:00Z", updated_at: "2026-03-15T10:35:00Z" }} @@ -47,17 +51,25 @@ description: "Retrieves a scenario by ID, including its graph, prompts, and data Scenario name. Scenario description. Data source used to create the scenario. - Type: `dataset`, `script`, or `graph`. - UUID of the underlying dataset. `null` if none. - Agent type: `voice` or `text`. - Status: `processing`, `completed`, or `failed`. + Type: `"dataset"`, `"script"`, or `"graph"`. + UUID of the underlying dataset. `null` if none. + Organization UUID. + UUID of the underlying dataset (same as `dataset_id`). `null` if none. + + Raw agent type value: `"voice"` or `"text"`. `null` if undetermined. 
+ + **Note:** This endpoint returns the raw `AgentDefinition.agent_type` value (`"voice"` or `"text"`), which differs from the List Scenarios endpoint (which returns `"inbound"`, `"outbound"`, `"chat"`, or `"prompt"`). + + Current status (e.g. `"Processing"`, `"Completed"`, `"Failed"`). Conversation graph structure. `{}` if none. Simulator agent prompts. - - Prompt role (e.g., `"system"`). - Prompt text. + + Prompt role: `"system"`, `"user"`, or `"assistant"`. + Prompt text content. - Number of test case rows. + Number of test case rows. `0` if no dataset. + Whether the scenario is soft-deleted. + Deletion timestamp. `null` if not deleted. ISO 8601 creation timestamp. ISO 8601 last-modified timestamp. @@ -67,9 +79,15 @@ description: "Retrieves a scenario by ID, including its graph, prompts, and data Invalid or missing API credentials. - Scenario not found. + Scenario not found or does not belong to your organization. + ```json + {"error": "Scenario not found."} + ``` Unexpected server error. + ```json + {"error": "Failed to retrieve scenario: "} + ``` diff --git a/src/pages/docs/api/scenarios/listscenarios.mdx b/src/pages/docs/api/scenarios/listscenarios.mdx index de8966ca..196d1554 100644 --- a/src/pages/docs/api/scenarios/listscenarios.mdx +++ b/src/pages/docs/api/scenarios/listscenarios.mdx @@ -8,17 +8,44 @@ description: "Returns a paginated list of scenarios with optional search and fil endpoint="/simulate/scenarios/" baseUrl="https://api.futureagi.com" parameters={[ - {"name": "search", "in": "query", "required": false, "description": "Filter scenarios by name or source.", "type": "string"}, - {"name": "limit", "in": "query", "required": false, "description": "Number of items per page. Default: 10.", "type": "integer"}, - {"name": "page", "in": "query", "required": false, "description": "Page number. 
Default: 1.", "type": "integer"}, + {"name": "search", "in": "query", "required": false, "description": "Filter scenarios by name, source, or type.", "type": "string"}, {"name": "agent_definition_id", "in": "query", "required": false, "description": "Filter by agent definition UUID.", "type": "string"}, - {"name": "agent_type", "in": "query", "required": false, "description": "Filter by agent type.", "type": "string"} + {"name": "agent_type", "in": "query", "required": false, "description": "Filter by agent type: \"voice\" or \"text\".", "type": "string"}, + {"name": "page", "in": "query", "required": false, "description": "Page number, starting from 1. Default: 1.", "type": "integer"}, + {"name": "limit", "in": "query", "required": false, "description": "Results per page. Default: 10.", "type": "integer"} ]} responseExample={{ count: 2, next: null, previous: null, - results: [{ id: "uuid-1", name: "billing-inquiry-scenario", description: "Tests billing questions", source: "dataset", scenario_type: "dataset", dataset_rows: 20, status: "completed", created_at: "2026-03-15T10:30:00Z" }] + results: [ + { + id: "f7a8b9c0-d1e2-3456-789a-bcdef0123456", + name: "billing-inquiry-scenario", + description: "Tests billing questions", + source: "dataset", + scenario_type: "dataset", + scenario_type_display: "Dataset", + source_type: "agent_definition", + source_type_display: "Agent Definition", + organization: "org-uuid", + dataset: "dataset-uuid", + dataset_rows: 20, + dataset_column_config: {}, + graph: {}, + agent: null, + prompt_template: null, + prompt_template_detail: null, + prompt_version: null, + prompt_version_detail: null, + agent_type: "outbound", + status: "Completed", + deleted: false, + deleted_at: null, + created_at: "2026-03-15T10:30:00Z", + updated_at: "2026-03-15T10:35:00Z" + } + ] }} responseStatus={200} responseStatusText="OK" @@ -35,13 +62,7 @@ description: "Returns a paginated list of scenarios with optional search and fil - Case-insensitive search 
against scenario name and source. - - - Results per page. Default: `10`. - - - Page number, starting from `1`. Default: `1`. + Case-insensitive filter against scenario name, source, and type. Filter by agent definition UUID. @@ -49,26 +70,44 @@ description: "Returns a paginated list of scenarios with optional search and fil Filter by agent type: `"voice"` or `"text"`. + + Page number, starting from `1`. Default: `1`. + + + Results per page. Default: `10`. + Total matching scenarios. - URL of the next page, or `null`. - URL of the previous page, or `null`. + URL of the next page, or `null`. + URL of the previous page, or `null`. Array of scenario objects. - + UUID of the scenario. Scenario name. Scenario description. - Data source. - Scenario type. - Display label for scenario type. - Source type classification. - Display label for source type. - Number of test case rows. - Agent type: `voice` or `text`. - Current status. + Data source label. + `"dataset"` | `"script"` | `"graph"`. + Human-readable scenario type label. + `"agent_definition"` | `"prompt"`. + Human-readable source type label. + Organization UUID. + UUID of the underlying dataset. `null` if none. + Number of test case rows. `0` if no dataset. + Map of column ID → `{name, type}`. `[]` if no dataset. + Conversation graph data. `{}` if none. + Simulator agent object. `null` if none. + `"inbound"` | `"outbound"` | `"chat"` | `"prompt"` | `null`. + Prompt template UUID. `null` if none. + Prompt template details. `null` if none. + Prompt version UUID. `null` if none. + Prompt version details. `null` if none. + Processing status (e.g. `"Processing"`, `"Completed"`, `"Failed"`). + Whether the scenario is soft-deleted. + Deletion timestamp. `null` if not deleted. ISO 8601 creation timestamp. + ISO 8601 last-modified timestamp. @@ -76,7 +115,16 @@ description: "Returns a paginated list of scenarios with optional search and fil Invalid or missing API credentials. 
+ + Organization not found for the authenticated user. + ```json + {"error": "Organization not found for the user."} + ``` + Unexpected server error. + ```json + {"error": "Failed to retrieve scenarios: "} + ``` diff --git a/src/pages/docs/api/test-executions/reruncalls.mdx b/src/pages/docs/api/test-executions/reruncalls.mdx index 8c0a8ed2..3e8e0135 100644 --- a/src/pages/docs/api/test-executions/reruncalls.mdx +++ b/src/pages/docs/api/test-executions/reruncalls.mdx @@ -10,7 +10,7 @@ description: "Reruns call executions within a test execution." parameters={[ {"name": "test_execution_id", "in": "path", "required": true, "description": "UUID of the test execution.", "type": "string"} ]} - requestBody={{"rerunType": "eval_only", "callExecutionIds": ["a1b2c3d4-e5f6-7890-abcd-ef1234567890"], "selectAll": false}} + requestBody={{"rerun_type": "eval_only", "call_execution_ids": ["a1b2c3d4-e5f6-7890-abcd-ef1234567890"], "select_all": false}} responseExample={{ message: "Rerun initiated successfully", testExecutionId: "f7a8b9c0-d1e2-3456-789a-bcdef0123456", @@ -41,14 +41,14 @@ description: "Reruns call executions within a test execution." - + The type of rerun to perform. Use `eval_only` to re-evaluate existing call data without re-executing the actual calls -- this is useful when you have updated your evaluation configurations and want to see updated scores without the cost of re-running calls. Use `call_and_eval` to fully re-execute the calls and then evaluate the new results -- this produces fresh conversations and is useful when you have modified the agent under test. Note that text agents only support `eval_only` reruns; attempting `call_and_eval` on a text agent will return a 400 error. - - An array of call execution UUIDs to rerun. Required when `selectAll` is `false` or not provided. Each ID must correspond to a valid call execution within the specified test execution. 
If a provided ID does not exist or does not belong to the test execution, it will appear in the `failedReruns` array of the response. + + An array of call execution UUIDs to rerun. Required when `select_all` is `false` or not provided. Each ID must correspond to a valid call execution within the specified test execution. If a provided ID does not exist or does not belong to the test execution, it will appear in the `failedReruns` array of the response. - - When set to `true`, all call executions within the test execution will be rerun, and the `callExecutionIds` field is ignored. Defaults to `false`. You must provide either `selectAll: true` or a non-empty `callExecutionIds` array -- the request will fail with a 400 error if neither is specified. + + When set to `true`, all call executions within the test execution will be rerun, and the `call_execution_ids` field is ignored. Defaults to `false`. You must provide either `select_all: true` or a non-empty `call_execution_ids` array -- the request will fail with a 400 error if neither is specified. @@ -81,7 +81,7 @@ description: "Reruns call executions within a test execution." - The rerun request could not be processed. This error occurs when: the `rerunType` field is missing or contains an invalid value; neither `callExecutionIds` nor `selectAll` was provided; the test execution is still in an active state (`pending`, `running`, or `cancelling`) and cannot accept reruns; or a `call_and_eval` rerun was requested for a text agent, which only supports `eval_only` reruns. Check the error message in the response body for specific details on which validation failed. + The rerun request could not be processed. 
This error occurs when: the `rerun_type` field is missing or contains an invalid value; neither `call_execution_ids` nor `select_all` was provided; the test execution is still in an active state (`pending`, `running`, or `cancelling`) and cannot accept reruns; or a `call_and_eval` rerun was requested for a text agent, which only supports `eval_only` reruns. Check the error message in the response body for specific details on which validation failed. The request could not be authenticated. Verify that both `X-Api-Key` and `X-Secret-Key` headers are present and contain valid, non-expired credentials. Ensure the API key has access to the workspace that owns this test execution. From e7659c924a4082d9bbb4e5e35819baec3b35aaf3 Mon Sep 17 00:00:00 2001 From: Anmol Shikha Date: Tue, 2 Jun 2026 17:29:05 +0530 Subject: [PATCH 2/2] docs(release-notes): add week of 2026-05-28 --- src/pages/docs/release-notes.mdx | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/pages/docs/release-notes.mdx b/src/pages/docs/release-notes.mdx index fea9aad8..69f84087 100644 --- a/src/pages/docs/release-notes.mdx +++ b/src/pages/docs/release-notes.mdx @@ -3,6 +3,54 @@ title: "Future AGI Release Notes: Features, Fixes, and Updates" description: "Latest Future AGI release notes covering new features, improvements, and bug fixes across datasets, evaluations, simulation, and observability products." --- +## Week of 2026-05-28 + +
+ +
Features
+ +- **Perplexity Sonar Models Now Available for Evaluations:** You can now use Perplexity's full Sonar model family (sonar, sonar-pro, sonar-reasoning, sonar-reasoning-pro, and sonar-deep-research), including the Agent API for agentic workflows with built-in web search, when running evaluations. Contributed by the Perplexity team in [PR #650](https://github.com/future-agi/future-agi/pull/650). + +- **Use System Prompt as Context in Evals:** You can now leverage your agent's system prompt as context when running system and custom evals, giving you a more complete view of how your prompts affect model outputs. + +- **New API: Access Eval Task Data Across Two Axes:** Two new API endpoints are now live. You can access eval task results via API in two ways: a per-evaluator summary (pass rates, average scores, and choice distributions across the full task) and a per-span breakdown (each evaluator's result for every individual span). Both support optional date range filtering. + +
Bugs/Improvements
+ +- **Eval Results in Observe Now Display Correctly:** In some cases, eval results in Trace Observe were not displaying correctly. This has been resolved and results now appear as expected. + +- **Eval Type Now Saved Correctly When Creating New Evaluations:** When creating an LLM-as-judge or Code evaluation, the selected type was previously always saved as Agent. The correct eval type is now preserved in all cases. + +- **Eval Experience Improvements Across the Platform:** A set of improvements to the eval creation and review experience: linking directly to a specific eval version now opens on that version; long task error messages collapse to a one-line summary with a Show more toggle; results no longer show stale data when switching between dataset, tracing, and simulation panels; variable highlighting in the prompt editor reflects which variables are actually mapped; clicking an execution row now opens that specific run rather than always opening the latest; and the ground-truth embedding status now updates in real time, with no page refresh needed. + +- **Sessions View from the Users Tab No Longer Times Out:** When navigating to the Sessions view from the Users tab, the page could get stuck on a loading screen or time out. The Sessions view now loads reliably from that entry point. + +- **Composite Evals No Longer Accept Other Composites as Children:** When building a composite evaluation, the child picker now only lists individual, non-composite evaluators. Previously, composite evals could be selected as children, which produced unexpected results. + +- **Usage and Billing Page Display Accuracy Improved:** Several display issues on the Usage and Billing page have been fixed: AI credits were showing incorrect units, time period labels on usage cards were inaccurate, chart axis labels showed duplicates or mixed formatting, and the pricing tier table was missing column headers and correct unit labels. 
+ +- **Observe Span and Trace List Loads Faster and More Reliably:** Several issues that caused slow or incomplete loading in Trace Observe have been fixed. Projects with larger trace volumes should see improved load times when browsing spans and traces. + +- **Filtering in Trace Observe Now Works Correctly:** Several filter issues have been resolved: multi-select filters such as node type, model, and span name were in some cases not being applied; Trace ID and Span ID fields now accept a single value and continue filtering correctly after a page reload; the icon next to active filter chips now opens the filter panel as expected; and cleared filters no longer reappear when returning to the same page. + +- **Column Order in Observe Grids Now Persists Across Refreshes:** Reordering columns in Trace, Spans, Sessions, and Voice grids would silently reset to the original order on the next auto-refresh. Column order now sticks across refreshes, and the display panel stays in sync with any changes. + +- **Custom Prompt Evaluators Now More Reliable:** In some cases, evaluation criteria that included output format instructions caused the evaluator to return no result. Evaluators now handle this reliably regardless of how the criteria are phrased. + +- **Nested Variable References Now Work in LLM-as-Judge Templates:** Variables that reference nested properties using dot notation were not rendering correctly in custom prompt evaluator templates. They now resolve and render as expected. + +- **Fix with Falcon Now Only Appears on Failing Evals:** The Fix with Falcon option previously appeared on both passing and failing eval rows. It now only appears on evals that are failing, not on every row. + +- **Resuming a Completed Eval Task Now Shows a Clear Message:** In some cases, clicking Resume on a task that had already finished showed a raw error. It now shows a clear message indicating the task may have already completed. 
+ +- **Instruction Validation Errors Now Visible When Creating Evals from Sessions:** When adding an LLM-as-judge evaluation from the Sessions view, validation errors on the instructions field were not shown, causing saves to silently fail. Error messages now appear inline as expected. + +- **Eval and Task List Chips Now Have Hover Feedback and a Stable Popover:** Chips in the Tasks and Evals list had no hover state, and the popover showing additional items closed as soon as the cursor moved toward it. Chips now darken on hover and the popover stays open as the cursor moves into it. + +- **Task Usage Table Columns No Longer Get Cut Off:** The Task Usage table was clipping right-side columns. The table now scrolls horizontally so every column stays visible. + +
+ ## Week of 2026-05-21