From f2dc402f506824f3227a24bf0fbc2e9a4598cf8d Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Wed, 10 Jun 2026 06:34:59 +0200 Subject: [PATCH] docs(results): clarify remote sync CLI contract --- apps/cli/src/commands/results/index.ts | 2 +- .../src/content/docs/docs/tools/dashboard.mdx | 9 ++++++ .../src/content/docs/docs/tools/results.mdx | 17 +++++++++- .../2026-06-10-remote-results-cli-contract.md | 32 +++++++++++++++++++ 4 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 docs/plans/2026-06-10-remote-results-cli-contract.md diff --git a/apps/cli/src/commands/results/index.ts b/apps/cli/src/commands/results/index.ts index e51d77d64..14687011d 100644 --- a/apps/cli/src/commands/results/index.ts +++ b/apps/cli/src/commands/results/index.ts @@ -11,7 +11,7 @@ import { resultsValidateCommand } from './validate.js'; export const resultsCommand = subcommands({ name: 'results', - description: 'Inspect, export, and manage evaluation results', + description: 'Inspect, export, and manage local evaluation results', cmds: { combine: resultsCombineCommand, delete: resultsDeleteCommand, diff --git a/apps/web/src/content/docs/docs/tools/dashboard.mdx b/apps/web/src/content/docs/docs/tools/dashboard.mdx index b60c97b1d..af29b089f 100644 --- a/apps/web/src/content/docs/docs/tools/dashboard.mdx +++ b/apps/web/src/content/docs/docs/tools/dashboard.mdx @@ -285,6 +285,8 @@ The `source` block and the `results` block sync different repositories: Use project-level **Sync Project** as the results exchange workflow. It handles pulled remote runs, locally edited metadata, dirty state, and blocked conflict feedback in one project-scoped action. +There is no separate `agentv results remote status` or `agentv results remote sync` command. The `agentv results` CLI stays focused on local run workspaces; manual remote exchange is Dashboard/API-only, with eval auto-export covering the common CI/publisher path. 
+ Each run writes to a unique timestamped directory, so concurrent pushes from multiple machines are safe — non-fast-forward conflicts are resolved automatically via rebase retry. ### What happens to existing local runs? @@ -303,6 +305,13 @@ Uses `gh` CLI and `git` credentials already configured on the machine. If authen Once configured, Dashboard reads local runs and the configured results repo clone for that project. The status endpoint does not fetch from the remote on every page load; use **Sync Project** when you want to exchange changes with the results repo remote. +Automation can use the same API that Dashboard uses: + +- `GET /api/projects/:projectId/remote/status` +- `POST /api/projects/:projectId/remote/sync` + +Single-project sessions also expose `GET /api/remote/status` and `POST /api/remote/sync`. + In the default multi-project flow, open a project card first, then use **Sync Project** in that project's toolbar. The toolbar shows the project display name, sync state, last synced time, configured repo, and remote run count. Statuses include clean, unavailable, behind, ahead, dirty, diverged, conflicted, and syncing. Use the **All Sources / Local Only / Remote Only** filter to narrow the run list by origin. diff --git a/apps/web/src/content/docs/docs/tools/results.mdx b/apps/web/src/content/docs/docs/tools/results.mdx index 9c798dd32..a00d9b67a 100644 --- a/apps/web/src/content/docs/docs/tools/results.mdx +++ b/apps/web/src/content/docs/docs/tools/results.mdx @@ -9,7 +9,9 @@ import { Image } from 'astro:assets'; import resultsReportOverview from '../../../../assets/screenshots/results-report-overview.png'; import resultsReportDetails from '../../../../assets/screenshots/results-report-details.png'; -The `results` command family works on existing AgentV run workspaces and `index.jsonl` manifests. Use it after an eval run to inspect failures, validate manifests, export artifact layouts, or generate a shareable HTML report. 
+The `results` command family works on existing local AgentV run workspaces and `index.jsonl` manifests. Use it after an eval run to inspect failures, validate manifests, export artifact layouts, combine/delete local run workspaces, or generate a shareable HTML report. + +Remote result repository exchange is intentionally not part of `agentv results`. New eval runs can auto-export to a configured results repo when `auto_push: true` is set; manual remote status and sync are Dashboard/API workflows. See [Dashboard Remote Results](/docs/tools/dashboard/#remote-results) for configuration and sync behavior. ## Subcommands @@ -17,6 +19,8 @@ The `results` command family works on existing AgentV run workspaces and `index. |-----------|---------| | `results report` | Generate a self-contained static HTML report from an existing run workspace | | `results export` | Materialize or normalize the artifact workspace structure for a manifest | +| `results combine` | Combine partial local run workspaces into a new local run workspace | +| `results delete` | Delete one or more local run workspaces | | `results summary` | Print aggregate metrics for a run | | `results failures` | Show only failing cases | | `results show` | Display case-level rows from a run workspace | @@ -79,3 +83,14 @@ agentv results validate .agentv/results/runs/ ``` For a review-centric workflow built around these artifacts, see [Human Review Checkpoint](/docs/guides/human-review/). + +## Remote results sync/status + +The CLI contract is deliberately narrow: `agentv results` manages local result artifacts only. It does not expose `results remote status` or `results remote sync` subcommands. + +Use these supported remote workflows instead: + +- **Automatic publishing:** configure `projects[].results.auto_push: true`; new `agentv eval` and `agentv pipeline bench` runs push their artifacts after the run completes. +- **Manual Dashboard sync:** run `agentv dashboard`, open the project, and use **Sync Project**. 
+- **Manual API sync:** while Dashboard is running, call `GET /api/projects/:projectId/remote/status` or `POST /api/projects/:projectId/remote/sync` for project-scoped automation. Single-project sessions also expose `GET /api/remote/status` and `POST /api/remote/sync`. +- **Git escape hatch:** for advanced recovery, inspect or repair the configured `projects[].results.path` clone with `git` directly, then sync again. diff --git a/docs/plans/2026-06-10-remote-results-cli-contract.md b/docs/plans/2026-06-10-remote-results-cli-contract.md new file mode 100644 index 000000000..8d520d3f0 --- /dev/null +++ b/docs/plans/2026-06-10-remote-results-cli-contract.md @@ -0,0 +1,32 @@ +# Remote Results CLI Contract Decision + +## Decision + +AgentV should not add `agentv results remote status` or `agentv results remote sync` for the current production remote-results release. + +The production contract is: + +- `agentv eval` and `agentv pipeline bench` may auto-export newly created runs to the configured results repository when `auto_push: true` is set. +- `agentv results` remains a local-result workspace command family: combine, delete, export, report, summary, failures, show, and validate. +- Manual remote status and sync are Dashboard/API capabilities: + - `GET /api/remote/status` + - `POST /api/remote/sync` + - `GET /api/projects/:projectId/remote/status` + - `POST /api/projects/:projectId/remote/sync` +- Advanced CLI automation should call those Dashboard API endpoints while `agentv dashboard` is running, or use `git` directly in the configured `projects[].results.path` clone. + +## Rationale + +The remote sync operation is not a simple result-artifact primitive. It is project-scoped and coordinates git fetch/fast-forward/push behavior, mutable remote metadata overlays, dirty-state detection, conflict blocking, and safe recovery guidance. Dashboard already owns the project context and exposes the status/sync API that the UI uses. 
+ +Adding another CLI subcommand now would duplicate the Dashboard/API contract, widen the command surface, and force users to learn two manual sync entry points before there is evidence that the API-only automation path is insufficient. That conflicts with AgentV's lightweight-core and YAGNI principles. + +Keeping `agentv results` local also preserves a clean mental model: + +- Use `agentv results ...` for local artifacts already on disk. +- Use Dashboard/API for exchanging local and remote result repositories. +- Use eval auto-export for the common CI/publisher path. + +## Future extension trigger + +Add an explicit CLI sync command only if production users need headless manual sync without a long-running Dashboard server and cannot reasonably use the existing API or direct `git` workflow. If that demand appears, start with one project-scoped primitive that mirrors the existing API response shape instead of inventing a second contract.