From 7e89f2e07de6490e9c864c73adde9f7a768c794f Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Tue, 9 Jun 2026 11:43:53 +0200 Subject: [PATCH] fix(dashboard): compact run labels --- .../src/components/RunList.mobile.spec.tsx | 18 ++ apps/dashboard/src/components/RunList.tsx | 64 +++++-- apps/dashboard/src/components/Sidebar.tsx | 28 ++- apps/dashboard/src/lib/run-label.test.ts | 65 ++++++- apps/dashboard/src/lib/run-label.ts | 162 ++++++++++++++++-- packages/core/src/evaluation/results-repo.ts | 4 +- .../core/test/evaluation/results-repo.test.ts | 4 +- 7 files changed, 304 insertions(+), 41 deletions(-) diff --git a/apps/dashboard/src/components/RunList.mobile.spec.tsx b/apps/dashboard/src/components/RunList.mobile.spec.tsx index a918411f..4f355763 100644 --- a/apps/dashboard/src/components/RunList.mobile.spec.tsx +++ b/apps/dashboard/src/components/RunList.mobile.spec.tsx @@ -45,4 +45,22 @@ describe('buildRunListItemView', () => { expect(view.isActive).toBe(true); expect(view.passing).toBe(false); }); + + it('uses compact run display without duplicating the pass-rate column', () => { + const view = buildRunListItemView( + runMeta({ + display_name: '2026-03-27T05-00-00-000Z', + filename: 'remote::2026-03-27T05-00-00-000Z', + target: 'remote-target', + timestamp: '2026-03-27T05:00:00.000Z', + pass_rate: 1, + source: 'remote', + }), + 0.8, + ); + + expect(view.display.primary).toBe('27/03 05:00'); + expect(view.display.secondary).toBe('remote-target'); + expect(view.label).toBe('27/03 05:00 · remote-target'); + }); }); diff --git a/apps/dashboard/src/components/RunList.tsx b/apps/dashboard/src/components/RunList.tsx index 9da2dfc6..d21599ce 100644 --- a/apps/dashboard/src/components/RunList.tsx +++ b/apps/dashboard/src/components/RunList.tsx @@ -30,7 +30,7 @@ import { useStudioConfig, } from '~/lib/api'; import { executionErrorCount } from '~/lib/result-summary'; -import { formatRunLabel } from '~/lib/run-label'; +import { type RunDisplay, formatRunDisplay } from '~/lib/run-label'; import { buildCombineSuccessMessage, buildDeleteSuccessMessage, @@ -63,6 +63,7 @@ interface RunListItemView { ts: { date: string; full: string }; isActive: boolean; label: string; + display: RunDisplay; errors: number; qualityCount: number; passing: boolean; @@ -94,7 +95,8 @@ function formatDate(ts: string | undefined | null): { date: string; full: string export function buildRunListItemView(run: RunMeta, passThreshold: number): RunListItemView { const ts = formatDate(run.timestamp); const isActive = run.status === 'starting' || run.status === 'running'; - const label = formatRunLabel(run); + const display = formatRunDisplay(run, { includePassRate: false }); + const label = display.label; const errors = executionErrorCount(run); const qualityCount = Math.max(0, run.test_count - errors); const passing = qualityCount > 0 ? run.pass_rate >= passThreshold : errors === 0; @@ -107,6 +109,7 @@ export function buildRunListItemView(run: RunMeta, passThreshold: number): RunLi ts, isActive, label, + display, errors, qualityCount, passing, @@ -380,8 +383,17 @@ export function RunList({ )}
{runViews.map((view) => { - const { run, ts, label, errors, qualityCount, passedCount, failedCount, metadataDirty } = - view; + const { + run, + ts, + label, + display, + errors, + qualityCount, + passedCount, + failedCount, + metadataDirty, + } = view; const selectionDisabledReason = runSelectionDisabledReason(run); const selectable = !selectionDisabledReason && selectableRunIds.includes(run.filename); @@ -411,9 +423,15 @@ export function RunList({ + {display.secondary ? ( +

+ {display.secondary} +

+ ) : null}
{metadataDirty ? : null} @@ -479,6 +497,7 @@ export function RunList({ run, ts, label, + display, errors, qualityCount, passedCount, @@ -514,14 +533,25 @@ export function RunList({ {/* Run name */} -
- - {metadataDirty ? : null} +
+
+ + {metadataDirty ? : null} +
+ {display.secondary ? ( +
+ {display.secondary} +
+ ) : null}
@@ -581,11 +611,13 @@ function RunNameLink({ projectId, runId, label, + title, className, }: { projectId?: string; runId: string; label: string; + title: string; className: string; }) { return projectId ? ( @@ -593,12 +625,12 @@ function RunNameLink({ to="/projects/$projectId/runs/$runId" params={{ projectId, runId }} className={className} - title={label} + title={title} > {label} ) : ( - + {label} ); diff --git a/apps/dashboard/src/components/Sidebar.tsx b/apps/dashboard/src/components/Sidebar.tsx index a35bf735..f93df921 100644 --- a/apps/dashboard/src/components/Sidebar.tsx +++ b/apps/dashboard/src/components/Sidebar.tsx @@ -34,7 +34,7 @@ import { useStudioConfig, } from '~/lib/api'; import { resolveProjectDisplayName } from '~/lib/project-display-name'; -import { formatRunLabel, timeAgo } from '~/lib/run-label'; +import { formatRunDisplay } from '~/lib/run-label'; import { useSidebarContext } from '~/lib/sidebar-context'; import { BrandName } from './BrandName'; @@ -88,6 +88,17 @@ function BrandHeader({ projectId }: { projectId?: string }) { ); } +function SidebarRunText({ display }: { display: ReturnType }) { + return ( + <> + {display.primary} + {display.secondary ? ( + {display.secondary} + ) : null} + + ); +} + function useProjectDisplayName(projectId: string): string { const { data } = useProjectList(); return resolveProjectDisplayName(projectId, data?.projects); @@ -274,6 +285,7 @@ function RunSidebar() {
{data?.runs.map((run) => { + const display = formatRunDisplay(run); const isActive = isHome === false && runMatch && @@ -289,10 +301,9 @@ function RunSidebar() { to="/projects/$projectId/runs/$runId" params={{ projectId: run.project_id, runId: run.filename }} className="mb-0.5 block rounded-md px-2 py-1.5 text-sm text-gray-400 transition-colors hover:bg-gray-800/50 hover:text-gray-200" - title={run.project_name} + title={`${display.title}\nProject: ${run.project_name}`} > - {formatRunLabel(run)} - {timeAgo(run.timestamp)} + ); } @@ -307,9 +318,9 @@ function RunSidebar() { ? 'bg-gray-800 text-cyan-400' : 'text-gray-400 hover:bg-gray-800/50 hover:text-gray-200' }`} + title={display.title} > - {formatRunLabel(run)} - {timeAgo(run.timestamp)} + ); })} @@ -507,6 +518,7 @@ function ProjectRunDetailSidebar({ Runs
{data?.runs.map((run) => { + const display = formatRunDisplay(run); const isActive = currentRunId === run.filename; return ( - {formatRunLabel(run)} - {timeAgo(run.timestamp)} + ); })} diff --git a/apps/dashboard/src/lib/run-label.test.ts b/apps/dashboard/src/lib/run-label.test.ts index 298479ec..647b272a 100644 --- a/apps/dashboard/src/lib/run-label.test.ts +++ b/apps/dashboard/src/lib/run-label.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'bun:test'; -import { formatRunLabel } from './run-label'; +import { formatRunDisplay, formatRunLabel } from './run-label'; describe('formatRunLabel', () => { it('starts with the run display name when available', () => { @@ -15,7 +15,7 @@ describe('formatRunLabel', () => { ).toBe('dogfood-run-a · 01/06 10:00 · codex · 100%'); }); - it('shows DD/MM HH:mm · target · experiment · score', () => { + it('uses a non-default experiment as the primary label when no display name is present', () => { expect( formatRunLabel({ target: 'llm-dry-run', @@ -23,7 +23,7 @@ describe('formatRunLabel', () => { timestamp: '2026-04-29T09:17:30.111Z', pass_rate: 0.8, }), - ).toBe('29/04 09:17 · llm-dry-run · issue-1198 · 80%'); + ).toBe('issue-1198 · 29/04 09:17 · llm-dry-run · 80%'); }); it('omits experiment when it is the default', () => { @@ -55,4 +55,63 @@ describe('formatRunLabel', () => { }), ).toBe('07/05 10:56 · wtalms-stg · 0%'); }); + + it('uses one compact timestamp for remote timestamp-only run names', () => { + const display = formatRunDisplay({ + display_name: '2026-03-27T05-00-00-000Z', + filename: 'remote::2026-03-27T05-00-00-000Z', + target: 'av-fis-target', + timestamp: '2026-03-27T05:00:00.000Z', + pass_rate: 1, + }); + + expect(display.primary).toBe('27/03 05:00'); + expect(display.secondary).toBe('av-fis-target · 100%'); + expect(display.label).toBe('27/03 05:00 · av-fis-target · 100%'); + expect(display.label.match(/27\/03 05:00/g)).toHaveLength(1); + expect(display.title).toContain('Run ID: remote::2026-03-27T05-00-00-000Z'); + expect(display.title).toContain('Display name: 2026-03-27T05-00-00-000Z'); + }); + + it('keeps a local human display name as the primary label', () => { + const display = formatRunDisplay({ + display_name: 'local fixture run', + filename: '2026-06-08T20-00-00-000Z', + target: 'local-target', + timestamp: '2026-06-08T20:00:00.000Z', + pass_rate: 1, + }); + + expect(display.primary).toBe('local fixture run'); + expect(display.secondary).toBe('08/06 20:00 · local-target · 100%'); + expect(display.label).toBe('local fixture run · 08/06 20:00 · local-target · 100%'); + }); + + it('falls back to a non-default experiment before timestamp-only run IDs', () => { + const display = formatRunDisplay({ + display_name: '2026-03-27T05-00-00-000Z', + filename: 'remote::smoke-regression::2026-03-27T05-00-00-000Z', + experiment: 'smoke-regression', + target: 'azure', + timestamp: '2026-03-27T05:00:00.000Z', + pass_rate: 0.5, + }); + + expect(display.primary).toBe('smoke-regression'); + expect(display.secondary).toBe('27/03 05:00 · azure · 50%'); + }); + + it('can omit pass rate when another UI column already shows it', () => { + const display = formatRunDisplay( + { + display_name: 'local fixture run', + target: 'local-target', + timestamp: '2026-06-08T20:00:00.000Z', + pass_rate: 1, + }, + { includePassRate: false }, + ); + + expect(display.label).toBe('local fixture run · 08/06 20:00 · local-target'); + }); }); diff --git a/apps/dashboard/src/lib/run-label.ts b/apps/dashboard/src/lib/run-label.ts index 1ea0c096..7a11d0c9 100644 --- a/apps/dashboard/src/lib/run-label.ts +++ b/apps/dashboard/src/lib/run-label.ts @@ -1,7 +1,22 @@ import type { RunMeta } from './types'; type RunLabelInput = Pick & - Partial>; + Partial> & { + title?: string; + }; + +export interface RunDisplay { + primary: string; + secondary: string; + label: string; + title: string; +} + +interface RunDisplayOptions { + includePassRate?: boolean; +} + +const REMOTE_RUN_PREFIX = 'remote::'; /** DD/MM HH:mm — short human-readable slice of the run's timestamp. */ function shortTimestamp(ts: string): string { @@ -18,6 +33,100 @@ function shortTimestamp(ts: string): string { } } +function cleanOptional(value: string | undefined): string | undefined { + const trimmed = value?.trim(); + return trimmed ? trimmed : undefined; +} + +function nonDefaultExperiment(experiment: string | undefined): string | undefined { + const trimmed = cleanOptional(experiment); + return trimmed && trimmed !== 'default' && trimmed !== '-' ? trimmed : undefined; +} + +function normalizeTimestampCandidate(value: string): string { + let candidate = value.trim(); + if (candidate.startsWith(REMOTE_RUN_PREFIX)) { + candidate = candidate.slice(REMOTE_RUN_PREFIX.length); + } + return candidate.replace(/\.(jsonl|json)$/i, ''); +} + +function parseRunTimestampLike(value: string): Date | undefined { + const candidate = normalizeTimestampCandidate(value); + const match = candidate.match( + /^(\d{4})-(\d{2})-(\d{2})T(\d{2})[:-](\d{2})[:-](\d{2})(?:[.-](\d{3}))?Z?$/, + ); + if (!match) return undefined; + + const [, year, month, day, hour, minute, second, millis = '000'] = match; + const parsed = new Date(`${year}-${month}-${day}T${hour}:${minute}:${second}.${millis}Z`); + return Number.isNaN(parsed.getTime()) ? undefined : parsed; +} + +function isTimestampOnlyName(value: string | undefined, timestamp: string): boolean { + const trimmed = cleanOptional(value); + if (!trimmed) return false; + + if (parseRunTimestampLike(trimmed)) { + return true; + } + + return normalizeTimestampCandidate(trimmed) === shortTimestamp(timestamp); +} + +function displayableFilename(filename: string | undefined, timestamp: string): string | undefined { + const trimmed = cleanOptional(filename); + if (!trimmed) return undefined; + + const withoutSource = trimmed.startsWith(REMOTE_RUN_PREFIX) + ? trimmed.slice(REMOTE_RUN_PREFIX.length) + : trimmed; + const separatorIndex = withoutSource.lastIndexOf('::'); + if (separatorIndex !== -1) { + const suffix = withoutSource.slice(separatorIndex + 2); + if (isTimestampOnlyName(suffix, timestamp)) { + return cleanOptional(withoutSource.slice(0, separatorIndex)); + } + } + + return withoutSource; +} + +function firstHumanName(run: RunLabelInput): string | undefined { + const candidates = [ + cleanOptional(run.display_name), + cleanOptional(run.title), + displayableFilename(run.filename, run.timestamp), + ]; + + return candidates.find( + (candidate): candidate is string => + Boolean(candidate) && !isTimestampOnlyName(candidate, run.timestamp), + ); +} + +function formatPassRate(passRate: number): string { + return `${Math.round(passRate * 100)}%`; +} + +function buildRunDisplayTitle(run: RunLabelInput, label: string): string { + const parts = [label]; + const filename = cleanOptional(run.filename); + const displayName = cleanOptional(run.display_name); + const title = cleanOptional(run.title); + + if (filename && filename !== label) parts.push(`Run ID: ${filename}`); + if (displayName && displayName !== filename && displayName !== label) { + parts.push(`Display name: ${displayName}`); + } + if (title && title !== displayName && title !== filename && title !== label) { + parts.push(`Title: ${title}`); + } + parts.push(`Timestamp: ${run.timestamp}`); + + return parts.join('\n'); +} + /** Human-readable relative time string, e.g. "4 hr ago". */ export function timeAgo(ts: string): string { try { @@ -34,19 +143,48 @@ export function timeAgo(ts: string): string { } } -/** Format a run label consistently across tables and nav surfaces. */ -export function formatRunLabel(run: RunLabelInput): string { - const primaryName = run.display_name || run.filename; - const parts: string[] = primaryName - ? [primaryName, shortTimestamp(run.timestamp)] - : [shortTimestamp(run.timestamp)]; +/** + * Build compact run display parts for Dashboard tables and sidebars. + * + * Primary text is a human name when one exists, otherwise a non-default + * experiment, otherwise a single compact timestamp. Raw run IDs stay available + * in the tooltip title so timestamp-only remote IDs do not crowd list cells. + */ +export function formatRunDisplay(run: RunLabelInput, options: RunDisplayOptions = {}): RunDisplay { + const timestampLabel = shortTimestamp(run.timestamp); + const experiment = nonDefaultExperiment(run.experiment); + const humanName = firstHumanName(run); + const experimentName = + experiment && !isTimestampOnlyName(experiment, run.timestamp) ? experiment : undefined; + const primary = humanName ?? experimentName ?? timestampLabel; + const includePassRate = options.includePassRate ?? true; + const secondaryParts: string[] = []; - if (run.target) parts.push(run.target); - if (run.experiment && run.experiment !== 'default' && run.experiment !== '-') { - parts.push(run.experiment); + if (primary !== timestampLabel) { + secondaryParts.push(timestampLabel); + } + if (run.target && run.target !== primary) { + secondaryParts.push(run.target); + } + if (experiment && experiment !== primary) { + secondaryParts.push(experiment); } + if (includePassRate) { + secondaryParts.push(formatPassRate(run.pass_rate)); + } + + const secondary = secondaryParts.join(' · '); + const label = secondary ? `${primary} · ${secondary}` : primary; - parts.push(`${Math.round(run.pass_rate * 100)}%`); + return { + primary, + secondary, + label, + title: buildRunDisplayTitle(run, label), + }; +} - return parts.join(' · '); +/** Format a run label consistently across tables and nav surfaces. */ +export function formatRunLabel(run: RunLabelInput): string { + return formatRunDisplay(run).label; } diff --git a/packages/core/src/evaluation/results-repo.ts b/packages/core/src/evaluation/results-repo.ts index 4826de0c..9a1ab0b0 100644 --- a/packages/core/src/evaluation/results-repo.ts +++ b/packages/core/src/evaluation/results-repo.ts @@ -1070,6 +1070,7 @@ type GitBatchBlob = { type GitRunBenchmark = { readonly metadata?: { + readonly display_name?: string; readonly timestamp?: string; readonly experiment?: string; readonly targets?: readonly string[]; @@ -1242,6 +1243,7 @@ export async function listGitRuns(repoDir: string, ref = 'origin/main'): Promise const relativeRunPath = path.posix.relative(RESULTS_REPO_RUNS_DIR, runDir); const runId = buildGitRunId(relativeRunPath); const timestamp = benchmark.metadata?.timestamp?.trim() || path.posix.basename(runDir); + const displayName = benchmark.metadata?.display_name?.trim() || path.posix.basename(runDir); const targets = benchmark.metadata?.targets ?? []; const passRate = computeAveragePassRate(benchmark.run_summary); @@ -1254,7 +1256,7 @@ export async function listGitRuns(repoDir: string, ref = 'origin/main'): Promise ...(targets.length === 1 && targets[0] ? { target: targets[0] } : {}), manifest_path: path.posix.join(runDir, 'index.jsonl'), benchmark_path: benchmarkPath, - display_name: path.posix.basename(runDir), + display_name: displayName, test_count: benchmark.metadata?.tests_run?.length ?? 0, avg_score: 0, size_bytes: blob.size, diff --git a/packages/core/test/evaluation/results-repo.test.ts b/packages/core/test/evaluation/results-repo.test.ts index 286feb9e..0757959e 100644 --- a/packages/core/test/evaluation/results-repo.test.ts +++ b/packages/core/test/evaluation/results-repo.test.ts @@ -143,6 +143,7 @@ describe('listGitRuns', () => { JSON.stringify( { metadata: { + display_name: 'remote friendly run', timestamp: '2026-05-21T11:00:00.000Z', experiment: 'with-skills', targets: ['claude-sonnet', 'gpt-4o'], @@ -174,7 +175,7 @@ describe('listGitRuns', () => { expect(runs[0]).toMatchObject({ experiment: 'with-skills', timestamp: '2026-05-21T11:00:00.000Z', - display_name: '2026-05-21T11-00-00-000Z', + display_name: 'remote friendly run', manifest_path: '.agentv/results/runs/with-skills/2026-05-21T11-00-00-000Z/index.jsonl', benchmark_path: '.agentv/results/runs/with-skills/2026-05-21T11-00-00-000Z/benchmark.json', test_count: 3, @@ -184,6 +185,7 @@ describe('listGitRuns', () => { expect(runs[0].target).toBeUndefined(); expect(runs[1]).toMatchObject({ experiment: 'default', + display_name: '2026-05-20T10-00-00-000Z', target: 'gpt-4o', manifest_path: '.agentv/results/runs/default/2026-05-20T10-00-00-000Z/index.jsonl', test_count: 2,