Agenta-AI · junaway · Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026
diff --git a/.gitleaksignore b/.gitleaksignore
@@ -282,3 +282,7 @@ bb4b06cd13e5aca1c75886990f164b643360da2b:sdk/tests/legacy/debugging/simple-app/c
 ce2aa0c2d9990d25d5771b65e0fad6db01518e14:sdk/tests/integration/conftest.py:agenta-api-key:23
 a00f015276504fbf7a4820b26d17eb725c63635b:bench_traces.py:generic-api-key:19
 a00f015276504fbf7a4820b26d17eb725c63635b:bench_bulk_insert.py:generic-api-key:304
+70b9ab08f0f2c96f78ea436691ea161958ff9f18:docs/design/evaluation-runtime-heartbeats/README.md:generic-api-key:206
+70b9ab08f0f2c96f78ea436691ea161958ff9f18:docs/design/evaluation-runtime-heartbeats/README.md:generic-api-key:234
+70b9ab08f0f2c96f78ea436691ea161958ff9f18:docs/design/evaluation-runtime-heartbeats/plan.md:generic-api-key:46
+70b9ab08f0f2c96f78ea436691ea161958ff9f18:docs/design/evaluation-runtime-heartbeats/plan.md:generic-api-key:58
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,11 +1,28 @@
 repos:
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.2
-    hooks:
-      - id: ruff-format
-      - id: ruff
   - repo: local
     hooks:
+      - id: ruff-format  # local hook: runs `ruff format` as a module of the python3 found on PATH
+        name: ruff format
+        entry: python3 -m ruff format
+        language: system  # uses the developer's own environment; pre-commit does not install ruff for this hook
+        types: [python]  # only files identified as Python are handed to the hook
+      - id: ruff-check  # local hook: runs `ruff check` (no --fix) as a module of the python3 found on PATH
+        name: ruff check
+        entry: python3 -m ruff check
+        language: system  # uses the developer's own environment; pre-commit does not install ruff for this hook
+        types: [python]  # only files identified as Python are handed to the hook
-  - repo: local
-    hooks:
-      - id: ruff-format
-        name: ruff format
-        entry: ruff format
-        language: system
-        types: [python]
-      - id: ruff-check
-        name: ruff check
-        entry: ruff check
-        language: system
-        types: [python]
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.6.9
+    hooks:
+      - id: ruff-format
+      - id: ruff
+
+  - repo: local
+    hooks:
-  - repo: local
-    hooks:
-      - id: ruff-format
-        name: ruff format
-        entry: ruff format
-        language: system
-        types: [python]
-      - id: ruff-check
-        name: ruff check
-        entry: ruff check
-        language: system
-        types: [python]
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.6.9
+    hooks:
+      - id: ruff-format
+      - id: ruff
+
+  - repo: local
+    hooks:
+      - id: prettier-format  # rewrites web/ sources in place with prettier, run from inside web/
+        name: prettier --write (web)
+        entry: bash -c 'cd web && pnpm exec prettier --write --cache --log-level warn "**/*.{js,jsx,cjs,mjs,ts,tsx,cts,mts,mdx}" --ignore-path ./ee/.gitignore --ignore-path ./oss/.gitignore'
+        language: system
+        files: ^web/.*\.(js|jsx|cjs|mjs|ts|tsx|cts|mts|mdx)$  # hook is selected only for matching paths under web/
+        pass_filenames: false  # the entry globs the web/ tree itself, so no filenames are appended
+      - id: turbo-lint  # runs `turbo run lint` in web/; re-executes under `arch -arm64` when sysctl reports arm64 but node reports x64
+        name: turbo lint (web)
+        entry: bash -c 'cd web && if [ "$(sysctl -in hw.optional.arm64 2>/dev/null || echo 0)" = "1" ] && [ "$(node -p "process.arch")" = "x64" ]; then arch -arm64 pnpm exec turbo run lint; else pnpm exec turbo run lint; fi'
+        language: system
+        files: ^web/.*\.(js|jsx|cjs|mjs|ts|tsx|cts|mts|mdx)$  # hook is selected only for matching paths under web/
+        pass_filenames: false  # turbo lints the whole workspace; no filenames are appended
       - id: gitleaks-pre-commit
         name: gitleaks git (staged only)
         entry: bash -c 'gitleaks --config .gitleaks.toml --exit-code 1 --verbose git --staged'

diff --git a/AGENTS.md b/AGENTS.md
@@ -2,7 +2,7 @@
 
 ## Dev Environment Tips
 - If you make changes to the frontend, make sure to run `pnpm lint-fix` within the web folder
-- If you make changes to the API or SDK, make sure to run `ruff format` and `ruff check --fix` within the SDK or API folder
+- If you make changes to the API or SDK, run `ruff format` and then `ruff check --fix` (either from the repo root or from within the SDK or API folder), and fix all remaining errors before committing
 - If you update Ant Design tokens, run `pnpm generate:tailwind-tokens` in the web folder and commit the generated file
 
 ## Environment Config Conventions

diff --git a/agents/skills/README.md b/agents/skills/README.md
@@ -0,0 +1,213 @@
+# Findings Skills
+
+This folder contains the canonical skill definitions for findings-driven review work in this repo.
+
+The workflow is organized around five generic skills:
+
+1. `scan-codebase`
+2. `test-codebase`
+3. `sync-findings`
+4. `triage-findings`
+5. `resolve-findings`
+
+The split is by activity, not by a CR/QA lane name:
+
+- `scan-codebase` is verification-oriented review from code and docs into findings
+- `test-codebase` is validation-oriented execution from tests and runtime behavior into findings
+- `sync-findings` keeps GitHub and the findings record aligned
+- `triage-findings` is the discussion and planning layer
+- `resolve-findings` is the execution layer back into code, tests, and docs
+
+All skills accept optional `path=`.
+
+- If `path` is provided, use that local design or findings folder.
+- If `path` is omitted, infer it and state the inferred value before starting.
+
+Preferred master document:
+
+- `findings.md`
+
+Shared references:
+
+- `shared/references/findings.schema.md`
+- `shared/references/findings.lifecycle.md`
+
+## Diagram
+
+```text
+                 Git / PR / Review Threads
+                           ^
+                           | sync-findings
+                           v
+Code + Docs -- scan-codebase --> Findings <-- test-codebase -- Tests / Runtime / Docs
+                                    |
+                                    v
+                              triage-findings
+                                    |
+                                    v
+                              resolve-findings
+                                    |
+                                    v
+                           Code / Tests / Docs
+```
+
+## Invocation
+
+For Codex:
+
+- invoke a skill with `$skill-name`
+- `agents/skills/` is the canonical source, but Codex auto-discovers from `~/.codex/skills`
+- if a skill does not appear under `$`, refresh the symlink or install it into `~/.codex/skills` and reload Codex
+
+For Claude:
+
+- invoke a project skill with `/project:skill-name` when your Claude client exposes project skills that way
+- otherwise use the Claude project skills picker after reloading the project
+- `.claude/skills/` contains the Claude-facing wrappers for the canonical skills in `agents/skills/`
+
+## Skills
+
+### `scan-codebase`
+
+Use for a fresh-context scan of code and docs that turns review observations into findings.
+
+Parameters:
+
+- `path=<local-folder>` optional
+- `depth=deep` by default
+- supported values: `shallow`, `deep`
+
+Use when:
+
+- you want a review pass anchored in current code and docs
+- you want verification findings before planning
+- you want to surface missing tests or coverage gaps as review findings without running tests yet
+
+For Codex:
+
+```text
+$scan-codebase
+```
+
+For Claude:
+
+```text
+/project:scan-codebase
+```
+
+### `test-codebase`
+
+Use to run or inspect the relevant validation paths and turn failures, regressions, or missing coverage into findings.
+
+Parameters:
+
+- `path=<local-folder>` optional
+- `depth=deep` by default
+- supported values: `shallow`, `deep`
+
+Use when:
+
+- you want validation findings from actual test execution or targeted repro
+- you want to confirm whether missing or broken behavior is observable
+- you want missing test coverage turned into findings
+
+For Codex:
+
+```text
+$test-codebase
+```
+
+For Claude:
+
+```text
+/project:test-codebase
+```
+
+### `sync-findings`
+
+Use to sync the findings record against local review artifacts and optionally a GitHub PR.
+
+Parameters:
+
+- `path=<local-folder>` optional
+- `url=<github-pr-url>` for remote + local sync
+- omitted `url` means local-only sync
+
+Use when:
+
+- you want findings updated from open PR comments or local notes
+- you want clearly closed threads acknowledged and resolved
+- you want the master findings file to match current GitHub state and current local code state
+
+For Codex:
+
+```text
+$sync-findings
+```
+
+For Claude:
+
+```text
+/project:sync-findings
+```
+
+### `triage-findings`
+
+Use to coordinate the next review or testing actions with the user and turn findings into a plan.
+
+Parameters:
+
+- `path=<local-folder>` optional
+- `url=<github-pr-url>` optional when PR context matters
+
+Use when:
+
+- you need follow-up questions answered before acting
+- you need to decide whether to run `scan-codebase`, `test-codebase`, or `sync-findings`
+- you need severity, confidence, status, owner questions, and next action clarified
+
+For Codex:
+
+```text
+$triage-findings
+```
+
+For Claude:
+
+```text
+/project:triage-findings
+```
+
+### `resolve-findings`
+
+Use to implement the chosen fix path for findings and update the findings record afterward.
+
+Parameters:
+
+- `path=<local-folder>` optional
+- default `priority=next-highest`
+- explicit values: `P0`, `P1`, `P2`, `P3`, `all`
+
+Default behavior:
+
+- if unresolved findings exist at `P0`, resolve `P0` on this run
+- if `P0` is exhausted, the next run resolves `P1`
+- then `P2`, then `P3`
+
+Use when:
+
+- findings are implementation-ready or nearly so
+- you want code, test, and docs changes applied
+- you want targeted verification or validation rerun after the fix
+
+For Codex:
+
+```text
+$resolve-findings
+```
+
+For Claude:
+
+```text
+/project:resolve-findings
+```
diff --git a/agents/skills/resolve-findings/SKILL.md b/agents/skills/resolve-findings/SKILL.md
@@ -0,0 +1,72 @@
+---
+name: resolve-findings
+description: Resolve findings by implementing the chosen fix path in code, tests, or docs. Accept optional `path` and a `priority` selector; by default resolve only the next highest remaining priority bucket, in order `P0`, `P1`, `P2`, `P3`. Also accept explicit levels or `all`. Default to `path=infer`. Confirm effective variables before starting.
+---
+
+# Resolve Findings
+
+Read these shared references when needed:
+
+- `../shared/references/findings.schema.md`
+- `../shared/references/findings.lifecycle.md`
+
+## Role
+
+Resolve is execution mode from findings back into code, tests, and docs.
+
+- It may change production code for verification findings.
+- It may change tests or test harnesses for validation findings.
+- It should update the active findings record after implementation and rerun targeted checks when feasible.
+
+## Priority Input
+
+Accept a `priority` parameter from the prompt:
+
+- omitted priority: resolve the next highest remaining bucket only
+- explicit level: `P0`, `P1`, `P2`, or `P3`
+- `all`: resolve all remaining buckets
+
+Default:
+
+- `priority=next-highest`
+
+## Path Input
+
+Accept an optional `path` from the prompt.
+
+- If `path` is provided, use it as the local design or findings folder.
+- If `path` is omitted, infer it from the branch, subsystem, or matching docs and state the inferred value before starting.
+
+Default:
+
+- `path=infer`
+
+## Workflow
+
+1. Determine the selected bucket.
+   Confirm the effective variables first:
+   - `path`
+   - `priority`
+   - target findings files when inferable
+
+   Use the requested `priority`; when it is omitted, select the next highest unresolved bucket from the active findings record (loaded in step 2).
+
+2. Load the active findings record.
+   Use `<path>/findings.md`, where `<path>` is the effective `path` value confirmed in step 1.
+
+3. Check readiness before coding.
+   If the intended resolution path, policy boundary, or data contract is still ambiguous, ask the next follow-up question before editing.
+
+4. Implement the selected fixes.
+   Make the smallest coherent set of code, test, and doc changes needed for the selected findings bucket.
+
+5. Re-run targeted checks.
+   Use the narrowest useful verification or validation pass that demonstrates the fix.
+
+6. Update the findings record.
+   Move findings between open and closed sections, preserve the ordering of notes and open questions, and record what was fixed or what remains blocked.
+
+## Rules
+
+- Do not hide ambiguity behind `open` or `needs-user-decision` when the user has already started answering. Ask the next concrete question.
+- Do not silently widen scope from the selected priority bucket unless the fix is tightly coupled.
diff --git a/agents/skills/resolve-findings/agents/openai.yaml b/agents/skills/resolve-findings/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:  # presentation metadata for the resolve-findings skill
+  display_name: "Resolve Findings"
+  short_description: "Implement fixes from findings"
+  default_prompt: "Use $resolve-findings with priority=next-highest to implement the next unresolved findings bucket and update the findings record."  # priority=next-highest mirrors the default documented in SKILL.md
+
+policy:
+  allow_implicit_invocation: true  # NOTE(review): this skill may change production code (per SKILL.md) - confirm implicit invocation is intended