From 84b2b6e33313459314ff89debf57185bb0bec1ef Mon Sep 17 00:00:00 2001 From: haasonsaas Date: Tue, 9 Jun 2026 12:46:37 -0700 Subject: [PATCH] cleanup: move org automation to private .github-private Remove the internal org automation engine from the public evalops/.github and leave only the org profile and minimal README/SECURITY. The engine now lives in the private repo evalops/.github-private. Removed (moved to .github-private): - services.yaml (internal service catalog) - .github/scripts/ (14 Ruby helper scripts: PR-lens engine, webhook relay, guardrails) - .github/workflows/ (16 review + guardrail workflows) - .github/contracts/, .github/evalopsbot-review-targets.yml, .github/pr-lens-routing.yml - AGENTS.md, test/, labels.yml, renovate-config.json, scripts/check-positioning.mjs - .github/codex/, .github/agent-mcp/, .github/workflow-templates/, .github/ISSUE_TEMPLATE/, .github/pull_request_template.md, .github/CODEOWNERS, .github/actionlint.yaml Kept public: profile/, README.md (minimized), SECURITY.md. MERGE LAST. See PR body for safe merge order. Co-Authored-By: Claude Fable 5 --- .github/CODEOWNERS | 8 - .github/ISSUE_TEMPLATE/bug_report.yml | 53 - .github/ISSUE_TEMPLATE/ci_failure.yml | 47 - .github/ISSUE_TEMPLATE/codex_followup.yml | 45 - .../ISSUE_TEMPLATE/cross_repo_contract.yml | 58 - .github/ISSUE_TEMPLATE/feature_request.yml | 45 - .github/actionlint.yaml | 4 - .github/agent-mcp/templates/agents-section.md | 9 - .github/agent-mcp/templates/codex-config.toml | 2 - .github/agent-mcp/templates/cursor-mcp.json | 8 - .../agent-mcp/templates/gitignore.fragment | 3 - .github/agent-mcp/templates/mcp.json | 8 - .github/codex/hooks/evalops-hooks.toml | 12 - .github/codex/prompts/ci-failure-triage.md | 25 - .github/codex/prompts/label-churn-audit.md | 20 - .github/codex/prompts/local-traffic-canary.md | 22 - .github/codex/prompts/post-merge-verify.md | 21 - .github/codex/prompts/pr-review.md | 28 - .github/codex/prompts/structured-pr-review.md | 28 - .github/codex/schemas/pr-review.schema.json | 95 - .github/contracts/engineering-practices.yml | 222 --- .github/contracts/org-control-plane.yml | 217 --- .github/evalopsbot-review-targets.yml | 31 - .github/pr-lens-routing.yml | 18 - .github/pull_request_template.md | 42 - .github/scripts/audit-archived-dependabot.rb | 137 -- .../scripts/audit-engineering-practices.rb | 921 ---------- .github/scripts/check-pr-review-threads.rb | 432 ----- .github/scripts/classify-agent-authorship.rb | 83 - .github/scripts/evalops-codex-hook-guard.rb | 102 -- .github/scripts/evalops-pr-lens-review.rb | 1531 ----------------- .github/scripts/evalopsbot-webhook-relay.rb | 147 -- .../publish-codex-structured-review.rb | 177 -- .../scripts/sweep-recent-review-feedback.rb | 1027 ----------- .github/scripts/sync-agent-mcp-config.rb | 164 -- .github/scripts/sync-labels.rb | 325 ---- .github/scripts/validate-services-catalog.rb | 99 -- .../scripts/verify-evalopsbot-review-setup.rb | 140 -- .../verify-org-control-plane-contract.rb | 287 --- .../agent-authorship-labels.properties.json | 9 - .../agent-authorship-labels.yml | 16 - .../codex-ci-triage.properties.json | 9 - .../workflow-templates/codex-ci-triage.yml | 74 - .../codex-label-churn-audit.properties.json | 9 - .../codex-label-churn-audit.yml | 77 - .../codex-post-merge-verify.properties.json | 9 - .../codex-post-merge-verify.yml | 70 - .../codex-pr-review.properties.json | 10 - .../workflow-templates/codex-pr-review.yml | 75 - ...codex-structured-pr-review.properties.json | 10 - .../codex-structured-pr-review.yml | 142 -- .../workflow-templates/pysa.properties.json | 9 - .github/workflow-templates/pysa.yml | 19 - .../review-thread-guard.properties.json | 6 - .../review-thread-guard.yml | 23 - .github/workflows/agent-authorship-label.yml | 318 ---- .../workflows/agent-mcp-config-rollout.yml | 107 -- .../workflows/archived-dependabot-audit.yml | 45 - .github/workflows/codex-rails-check.yml | 267 --- .../workflows/engineering-practices-audit.yml | 88 - .github/workflows/evalops-pr-lens-review.yml | 370 ---- .../workflows/evalopsbot-review-canary.yml | 127 -- .../evalopsbot-review-request-dispatch.yml | 92 - .../workflows/evalopsbot-review-request.yml | 71 - .../evalopsbot-review-setup-audit.yml | 61 - .github/workflows/positioning-guardrail.yml | 30 - .github/workflows/pysa.yml | 131 -- .../workflows/review-feedback-backfill.yml | 97 -- .../workflows/review-feedback-sentinel.yml | 96 -- .github/workflows/review-thread-guard.yml | 99 -- .github/workflows/sync-labels.yml | 92 - AGENTS.md | 35 - README.md | 422 +---- labels.yml | 78 - renovate-config.json | 100 -- scripts/check-positioning.mjs | 50 - services.yaml | 645 ------- test/audit_archived_dependabot_test.rb | 39 - test/audit_engineering_practices_test.rb | 385 ----- test/check_pr_review_threads_test.rb | 341 ---- test/classify_agent_authorship_test.rb | 83 - test/evalops_codex_hook_guard_test.rb | 50 - test/evalops_pr_lens_review_test.rb | 621 ------- test/evalopsbot_webhook_relay_test.rb | 63 - test/publish_codex_structured_review_test.rb | 78 - test/sweep_recent_review_feedback_test.rb | 958 ----------- test/sync_agent_mcp_config_test.rb | 71 - test/sync_labels_test.rb | 98 -- test/validate_services_catalog_test.rb | 107 -- test/verify_evalopsbot_review_setup_test.rb | 42 - .../verify_org_control_plane_contract_test.rb | 96 -- test/workflow_pr_ref_guard_test.rb | 75 - 92 files changed, 14 insertions(+), 13724 deletions(-) delete mode 100644 .github/CODEOWNERS delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml delete mode 100644 .github/ISSUE_TEMPLATE/ci_failure.yml delete mode 100644 .github/ISSUE_TEMPLATE/codex_followup.yml delete mode 100644 .github/ISSUE_TEMPLATE/cross_repo_contract.yml delete mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml delete mode 100644 .github/actionlint.yaml delete mode 100644 .github/agent-mcp/templates/agents-section.md delete mode 100644 .github/agent-mcp/templates/codex-config.toml delete mode 100644 .github/agent-mcp/templates/cursor-mcp.json delete mode 100644 .github/agent-mcp/templates/gitignore.fragment delete mode 100644 .github/agent-mcp/templates/mcp.json delete mode 100644 .github/codex/hooks/evalops-hooks.toml delete mode 100644 .github/codex/prompts/ci-failure-triage.md delete mode 100644 .github/codex/prompts/label-churn-audit.md delete mode 100644 .github/codex/prompts/local-traffic-canary.md delete mode 100644 .github/codex/prompts/post-merge-verify.md delete mode 100644 .github/codex/prompts/pr-review.md delete mode 100644 .github/codex/prompts/structured-pr-review.md delete mode 100644 .github/codex/schemas/pr-review.schema.json delete mode 100644 .github/contracts/engineering-practices.yml delete mode 100644 .github/contracts/org-control-plane.yml delete mode 100644 .github/evalopsbot-review-targets.yml delete mode 100644 .github/pr-lens-routing.yml delete mode 100644 .github/pull_request_template.md delete mode 100644 .github/scripts/audit-archived-dependabot.rb delete mode 100644 .github/scripts/audit-engineering-practices.rb delete mode 100644 .github/scripts/check-pr-review-threads.rb delete mode 100644 .github/scripts/classify-agent-authorship.rb delete mode 100644 .github/scripts/evalops-codex-hook-guard.rb delete mode 100644 .github/scripts/evalops-pr-lens-review.rb delete mode 100644 .github/scripts/evalopsbot-webhook-relay.rb delete mode 100644 .github/scripts/publish-codex-structured-review.rb delete mode 100644 .github/scripts/sweep-recent-review-feedback.rb delete mode 100644 .github/scripts/sync-agent-mcp-config.rb delete mode 100644 .github/scripts/sync-labels.rb delete mode 100755 .github/scripts/validate-services-catalog.rb delete mode 100644 .github/scripts/verify-evalopsbot-review-setup.rb delete mode 100644 .github/scripts/verify-org-control-plane-contract.rb delete mode 100644 .github/workflow-templates/agent-authorship-labels.properties.json delete mode 100644 .github/workflow-templates/agent-authorship-labels.yml delete mode 100644 .github/workflow-templates/codex-ci-triage.properties.json delete mode 100644 .github/workflow-templates/codex-ci-triage.yml delete mode 100644 .github/workflow-templates/codex-label-churn-audit.properties.json delete mode 100644 .github/workflow-templates/codex-label-churn-audit.yml delete mode 100644 .github/workflow-templates/codex-post-merge-verify.properties.json delete mode 100644 .github/workflow-templates/codex-post-merge-verify.yml delete mode 100644 .github/workflow-templates/codex-pr-review.properties.json delete mode 100644 .github/workflow-templates/codex-pr-review.yml delete mode 100644 .github/workflow-templates/codex-structured-pr-review.properties.json delete mode 100644 .github/workflow-templates/codex-structured-pr-review.yml delete mode 100644 .github/workflow-templates/pysa.properties.json delete mode 100644 .github/workflow-templates/pysa.yml delete mode 100644 .github/workflow-templates/review-thread-guard.properties.json delete mode 100644 .github/workflow-templates/review-thread-guard.yml delete mode 100644 .github/workflows/agent-authorship-label.yml delete mode 100644 .github/workflows/agent-mcp-config-rollout.yml delete mode 100644 .github/workflows/archived-dependabot-audit.yml delete mode 100644 .github/workflows/codex-rails-check.yml delete mode 100644 .github/workflows/engineering-practices-audit.yml delete mode 100644 .github/workflows/evalops-pr-lens-review.yml delete mode 100644 .github/workflows/evalopsbot-review-canary.yml delete mode 100644 .github/workflows/evalopsbot-review-request-dispatch.yml delete mode 100644 .github/workflows/evalopsbot-review-request.yml delete mode 100644 .github/workflows/evalopsbot-review-setup-audit.yml delete mode 100644 .github/workflows/positioning-guardrail.yml delete mode 100644 .github/workflows/pysa.yml delete mode 100644 .github/workflows/review-feedback-backfill.yml delete mode 100644 .github/workflows/review-feedback-sentinel.yml delete mode 100644 .github/workflows/review-thread-guard.yml delete mode 100644 .github/workflows/sync-labels.yml delete mode 100644 AGENTS.md delete mode 100644 labels.yml delete mode 100644 renovate-config.json delete mode 100644 scripts/check-positioning.mjs delete mode 100644 services.yaml delete mode 100644 test/audit_archived_dependabot_test.rb delete mode 100644 test/audit_engineering_practices_test.rb delete mode 100644 test/check_pr_review_threads_test.rb delete mode 100644 test/classify_agent_authorship_test.rb delete mode 100644 test/evalops_codex_hook_guard_test.rb delete mode 100644 test/evalops_pr_lens_review_test.rb delete mode 100644 test/evalopsbot_webhook_relay_test.rb delete mode 100644 test/publish_codex_structured_review_test.rb delete mode 100644 test/sweep_recent_review_feedback_test.rb delete mode 100644 test/sync_agent_mcp_config_test.rb delete mode 100644 test/sync_labels_test.rb delete mode 100644 test/validate_services_catalog_test.rb delete mode 100644 test/verify_evalopsbot_review_setup_test.rb delete mode 100644 test/verify_org_control_plane_contract_test.rb delete mode 100644 test/workflow_pr_ref_guard_test.rb diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index d8a79b2..0000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1,8 +0,0 @@ -# EvalOps org-default changes are broad by definition. -* @haasonsaas - -# Workflow, contract, and helper-script changes deserve extra operator attention. -.github/contracts/ @haasonsaas -.github/scripts/ @haasonsaas -.github/workflows/ @haasonsaas -services.yaml @haasonsaas diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml deleted file mode 100644 index 63a9e09..0000000 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Bug Report -description: Report a bug or unexpected behavior -labels: ["bug", "triage"] -body: - - type: input - id: service - attributes: - label: Service / Repo - description: Which service is affected? - placeholder: e.g. identity, governance, console - validations: - required: true - - type: textarea - id: description - attributes: - label: Description - description: What happened? What did you expect? - validations: - required: true - - type: textarea - id: reproduce - attributes: - label: Steps to Reproduce - description: How can we reproduce this? - validations: - required: false - - type: input - id: environment - attributes: - label: Environment - description: Staging, production, local? - placeholder: e.g. production, staging - validations: - required: false - - type: textarea - id: evidence - attributes: - label: Evidence - description: Link logs, GitHub Actions runs, Argo apps, screenshots, traces, or reproduction artifacts. - placeholder: | - - GH run: - - PR: - - Argo app: - - Logs/traces: - validations: - required: false - - type: textarea - id: impact - attributes: - label: Impact / Blast Radius - description: Which users, services, repos, contracts, or releases are affected? - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/ci_failure.yml b/.github/ISSUE_TEMPLATE/ci_failure.yml deleted file mode 100644 index 3f4fd1a..0000000 --- a/.github/ISSUE_TEMPLATE/ci_failure.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: CI / Release Failure -description: Track a failing GitHub Actions, release-train, Argo, or deploy verification issue -labels: ["ci", "triage"] -body: - - type: input - id: repo - attributes: - label: Repo - description: Which repo owns the failing workflow or release gate? - placeholder: evalops/platform - validations: - required: true - - type: input - id: run_url - attributes: - label: Run / Job / App URL - description: Link the exact failing GitHub Actions run, job, Argo app, or release-train PR. - placeholder: https://github.com/evalops/platform/actions/runs/... - validations: - required: true - - type: textarea - id: failure - attributes: - label: Failure Signature - description: Paste the smallest useful error text or summarize the failing check. - validations: - required: true - - type: textarea - id: suspected_cause - attributes: - label: Suspected Root Cause - description: Mark uncertain analysis as suspected. Avoid guessing when logs are missing. - placeholder: "Suspected: ..." - validations: - required: false - - type: textarea - id: minimal_fix - attributes: - label: Minimal Fix / Merge Action - description: What is the smallest safe change or merge action that should clear this? - placeholder: | - - [ ] Patch workflow/action version - - [ ] Fix test expectation - - [ ] Merge existing green PR - - [ ] Rerun after upstream outage - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/codex_followup.yml b/.github/ISSUE_TEMPLATE/codex_followup.yml deleted file mode 100644 index 05f6ee1..0000000 --- a/.github/ISSUE_TEMPLATE/codex_followup.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: Codex Follow-Up -description: Capture an agent-discovered gap that should become concrete backlog -labels: ["enhancement", "triage"] -body: - - type: input - id: source - attributes: - label: Source - description: Where did this follow-up come from? - placeholder: Codex review, PR #123, CI triage, org review, meeting notes - validations: - required: true - - type: textarea - id: finding - attributes: - label: Finding - description: What did the agent find? Include file paths, PRs, issue links, or commands when possible. - validations: - required: true - - type: textarea - id: why_now - attributes: - label: Why It Matters - description: Explain the leverage, risk, or repeated workflow this would improve. - validations: - required: true - - type: textarea - id: proposed_work - attributes: - label: Proposed Work - description: Turn the finding into a concrete implementation slice. - placeholder: | - - [ ] Add guardrail - - [ ] Add regression test - - [ ] Update generated artifacts - - [ ] Document rollout or operator path - validations: - required: true - - type: textarea - id: verification - attributes: - label: Verification - description: What should Codex or a reviewer run before closing this? - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/cross_repo_contract.yml b/.github/ISSUE_TEMPLATE/cross_repo_contract.yml deleted file mode 100644 index 541ad98..0000000 --- a/.github/ISSUE_TEMPLATE/cross_repo_contract.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: Cross-Repo Contract Change -description: Track work that changes proto, API, event, SDK, deploy, or runtime contracts across repos -labels: ["enhancement", "triage"] -body: - - type: input - id: owner_repo - attributes: - label: Owning Repo - description: Which repo should own the first PR? - placeholder: evalops/platform - validations: - required: true - - type: textarea - id: contract_surface - attributes: - label: Contract Surface - description: Describe the proto/API/event/SDK/deploy/runtime surface that changes. - placeholder: e.g. chronicle.v1 Heartbeat, maestro event bus payload, Helm values, generated TS SDK - validations: - required: true - - type: textarea - id: dependent_repos - attributes: - label: Dependent Repos - description: List repos that must consume, verify, or deploy the change. - placeholder: | - - evalops/proto - - evalops/platform - - evalops/deploy - - evalops/maestro-internal - validations: - required: true - - type: textarea - id: rollout_plan - attributes: - label: Rollout Plan - description: How should the change be sequenced safely? - placeholder: | - 1. Add backward-compatible contract field. - 2. Regenerate SDKs and fixtures. - 3. Update producer. - 4. Update consumer. - 5. Add deploy flag or rollout gate if needed. - validations: - required: true - - type: textarea - id: verification - attributes: - label: Verification - description: What checks prove producer, consumer, generated artifacts, and deploy surfaces are aligned? - placeholder: | - - [ ] Generated artifacts updated - - [ ] Producer regression test - - [ ] Consumer regression test - - [ ] Deploy/render check - - [ ] GitHub Actions checked - validations: - required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml deleted file mode 100644 index a2e6b3c..0000000 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: Feature Request -description: Suggest a new feature or improvement -labels: ["enhancement"] -body: - - type: input - id: service - attributes: - label: Service / Repo - description: Which service does this relate to? - placeholder: e.g. identity, governance, console - validations: - required: false - - type: textarea - id: problem - attributes: - label: Problem - description: What problem does this solve? - validations: - required: true - - type: textarea - id: solution - attributes: - label: Proposed Solution - description: How should this work? - validations: - required: false - - type: textarea - id: acceptance - attributes: - label: Acceptance Criteria - description: What must be true for this to be done? - placeholder: | - - [ ] User-visible or operator-visible behavior is implemented - - [ ] Tests or validation cover the new path - - [ ] Docs/runbook/flags are updated where needed - validations: - required: false - - type: textarea - id: cross_repo - attributes: - label: Cross-Repo Impact - description: List dependent repos, contracts, deploy flags, generated artifacts, or rollout gates. - placeholder: evalops/platform, evalops/deploy, evalops/proto, evalops/maestro-internal - validations: - required: false diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml deleted file mode 100644 index 7f3c5aa..0000000 --- a/.github/actionlint.yaml +++ /dev/null @@ -1,4 +0,0 @@ -self-hosted-runner: - labels: - -config-variables: null diff --git a/.github/agent-mcp/templates/agents-section.md b/.github/agent-mcp/templates/agents-section.md deleted file mode 100644 index d3d2c94..0000000 --- a/.github/agent-mcp/templates/agents-section.md +++ /dev/null @@ -1,9 +0,0 @@ -## EvalOps Integration - -This project uses EvalOps for agent governance, metering, and memory. The MCP -server is configured in `.mcp.json`, `.codex/config.toml`, and -`.cursor/mcp.json` so supported coding agents can connect automatically. - -On first use, you may be prompted to authenticate with EvalOps. After that, -agent governance and audit context should be available without committing local -API keys. diff --git a/.github/agent-mcp/templates/codex-config.toml b/.github/agent-mcp/templates/codex-config.toml deleted file mode 100644 index 74acca8..0000000 --- a/.github/agent-mcp/templates/codex-config.toml +++ /dev/null @@ -1,2 +0,0 @@ -[mcp_servers.evalops] -url = "https://mcp.evalops.dev/mcp" diff --git a/.github/agent-mcp/templates/cursor-mcp.json b/.github/agent-mcp/templates/cursor-mcp.json deleted file mode 100644 index 809da30..0000000 --- a/.github/agent-mcp/templates/cursor-mcp.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "mcpServers": { - "evalops": { - "type": "http", - "url": "https://mcp.evalops.dev/mcp" - } - } -} diff --git a/.github/agent-mcp/templates/gitignore.fragment b/.github/agent-mcp/templates/gitignore.fragment deleted file mode 100644 index 7f67b90..0000000 --- a/.github/agent-mcp/templates/gitignore.fragment +++ /dev/null @@ -1,3 +0,0 @@ -# EvalOps MCP local credentials -.env -.env.local diff --git a/.github/agent-mcp/templates/mcp.json b/.github/agent-mcp/templates/mcp.json deleted file mode 100644 index 809da30..0000000 --- a/.github/agent-mcp/templates/mcp.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "mcpServers": { - "evalops": { - "type": "http", - "url": "https://mcp.evalops.dev/mcp" - } - } -} diff --git a/.github/codex/hooks/evalops-hooks.toml b/.github/codex/hooks/evalops-hooks.toml deleted file mode 100644 index 623ee1a..0000000 --- a/.github/codex/hooks/evalops-hooks.toml +++ /dev/null @@ -1,12 +0,0 @@ -# Example Codex hook pack for EvalOps repos. Install at user or repo scope and -# adjust command paths to match the checked-out evalops/.github location. - -[hooks.SessionStart.evalops_context] -command = "ruby .github/scripts/evalops-codex-hook-guard.rb session-start" - -[hooks.PreToolUse.evalops_git_guard] -matcher = "Bash" -command = "ruby .github/scripts/evalops-codex-hook-guard.rb pretool-git" - -[hooks.Stop.evalops_review_readiness] -command = "ruby .github/scripts/evalops-codex-hook-guard.rb stop-readiness" diff --git a/.github/codex/prompts/ci-failure-triage.md b/.github/codex/prompts/ci-failure-triage.md deleted file mode 100644 index ebb1f26..0000000 --- a/.github/codex/prompts/ci-failure-triage.md +++ /dev/null @@ -1,25 +0,0 @@ -# EvalOps Codex CI Failure Triage - -Investigate the failing GitHub Actions run for this repository and produce a -minimal fix plan or patch. - -Required checks: - -- Start from the exact failing run, job, and step. Do not infer from workflow - names alone. -- Fetch failed logs with `gh run view --log-failed` and fall back to the - Actions jobs API when the log output is empty. -- Distinguish stale failures on superseded SHAs from failures on the live PR or - `main` tip. -- Group related failures by root cause and avoid unrelated refactors. -- If the failure is a workflow issue, inspect path filters, generated workflow - surfaces, branch protection expectations, and pinned action policy. -- If the failure is test or code behavior, run the smallest local reproduction - before proposing broader gates. - -Output: - -- Root cause with run/job evidence. -- Minimal fix or the exact reason no code change is appropriate. -- Commands run locally. -- Remaining CI or review-thread work. diff --git a/.github/codex/prompts/label-churn-audit.md b/.github/codex/prompts/label-churn-audit.md deleted file mode 100644 index 7291084..0000000 --- a/.github/codex/prompts/label-churn-audit.md +++ /dev/null @@ -1,20 +0,0 @@ -# EvalOps Codex Label Churn Audit - -Audit PR labels that are being added and removed repeatedly by automation. - -Required checks: - -- Inspect the PR timeline, issue events, workflow runs, bot comments, and - repository workflows that can mutate labels. -- Group label changes by actor, label, timestamp, and likely workflow source. -- Distinguish intended mutually exclusive labels from automation loops. -- Check whether human-authored code is expected to be agent-authored in this - repo before treating agent labels as suspicious. -- Identify the smallest durable fix: workflow condition, label ownership rule, - branch filter, debounce, or documentation update. - -Output: - -- A concise timeline of label mutations. -- The likely source workflow or automation. -- The durable fix and how to verify it. diff --git a/.github/codex/prompts/local-traffic-canary.md b/.github/codex/prompts/local-traffic-canary.md deleted file mode 100644 index 6a88685..0000000 --- a/.github/codex/prompts/local-traffic-canary.md +++ /dev/null @@ -1,22 +0,0 @@ -# EvalOps Codex Local Traffic Canary - -Investigate a failure in local developer tooling, traffic simulation, or -distributed tracing. - -Required checks: - -- Start from the failing command and preserve its output. -- Inspect `AGENTS.md`, Makefile targets, local compose files, traffic profiles, - and tracing docs before changing behavior. -- Prefer dry-run validations first, then dependency-backed local smoke only - when Docker and local ports are available. -- Verify that generated trace IDs, `traceparent`, NATS subjects, and manifest - paths match the repo contract. -- Keep fixes local-tooling focused unless the failure exposes a production - contract bug. - -Output: - -- Failing command and root cause. -- Patch or precise follow-up if credentials/local services are unavailable. -- Verification commands that future developers can run. diff --git a/.github/codex/prompts/post-merge-verify.md b/.github/codex/prompts/post-merge-verify.md deleted file mode 100644 index 0cf7f50..0000000 --- a/.github/codex/prompts/post-merge-verify.md +++ /dev/null @@ -1,21 +0,0 @@ -# EvalOps Codex Post-Merge Verification - -Verify that a recently merged PR is actually healthy on the default branch. - -Required checks: - -- Identify the merge commit and affected workflows on `main`. -- Check the latest default-branch GitHub Actions runs, not stale PR checks. -- For deploy or runtime changes, describe the GitOps or live-state validation - path and whether credentials were available. -- For local tooling, run the relevant local smoke or dry-run target. -- For tracing/event-bus work, verify trace propagation, subject/catalog - alignment, and local simulation manifests. -- If a follow-up is needed, create or describe a precise issue with acceptance - criteria. - -Output: - -- Healthy / unhealthy / inconclusive status. -- Evidence links or command outputs summarized in prose. -- Follow-up PR or issue recommendations. diff --git a/.github/codex/prompts/pr-review.md b/.github/codex/prompts/pr-review.md deleted file mode 100644 index 3c5ea9b..0000000 --- a/.github/codex/prompts/pr-review.md +++ /dev/null @@ -1,28 +0,0 @@ -# EvalOps Codex PR Review - -Review the pull request as an EvalOps maintainer. Focus on defects, behavioral -regressions, missing tests, generated artifact drift, security footguns, and -operational risk. Prefer concise findings over broad summaries. - -Required checks: - -- Inspect the diff against the PR base and identify the affected repos, - services, workflows, contracts, generated files, and deployment surfaces. -- Read any `AGENTS.md` files that apply to changed paths before reviewing. -- Use live GitHub context when available: PR description, labels, checks, - review comments, unresolved review threads, and recent CI failures. -- For generated code, verify whether the generator or checked-in output is the - source of truth before recommending direct edits. -- For infrastructure or workflow changes, call out whether the change affects - labels, branch protection, automation, release trains, or GitOps desired - state. -- For tracing or event-bus changes, verify trace context, subject/catalog - alignment, and local simulation coverage. - -Output: - -- Start with actionable findings ordered by severity. -- Include file paths and line references when possible. -- Include a short residual-risk note when the diff looks clean. -- Do not approve a PR solely because tests pass if unresolved review threads or - failing checks remain. diff --git a/.github/codex/prompts/structured-pr-review.md b/.github/codex/prompts/structured-pr-review.md deleted file mode 100644 index 7d41218..0000000 --- a/.github/codex/prompts/structured-pr-review.md +++ /dev/null @@ -1,28 +0,0 @@ -# EvalOps Codex Structured PR Review - -Review the pull request as an EvalOps maintainer and return only JSON that -matches the configured schema. - -Focus on actionable defects introduced by the pull request: - -- correctness, security, performance, operational, CI, release, and developer - workflow regressions -- generated artifact drift where the generator output and checked-in artifacts - disagree -- GitHub workflow issues, label churn, missing permissions, and unsafe secret or - sandbox usage -- distributed tracing, local simulation, and event-contract regressions -- missing tests where the changed behavior is not otherwise covered - -Rules: - -- Read applicable `AGENTS.md` files before judging touched paths. -- Use live GitHub context when available: PR body, labels, checks, review - threads, and recent CI failures. -- Treat unresolved, non-outdated high-priority review threads as blockers. Do - not assume a later summary comment addressed them unless the thread itself is - resolved or the current diff makes it stale. -- Report only issues you would leave as review comments for a human maintainer. -- Use paths and line ranges from the pull request head side. -- If the patch is clean, return an empty `findings` array and explain the - residual risk in `overall_explanation`. diff --git a/.github/codex/schemas/pr-review.schema.json b/.github/codex/schemas/pr-review.schema.json deleted file mode 100644 index 8f28240..0000000 --- a/.github/codex/schemas/pr-review.schema.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "type": "object", - "additionalProperties": false, - "required": [ - "overall_correctness", - "overall_explanation", - "overall_confidence_score", - "findings" - ], - "properties": { - "overall_correctness": { - "type": "string", - "enum": [ - "patch is correct", - "patch is incorrect" - ] - }, - "overall_explanation": { - "type": "string", - "minLength": 1 - }, - "overall_confidence_score": { - "type": "number", - "minimum": 0, - "maximum": 1 - }, - "findings": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": false, - "required": [ - "title", - "body", - "confidence_score", - "priority", - "code_location" - ], - "properties": { - "title": { - "type": "string", - "minLength": 1, - "maxLength": 80 - }, - "body": { - "type": "string", - "minLength": 1 - }, - "confidence_score": { - "type": "number", - "minimum": 0, - "maximum": 1 - }, - "priority": { - "type": "integer", - "minimum": 0, - "maximum": 3 - }, - "code_location": { - "type": "object", - "additionalProperties": false, - "required": [ - "absolute_file_path", - "line_range" - ], - "properties": { - "absolute_file_path": { - "type": "string", - "minLength": 1 - }, - "line_range": { - "type": "object", - "additionalProperties": false, - "required": [ - "start", - "end" - ], - "properties": { - "start": { - "type": "integer", - "minimum": 1 - }, - "end": { - "type": "integer", - "minimum": 1 - } - } - } - } - } - } - } - } - } -} diff --git a/.github/contracts/engineering-practices.yml b/.github/contracts/engineering-practices.yml deleted file mode 100644 index f56bf2c..0000000 --- a/.github/contracts/engineering-practices.yml +++ /dev/null @@ -1,222 +0,0 @@ -schema_version: evalops.engineering_practices.v1 -contract_id: evalops.github.engineering-practices -owner_repo: evalops/.github -status: proposed -workflow: - name: engineering-practices-control-plane - correctness_model: > - EvalOps engineering practices are correct when they are backed by live - GitHub evidence, scoped by repository tier, and connected to a runnable - check or adoption ledger instead of living only as prose. - threat_model: > - The highest-risk failure mode is high-throughput agent-assisted change - landing without a durable review, release, security, or evidence contract. - Audits must degrade to non-mutating reports when credentials are missing - and must never publish a green report from partial or empty data. -source_records: - - id: evalops.github.engineering-practices.source.org-contract - path: profile/ENGINEERING_PRACTICES.md - digest: sha256 - - id: evalops.github.engineering-practices.source.service-catalog - path: services.yaml - digest: sha256 - - id: evalops.github.engineering-practices.source.control-plane-readme - path: README.md - digest: sha256 -repo_tiers: - critical: - description: "Product, runtime, deployment, and org-control-plane repos that should block merges on practice drift." - repos: - - evalops/platform - - evalops/deploy - - evalops/ensemble - - evalops/maestro-internal - - evalops/maestro - - evalops/cerebro - - evalops/chat - - evalops/.github - required_controls: - - org-rulesets - - agent-review-lane - - backlog-lifecycle - - release-train-state - - security-slo - - operating-rails - - evidence-first-done - standard: - description: "Actively maintained product, SDK, data, and infrastructure repos that should report drift before enforcement." - repos: - - evalops/hopper - - evalops/nimbus - - evalops/kestrel - - evalops/diffscope - - evalops/conductor - - evalops/console - - evalops/eval2otel - - evalops/agent-pm - required_controls: - - backlog-lifecycle - - security-slo - - operating-rails - - evidence-first-done - experimental: - description: "Research and spike repos where lightweight reporting is preferred over blocking policy." - repos: [] - required_controls: - - operating-rails -practices: - - id: org-rulesets - title: "GitHub-native rulesets for repo tiers" - why: "Branch protection is currently repo-local and uneven; org rulesets give EvalOps a central merge-safety contract." - adoption: "Start in evaluate mode for critical repos, then promote required checks once each repo has the matching workflows." - source: - path: profile/ENGINEERING_PRACTICES.md - heading: "Org Rulesets" - checked_by: - - .github/scripts/audit-engineering-practices.rb - - .github/workflows/engineering-practices-audit.yml - signals: - - org_ruleset_count - - protected_critical_repos - - id: backlog-lifecycle - title: "Generated backlog lifecycle" - why: "Guardrail and conformance issues are useful only when fingerprints, ownership, and close conditions stay machine-readable." - adoption: "Require generated backlog issues to carry a class key, source fingerprints, last-seen window, and explicit close evidence." - source: - path: profile/ENGINEERING_PRACTICES.md - heading: "Backlog Lifecycle" - checked_by: - - .github/scripts/audit-engineering-practices.rb - - .github/scripts/sweep-recent-review-feedback.rb - signals: - - open_guardrail_backlog_issues - - stale_closing_comments - - id: release-train-state - title: "Release-train state machine" - why: "Repeated hold and image-sync PRs should converge on a single desired-state record instead of multiplying operational PRs." - adoption: "Track one active train record per environment with owner, TTL, receipt, rollback receipt, and idempotent PR updates." - source: - path: profile/ENGINEERING_PRACTICES.md - heading: "Release Trains" - checked_by: - - .github/scripts/audit-engineering-practices.rb - signals: - - deploy_release_train_duplicate_prs - - deploy_image_sync_prs - - id: agent-review-lane - title: "Required agent review lane" - why: "Agent-assisted throughput is high enough that review-thread closure, EvalOpsBot review, and CODEOWNERS need to be standard rails." - adoption: "Critical repos require EvalOpsBot review request plumbing, review-thread guard, CODEOWNERS, and stable check contexts." - source: - path: profile/ENGINEERING_PRACTICES.md - heading: "Agent Review" - checked_by: - - .github/scripts/audit-engineering-practices.rb - - .github/scripts/verify-evalopsbot-review-setup.rb - signals: - - evalopsbot_workflow_adoption - - review_thread_guard_adoption - - codeowners_adoption - - id: security-slo - title: "Security remediation SLOs" - why: "Security defaults exist, but open alerts need explicit tiered owners, burn-down windows, and suppression evidence without enabling expensive default scanners." - adoption: "Critical repos should track critical/high Dependabot and secret-scanning alerts against age-based SLOs. CodeQL and GitHub default code scanning are explicitly not part of this baseline." - source: - path: profile/ENGINEERING_PRACTICES.md - heading: "Security SLO" - checked_by: - - .github/scripts/audit-engineering-practices.rb - signals: - - dependabot_open_alerts - - secret_scanning_open_alerts - - id: operating-rails - title: "Repo operating rails by class" - why: "AGENTS.md, CODEOWNERS, dependency policy, Codex rails, Pysa, and runner-label config should be applied by repo class, not memory." - adoption: "Critical repos get the full rail set; standard repos report missing rails until promoted." - source: - path: profile/ENGINEERING_PRACTICES.md - heading: "Operating Rails" - checked_by: - - .github/scripts/audit-engineering-practices.rb - - .github/workflows/codex-rails-check.yml - signals: - - agents_adoption - - codex_rails_adoption - - dependency_policy_adoption - - runner_label_config_adoption - - id: evidence-first-done - title: "Evidence-first definition of done" - why: "EvalOps sells governance and operational proof; engineering changes should leave smoke evidence, artifact receipts, and withheld-data notes." - adoption: "Every critical repo PR should connect user-visible changes to smoke fixtures, artifact receipts, telemetry, and rollback evidence." - source: - path: profile/ENGINEERING_PRACTICES.md - heading: "Evidence First" - checked_by: - - .github/scripts/audit-engineering-practices.rb - - .github/pull_request_template.md - signals: - - pr_template_evidence_checklist - - runtime_smoke_guardrail_backlog -live_audit: - owner: evalops - sampled_repos: - - evalops/platform - - evalops/deploy - - evalops/ensemble - - evalops/maestro-internal - - evalops/maestro - - evalops/cerebro - - evalops/chat - - evalops/.github - - evalops/hopper - - evalops/nimbus - - evalops/kestrel - required_files: - critical: - - AGENTS.md - - .github/CODEOWNERS - - .github/workflows/review-thread-guard.yml - - .github/workflows/evalopsbot-review-request.yml - - .github/workflows/codex-rails-check.yml - standard: - - AGENTS.md - issue_queries: - guardrail_candidate: 'org:evalops is:issue is:open archived:false "Guardrail candidate" in:title' - acceptance_harness: 'org:evalops is:issue is:open archived:false "Add a research-backed acceptance harness" in:title' - conformance_contract: 'org:evalops is:issue is:open archived:false "Promote latent specs into a documented conformance contract" in:title' - provenance_evidence: 'org:evalops is:issue is:open archived:false "Make provenance and evidence traceability first-class" in:title' - telemetry_slo: 'org:evalops is:issue is:open archived:false "Expose operational telemetry and SLO gates" in:title' - release_train_queries: - deploy_hold_prs: 'repo:evalops/deploy is:pr is:merged merged:>=2026-05-06 "Hold prod-continuous release train" in:title' - deploy_image_sync_prs: 'repo:evalops/deploy is:pr is:merged merged:>=2026-05-06 "sync" "image" in:title' - release_train_state: - dashboard_repo: evalops/deploy - dashboard_issue: 1344 - marker: '' - security_alert_slo: - critical_days: 1 - high_days: 7 - medium_days: 30 - excluded_scanners: - - codeql - - github-code-scanning-default-setup - no_codeql: - security_configuration_id: 245233 - required_settings: - advanced_security: secret_protection - code_scanning_default_setup: disabled - dependency_graph_autosubmit_action: disabled - forbidden_required_check_patterns: - - codeql - - code scanning - - code-scanning - - github/codeql-action - forbidden_workflow_queries: - codeql_action_workflows: 'github/codeql-action org:evalops path:.github/workflows' - codeql_named_workflows: 'codeql org:evalops path:.github/workflows' - code_scanning_api_workflows: 'code-scanning/sarifs org:evalops path:.github/workflows' - sarif_upload_workflows: 'upload-sarif org:evalops path:.github/workflows' - security_events_write_workflows: '"security-events: write" org:evalops path:.github/workflows' -commands: - local_contract_check: "ruby .github/scripts/audit-engineering-practices.rb --contract-only" - live_report: "ruby .github/scripts/audit-engineering-practices.rb --json-output engineering-practices-audit.json --markdown-output engineering-practices-audit.md" diff --git a/.github/contracts/org-control-plane.yml b/.github/contracts/org-control-plane.yml deleted file mode 100644 index 27f5cf9..0000000 --- a/.github/contracts/org-control-plane.yml +++ /dev/null @@ -1,217 +0,0 @@ -schema_version: evalops.org_control_plane_contract.v1 -contract_id: evalops.github.org-defaults -owner_repo: evalops/.github -status: enforced -related_issues: - - 63 - - 64 - - 65 - - 66 - - 67 - - 81 -workflow: - name: org-defaults-control-plane - correctness_model: > - EvalOps org defaults should be explicit, reviewable, and enforced before - they can silently change downstream repository behavior. - threat_model: > - Treat org-default prompts, templates, workflow helpers, and catalog data as - agent-facing control inputs. Unsafe inputs must fail closed or degrade to a - non-mutating report instead of reaching privileged GitHub writes. - research_assumptions: - - Retrieval-backed agents need cited, stable source records rather than - implicit memory when applying org conventions. - - Conformance checks should cover happy-path, degraded, and adversarial - cases because org defaults are reused outside this repository. - - Evidence should be machine-readable first and prose-rendered second. -requirements: - - id: live-github-context - title: Live GitHub state precedes org-default changes - source: - path: AGENTS.md - lines: 8-11 - evidence_fields: - - source_id - - decision_id - - output_id - checked_by: - - .github/scripts/verify-org-control-plane-contract.rb - - .github/workflows/codex-rails-check.yml - - id: portable-defaults - title: Defaults stay portable and avoid private operational assumptions - source: - path: README.md - lines: 7-9 - evidence_fields: - - source_id - - decision_id - - output_id - checked_by: - - .github/scripts/verify-org-control-plane-contract.rb - - id: reusable-workflow-evidence - title: Reusable workflows expose their required secrets and verification path - source: - path: README.md - lines: 39-58 - evidence_fields: - - source_id - - decision_id - - output_id - checked_by: - - .github/workflows/codex-rails-check.yml - - id: catalog-shape - title: Service catalog entries stay lightweight and internally referential - source: - path: README.md - lines: 144-163 - evidence_fields: - - source_id - - decision_id - - output_id - checked_by: - - .github/scripts/validate-services-catalog.rb - - test/validate_services_catalog_test.rb - - id: no-codeql-org-defaults - title: GitHub CodeQL and default code scanning stay disabled - source: - path: SECURITY.md - lines: 26-44 - evidence_fields: - - source_id - - decision_id - - output_id - checked_by: - - .github/scripts/verify-org-control-plane-contract.rb - - .github/workflows/codex-rails-check.yml -github_security_configuration: - id: 245233 - name: EvalOps security baseline recommended - default_for_new_repos: all - required_settings: - advanced_security: secret_protection - code_scanning_default_setup: disabled - dependency_graph: enabled - dependency_graph_autosubmit_action: disabled - dependabot_alerts: enabled - secret_scanning: enabled - secret_scanning_push_protection: enabled - forbidden_workflows: - checked_in_path_globs: - - .github/workflows/*codeql* - - .github/workflow-templates/*codeql* - generated_paths: - - dynamic/github-code-scanning/codeql - actions: - - github/codeql-action -provenance: - stable_id_pattern: "evalops.github..." - source_records: - - id: evalops.github.org-defaults.source.agents-rails - path: AGENTS.md - digest: sha256 - - id: evalops.github.org-defaults.source.readme-contract - path: README.md - digest: sha256 - - id: evalops.github.org-defaults.source.service-catalog - path: services.yaml - digest: sha256 - - id: evalops.github.org-defaults.source.security-policy - path: SECURITY.md - digest: sha256 - derived_decisions: - - id: evalops.github.org-defaults.decision.codex-rails - path: .github/workflows/codex-rails-check.yml - derived_from: - - evalops.github.org-defaults.source.agents-rails - - evalops.github.org-defaults.source.readme-contract - - id: evalops.github.org-defaults.decision.review-feedback-sentinel - path: .github/workflows/review-feedback-sentinel.yml - derived_from: - - evalops.github.org-defaults.source.readme-contract - emitted_outputs: - - id: evalops.github.org-defaults.output.contract-report - path: org-control-plane-contract-report.json - produced_by: .github/scripts/verify-org-control-plane-contract.rb - - id: evalops.github.org-defaults.output.feedback-ledger - path: review-feedback-ledger.json - produced_by: .github/scripts/sweep-recent-review-feedback.rb -slo_gates: - - id: codex-rails-local - user_journey: Maintainer validates org-default changes before opening a PR - target: "Ruby validation completes in under 10 minutes in CI" - dimensions: - - latency - - correctness - - degraded_mode - - evidence - success_signal: "contract_report.status == pass" - failure_signal: "contract_report.status == fail with failing check IDs" - fallback: "non-mutating markdown report" - - id: review-feedback-sentinel - user_journey: Sentinel turns high-severity merged PR feedback into guardrails - target: "six-hour sweep publishes ledger/backlog/lifecycle artifacts" - dimensions: - - latency - - correctness - - cost - - degraded_mode - - evidence - success_signal: "review-feedback-guardrail-lifecycle.json emitted" - failure_signal: "token preflight or artifact generation failure" - fallback: "fail before publishing empty or misleading artifacts" -golden_workflows: - - id: codex-rails - workflow: .github/workflows/codex-rails-check.yml - verifier: .github/scripts/verify-org-control-plane-contract.rb - tests: - - test/verify_org_control_plane_contract_test.rb - success_fixture: - name: valid-contract - expected_outcome: pass - degraded_fixture: - name: missing-optional-actionlint - expected_outcome: degraded_report - failure_fixture: - name: missing-source-record - expected_outcome: fail_closed - - id: review-feedback-sentinel - workflow: .github/workflows/review-feedback-sentinel.yml - verifier: .github/scripts/sweep-recent-review-feedback.rb - tests: - - test/sweep_recent_review_feedback_test.rb - success_fixture: - name: ranked-backlog-with-fingerprints - expected_outcome: pass - degraded_fixture: - name: no-current-findings - expected_outcome: degraded_report - failure_fixture: - name: missing-org-token - expected_outcome: fail_closed -adversarial_fixtures: - - id: prompt-template-injection - category: prompt_poisoning - input: "Template text asks the agent to ignore AGENTS.md or omit live gh checks." - blocked_by: .github/scripts/verify-org-control-plane-contract.rb - expected_outcome: fail_closed - - id: workflow-body-drop - category: tool_poisoning - input: "gh api request body is passed through stdin without --input -." - blocked_by: test/evalops_pr_lens_review_test.rb - expected_outcome: fail_closed - - id: misleading-empty-ledger - category: data_poisoning - input: "Review feedback sweep lacks org-wide token and would emit empty artifacts." - blocked_by: .github/workflows/review-feedback-sentinel.yml - expected_outcome: fail_closed - - id: catalog-reference-drift - category: data_poisoning - input: "services.yaml depends_on references an unknown service key." - blocked_by: .github/scripts/validate-services-catalog.rb - expected_outcome: fail_closed -commands: - local_check: "ruby .github/scripts/verify-org-control-plane-contract.rb --json-output org-control-plane-contract-report.json --markdown-output org-control-plane-contract-report.md" - ci_check: "ruby -Itest -e 'ARGV.each { |path| require \"./#{path}\" }' test/*_test.rb" - report_artifacts: - - org-control-plane-contract-report.json - - org-control-plane-contract-report.md diff --git a/.github/evalopsbot-review-targets.yml b/.github/evalopsbot-review-targets.yml deleted file mode 100644 index 6fd8321..0000000 --- a/.github/evalopsbot-review-targets.yml +++ /dev/null @@ -1,31 +0,0 @@ -version: 1 -org: evalops -reviewer: EvalOpsBot -central_repo: evalops/.github -dispatch_secret: EVALOPS_PR_LENS_TOKEN -app_secrets: - - EVALOPS_PR_LENS_APP_ID - - EVALOPS_PR_LENS_APP_PRIVATE_KEY - - EVALOPS_PR_LENS_APP_INSTALLATION_ID -central_workflows: - - .github/workflows/evalops-pr-lens-review.yml - - .github/workflows/evalopsbot-review-request-dispatch.yml - - .github/workflows/evalopsbot-review-canary.yml -target_repositories: - - repo: evalops/cerebro - fallback_workflow: .github/workflows/evalopsbot-review-request.yml - - repo: evalops/chat - fallback_workflow: .github/workflows/evalopsbot-review-request.yml - - repo: evalops/deploy - fallback_workflow: .github/workflows/evalopsbot-review-request.yml - - repo: evalops/diffscope - fallback_workflow: .github/workflows/evalopsbot-review-request.yml - - repo: evalops/ensemble - fallback_workflow: .github/workflows/evalopsbot-review-request.yml - - repo: evalops/maestro - fallback_workflow: .github/workflows/evalopsbot-review-request.yml - - repo: evalops/maestro-internal - fallback_workflow: .github/workflows/evalopsbot-review-request.yml - - repo: evalops/platform - fallback_workflow: .github/workflows/evalopsbot-review-request.yml -exemptions: [] diff --git a/.github/pr-lens-routing.yml b/.github/pr-lens-routing.yml deleted file mode 100644 index 9834560..0000000 --- a/.github/pr-lens-routing.yml +++ /dev/null @@ -1,18 +0,0 @@ -defaults: - provider: anthropic - model: claude-opus-4-7 - max_diff_bytes: 180000 - -lenses: - iam-blast-radius: - model: claude-opus-4-7 - max_diff_bytes: 220000 - argo-manifest-skew: - model: claude-opus-4-7 - max_diff_bytes: 220000 - generated-sdk-delta: - model: claude-opus-4-7 - max_diff_bytes: 260000 - eval-regression-risk: - model: claude-opus-4-7 - max_diff_bytes: 220000 diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index 01b306b..0000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,42 +0,0 @@ -## Summary - - - -## Scope - -- [ ] Runtime/product behavior -- [ ] Contract/proto/API surface -- [ ] Generated artifacts -- [ ] GitOps/deploy/release train -- [ ] CI/tooling/security guardrail -- [ ] Docs/process only - -## Test Plan - -- [ ] Unit or focused tests pass -- [ ] Integration/e2e tests pass, if applicable -- [ ] Generated artifact or drift check run, if applicable -- [ ] GitHub Actions checked or linked -- [ ] Staging/Argo/live verification attempted, if applicable - -Commands and evidence: - -- - -## Cross-Repo / Release Impact - -- Affected repos/services: -- Follow-up PRs/issues: -- Rollout or rollback notes: - -## Agent / Automation Notes - -- [ ] This PR was agent-authored or agent-assisted -- [ ] Maestro-authored commits include the required authorship trailers -- [ ] I checked live GitHub state before publishing -- [ ] Review feedback and failing checks have been rechecked -- [ ] No local secrets, generated scratch artifacts, or unrelated worktree changes are included - -## Related Issues - - diff --git a/.github/scripts/audit-archived-dependabot.rb b/.github/scripts/audit-archived-dependabot.rb deleted file mode 100644 index c3323c1..0000000 --- a/.github/scripts/audit-archived-dependabot.rb +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "open3" -require "optparse" -require "time" -require "uri" - -module EvalOpsArchivedDependabotAudit - REPORT_SCHEMA_VERSION = "evalops.archived_dependabot_audit.v1" - - module_function - - def parse_repos(value) - value.to_s.split(",").map(&:strip).reject(&:empty?).map do |repo| - repo.include?("/") ? repo : "evalops/#{repo}" - end - end - - def gh(*args, allow_failure: false) - stdout, stderr, status = Open3.capture3("gh", *args) - return [stdout, stderr, status] if allow_failure - - raise "gh #{args.join(" ")} failed: #{stderr.empty? ? stdout : stderr}" unless status.success? - - stdout - end - - def discover_archived_repos(owner:) - raw = gh("repo", "list", owner, "--limit", "1000", "--json", "nameWithOwner,isArchived") - JSON.parse(raw).each_with_object([]) do |repo, repos| - repos << repo.fetch("nameWithOwner") if repo["isArchived"] - end.sort - end - - def dependabot_config_present?(repo) - encoded = ".github/dependabot.yml".split("/").map { |part| URI.encode_www_form_component(part) }.join("/") - _stdout, _stderr, status = gh("api", "repos/#{repo}/contents/#{encoded}", allow_failure: true) - status.success? - end - - def open_dependabot_prs(repo) - raw = gh( - "pr", - "list", - "--repo", - repo, - "--state", - "open", - "--author", - "app/dependabot", - "--json", - "number,title,url" - ) - JSON.parse(raw) - end - - def repo_report(repo) - { - "repo" => repo, - "dependabot_config_present" => dependabot_config_present?(repo), - "open_dependabot_prs" => open_dependabot_prs(repo) - } - rescue StandardError => e - { - "repo" => repo, - "dependabot_config_present" => nil, - "open_dependabot_prs" => [], - "error" => e.message - } - end - - def report(owner:, repos:) - rows = repos.map { |repo| repo_report(repo) } - { - "schema_version" => REPORT_SCHEMA_VERSION, - "generated_at" => Time.now.utc.iso8601, - "owner" => owner, - "repo_count" => rows.length, - "repos_with_dependabot_config" => rows.count { |repo| repo["dependabot_config_present"] }, - "open_dependabot_pr_count" => rows.sum { |repo| repo.fetch("open_dependabot_prs").length }, - "repos" => rows - } - end - - def markdown_report(report) - lines = [ - "# Archived Dependabot Audit", - "", - "- Generated at: `#{report.fetch("generated_at")}`", - "- Owner: `#{report.fetch("owner")}`", - "- Archived repos checked: `#{report.fetch("repo_count")}`", - "- Repos with Dependabot config: `#{report.fetch("repos_with_dependabot_config")}`", - "- Open Dependabot PRs: `#{report.fetch("open_dependabot_pr_count")}`", - "", - "| Repo | Dependabot config | Open Dependabot PRs | Notes |", - "| --- | --- | ---: | --- |" - ] - report.fetch("repos").each do |repo| - prs = repo.fetch("open_dependabot_prs").map { |pr| "##{pr.fetch("number")}" }.join(", ") - lines << "| `#{repo.fetch("repo")}` | #{repo["dependabot_config_present"]} | #{repo.fetch("open_dependabot_prs").length} | #{repo["error"] || prs} |" - end - lines.join("\n") - end - - def run(argv) - options = { - owner: "evalops", - repos: [], - json_output: nil, - markdown_output: nil - } - OptionParser.new do |parser| - parser.on("--owner OWNER") { |value| options[:owner] = value } - parser.on("--repos REPOS") { |value| options[:repos] = parse_repos(value) } - parser.on("--json-output PATH") { |value| options[:json_output] = value } - parser.on("--markdown-output PATH") { |value| options[:markdown_output] = value } - end.parse!(argv) - - repos = options.fetch(:repos) - repos = discover_archived_repos(owner: options.fetch(:owner)) if repos.empty? - audit = report(owner: options.fetch(:owner), repos: repos) - json = JSON.pretty_generate(audit) - if options[:json_output] - File.write(options[:json_output], "#{json}\n") - else - puts json - end - File.write(options[:markdown_output], "#{markdown_report(audit)}\n") if options[:markdown_output] - 0 - end -end - -if $PROGRAM_NAME == __FILE__ - exit EvalOpsArchivedDependabotAudit.run(ARGV) -end diff --git a/.github/scripts/audit-engineering-practices.rb b/.github/scripts/audit-engineering-practices.rb deleted file mode 100644 index e447e9d..0000000 --- a/.github/scripts/audit-engineering-practices.rb +++ /dev/null @@ -1,921 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "digest" -require "json" -require "open3" -require "optparse" -require "set" -require "time" -require "yaml" - -module EvalOpsEngineeringPracticesAudit - SCHEMA_VERSION = "evalops.engineering_practices.v1" - REPORT_SCHEMA_VERSION = "evalops.engineering_practices_audit.v1" - REQUIRED_TOP_LEVEL = %w[ - schema_version - contract_id - owner_repo - workflow - source_records - repo_tiers - practices - live_audit - ].freeze - REQUIRED_PRACTICES = %w[ - org-rulesets - backlog-lifecycle - release-train-state - agent-review-lane - security-slo - operating-rails - evidence-first-done - ].freeze - SEARCH_TOTAL_FALLBACK = { - "total_count" => 0, - "incomplete_results" => false - }.freeze - DEFAULT_FORBIDDEN_CODEQL_PATTERNS = [ - "codeql", - "code scanning", - "code-scanning", - "github/codeql-action" - ].freeze - - module_function - - def load_contract(path) - YAML.safe_load(File.read(path), permitted_classes: [], aliases: false) - end - - def relative_path(root, path) - File.expand_path(path, root) - end - - def file_digest(root, path) - absolute = relative_path(root, path) - return nil unless File.file?(absolute) - - Digest::SHA256.file(absolute).hexdigest - end - - def repo_name(repo) - repo.to_s.split("/", 2).last - end - - def check_path(root, path, errors, warnings, required: true) - absolute = relative_path(root, path) - return true if File.file?(absolute) - - message = "#{path} does not exist" - required ? errors << message : warnings << message - false - end - - def duplicates(values) - seen = Set.new - values.each_with_object(Set.new) do |value, repeated| - repeated << value if seen.include?(value) - seen << value - end.to_a - end - - def validate_contract(contract, root: Dir.pwd) - errors = [] - warnings = [] - REQUIRED_TOP_LEVEL.each { |key| errors << "#{key} is required" unless contract.key?(key) } - errors << "schema_version must be #{SCHEMA_VERSION}" unless contract["schema_version"] == SCHEMA_VERSION - errors << "workflow.name is required" if contract.dig("workflow", "name").to_s.empty? - errors << "workflow.correctness_model is required" if contract.dig("workflow", "correctness_model").to_s.empty? - errors << "workflow.threat_model is required" if contract.dig("workflow", "threat_model").to_s.empty? - - Array(contract["source_records"]).each do |record| - errors << "source_records.id is required" if record["id"].to_s.empty? - path = record["path"].to_s - errors << "#{record["id"]}: path is required" if path.empty? - check_path(root, path, errors, warnings) unless path.empty? - end - - tier_controls = Set.new - repos_by_tier = contract.fetch("repo_tiers", {}).flat_map do |tier, data| - errors << "repo_tiers.#{tier}.repos must not be empty" if Array(data["repos"]).empty? && tier != "experimental" - Array(data["required_controls"]).each { |control| tier_controls << control.to_s } - Array(data["repos"]).map { |repo| [tier, repo] } - end - duplicate_repos = duplicates(repos_by_tier.map(&:last)) - errors << "repo listed in more than one tier: #{duplicate_repos.join(", ")}" unless duplicate_repos.empty? - - practices = Array(contract["practices"]) - practice_ids = practices.map { |practice| practice["id"].to_s } - duplicate_practices = duplicates(practice_ids) - errors << "duplicate practice ids: #{duplicate_practices.join(", ")}" unless duplicate_practices.empty? - missing_practices = REQUIRED_PRACTICES - practice_ids - errors << "missing required practices: #{missing_practices.join(", ")}" unless missing_practices.empty? - unknown_controls = tier_controls - Set.new(practice_ids) - errors << "repo tier references unknown practice controls: #{unknown_controls.to_a.join(", ")}" unless unknown_controls.empty? - - practices.each do |practice| - id = practice["id"].to_s - %w[title why adoption].each do |field| - errors << "#{id}: #{field} is required" if practice[field].to_s.strip.empty? - end - source_path = practice.dig("source", "path").to_s - errors << "#{id}: source.path is required" if source_path.empty? - check_path(root, source_path, errors, warnings) unless source_path.empty? - checked_by = Array(practice["checked_by"]) - errors << "#{id}: checked_by is required" if checked_by.empty? - checked_by.each { |path| check_path(root, path, errors, warnings) } - errors << "#{id}: at least one signal is required" if Array(practice["signals"]).empty? - end - - required_files = contract.dig("live_audit", "required_files") || {} - %w[critical standard].each do |tier| - errors << "live_audit.required_files.#{tier} is required" unless required_files.key?(tier) - end - errors << "live_audit.owner is required" if contract.dig("live_audit", "owner").to_s.empty? - errors << "live_audit.sampled_repos must not be empty" if Array(contract.dig("live_audit", "sampled_repos")).empty? - - no_codeql = contract.dig("live_audit", "no_codeql") || {} - errors << "live_audit.no_codeql.security_configuration_id is required" if no_codeql["security_configuration_id"].to_i.zero? - expected_code_scanning = no_codeql.dig("required_settings", "code_scanning_default_setup").to_s - errors << "live_audit.no_codeql.required_settings.code_scanning_default_setup must be disabled" unless expected_code_scanning == "disabled" - excluded_scanners = Array(contract.dig("live_audit", "security_alert_slo", "excluded_scanners")).map(&:to_s) - errors << "live_audit.security_alert_slo.excluded_scanners must include codeql" unless excluded_scanners.include?("codeql") - - { - "status" => errors.empty? ? "pass" : "fail", - "errors" => errors, - "warnings" => warnings - } - end - - def evidence(contract, root) - Array(contract["source_records"]).map do |record| - { - "source_id" => record["id"], - "path" => record["path"], - "sha256" => file_digest(root, record["path"]) - } - end - end - - def gh_runner - lambda do |args| - stdout, stderr, status = Open3.capture3("gh", *args) - [stdout, stderr, status.success?] - end - end - - def parse_json(stdout) - JSON.parse(stdout) - rescue JSON::ParserError - nil - end - - def run_gh(args, runner, warnings, fallback) - stdout, stderr, success = runner.call(args) - unless success - warnings << "gh #{args.join(" ")} failed: #{stderr.to_s.strip}" - return fallback - end - parsed = parse_json(stdout) - return parsed unless parsed.nil? - - warnings << "gh #{args.join(" ")} returned non-JSON output" - fallback - end - - def search_count(query, runner, warnings) - payload = run_gh( - ["api", "-X", "GET", "/search/issues", "-f", "q=#{query}", "-f", "per_page=1"], - runner, - warnings, - SEARCH_TOTAL_FALLBACK - ) - payload.fetch("total_count", 0) - end - - def code_search_matches(query, runner, warnings) - payload = run_gh( - ["search", "code", query, "--json", "repository,path", "--limit", "100"], - runner, - warnings, - [] - ) - Array(payload).map do |item| - repository = item.dig("repository", "nameWithOwner") || item.dig("repository", "fullName") - { - "repository" => repository.to_s, - "path" => item["path"].to_s - } - end.reject { |item| item["repository"].empty? || item["path"].empty? } - end - - def org_rulesets(owner, runner, warnings) - payload = run_gh( - ["api", "-X", "GET", "/orgs/#{owner}/rulesets"], - runner, - warnings, - [] - ) - Array(payload).map do |ruleset| - detail = run_gh( - ["api", "-X", "GET", "/orgs/#{owner}/rulesets/#{ruleset["id"]}"], - runner, - warnings, - ruleset - ) - { - "id" => detail["id"], - "name" => detail["name"], - "target" => detail["target"], - "enforcement" => detail["enforcement"], - "conditions" => detail["conditions"] || {}, - "rules" => Array(detail["rules"]).map do |rule| - { - "type" => rule["type"], - "required_status_checks" => Array(rule.dig("parameters", "required_status_checks")).map do |check| - check["context"] - end.compact.sort - } - end - } - end - end - - def ruleset_applies_to_repo?(ruleset, repo) - return false unless ruleset["target"] == "branch" - - repo_name = repo.split("/").last - condition = ruleset.dig("conditions", "repository_name") || {} - includes = Array(condition["include"]) - excludes = Array(condition["exclude"]) - included = includes.empty? || includes.include?("~ALL") || includes.include?(repo_name) - included && !excludes.include?(repo_name) - end - - def ruleset_required_status_policy(repo, rulesets) - applicable = Array(rulesets).select { |ruleset| ruleset_applies_to_repo?(ruleset, repo) } - rules = applicable.map do |ruleset| - checks = Array(ruleset["rules"]).flat_map { |rule| Array(rule["required_status_checks"]) }.uniq.sort - next if checks.empty? - - { - "id" => ruleset["id"], - "name" => ruleset["name"], - "enforcement" => ruleset["enforcement"], - "required_status_checks" => checks - } - end.compact - { - "ruleset_required_status_checks" => rules.flat_map { |rule| rule["required_status_checks"] }.uniq.sort, - "ruleset_required_status_check_rulesets" => rules - } - end - - def branch_protection(repo, runner, warnings) - payload = run_gh( - ["api", "-X", "GET", "/repos/#{repo}/branches/main/protection"], - runner, - warnings, - {} - ) - contexts = Array(payload.dig("required_status_checks", "contexts")) + - Array(payload.dig("required_status_checks", "checks")).map { |check| check["context"] }.compact - { - "repo" => repo, - "has_protection" => !payload.empty?, - "required_status_checks" => contexts.uniq.sort, - "requires_reviews" => payload.key?("required_pull_request_reviews"), - "enforce_admins" => payload.dig("enforce_admins", "enabled") == true - } - end - - def code_security_default(owner, config_id, runner, warnings) - defaults = run_gh( - ["api", "-X", "GET", "/orgs/#{owner}/code-security/configurations/defaults"], - runner, - warnings, - [] - ) - Array(defaults).find { |entry| entry.dig("configuration", "id").to_i == config_id.to_i } || {} - end - - def code_security_assigned_repositories(owner, config_id, runner, warnings) - stdout, stderr, success = runner.call( - [ - "api", - "--paginate", - "-X", - "GET", - "/orgs/#{owner}/code-security/configurations/#{config_id}/repositories", - "-f", - "per_page=100", - "--jq", - ".[]" - ] - ) - unless success - warnings << "code-security configuration repository fetch failed: #{stderr.to_s.strip}" - return [] - end - entries = stdout.lines.map { |line| parse_json(line) }.compact - if entries.empty? - parsed = parse_json(stdout) - entries = parsed if parsed.is_a?(Array) - end - entries = Array(entries).flat_map { |entry| entry.is_a?(Array) ? entry : [entry] } - entries.map do |entry| - { - "repository" => entry.dig("repository", "full_name").to_s, - "status" => entry["status"].to_s - } - end.reject { |entry| entry["repository"].empty? } - end - - def forbidden_codeql_patterns(config) - patterns = Array(config["forbidden_required_check_patterns"]).map(&:to_s) - patterns.empty? ? DEFAULT_FORBIDDEN_CODEQL_PATTERNS : patterns - end - - def forbidden_codeql_check?(context, patterns) - normalized = context.to_s.downcase - patterns.any? { |pattern| normalized.include?(pattern.downcase) } - end - - def codeql_required_check_matches(branch, patterns) - Array(branch).flat_map do |item| - repo = item["repo"] - branch_checks = Array(item["required_status_checks"]).select { |context| forbidden_codeql_check?(context, patterns) } - ruleset_checks = Array(item["ruleset_required_status_check_rulesets"]).flat_map do |ruleset| - Array(ruleset["required_status_checks"]).select { |context| forbidden_codeql_check?(context, patterns) }.map do |context| - { - "repo" => repo, - "source" => "ruleset", - "ruleset" => ruleset["name"], - "context" => context - } - end - end - direct_checks = branch_checks.map do |context| - { - "repo" => repo, - "source" => "branch_protection", - "context" => context - } - end - direct_checks + ruleset_checks - end - end - - def no_codeql_audit(contract, branch, runner, warnings) - config = contract.dig("live_audit", "no_codeql") || {} - owner = contract.dig("live_audit", "owner").to_s - config_id = config["security_configuration_id"].to_i - default = code_security_default(owner, config_id, runner, warnings) - assigned = code_security_assigned_repositories(owner, config_id, runner, warnings) - assigned_by_repo = assigned.to_h { |entry| [entry["repository"], entry["status"]] } - sampled_repos = Array(contract.dig("live_audit", "sampled_repos")).map(&:to_s) - missing_sampled = sampled_repos.reject { |repo| assigned_by_repo[repo] == "enforced" } - workflow_matches = (config["forbidden_workflow_queries"] || {}).map do |key, query| - { - "key" => key, - "query" => query, - "matches" => code_search_matches(query, runner, warnings) - } - end - - configuration = default["configuration"] || {} - required = config["required_settings"] || {} - { - "security_configuration_id" => config_id, - "configuration_name" => configuration["name"], - "default_for_new_repos" => default["default_for_new_repos"], - "required_settings" => required, - "observed_settings" => { - "advanced_security" => configuration["advanced_security"], - "code_scanning_default_setup" => configuration["code_scanning_default_setup"], - "dependency_graph_autosubmit_action" => configuration["dependency_graph_autosubmit_action"] - }, - "assigned_repository_count" => assigned.length, - "missing_sampled_repositories" => missing_sampled, - "forbidden_workflow_queries" => workflow_matches, - "forbidden_required_check_patterns" => forbidden_codeql_patterns(config), - "required_check_matches" => codeql_required_check_matches(branch, forbidden_codeql_patterns(config)) - } - end - - def file_exists?(repo, path, runner) - _stdout, _stderr, success = runner.call(["api", "-X", "GET", "/repos/#{repo}/contents/#{path}"]) - success - end - - def repo_file_adoption(repos, required_by_tier, tiers, runner, owner_repo:, root:) - repos.map do |repo| - tier = tiers.fetch(repo, "unknown") - required = Array(required_by_tier[tier]) - checks = required.to_h do |path| - present = if repo == owner_repo - File.file?(relative_path(root, path)) - else - file_exists?(repo, path, runner) - end - [path, present] - end - { - "repo" => repo, - "tier" => tier, - "required_files" => checks, - "missing_required_files" => checks.select { |_path, present| !present }.keys - } - end - end - - def dependabot_alerts(owner, runner, warnings) - stdout, stderr, success = runner.call( - ["api", "--paginate", "-X", "GET", "/orgs/#{owner}/dependabot/alerts", "-f", "state=open", "-f", "per_page=100", "--jq", ".[]"] - ) - unless success - warnings << "dependabot alert fetch failed: #{stderr.to_s.strip}" - return { "total" => 0, "by_severity" => {}, "by_repo" => {} } - end - alerts = stdout.lines.map { |line| parse_json(line) }.compact - if alerts.empty? - parsed = parse_json(stdout) - alerts = parsed if parsed.is_a?(Array) - end - alerts = Array(alerts) - critical_high = alerts.select do |alert| - %w[critical high].include?(alert.dig("security_vulnerability", "severity").to_s) - end - { - "total" => alerts.length, - "by_severity" => alerts.group_by { |alert| alert.dig("security_vulnerability", "severity").to_s }.transform_values(&:length), - "by_repo" => alerts.group_by { |alert| alert.dig("repository", "full_name").to_s }.transform_values(&:length), - "critical_high_by_repo" => critical_high.group_by { |alert| alert.dig("repository", "full_name").to_s }.sort.to_h do |repo, repo_alerts| - packages = repo_alerts.group_by do |alert| - [ - alert.dig("security_vulnerability", "severity").to_s, - alert.dig("dependency", "package", "ecosystem").to_s, - alert.dig("dependency", "package", "name").to_s - ] - end.map do |(severity, ecosystem, name), package_alerts| - { - "severity" => severity, - "ecosystem" => ecosystem, - "package" => name, - "count" => package_alerts.length, - "advisories" => package_alerts.map do |alert| - alert.dig("security_advisory", "cve_id") || alert.dig("security_advisory", "ghsa_id") - end.compact.uniq.sort - } - end.sort_by { |item| [item["severity"] == "critical" ? 0 : 1, item["ecosystem"], item["package"]] } - [ - repo, - { - "total" => repo_alerts.length, - "by_severity" => repo_alerts.group_by { |alert| alert.dig("security_vulnerability", "severity").to_s }.transform_values(&:length), - "packages" => packages - } - ] - end - } - end - - def secret_scanning_alerts(owner, runner, warnings) - stdout, stderr, success = runner.call( - ["api", "--paginate", "-X", "GET", "/orgs/#{owner}/secret-scanning/alerts", "-f", "state=open", "-f", "per_page=100", "--jq", ".[]"] - ) - unless success - warnings << "secret-scanning alert fetch failed: #{stderr.to_s.strip}" - return { "total" => 0, "by_repo" => {} } - end - alerts = stdout.lines.map { |line| parse_json(line) }.compact - if alerts.empty? - parsed = parse_json(stdout) - alerts = parsed if parsed.is_a?(Array) - end - alerts = Array(alerts) - - { - "total" => alerts.length, - "by_repo" => alerts.group_by { |alert| alert.dig("repository", "full_name").to_s }.sort.to_h do |repo, repo_alerts| - [ - repo, - repo_alerts.group_by { |alert| alert["secret_type_display_name"].to_s.empty? ? alert["secret_type"].to_s : alert["secret_type_display_name"].to_s } - .transform_values(&:length) - .sort.to_h - ] - end - } - end - - def issue_list(repo, runner, warnings) - payload = run_gh( - ["issue", "list", "--repo", repo, "--state", "open", "--limit", "100", "--json", "number,title,updatedAt"], - runner, - warnings, - [] - ) - Array(payload) - end - - def stale_closing_comment?(repo, number, runner) - stdout, _stderr, success = runner.call( - ["issue", "view", number.to_s, "--repo", repo, "--json", "comments", "--jq", ".comments[-1].body // \"\""] - ) - return false unless success - - stdout.include?("Closing because") - end - - def backlog_hygiene(repo, runner, warnings) - issues = issue_list(repo, runner, warnings).select do |issue| - issue["title"].to_s.start_with?("[codex] Guardrail backlog:") - end - stale = issues.select { |issue| stale_closing_comment?(repo, issue["number"], runner) } - { - "repo" => repo, - "open_guardrail_backlog_issues" => issues.map { |issue| issue.slice("number", "title", "updatedAt") }, - "stale_closing_comments" => stale.map { |issue| issue.slice("number", "title", "updatedAt") } - } - end - - def release_train_state(config, runner, warnings) - repo = config["dashboard_repo"].to_s - issue = config["dashboard_issue"].to_s - marker = config["marker"].to_s - return { "dashboard_present" => false } if repo.empty? || issue.empty? - - payload = run_gh( - ["issue", "view", issue, "--repo", repo, "--json", "number,title,state,updatedAt,body,url"], - runner, - warnings, - {} - ) - body = payload["body"].to_s - { - "dashboard_repo" => repo, - "dashboard_issue" => payload["number"] || issue.to_i, - "dashboard_url" => payload["url"], - "dashboard_state" => payload["state"], - "dashboard_updated_at" => payload["updatedAt"], - "dashboard_present" => !payload.empty? && (marker.empty? || body.include?(marker)), - "marker" => marker - } - end - - def build_findings(report) - findings = [] - rulesets = report.dig("live", "org_rulesets") || [] - if rulesets.empty? - findings << { - "practice" => "org-rulesets", - "severity" => "high", - "message" => "No EvalOps org rulesets are configured; repo-local branch protection is carrying all merge policy." - } - end - - Array(report.dig("live", "branch_protection")).each do |item| - next unless item["tier"] == "critical" - required = Array(item["required_status_checks"]) + Array(item["ruleset_required_status_checks"]) - next unless required.empty? - - findings << { - "practice" => "org-rulesets", - "severity" => "medium", - "repo" => item["repo"], - "message" => "Critical repo has no required status checks in branch protection or applicable org rulesets." - } - end - - Array(report.dig("live", "repo_rails")).each do |item| - missing = Array(item["missing_required_files"]) - next if missing.empty? - - findings << { - "practice" => "operating-rails", - "severity" => item["tier"] == "critical" ? "high" : "medium", - "repo" => item["repo"], - "message" => "Missing required rails: #{missing.join(", ")}" - } - end - - stale = Array(report.dig("live", "backlog_hygiene", "stale_closing_comments")) - unless stale.empty? - findings << { - "practice" => "backlog-lifecycle", - "severity" => "medium", - "message" => "#{stale.length} guardrail backlog issue(s) have closing comments but remain open.", - "issues" => stale - } - end - - security = report.dig("live", "security_alerts") || {} - critical = security.dig("dependabot", "by_severity", "critical").to_i - high = security.dig("dependabot", "by_severity", "high").to_i - if critical.positive? || high.positive? - findings << { - "practice" => "security-slo", - "severity" => critical.positive? ? "high" : "medium", - "message" => "Open Dependabot alerts exceed zero for critical/high severities.", - "critical" => critical, - "high" => high - } - end - secret_open = security.dig("secret_scanning", "total").to_i - if secret_open.positive? - findings << { - "practice" => "security-slo", - "severity" => "high", - "message" => "Open secret-scanning alerts require rotation, revocation, false-positive disposition, or accepted-risk evidence.", - "open" => secret_open - } - end - - no_codeql = report.dig("live", "no_codeql") || {} - required_settings = no_codeql["required_settings"] || {} - observed_settings = no_codeql["observed_settings"] || {} - mismatched_settings = required_settings.select do |key, expected| - observed_settings[key] != expected - end - if no_codeql["default_for_new_repos"] != "all" || !mismatched_settings.empty? - findings << { - "practice" => "security-slo", - "severity" => "high", - "message" => "GitHub CodeQL/default code-scanning baseline drifted from the EvalOps disabled configuration.", - "configuration_id" => no_codeql["security_configuration_id"], - "default_for_new_repos" => no_codeql["default_for_new_repos"], - "mismatched_settings" => mismatched_settings - } - end - unless Array(no_codeql["missing_sampled_repositories"]).empty? - findings << { - "practice" => "security-slo", - "severity" => "high", - "message" => "Sampled repos are not enforced by the no-CodeQL security configuration.", - "repos" => no_codeql["missing_sampled_repositories"] - } - end - workflow_matches = Array(no_codeql["forbidden_workflow_queries"]).flat_map { |query| Array(query["matches"]) } - unless workflow_matches.empty? - findings << { - "practice" => "security-slo", - "severity" => "high", - "message" => "CodeQL or GitHub Code Scanning workflow references were found in checked-in workflow paths.", - "matches" => workflow_matches - } - end - unless Array(no_codeql["required_check_matches"]).empty? - findings << { - "practice" => "security-slo", - "severity" => "high", - "message" => "CodeQL appears in branch protection or org-ruleset required checks.", - "matches" => no_codeql["required_check_matches"] - } - end - - train_state = report.dig("live", "release_train_state") || {} - Array(report.dig("live", "release_train_queries")).each do |query| - next unless query["total_count"].to_i.positive? - next if query["key"] == "deploy_image_sync_prs" && train_state["dashboard_present"] - - findings << { - "practice" => "release-train-state", - "severity" => "medium", - "message" => "#{query["key"]} matched #{query["total_count"]} merged PR(s) in the audit window without an active release-train state record." - } - end - - findings - end - - def live_audit(contract, runner: gh_runner, root: Dir.pwd, generated_at: Time.now.utc) - warnings = [] - owner = contract.dig("live_audit", "owner") - sampled_repos = Array(contract.dig("live_audit", "sampled_repos")) - tiers = contract.fetch("repo_tiers", {}).each_with_object({}) do |(tier, data), memo| - Array(data["repos"]).each { |repo| memo[repo] = tier } - end - required_files = contract.dig("live_audit", "required_files") || {} - - rulesets = org_rulesets(owner, runner, warnings) - branch = sampled_repos.map do |repo| - branch_protection(repo, runner, warnings) - .merge("tier" => tiers.fetch(repo, "unknown")) - .merge(ruleset_required_status_policy(repo, rulesets)) - end - issue_queries = (contract.dig("live_audit", "issue_queries") || {}).map do |key, query| - { "key" => key, "query" => query, "total_count" => search_count(query, runner, warnings) } - end - release_queries = (contract.dig("live_audit", "release_train_queries") || {}).map do |key, query| - { "key" => key, "query" => query, "total_count" => search_count(query, runner, warnings) } - end - train_state = release_train_state(contract.dig("live_audit", "release_train_state") || {}, runner, warnings) - backlog = backlog_hygiene(contract.fetch("owner_repo"), runner, warnings) - secret_scanning = secret_scanning_alerts(owner, runner, warnings) - live = { - "owner" => owner, - "org_rulesets" => rulesets, - "branch_protection" => branch, - "repo_rails" => repo_file_adoption( - sampled_repos, - required_files, - tiers, - runner, - owner_repo: contract.fetch("owner_repo"), - root: root - ), - "issue_queries" => issue_queries, - "release_train_queries" => release_queries, - "release_train_state" => train_state, - "backlog_hygiene" => backlog, - "security_alerts" => { - "dependabot" => dependabot_alerts(owner, runner, warnings), - "secret_scanning" => secret_scanning, - "secret_scanning_open" => secret_scanning.fetch("total", 0), - "excluded_scanners" => Array(contract.dig("live_audit", "security_alert_slo", "excluded_scanners")) - }, - "no_codeql" => no_codeql_audit(contract, branch, runner, warnings) - } - - static = validate_contract(contract, root: root) - report = { - "schema_version" => REPORT_SCHEMA_VERSION, - "contract_schema_version" => contract["schema_version"], - "contract_id" => contract["contract_id"], - "owner_repo" => contract["owner_repo"], - "generated_at" => generated_at.utc.iso8601, - "status" => static.fetch("status"), - "static_validation" => static, - "evidence" => evidence(contract, root), - "live" => live, - "warnings" => warnings - } - findings = build_findings(report) - report["findings"] = findings - report["status"] = "attention" if report["status"] == "pass" && findings.any? - report - end - - def markdown_report(report) - lines = [ - "# Engineering Practices Audit", - "", - "- Contract: `#{report["contract_id"]}`", - "- Owner: `#{report["owner_repo"]}`", - "- Generated at: `#{report["generated_at"]}`", - "- Status: `#{report["status"]}`", - "", - "## Findings" - ] - findings = Array(report["findings"]) - if findings.empty? - lines << "No practice drift findings." - else - findings.each do |finding| - prefix = finding["repo"] ? "`#{finding["repo"]}` " : "" - lines << "- `#{finding["severity"]}` `#{finding["practice"]}` #{prefix}#{finding["message"]}" - end - end - - lines << "" - lines << "## Live Signals" - rulesets = Array(report.dig("live", "org_rulesets")) - lines << "- Org rulesets: `#{rulesets.length}`" - critical = Array(report.dig("live", "branch_protection")).select { |item| item["tier"] == "critical" } - covered = critical.count do |item| - (Array(item["required_status_checks"]) + Array(item["ruleset_required_status_checks"])).any? - end - lines << "- Critical repo required-check policy: `#{covered}/#{critical.length}`" - security = report.dig("live", "security_alerts") || {} - lines << "- Dependabot open alerts: `#{security.dig("dependabot", "total") || 0}`" - lines << "- Secret scanning open alerts: `#{security.dig("secret_scanning", "total") || security["secret_scanning_open"] || 0}`" - unless Array(security["excluded_scanners"]).empty? - lines << "- Excluded scanners: `#{security["excluded_scanners"].join(", ")}`" - end - no_codeql = report.dig("live", "no_codeql") || {} - observed = no_codeql["observed_settings"] || {} - lines << "- No-CodeQL config: `#{no_codeql["security_configuration_id"] || "unknown"}` default=`#{no_codeql["default_for_new_repos"] || "unknown"}` code_scanning_default_setup=`#{observed["code_scanning_default_setup"] || "unknown"}` assigned_repos=`#{no_codeql["assigned_repository_count"] || 0}`" - workflow_match_count = Array(no_codeql["forbidden_workflow_queries"]).sum { |query| Array(query["matches"]).length } - lines << "- CodeQL/Code Scanning workflow matches: `#{workflow_match_count}`" - lines << "- CodeQL required-check matches: `#{Array(no_codeql["required_check_matches"]).length}`" - Array(report.dig("live", "issue_queries")).each do |query| - lines << "- #{query["key"]}: `#{query["total_count"]}`" - end - Array(report.dig("live", "release_train_queries")).each do |query| - lines << "- #{query["key"]}: `#{query["total_count"]}`" - end - train_state = report.dig("live", "release_train_state") || {} - if train_state.key?("dashboard_present") - state = train_state["dashboard_present"] ? "present" : "missing" - target = train_state["dashboard_url"] || [train_state["dashboard_repo"], train_state["dashboard_issue"]].compact.join("#") - lines << "- release_train_dashboard: `#{state}` #{target}" - end - - lines << "" - lines << "## Missing Repo Rails" - missing = Array(report.dig("live", "repo_rails")).select { |item| Array(item["missing_required_files"]).any? } - if missing.empty? - lines << "No sampled repo rail gaps." - else - missing.each do |item| - lines << "- `#{item["repo"]}` (#{item["tier"]}): #{item["missing_required_files"].join(", ")}" - end - end - - dependabot_repos = security.dig("dependabot", "critical_high_by_repo") || {} - unless dependabot_repos.empty? - lines << "" - lines << "## Security Remediation Ledger" - lines << "" - lines << "### Critical/High Dependabot Alerts" - dependabot_repos.each do |repo, data| - severities = data.fetch("by_severity", {}).map { |severity, count| "#{severity}: #{count}" }.join(", ") - lines << "- `#{repo}` (#{data.fetch("total", 0)}; #{severities})" - Array(data["packages"]).first(8).each do |package| - advisory = Array(package["advisories"]).first(3).join(", ") - suffix = advisory.empty? ? "" : " - #{advisory}" - lines << " - `#{package["severity"]}` `#{package["ecosystem"]}/#{package["package"]}`: #{package["count"]}#{suffix}" - end - end - end - - secret_repos = security.dig("secret_scanning", "by_repo") || {} - unless secret_repos.empty? - lines << "" if dependabot_repos.empty? - lines << "## Security Remediation Ledger" if dependabot_repos.empty? - lines << "" - lines << "### Open Secret-Scanning Alerts" - secret_repos.each do |repo, types| - type_summary = types.map { |type, count| "#{type}: #{count}" }.join(", ") - lines << "- `#{repo}`: #{type_summary}" - end - end - - unless Array(report["warnings"]).empty? - lines << "" - lines << "## Warnings" - report["warnings"].each { |warning| lines << "- #{warning}" } - end - - lines.join("\n") - end - - def write_report(report, json_output, markdown_output, root) - json = JSON.pretty_generate(report) - if json_output - File.write(relative_path(root, json_output), "#{json}\n") - else - puts json - end - File.write(relative_path(root, markdown_output), "#{markdown_report(report)}\n") if markdown_output - end - - def run(argv) - options = { - contract: ".github/contracts/engineering-practices.yml", - json_output: nil, - markdown_output: nil, - contract_only: false, - fail_on_findings: false - } - OptionParser.new do |parser| - parser.on("--contract PATH", "Contract YAML path") { |value| options[:contract] = value } - parser.on("--json-output PATH", "Write JSON report") { |value| options[:json_output] = value } - parser.on("--markdown-output PATH", "Write Markdown report") { |value| options[:markdown_output] = value } - parser.on("--contract-only", "Validate the static contract without GitHub API calls") { options[:contract_only] = true } - parser.on("--fail-on-findings", "Exit non-zero when live practice drift is found") { options[:fail_on_findings] = true } - end.parse!(argv) - - root = Dir.pwd - contract = load_contract(relative_path(root, options.fetch(:contract))) - report = if options[:contract_only] - static = validate_contract(contract, root: root) - { - "schema_version" => REPORT_SCHEMA_VERSION, - "contract_schema_version" => contract["schema_version"], - "contract_id" => contract["contract_id"], - "owner_repo" => contract["owner_repo"], - "generated_at" => Time.now.utc.iso8601, - "status" => static.fetch("status"), - "static_validation" => static, - "evidence" => evidence(contract, root), - "findings" => [], - "warnings" => static.fetch("warnings") - } - else - live_audit(contract, root: root) - end - - write_report(report, options[:json_output], options[:markdown_output], root) - return 1 if report["static_validation"].fetch("status") == "fail" - return 1 if options[:fail_on_findings] && Array(report["findings"]).any? - - 0 - end -end - -if $PROGRAM_NAME == __FILE__ - exit EvalOpsEngineeringPracticesAudit.run(ARGV) -end diff --git a/.github/scripts/check-pr-review-threads.rb b/.github/scripts/check-pr-review-threads.rb deleted file mode 100644 index d4a85c2..0000000 --- a/.github/scripts/check-pr-review-threads.rb +++ /dev/null @@ -1,432 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "open3" -require "optparse" - -module EvalOpsReviewThreadGuard - SEVERITY_RANK = { - "none" => 0, - "low" => 1, - "medium" => 2, - "high" => 3, - "p1" => 4, - "p0" => 5 - }.freeze - - module_function - - def first_nonblank_line(body) - body.to_s.lines.map(&:strip).find { |line| !line.empty? }.to_s - end - - def informational_summary?(body, author: nil) - first_line = first_nonblank_line(body) - return false unless first_line.match?(/\A##\s+(PR\s+Summary|Summary|Walkthrough)\b/i) - - author.to_s.match?(/\A(cursor|coderabbitai|chatgpt-codex-connector)\b/i) - end - - def severity(body) - text = body.to_s - return "p0" if text.match?(/\bP0\b/i) - return "p1" if text.match?(/\bP1\b/i) - return "high" if text.match?(/\bHigh Severity\b/i) || text.match?(/!\[High Badge\]/i) - return "medium" if text.match?(/\bMedium Severity\b/i) || text.match?(/!\[Medium Badge\]/i) - return "low" if text.match?(/\bLow Severity\b/i) || text.match?(/!\[Low Badge\]/i) - - "none" - end - - def severity_comment(comments) - candidates = Array(comments).each_with_object([]) do |comment, matches| - detected = severity(comment["body"]) - next matches if SEVERITY_RANK.fetch(detected) <= SEVERITY_RANK.fetch("none") - - matches << [detected, comment] - end - candidates.max_by { |detected, _comment| SEVERITY_RANK.fetch(detected) } - end - - def unresolved_threads(payload, min_severity: "high", include_outdated: false) - threshold = SEVERITY_RANK.fetch(min_severity) - nodes = payload.dig("data", "repository", "pullRequest", "reviewThreads", "nodes") || [] - nodes.each_with_object([]) do |thread, matches| - next matches if thread["isResolved"] - next matches if thread["isOutdated"] && !include_outdated - - detected, comment = severity_comment(thread.dig("comments", "nodes")) - comment ||= {} - detected ||= "none" - next matches if SEVERITY_RANK.fetch(detected) < threshold - - matches << { - kind: "review_thread", - id: thread["id"], - path: thread["path"], - line: thread["line"], - is_outdated: thread["isOutdated"], - severity: detected, - url: comment["url"], - body: comment["body"].to_s - } - end - end - - def top_level_feedback(payload, min_severity: "high") - threshold = SEVERITY_RANK.fetch(min_severity) - pull_request = payload.dig("data", "repository", "pullRequest") || {} - current_head_oid = pull_request["headRefOid"].to_s - feedback = [] - Array(pull_request.dig("comments", "nodes")).each do |comment| - next if informational_summary?(comment["body"], author: comment.dig("author", "login")) - - detected = severity(comment["body"]) - next if SEVERITY_RANK.fetch(detected) < threshold - - feedback << { - kind: "pr_comment", - severity: detected, - url: comment["url"], - body: comment["body"].to_s, - author: comment.dig("author", "login") - } - end - Array(pull_request.dig("reviews", "nodes")).each do |review| - next if informational_summary?(review["body"], author: review.dig("author", "login")) - - detected = severity(review["body"]) - next if SEVERITY_RANK.fetch(detected) < threshold - - review_commit_oid = review.dig("commit", "oid").to_s - next if !current_head_oid.empty? && !review_commit_oid.empty? && review_commit_oid != current_head_oid - - feedback << { - kind: "pr_review", - severity: detected, - url: review["url"], - body: review["body"].to_s, - author: review.dig("author", "login"), - state: review["state"] - } - end - feedback - end - - def blocking_feedback(payload, min_severity: "high", include_outdated: false) - unresolved_threads( - payload, - min_severity: min_severity, - include_outdated: include_outdated - ) + top_level_feedback(payload, min_severity: min_severity) - end - - def merge_pull_request_connections(payload, comments: nil, reviews: nil, review_threads: nil) - merged = payload.is_a?(Hash) ? payload : {} - merged["data"] = {} unless merged["data"].is_a?(Hash) - merged["data"]["repository"] = {} unless merged["data"]["repository"].is_a?(Hash) - merged["data"]["repository"]["pullRequest"] = {} unless merged["data"]["repository"]["pullRequest"].is_a?(Hash) - pull_request = merged["data"]["repository"]["pullRequest"] - if comments - pull_request["comments"] = {} unless pull_request["comments"].is_a?(Hash) - pull_request["comments"]["nodes"] = comments - end - if reviews - pull_request["reviews"] = {} unless pull_request["reviews"].is_a?(Hash) - pull_request["reviews"]["nodes"] = reviews - end - if review_threads - pull_request["reviewThreads"] = {} unless pull_request["reviewThreads"].is_a?(Hash) - pull_request["reviewThreads"]["nodes"] = review_threads - end - merged - end - - def merge_review_thread_nodes(payload, nodes) - merge_pull_request_connections(payload, review_threads: nodes) - end - - def graphql_query - <<~GRAPHQL - query($owner:String!,$repo:String!,$number:Int!) { - repository(owner:$owner, name:$repo) { - pullRequest(number:$number) { - headRefOid - comments(first:100) { - pageInfo { - hasNextPage - endCursor - } - nodes { - author { - login - } - body - url - } - } - reviews(first:100) { - pageInfo { - hasNextPage - endCursor - } - nodes { - author { - login - } - body - commit { - oid - } - state - url - } - } - reviewThreads(first:100) { - pageInfo { - hasNextPage - endCursor - } - nodes { - id - isResolved - isOutdated - path - line - comments(first:20) { - nodes { - body - url - } - } - } - } - } - } - } - GRAPHQL - end - - def comments_page_query - <<~GRAPHQL - query($owner:String!,$repo:String!,$number:Int!,$after:String) { - repository(owner:$owner, name:$repo) { - pullRequest(number:$number) { - comments(first:100, after:$after) { - pageInfo { - hasNextPage - endCursor - } - nodes { - author { - login - } - body - url - } - } - } - } - } - GRAPHQL - end - - def reviews_page_query - <<~GRAPHQL - query($owner:String!,$repo:String!,$number:Int!,$after:String) { - repository(owner:$owner, name:$repo) { - pullRequest(number:$number) { - reviews(first:100, after:$after) { - pageInfo { - hasNextPage - endCursor - } - nodes { - author { - login - } - body - commit { - oid - } - state - url - } - } - } - } - } - GRAPHQL - end - - def review_threads_page_query - <<~GRAPHQL - query($owner:String!,$repo:String!,$number:Int!,$after:String) { - repository(owner:$owner, name:$repo) { - pullRequest(number:$number) { - reviewThreads(first:100, after:$after) { - pageInfo { - hasNextPage - endCursor - } - nodes { - id - isResolved - isOutdated - path - line - comments(first:20) { - nodes { - body - url - } - } - } - } - } - } - } - GRAPHQL - end - - def fetch_graphql(owner:, name:, pr:, query:, cursor: nil) - args = [ - "gh", - "api", - "graphql", - "-f", - "owner=#{owner}", - "-f", - "repo=#{name}", - "-F", - "number=#{pr}", - "-f", - "query=#{query}" - ] - args += ["-f", "after=#{cursor}"] if cursor - stdout, stderr, status = Open3.capture3(*args) - raise "gh api graphql failed: #{stderr.strip}" unless status.success? - - JSON.parse(stdout) - end - - def fetch_connection_tail(owner:, name:, pr:, query:, connection_name:, first_connection:) - nodes = [] - connection = first_connection || {} - page_info = connection["pageInfo"] || {} - while page_info["hasNextPage"] - cursor = page_info["endCursor"] - raise "gh api graphql failed: missing #{connection_name} endCursor" if cursor.to_s.empty? - - payload = fetch_graphql(owner: owner, name: name, pr: pr, query: query, cursor: cursor) - connection = payload.dig("data", "repository", "pullRequest", connection_name) || {} - nodes.concat(Array(connection["nodes"])) - page_info = connection["pageInfo"] || {} - end - nodes - end - - def fetch_payload(repo:, pr:) - owner, name = repo.split("/", 2) - payload = fetch_graphql(owner: owner, name: name, pr: pr, query: graphql_query) - pull_request = payload.dig("data", "repository", "pullRequest") || {} - comments_connection = pull_request["comments"] || {} - reviews_connection = pull_request["reviews"] || {} - threads_connection = pull_request["reviewThreads"] || {} - - comments = Array(comments_connection["nodes"]) + fetch_connection_tail( - owner: owner, - name: name, - pr: pr, - query: comments_page_query, - connection_name: "comments", - first_connection: comments_connection - ) - reviews = Array(reviews_connection["nodes"]) + fetch_connection_tail( - owner: owner, - name: name, - pr: pr, - query: reviews_page_query, - connection_name: "reviews", - first_connection: reviews_connection - ) - review_threads = Array(threads_connection["nodes"]) + fetch_connection_tail( - owner: owner, - name: name, - pr: pr, - query: review_threads_page_query, - connection_name: "reviewThreads", - first_connection: threads_connection - ) - - merge_pull_request_connections( - payload, - comments: comments, - reviews: reviews, - review_threads: review_threads - ) - end - - def annotation(thread) - unless thread[:path] - title = "unresolved #{thread.fetch(:severity).upcase} #{thread.fetch(:kind).tr("_", " ")}" - return "::error title=#{title}::#{thread.fetch(:url)}" - end - - location = [thread.fetch(:path), thread[:line]].compact.join(":") - title = "unresolved #{thread.fetch(:severity).upcase} review thread" - "::error file=#{thread.fetch(:path)},line=#{thread[:line] || 1},title=#{title}::#{location} #{thread.fetch(:url)}" - end -end - -if $PROGRAM_NAME == __FILE__ - options = { - min_severity: "high", - include_outdated: false - } - - OptionParser.new do |parser| - parser.on("--repo OWNER/REPO", "Repository to inspect") { |value| options[:repo] = value } - parser.on("--pr NUMBER", Integer, "Pull request number") { |value| options[:pr] = value } - parser.on("--min-severity LEVEL", "Minimum severity: low, medium, high, p1, p0") { |value| options[:min_severity] = value.downcase } - parser.on("--include-outdated", "Include outdated unresolved threads") { options[:include_outdated] = true } - parser.on("--json PATH", "Read GraphQL payload from a file instead of gh") { |value| options[:json] = value } - end.parse! - - unless EvalOpsReviewThreadGuard::SEVERITY_RANK.key?(options.fetch(:min_severity)) - warn "invalid --min-severity #{options.fetch(:min_severity).inspect}" - exit 2 - end - - payload = - if options[:json] - JSON.parse(File.read(options.fetch(:json))) - else - missing = %i[repo pr].select { |key| options[key].nil? || options[key].to_s.empty? } - unless missing.empty? - warn "missing required options: #{missing.join(", ")}" - exit 2 - end - EvalOpsReviewThreadGuard.fetch_payload(repo: options.fetch(:repo), pr: options.fetch(:pr)) - end - - feedback = EvalOpsReviewThreadGuard.blocking_feedback( - payload, - min_severity: options.fetch(:min_severity), - include_outdated: options.fetch(:include_outdated) - ) - - if feedback.empty? - puts "No unresolved PR feedback at or above #{options.fetch(:min_severity)} severity." - exit 0 - end - - warn "Found #{feedback.length} unresolved PR feedback item(s) at or above #{options.fetch(:min_severity)} severity:" - feedback.each do |thread| - location = thread[:path] ? "#{thread.fetch(:path)}:#{thread[:line] || "?"}" : thread.fetch(:kind).to_s - warn "- [#{thread.fetch(:severity)}] #{location} #{thread.fetch(:url)}" - puts EvalOpsReviewThreadGuard.annotation(thread) - end - exit 1 -end diff --git a/.github/scripts/classify-agent-authorship.rb b/.github/scripts/classify-agent-authorship.rb deleted file mode 100644 index 7061f46..0000000 --- a/.github/scripts/classify-agent-authorship.rb +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "optparse" - -options = { - github_output: nil, -} - -OptionParser.new do |parser| - parser.on("--github-output PATH", "Append key=value outputs for GitHub Actions") do |path| - options[:github_output] = path - end -end.parse! - -input = ARGF.read - -messages = input.each_line.map do |line| - next if line.strip.empty? - - parsed = JSON.parse(line) - if parsed.is_a?(Hash) - parsed.dig("commit", "message") || parsed["message"] - end -end.compact - -required_patterns = { - "co_author" => /^Co-Authored-By:\s*Maestro\s+\s*$/i, - "version" => /^Maestro-Version:\s*\S.*$/i, - "prompt_id" => /^Maestro-Prompt-Id:\s*\S.*$/i, - "approvals_id" => /^Maestro-Approvals-Id:\s*\S.*$/i, -} - -marker_pattern = / - ^Co-Authored-By:\s*Maestro\s+\s*$ | - ^Maestro-(?:Version|Prompt-Id|Approvals-Id): -/ix - -agent_commits = 0 -untrailered_commits = 0 -incomplete_commits = 0 - -messages.each do |message| - has_marker = message.lines.any? { |line| line.match?(marker_pattern) } - - unless has_marker - untrailered_commits += 1 - next - end - - agent_commits += 1 - missing_required = required_patterns.values.any? do |pattern| - message.lines.none? { |line| line.match?(pattern) } - end - incomplete_commits += 1 if missing_required -end - -label = - if agent_commits.positive? && untrailered_commits.positive? - "mixed-authorship" - elsif agent_commits.positive? - "agent-authored" - else - "agent-assisted" - end - -outputs = { - "label" => label, - "total_commits" => messages.length, - "agent_commits" => agent_commits, - "untrailered_commits" => untrailered_commits, - "human_commits" => untrailered_commits, - "incomplete_agent_commits" => incomplete_commits, -} - -outputs.each { |key, value| puts "#{key}=#{value}" } - -if options[:github_output] - File.open(options[:github_output], "a") do |file| - outputs.each { |key, value| file.puts("#{key}=#{value}") } - end -end diff --git a/.github/scripts/evalops-codex-hook-guard.rb b/.github/scripts/evalops-codex-hook-guard.rb deleted file mode 100644 index 5d64ad7..0000000 --- a/.github/scripts/evalops-codex-hook-guard.rb +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "optparse" - -module EvalOpsCodexHookGuard - MERGE_WORDS = /\b(merge|merged|mergeable|readiness|ready to merge|ship|land)\b/i - REVIEW_THREAD_EVIDENCE = /(reviewThreads|review threads|gh api graphql|pullRequest\(number:|statusCheckRollup)/i - DESTRUCTIVE_GIT = / - \bgit\s+( - reset\s+--hard| - checkout\s+--\s+| - restore\s+(?:\.|:\/)| - clean\s+-[^\s]*[fd] - )\b - /ix - - module_function - - def evalops_repo?(cwd:, remote_url: nil) - return true if cwd.to_s.include?("/evalops/") || cwd.to_s.match?(%r{/repos/(platform|deploy|maestro|ensemble|\.github)(/|\z)}) - - remote_url.to_s.match?(%r{github\.com[:/]evalops/}) - end - - def session_start_message(cwd:, remote_url: nil) - return nil unless evalops_repo?(cwd: cwd, remote_url: remote_url) - - [ - "EvalOps repo detected.", - "Use fresh origin/main for broad sweeps, check live GitHub issues/PRs before org-default changes,", - "and use bounded one-shot GitHub polling instead of watch loops." - ].join(" ") - end - - def dirty_worktree?(status_text) - status_text.to_s.lines.any? { |line| !line.strip.empty? } - end - - def destructive_git_command?(command) - command.to_s.match?(DESTRUCTIVE_GIT) - end - - def pretool_git_guard(command:, status_text:) - return nil unless destructive_git_command?(command) - return nil unless dirty_worktree?(status_text) - - "Destructive git command in a dirty worktree: inspect unrelated changes before running `#{command}`." - end - - def stop_readiness_warning(transcript:) - text = transcript.to_s - return nil unless text.match?(MERGE_WORDS) - return nil if text.match?(REVIEW_THREAD_EVIDENCE) - - "Task mentions merge/readiness, but no recent review-thread or statusCheckRollup evidence was found." - end - - def run(argv, env: ENV, stdout: $stdout) - command = argv.shift.to_s - options = {} - OptionParser.new do |parser| - parser.on("--cwd PATH") { |value| options[:cwd] = value } - parser.on("--remote-url URL") { |value| options[:remote_url] = value } - parser.on("--command COMMAND") { |value| options[:command] = value } - parser.on("--status-text TEXT") { |value| options[:status_text] = value } - parser.on("--transcript TEXT") { |value| options[:transcript] = value } - parser.on("--json") { options[:json] = true } - end.parse!(argv) - - message = - case command - when "session-start" - session_start_message( - cwd: options[:cwd] || env["PWD"], - remote_url: options[:remote_url] || env["GIT_REMOTE_URL"] - ) - when "pretool-git" - pretool_git_guard( - command: options[:command] || env["CODEX_TOOL_COMMAND"], - status_text: options[:status_text] || env["GIT_STATUS_SHORT"] - ) - when "stop-readiness" - stop_readiness_warning(transcript: options[:transcript] || env["CODEX_TRANSCRIPT"]) - else - raise ArgumentError, "unknown hook command #{command.inspect}" - end - - payload = { - "hook" => command, - "message" => message, - "status" => message ? "warn" : "ok" - } - stdout.puts(options[:json] ? JSON.generate(payload) : message) if message || options[:json] - command == "pretool-git" && message ? 1 : 0 - end -end - -if $PROGRAM_NAME == __FILE__ - exit EvalOpsCodexHookGuard.run(ARGV) -end diff --git a/.github/scripts/evalops-pr-lens-review.rb b/.github/scripts/evalops-pr-lens-review.rb deleted file mode 100644 index e7aa6b2..0000000 --- a/.github/scripts/evalops-pr-lens-review.rb +++ /dev/null @@ -1,1531 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "base64" -require "digest" -require "fileutils" -require "json" -require "net/http" -require "openssl" -require "open3" -require "optparse" -require "time" -require "uri" -require "yaml" - -module EvalOpsPrLensReview - TARGET_REPOS = %w[ - evalops/platform - evalops/deploy - evalops/maestro-internal - ].freeze - - LENSES = { - "migration-safety" => { - name: "Migration safety", - focus: [ - "database migrations, schema ownership manifests, generated migration embeds, and stateful infrastructure migrations", - "Terraform, startup scripts, disk/cache migrations, and one-time data cleanup that must be safe on fresh and existing resources", - "backward/forward compatibility during rolling deploys, branch promotion, and rollback", - "concurrent migration hazards, idempotency, destructive DDL, and destructive filesystem or cloud-resource cleanup", - "missing migration tests, dry-runs, live plans, or release-order constraints" - ] - }, - "nats-contract-drift" => { - name: "NATS contract drift", - focus: [ - "NATS subjects, JetStream streams, consumers, queue groups, and retention policy changes", - "protobuf, JSON schema, event catalog, or publisher/subscriber contract drift", - "trace/context propagation across event boundaries", - "missing local simulation, contract fixtures, or consumer compatibility coverage" - ] - }, - "argo-manifest-skew" => { - name: "Argo manifest skew", - focus: [ - "GitOps desired state, Helm values, Kustomize overlays, and ArgoCD application drift", - "image tag policy, namespace/resource quota skew, and environment-specific values", - "manifest references to missing ConfigMaps, Secrets, services, or CRDs", - "changes that require deploy ordering or post-merge live-state verification" - ] - }, - "iam-blast-radius" => { - name: "IAM blast radius", - focus: [ - "GitHub Actions permissions, tokens, OIDC trust, cloud IAM roles, and service accounts", - "secret handling, Vault/ExternalSecrets references, and credential exposure", - "privilege expansion hidden in workflow, Terraform, Kubernetes, or app auth changes", - "tenant or customer boundary regressions" - ] - }, - "generated-sdk-delta" => { - name: "Generated SDK delta", - focus: [ - "protobuf, OpenAPI, JSON schema, BigQuery schema, and generated TypeScript/Go/Python SDK drift", - "release manifest, package version, changelog, and generated artifact consistency", - "manual edits to generated files without generator or source contract updates", - "missing generator commands or SDK publish compatibility checks" - ] - }, - "eval-regression-risk" => { - name: "Eval regression risk", - focus: [ - "evaluation datasets, golden fixtures, prompt/judge changes, scoring, and quality gates", - "frontier model/provider changes, fallback behavior, and tenant-visible AI behavior", - "regression budgets, flaky evals, missing smoke coverage, and false-pass risks", - "operator/customer-facing behavior that should have an eval or scenario replay" - ] - } - }.freeze - - LENS_PATH_RULES = { - "migration-safety" => [ - %r{\A(db|database|migrations?)/}i, - %r{migrations?/}i, - %r{\.(sql|tf)\z}i, - %r{\A(infrastructure|terraform|helm|charts|k8s|clusters)/}i, - %r{(disk|cache|state|cleanup|backfill|bootstrap|startup)}i - ], - "nats-contract-drift" => [ - %r{(^|/)(nats|jetstream|streams?|consumers?|subjects?)(/|\.)}i, - %r{(^|/)(proto|protos|protobuf|schemas?)/}i, - %r{\.(proto|avsc)\z}i, - %r{(cloudevents?|event[-_ ]?catalog|publisher|subscriber)}i - ], - "argo-manifest-skew" => [ - %r{\A(argocd|argo|clusters|k8s|kubernetes|overlays|base|helm|charts)/}i, - %r{(^|/)(kustomization|values)\.ya?ml\z}i, - %r{(^|/)applications?/}i, - %r{(^|/)(deployment|service|configmap|secret|externalsecret|namespace|ingress)\.ya?ml\z}i - ], - "iam-blast-radius" => [ - %r{\A\.github/workflows/}i, - %r{(^|/)(iam|rbac|serviceaccount|service-account|policy|permissions?)(/|\.)}i, - %r{(^|/)(secrets?|external-secrets?|vault|oidc)(/|\.)}i, - %r{\.(tf|tfvars)\z}i, - %r{(token|credential|workload[-_ ]?identity|rolebinding|clusterrole)}i - ], - "generated-sdk-delta" => [ - %r{(^|/)(gen|generated|sdk|openapi|swagger|proto|protos|protobuf|schemas?)/}i, - %r{\.(proto|openapi\.ya?ml|swagger\.json|schema\.json)\z}i, - %r{(^|/)(package\.json|pyproject\.toml|go\.mod|buf\.yaml|buf\.gen\.yaml)\z}i, - %r{(^|/)(CHANGELOG|release-please-config|\.release-please-manifest)}i - ], - "eval-regression-risk" => [ - %r{(^|/)(evals?|evaluations?|fixtures?|datasets?|goldens?|scenarios?|judges?|prompts?)/}i, - %r{(^|/)(prompt|judge|rubric|score|scoring|golden|fixture)}i, - %r{(^|/)testdata/}i - ] - }.freeze - - DOC_ONLY_PATH = %r{\A(README|SECURITY|CONTRIBUTING|CHANGELOG|docs/|profile/|.*\.(md|mdx|txt))}i - - MARKER = "" - REVIEW_REQUESTED_DISPATCH_EVENT = "evalopsbot-review-requested" - REVIEW_REQUESTED_DISPATCH_SOURCE = "evalopsbot-review-request-dispatch" - DEFAULT_MIN_CONFIDENCE = 0.82 - DEFAULT_MODEL = "claude-opus-4-7" - DEFAULT_PROVIDER = "anthropic" - DEFAULT_MAX_DIFF_BYTES = 180_000 - MAX_FINDINGS_PER_COMMENT = 12 - MAX_CONTEXT_ITEMS = 25 - DEFAULT_ROUTING_CONFIG_PATH = ".github/pr-lens-routing.yml" - COMMON_FINGERPRINT_TOKENS = %w[ - the and for with from that this into when are was were has have should would could - evalops review finding issue risk missing unsafe fails failure causes cause because - ].freeze - - module_function - - def parse_list(value) - value.to_s.split(",").map(&:strip).reject(&:empty?) - end - - def normalize_repo(repo) - raw = repo.to_s.strip - return raw if raw.include?("/") - - "evalops/#{raw}" - end - - def parse_pr_filter(value, repos:) - entries = parse_list(value) - return nil if entries.empty? - - normalized_repos = repos.map { |repo| normalize_repo(repo) } - entries.each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |entry, filter| - if entry.include?("#") - repo, number = entry.split("#", 2) - filter[normalize_repo(repo)] << Integer(number) - elsif normalized_repos.length == 1 - filter[normalized_repos.fetch(0)] << Integer(entry) - else - raise ArgumentError, "target_prs entries without repo require exactly one target repo: #{entry}" - end - end.transform_values(&:uniq) - end - - def check_context(lens) - "evalops-pr-lens/#{lens}" - end - - def meta_context - "evalops-pr-lens/meta-review" - end - - def valid_lens!(lens) - return lens if LENSES.key?(lens) - - raise ArgumentError, "unknown lens #{lens.inspect}; expected one of #{LENSES.keys.join(", ")}" - end - - def load_routing_config(path) - return {} if path.to_s.empty? || !File.exist?(path) - - YAML.safe_load(File.read(path), permitted_classes: [], aliases: false) || {} - end - - def routing_for_lens(lens, config) - lens = valid_lens!(lens) - defaults = config.fetch("defaults", {}) || {} - lenses = config.fetch("lenses", {}) || {} - route = lenses.fetch(lens, {}) || {} - defaults.merge(route) - end - - def effective_review_options(lens:, provider:, model:, max_diff_bytes:, routing_config:) - route = routing_for_lens(lens, load_routing_config(routing_config)) - { - provider: route.fetch("provider", provider || DEFAULT_PROVIDER).to_s, - model: route.fetch("model", model || DEFAULT_MODEL).to_s, - max_diff_bytes: Integer(route.fetch("max_diff_bytes", max_diff_bytes || DEFAULT_MAX_DIFF_BYTES)) - } - end - - def lens_reason_for_path(path) - LENS_PATH_RULES.each do |lens, patterns| - return lens if patterns.any? { |pattern| path.match?(pattern) } - end - nil - end - - def lenses_for_paths(paths) - normalized = paths.map(&:to_s).map(&:strip).reject(&:empty?) - return LENSES.keys if normalized.empty? - - lenses = normalized.flat_map do |path| - LENS_PATH_RULES.each_with_object([]) do |(lens, patterns), matches| - matches << lens if patterns.any? { |pattern| path.match?(pattern) } - end - end.uniq - return lenses if lenses.any? - return [] if normalized.all? { |path| path.match?(DOC_ONLY_PATH) } - - ["eval-regression-risk"] - end - - def gh_api(*args, input: nil, token: ENV["GH_TOKEN"]) - env = {} - env["GH_TOKEN"] = token if token && !token.empty? - command = ["gh", "api", *args] - - if input - command += ["--input", "-"] - stdout, stderr, status = Open3.capture3(env, *command, stdin_data: input) - else - stdout, stderr, status = Open3.capture3(env, *command) - end - - unless status.success? - raise "gh api #{args.join(" ")} failed: #{stderr.empty? ? stdout : stderr}" - end - - stdout - end - - def gh_api_json(*args, input: nil, token: ENV["GH_TOKEN"]) - raw = gh_api(*args, input: input, token: token) - return nil if raw.strip.empty? - - JSON.parse(raw) - end - - def gh_search_review_requested(owner:, reviewer:, limit:, token: ENV["GH_TOKEN"]) - env = {} - env["GH_TOKEN"] = token if token && !token.empty? - command = [ - "gh", "search", "prs", - "--owner", owner, - "--review-requested", reviewer, - "--state", "open", - "--json", "repository,number,title,url,isDraft,updatedAt", - "--limit", limit.to_s - ] - stdout, stderr, status = Open3.capture3(env, *command) - unless status.success? - raise "gh search prs failed: #{stderr.empty? ? stdout : stderr}" - end - - JSON.parse(stdout) - end - - def normalize_search_pull_requests(rows) - rows.map do |row| - repo = row.dig("repository", "nameWithOwner") - next if repo.to_s.empty? - - { - "repo" => normalize_repo(repo), - "repo_slug" => normalize_repo(repo).tr("/", "-"), - "number" => Integer(row.fetch("number")), - "title" => row.fetch("title", ""), - "url" => row.fetch("url", ""), - "draft" => !!row["isDraft"], - "updated_at" => row.fetch("updatedAt", nil) - } - rescue ArgumentError, KeyError, TypeError - nil - end.compact.uniq { |pr| [pr.fetch("repo"), pr.fetch("number")] } - end - - def review_requested_prs(owner:, reviewer:, limit:) - normalize_search_pull_requests( - gh_search_review_requested(owner: owner, reviewer: reviewer, limit: limit) - ) - end - - def pr_head_sha(repo:, pr:) - pr_metadata(repo: repo, pr: pr).fetch("head").fetch("sha") - end - - def pr_status_contexts(repo:, head_sha:) - status = gh_api_json("repos/#{repo}/commits/#{head_sha}/status") - Array(status.fetch("statuses", [])).map { |row| row.fetch("context", "") } - end - - def review_started_for_head?(repo:, head_sha:) - pr_status_contexts(repo: repo, head_sha: head_sha).include?(meta_context) - end - - def dispatch_review_requested(repo:, pr:, requested_reviewer:) - payload = { - event_type: REVIEW_REQUESTED_DISPATCH_EVENT, - client_payload: { - target_repo: repo, - target_pr: "#{repo}##{pr}", - requested_reviewer: requested_reviewer, - source: REVIEW_REQUESTED_DISPATCH_SOURCE - } - } - gh_api("--method", "POST", "repos/evalops/.github/dispatches", input: JSON.generate(payload)) - end - - def base64url(value) - Base64.strict_encode64(value).tr("+/", "-_").delete("=") - end - - def normalize_private_key(raw) - raw.to_s.gsub("\\n", "\n") - end - - def github_app_jwt(app_id:, private_key:, now: Time.now) - key = OpenSSL::PKey.read(normalize_private_key(private_key)) - header = base64url(JSON.generate({ alg: "RS256", typ: "JWT" })) - payload = base64url( - JSON.generate( - { - iat: now.to_i - 60, - exp: now.to_i + 540, - iss: app_id.to_s - } - ) - ) - unsigned = "#{header}.#{payload}" - "#{unsigned}.#{base64url(key.sign(OpenSSL::Digest::SHA256.new, unsigned))}" - end - - def github_app_installation_id(owner:, jwt:) - gh_api_json("orgs/#{owner}/installation", token: jwt).fetch("id") - end - - def create_app_installation_token(app_id:, private_key:, owner:, installation_id: nil, repositories: [], permissions: {}) - jwt = github_app_jwt(app_id: app_id, private_key: private_key) - resolved_installation_id = installation_id.to_s.empty? ? github_app_installation_id(owner: owner, jwt: jwt) : installation_id - payload = {} - repo_names = repositories.map { |repo| repo.to_s.split("/").last }.reject(&:empty?).uniq - payload["repositories"] = repo_names unless repo_names.empty? - payload["permissions"] = permissions unless permissions.empty? - response = gh_api_json( - "--method", - "POST", - "app/installations/#{resolved_installation_id}/access_tokens", - input: JSON.generate(payload), - token: jwt - ) - response.fetch("token") - end - - def default_app_token_permissions - { - "checks" => "write", - "contents" => "write", - "issues" => "write", - "pull_requests" => "write", - "statuses" => "write" - } - end - - def mark_review_queued(repo:, head_sha:, target_url:) - post_status( - repo: repo, - sha: head_sha, - context: meta_context, - state: "pending", - description: "Queued EvalOpsBot requested deep review", - target_url: target_url - ) - end - - def dispatch_requested_reviews(owner:, reviewer:, limit:, dry_run:, target_url:, output: nil) - candidates = review_requested_prs(owner: owner, reviewer: reviewer, limit: limit) - results = candidates.map do |candidate| - repo = candidate.fetch("repo") - pr = candidate.fetch("number") - head_sha = pr_head_sha(repo: repo, pr: pr) - row = candidate.merge("head_sha" => head_sha) - - if review_started_for_head?(repo: repo, head_sha: head_sha) - row.merge("action" => "skipped", "reason" => "review already queued or completed for head sha") - elsif dry_run - row.merge("action" => "would_dispatch") - else - dispatch_review_requested(repo: repo, pr: pr, requested_reviewer: reviewer) - mark_review_queued(repo: repo, head_sha: head_sha, target_url: target_url) - row.merge("action" => "dispatched") - end - end - - summary = { - "schema_version" => 1, - "generated_at" => Time.now.utc.iso8601, - "owner" => owner, - "requested_reviewer" => reviewer, - "dry_run" => dry_run, - "candidate_count" => candidates.length, - "dispatched_count" => results.count { |row| row.fetch("action") == "dispatched" }, - "skipped_count" => results.count { |row| row.fetch("action") == "skipped" }, - "results" => results - } - File.write(output, JSON.pretty_generate(summary)) if output - summary - end - - def pr_files_metadata(repo:, pr:) - gh_api_json("repos/#{repo}/pulls/#{pr}/files?per_page=100") - end - - def discover_open_prs(repos:, pr_filter: nil, force_lenses: nil) - repos.flat_map do |repo| - normalized_repo = normalize_repo(repo) - prs = gh_api_json("repos/#{normalized_repo}/pulls?state=open&per_page=100") - prs.select! { |pr| pr_filter.fetch(normalized_repo, []).include?(Integer(pr.fetch("number"))) } if pr_filter - prs.map do |pr| - files = pr_files_metadata(repo: normalized_repo, pr: Integer(pr.fetch("number"))) - filenames = files.map { |file| file.fetch("filename") } - lenses = force_lenses || lenses_for_paths(filenames) - { - "repo" => normalized_repo, - "repo_slug" => normalized_repo.tr("/", "-"), - "number" => Integer(pr.fetch("number")), - "title" => pr.fetch("title"), - "url" => pr.fetch("html_url"), - "draft" => !!pr.fetch("draft"), - "head_sha" => pr.fetch("head").fetch("sha"), - "base_sha" => pr.fetch("base").fetch("sha"), - "base_ref" => pr.fetch("base").fetch("ref"), - "head_ref" => pr.fetch("head").fetch("ref"), - "changed_files" => filenames, - "lenses" => lenses - } - end - end - end - - def matrix_for(prs, lenses: LENSES.keys) - prs.flat_map do |pr| - pr_lenses = pr.fetch("lenses", lenses) - pr_lenses.map do |lens| - valid_lens!(lens) - { - "repo" => pr.fetch("repo"), - "repo_slug" => pr.fetch("repo_slug"), - "pr" => pr.fetch("number"), - "lens" => lens, - "check_context" => check_context(lens), - "head_sha" => pr.fetch("head_sha"), - "base_sha" => pr.fetch("base_sha", nil), - "base_ref" => pr.fetch("base_ref", nil), - "head_ref" => pr.fetch("head_ref", nil) - } - end - end - end - - def write_github_outputs(path, outputs) - return if path.to_s.empty? - - File.open(path, "a") do |file| - outputs.each do |key, value| - file.puts("#{key}=#{value}") - end - end - end - - def post_status(repo:, sha:, context:, state:, description:, target_url: nil) - fields = [ - "-f", "state=#{state}", - "-f", "context=#{context}", - "-f", "description=#{description.to_s[0, 140]}" - ] - fields += ["-f", "target_url=#{target_url}"] if target_url && !target_url.empty? - - gh_api("--method", "POST", "repos/#{repo}/statuses/#{sha}", *fields) - post_check_run( - repo: repo, - sha: sha, - name: context, - state: state, - description: description, - target_url: target_url - ) - end - - def check_run_external_id(name:, sha:) - digest = Digest::SHA256.hexdigest("#{name}\0#{sha}") - "evalops-pr-lens:#{digest}" - end - - def check_run_state(state) - case state.to_s - when "pending" - ["in_progress", nil] - when "success" - ["completed", "success"] - when "failure" - ["completed", "failure"] - when "error" - ["completed", "failure"] - else - ["completed", "neutral"] - end - end - - def existing_check_run_id(repo:, sha:, name:, external_id:) - encoded_name = URI.encode_www_form_component(name) - response = gh_api_json("repos/#{repo}/commits/#{sha}/check-runs?check_name=#{encoded_name}&per_page=100") - Array(response.fetch("check_runs", [])).find { |row| row["external_id"] == external_id }&.fetch("id", nil) - end - - def post_check_run(repo:, sha:, name:, state:, description:, target_url: nil) - status, conclusion = check_run_state(state) - external_id = check_run_external_id(name: name, sha: sha) - output = { - title: description.to_s[0, 255], - summary: description.to_s.empty? ? name : description.to_s - } - payload = { - name: name, - external_id: external_id, - details_url: target_url, - status: status, - output: output - }.compact - if status == "completed" - payload[:conclusion] = conclusion - payload[:completed_at] = Time.now.utc.iso8601 - else - payload[:started_at] = Time.now.utc.iso8601 - end - - existing_id = existing_check_run_id(repo: repo, sha: sha, name: name, external_id: external_id) - if existing_id - gh_api("--method", "PATCH", "repos/#{repo}/check-runs/#{existing_id}", input: JSON.generate(payload)) - else - gh_api( - "--method", - "POST", - "repos/#{repo}/check-runs", - input: JSON.generate(payload.merge(head_sha: sha)) - ) - end - rescue StandardError => e - warn "check-run publish skipped for #{repo}@#{sha} #{name}: #{e.message.lines.first.to_s.strip}" - end - - def write_json(path, payload) - File.write(path, JSON.pretty_generate(payload)) - payload - end - - def skipped_lens_review(repo:, pr:, lens:, head_sha:, reason:, output:) - write_json( - output, - { - "schema_version" => 1, - "repo" => repo, - "pr" => Integer(pr), - "lens" => lens, - "check_id" => check_context(lens), - "head_sha" => head_sha.to_s, - "generated_at" => Time.now.utc.iso8601, - "status" => "skipped", - "skip_reason" => reason, - "summary" => "Skipped #{lens} lens review: #{reason}", - "confidence_score" => 0.0, - "findings" => [] - } - ) - end - - def write_prepare_outputs(path, outputs) - write_github_outputs(path, outputs) - end - - def git_authorization_header(token) - return nil if token.to_s.empty? - - "AUTHORIZATION: basic #{Base64.strict_encode64("x-access-token:#{token}")}" - end - - def git_capture_auth(workspace, *args, token: nil) - env = { "GIT_TERMINAL_PROMPT" => "0" } - command = ["git"] - command += ["-C", workspace] if workspace - header = git_authorization_header(token) - command += ["-c", "http.https://github.com/.extraheader=#{header}"] if header - command += args - - stdout, stderr, status = Open3.capture3(env, *command) - raise "git #{args.join(" ")} failed: #{stderr.empty? ? stdout : stderr}" unless status.success? - - stdout - end - - def git_capture(workspace, *args) - stdout, stderr, status = Open3.capture3("git", "-C", workspace, *args) - raise "git #{args.join(" ")} failed: #{stderr}" unless status.success? - - stdout - end - - def prepare_workspace(repo:, pr:, lens:, workspace:, output:, github_output:, snapshot_head_sha:, snapshot_base_sha:, token:) - head_sha = snapshot_head_sha.to_s - begin - pr_json = pr_metadata(repo: repo, pr: pr) - current_state = pr_json.fetch("state", "").downcase - current_head_sha = pr_json.fetch("head").fetch("sha") - current_base_sha = pr_json.fetch("base").fetch("sha") - base_ref = pr_json.fetch("base").fetch("ref") - reason = nil - - if current_state != "open" - reason = "pull request is #{current_state.empty? ? "not open" : current_state}" - elsif !snapshot_head_sha.to_s.empty? && current_head_sha != snapshot_head_sha - reason = "pull request head changed since discovery" - elsif !snapshot_base_sha.to_s.empty? && current_base_sha != snapshot_base_sha - reason = "pull request base changed since discovery" - end - - if reason - skipped_lens_review(repo: repo, pr: pr, lens: lens, head_sha: head_sha.empty? ? current_head_sha : head_sha, reason: reason, output: output) - write_prepare_outputs( - github_output, - "skip" => "true", - "skip_reason" => reason, - "head_sha" => head_sha.empty? ? current_head_sha : head_sha, - "base_sha" => snapshot_base_sha.to_s.empty? ? current_base_sha : snapshot_base_sha - ) - return { "skip" => true, "reason" => reason } - end - - FileUtils.rm_rf(workspace) - FileUtils.mkdir_p(workspace) - git_capture_auth(nil, "init", workspace, token: token) - git_capture_auth(workspace, "remote", "add", "origin", "https://github.com/#{repo}.git", token: token) - git_capture_auth(workspace, "fetch", "--no-tags", "origin", base_ref, "+refs/pull/#{pr}/head:refs/remotes/pull/#{pr}/head", token: token) - git_capture_auth(workspace, "checkout", "--detach", current_head_sha, token: token) - - checked_out = git_capture_auth(workspace, "rev-parse", "HEAD", token: token).strip - if checked_out != current_head_sha - raise "checked out #{checked_out}, expected #{current_head_sha}" - end - - write_prepare_outputs( - github_output, - "skip" => "false", - "skip_reason" => "", - "base_ref" => base_ref, - "base_sha" => current_base_sha, - "head_sha" => current_head_sha - ) - { - "skip" => false, - "base_ref" => base_ref, - "base_sha" => current_base_sha, - "head_sha" => current_head_sha - } - rescue StandardError => e - reason = "target ref unavailable: #{e.message.lines.first.to_s.strip}" - skipped_lens_review(repo: repo, pr: pr, lens: lens, head_sha: head_sha, reason: reason, output: output) - write_prepare_outputs( - github_output, - "skip" => "true", - "skip_reason" => reason, - "head_sha" => head_sha, - "base_sha" => snapshot_base_sha.to_s - ) - { "skip" => true, "reason" => reason } - end - end - - def truncated(text, max_bytes) - raw = text.to_s - return [raw, false] if raw.bytesize <= max_bytes - - [raw.byteslice(0, max_bytes).to_s, true] - end - - def git_diff(workspace:, base_sha:, head_sha:, max_bytes:) - diff = git_capture(workspace, "--no-pager", "diff", "--unified=5", "--no-ext-diff", base_sha, head_sha) - truncated(diff, max_bytes) - end - - def changed_files(workspace:, base_sha:, head_sha:) - git_capture(workspace, "--no-pager", "diff", "--name-status", base_sha, head_sha) - end - - def pr_metadata(repo:, pr:) - gh_api_json("repos/#{repo}/pulls/#{pr}") - end - - def pr_file_summary(repo:, pr:) - files = gh_api_json("repos/#{repo}/pulls/#{pr}/files?per_page=100") - files.map do |file| - [ - file.fetch("status"), - file.fetch("filename"), - "+#{file.fetch("additions")}", - "-#{file.fetch("deletions")}" - ].join("\t") - end.join("\n") - end - - def utf8_text(value) - text = value.to_s - text = text.dup.force_encoding(Encoding::UTF_8) unless text.encoding == Encoding::UTF_8 - text.scrub - end - - def short_text(value, max_bytes: 1_500) - text = utf8_text(value).strip - return "" if text.empty? - return text if text.bytesize <= max_bytes - - "#{text.byteslice(0, max_bytes).to_s.scrub}\n...[truncated]" - end - - def list_section(title, rows) - body = rows.compact.map { |row| utf8_text(row).strip }.reject(&:empty?) - return "#{title}:\n(none)" if body.empty? - - "#{title}:\n#{body.first(MAX_CONTEXT_ITEMS).join("\n")}" - end - - def pr_review_context(repo:, pr:, pr_json:, head_sha:) - issue_comments = gh_api_json("repos/#{repo}/issues/#{pr}/comments?per_page=100") - reviews = gh_api_json("repos/#{repo}/pulls/#{pr}/reviews?per_page=100") - review_comments = gh_api_json("repos/#{repo}/pulls/#{pr}/comments?per_page=100") - check_runs = gh_api_json("repos/#{repo}/commits/#{head_sha}/check-runs?per_page=100").fetch("check_runs", []) - combined_status = gh_api_json("repos/#{repo}/commits/#{head_sha}/status") - - comments = issue_comments.last(MAX_CONTEXT_ITEMS).map do |comment| - "- #{comment.dig("user", "login")} at #{comment.fetch("created_at", "")}: #{short_text(comment["body"], max_bytes: 900)}" - end - review_rows = reviews.last(MAX_CONTEXT_ITEMS).map do |review| - body = short_text(review["body"], max_bytes: 900) - "- #{review.dig("user", "login")} #{review.fetch("state", "")} at #{review.fetch("submitted_at", "")}: #{body.empty? ? "(no body)" : body}" - end - inline_rows = review_comments.last(MAX_CONTEXT_ITEMS).map do |comment| - line = comment["line"] || comment["original_line"] || "?" - "- #{comment.dig("user", "login")} #{comment.fetch("path", "unknown")}:#{line}: #{short_text(comment["body"], max_bytes: 900)}" - end - check_rows = check_runs.select do |check| - !%w[success skipped neutral].include?(check["conclusion"].to_s.downcase) - end.map do |check| - "- check-run #{check.fetch("name", "unknown")}: status=#{check.fetch("status", "")} conclusion=#{check["conclusion"] || "pending"}" - end - status_rows = Array(combined_status["statuses"]).select do |status| - status["state"].to_s != "success" - end.map do |status| - "- status #{status.fetch("context", "unknown")}: state=#{status.fetch("state", "")} description=#{status["description"]}" - end - - [ - "Pull request body:\n#{short_text(pr_json["body"], max_bytes: 2_500).empty? ? "(none)" : short_text(pr_json["body"], max_bytes: 2_500)}", - list_section("Issue comments", comments), - list_section("PR review bodies", review_rows), - list_section("Inline review comments", inline_rows), - list_section("Non-green checks and statuses", check_rows + status_rows) - ].join("\n\n") - end - - def build_lens_prompt(repo:, pr:, lens:, pr_json:, file_summary:, review_context:, changed_files_text:, diff_text:, diff_truncated:) - lens_config = LENSES.fetch(valid_lens!(lens)) - <<~PROMPT - You are reviewing an EvalOps pull request through one narrow lens: #{lens_config.fetch(:name)}. - - Repository: #{repo} - Pull request: ##{pr} #{pr_json.fetch("title")} - URL: #{pr_json.fetch("html_url")} - Base: #{pr_json.fetch("base").fetch("ref")} #{pr_json.fetch("base").fetch("sha")} - Head: #{pr_json.fetch("head").fetch("ref")} #{pr_json.fetch("head").fetch("sha")} - Draft: #{pr_json.fetch("draft")} - - Lens focus: - #{lens_config.fetch(:focus).map { |item| "- #{item}" }.join("\n")} - - Rules: - - Return JSON only. No markdown fences. - - Report only actionable defects introduced by this PR that fit the lens. - - Prefer no finding over a speculative finding. - - Confidence must reflect direct evidence from the diff or live PR metadata. - - Existing bot or human review comments are evidence, but verify them - against the diff before turning them into a finding. - - Use head-side file paths and line numbers where possible. - - If no high-signal finding exists, return an empty findings array. - - Do not ask for broad architecture redesigns, style-only changes, or unrelated cleanup. - - JSON shape: - { - "summary": "short lens summary", - "confidence_score": 0.0, - "findings": [ - { - "title": "max 80 chars", - "body": "why this is a real defect and how to fix it", - "confidence_score": 0.0, - "priority": 0, - "code_location": { - "path": "relative/path", - "line": 1 - } - } - ] - } - - Priority scale: 0 is release blocking, 1 is high, 2 is medium, 3 is low. - - Pull request files from GitHub: - #{file_summary.empty? ? "(no file metadata)" : file_summary} - - Pull request context: - #{review_context.empty? ? "(no PR context)" : review_context} - - Changed files from git: - #{changed_files_text.empty? ? "(no changed files)" : changed_files_text} - - Unified diff#{diff_truncated ? " (truncated)" : ""}: - #{diff_text.empty? ? "(empty diff)" : diff_text} - PROMPT - end - - def extract_json(text) - raw = text.to_s.strip - return JSON.parse(raw) if raw.start_with?("{") && raw.end_with?("}") - - start = raw.index("{") - finish = raw.rindex("}") - raise "model response did not contain a JSON object" unless start && finish && finish > start - - JSON.parse(raw[start..finish]) - end - - def call_anthropic(prompt:, model:, api_key:) - raise "ANTHROPIC_API_KEY is required for PR lens review" if api_key.to_s.empty? - - uri = URI("https://api.anthropic.com/v1/messages") - request = Net::HTTP::Post.new(uri) - request["anthropic-version"] = "2023-06-01" - request["content-type"] = "application/json" - request["x-api-key"] = api_key - request.body = JSON.generate( - model: model, - max_tokens: 6000, - system: "You are a careful EvalOps PR reviewer. Return valid JSON only.", - messages: [ - { - role: "user", - content: prompt - } - ] - ) - - response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http| - http.request(request) - end - unless response.is_a?(Net::HTTPSuccess) - raise "Anthropic API failed with HTTP #{response.code}: #{response.body}" - end - - body = JSON.parse(response.body) - body.fetch("content").map { |part| part["text"] }.compact.join("\n") - end - - def call_openai(prompt:, model:, api_key:) - raise "OPENAI_API_KEY is required for OpenAI PR lens review" if api_key.to_s.empty? - - uri = URI("https://api.openai.com/v1/responses") - request = Net::HTTP::Post.new(uri) - request["authorization"] = "Bearer #{api_key}" - request["content-type"] = "application/json" - request.body = JSON.generate( - model: model, - input: [ - { - role: "system", - content: "You are a careful EvalOps PR reviewer. Return valid JSON only." - }, - { - role: "user", - content: prompt - } - ] - ) - - response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http| - http.request(request) - end - unless response.is_a?(Net::HTTPSuccess) - raise "OpenAI API failed with HTTP #{response.code}: #{response.body}" - end - - body = JSON.parse(response.body) - return body["output_text"] if body["output_text"].to_s.strip.length.positive? - - Array(body["output"]).flat_map do |item| - Array(item["content"]).map { |part| part["text"] } - end.compact.join("\n") - end - - def call_llm(prompt:, provider:, model:) - case provider - when "anthropic" - call_anthropic( - prompt: prompt, - model: model, - api_key: ENV["ANTHROPIC_API_KEY"] || ENV["EVALOPS_ANTHROPIC_API_KEY"] - ) - when "openai" - call_openai( - prompt: prompt, - model: model, - api_key: ENV["OPENAI_API_KEY"] || ENV["EVALOPS_OPENAI_API_KEY"] - ) - else - raise "unsupported PR lens provider #{provider.inspect}; expected anthropic or openai" - end - end - - def coerce_number(value, default:, min:, max:) - number = Float(value) - [[number, min].max, max].min - rescue ArgumentError, TypeError - default - end - - def normalize_finding(finding) - location = finding.fetch("code_location", {}) - path = location["path"] || location["absolute_file_path"] || location["file"] || "unknown" - line = location["line"] || location.dig("line_range", "start") || 1 - - { - "title" => finding.fetch("title").to_s.strip[0, 80], - "body" => finding.fetch("body").to_s.strip, - "confidence_score" => coerce_number( - finding.fetch("confidence_score", 0.0), - default: 0.0, - min: 0.0, - max: 1.0 - ), - "priority" => Integer(finding.fetch("priority", 2)).clamp(0, 3), - "code_location" => { - "path" => path.to_s.sub(%r{\A\./}, ""), - "line" => Integer(line) - } - } - rescue ArgumentError, KeyError, TypeError - nil - end - - def normalize_lens_review(raw_review, repo:, pr:, lens:, head_sha:) - findings = Array(raw_review["findings"]).map { |finding| normalize_finding(finding) }.compact - top_confidence = findings.map { |finding| finding.fetch("confidence_score") }.max || 0.0 - confidence = coerce_number( - raw_review.fetch("confidence_score", top_confidence), - default: top_confidence, - min: 0.0, - max: 1.0 - ) - - { - "schema_version" => 1, - "repo" => repo, - "pr" => Integer(pr), - "lens" => lens, - "check_id" => check_context(lens), - "head_sha" => head_sha, - "generated_at" => Time.now.utc.iso8601, - "summary" => raw_review.fetch("summary", "").to_s.strip, - "confidence_score" => confidence, - "findings" => findings - } - end - - def run_lens(repo:, pr:, lens:, workspace:, base_sha:, head_sha:, output:, provider:, model:, max_diff_bytes:, routing_config: nil) - effective = effective_review_options( - lens: lens, - provider: provider, - model: model, - max_diff_bytes: max_diff_bytes, - routing_config: routing_config - ) - pr_json = pr_metadata(repo: repo, pr: pr) - file_summary = pr_file_summary(repo: repo, pr: pr) - review_context = pr_review_context(repo: repo, pr: pr, pr_json: pr_json, head_sha: head_sha) - changed_files_text = changed_files(workspace: workspace, base_sha: base_sha, head_sha: head_sha) - diff_text, diff_truncated = git_diff( - workspace: workspace, - base_sha: base_sha, - head_sha: head_sha, - max_bytes: effective.fetch(:max_diff_bytes) - ) - prompt = build_lens_prompt( - repo: repo, - pr: pr, - lens: lens, - pr_json: pr_json, - file_summary: file_summary, - review_context: review_context, - changed_files_text: changed_files_text, - diff_text: diff_text, - diff_truncated: diff_truncated - ) - raw_response = call_llm( - prompt: prompt, - provider: effective.fetch(:provider), - model: effective.fetch(:model) - ) - normalized = normalize_lens_review( - extract_json(raw_response), - repo: repo, - pr: pr, - lens: lens, - head_sha: head_sha - ) - File.write(output, JSON.pretty_generate(normalized)) - normalized - end - - def lens_status_description(review) - if review.fetch("status", "") == "skipped" - reason = review.fetch("skip_reason", "not applicable") - return "Skipped: #{reason}" - end - - findings = review.fetch("findings", []) - confidence = findings.map { |finding| finding.fetch("confidence_score") }.max || 0.0 - "#{findings.length} finding#{findings.length == 1 ? "" : "s"}; top confidence #{format("%.2f", confidence)}" - end - - def read_lens_reviews(root) - Dir.glob(File.join(root, "**", "lens-review.json")).sort.map do |path| - JSON.parse(File.read(path)).merge("_artifact_path" => path) - end - end - - def read_expected_reviews(root) - Dir.glob(File.join(root, "**", "pr-lens-targets.json")).sort.flat_map do |path| - JSON.parse(File.read(path)).flat_map do |pr| - Array(pr.fetch("lenses", [])).map do |lens| - { - "repo" => pr.fetch("repo"), - "pr" => Integer(pr.fetch("number")), - "lens" => lens, - "head_sha" => pr.fetch("head_sha"), - "check_id" => check_context(lens), - "_artifact_path" => path - } - end - end - end - end - - def high_confidence_findings(reviews, min_confidence:) - reviews.flat_map do |review| - review.fetch("findings", []).map do |finding| - finding.merge( - "repo" => review.fetch("repo"), - "pr" => review.fetch("pr"), - "lens" => review.fetch("lens"), - "head_sha" => review.fetch("head_sha"), - "check_id" => review.fetch("check_id") - ) - end - end.select do |finding| - finding.fetch("confidence_score") >= min_confidence - end - end - - def fingerprint_tokens(text) - text.to_s.downcase.scan(/[a-z0-9][a-z0-9_-]{2,}/).map do |token| - normalized = token.sub(/ies\z/, "y").sub(/s\z/, "") - normalized = "repository" if %w[repo repository].include?(normalized) - normalized = "write" if %w[write writes writable].include?(normalized) - normalized - end.reject do |token| - COMMON_FINGERPRINT_TOKENS.include?(token) - end.uniq - end - - def token_similarity(left, right) - left_tokens = fingerprint_tokens(left) - right_tokens = fingerprint_tokens(right) - return 0.0 if left_tokens.empty? || right_tokens.empty? - - intersection = (left_tokens & right_tokens).length - union = (left_tokens | right_tokens).length - intersection.to_f / union - end - - def duplicate_finding?(left, right) - return false unless left.fetch("repo") == right.fetch("repo") - return false unless Integer(left.fetch("pr")) == Integer(right.fetch("pr")) - return false unless left.fetch("head_sha") == right.fetch("head_sha") - - left_location = left.fetch("code_location") - right_location = right.fetch("code_location") - same_path = left_location.fetch("path") == right_location.fetch("path") - return false unless same_path - - line_distance = (Integer(left_location.fetch("line")) - Integer(right_location.fetch("line"))).abs - title_similarity = token_similarity(left.fetch("title"), right.fetch("title")) - body_similarity = token_similarity(left.fetch("body"), right.fetch("body")) - - title_similarity >= 0.74 || body_similarity >= 0.82 || (line_distance <= 5 && title_similarity >= 0.5) - end - - def better_finding(left, right) - [left, right].max_by do |finding| - [ - finding.fetch("confidence_score"), - -finding.fetch("priority"), - finding.fetch("body").to_s.length - ] - end - end - - def dedupe_and_rank(findings) - deduped = [] - findings.each do |finding| - existing_index = deduped.index { |candidate| duplicate_finding?(candidate, finding) } - if existing_index - deduped[existing_index] = better_finding(deduped.fetch(existing_index), finding) - else - deduped << finding - end - end - - deduped.sort_by do |finding| - [ - -finding.fetch("confidence_score"), - finding.fetch("priority"), - finding.fetch("lens") - ] - end - end - - def grouped_by_pr(findings) - findings.group_by { |finding| [finding.fetch("repo"), finding.fetch("pr"), finding.fetch("head_sha")] } - end - - def run_url - return ENV["RUN_URL"] if ENV["RUN_URL"] && !ENV["RUN_URL"].empty? - - server = ENV.fetch("GITHUB_SERVER_URL", "https://github.com") - repo = ENV["GITHUB_REPOSITORY"] - run_id = ENV["GITHUB_RUN_ID"] - return nil if repo.to_s.empty? || run_id.to_s.empty? - - "#{server}/#{repo}/actions/runs/#{run_id}" - end - - def comment_body(repo:, pr:, findings:, min_confidence:, target_url:) - lines = [ - MARKER, - "**EvalOps PR lens review**", - "", - "High-confidence findings only. Threshold: #{format("%.2f", min_confidence)}.", - "Run: #{target_url || "unavailable"}", - "" - ] - - findings.first(MAX_FINDINGS_PER_COMMENT).each_with_index do |finding, index| - location = finding.fetch("code_location") - lines << "#{index + 1}. **P#{finding.fetch("priority")} #{format("%.2f", finding.fetch("confidence_score"))} #{finding.fetch("lens")}**: #{finding.fetch("title")}" - lines << " - Location: `#{location.fetch("path")}:#{location.fetch("line")}`" - lines << " - Check: `#{finding.fetch("check_id")}`" - lines << " - #{finding.fetch("body")}" - lines << "" - end - - if findings.length > MAX_FINDINGS_PER_COMMENT - lines << "_#{findings.length - MAX_FINDINGS_PER_COMMENT} additional high-confidence finding(s) were omitted from the comment; inspect the workflow artifact for the full ledger._" - lines << "" - end - - lines << "_Repo: #{repo} PR: ##{pr}_" - lines.join("\n") - end - - def marker_comment_ids(repo:, pr:) - raw = gh_api( - "--paginate", - "repos/#{repo}/issues/#{pr}/comments", - "--jq", - ".[] | select(.body | contains(\"#{MARKER}\")) | .id" - ) - raw.lines.map(&:strip).reject(&:empty?) - end - - def upsert_comment(repo:, pr:, body:) - ids = marker_comment_ids(repo: repo, pr: pr) - if ids.empty? - gh_api( - "--method", "POST", "repos/#{repo}/issues/#{pr}/comments", - input: JSON.generate({ body: body }) - ) - else - first, *stale = ids - gh_api( - "--method", "PATCH", "repos/#{repo}/issues/comments/#{first}", - input: JSON.generate({ body: body }) - ) - stale.each { |id| gh_api("--method", "DELETE", "repos/#{repo}/issues/comments/#{id}") } - end - end - - def delete_marker_comments(repo:, pr:) - marker_comment_ids(repo: repo, pr: pr).each do |id| - gh_api("--method", "DELETE", "repos/#{repo}/issues/comments/#{id}") - end - end - - def meta_state(findings, coverage_incomplete: false) - return "error" if coverage_incomplete - - findings.any? { |finding| finding.fetch("priority") <= 1 } ? "failure" : "success" - end - - def meta_description(findings, missing_count: 0, skipped_count: 0) - if missing_count.positive? - "PR lens coverage incomplete: #{missing_count} missing" - elsif findings.empty? && skipped_count.positive? - "No findings; #{skipped_count} lens review#{skipped_count == 1 ? "" : "s"} skipped" - elsif findings.empty? - "No high-confidence PR lens findings" - else - "#{findings.length} high-confidence finding#{findings.length == 1 ? "" : "s"}" - end - end - - def meta_review(artifact_root:, min_confidence:, output:) - reviews = read_lens_reviews(artifact_root) - expected_reviews = read_expected_reviews(artifact_root) - reviews_by_key = reviews.each_with_object({}) do |review, hash| - hash[[review.fetch("repo"), Integer(review.fetch("pr")), review.fetch("lens"), review.fetch("head_sha")]] = review - end - ranked = dedupe_and_rank(high_confidence_findings(reviews, min_confidence: min_confidence)) - grouped = grouped_by_pr(ranked) - target_url = run_url - coverage_by_pr = {} - - expected_reviews.group_by { |review| [review.fetch("repo"), review.fetch("pr"), review.fetch("head_sha")] }.each do |key, rows| - coverage_by_pr[key] = { - "expected" => rows.length, - "missing" => rows.count do |row| - !reviews_by_key.key?([row.fetch("repo"), row.fetch("pr"), row.fetch("lens"), row.fetch("head_sha")]) - end, - "skipped" => rows.count do |row| - review = reviews_by_key[[row.fetch("repo"), row.fetch("pr"), row.fetch("lens"), row.fetch("head_sha")]] - review && review.fetch("status", "") == "skipped" - end, - "lenses" => rows.map { |row| row.fetch("lens") }.sort - } - end - - review_keys = reviews.group_by { |review| [review.fetch("repo"), review.fetch("pr"), review.fetch("head_sha")] }.keys - (coverage_by_pr.keys + review_keys).uniq.each do |repo, pr, head_sha| - findings = grouped.fetch([repo, pr, head_sha], []) - coverage = coverage_by_pr.fetch( - [repo, pr, head_sha], - { - "expected" => reviews.count { |review| review.fetch("repo") == repo && review.fetch("pr") == pr && review.fetch("head_sha") == head_sha }, - "missing" => 0, - "skipped" => 0, - "lenses" => reviews.select { |review| review.fetch("repo") == repo && review.fetch("pr") == pr && review.fetch("head_sha") == head_sha }.map { |review| review.fetch("lens") }.sort - } - ) - if findings.empty? - delete_marker_comments(repo: repo, pr: pr) - else - upsert_comment( - repo: repo, - pr: pr, - body: comment_body(repo: repo, pr: pr, findings: findings, min_confidence: min_confidence, target_url: target_url) - ) - end - post_status( - repo: repo, - sha: head_sha, - context: meta_context, - state: meta_state(findings, coverage_incomplete: coverage.fetch("missing").positive?), - description: meta_description( - findings, - missing_count: coverage.fetch("missing"), - skipped_count: coverage.fetch("skipped") - ), - target_url: target_url - ) - end - - result = { - "schema_version" => 1, - "generated_at" => Time.now.utc.iso8601, - "min_confidence" => min_confidence, - "reviews" => reviews.length, - "expected_reviews" => expected_reviews.length, - "coverage" => coverage_by_pr.map do |(repo, pr, head_sha), coverage| - coverage.merge("repo" => repo, "pr" => pr, "head_sha" => head_sha) - end, - "published_findings" => ranked, - "run_url" => target_url - } - File.write(output, JSON.pretty_generate(result)) - result - end - - def markdown_meta_report(result) - lines = [ - "## EvalOps PR Lens Review", - "", - "- Reviews: #{result.fetch("reviews")}", - "- Expected reviews: #{result.fetch("expected_reviews")}", - "- Published findings: #{result.fetch("published_findings").length}", - "- Run: #{result.fetch("run_url") || "unavailable"}", - "", - "### Coverage" - ] - result.fetch("coverage", []).each do |row| - lines << "- #{row.fetch("repo")}##{row.fetch("pr")}: #{row.fetch("expected")} expected, #{row.fetch("missing")} missing, #{row.fetch("skipped")} skipped; lenses #{Array(row.fetch("lenses", [])).join(", ")}" - end - if result.fetch("published_findings", []).empty? - lines << "" - lines << "No high-confidence findings cleared the publication threshold." - else - lines << "" - lines << "### Findings" - result.fetch("published_findings").first(MAX_FINDINGS_PER_COMMENT).each do |finding| - location = finding.fetch("code_location") - lines << "- P#{finding.fetch("priority")} #{format("%.2f", finding.fetch("confidence_score"))} #{finding.fetch("repo")}##{finding.fetch("pr")} #{location.fetch("path")}:#{location.fetch("line")} #{finding.fetch("title")}" - end - end - lines.join("\n") - end -end - -if $PROGRAM_NAME == __FILE__ - command = ARGV.shift - - case command - when "discover" - options = { - repos: EvalOpsPrLensReview::TARGET_REPOS, - lenses: EvalOpsPrLensReview::LENSES.keys - } - OptionParser.new do |parser| - parser.on("--repos CSV") { |value| options[:repos] = EvalOpsPrLensReview.parse_list(value) } - parser.on("--target-prs CSV") { |value| options[:target_prs] = value } - parser.on("--force-lenses CSV") { |value| options[:force_lenses] = EvalOpsPrLensReview.parse_list(value) } - parser.on("--github-output PATH") { |value| options[:github_output] = value } - parser.on("--matrix-output PATH") { |value| options[:matrix_output] = value } - parser.on("--targets-output PATH") { |value| options[:targets_output] = value } - end.parse! - - pr_filter = EvalOpsPrLensReview.parse_pr_filter(options[:target_prs], repos: options[:repos]) if options[:target_prs] - prs = EvalOpsPrLensReview.discover_open_prs( - repos: options[:repos], - pr_filter: pr_filter, - force_lenses: options[:force_lenses] - ) - matrix = EvalOpsPrLensReview.matrix_for(prs, lenses: options[:lenses]) - matrix_json = JSON.generate({ include: matrix }) - - File.write(options[:matrix_output], JSON.pretty_generate({ include: matrix })) if options[:matrix_output] - File.write(options[:targets_output], JSON.pretty_generate(prs)) if options[:targets_output] - EvalOpsPrLensReview.write_github_outputs( - options[:github_output], - "matrix" => matrix_json, - "has_work" => (!matrix.empty?).to_s, - "pr_count" => prs.length.to_s - ) - puts "Discovered #{prs.length} open PR(s), #{matrix.length} lens job(s)." - when "post-status" - options = {} - OptionParser.new do |parser| - parser.on("--repo OWNER/REPO") { |value| options[:repo] = value } - parser.on("--sha SHA") { |value| options[:sha] = value } - parser.on("--context CONTEXT") { |value| options[:context] = value } - parser.on("--state STATE") { |value| options[:state] = value } - parser.on("--description TEXT") { |value| options[:description] = value } - parser.on("--target-url URL") { |value| options[:target_url] = value } - end.parse! - EvalOpsPrLensReview.post_status(**options) - when "prepare-workspace" - options = { - token: ENV["REVIEW_TOKEN"] || ENV["GH_TOKEN"], - github_output: ENV["GITHUB_OUTPUT"], - snapshot_head_sha: "", - snapshot_base_sha: "" - } - OptionParser.new do |parser| - parser.on("--repo OWNER/REPO") { |value| options[:repo] = value } - parser.on("--pr NUMBER", Integer) { |value| options[:pr] = value } - parser.on("--lens LENS") { |value| options[:lens] = value } - parser.on("--workspace PATH") { |value| options[:workspace] = value } - parser.on("--output PATH") { |value| options[:output] = value } - parser.on("--github-output PATH") { |value| options[:github_output] = value } - parser.on("--snapshot-head-sha SHA") { |value| options[:snapshot_head_sha] = value } - parser.on("--snapshot-base-sha SHA") { |value| options[:snapshot_base_sha] = value } - parser.on("--token TOKEN") { |value| options[:token] = value } - end.parse! - required = %i[repo pr lens workspace output] - missing = required.select { |key| options[key].nil? || options[key].to_s.empty? } - raise OptionParser::MissingArgument, missing.join(", ") unless missing.empty? - - result = EvalOpsPrLensReview.prepare_workspace(**options) - puts(result.fetch("skip") ? "Skipped #{options.fetch(:lens)}: #{result.fetch("reason")}" : "Prepared #{options.fetch(:repo)}##{options.fetch(:pr)}") - when "run-lens" - options = { - provider: ENV.fetch("PR_LENS_PROVIDER", EvalOpsPrLensReview::DEFAULT_PROVIDER), - model: ENV.fetch("PR_LENS_MODEL", EvalOpsPrLensReview::DEFAULT_MODEL), - max_diff_bytes: Integer(ENV.fetch("PR_LENS_MAX_DIFF_BYTES", EvalOpsPrLensReview::DEFAULT_MAX_DIFF_BYTES)), - routing_config: ENV.fetch("PR_LENS_ROUTING_CONFIG", nil) - } - OptionParser.new do |parser| - parser.on("--repo OWNER/REPO") { |value| options[:repo] = value } - parser.on("--pr NUMBER", Integer) { |value| options[:pr] = value } - parser.on("--lens LENS") { |value| options[:lens] = value } - parser.on("--workspace PATH") { |value| options[:workspace] = value } - parser.on("--base-sha SHA") { |value| options[:base_sha] = value } - parser.on("--head-sha SHA") { |value| options[:head_sha] = value } - parser.on("--output PATH") { |value| options[:output] = value } - parser.on("--provider PROVIDER") { |value| options[:provider] = value } - parser.on("--model MODEL") { |value| options[:model] = value } - parser.on("--max-diff-bytes BYTES", Integer) { |value| options[:max_diff_bytes] = value } - parser.on("--routing-config PATH") { |value| options[:routing_config] = value } - end.parse! - required = %i[repo pr lens workspace base_sha head_sha output] - missing = required.select { |key| options[key].nil? || options[key].to_s.empty? } - raise OptionParser::MissingArgument, missing.join(", ") unless missing.empty? - - review = EvalOpsPrLensReview.run_lens(**options) - puts EvalOpsPrLensReview.lens_status_description(review) - when "lens-status-description" - options = {} - OptionParser.new do |parser| - parser.on("--review-json PATH") { |value| options[:review_json] = value } - end.parse! - review = JSON.parse(File.read(options.fetch(:review_json))) - puts EvalOpsPrLensReview.lens_status_description(review) - when "meta-review" - options = { - min_confidence: Float(ENV.fetch("PR_LENS_MIN_CONFIDENCE", EvalOpsPrLensReview::DEFAULT_MIN_CONFIDENCE)), - output: "meta-review.json" - } - OptionParser.new do |parser| - parser.on("--artifact-root PATH") { |value| options[:artifact_root] = value } - parser.on("--min-confidence NUMBER", Float) { |value| options[:min_confidence] = value } - parser.on("--output PATH") { |value| options[:output] = value } - parser.on("--markdown-output PATH") { |value| options[:markdown_output] = value } - end.parse! - raise OptionParser::MissingArgument, "artifact-root" if options[:artifact_root].to_s.empty? - - markdown_output = options.delete(:markdown_output) - result = EvalOpsPrLensReview.meta_review(**options) - File.write(markdown_output, EvalOpsPrLensReview.markdown_meta_report(result)) if markdown_output - puts "Published #{result.fetch("published_findings").length} high-confidence finding(s)." - when "dispatch-review-requests" - options = { - owner: "evalops", - reviewer: "EvalOpsBot", - limit: 100, - dry_run: false, - target_url: EvalOpsPrLensReview.run_url - } - OptionParser.new do |parser| - parser.on("--owner OWNER") { |value| options[:owner] = value } - parser.on("--reviewer LOGIN") { |value| options[:reviewer] = value } - parser.on("--limit NUMBER", Integer) { |value| options[:limit] = value } - parser.on("--dry-run") { options[:dry_run] = true } - parser.on("--target-url URL") { |value| options[:target_url] = value } - parser.on("--output PATH") { |value| options[:output] = value } - parser.on("--github-output PATH") { |value| options[:github_output] = value } - end.parse! - - result = EvalOpsPrLensReview.dispatch_requested_reviews(**options.slice(:owner, :reviewer, :limit, :dry_run, :target_url, :output)) - EvalOpsPrLensReview.write_github_outputs( - options[:github_output], - "candidate_count" => result.fetch("candidate_count"), - "dispatched_count" => result.fetch("dispatched_count"), - "skipped_count" => result.fetch("skipped_count") - ) - puts "Found #{result.fetch("candidate_count")} EvalOpsBot review request(s); dispatched #{result.fetch("dispatched_count")}, skipped #{result.fetch("skipped_count")}." - when "mint-app-token" - options = { - app_id: ENV["EVALOPS_PR_LENS_APP_ID"] || ENV["GITHUB_APP_ID"], - private_key: ENV["EVALOPS_PR_LENS_APP_PRIVATE_KEY"] || ENV["GITHUB_APP_PRIVATE_KEY"], - installation_id: ENV["EVALOPS_PR_LENS_APP_INSTALLATION_ID"] || ENV["GITHUB_APP_INSTALLATION_ID"], - owner: "evalops", - repositories: [], - permissions: EvalOpsPrLensReview.default_app_token_permissions - } - OptionParser.new do |parser| - parser.on("--app-id ID") { |value| options[:app_id] = value } - parser.on("--private-key-file PATH") { |value| options[:private_key] = File.read(value) } - parser.on("--installation-id ID") { |value| options[:installation_id] = value } - parser.on("--owner OWNER") { |value| options[:owner] = value } - parser.on("--repositories CSV") { |value| options[:repositories] = EvalOpsPrLensReview.parse_list(value) } - parser.on("--permission KEY=VALUE") do |value| - key, permission = value.split("=", 2) - options[:permissions][key] = permission - end - end.parse! - raise OptionParser::MissingArgument, "app-id" if options[:app_id].to_s.empty? - raise OptionParser::MissingArgument, "private-key" if options[:private_key].to_s.empty? - - puts EvalOpsPrLensReview.create_app_installation_token(**options) - else - warn "usage: #{$PROGRAM_NAME} discover|post-status|prepare-workspace|run-lens|lens-status-description|meta-review|dispatch-review-requests|mint-app-token" - exit 2 - end -end diff --git a/.github/scripts/evalopsbot-webhook-relay.rb b/.github/scripts/evalopsbot-webhook-relay.rb deleted file mode 100644 index e4ebcad..0000000 --- a/.github/scripts/evalopsbot-webhook-relay.rb +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "net/http" -require "openssl" -require "optparse" -require "time" -require "uri" - -module EvalOpsBotWebhookRelay - DISPATCH_EVENT = "evalopsbot-review-requested" - DISPATCH_SOURCE = "evalopsbot-webhook-relay" - DEFAULT_TARGET_REPO = "evalops/.github" - - module_function - - def secure_compare(left, right) - return false unless left.bytesize == right.bytesize - - left.bytes.zip(right.bytes).reduce(0) { |memo, pair| memo | (pair.fetch(0) ^ pair.fetch(1)) }.zero? - end - - def verify_signature!(body:, signature:, secret:) - return true if secret.to_s.empty? - - expected = "sha256=#{OpenSSL::HMAC.hexdigest("SHA256", secret, body)}" - unless signature.to_s.start_with?("sha256=") && secure_compare(signature.to_s, expected) - raise "invalid GitHub webhook signature" - end - - true - end - - def dispatch_payload(event_name:, body:, reviewer:, delivery: nil) - return skip("unsupported event #{event_name}") unless event_name == "pull_request" - - payload = JSON.parse(body) - return skip("unsupported action #{payload["action"]}") unless payload["action"] == "review_requested" - - requested_reviewer = payload.dig("requested_reviewer", "login").to_s - return skip("review requested for #{requested_reviewer}") unless requested_reviewer == reviewer - - repo = payload.dig("repository", "full_name").to_s - return skip("repository is not in evalops org") unless repo.start_with?("evalops/") - - pr = payload.dig("pull_request", "number") - return skip("missing pull request number") if pr.nil? - - { - "event_type" => DISPATCH_EVENT, - "client_payload" => { - "target_repo" => repo, - "target_pr" => "#{repo}##{Integer(pr)}", - "requested_reviewer" => requested_reviewer, - "source" => DISPATCH_SOURCE, - "delivery" => delivery.to_s - }.reject { |_key, value| value.to_s.empty? } - } - rescue JSON::ParserError - skip("invalid JSON payload") - end - - def skip(reason) - { - "skipped" => true, - "reason" => reason - } - end - - def dispatch_to_github(payload:, token:, target_repo: DEFAULT_TARGET_REPO) - raise "GITHUB_TOKEN is required" if token.to_s.empty? - - uri = URI("https://api.github.com/repos/#{target_repo}/dispatches") - request = Net::HTTP::Post.new(uri) - request["accept"] = "application/vnd.github+json" - request["authorization"] = "Bearer #{token}" - request["content-type"] = "application/json" - request["user-agent"] = "evalopsbot-webhook-relay" - request.body = JSON.generate(payload) - - response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http| - http.request(request) - end - unless response.code.to_i.between?(200, 299) - raise "repository dispatch failed with HTTP #{response.code}: #{response.body}" - end - - { - "dispatched" => true, - "target_repo" => target_repo, - "event_type" => payload.fetch("event_type"), - "generated_at" => Time.now.utc.iso8601 - } - end -end - -if $PROGRAM_NAME == __FILE__ - options = { - event_name: ENV["GITHUB_WEBHOOK_EVENT"], - delivery: ENV["GITHUB_WEBHOOK_DELIVERY"], - signature: ENV["GITHUB_WEBHOOK_SIGNATURE_256"], - secret: ENV["GITHUB_WEBHOOK_SECRET"], - reviewer: "EvalOpsBot", - token: ENV["GITHUB_TOKEN"], - target_repo: EvalOpsBotWebhookRelay::DEFAULT_TARGET_REPO, - dry_run: false - } - OptionParser.new do |parser| - parser.on("--event EVENT") { |value| options[:event_name] = value } - parser.on("--delivery ID") { |value| options[:delivery] = value } - parser.on("--signature SIGNATURE") { |value| options[:signature] = value } - parser.on("--secret SECRET") { |value| options[:secret] = value } - parser.on("--reviewer LOGIN") { |value| options[:reviewer] = value } - parser.on("--token TOKEN") { |value| options[:token] = value } - parser.on("--target-repo OWNER/REPO") { |value| options[:target_repo] = value } - parser.on("--input PATH") { |value| options[:input] = value } - parser.on("--output PATH") { |value| options[:output] = value } - parser.on("--dry-run") { options[:dry_run] = true } - end.parse! - - body = options[:input] ? File.read(options[:input]) : STDIN.read - EvalOpsBotWebhookRelay.verify_signature!( - body: body, - signature: options[:signature], - secret: options[:secret] - ) - payload = EvalOpsBotWebhookRelay.dispatch_payload( - event_name: options[:event_name], - body: body, - reviewer: options[:reviewer], - delivery: options[:delivery] - ) - result = if payload["skipped"] - payload - elsif options[:dry_run] - { "would_dispatch" => true, "payload" => payload } - else - EvalOpsBotWebhookRelay.dispatch_to_github( - payload: payload, - token: options[:token], - target_repo: options[:target_repo] - ) - end - File.write(options[:output], JSON.pretty_generate(result)) if options[:output] - puts JSON.pretty_generate(result) -end diff --git a/.github/scripts/publish-codex-structured-review.rb b/.github/scripts/publish-codex-structured-review.rb deleted file mode 100644 index 63b0b82..0000000 --- a/.github/scripts/publish-codex-structured-review.rb +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "optparse" -require "open3" -require "tempfile" - -module CodexStructuredReview - MARKER = "" - - module_function - - def normalize_path(path, workspace: ENV.fetch("GITHUB_WORKSPACE", Dir.pwd)) - raw = path.to_s.strip - root = File.expand_path(workspace.to_s) - expanded = File.expand_path(raw) - - if expanded.start_with?("#{root}/") - expanded.delete_prefix("#{root}/") - else - raw.sub(%r{\A\./}, "") - end - end - - def finding_payload(finding, commit:) - location = finding.fetch("code_location") - line_range = location.fetch("line_range") - start_line = Integer(line_range.fetch("start")) - end_line = Integer(line_range.fetch("end")) - start_line, end_line = [end_line, start_line] if start_line > end_line - - body = +"#{finding.fetch("title")}\n\n#{finding.fetch("body")}" - body << "\n\nPriority: P#{finding.fetch("priority")}" - body << "\nConfidence: #{finding.fetch("confidence_score")}" - - payload = { - body: body, - commit_id: commit, - path: normalize_path(location.fetch("absolute_file_path")), - line: end_line, - side: "RIGHT" - } - if start_line != end_line - payload[:start_line] = start_line - payload[:start_side] = "RIGHT" - end - payload - end - - def summary_body(review) - findings = review.fetch("findings", []) - [ - MARKER, - "**Codex structured review**", - "", - "Verdict: #{review.fetch("overall_correctness")}", - "Confidence: #{review.fetch("overall_confidence_score")}", - "Findings: #{findings.length}", - "", - review.fetch("overall_explanation") - ].join("\n") - end - - def gh_api(*args, input: nil) - command = ["gh", "api", *args] - if input - Tempfile.create(["codex-review", ".json"]) do |file| - file.write(input) - file.flush - command += ["--input", file.path] - stdout, stderr, status = Open3.capture3(*command) - return [stdout, stderr, status] - end - end - - Open3.capture3(*command) - end - - def post_line_comment(repo:, pr:, payload:, dry_run: false) - return ["DRY_RUN #{JSON.generate(payload)}", "", true] if dry_run - - stdout, stderr, status = gh_api( - "--method", - "POST", - "repos/#{repo}/pulls/#{pr}/comments", - input: JSON.generate(payload) - ) - [stdout, stderr, status.success?] - end - - def upsert_summary(repo:, pr:, body:, dry_run: false) - return ["DRY_RUN #{body}", "", true] if dry_run - - stdout, stderr, status = gh_api( - "--paginate", - "repos/#{repo}/issues/#{pr}/comments", - "--jq", - ".[] | select(.body | contains(\"#{MARKER}\")) | .id" - ) - return [stdout, stderr, false] unless status.success? - - existing_id = stdout.lines.first&.strip - if existing_id && !existing_id.empty? - gh_api( - "--method", - "PATCH", - "repos/#{repo}/issues/comments/#{existing_id}", - input: JSON.generate({ body: body }) - ).then { |out, err, patch_status| [out, err, patch_status.success?] } - else - gh_api( - "--method", - "POST", - "repos/#{repo}/issues/#{pr}/comments", - input: JSON.generate({ body: body }) - ).then { |out, err, create_status| [out, err, create_status.success?] } - end - end -end - -if $PROGRAM_NAME == __FILE__ - options = { - dry_run: false - } - - OptionParser.new do |parser| - parser.on("--review-json PATH", "Codex structured review JSON") { |value| options[:review_json] = value } - parser.on("--repo OWNER/REPO", "GitHub repository") { |value| options[:repo] = value } - parser.on("--pr NUMBER", Integer, "Pull request number") { |value| options[:pr] = value } - parser.on("--commit SHA", "Head commit SHA") { |value| options[:commit] = value } - parser.on("--dry-run", "Print payloads without calling GitHub") { options[:dry_run] = true } - end.parse! - - missing = %i[review_json repo pr commit].select { |key| options[key].nil? || options[key].to_s.empty? } - unless missing.empty? - warn "missing required options: #{missing.join(", ")}" - exit 2 - end - - review = JSON.parse(File.read(options.fetch(:review_json))) - failures = [] - - review.fetch("findings", []).each do |finding| - payload = CodexStructuredReview.finding_payload(finding, commit: options.fetch(:commit)) - stdout, stderr, ok = CodexStructuredReview.post_line_comment( - repo: options.fetch(:repo), - pr: options.fetch(:pr), - payload: payload, - dry_run: options.fetch(:dry_run) - ) - puts stdout unless stdout.empty? - next if ok - - failures << "#{payload[:path]}:#{payload[:line]} #{stderr.strip}" - warn "failed to post Codex finding for #{payload[:path]}:#{payload[:line]}: #{stderr.strip}" - end - - stdout, stderr, ok = CodexStructuredReview.upsert_summary( - repo: options.fetch(:repo), - pr: options.fetch(:pr), - body: CodexStructuredReview.summary_body(review), - dry_run: options.fetch(:dry_run) - ) - puts stdout unless stdout.empty? - unless ok - warn "failed to upsert Codex summary: #{stderr.strip}" - failures << "summary #{stderr.strip}" - end - - if failures.empty? - puts "Published Codex structured review." - else - warn "Codex structured review completed with #{failures.length} publishing failure(s)." - exit 1 - end -end diff --git a/.github/scripts/sweep-recent-review-feedback.rb b/.github/scripts/sweep-recent-review-feedback.rb deleted file mode 100644 index fcf4408..0000000 --- a/.github/scripts/sweep-recent-review-feedback.rb +++ /dev/null @@ -1,1027 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "digest" -require "open3" -require "optparse" -require "set" -require "tempfile" -require "time" -require_relative "check-pr-review-threads" - -module EvalOpsReviewFeedbackSweep - module_function - - DEFAULT_TITLE = "[codex] Recent unresolved review feedback" - DEFAULT_WEEKLY_REPORT_TITLE = "[codex] Weekly review feedback guardrail report" - GUARDRAIL_ISSUE_TITLE_PREFIX = "[codex] Guardrail backlog:" - REPO_GUARDRAIL_ISSUE_TITLE_PREFIX = "[codex] Guardrail candidate:" - LEDGER_SCHEMA_VERSION = "evalops.review_feedback_ledger.v1" - GUARDRAIL_BACKLOG_SCHEMA_VERSION = "evalops.review_feedback_guardrail_backlog.v1" - GUARDRAIL_LIFECYCLE_SCHEMA_VERSION = "evalops.review_feedback_guardrail_lifecycle.v1" - - GUARDRAIL_CLASSES = [ - { - "key" => "workflow-shell-footgun", - "title" => "Workflow shell footgun", - "patterns" => [/\.github\/workflows/, /\bactionlint\b/, /\bshell\b/, /\bbash\b/, /\bset -e\b/, /\bworkflow\b/], - "recommended_guardrail" => "Add or extend workflow lint/security checks so fragile shell and GitHub Actions mistakes fail before review." - }, - { - "key" => "parser-cli-contract", - "title" => "Parser and CLI contract drift", - "patterns" => [/\bparse\b/, /\bparser\b/, /\bcli\b/, /\bargv\b/, /\bflag\b/, /\bsubstring\b/, /\bcommand\b/], - "recommended_guardrail" => "Add parser-backed tests that fail when command text, flags, or structured inputs are accepted by substring matching instead of the real parser." - }, - { - "key" => "visual-capture-resilience", - "title" => "Visual capture resilience gap", - "patterns" => [/\bvisual\b/, /\bsampler\b/, /\bcapture\b/, /\bframe\b/, /\bperception\b/, /\bscreenshot\b/, /\bimage\b/, /\bocr\b/], - "recommended_guardrail" => "Add capture/perception regression coverage that preserves partial frame results and surfaces provider errors without dropping the whole capture." - }, - { - "key" => "generated-contract-drift", - "title" => "Generated contract drift", - "patterns" => [/\bproto\b/, /\bprotobuf\b/, /\bbuf\b/, /\bgenerated\b/, %r{\bgen/}, /\bsdk\b/, /\bopenapi\b/, /\bjsonschema\b/], - "recommended_guardrail" => "Add generated-output drift checks and fixture coverage around the touched API or schema surface." - }, - { - "key" => "release-train-drift", - "title" => "Release train drift", - "patterns" => [/\brelease\b/, /\bversion\b/, /\bpublish\b/, /\bpackage\b/, /\bchangelog\b/, /\bcutover\b/, /\bregistry\b/], - "recommended_guardrail" => "Add release metadata and changelog coverage checks tied to the package or deploy artifact that changed." - }, - { - "key" => "runtime-smoke-coverage", - "title" => "Runtime smoke coverage gap", - "patterns" => [/\bsmoke\b/, /\bruntime\b/, /\bagentruntime\b/, /\breplay\b/, /\breceipt\b/, /\boutbox\b/, /\breadiness\b/, /\bhealth\b/, /\bstaging\b/, /\bmetadata\b/, /\bcorrelation\b/, /\bevidence\b/], - "recommended_guardrail" => "Add a smoke or preflight fixture that proves the runtime-visible behavior and required evidence fields." - }, - { - "key" => "configuration-safety", - "title" => "Configuration safety", - "patterns" => [/\bkustomize\b/, /\bkubernetes\b/, /\bk8s\b/, /\bterraform\b/, /\bselector\b/, /\bnamespace\b/, /\bdesired-state\b/, /\byaml\b/], - "recommended_guardrail" => "Add desired-state validation that renders and checks configuration invariants before apply or merge." - }, - { - "key" => "security-authz", - "title" => "Security or authorization gap", - "patterns" => [/\bauth\b/, /\bauthoriz/, /\bpermission\b/, /\btoken\b/, /\bcredential\b/, /\bsecret\b/, /\bcsrf\b/, /\binjection\b/, /\bsecurity\b/], - "recommended_guardrail" => "Add an auth/security regression test or static rule covering the vulnerable boundary." - }, - { - "key" => "test-coverage-gap", - "title" => "Test coverage gap", - "patterns" => [/\btest\b/, /\bcoverage\b/, /\bfixture\b/, /\bassert\b/, /\bfuzz\b/, /\bregression\b/, /\bmissing case\b/], - "recommended_guardrail" => "Add focused regression or fuzz coverage for the exact missed case, then wire it into the smallest relevant CI target." - }, - { - "key" => "docs-runbook-drift", - "title" => "Docs or runbook drift", - "patterns" => [/\breadme\b/, /\bdocs?\b/, /\brunbook\b/, /\bguide\b/, /\bcomment\b/, /\bdescription\b/], - "recommended_guardrail" => "Add docs/runbook coverage or metadata checks only when the documented operator path changed." - } - ].freeze - - SEVERITY_SCORE = { - "p0" => 100, - "p1" => 80, - "high" => 50, - "medium" => 20, - "low" => 5, - "none" => 0 - }.freeze - - def gh(*args, input: nil) - command = ["gh", *args] - if input - Tempfile.create(["review-feedback-sweep", ".md"]) do |file| - file.write(input) - file.flush - stdout, stderr, status = Open3.capture3(*command, "--body-file", file.path) - return [stdout, stderr, status] - end - end - - Open3.capture3(*command) - end - - def search_recent_prs(owner:, since:, limit: 100) - stdout, stderr, status = gh( - "search", - "prs", - "--owner", - owner, - "--merged", - "--merged-at", - ">=#{since}", - "--limit", - limit.to_s, - "--json", - "repository,number,url,title,closedAt" - ) - raise "gh search prs failed: #{stderr.strip}" unless status.success? - - JSON.parse(stdout) - end - - def feedback_items(owner:, since:, min_severity:, pr_limit: 100, progress: false) - prs = search_recent_prs(owner: owner, since: since, limit: pr_limit) - warn "review feedback sweep: inspecting #{prs.length} merged PRs since #{since}" if progress - prs.each_with_index.flat_map do |pr, index| - repo = pr.fetch("repository").fetch("nameWithOwner") - warn "review feedback sweep: #{index + 1}/#{prs.length} #{repo}##{pr.fetch("number")}" if progress - payload = EvalOpsReviewThreadGuard.fetch_payload(repo: repo, pr: pr.fetch("number")) - EvalOpsReviewThreadGuard.blocking_feedback(payload, min_severity: min_severity).map do |item| - item.merge( - repo: repo, - pr_number: pr.fetch("number"), - pr_title: pr.fetch("title"), - pr_url: pr.fetch("url"), - merged_at: pr["closedAt"] - ) - end - end - end - - def report_markdown(items, owner:, since:, min_severity:) - lines = [ - "# Recent unresolved review feedback", - "", - "- Owner: `#{owner}`", - "- Merged since: `#{since}`", - "- Minimum severity: `#{min_severity}`", - "- Findings: `#{items.length}`", - "", - "" - ] - if items.empty? - lines << "" - lines << "No unresolved review feedback found." - return lines.join("\n") - end - - lines << "" - items.each do |item| - location = item[:path] ? "#{item[:path]}:#{item[:line] || "?"}" : item.fetch(:kind).to_s - lines << "- `#{item.fetch(:severity)}` #{item.fetch(:repo)}##{item.fetch(:pr_number)} #{location}" - lines << " - PR: #{item.fetch(:pr_url)}" - lines << " - Feedback: #{item[:url]}" - first_line = body_first_line(item.fetch(:body)) - lines << " - #{first_line}" unless first_line.empty? - end - lines.join("\n") - end - - def feedback_class(item) - case item.fetch(:kind) - when "review_thread" - "review_thread" - when "pr_comment" - "top_level_pr_comment" - when "pr_review" - "top_level_pr_review" - else - "unknown" - end - end - - def body_first_line(body) - line = body.to_s.lines.map(&:strip).find do |candidate| - next false if candidate.empty? - next false if candidate.match?(/\A###\s+.*Codex Review\b/i) - next false if candidate.match?(%r{\Ahttps://github\.com/}i) - next false if candidate.match?(/\A
/i) - - true - end.to_s - return line unless line.match?(/]+>}, "") - .gsub("**", "") - .squeeze(" ") - .strip - end - - def ledger_entry(item) - body = item.fetch(:body).to_s - { - "repo" => item.fetch(:repo), - "pr_number" => item.fetch(:pr_number), - "pr_title" => item.fetch(:pr_title), - "pr_url" => item.fetch(:pr_url), - "merged_at" => item["merged_at"] || item[:merged_at], - "kind" => item.fetch(:kind), - "feedback_class" => feedback_class(item), - "severity" => item.fetch(:severity), - "feedback_url" => item[:url], - "path" => item[:path], - "line" => item[:line], - "author" => item[:author], - "state" => item[:state], - "is_outdated" => item[:is_outdated], - "body_first_line" => body_first_line(body), - "body_sha256" => Digest::SHA256.hexdigest(body) - }.compact - end - - def ledger_json(items, owner:, since:, min_severity:, generated_at: Time.now.utc) - { - "schema_version" => LEDGER_SCHEMA_VERSION, - "generated_at" => generated_at.utc.iso8601, - "owner" => owner, - "merged_since" => since, - "min_severity" => min_severity, - "finding_count" => items.length, - "findings" => items.map { |item| ledger_entry(item) } - } - end - - def guardrail_class(finding) - haystack = [ - finding["repo"], - finding["pr_title"], - finding["path"], - finding["body_first_line"], - finding["feedback_class"], - finding["kind"] - ].compact.join("\n").downcase - - GUARDRAIL_CLASSES.find do |candidate| - candidate.fetch("patterns").any? { |pattern| haystack.match?(pattern) } - end || { - "key" => "other-feedback", - "title" => "Other feedback", - "recommended_guardrail" => "Review manually before converting this class into a repo-local guardrail." - } - end - - def severity_score(severity) - SEVERITY_SCORE.fetch(severity.to_s.downcase, 0) - end - - def finding_summary(finding) - finding.slice( - "repo", - "pr_number", - "pr_title", - "feedback_url", - "path", - "line", - "severity", - "body_sha256", - "body_first_line" - ) - end - - def guardrail_backlog_json(ledger, generated_at: Time.now.utc, sample_limit: 3) - findings = Array(ledger.fetch("findings")) - grouped = findings.group_by { |finding| guardrail_class(finding).fetch("key") } - classes = grouped.map do |key, class_findings| - metadata = guardrail_class(class_findings.first) - repos = class_findings.map { |finding| finding.fetch("repo") }.uniq.sort - findings_by_repo = class_findings.group_by { |finding| finding.fetch("repo") } - score = class_findings.sum { |finding| severity_score(finding["severity"]) } + ((repos.length - 1) * 10) - { - "key" => key, - "title" => metadata.fetch("title"), - "score" => score, - "finding_count" => class_findings.length, - "repo_count" => repos.length, - "repos" => repos, - "recommended_guardrail" => metadata.fetch("recommended_guardrail"), - "finding_fingerprints" => class_findings.map { |finding| guardrail_finding_fingerprint(finding) }.uniq.sort, - "repo_fingerprints" => findings_by_repo.transform_values { |repo_findings| repo_findings.map { |finding| guardrail_finding_fingerprint(finding) }.uniq.sort }, - "repo_sample_findings" => findings_by_repo.transform_values { |repo_findings| repo_findings.first(sample_limit).map { |finding| finding_summary(finding) } }, - "sample_findings" => class_findings.first(sample_limit).map { |finding| finding_summary(finding) } - } - end.sort_by { |entry| [-entry.fetch("score"), -entry.fetch("finding_count"), entry.fetch("key")] } - - { - "schema_version" => GUARDRAIL_BACKLOG_SCHEMA_VERSION, - "source_schema_version" => ledger.fetch("schema_version"), - "generated_at" => generated_at.utc.iso8601, - "owner" => ledger.fetch("owner"), - "merged_since" => ledger.fetch("merged_since"), - "min_severity" => ledger.fetch("min_severity"), - "source_finding_count" => findings.length, - "class_count" => classes.length, - "classes" => classes - } - end - - def guardrail_backlog_markdown(backlog) - lines = [ - "# Review feedback guardrail backlog", - "", - "- Owner: `#{backlog.fetch("owner")}`", - "- Merged since: `#{backlog.fetch("merged_since")}`", - "- Minimum severity: `#{backlog.fetch("min_severity")}`", - "- Source findings: `#{backlog.fetch("source_finding_count")}`", - "- Classes: `#{backlog.fetch("class_count")}`", - "", - "" - ] - - if backlog.fetch("classes").empty? - lines << "" - lines << "No guardrail candidates found." - return lines.join("\n") - end - - lines << "" - lines << "| Rank | Class | Score | Findings | Repos | Recommended guardrail |" - lines << "| --- | --- | ---: | ---: | --- | --- |" - backlog.fetch("classes").each_with_index do |entry, index| - lines << "| #{index + 1} | `#{entry.fetch("key")}` #{entry.fetch("title")} | #{entry.fetch("score")} | #{entry.fetch("finding_count")} | #{entry.fetch("repos").join(", ")} | #{entry.fetch("recommended_guardrail")} |" - end - - backlog.fetch("classes").each do |entry| - lines << "" - lines << "## #{entry.fetch("title")}" - entry.fetch("sample_findings").each do |finding| - location = finding["path"] ? "#{finding["path"]}:#{finding["line"] || "?"}" : "top-level feedback" - lines << "- `#{finding.fetch("severity")}` #{finding.fetch("repo")}##{finding.fetch("pr_number")} #{location}" - lines << " - #{finding.fetch("body_first_line")}" unless finding.fetch("body_first_line", "").empty? - lines << " - #{finding.fetch("feedback_url")}" if finding["feedback_url"] - end - end - - lines.join("\n") - end - - def guardrail_issue_title(entry) - "#{GUARDRAIL_ISSUE_TITLE_PREFIX} #{entry.fetch("title")} (#{entry.fetch("key")})" - end - - def repo_guardrail_issue_title(entry) - "#{REPO_GUARDRAIL_ISSUE_TITLE_PREFIX} #{entry.fetch("title")} (#{entry.fetch("key")})" - end - - def finding_fingerprint_lines(fingerprints) - lines = [ - "" - ] - fingerprints.to_a.sort.each do |fingerprint| - lines << "- `#{fingerprint}`" - end - lines - end - - def guardrail_issue_body(backlog, entry) - lines = [ - "", - "# #{entry.fetch("title")}", - "", - "This issue tracks a recurring review-feedback class from the EvalOps review feedback sentinel.", - "", - "- Class: `#{entry.fetch("key")}`", - "- Score: `#{entry.fetch("score")}`", - "- Findings: `#{entry.fetch("finding_count")}`", - "- Repos: `#{entry.fetch("repos").join("`, `")}`", - "- Generated at: `#{backlog.fetch("generated_at")}`", - "- Window: merged since `#{backlog.fetch("merged_since")}` with minimum severity `#{backlog.fetch("min_severity")}`", - "", - "## Guardrail to build", - "", - entry.fetch("recommended_guardrail"), - "", - "## Representative feedback", - "" - ] - - entry.fetch("sample_findings").each do |finding| - location = finding["path"] ? "#{finding["path"]}:#{finding["line"] || "?"}" : "top-level feedback" - lines << "- `#{finding.fetch("severity")}` #{finding.fetch("repo")}##{finding.fetch("pr_number")} #{location}" - lines << " - #{finding.fetch("body_first_line")}" unless finding.fetch("body_first_line", "").empty? - lines << " - #{finding.fetch("feedback_url")}" if finding["feedback_url"] - end - - lines.concat( - [ - "", - "## Finding fingerprints", - "" - ] - ) - lines.concat(finding_fingerprint_lines(current_guardrail_fingerprints(entry))) - - lines.concat( - [ - "", - "## Acceptance criteria", - "", - "- The class has an owner repo and a concrete guardrail location.", - "- The guardrail fails for at least one representative feedback shape listed above.", - "- The guardrail is wired into the smallest relevant CI or preflight target.", - "- The issue is closed only after the guardrail has merged and the feedback sentinel no longer ranks this class as an unaddressed candidate." - ] - ) - lines.join("\n") - end - - def repo_sample_findings(entry, repo) - by_repo = entry.fetch("repo_sample_findings", {}) - return by_repo.fetch(repo) if by_repo.key?(repo) - - entry.fetch("sample_findings").select { |finding| finding.fetch("repo") == repo } - end - - def repo_guardrail_issue_body(backlog, entry, repo:, org_issue_url: nil) - repo_findings = repo_sample_findings(entry, repo) - fingerprints = repo_guardrail_fingerprints(entry, repo) - lines = [ - "", - "# #{entry.fetch("title")}", - "", - "This issue routes a recurring review-feedback class to the repo that needs the prevention guardrail.", - "", - "- Repo: `#{repo}`", - "- Class: `#{entry.fetch("key")}`", - "- Repo findings: `#{fingerprints.length}`", - "- Class findings: `#{entry.fetch("finding_count")}`", - "- Generated at: `#{backlog.fetch("generated_at")}`", - "- Window: merged since `#{backlog.fetch("merged_since")}` with minimum severity `#{backlog.fetch("min_severity")}`" - ] - lines << "- Org tracker: #{org_issue_url}" if org_issue_url - lines.concat( - [ - "", - "## Guardrail to build", - "", - entry.fetch("recommended_guardrail"), - "", - "## Representative feedback in this repo", - "" - ] - ) - - repo_findings.each do |finding| - location = finding["path"] ? "#{finding["path"]}:#{finding["line"] || "?"}" : "top-level feedback" - lines << "- `#{finding.fetch("severity")}` #{finding.fetch("repo")}##{finding.fetch("pr_number")} #{location}" - lines << " - #{finding.fetch("body_first_line")}" unless finding.fetch("body_first_line", "").empty? - lines << " - #{finding.fetch("feedback_url")}" if finding["feedback_url"] - end - - lines.concat( - [ - "", - "## Finding fingerprints", - "" - ] - ) - lines.concat(finding_fingerprint_lines(fingerprints)) - - lines.concat( - [ - "", - "## Acceptance criteria", - "", - "- A repo-local guardrail fails for at least one representative feedback shape listed above.", - "- The guardrail is wired into the smallest relevant CI, preflight, or test target in this repo.", - "- The issue is closed only after the guardrail merges and the feedback sentinel reports this repo/class fingerprint set as already closed or absent." - ] - ) - lines.join("\n") - end - - def find_issue_by_title(repo:, title:) - stdout, stderr, status = gh( - "issue", - "list", - "--repo", - repo, - "--state", - "all", - "--search", - "\"#{title}\" in:title", - "--limit", - "10", - "--json", - "number,title,state,url,body" - ) - raise "gh issue list failed: #{stderr.strip}" unless status.success? - - JSON.parse(stdout).find { |issue| issue.fetch("title") == title } - end - - def guardrail_finding_fingerprint(finding) - Digest::SHA256.hexdigest( - [ - finding.fetch("repo"), - finding.fetch("pr_number").to_s, - finding["feedback_url"].to_s, - finding["path"].to_s, - finding["line"].to_s, - finding["body_sha256"].to_s.empty? ? Digest::SHA256.hexdigest(finding.fetch("body_first_line", "")) : finding["body_sha256"] - ].join("\n") - ) - end - - def guardrail_issue_fingerprints(body) - body.to_s.scan(/`([0-9a-f]{64})`/).flatten.to_set - end - - def current_guardrail_fingerprints(entry) - fingerprints = Array(entry["finding_fingerprints"]) - return fingerprints.to_set unless fingerprints.empty? - - entry.fetch("sample_findings").map { |finding| guardrail_finding_fingerprint(finding) }.to_set - end - - def repo_guardrail_fingerprints(entry, repo) - repo_fingerprints = entry.fetch("repo_fingerprints", {}) - fingerprints = Array(repo_fingerprints[repo]) - return fingerprints.to_set unless fingerprints.empty? - - repo_sample_findings(entry, repo).map { |finding| guardrail_finding_fingerprint(finding) }.to_set - end - - def guardrail_issue_key_from_title(title) - prefix = "#{GUARDRAIL_ISSUE_TITLE_PREFIX} " - return nil unless title.to_s.start_with?(prefix) - - match = title.to_s.match(/\(([^()]+)\)\z/) - match[1] if match - end - - def upsert_guardrail_class_issue(repo:, backlog:, entry:) - title = guardrail_issue_title(entry) - body = guardrail_issue_body(backlog, entry) - issue = find_issue_by_title(repo: repo, title: title) - - if issue - number = issue.fetch("number").to_s - if issue.fetch("state") == "CLOSED" - issue_fingerprints = guardrail_issue_fingerprints(issue.fetch("body", "")) - if current_guardrail_fingerprints(entry).subset?(issue_fingerprints) - return { - "class_key" => entry.fetch("key"), - "title" => title, - "issue_number" => issue.fetch("number"), - "issue_url" => issue.fetch("url"), - "action" => "already_closed" - } - end - - gh("issue", "reopen", number, "--repo", repo).then do |_out, err, ok| - raise "gh issue reopen failed: #{err.strip}" unless ok.success? - end - end - gh("issue", "edit", number, "--repo", repo, input: body).then do |_out, err, ok| - raise "gh issue edit failed: #{err.strip}" unless ok.success? - end - return { - "class_key" => entry.fetch("key"), - "title" => title, - "issue_number" => issue.fetch("number"), - "issue_url" => issue.fetch("url"), - "action" => issue.fetch("state") == "CLOSED" ? "reopened" : "updated" - } - end - - gh("issue", "create", "--repo", repo, "--title", title, input: body).then do |out, err, ok| - raise "gh issue create failed: #{err.strip}" unless ok.success? - - issue_url = out.strip - { - "class_key" => entry.fetch("key"), - "title" => title, - "issue_number" => issue_number_from_url(issue_url), - "issue_url" => issue_url, - "action" => "created" - }.compact - end - end - - def upsert_repo_guardrail_issue(repo:, backlog:, entry:, org_issue_url: nil) - title = repo_guardrail_issue_title(entry) - body = repo_guardrail_issue_body(backlog, entry, repo: repo, org_issue_url: org_issue_url) - issue = find_issue_by_title(repo: repo, title: title) - - if issue - number = issue.fetch("number").to_s - if issue.fetch("state") == "CLOSED" - issue_fingerprints = guardrail_issue_fingerprints(issue.fetch("body", "")) - if repo_guardrail_fingerprints(entry, repo).subset?(issue_fingerprints) - return { - "scope" => "repo", - "repo" => repo, - "class_key" => entry.fetch("key"), - "title" => title, - "issue_number" => issue.fetch("number"), - "issue_url" => issue.fetch("url"), - "action" => "already_closed" - } - end - - gh("issue", "reopen", number, "--repo", repo).then do |_out, err, ok| - raise "gh issue reopen failed: #{err.strip}" unless ok.success? - end - end - gh("issue", "edit", number, "--repo", repo, input: body).then do |_out, err, ok| - raise "gh issue edit failed: #{err.strip}" unless ok.success? - end - return { - "scope" => "repo", - "repo" => repo, - "class_key" => entry.fetch("key"), - "title" => title, - "issue_number" => issue.fetch("number"), - "issue_url" => issue.fetch("url"), - "action" => issue.fetch("state") == "CLOSED" ? "reopened" : "updated" - } - end - - gh("issue", "create", "--repo", repo, "--title", title, input: body).then do |out, err, ok| - raise "gh issue create failed: #{err.strip}" unless ok.success? - - issue_url = out.strip - { - "scope" => "repo", - "repo" => repo, - "class_key" => entry.fetch("key"), - "title" => title, - "issue_number" => issue_number_from_url(issue_url), - "issue_url" => issue_url, - "action" => "created" - }.compact - end - end - - def upsert_repo_guardrail_issues(backlog:, org_issue_results: []) - org_issue_by_class = org_issue_results.each_with_object({}) do |result, by_class| - next unless result["class_key"] && result["issue_url"] - - by_class[result.fetch("class_key")] = result.fetch("issue_url") - end - already_closed_classes = org_issue_results - .select { |result| result["action"] == "already_closed" } - .map { |result| result.fetch("class_key") } - .to_set - backlog.fetch("classes").flat_map do |entry| - next [] if already_closed_classes.include?(entry.fetch("key")) - - entry.fetch("repos").map do |repo| - upsert_repo_guardrail_issue( - repo: repo, - backlog: backlog, - entry: entry, - org_issue_url: org_issue_by_class[entry.fetch("key")] - ) - end - end - end - - def issue_number_from_url(url) - match = url.to_s.match(%r{/issues/(\d+)\z}) - match[1].to_i if match - end - - def upsert_guardrail_class_issues(repo:, backlog:) - backlog.fetch("classes").map do |entry| - upsert_guardrail_class_issue(repo: repo, backlog: backlog, entry: entry) - end - end - - def list_open_guardrail_class_issues(repo:) - stdout, stderr, status = gh( - "issue", - "list", - "--repo", - repo, - "--state", - "open", - "--search", - "\"#{GUARDRAIL_ISSUE_TITLE_PREFIX}\" in:title", - "--limit", - "100", - "--json", - "number,title,url" - ) - raise "gh issue list failed: #{stderr.strip}" unless status.success? - - JSON.parse(stdout) - end - - def close_stale_guardrail_class_issues(repo:, backlog:) - active_keys = backlog.fetch("classes").map { |entry| entry.fetch("key") }.to_set - list_open_guardrail_class_issues(repo: repo).each_with_object([]) do |issue, results| - class_key = guardrail_issue_key_from_title(issue.fetch("title")) - next if class_key.nil? || active_keys.include?(class_key) - - comment = "Closing because the review feedback sentinel no longer ranks `#{class_key}` as an active guardrail candidate in the current backlog window." - gh("issue", "close", issue.fetch("number").to_s, "--repo", repo, "--comment", comment).then do |_out, err, ok| - raise "gh issue close failed: #{err.strip}" unless ok.success? - end - result = { - "class_key" => class_key, - "title" => issue.fetch("title"), - "issue_number" => issue.fetch("number"), - "issue_url" => issue.fetch("url"), - "action" => "closed_stale" - } - results << result - end - end - - def guardrail_lifecycle_json(backlog, issue_results:, generated_at: Time.now.utc) - { - "schema_version" => GUARDRAIL_LIFECYCLE_SCHEMA_VERSION, - "source_schema_version" => backlog.fetch("schema_version"), - "generated_at" => generated_at.utc.iso8601, - "owner" => backlog.fetch("owner"), - "merged_since" => backlog.fetch("merged_since"), - "min_severity" => backlog.fetch("min_severity"), - "class_count" => backlog.fetch("class_count"), - "issue_count" => issue_results.length, - "issues" => issue_results - } - end - - def repeat_rate_metrics(ledger, generated_at: Time.now.utc, bucket_days: 7) - current_start = generated_at.utc - (bucket_days * 24 * 60 * 60) - previous_start = generated_at.utc - (bucket_days * 2 * 24 * 60 * 60) - buckets = Hash.new { |hash, key| hash[key] = { "current" => 0, "previous" => 0 } } - Array(ledger.fetch("findings")).each do |finding| - merged_at = Time.parse(finding.fetch("merged_at")).utc - next if merged_at < previous_start || merged_at > generated_at.utc - - class_key = guardrail_class(finding).fetch("key") - bucket = merged_at >= current_start ? "current" : "previous" - buckets[class_key][bucket] += 1 - rescue ArgumentError, KeyError - next - end - - buckets.map do |class_key, counts| - current = counts.fetch("current") - previous = counts.fetch("previous") - delta = current - previous - change_percent = if previous.zero? - current.zero? ? 0 : nil - else - ((delta.to_f / previous) * 100).round - end - { - "class_key" => class_key, - "current_count" => current, - "previous_count" => previous, - "delta" => delta, - "change_percent" => change_percent - } - end.sort_by { |entry| [-entry.fetch("current_count"), -entry.fetch("previous_count"), entry.fetch("class_key")] } - end - - def weekly_guardrail_report_markdown(backlog, lifecycle: nil, ledger: nil, generated_at: Time.now.utc, top_limit: 5) - classes = backlog.fetch("classes") - repo_counts = classes.each_with_object(Hash.new(0)) do |entry, counts| - entry.fetch("repos").each do |repo| - counts[repo] += Array(entry.fetch("repo_fingerprints", {})[repo]).length - counts[repo] += repo_sample_findings(entry, repo).length if counts[repo].zero? - counts[repo] += entry.fetch("finding_count") if counts[repo].zero? - end - end.sort_by { |repo, count| [-count, repo] } - prevented = Array(lifecycle&.fetch("issues", nil)).select { |issue| issue["action"] == "already_closed" } - repeat_rates = ledger ? repeat_rate_metrics(ledger, generated_at: generated_at) : [] - active = classes.first(top_limit) - - lines = [ - "# Weekly review feedback guardrail report", - "", - "- Generated at: `#{generated_at.utc.iso8601}`", - "- Owner: `#{backlog.fetch("owner")}`", - "- Window: merged since `#{backlog.fetch("merged_since")}` with minimum severity `#{backlog.fetch("min_severity")}`", - "- Source findings: `#{backlog.fetch("source_finding_count")}`", - "- Ranked classes: `#{backlog.fetch("class_count")}`", - "", - "" - ] - - if classes.empty? - lines << "" - lines << "No guardrail candidates found in this window." - return lines.join("\n") - end - - lines.concat( - [ - "", - "## Top guardrail candidates", - "", - "| Rank | Class | Score | Findings | Repos | Next guardrail |", - "| --- | --- | ---: | ---: | --- | --- |" - ] - ) - active.each_with_index do |entry, index| - lines << "| #{index + 1} | `#{entry.fetch("key")}` #{entry.fetch("title")} | #{entry.fetch("score")} | #{entry.fetch("finding_count")} | #{entry.fetch("repos").join(", ")} | #{entry.fetch("recommended_guardrail")} |" - end - - lines.concat( - [ - "", - "## Repos with feedback", - "", - "| Repo | Findings in ranked classes |", - "| --- | ---: |" - ] - ) - repo_counts.first(top_limit).each do |repo, count| - lines << "| #{repo} | #{count} |" - end - - lines.concat( - [ - "", - "## Repeat-rate trend", - "", - "| Class | Current 7d | Previous 7d | Delta | Change |", - "| --- | ---: | ---: | ---: | ---: |" - ] - ) - if repeat_rates.empty? - lines << "| _No dated findings in the last two 7-day buckets_ | 0 | 0 | 0 | 0% |" - else - repeat_rates.first(top_limit).each do |entry| - change = entry.fetch("change_percent").nil? ? "new" : "#{entry.fetch("change_percent")}%" - lines << "| `#{entry.fetch("class_key")}` | #{entry.fetch("current_count")} | #{entry.fetch("previous_count")} | #{entry.fetch("delta")} | #{change} |" - end - end - - lines.concat( - [ - "", - "## Newly prevented or suppressed", - "" - ] - ) - if prevented.empty? - lines << "No already-closed guardrail fingerprints were seen in this run." - else - prevented.first(top_limit).each do |issue| - lines << "- `#{issue.fetch("class_key")}` #{issue.fetch("issue_url")}" - end - end - - lines.concat( - [ - "", - "## Next actions", - "" - ] - ) - active.each do |entry| - lines << "- `#{entry.fetch("key")}`: #{entry.fetch("recommended_guardrail")}" - end - - lines.join("\n") - end - - def upsert_issue(repo:, title:, body:) - stdout, stderr, status = gh( - "issue", - "list", - "--repo", - repo, - "--state", - "open", - "--search", - "\"#{title}\" in:title", - "--limit", - "1", - "--json", - "number" - ) - raise "gh issue list failed: #{stderr.strip}" unless status.success? - - number = JSON.parse(stdout).first&.fetch("number", nil) - if number - gh("issue", "comment", number.to_s, "--repo", repo, input: body).then do |_out, err, ok| - raise "gh issue comment failed: #{err.strip}" unless ok.success? - end - number - else - gh("issue", "create", "--repo", repo, "--title", title, input: body).then do |out, err, ok| - raise "gh issue create failed: #{err.strip}" unless ok.success? - - out.strip - end - end - end -end - -if $PROGRAM_NAME == __FILE__ - options = { - owner: "evalops", - since_hours: 72, - min_severity: "high", - issue_repo: nil, - issue_title: EvalOpsReviewFeedbackSweep::DEFAULT_TITLE, - json_output: nil, - guardrail_backlog_output: nil, - guardrail_backlog_json_output: nil, - guardrail_issue_repo: nil, - guardrail_lifecycle_json_output: nil, - guardrail_repo_issues: false, - weekly_report_issue_repo: nil, - weekly_report_issue_title: EvalOpsReviewFeedbackSweep::DEFAULT_WEEKLY_REPORT_TITLE, - progress: false, - pr_limit: 100, - dry_run: false - } - - OptionParser.new do |parser| - parser.on("--owner OWNER", "GitHub owner/org to sweep") { |value| options[:owner] = value } - parser.on("--since-hours HOURS", Integer, "Merged PR lookback window") { |value| options[:since_hours] = value } - parser.on("--pr-limit LIMIT", Integer, "Maximum merged PRs to inspect") { |value| options[:pr_limit] = value } - parser.on("--min-severity LEVEL", "Minimum severity to report") { |value| options[:min_severity] = value.downcase } - parser.on("--issue-repo OWNER/REPO", "Create or comment on this issue repo when findings exist") { |value| options[:issue_repo] = value } - parser.on("--issue-title TITLE", "Issue title for sweep findings") { |value| options[:issue_title] = value } - parser.on("--json-output PATH", "Write machine-readable feedback ledger JSON to this path") { |value| options[:json_output] = value } - parser.on("--guardrail-backlog-output PATH", "Write ranked guardrail backlog markdown to this path") { |value| options[:guardrail_backlog_output] = value } - parser.on("--guardrail-backlog-json-output PATH", "Write ranked guardrail backlog JSON to this path") { |value| options[:guardrail_backlog_json_output] = value } - parser.on("--guardrail-issue-repo OWNER/REPO", "Create or update one stable issue per ranked guardrail class") { |value| options[:guardrail_issue_repo] = value } - parser.on("--guardrail-repo-issues", "Create or update repo-local guardrail candidate issues for each ranked class/repo pair") { options[:guardrail_repo_issues] = true } - parser.on("--guardrail-lifecycle-json-output PATH", "Write guardrail issue lifecycle JSON to this path") { |value| options[:guardrail_lifecycle_json_output] = value } - parser.on("--weekly-report-issue-repo OWNER/REPO", "Create or comment on this issue repo with the guardrail report") { |value| options[:weekly_report_issue_repo] = value } - parser.on("--weekly-report-issue-title TITLE", "Issue title for the guardrail report") { |value| options[:weekly_report_issue_title] = value } - parser.on("--progress", "Print PR inspection progress to stderr") { options[:progress] = true } - parser.on("--dry-run", "Print report and skip issue writes") { options[:dry_run] = true } - end.parse! - - unless EvalOpsReviewThreadGuard::SEVERITY_RANK.key?(options.fetch(:min_severity)) - warn "invalid --min-severity #{options.fetch(:min_severity).inspect}" - exit 2 - end - - since = (Time.now.utc - (options.fetch(:since_hours) * 3600)).strftime("%Y-%m-%d") - items = EvalOpsReviewFeedbackSweep.feedback_items( - owner: options.fetch(:owner), - since: since, - min_severity: options.fetch(:min_severity), - pr_limit: options.fetch(:pr_limit), - progress: options.fetch(:progress) - ) - ledger = EvalOpsReviewFeedbackSweep.ledger_json( - items, - owner: options.fetch(:owner), - since: since, - min_severity: options.fetch(:min_severity) - ) - body = EvalOpsReviewFeedbackSweep.report_markdown( - items, - owner: options.fetch(:owner), - since: since, - min_severity: options.fetch(:min_severity) - ) - puts body - - if options[:json_output] - File.write(options.fetch(:json_output), "#{JSON.pretty_generate(ledger)}\n") - end - - backlog = nil - lifecycle = nil - if options[:guardrail_backlog_output] || options[:guardrail_backlog_json_output] || options[:guardrail_issue_repo] || options[:guardrail_lifecycle_json_output] || options[:weekly_report_issue_repo] - backlog = EvalOpsReviewFeedbackSweep.guardrail_backlog_json(ledger) - File.write(options.fetch(:guardrail_backlog_json_output), "#{JSON.pretty_generate(backlog)}\n") if options[:guardrail_backlog_json_output] - File.write(options.fetch(:guardrail_backlog_output), "#{EvalOpsReviewFeedbackSweep.guardrail_backlog_markdown(backlog)}\n") if options[:guardrail_backlog_output] - issue_results = [] - unless options.fetch(:dry_run) - if options[:guardrail_issue_repo] && backlog.fetch("classes").any? - issue_results.concat(EvalOpsReviewFeedbackSweep.upsert_guardrail_class_issues(repo: options.fetch(:guardrail_issue_repo), backlog: backlog)) - end - if options[:guardrail_repo_issues] && backlog.fetch("classes").any? - issue_results.concat(EvalOpsReviewFeedbackSweep.upsert_repo_guardrail_issues(backlog: backlog, org_issue_results: issue_results)) - end - issue_results.concat(EvalOpsReviewFeedbackSweep.close_stale_guardrail_class_issues(repo: options.fetch(:guardrail_issue_repo), backlog: backlog)) if options[:guardrail_issue_repo] - end - if options[:guardrail_lifecycle_json_output] - lifecycle = EvalOpsReviewFeedbackSweep.guardrail_lifecycle_json(backlog, issue_results: issue_results) - File.write(options.fetch(:guardrail_lifecycle_json_output), "#{JSON.pretty_generate(lifecycle)}\n") - end - lifecycle ||= EvalOpsReviewFeedbackSweep.guardrail_lifecycle_json(backlog, issue_results: issue_results) - end - - if options[:weekly_report_issue_repo] && !options.fetch(:dry_run) - report_body = EvalOpsReviewFeedbackSweep.weekly_guardrail_report_markdown(backlog || EvalOpsReviewFeedbackSweep.guardrail_backlog_json(ledger), lifecycle: lifecycle, ledger: ledger) - EvalOpsReviewFeedbackSweep.upsert_issue( - repo: options.fetch(:weekly_report_issue_repo), - title: options.fetch(:weekly_report_issue_title), - body: report_body - ) - end - - if items.any? && options[:issue_repo] && !options.fetch(:dry_run) - EvalOpsReviewFeedbackSweep.upsert_issue( - repo: options.fetch(:issue_repo), - title: options.fetch(:issue_title), - body: body - ) - end - - exit(items.empty? ? 0 : 1) -end diff --git a/.github/scripts/sync-agent-mcp-config.rb b/.github/scripts/sync-agent-mcp-config.rb deleted file mode 100644 index cd071d9..0000000 --- a/.github/scripts/sync-agent-mcp-config.rb +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "optparse" -require "time" - -module EvalOpsAgentMcpConfig - REPORT_SCHEMA_VERSION = "evalops.agent_mcp_config_sync.v1" - MANAGED_FILES = { - ".mcp.json" => "mcp.json", - ".codex/config.toml" => "codex-config.toml", - ".cursor/mcp.json" => "cursor-mcp.json" - }.freeze - AGENTS_HEADING = "## EvalOps Integration" - - module_function - - def read_template(template_dir, name) - File.read(File.join(template_dir, name)).sub(/\s*\z/, "\n") - end - - def existing_file(path) - File.file?(path) ? File.read(path) : nil - end - - def ensure_trailing_newline(text) - text.to_s.sub(/\s*\z/, "\n") - end - - def merge_agents(existing, section) - return section if existing.to_s.strip.empty? - return ensure_trailing_newline(existing) if existing.include?(AGENTS_HEADING) - - "#{ensure_trailing_newline(existing)}\n#{section}" - end - - def merge_gitignore(existing, fragment) - current = existing.to_s - additions = fragment.lines.map(&:chomp).reject do |line| - line.empty? || current.lines.map(&:chomp).include?(line) - end - return ensure_trailing_newline(current) if additions.empty? - - base = ensure_trailing_newline(current) - base = "#{base}\n" unless base.strip.empty? - "#{base}#{additions.join("\n")}\n" - end - - def desired_files(workspace:, template_dir:) - files = {} - MANAGED_FILES.each do |target, template| - files[target] = read_template(template_dir, template) - end - agents_section = read_template(template_dir, "agents-section.md") - gitignore_fragment = read_template(template_dir, "gitignore.fragment") - files["AGENTS.md"] = merge_agents(existing_file(File.join(workspace, "AGENTS.md")), agents_section) - files[".gitignore"] = merge_gitignore(existing_file(File.join(workspace, ".gitignore")), gitignore_fragment) - files - end - - def plan(workspace:, template_dir:) - desired_files(workspace: workspace, template_dir: template_dir).map do |path, desired| - absolute = File.join(workspace, path) - existing = existing_file(absolute) - status = - if existing.nil? - "create" - elsif ensure_trailing_newline(existing) == desired - "in_sync" - else - "update" - end - { - "path" => path, - "status" => status, - "bytes" => desired.bytesize - } - end - end - - def write_files(workspace:, template_dir:) - desired_files(workspace: workspace, template_dir: template_dir).each do |path, content| - absolute = File.join(workspace, path) - FileUtils.mkdir_p(File.dirname(absolute)) - File.write(absolute, content) - end - end - - def report(workspace:, template_dir:, write:) - file_plan = plan(workspace: workspace, template_dir: template_dir) - write_files(workspace: workspace, template_dir: template_dir) if write - { - "schema_version" => REPORT_SCHEMA_VERSION, - "generated_at" => Time.now.utc.iso8601, - "workspace" => workspace, - "write" => write, - "files" => file_plan, - "totals" => { - "create" => file_plan.count { |file| file.fetch("status") == "create" }, - "update" => file_plan.count { |file| file.fetch("status") == "update" }, - "in_sync" => file_plan.count { |file| file.fetch("status") == "in_sync" } - } - } - end - - def markdown_report(report) - lines = [ - "# EvalOps Agent MCP Config Report", - "", - "- Generated at: `#{report.fetch("generated_at")}`", - "- Mode: `#{report.fetch("write") ? "write" : "check"}`", - "- Creates: `#{report.dig("totals", "create")}`", - "- Updates: `#{report.dig("totals", "update")}`", - "- In sync: `#{report.dig("totals", "in_sync")}`", - "", - "| Path | Status | Bytes |", - "| --- | --- | ---: |" - ] - report.fetch("files").each do |file| - lines << "| `#{file.fetch("path")}` | #{file.fetch("status")} | #{file.fetch("bytes")} |" - end - lines.join("\n") - end - - def run(argv) - options = { - workspace: Dir.pwd, - template_dir: ".github/agent-mcp/templates", - write: false, - json_output: nil, - markdown_output: nil - } - OptionParser.new do |parser| - parser.on("--workspace PATH", "Repository workspace to check or update") { |value| options[:workspace] = value } - parser.on("--templates PATH", "Template directory") { |value| options[:template_dir] = value } - parser.on("--check", "Check only") { options[:write] = false } - parser.on("--write", "Write missing or drifted files") { options[:write] = true } - parser.on("--json-output PATH", "Write JSON report") { |value| options[:json_output] = value } - parser.on("--markdown-output PATH", "Write Markdown report") { |value| options[:markdown_output] = value } - end.parse!(argv) - - require "fileutils" if options[:write] - - sync_report = report( - workspace: File.expand_path(options.fetch(:workspace)), - template_dir: File.expand_path(options.fetch(:template_dir)), - write: options.fetch(:write) - ) - json = JSON.pretty_generate(sync_report) - if options[:json_output] - File.write(options[:json_output], "#{json}\n") - else - puts json - end - File.write(options[:markdown_output], "#{markdown_report(sync_report)}\n") if options[:markdown_output] - needs_change = sync_report.fetch("files").any? { |file| %w[create update].include?(file.fetch("status")) } - !options.fetch(:write) && needs_change ? 1 : 0 - end -end - -if $PROGRAM_NAME == __FILE__ - exit EvalOpsAgentMcpConfig.run(ARGV) -end diff --git a/.github/scripts/sync-labels.rb b/.github/scripts/sync-labels.rb deleted file mode 100644 index c308bf5..0000000 --- a/.github/scripts/sync-labels.rb +++ /dev/null @@ -1,325 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "open3" -require "optparse" -require "set" -require "time" -require "uri" -require "yaml" - -module EvalOpsLabelSync - SCHEMA_VERSION = "evalops.labels.v1" - REPORT_SCHEMA_VERSION = "evalops.label_sync_report.v1" - DEFAULT_OPT_OUT_FILE = ".github/labels-sync.disabled" - - module_function - - def normalize_color(color) - color.to_s.delete_prefix("#").downcase - end - - def normalize_description(description) - description.to_s.strip - end - - def load_config(path) - data = YAML.safe_load(File.read(path), permitted_classes: [], aliases: false) - labels = Array(data.fetch("labels")) - { - "schema_version" => data["schema_version"], - "source_repo" => data["source_repo"], - "sync" => data["sync"] || {}, - "labels" => labels.map do |label| - { - "name" => label.fetch("name").to_s, - "description" => normalize_description(label["description"]), - "color" => normalize_color(label.fetch("color")) - } - end - } - end - - def validation_errors(config) - errors = [] - errors << "schema_version must be #{SCHEMA_VERSION}" unless config["schema_version"] == SCHEMA_VERSION - names = Set.new - config.fetch("labels").each do |label| - name = label.fetch("name") - errors << "label name is required" if name.empty? - key = name.downcase - errors << "duplicate label #{name}" if names.include?(key) - names << key - errors << "#{name}: color must be six hex characters" unless label.fetch("color").match?(/\A[0-9a-f]{6}\z/) - end - errors - end - - def parse_repos(value) - value.to_s.split(",").map(&:strip).reject(&:empty?).map do |repo| - repo.include?("/") ? repo : "evalops/#{repo}" - end - end - - def path_component_escape(value) - URI.encode_www_form_component(value.to_s).gsub("+", "%20") - end - - def gh_api(*args, input: nil, allow_failure: false) - command = ["gh", "api", *args] - if input - command += ["-H", "Content-Type: application/json", "--input", "-"] - stdout, stderr, status = Open3.capture3(*command, stdin_data: input) - else - stdout, stderr, status = Open3.capture3(*command) - end - return [stdout, stderr, status] if allow_failure - - raise "gh api #{args.join(" ")} failed: #{stderr.empty? ? stdout : stderr}" unless status.success? - - stdout - end - - def gh_api_json(*args) - raw = gh_api(*args) - raw.strip.empty? ? nil : JSON.parse(raw) - end - - def discover_repos(owner:, include_archived: false) - stdout, stderr, status = Open3.capture3( - "gh", - "repo", - "list", - owner, - "--limit", - "1000", - "--json", - "nameWithOwner,isArchived" - ) - raise "gh repo list failed: #{stderr.empty? ? stdout : stderr}" unless status.success? - - JSON.parse(stdout).each_with_object([]) do |repo, repos| - next if repo["isArchived"] && !include_archived - - repos << repo.fetch("nameWithOwner") - end.sort - end - - def repo_opted_out?(repo, opt_out_file:) - path = opt_out_file.split("/").map { |part| path_component_escape(part) }.join("/") - _stdout, _stderr, status = gh_api("repos/#{repo}/contents/#{path}", allow_failure: true) - status.success? - end - - def current_labels(repo) - Array(gh_api_json("repos/#{repo}/labels?per_page=100")).map do |label| - { - "name" => label.fetch("name").to_s, - "description" => normalize_description(label["description"]), - "color" => normalize_color(label.fetch("color")) - } - end - end - - def plan_repo(repo:, desired_labels:, existing_labels:, archived: false, opted_out: false) - result = { - "repo" => repo, - "status" => "planned", - "additions" => [], - "updates" => [], - "skips" => [], - "errors" => [] - } - if archived - result["status"] = "skipped" - result["skips"] << "archived" - return result - end - if opted_out - result["status"] = "skipped" - result["skips"] << "opted out" - return result - end - - existing_by_name = existing_labels.to_h { |label| [label.fetch("name").downcase, label] } - desired_labels.each do |desired| - existing = existing_by_name[desired.fetch("name").downcase] - if existing.nil? - result["additions"] << desired - next - end - - changes = {} - if normalize_color(existing["color"]) != desired.fetch("color") - changes["color"] = { - "from" => normalize_color(existing["color"]), - "to" => desired.fetch("color") - } - end - if normalize_description(existing["description"]) != desired.fetch("description") - changes["description"] = { - "from" => normalize_description(existing["description"]), - "to" => desired.fetch("description") - } - end - result["updates"] << desired.merge("changes" => changes) unless changes.empty? - end - result["status"] = "in_sync" if result["additions"].empty? && result["updates"].empty? - result - end - - def apply_repo_plan(plan) - repo = plan.fetch("repo") - plan.fetch("additions").each do |label| - gh_api( - "--method", - "POST", - "repos/#{repo}/labels", - input: JSON.generate( - name: label.fetch("name"), - color: label.fetch("color"), - description: label.fetch("description") - ) - ) - end - plan.fetch("updates").each do |label| - encoded = path_component_escape(label.fetch("name")) - gh_api( - "--method", - "PATCH", - "repos/#{repo}/labels/#{encoded}", - input: JSON.generate( - new_name: label.fetch("name"), - color: label.fetch("color"), - description: label.fetch("description") - ) - ) - end - end - - def build_report(config:, repos:, dry_run:, include_archived:, opt_out_file:) - labels = config.fetch("labels") - repo_reports = repos.map do |repo| - begin - opted_out = repo_opted_out?(repo, opt_out_file: opt_out_file) - existing = opted_out ? [] : current_labels(repo) - plan_repo(repo: repo, desired_labels: labels, existing_labels: existing, opted_out: opted_out) - rescue StandardError => e - { - "repo" => repo, - "status" => "error", - "additions" => [], - "updates" => [], - "skips" => [], - "errors" => [e.message] - } - end - end - - { - "schema_version" => REPORT_SCHEMA_VERSION, - "generated_at" => Time.now.utc.iso8601, - "dry_run" => dry_run, - "include_archived" => include_archived, - "source_repo" => config["source_repo"], - "label_count" => labels.length, - "target_count" => repos.length, - "additive" => config.dig("sync", "additive") != false, - "opt_out_file" => opt_out_file, - "repos" => repo_reports, - "totals" => { - "additions" => repo_reports.sum { |repo| repo.fetch("additions").length }, - "updates" => repo_reports.sum { |repo| repo.fetch("updates").length }, - "errors" => repo_reports.sum { |repo| repo.fetch("errors").length }, - "skipped" => repo_reports.count { |repo| repo.fetch("status") == "skipped" }, - "in_sync" => repo_reports.count { |repo| repo.fetch("status") == "in_sync" } - } - } - end - - def markdown_report(report) - lines = [ - "# EvalOps Label Sync Report", - "", - "- Generated at: `#{report.fetch("generated_at")}`", - "- Mode: `#{report.fetch("dry_run") ? "dry-run" : "apply"}`", - "- Labels: `#{report.fetch("label_count")}`", - "- Target repos: `#{report.fetch("target_count")}`", - "- Additions: `#{report.dig("totals", "additions")}`", - "- Updates: `#{report.dig("totals", "updates")}`", - "- Errors: `#{report.dig("totals", "errors")}`", - "", - "| Repo | Status | Add | Update | Notes |", - "| --- | --- | ---: | ---: | --- |" - ] - report.fetch("repos").each do |repo| - notes = (repo.fetch("skips") + repo.fetch("errors")).join("; ") - lines << "| `#{repo.fetch("repo")}` | #{repo.fetch("status")} | #{repo.fetch("additions").length} | #{repo.fetch("updates").length} | #{notes} |" - end - lines.join("\n") - end - - def run(argv) - options = { - labels: "labels.yml", - owner: "evalops", - repos: [], - dry_run: true, - include_archived: false, - validate_only: false, - json_output: nil, - markdown_output: nil - } - OptionParser.new do |parser| - parser.on("--labels PATH", "Canonical labels YAML") { |value| options[:labels] = value } - parser.on("--owner OWNER", "GitHub owner for repo discovery") { |value| options[:owner] = value } - parser.on("--repos REPOS", "Comma-separated repo list") { |value| options[:repos] = parse_repos(value) } - parser.on("--dry-run", "Plan without applying changes") { options[:dry_run] = true } - parser.on("--apply", "Apply label additions and updates") { options[:dry_run] = false } - parser.on("--include-archived", "Include archived repos") { options[:include_archived] = true } - parser.on("--validate-only", "Validate labels.yml only") { options[:validate_only] = true } - parser.on("--json-output PATH", "Write JSON report") { |value| options[:json_output] = value } - parser.on("--markdown-output PATH", "Write Markdown report") { |value| options[:markdown_output] = value } - end.parse!(argv) - - config = load_config(options.fetch(:labels)) - errors = validation_errors(config) - unless errors.empty? - warn errors.join("\n") - return 1 - end - return 0 if options[:validate_only] - - repos = options.fetch(:repos) - repos = discover_repos(owner: options.fetch(:owner), include_archived: options.fetch(:include_archived)) if repos.empty? - opt_out_file = config.dig("sync", "opt_out_file") || DEFAULT_OPT_OUT_FILE - report = build_report( - config: config, - repos: repos, - dry_run: options.fetch(:dry_run), - include_archived: options.fetch(:include_archived), - opt_out_file: opt_out_file - ) - unless options.fetch(:dry_run) - report.fetch("repos").each do |repo_report| - next unless repo_report.fetch("status") == "planned" - - apply_repo_plan(repo_report) - end - end - - json = JSON.pretty_generate(report) - if options[:json_output] - File.write(options[:json_output], "#{json}\n") - else - puts json - end - File.write(options[:markdown_output], "#{markdown_report(report)}\n") if options[:markdown_output] - report.dig("totals", "errors").positive? ? 1 : 0 - end -end - -if $PROGRAM_NAME == __FILE__ - exit EvalOpsLabelSync.run(ARGV) -end diff --git a/.github/scripts/validate-services-catalog.rb b/.github/scripts/validate-services-catalog.rb deleted file mode 100755 index e6d60d1..0000000 --- a/.github/scripts/validate-services-catalog.rb +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "optparse" -require "yaml" - -ALLOWED_TIERS = %w[critical standard experimental].freeze -ALLOWED_RUNTIMES = %w[gke none standalone].freeze -REQUIRED_FIELDS = %w[description team language tier runtime repo].freeze - -options = { - path: "services.yaml", -} - -OptionParser.new do |parser| - parser.banner = "Usage: validate-services-catalog.rb [services.yaml]" -end.parse! - -options[:path] = ARGV.fetch(0, options[:path]) - -def error(errors, service, message) - errors << "#{service}: #{message}" -end - -catalog = YAML.load_file(options[:path]) -services = catalog.fetch("services") -errors = [] - -unless services.is_a?(Hash) && services.any? - abort "#{options[:path]}: services must be a non-empty mapping" -end - -seen_repos = {} -services.each do |name, service| - unless name.to_s.match?(/\A[a-z0-9][a-z0-9-]*\z/) - error(errors, name, "service key must be kebab-safe lowercase") - end - - unless service.is_a?(Hash) - error(errors, name, "service entry must be a mapping") - next - end - - REQUIRED_FIELDS.each do |field| - value = service[field] - error(errors, name, "missing #{field}") if value.nil? || value.to_s.strip.empty? - end - - description = service["description"].to_s.strip - error(errors, name, "description must be at least 8 characters") if description.length < 8 - - tier = service["tier"].to_s - error(errors, name, "tier must be one of #{ALLOWED_TIERS.join(", ")}") unless ALLOWED_TIERS.include?(tier) - - runtime = service["runtime"].to_s - unless ALLOWED_RUNTIMES.include?(runtime) - error(errors, name, "runtime must be one of #{ALLOWED_RUNTIMES.join(", ")}") - end - - repo = service["repo"].to_s - unless repo.match?(/\Aevalops\/[a-z0-9][a-z0-9._-]*\z/) - error(errors, name, "repo must look like evalops/") - end - - if (previous = seen_repos[repo]) - error(errors, name, "repo duplicates #{previous}") - else - seen_repos[repo] = name - end - - depends_on = service.fetch("depends_on", []) - if depends_on.is_a?(Array) - depends_on.each do |dependency| - unless services.key?(dependency) - error(errors, name, "depends_on references unknown service #{dependency.inspect}") - end - end - else - error(errors, name, "depends_on must be a list when present") - end - - next unless service.key?("proto_consumer") - - proto_consumer = service["proto_consumer"] - unless proto_consumer == true || proto_consumer == false - error(errors, name, "proto_consumer must be true or false when present") - end - if proto_consumer == true && depends_on.is_a?(Array) && !depends_on.include?("proto") - error(errors, name, "proto_consumer services must include proto in depends_on") - end -end - -if errors.any? - warn "#{options[:path]} failed validation:" - errors.each { |message| warn "- #{message}" } - exit 1 -end - -puts "ok #{options[:path]} (#{services.length} services)" diff --git a/.github/scripts/verify-evalopsbot-review-setup.rb b/.github/scripts/verify-evalopsbot-review-setup.rb deleted file mode 100644 index 92bd478..0000000 --- a/.github/scripts/verify-evalopsbot-review-setup.rb +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "json" -require "open3" -require "optparse" -require "time" -require "yaml" - -module EvalOpsBotReviewSetup - module_function - - def gh_api_json(path) - stdout, stderr, status = Open3.capture3("gh", "api", path) - raise "gh api #{path} failed: #{stderr.empty? ? stdout : stderr}" unless status.success? - - JSON.parse(stdout) - end - - def load_contract(path) - YAML.safe_load(File.read(path), permitted_classes: [], aliases: false) - end - - def workflow_active?(repo, path) - encoded = path.split("/").last - workflow = gh_api_json("repos/#{repo}/actions/workflows/#{encoded}") - workflow.fetch("path") == path && workflow.fetch("state") == "active" - rescue StandardError - false - end - - def selected_secret_repositories(org:, secret:) - response = gh_api_json("orgs/#{org}/actions/secrets/#{secret}/repositories?per_page=100") - Array(response.fetch("repositories", [])).map { |repo| repo.fetch("full_name") }.sort - rescue StandardError - [] - end - - def verify(contract, live: true, generated_at: Time.now.utc) - errors = [] - warnings = [] - org = contract.fetch("org") - central_repo = contract.fetch("central_repo") - dispatch_secret = contract.fetch("dispatch_secret") - target_repos = Array(contract.fetch("target_repositories", [])) - exemptions = Array(contract.fetch("exemptions", [])) - - errors << "reviewer must be EvalOpsBot" unless contract.fetch("reviewer") == "EvalOpsBot" - errors << "target_repositories must not be empty" if target_repos.empty? - errors << "exemptions must be empty unless an owner and expiry are recorded" unless exemptions.all? do |row| - row["repo"].to_s.start_with?("#{org}/") && row["owner"].to_s.length.positive? && row["expires"].to_s.length.positive? - end - - central_workflows = Array(contract.fetch("central_workflows", [])) - central_workflows.each do |path| - local_path = File.expand_path("../../#{path}", __dir__) - errors << "missing central workflow #{path}" unless File.exist?(local_path) - end - - missing_secret_repos = [] - inactive_workflows = [] - if live - secret_repos = selected_secret_repositories(org: org, secret: dispatch_secret) - target_repos.each do |target| - repo = target.fetch("repo") - fallback_workflow = target.fetch("fallback_workflow") - missing_secret_repos << repo unless secret_repos.include?(repo) - inactive_workflows << "#{repo}:#{fallback_workflow}" unless workflow_active?(repo, fallback_workflow) - end - central_workflows.each do |path| - errors << "central workflow #{path} is not active" unless workflow_active?(central_repo, path) - end - end - - errors.concat(missing_secret_repos.map { |repo| "#{repo} is missing from #{dispatch_secret} selected repositories" }) - errors.concat(inactive_workflows.map { |entry| "#{entry} is not active" }) - app_secrets = Array(contract.fetch("app_secrets", [])) - warnings << "GitHub App secrets are declared but cannot be value-verified by this audit" unless app_secrets.empty? - - { - "schema_version" => 1, - "generated_at" => generated_at.iso8601, - "status" => errors.empty? ? "pass" : "fail", - "org" => org, - "reviewer" => contract.fetch("reviewer"), - "central_repo" => central_repo, - "target_repository_count" => target_repos.length, - "central_workflows" => central_workflows, - "missing_secret_repositories" => missing_secret_repos, - "inactive_fallback_workflows" => inactive_workflows, - "warnings" => warnings, - "errors" => errors - } - end - - def markdown_report(report) - lines = [ - "## EvalOpsBot Review Setup Audit", - "", - "- Status: `#{report.fetch("status")}`", - "- Reviewer: `#{report.fetch("reviewer")}`", - "- Central repo: `#{report.fetch("central_repo")}`", - "- Target repos: #{report.fetch("target_repository_count")}", - "", - "### Errors" - ] - errors = report.fetch("errors") - lines.concat(errors.empty? ? ["- None"] : errors.map { |error| "- #{error}" }) - warnings = report.fetch("warnings") - unless warnings.empty? - lines << "" - lines << "### Warnings" - lines.concat(warnings.map { |warning| "- #{warning}" }) - end - lines.join("\n") - end -end - -if $PROGRAM_NAME == __FILE__ - options = { - contract: ".github/evalopsbot-review-targets.yml", - live: true, - output: "evalopsbot-review-setup-audit.json" - } - OptionParser.new do |parser| - parser.on("--contract PATH") { |value| options[:contract] = value } - parser.on("--offline") { options[:live] = false } - parser.on("--output PATH") { |value| options[:output] = value } - parser.on("--markdown-output PATH") { |value| options[:markdown_output] = value } - end.parse! - - report = EvalOpsBotReviewSetup.verify( - EvalOpsBotReviewSetup.load_contract(options.fetch(:contract)), - live: options.fetch(:live) - ) - File.write(options.fetch(:output), JSON.pretty_generate(report)) - File.write(options[:markdown_output], EvalOpsBotReviewSetup.markdown_report(report)) if options[:markdown_output] - puts JSON.pretty_generate(report) - exit(report.fetch("status") == "pass" ? 0 : 1) -end diff --git a/.github/scripts/verify-org-control-plane-contract.rb b/.github/scripts/verify-org-control-plane-contract.rb deleted file mode 100644 index 955ca38..0000000 --- a/.github/scripts/verify-org-control-plane-contract.rb +++ /dev/null @@ -1,287 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "digest" -require "json" -require "optparse" -require "time" -require "yaml" - -module EvalOpsOrgControlPlaneContract - SCHEMA_VERSION = "evalops.org_control_plane_contract.v1" - ALLOWED_FIXTURE_OUTCOMES = %w[pass degraded_report fail_closed degraded_safe].freeze - REQUIRED_EVIDENCE_FIELDS = %w[source_id decision_id output_id].freeze - - module_function - - def load_contract(path) - YAML.safe_load(File.read(path), permitted_classes: [], aliases: false) - end - - def relative_path(root, path) - File.expand_path(path, root) - end - - def file_digest(root, path) - absolute = relative_path(root, path) - return nil unless File.file?(absolute) - - Digest::SHA256.file(absolute).hexdigest - end - - def check_path(root, path, errors, warnings, required: true) - absolute = relative_path(root, path) - return true if File.file?(absolute) - - message = "#{path} does not exist" - required ? errors << message : warnings << message - false - end - - def check_top_level(contract, errors) - errors << "schema_version must be #{SCHEMA_VERSION}" unless contract["schema_version"] == SCHEMA_VERSION - %w[contract_id owner_repo workflow requirements provenance slo_gates golden_workflows adversarial_fixtures].each do |key| - errors << "#{key} is required" unless contract.key?(key) - end - end - - def check_requirements(contract, root, errors, warnings) - Array(contract["requirements"]).each do |requirement| - id = requirement["id"].to_s - errors << "requirement id is required" if id.empty? - source_path = requirement.dig("source", "path") - errors << "#{id}: source.path is required" if source_path.to_s.empty? - check_path(root, source_path, errors, warnings) unless source_path.to_s.empty? - missing_fields = REQUIRED_EVIDENCE_FIELDS - Array(requirement["evidence_fields"]).map(&:to_s) - errors << "#{id}: evidence_fields missing #{missing_fields.join(", ")}" unless missing_fields.empty? - Array(requirement["checked_by"]).each do |path| - check_path(root, path, errors, warnings) - end - end - end - - def check_provenance(contract, root, errors, warnings) - provenance = contract["provenance"] || {} - stable_id_pattern = provenance["stable_id_pattern"].to_s - errors << "provenance.stable_id_pattern is required" if stable_id_pattern.empty? - - source_ids = Array(provenance["source_records"]).map { |record| record["id"] } - Array(provenance["source_records"]).each do |record| - errors << "source record id is required" if record["id"].to_s.empty? - path = record["path"].to_s - errors << "#{record["id"]}: path is required" if path.empty? - check_path(root, path, errors, warnings) unless path.empty? - end - - decision_ids = Array(provenance["derived_decisions"]).map { |record| record["id"] } - Array(provenance["derived_decisions"]).each do |record| - check_path(root, record["path"], errors, warnings) if record["path"] - Array(record["derived_from"]).each do |source_id| - errors << "#{record["id"]}: unknown source #{source_id}" unless source_ids.include?(source_id) - end - end - - Array(provenance["emitted_outputs"]).each do |record| - errors << "emitted output id is required" if record["id"].to_s.empty? - produced_by = record["produced_by"].to_s - errors << "#{record["id"]}: produced_by is required" if produced_by.empty? - check_path(root, produced_by, errors, warnings) unless produced_by.empty? - end - - errors << "at least one source record is required" if source_ids.empty? - errors << "at least one derived decision is required" if decision_ids.empty? - end - - def check_slo_gates(contract, errors) - gates = Array(contract["slo_gates"]) - errors << "at least one slo_gate is required" if gates.empty? - gates.each do |gate| - id = gate["id"].to_s - dimensions = Array(gate["dimensions"]).map(&:to_s) - %w[latency correctness degraded_mode evidence].each do |dimension| - errors << "#{id}: missing SLO dimension #{dimension}" unless dimensions.include?(dimension) - end - errors << "#{id}: fallback is required" if gate["fallback"].to_s.empty? - errors << "#{id}: success_signal is required" if gate["success_signal"].to_s.empty? - errors << "#{id}: failure_signal is required" if gate["failure_signal"].to_s.empty? - end - end - - def check_github_security_configuration(contract, errors) - config = contract["github_security_configuration"] || {} - errors << "github_security_configuration is required" if config.empty? - - errors << "github_security_configuration.id must be 245233" unless config["id"] == 245_233 - errors << "github_security_configuration.default_for_new_repos must be all" unless config["default_for_new_repos"] == "all" - - required = config["required_settings"] || {} - { - "advanced_security" => "secret_protection", - "code_scanning_default_setup" => "disabled", - "dependency_graph" => "enabled", - "dependency_graph_autosubmit_action" => "disabled", - "dependabot_alerts" => "enabled", - "secret_scanning" => "enabled", - "secret_scanning_push_protection" => "enabled" - }.each do |key, expected| - errors << "github_security_configuration.required_settings.#{key} must be #{expected}" unless required[key] == expected - end - - forbidden = config["forbidden_workflows"] || {} - actions = Array(forbidden["actions"]) - generated_paths = Array(forbidden["generated_paths"]) - checked_in_globs = Array(forbidden["checked_in_path_globs"]) - errors << "github_security_configuration.forbidden_workflows.actions must include github/codeql-action" unless actions.include?("github/codeql-action") - unless generated_paths.include?("dynamic/github-code-scanning/codeql") - errors << "github_security_configuration.forbidden_workflows.generated_paths must include dynamic/github-code-scanning/codeql" - end - unless checked_in_globs.any? { |glob| glob.include?("codeql") } - errors << "github_security_configuration.forbidden_workflows.checked_in_path_globs must include a codeql glob" - end - end - - def check_golden_workflows(contract, root, errors, warnings) - workflows = Array(contract["golden_workflows"]) - errors << "at least one golden_workflow is required" if workflows.empty? - workflows.each do |workflow| - id = workflow["id"].to_s - %w[workflow verifier].each do |key| - path = workflow[key].to_s - errors << "#{id}: #{key} is required" if path.empty? - check_path(root, path, errors, warnings) unless path.empty? - end - Array(workflow["tests"]).each { |path| check_path(root, path, errors, warnings) } - %w[success_fixture degraded_fixture failure_fixture].each do |key| - fixture = workflow[key] || {} - outcome = fixture["expected_outcome"].to_s - errors << "#{id}: #{key}.expected_outcome is required" if outcome.empty? - errors << "#{id}: unsupported #{key}.expected_outcome #{outcome}" unless outcome.empty? || ALLOWED_FIXTURE_OUTCOMES.include?(outcome) - end - end - end - - def check_adversarial_fixtures(contract, root, errors, warnings) - fixtures = Array(contract["adversarial_fixtures"]) - errors << "at least one adversarial_fixture is required" if fixtures.empty? - categories = fixtures.map { |fixture| fixture["category"].to_s } - %w[prompt_poisoning tool_poisoning data_poisoning].each do |category| - errors << "missing adversarial fixture category #{category}" unless categories.include?(category) - end - fixtures.each do |fixture| - id = fixture["id"].to_s - errors << "adversarial fixture id is required" if id.empty? - check_path(root, fixture["blocked_by"], errors, warnings) if fixture["blocked_by"] - outcome = fixture["expected_outcome"].to_s - unless %w[fail_closed degraded_safe].include?(outcome) - errors << "#{id}: adversarial expected_outcome must be fail_closed or degraded_safe" - end - errors << "#{id}: input is required" if fixture["input"].to_s.empty? - end - end - - def evidence(contract, root) - provenance = contract["provenance"] || {} - Array(provenance["source_records"]).map do |record| - { - "source_id" => record["id"], - "path" => record["path"], - "sha256" => file_digest(root, record["path"]) - } - end - end - - def verify(contract, root: Dir.pwd, generated_at: Time.now.utc) - errors = [] - warnings = [] - check_top_level(contract, errors) - check_requirements(contract, root, errors, warnings) - check_provenance(contract, root, errors, warnings) - check_slo_gates(contract, errors) - check_github_security_configuration(contract, errors) - check_golden_workflows(contract, root, errors, warnings) - check_adversarial_fixtures(contract, root, errors, warnings) - - { - "schema_version" => "#{SCHEMA_VERSION}.report", - "contract_schema_version" => contract["schema_version"], - "contract_id" => contract["contract_id"], - "owner_repo" => contract["owner_repo"], - "generated_at" => generated_at.iso8601, - "status" => errors.empty? ? "pass" : "fail", - "metrics" => { - "requirements_checked" => Array(contract["requirements"]).length, - "source_records" => Array(contract.dig("provenance", "source_records")).length, - "derived_decisions" => Array(contract.dig("provenance", "derived_decisions")).length, - "emitted_outputs" => Array(contract.dig("provenance", "emitted_outputs")).length, - "slo_gates" => Array(contract["slo_gates"]).length, - "github_security_configuration" => contract["github_security_configuration"] ? 1 : 0, - "golden_workflows" => Array(contract["golden_workflows"]).length, - "adversarial_fixtures" => Array(contract["adversarial_fixtures"]).length - }, - "evidence" => evidence(contract, root), - "errors" => errors, - "warnings" => warnings - } - end - - def markdown_report(report) - lines = [ - "# Org Control Plane Contract Report", - "", - "- Contract: `#{report["contract_id"]}`", - "- Owner: `#{report["owner_repo"]}`", - "- Generated at: `#{report["generated_at"]}`", - "- Status: `#{report["status"]}`", - "", - "## Metrics" - ] - report.fetch("metrics").each do |key, value| - lines << "- #{key}: `#{value}`" - end - lines << "" - lines << "## Evidence" - report.fetch("evidence").each do |item| - lines << "- `#{item["source_id"]}` #{item["path"]} sha256=#{item["sha256"]}" - end - unless report.fetch("errors").empty? - lines << "" - lines << "## Errors" - report.fetch("errors").each { |error| lines << "- #{error}" } - end - unless report.fetch("warnings").empty? - lines << "" - lines << "## Warnings" - report.fetch("warnings").each { |warning| lines << "- #{warning}" } - end - lines.join("\n") - end - - def run(argv) - options = { - contract: ".github/contracts/org-control-plane.yml", - json_output: nil, - markdown_output: nil - } - OptionParser.new do |parser| - parser.on("--contract PATH", "Contract YAML path") { |value| options[:contract] = value } - parser.on("--json-output PATH", "Write JSON report") { |value| options[:json_output] = value } - parser.on("--markdown-output PATH", "Write Markdown report") { |value| options[:markdown_output] = value } - end.parse!(argv) - - root = Dir.pwd - contract = load_contract(relative_path(root, options.fetch(:contract))) - report = verify(contract, root: root) - json = JSON.pretty_generate(report) - if options[:json_output] - File.write(relative_path(root, options[:json_output]), "#{json}\n") - else - puts json - end - File.write(relative_path(root, options[:markdown_output]), "#{markdown_report(report)}\n") if options[:markdown_output] - report["status"] == "pass" ? 0 : 1 - end -end - -if $PROGRAM_NAME == __FILE__ - exit EvalOpsOrgControlPlaneContract.run(ARGV) -end diff --git a/.github/workflow-templates/agent-authorship-labels.properties.json b/.github/workflow-templates/agent-authorship-labels.properties.json deleted file mode 100644 index 9330c32..0000000 --- a/.github/workflow-templates/agent-authorship-labels.properties.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "name": "Agent authorship labels", - "description": "Apply agent-authored, agent-assisted, or mixed-authorship labels to pull requests based on Maestro commit trailers.", - "iconName": "octicon tag", - "categories": [ - "Automation", - "Code review" - ] -} diff --git a/.github/workflow-templates/agent-authorship-labels.yml b/.github/workflow-templates/agent-authorship-labels.yml deleted file mode 100644 index fa5bea4..0000000 --- a/.github/workflow-templates/agent-authorship-labels.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: Agent authorship labels - -on: - pull_request_target: - types: [opened, synchronize, reopened, ready_for_review, edited] - -permissions: - contents: read - pull-requests: write - issues: write - -jobs: - label: - uses: evalops/.github/.github/workflows/agent-authorship-label.yml@main - with: - runner_label: ubuntu-latest diff --git a/.github/workflow-templates/codex-ci-triage.properties.json b/.github/workflow-templates/codex-ci-triage.properties.json deleted file mode 100644 index 2854ba9..0000000 --- a/.github/workflow-templates/codex-ci-triage.properties.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "name": "Codex CI failure triage", - "description": "Manually run Codex against a failed GitHub Actions run and optionally post the fix summary to a PR.", - "iconName": "octicon pulse", - "categories": [ - "Automation", - "Continuous integration" - ] -} diff --git a/.github/workflow-templates/codex-ci-triage.yml b/.github/workflow-templates/codex-ci-triage.yml deleted file mode 100644 index 2a70d15..0000000 --- a/.github/workflow-templates/codex-ci-triage.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: Codex CI failure triage - -on: - workflow_dispatch: - inputs: - run_id: - description: "GitHub Actions run id to triage" - required: true - type: string - pr_number: - description: "Optional PR number to comment on" - required: false - type: string - -permissions: - contents: write - actions: read - pull-requests: write - issues: write - -jobs: - triage: - runs-on: ubuntu-latest - timeout-minutes: 45 - outputs: - final_message: ${{ steps.run-codex.outputs.final-message }} - steps: - - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Capture failed run evidence - env: - GH_TOKEN: ${{ github.token }} - RUN_ID: ${{ inputs.run_id }} - run: | - { - echo "# GitHub Actions failure" - echo - gh run view "${RUN_ID}" --repo "${GITHUB_REPOSITORY}" --json url,name,displayTitle,event,headBranch,headSha,conclusion,createdAt,updatedAt - echo - gh run view "${RUN_ID}" --repo "${GITHUB_REPOSITORY}" --log-failed || true - } > codex-ci-evidence.md - - - name: Run Codex CI triage - id: run-codex - uses: openai/codex-action@5c3f4ccdb2b8790f73d6b21751ac00e602aa0c02 - with: - openai-api-key: ${{ secrets.OPENAI_API_KEY }} - prompt: | - Investigate the failure described in codex-ci-evidence.md. Start - from the exact failed run/job/step, distinguish stale failures from - live failures, and make the smallest safe patch when appropriate. - Report commands run and remaining CI or review-thread work. - codex-args: '["--full-auto"]' - output-file: codex-ci-triage.md - safety-strategy: drop-sudo - sandbox: workspace-write - - - name: Post triage summary - if: ${{ inputs.pr_number != '' && steps.run-codex.outputs.final-message != '' }} - uses: actions/github-script@v7 - env: - CODEX_FINAL_MESSAGE: ${{ steps.run-codex.outputs.final-message }} - PR_NUMBER: ${{ inputs.pr_number }} - with: - github-token: ${{ github.token }} - script: | - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: Number(process.env.PR_NUMBER), - body: process.env.CODEX_FINAL_MESSAGE, - }); diff --git a/.github/workflow-templates/codex-label-churn-audit.properties.json b/.github/workflow-templates/codex-label-churn-audit.properties.json deleted file mode 100644 index c185e30..0000000 --- a/.github/workflow-templates/codex-label-churn-audit.properties.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "name": "Codex label churn audit", - "description": "Have Codex inspect PR label mutation events and identify automation loops.", - "iconName": "octicon tag", - "categories": [ - "Automation", - "Code review" - ] -} diff --git a/.github/workflow-templates/codex-label-churn-audit.yml b/.github/workflow-templates/codex-label-churn-audit.yml deleted file mode 100644 index 8ff491d..0000000 --- a/.github/workflow-templates/codex-label-churn-audit.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: Codex label churn audit - -on: - workflow_dispatch: - inputs: - pr_number: - description: "Pull request number to audit" - required: true - type: string - -permissions: - contents: read - pull-requests: read - issues: write - -jobs: - audit: - runs-on: ubuntu-latest - timeout-minutes: 30 - steps: - - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Capture label timeline - env: - GH_TOKEN: ${{ github.token }} - PR_NUMBER: ${{ inputs.pr_number }} - run: | - { - echo "# Label timeline" - echo - gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/events" --paginate - echo - echo "# Workflows that mention labels" - search_paths=() - for path in .github/workflows scripts; do - if [ -e "${path}" ]; then - search_paths+=("${path}") - fi - done - - if [ "${#search_paths[@]}" -gt 0 ]; then - grep -RInE "add-label|remove-label|gh pr edit|issues.addLabels|issues.removeLabel|labels" "${search_paths[@]}" || true - fi - } > codex-label-churn-evidence.md - - - name: Run Codex label audit - id: run-codex - uses: openai/codex-action@5c3f4ccdb2b8790f73d6b21751ac00e602aa0c02 - with: - openai-api-key: ${{ secrets.OPENAI_API_KEY }} - prompt: | - Audit codex-label-churn-evidence.md. Identify which automation is - adding and removing labels, whether the churn is intentional, and - the smallest durable fix. Remember that EvalOps human committed - code is usually LLM-authored, so agent-authorship labels should not - be treated as suspicious by default. - output-file: codex-label-churn-audit.md - safety-strategy: drop-sudo - sandbox: read-only - - - name: Comment with audit - if: ${{ steps.run-codex.outputs.final-message != '' }} - uses: actions/github-script@v7 - env: - CODEX_FINAL_MESSAGE: ${{ steps.run-codex.outputs.final-message }} - PR_NUMBER: ${{ inputs.pr_number }} - with: - github-token: ${{ github.token }} - script: | - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: Number(process.env.PR_NUMBER), - body: process.env.CODEX_FINAL_MESSAGE, - }); diff --git a/.github/workflow-templates/codex-post-merge-verify.properties.json b/.github/workflow-templates/codex-post-merge-verify.properties.json deleted file mode 100644 index 7fa647f..0000000 --- a/.github/workflow-templates/codex-post-merge-verify.properties.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "name": "Codex post-merge verification", - "description": "Have Codex inspect recent main-branch runs and summarize post-merge health.", - "iconName": "octicon checklist", - "categories": [ - "Automation", - "Continuous integration" - ] -} diff --git a/.github/workflow-templates/codex-post-merge-verify.yml b/.github/workflow-templates/codex-post-merge-verify.yml deleted file mode 100644 index 928d315..0000000 --- a/.github/workflow-templates/codex-post-merge-verify.yml +++ /dev/null @@ -1,70 +0,0 @@ -name: Codex post-merge verification - -on: - workflow_dispatch: - inputs: - merge_sha: - description: "Merge commit or main-branch SHA to verify" - required: false - type: string - schedule: - - cron: "37 */6 * * *" - -permissions: - contents: read - actions: read - issues: write - -jobs: - verify: - runs-on: ubuntu-latest - timeout-minutes: 30 - steps: - - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Capture main-branch evidence - env: - GH_TOKEN: ${{ github.token }} - MERGE_SHA: ${{ inputs.merge_sha }} - run: | - { - echo "# Default-branch verification" - echo - echo "repository=${GITHUB_REPOSITORY}" - echo "merge_sha=${MERGE_SHA:-${GITHUB_SHA}}" - echo - gh run list --repo "${GITHUB_REPOSITORY}" --branch main --limit 20 \ - --json databaseId,name,event,status,conclusion,headSha,createdAt,updatedAt,url - } > codex-post-merge-evidence.md - - - name: Run Codex verifier - id: run-codex - uses: openai/codex-action@5c3f4ccdb2b8790f73d6b21751ac00e602aa0c02 - with: - openai-api-key: ${{ secrets.OPENAI_API_KEY }} - prompt: | - Verify default-branch health using codex-post-merge-evidence.md and - the repository's local guidance. Decide whether the latest main - state is healthy, unhealthy, or inconclusive. If unhealthy, propose - the smallest follow-up with acceptance criteria. - output-file: codex-post-merge-verify.md - safety-strategy: drop-sudo - sandbox: read-only - - - name: Publish verification report - if: ${{ steps.run-codex.outputs.final-message != '' }} - env: - GH_TOKEN: ${{ github.token }} - run: | - title="[codex] Post-merge verification" - if issue_number="$(gh issue list --state open --search "\"${title}\" in:title" --limit 1 --json number --jq '.[0].number // empty')" && [ -n "${issue_number}" ]; then - gh issue comment "${issue_number}" --body-file codex-post-merge-verify.md - else - gh issue create --title "${title}" --body-file codex-post-merge-verify.md - fi - - - name: Append report to summary - if: ${{ always() && hashFiles('codex-post-merge-verify.md') != '' }} - run: cat codex-post-merge-verify.md >> "${GITHUB_STEP_SUMMARY}" diff --git a/.github/workflow-templates/codex-pr-review.properties.json b/.github/workflow-templates/codex-pr-review.properties.json deleted file mode 100644 index a6fac6d..0000000 --- a/.github/workflow-templates/codex-pr-review.properties.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "name": "Codex pull request review", - "description": "Run OpenAI Codex on PRs with EvalOps review guidance and post the findings back to the thread.", - "iconName": "octicon code-review", - "categories": [ - "Automation", - "Code review", - "Continuous integration" - ] -} diff --git a/.github/workflow-templates/codex-pr-review.yml b/.github/workflow-templates/codex-pr-review.yml deleted file mode 100644 index 24200d6..0000000 --- a/.github/workflow-templates/codex-pr-review.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Codex pull request review - -on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] - workflow_dispatch: - inputs: - pr_number: - description: "Pull request number to review" - required: false - type: string - -permissions: - contents: read - pull-requests: write - issues: write - -jobs: - codex-review: - if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} - runs-on: ubuntu-latest - timeout-minutes: 30 - outputs: - final_message: ${{ steps.run-codex.outputs.final-message }} - steps: - - name: Determine pull request number - id: pr - env: - EVENT_NAME: ${{ github.event_name }} - EVENT_PR_NUMBER: ${{ github.event.pull_request.number }} - INPUT_PR_NUMBER: ${{ inputs.pr_number }} - run: | - if [ "${EVENT_NAME}" = "pull_request" ]; then - echo "number=${EVENT_PR_NUMBER}" >> "${GITHUB_OUTPUT}" - else - echo "number=${INPUT_PR_NUMBER}" >> "${GITHUB_OUTPUT}" - fi - - - uses: actions/checkout@v5 - with: - ref: ${{ steps.pr.outputs.number != '' && format('refs/pull/{0}/head', steps.pr.outputs.number) || github.ref }} - fetch-depth: 0 - - - name: Pre-fetch pull request refs - if: ${{ steps.pr.outputs.number != '' }} - env: - GH_TOKEN: ${{ github.token }} - PR_NUMBER: ${{ steps.pr.outputs.number }} - run: | - base_ref="$(gh pr view "${PR_NUMBER}" --json baseRefName --jq .baseRefName)" - git fetch --no-tags origin \ - "${base_ref}" \ - "+refs/pull/${PR_NUMBER}/head" - - - name: Run Codex review - id: run-codex - uses: openai/codex-action@5c3f4ccdb2b8790f73d6b21751ac00e602aa0c02 - with: - openai-api-key: ${{ secrets.OPENAI_API_KEY }} - prompt: | - Review this EvalOps pull request. Read applicable AGENTS.md files, - inspect the diff and live GitHub context, and focus on actionable - defects, missing tests, generated drift, CI risk, workflow label - churn, and distributed tracing regressions. Include file/line - references when possible. - output-file: codex-review.md - safety-strategy: drop-sudo - sandbox: read-only - - - name: Post Codex feedback - if: ${{ steps.run-codex.outputs.final-message != '' && steps.pr.outputs.number != '' }} - env: - GH_TOKEN: ${{ github.token }} - PR_NUMBER: ${{ steps.pr.outputs.number }} - run: gh pr comment "${PR_NUMBER}" --body-file codex-review.md diff --git a/.github/workflow-templates/codex-structured-pr-review.properties.json b/.github/workflow-templates/codex-structured-pr-review.properties.json deleted file mode 100644 index ecb27cc..0000000 --- a/.github/workflow-templates/codex-structured-pr-review.properties.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "name": "Codex structured pull request review", - "description": "Run Codex with a JSON schema and publish actionable findings as inline PR review comments.", - "iconName": "octicon code-review", - "categories": [ - "Automation", - "Code review", - "Continuous integration" - ] -} diff --git a/.github/workflow-templates/codex-structured-pr-review.yml b/.github/workflow-templates/codex-structured-pr-review.yml deleted file mode 100644 index 2f159f2..0000000 --- a/.github/workflow-templates/codex-structured-pr-review.yml +++ /dev/null @@ -1,142 +0,0 @@ -name: Codex structured pull request review - -on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] - workflow_dispatch: - inputs: - pr_number: - description: "Pull request number to review" - required: false - type: string - -permissions: - contents: read - pull-requests: write - issues: write - -concurrency: - group: codex-structured-review-${{ github.event_name == 'pull_request' && github.event.pull_request.number || inputs.pr_number || github.run_id }} - cancel-in-progress: true - -jobs: - codex-structured-review: - if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} - runs-on: ubuntu-latest - timeout-minutes: 35 - steps: - - name: Determine pull request number - id: pr - env: - EVENT_NAME: ${{ github.event_name }} - EVENT_PR_NUMBER: ${{ github.event.pull_request.number }} - INPUT_PR_NUMBER: ${{ inputs.pr_number }} - run: | - set -euo pipefail - if [ "${EVENT_NAME}" = "pull_request" ]; then - number="${EVENT_PR_NUMBER}" - else - number="${INPUT_PR_NUMBER}" - fi - if [ -z "${number}" ]; then - echo "::error::pr_number is required for workflow_dispatch." - exit 1 - fi - echo "number=${number}" >> "${GITHUB_OUTPUT}" - - - uses: actions/checkout@v5 - with: - ref: refs/pull/${{ steps.pr.outputs.number }}/head - fetch-depth: 0 - - - name: Fetch pull request refs - id: refs - env: - GH_TOKEN: ${{ github.token }} - PR_NUMBER: ${{ steps.pr.outputs.number }} - run: | - set -euo pipefail - pr_json="$(gh pr view "${PR_NUMBER}" --json baseRefName,baseRefOid,headRefOid,headRefName,title,url)" - base_ref="$(jq -r .baseRefName <<<"${pr_json}")" - base_sha="$(jq -r .baseRefOid <<<"${pr_json}")" - head_sha="$(jq -r .headRefOid <<<"${pr_json}")" - head_ref="$(jq -r .headRefName <<<"${pr_json}")" - title="$(jq -r .title <<<"${pr_json}")" - url="$(jq -r .url <<<"${pr_json}")" - git fetch --no-tags origin \ - "${base_ref}" \ - "+refs/pull/${PR_NUMBER}/head" - { - echo "base_ref=${base_ref}" - echo "base_sha=${base_sha}" - echo "head_sha=${head_sha}" - echo "head_ref=${head_ref}" - echo "title=${title}" - echo "url=${url}" - } >> "${GITHUB_OUTPUT}" - - - name: Build Codex review prompt - env: - PR_NUMBER: ${{ steps.pr.outputs.number }} - PR_TITLE: ${{ steps.refs.outputs.title }} - PR_URL: ${{ steps.refs.outputs.url }} - BASE_SHA: ${{ steps.refs.outputs.base_sha }} - HEAD_SHA: ${{ steps.refs.outputs.head_sha }} - BASE_REF: ${{ steps.refs.outputs.base_ref }} - HEAD_REF: ${{ steps.refs.outputs.head_ref }} - run: | - set -euo pipefail - prompt_file="codex-structured-review-prompt.md" - cat .github/codex/prompts/structured-pr-review.md > "${prompt_file}" - { - echo - echo "Repository: ${GITHUB_REPOSITORY}" - echo "Pull request: #${PR_NUMBER} ${PR_TITLE}" - echo "Pull request URL: ${PR_URL}" - echo "Base: ${BASE_REF} ${BASE_SHA}" - echo "Head: ${HEAD_REF} ${HEAD_SHA}" - echo - echo "Changed files:" - git --no-pager diff --name-status "${BASE_SHA}" "${HEAD_SHA}" - echo - echo "Unified diff with five lines of context:" - git --no-pager diff --unified=5 "${BASE_SHA}" "${HEAD_SHA}" - } >> "${prompt_file}" - - - name: Run Codex structured review - id: run-codex - uses: openai/codex-action@5c3f4ccdb2b8790f73d6b21751ac00e602aa0c02 - with: - openai-api-key: ${{ secrets.OPENAI_API_KEY }} - prompt-file: codex-structured-review-prompt.md - output-schema-file: .github/codex/schemas/pr-review.schema.json - output-file: codex-structured-review.json - safety-strategy: drop-sudo - sandbox: read-only - model: ${{ vars.CODEX_REVIEW_MODEL || 'gpt-5.5' }} - effort: high - - - name: Publish structured review - if: ${{ always() && steps.run-codex.outputs.final-message != '' }} - env: - GH_TOKEN: ${{ github.token }} - PR_NUMBER: ${{ steps.pr.outputs.number }} - HEAD_SHA: ${{ steps.refs.outputs.head_sha }} - run: | - set -euo pipefail - ruby .github/scripts/publish-codex-structured-review.rb \ - --review-json codex-structured-review.json \ - --repo "${GITHUB_REPOSITORY}" \ - --pr "${PR_NUMBER}" \ - --commit "${HEAD_SHA}" - - - name: Upload Codex review artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: codex-structured-review - path: | - codex-structured-review-prompt.md - codex-structured-review.json - if-no-files-found: ignore - retention-days: 7 diff --git a/.github/workflow-templates/pysa.properties.json b/.github/workflow-templates/pysa.properties.json deleted file mode 100644 index ea486d4..0000000 --- a/.github/workflow-templates/pysa.properties.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "name": "Pysa static analysis", - "description": "Run Pyre/Pysa taint analysis for Python repositories.", - "iconName": "octicon shield-check", - "categories": [ - "Security", - "Continuous integration" - ] -} diff --git a/.github/workflow-templates/pysa.yml b/.github/workflow-templates/pysa.yml deleted file mode 100644 index 7a7d49d..0000000 --- a/.github/workflow-templates/pysa.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Pysa static analysis - -on: - pull_request: - paths: - - "**/*.py" - - "pyproject.toml" - - "requirements*.txt" - - ".pyre_configuration*" - - ".pysa/**" - - ".github/workflows/pysa.yml" - workflow_dispatch: - -permissions: - contents: read - -jobs: - pysa: - uses: evalops/.github/.github/workflows/pysa.yml@main diff --git a/.github/workflow-templates/review-thread-guard.properties.json b/.github/workflow-templates/review-thread-guard.properties.json deleted file mode 100644 index f694504..0000000 --- a/.github/workflow-templates/review-thread-guard.properties.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "Review thread guard", - "description": "Fail pull requests that still have unresolved high-priority review feedback.", - "iconName": "octicon stop", - "categories": ["Automation", "Code review"] -} diff --git a/.github/workflow-templates/review-thread-guard.yml b/.github/workflow-templates/review-thread-guard.yml deleted file mode 100644 index 019b35b..0000000 --- a/.github/workflow-templates/review-thread-guard.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Review thread guard - -on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] - pull_request_review: - types: [submitted, edited, dismissed] - pull_request_review_comment: - types: [created, edited, deleted] - -permissions: - contents: read - pull-requests: read - -jobs: - unresolved-review-threads: - if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} - uses: evalops/.github/.github/workflows/review-thread-guard.yml@c02a97ba9b92c6b2ac837aab77dc3becb77f301c - with: - pr_number: ${{ github.event.pull_request.number }} - min_severity: high - guard_ref: c02a97ba9b92c6b2ac837aab77dc3becb77f301c - settle_seconds: "90" diff --git a/.github/workflows/agent-authorship-label.yml b/.github/workflows/agent-authorship-label.yml deleted file mode 100644 index 92ebe61..0000000 --- a/.github/workflows/agent-authorship-label.yml +++ /dev/null @@ -1,318 +0,0 @@ -name: agent-authorship-label - -on: - workflow_call: - inputs: - fail_on_incomplete: - description: "Fail when a Maestro-marked commit is missing one or more required Maestro trailers" - required: false - type: boolean - default: false - runner_label: - description: "Runner label used for the label job" - required: false - type: string - default: ubuntu-latest - helper_ref: - description: "evalops/.github ref used to checkout helper scripts" - required: false - type: string - default: main - -permissions: - contents: read - pull-requests: write - issues: write - -jobs: - label: - runs-on: ${{ inputs.runner_label }} - timeout-minutes: 10 - steps: - - name: Checkout org workflow helpers - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - with: - repository: evalops/.github - ref: ${{ inputs.helper_ref }} - path: org-defaults - - - name: Resolve pull request - id: pr - shell: bash - run: | - set -euo pipefail - - number="$(jq -r '.pull_request.number // empty' "${GITHUB_EVENT_PATH}")" - if [ -z "${number}" ]; then - echo "::error::agent-authorship-label must run from a pull_request or pull_request_target event." - exit 1 - fi - - echo "number=${number}" >> "${GITHUB_OUTPUT}" - - - name: Fetch pull request commits - shell: bash - env: - GH_TOKEN: ${{ github.token }} - PR_NUMBER: ${{ steps.pr.outputs.number }} - run: | - set -euo pipefail - gh api --paginate "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/commits" \ - --jq '.[] | {sha: .sha, message: .commit.message}' > commits.jsonl - - - name: Classify authorship - id: classify - shell: bash - run: | - set -euo pipefail - ruby org-defaults/.github/scripts/classify-agent-authorship.rb \ - --github-output "${GITHUB_OUTPUT}" \ - commits.jsonl - - - name: Ensure authorship labels exist - shell: bash - env: - GH_TOKEN: ${{ github.token }} - run: | - set -euo pipefail - - label_api_denied() { - local output_file="$1" - grep -qiE '(Bad credentials|HTTP 401|Resource not accessible|HTTP 403)' "${output_file}" - } - - warn_label_api_denied() { - local action="$1" - local output_file="$2" - echo "::warning::Skipping authorship label maintenance while ${action}; GitHub API denied label access." - cat "${output_file}" >&2 - } - - ensure_label() { - local name="$1" - local color="$2" - local description="$3" - local label_json - local lookup_stderr - local lookup_status=0 - lookup_stderr="$(mktemp)" - label_json="$(gh api "repos/${GITHUB_REPOSITORY}/labels/${name}" 2>"${lookup_stderr}")" || lookup_status=$? - - if [ "${lookup_status}" -eq 0 ]; then - rm -f "${lookup_stderr}" - local existing_color - local existing_description - existing_color="$(jq -r '.color // ""' <<<"${label_json}")" - existing_description="$(jq -r '.description // ""' <<<"${label_json}")" - - if [ "${existing_color}" != "${color}" ] || [ "${existing_description}" != "${description}" ]; then - local update_stderr - local update_status=0 - update_stderr="$(mktemp)" - gh api --method PATCH "repos/${GITHUB_REPOSITORY}/labels/${name}" \ - -f color="${color}" \ - -f description="${description}" >/dev/null 2>"${update_stderr}" || update_status=$? - if [ "${update_status}" -ne 0 ]; then - if label_api_denied "${update_stderr}"; then - warn_label_api_denied "updating ${name}" "${update_stderr}" - rm -f "${update_stderr}" - return 0 - fi - cat "${update_stderr}" >&2 - rm -f "${update_stderr}" - return "${update_status}" - fi - rm -f "${update_stderr}" - fi - return 0 - fi - - if label_api_denied "${lookup_stderr}"; then - warn_label_api_denied "looking up ${name}" "${lookup_stderr}" - rm -f "${lookup_stderr}" - return 0 - fi - - if ! grep -qiE '(not found|404)' "${lookup_stderr}" && ! jq -e '(.status | tostring) == "404" or .message == "Not Found"' <<<"${label_json}" >/dev/null 2>&1; then - cat "${lookup_stderr}" >&2 - if [ -n "${label_json}" ]; then - printf '%s\n' "${label_json}" >&2 - fi - rm -f "${lookup_stderr}" - return "${lookup_status}" - fi - rm -f "${lookup_stderr}" - - local output_file - local create_status=0 - output_file="$(mktemp)" - - gh api --method POST "repos/${GITHUB_REPOSITORY}/labels" \ - -f name="${name}" \ - -f color="${color}" \ - -f description="${description}" >"${output_file}" 2>&1 || create_status=$? - - if [ "${create_status}" -eq 0 ]; then - rm -f "${output_file}" - return 0 - fi - - if grep -qiE '(already.?exists|already_exists|Validation Failed)' "${output_file}"; then - echo "Label ${name} was created concurrently; updating metadata if needed." - rm -f "${output_file}" - create_status=0 - output_file="$(mktemp)" - gh api --method PATCH "repos/${GITHUB_REPOSITORY}/labels/${name}" \ - -f color="${color}" \ - -f description="${description}" >"${output_file}" 2>&1 || create_status=$? - if [ "${create_status}" -ne 0 ]; then - if label_api_denied "${output_file}"; then - warn_label_api_denied "updating concurrently-created ${name}" "${output_file}" - rm -f "${output_file}" - return 0 - fi - cat "${output_file}" >&2 - rm -f "${output_file}" - return "${create_status}" - fi - rm -f "${output_file}" - return 0 - fi - - if label_api_denied "${output_file}"; then - warn_label_api_denied "creating ${name}" "${output_file}" - rm -f "${output_file}" - return 0 - fi - - cat "${output_file}" >&2 - rm -f "${output_file}" - return "${create_status}" - } - - ensure_label "agent-authored" "6f42c1" "All PR commits carry explicit Maestro authorship trailers" - ensure_label "agent-assisted" "1d76db" "PR commits are assumed LLM-assisted but do not carry explicit Maestro trailers" - ensure_label "mixed-authorship" "fbca04" "Some PR commits carry explicit Maestro trailers and some are untrailered" - - - name: Apply authorship label - shell: bash - env: - GH_TOKEN: ${{ github.token }} - PR_NUMBER: ${{ steps.pr.outputs.number }} - AUTHORSHIP_LABEL: ${{ steps.classify.outputs.label }} - run: | - set -euo pipefail - - label_api_denied() { - local output_file="$1" - grep -qiE '(Bad credentials|HTTP 401|Resource not accessible|HTTP 403)' "${output_file}" - } - - skip_label_apply() { - local action="$1" - local output_file="$2" - echo "::warning::Skipping authorship label apply while ${action}; GitHub API denied label access." - cat "${output_file}" >&2 - exit 0 - } - - current_labels_file="$(mktemp)" - current_labels_stderr="$(mktemp)" - current_labels_status=0 - gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/labels" \ - --jq '.[].name' >"${current_labels_file}" 2>"${current_labels_stderr}" || current_labels_status=$? - if [ "${current_labels_status}" -ne 0 ]; then - if label_api_denied "${current_labels_stderr}"; then - skip_label_apply "listing labels on #${PR_NUMBER}" "${current_labels_stderr}" - fi - cat "${current_labels_stderr}" >&2 - rm -f "${current_labels_file}" "${current_labels_stderr}" - exit "${current_labels_status}" - fi - current_labels="$(cat "${current_labels_file}")" - rm -f "${current_labels_file}" "${current_labels_stderr}" - - has_label() { - local label="$1" - grep -Fxq "${label}" <<<"${current_labels}" - } - - authorship_labels=(agent-authored agent-assisted mixed-authorship human-authored) - conflicting_labels=() - has_desired_label=false - - for label in "${authorship_labels[@]}"; do - if ! has_label "${label}"; then - continue - fi - if [ "${label}" = "${AUTHORSHIP_LABEL}" ]; then - has_desired_label=true - else - conflicting_labels+=("${label}") - fi - done - - if [ "${has_desired_label}" = true ] && [ "${#conflicting_labels[@]}" -eq 0 ]; then - echo "Authorship label ${AUTHORSHIP_LABEL} is already current on #${PR_NUMBER}; no label changes needed." - exit 0 - fi - - delete_label_if_present() { - local label="$1" - local stderr_file - local status=0 - stderr_file="$(mktemp)" - - gh api --method DELETE \ - "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/labels/${label}" >/dev/null 2>"${stderr_file}" || status=$? - - if [ "${status}" -eq 0 ]; then - rm -f "${stderr_file}" - return 0 - fi - - if grep -qiE '(not found|404)' "${stderr_file}"; then - echo "Label ${label} was already absent; continuing." - rm -f "${stderr_file}" - return 0 - fi - - if label_api_denied "${stderr_file}"; then - skip_label_apply "removing ${label} from #${PR_NUMBER}" "${stderr_file}" - fi - - cat "${stderr_file}" >&2 - rm -f "${stderr_file}" - return "${status}" - } - - for label in "${conflicting_labels[@]}"; do - delete_label_if_present "${label}" - done - - if [ "${has_desired_label}" != true ]; then - add_stderr="$(mktemp)" - add_status=0 - gh api --method POST "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/labels" \ - -f "labels[]=${AUTHORSHIP_LABEL}" >/dev/null 2>"${add_stderr}" || add_status=$? - if [ "${add_status}" -ne 0 ]; then - if label_api_denied "${add_stderr}"; then - skip_label_apply "adding ${AUTHORSHIP_LABEL} to #${PR_NUMBER}" "${add_stderr}" - fi - cat "${add_stderr}" >&2 - rm -f "${add_stderr}" - exit "${add_status}" - fi - rm -f "${add_stderr}" - fi - - echo "Applied ${AUTHORSHIP_LABEL} to #${PR_NUMBER}." - - - name: Check required Maestro trailers - if: ${{ inputs.fail_on_incomplete && steps.classify.outputs.incomplete_agent_commits != '0' }} - shell: bash - env: - INCOMPLETE_AGENT_COMMITS: ${{ steps.classify.outputs.incomplete_agent_commits }} - run: | - echo "::error::${INCOMPLETE_AGENT_COMMITS} Maestro-marked commit(s) are missing required authorship trailers." - exit 1 diff --git a/.github/workflows/agent-mcp-config-rollout.yml b/.github/workflows/agent-mcp-config-rollout.yml deleted file mode 100644 index fd16337..0000000 --- a/.github/workflows/agent-mcp-config-rollout.yml +++ /dev/null @@ -1,107 +0,0 @@ -name: agent-mcp-config-rollout - -on: - pull_request: - paths: - - .github/agent-mcp/templates/** - - .github/scripts/sync-agent-mcp-config.rb - - .github/workflows/agent-mcp-config-rollout.yml - - test/sync_agent_mcp_config_test.rb - workflow_dispatch: - inputs: - target_repos: - description: "Comma-separated repos. Empty means all active public evalops repos." - required: false - default: "" - apply: - description: "Open rollout PRs" - required: false - type: boolean - default: false - -permissions: - contents: read - pull-requests: write - -jobs: - validate: - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - uses: actions/checkout@v5 - - name: Validate templates against empty workspace - run: | - set -euo pipefail - tmp="$(mktemp -d)" - ruby .github/scripts/sync-agent-mcp-config.rb \ - --workspace "${tmp}" \ - --templates .github/agent-mcp/templates \ - --write \ - --json-output agent-mcp-config-report.json \ - --markdown-output agent-mcp-config-report.md - ruby .github/scripts/sync-agent-mcp-config.rb \ - --workspace "${tmp}" \ - --templates .github/agent-mcp/templates \ - --check \ - --json-output agent-mcp-config-check.json \ - --markdown-output agent-mcp-config-check.md - cat agent-mcp-config-report.md >> "${GITHUB_STEP_SUMMARY}" - - rollout: - if: ${{ github.event_name == 'workflow_dispatch' && inputs.apply == true }} - runs-on: ubuntu-latest - timeout-minutes: 45 - env: - GH_TOKEN: ${{ secrets.EVALOPS_MCP_ROLLOUT_TOKEN || secrets.EVALOPS_ORG_WRITE_TOKEN }} - TARGET_REPOS: ${{ inputs.target_repos }} - BRANCH_NAME: codex/evalops-agent-mcp-config - steps: - - uses: actions/checkout@v5 - - - name: Require rollout token - run: | - if [ -z "${GH_TOKEN}" ]; then - echo "::error::Set EVALOPS_MCP_ROLLOUT_TOKEN or EVALOPS_ORG_WRITE_TOKEN with cross-repo contents:write and pull-requests:write access." - exit 2 - fi - - - name: Open rollout PRs - run: | - set -euo pipefail - git config --global user.name "evalops-automation" - git config --global user.email "automation@evalops.dev" - - if [ -n "${TARGET_REPOS}" ]; then - repos="$(echo "${TARGET_REPOS}" | tr ',' '\n' | sed 's/^ *//; s/ *$//' | sed '/^$/d')" - else - repos="$(gh repo list evalops --limit 1000 --json nameWithOwner,isArchived,visibility --jq '.[] | select((.isArchived|not) and .visibility == "PUBLIC") | .nameWithOwner')" - fi - - while IFS= read -r repo; do - [ -n "${repo}" ] || continue - workdir="$(mktemp -d)" - echo "Rolling out EvalOps MCP config to ${repo}" - gh repo clone "${repo}" "${workdir}/repo" -- --depth=1 - cd "${workdir}/repo" - git switch -c "${BRANCH_NAME}" - ruby "${GITHUB_WORKSPACE}/.github/scripts/sync-agent-mcp-config.rb" \ - --workspace "${PWD}" \ - --templates "${GITHUB_WORKSPACE}/.github/agent-mcp/templates" \ - --write \ - --json-output "${workdir}/agent-mcp-config-report.json" \ - --markdown-output "${workdir}/agent-mcp-config-report.md" - if git diff --quiet; then - echo "${repo}: already in sync" - cd "${GITHUB_WORKSPACE}" - continue - fi - git add .mcp.json .codex/config.toml .cursor/mcp.json AGENTS.md .gitignore - git commit -m "chore: add EvalOps agent MCP config" - git push --force-with-lease origin "${BRANCH_NAME}" - gh pr create \ - --repo "${repo}" \ - --head "${BRANCH_NAME}" \ - --title "chore: add EvalOps agent MCP config" \ - --body-file "${workdir}/agent-mcp-config-report.md" || true - cd "${GITHUB_WORKSPACE}" - done <<< "${repos}" diff --git a/.github/workflows/archived-dependabot-audit.yml b/.github/workflows/archived-dependabot-audit.yml deleted file mode 100644 index dcb40cf..0000000 --- a/.github/workflows/archived-dependabot-audit.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: archived-dependabot-audit - -on: - schedule: - - cron: "18 15 * * 1" - workflow_dispatch: - inputs: - repos: - description: "Comma-separated archived repos. Empty means discover all archived evalops repos." - required: false - default: "" - -permissions: - contents: read - pull-requests: read - -jobs: - audit: - runs-on: ubuntu-latest - timeout-minutes: 15 - env: - GH_TOKEN: ${{ secrets.EVALOPS_ORG_READ_TOKEN || github.token }} - REPOS: ${{ inputs.repos || '' }} - steps: - - uses: actions/checkout@v5 - - - name: Audit archived Dependabot state - run: | - set -euo pipefail - args=(--owner evalops --json-output archived-dependabot-audit.json --markdown-output archived-dependabot-audit.md) - if [ -n "${REPOS}" ]; then - args+=(--repos "${REPOS}") - fi - ruby .github/scripts/audit-archived-dependabot.rb "${args[@]}" - cat archived-dependabot-audit.md >> "${GITHUB_STEP_SUMMARY}" - - - name: Upload audit - uses: actions/upload-artifact@v4 - with: - name: archived-dependabot-audit - path: | - archived-dependabot-audit.json - archived-dependabot-audit.md - if-no-files-found: error - retention-days: 30 diff --git a/.github/workflows/codex-rails-check.yml b/.github/workflows/codex-rails-check.yml deleted file mode 100644 index f277025..0000000 --- a/.github/workflows/codex-rails-check.yml +++ /dev/null @@ -1,267 +0,0 @@ -name: codex-rails-check - -on: - pull_request: - paths: - - "AGENTS.md" - - "**/AGENTS.md" - - "labels.yml" - - "README.md" - - ".github/CODEOWNERS" - - ".agents/skills/**" - - ".github/agent-mcp/**" - - ".github/actionlint.yaml" - - ".github/codex/hooks/**" - - ".github/codex/schemas/**" - - ".github/contracts/**" - - ".github/scripts/**" - - ".github/ISSUE_TEMPLATE/**" - - ".github/pull_request_template.md" - - ".github/workflows/**" - - ".github/workflow-templates/**" - - "profile/**" - - "services.yaml" - - "test/**" - workflow_call: - inputs: - require_agents: - description: "Fail when the repository does not have AGENTS.md" - required: false - type: boolean - default: false - runner_label: - description: "Runner label used for the validation job" - required: false - type: string - default: ubuntu-latest - -permissions: - contents: read - -jobs: - validate: - runs-on: ${{ inputs.runner_label || 'ubuntu-latest' }} - timeout-minutes: 10 - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - - - name: Validate issue template YAML - shell: bash - run: | - set -euo pipefail - shopt -s globstar nullglob - files=(.github/ISSUE_TEMPLATE/*.yml .github/ISSUE_TEMPLATE/*.yaml) - if [ "${#files[@]}" -eq 0 ]; then - echo "No issue template YAML files found." - exit 0 - fi - ruby -e 'require "yaml"; ARGV.each { |f| YAML.load_file(f); puts "ok #{f}" }' "${files[@]}" - - - name: Validate workflow YAML - shell: bash - run: | - set -euo pipefail - shopt -s nullglob - files=(.github/workflows/*.yml .github/workflows/*.yaml .github/workflow-templates/*.yml .github/workflow-templates/*.yaml) - if [ "${#files[@]}" -eq 0 ]; then - echo "No workflow YAML files found." - exit 0 - fi - ruby -e 'require "yaml"; ARGV.each { |f| YAML.load_file(f); puts "ok #{f}" }' "${files[@]}" - - - name: Lint workflow semantics - shell: bash - run: | - set -euo pipefail - shopt -s nullglob - files=(.github/workflows/*.yml .github/workflows/*.yaml .github/workflow-templates/*.yml .github/workflow-templates/*.yaml) - if [ "${#files[@]}" -eq 0 ]; then - echo "No workflow YAML files found." - exit 0 - fi - if command -v actionlint >/dev/null 2>&1; then - actionlint -shellcheck= -pyflakes= "${files[@]}" - else - go run github.com/rhysd/actionlint/cmd/actionlint@v1.7.12 -shellcheck= -pyflakes= "${files[@]}" - fi - - - name: Reject prohibited code scanning actions - shell: bash - run: | - set -euo pipefail - shopt -s nullglob - files=(.github/workflows/*.yml .github/workflows/*.yaml .github/workflow-templates/*.yml .github/workflow-templates/*.yaml) - if [ "${#files[@]}" -eq 0 ]; then - echo "No workflow YAML files found." - exit 0 - fi - blocked_owner="github" - blocked_prefix="code" - blocked_suffix="ql-action" - blocked="${blocked_owner}/${blocked_prefix}${blocked_suffix}" - if grep -RInE "^[[:space:]]*(-[[:space:]]*)?uses:[[:space:]]*${blocked}([/@[:space:]]|$)" "${files[@]}"; then - echo "::error::EvalOps does not use GitHub default code scanning. Remove this workflow action and use bounded repo-owned checks instead." - exit 1 - fi - echo "ok: no prohibited GitHub default code-scanning action references" - - - name: Validate workflow template metadata - shell: bash - run: | - set -euo pipefail - shopt -s nullglob - files=(.github/workflow-templates/*.properties.json) - if [ "${#files[@]}" -eq 0 ]; then - echo "No workflow template metadata files found." - exit 0 - fi - ruby -e ' - require "json" - ARGV.each do |f| - data = JSON.parse(File.read(f)) - icon = data["iconName"].to_s - if !icon.empty? && !icon.match?(/\Aocticon [a-z0-9-]+\z/) && !File.exist?(".github/workflow-templates/#{icon}.svg") - warn "#{f}: iconName must be an octicon reference like \"octicon tag\" or a local SVG basename" - exit 1 - end - puts "ok #{f}" - end - ' "${files[@]}" - - - name: Validate Codex JSON schemas - shell: bash - run: | - set -euo pipefail - shopt -s nullglob - files=(.github/codex/schemas/*.json) - if [ "${#files[@]}" -eq 0 ]; then - echo "No Codex JSON schemas found." - exit 0 - fi - ruby -e 'require "json"; ARGV.each { |f| JSON.parse(File.read(f)); puts "ok #{f}" }' "${files[@]}" - - - name: Validate org control plane contract - shell: bash - run: | - set -euo pipefail - if [ ! -f .github/contracts/org-control-plane.yml ]; then - echo "No org control plane contract found." - exit 0 - fi - - ruby .github/scripts/verify-org-control-plane-contract.rb \ - --json-output org-control-plane-contract-report.json \ - --markdown-output org-control-plane-contract-report.md - cat org-control-plane-contract-report.md >> "${GITHUB_STEP_SUMMARY}" - - - name: Validate engineering practices contract - shell: bash - run: | - set -euo pipefail - if [ ! -f .github/contracts/engineering-practices.yml ]; then - echo "No engineering practices contract found." - exit 0 - fi - - ruby .github/scripts/audit-engineering-practices.rb \ - --contract-only \ - --json-output engineering-practices-contract-report.json \ - --markdown-output engineering-practices-contract-report.md - cat engineering-practices-contract-report.md >> "${GITHUB_STEP_SUMMARY}" - - - name: Validate canonical labels - shell: bash - run: | - set -euo pipefail - if [ ! -f labels.yml ]; then - echo "No labels.yml found." - exit 0 - fi - - ruby .github/scripts/sync-labels.rb --validate-only --labels labels.yml - - - name: Check AGENTS.md files - shell: bash - env: - REQUIRE_AGENTS: ${{ inputs.require_agents || false }} - run: | - set -euo pipefail - shopt -s globstar nullglob - if [ "${REQUIRE_AGENTS}" = "true" ] && [ ! -f AGENTS.md ]; then - echo "::error::AGENTS.md is required for this repository." - exit 1 - fi - - agents=(**/AGENTS.md) - if [ "${#agents[@]}" -eq 0 ]; then - echo "No AGENTS.md files found." - exit 0 - fi - - for agent in "${agents[@]}"; do - test -s "${agent}" || { echo "::error::${agent} is empty"; exit 1; } - echo "ok ${agent}" - done - - - name: Validate skill frontmatter - shell: bash - run: | - set -euo pipefail - shopt -s globstar nullglob - skills=(.agents/skills/**/SKILL.md) - if [ "${#skills[@]}" -eq 0 ]; then - echo "No repo skills found." - exit 0 - fi - - for skill in "${skills[@]}"; do - echo "checking ${skill}" - ruby -e ' - path = ARGV.fetch(0) - text = File.read(path) - unless text.start_with?("---\n") - warn "#{path}: missing YAML frontmatter" - exit 1 - end - _, yaml, = text.split(/^---\s*$/, 3) - require "yaml" - data = YAML.safe_load(yaml) - unless data.is_a?(Hash) && data["name"].to_s.match?(/\A[a-z0-9][a-z0-9_-]*\z/) - warn "#{path}: frontmatter must include kebab/snake-safe name" - exit 1 - end - unless data["description"].to_s.strip.length >= 20 - warn "#{path}: frontmatter must include a useful description" - exit 1 - end - ' "${skill}" - done - - - name: Validate service catalog - shell: bash - run: | - set -euo pipefail - if [ ! -f services.yaml ]; then - echo "No services.yaml found." - exit 0 - fi - - if [ ! -f .github/scripts/validate-services-catalog.rb ]; then - echo "::error::.github/scripts/validate-services-catalog.rb is required when services.yaml exists." - exit 1 - fi - - ruby .github/scripts/validate-services-catalog.rb services.yaml - - - name: Run repo Ruby tests - shell: bash - run: | - set -euo pipefail - shopt -s globstar nullglob - tests=(test/**/*_test.rb) - if [ "${#tests[@]}" -eq 0 ]; then - echo "No Ruby tests found." - exit 0 - fi - - ruby -Itest -e 'ARGV.each { |path| require "./#{path}" }' "${tests[@]}" diff --git a/.github/workflows/engineering-practices-audit.yml b/.github/workflows/engineering-practices-audit.yml deleted file mode 100644 index 2fe73d3..0000000 --- a/.github/workflows/engineering-practices-audit.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: Engineering Practices Audit - -on: - pull_request: - paths: - - ".github/contracts/engineering-practices.yml" - - ".github/scripts/audit-engineering-practices.rb" - - ".github/workflows/engineering-practices-audit.yml" - - "profile/ENGINEERING_PRACTICES.md" - - "test/audit_engineering_practices_test.rb" - schedule: - - cron: "31 16 * * 1" - workflow_dispatch: - inputs: - fail_on_findings: - description: "Fail the run when live practice drift is found" - required: false - default: "false" - -permissions: - contents: read - -jobs: - contract: - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - uses: actions/checkout@v5 - - - name: Validate engineering practices contract - run: | - set -euo pipefail - ruby .github/scripts/audit-engineering-practices.rb \ - --contract-only \ - --json-output engineering-practices-contract.json \ - --markdown-output engineering-practices-contract.md - cat engineering-practices-contract.md >> "${GITHUB_STEP_SUMMARY}" - - - name: Upload contract report - uses: actions/upload-artifact@v4 - with: - name: engineering-practices-contract - path: | - engineering-practices-contract.json - engineering-practices-contract.md - if-no-files-found: error - retention-days: 30 - - live-audit: - if: ${{ github.event_name != 'pull_request' }} - runs-on: ubuntu-latest - timeout-minutes: 20 - env: - GH_TOKEN: ${{ secrets.EVALOPS_ORG_READ_TOKEN }} - FAIL_ON_FINDINGS: ${{ inputs.fail_on_findings || 'false' }} - steps: - - uses: actions/checkout@v5 - - - name: Require org read token - run: | - set -euo pipefail - if [ -z "${GH_TOKEN}" ]; then - echo "::error::Set secrets.EVALOPS_ORG_READ_TOKEN with org-wide repo, issue, and security read access before running the live engineering practices audit." - exit 2 - fi - - - name: Audit live engineering practice drift - run: | - set -euo pipefail - args=( - --json-output engineering-practices-audit.json - --markdown-output engineering-practices-audit.md - ) - if [ "${FAIL_ON_FINDINGS}" = "true" ]; then - args+=(--fail-on-findings) - fi - ruby .github/scripts/audit-engineering-practices.rb "${args[@]}" - cat engineering-practices-audit.md >> "${GITHUB_STEP_SUMMARY}" - - - name: Upload live audit report - uses: actions/upload-artifact@v4 - with: - name: engineering-practices-audit - path: | - engineering-practices-audit.json - engineering-practices-audit.md - if-no-files-found: error - retention-days: 30 diff --git a/.github/workflows/evalops-pr-lens-review.yml b/.github/workflows/evalops-pr-lens-review.yml deleted file mode 100644 index e36e6fd..0000000 --- a/.github/workflows/evalops-pr-lens-review.yml +++ /dev/null @@ -1,370 +0,0 @@ -name: EvalOps PR lens review - -on: - schedule: - - cron: "23 */2 * * *" - repository_dispatch: - types: [evalopsbot-review-requested] - workflow_dispatch: - inputs: - target_repos: - description: "Comma-separated repositories to sweep" - required: false - default: "evalops/platform,evalops/deploy,evalops/maestro-internal" - target_prs: - description: "Optional comma-separated repo#number filters, for example platform#2023,deploy#17" - required: false - default: "" - min_confidence: - description: "Minimum confidence for PR comment publication" - required: false - default: "0.82" - model: - description: "Model for lens reviewers" - required: false - default: "claude-opus-4-7" - provider: - description: "LLM provider for lens reviewers: anthropic or openai" - required: false - default: "anthropic" - max_diff_bytes: - description: "Maximum unified diff bytes sent to each lens reviewer" - required: false - default: "180000" - -permissions: - contents: read - -concurrency: - group: evalops-pr-lens-review-${{ github.event_name == 'repository_dispatch' && (github.event.client_payload.target_prs || github.event.client_payload.target_pr) || github.event_name == 'workflow_dispatch' && inputs.target_prs || 'all-open' }} - cancel-in-progress: false - -jobs: - discover: - runs-on: ubuntu-latest - timeout-minutes: 10 - outputs: - matrix: ${{ steps.discover.outputs.matrix }} - has_work: ${{ steps.discover.outputs.has_work }} - pr_count: ${{ steps.discover.outputs.pr_count }} - env: - GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN || secrets.EVALOPS_ORG_READ_TOKEN }} - TARGET_REPOS: ${{ github.event_name == 'repository_dispatch' && (github.event.client_payload.target_repos || github.event.client_payload.target_repo) || inputs.target_repos || 'evalops/platform,evalops/deploy,evalops/maestro-internal' }} - TARGET_PRS: ${{ github.event_name == 'repository_dispatch' && (github.event.client_payload.target_prs || github.event.client_payload.target_pr) || inputs.target_prs || '' }} - FORCE_EXPLICIT_REVIEW_LENSES: ${{ github.event_name == 'repository_dispatch' && 'true' || 'false' }} - EXPLICIT_REVIEW_LENSES: migration-safety,nats-contract-drift,argo-manifest-skew,iam-blast-radius,generated-sdk-delta,eval-regression-risk - PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - - - name: Configure GitHub App token - shell: bash - env: - APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} - APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} - APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} - run: | - set -euo pipefail - if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then - token="$( - EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ - EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ - EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ - ruby .github/scripts/evalops-pr-lens-review.rb mint-app-token \ - --owner evalops \ - --repositories "${PR_LENS_APP_REPOSITORIES}" - )" - echo "::add-mask::${token}" - echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" - fi - - - name: Require cross-repo GitHub token - shell: bash - run: | - set -euo pipefail - if [ -z "${GH_TOKEN}" ]; then - echo "::error::Set EVALOPS_ORG_READ_TOKEN or EVALOPS_PR_LENS_TOKEN with read access to platform, deploy, and maestro-internal." - exit 2 - fi - - - name: Discover open PRs - id: discover - shell: bash - run: | - set -euo pipefail - args=( - discover - --repos "${TARGET_REPOS}" - --target-prs "${TARGET_PRS}" - --github-output "${GITHUB_OUTPUT}" - --matrix-output pr-lens-matrix.json - --targets-output pr-lens-targets.json - ) - if [ "${FORCE_EXPLICIT_REVIEW_LENSES}" = "true" ] && [ -n "${TARGET_PRS}" ]; then - args+=(--force-lenses "${EXPLICIT_REVIEW_LENSES}") - fi - ruby .github/scripts/evalops-pr-lens-review.rb "${args[@]}" - - - name: Upload discovery ledger - uses: actions/upload-artifact@v4 - with: - name: pr-lens-discovery - path: | - pr-lens-matrix.json - pr-lens-targets.json - if-no-files-found: error - retention-days: 30 - - lens-review: - needs: discover - if: ${{ needs.discover.outputs.has_work == 'true' }} - runs-on: ubuntu-latest - timeout-minutes: 40 - strategy: - fail-fast: false - matrix: ${{ fromJson(needs.discover.outputs.matrix) }} - env: - GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN }} - REVIEW_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY || secrets.EVALOPS_ANTHROPIC_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || secrets.EVALOPS_OPENAI_API_KEY }} - PR_LENS_PROVIDER_OVERRIDE: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.provider || github.event_name == 'workflow_dispatch' && inputs.provider || '' }} - PR_LENS_MODEL_OVERRIDE: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.model || github.event_name == 'workflow_dispatch' && inputs.model || '' }} - PR_LENS_MAX_DIFF_BYTES_OVERRIDE: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.max_diff_bytes || github.event_name == 'workflow_dispatch' && inputs.max_diff_bytes || '' }} - PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" - RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - steps: - - name: Checkout org review helpers - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - with: - path: org-defaults - - - name: Configure GitHub App token - shell: bash - env: - APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} - APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} - APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} - run: | - set -euo pipefail - if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then - token="$( - EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ - EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ - EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb mint-app-token \ - --owner evalops \ - --repositories "${PR_LENS_APP_REPOSITORIES}" - )" - echo "::add-mask::${token}" - echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" - echo "REVIEW_TOKEN=${token}" >> "${GITHUB_ENV}" - fi - - - name: Require cross-repo write token - shell: bash - run: | - set -euo pipefail - if [ -z "${REVIEW_TOKEN}" ]; then - echo "::error::Set EVALOPS_PR_LENS_TOKEN with read/write access to platform, deploy, and maestro-internal." - exit 2 - fi - - - name: Require model provider key - shell: bash - run: | - set -euo pipefail - if [ -z "${ANTHROPIC_API_KEY}" ] && [ -z "${OPENAI_API_KEY}" ]; then - echo "::error::Set ANTHROPIC_API_KEY/EVALOPS_ANTHROPIC_API_KEY or OPENAI_API_KEY/EVALOPS_OPENAI_API_KEY for PR lens review." - exit 2 - fi - - - name: Mark lens pending - shell: bash - run: | - set -euo pipefail - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb post-status \ - --repo "${{ matrix.repo }}" \ - --sha "${{ matrix.head_sha }}" \ - --context "${{ matrix.check_context }}" \ - --state pending \ - --description "Running ${{ matrix.lens }} lens review" \ - --target-url "${RUN_URL}" - - - name: Prepare target pull request head - id: refs - shell: bash - env: - TARGET_REPO: ${{ matrix.repo }} - PR_NUMBER: ${{ matrix.pr }} - LENS: ${{ matrix.lens }} - SNAPSHOT_HEAD_SHA: ${{ matrix.head_sha }} - SNAPSHOT_BASE_SHA: ${{ matrix.base_sha }} - run: | - set -euo pipefail - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb prepare-workspace \ - --repo "${TARGET_REPO}" \ - --pr "${PR_NUMBER}" \ - --lens "${LENS}" \ - --workspace target \ - --output lens-review.json \ - --github-output "${GITHUB_OUTPUT}" \ - --snapshot-head-sha "${SNAPSHOT_HEAD_SHA}" \ - --snapshot-base-sha "${SNAPSHOT_BASE_SHA}" - - - name: Run lens reviewer - if: ${{ steps.refs.outputs.skip != 'true' }} - shell: bash - env: - TARGET_REPO: ${{ matrix.repo }} - PR_NUMBER: ${{ matrix.pr }} - LENS: ${{ matrix.lens }} - BASE_SHA: ${{ steps.refs.outputs.base_sha }} - HEAD_SHA: ${{ steps.refs.outputs.head_sha }} - run: | - set -euo pipefail - args=( - run-lens - --repo "${TARGET_REPO}" \ - --pr "${PR_NUMBER}" \ - --lens "${LENS}" \ - --workspace target \ - --base-sha "${BASE_SHA}" \ - --head-sha "${HEAD_SHA}" \ - --output lens-review.json - --routing-config org-defaults/.github/pr-lens-routing.yml - ) - if [ -n "${PR_LENS_PROVIDER_OVERRIDE}" ]; then - args+=(--provider "${PR_LENS_PROVIDER_OVERRIDE}") - fi - if [ -n "${PR_LENS_MODEL_OVERRIDE}" ]; then - args+=(--model "${PR_LENS_MODEL_OVERRIDE}") - fi - if [ -n "${PR_LENS_MAX_DIFF_BYTES_OVERRIDE}" ]; then - args+=(--max-diff-bytes "${PR_LENS_MAX_DIFF_BYTES_OVERRIDE}") - fi - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb "${args[@]}" - - - name: Complete lens status - if: ${{ success() && steps.refs.outputs.skip != 'true' }} - shell: bash - run: | - set -euo pipefail - description="$( - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb \ - lens-status-description --review-json lens-review.json - )" - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb post-status \ - --repo "${{ matrix.repo }}" \ - --sha "${{ steps.refs.outputs.head_sha }}" \ - --context "${{ matrix.check_context }}" \ - --state success \ - --description "${description}" \ - --target-url "${RUN_URL}" - - - name: Complete skipped lens status - if: ${{ steps.refs.outputs.skip == 'true' }} - shell: bash - run: | - set -euo pipefail - description="$( - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb \ - lens-status-description --review-json lens-review.json - )" - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb post-status \ - --repo "${{ matrix.repo }}" \ - --sha "${{ steps.refs.outputs.head_sha || matrix.head_sha }}" \ - --context "${{ matrix.check_context }}" \ - --state success \ - --description "${description}" \ - --target-url "${RUN_URL}" - - - name: Mark lens errored - if: ${{ failure() }} - shell: bash - run: | - ruby org-defaults/.github/scripts/evalops-pr-lens-review.rb post-status \ - --repo "${{ matrix.repo }}" \ - --sha "${{ steps.refs.outputs.head_sha || matrix.head_sha }}" \ - --context "${{ matrix.check_context }}" \ - --state error \ - --description "${{ matrix.lens }} lens review failed" \ - --target-url "${RUN_URL}" - - - name: Upload lens review - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: pr-lens-${{ matrix.repo_slug }}-${{ matrix.pr }}-${{ matrix.lens }} - path: lens-review.json - if-no-files-found: ignore - retention-days: 30 - - meta-review: - needs: - - discover - - lens-review - if: ${{ always() && needs.discover.outputs.has_work == 'true' }} - runs-on: ubuntu-latest - timeout-minutes: 10 - env: - GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN }} - PR_LENS_MIN_CONFIDENCE: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.min_confidence || inputs.min_confidence || '0.82' }} - PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" - RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - steps: - - name: Checkout org review helpers - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - - - name: Configure GitHub App token - shell: bash - env: - APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} - APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} - APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} - run: | - set -euo pipefail - if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then - token="$( - EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ - EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ - EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ - ruby .github/scripts/evalops-pr-lens-review.rb mint-app-token \ - --owner evalops \ - --repositories "${PR_LENS_APP_REPOSITORIES}" - )" - echo "::add-mask::${token}" - echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" - fi - - - name: Download lens artifacts - uses: actions/download-artifact@v4 - with: - path: artifacts - - - name: Require cross-repo write token - shell: bash - run: | - set -euo pipefail - if [ -z "${GH_TOKEN}" ]; then - echo "::error::Set EVALOPS_PR_LENS_TOKEN with read/write access to platform, deploy, and maestro-internal." - exit 2 - fi - - - name: Publish high-confidence findings - shell: bash - run: | - set -euo pipefail - ruby .github/scripts/evalops-pr-lens-review.rb meta-review \ - --artifact-root artifacts \ - --min-confidence "${PR_LENS_MIN_CONFIDENCE}" \ - --output meta-review.json \ - --markdown-output "${GITHUB_STEP_SUMMARY}" - - - name: Upload meta review ledger - uses: actions/upload-artifact@v4 - with: - name: pr-lens-meta-review - path: meta-review.json - if-no-files-found: error - retention-days: 30 diff --git a/.github/workflows/evalopsbot-review-canary.yml b/.github/workflows/evalopsbot-review-canary.yml deleted file mode 100644 index 5df88de..0000000 --- a/.github/workflows/evalopsbot-review-canary.yml +++ /dev/null @@ -1,127 +0,0 @@ -name: EvalOpsBot review request canary - -on: - schedule: - - cron: "37 15 * * *" - workflow_dispatch: - inputs: - cleanup: - description: "Close the canary PR after the deep review is observed" - required: false - default: "true" - -permissions: - checks: read - contents: write - pull-requests: write - statuses: read - -concurrency: - group: evalopsbot-review-canary - cancel-in-progress: false - -jobs: - canary: - runs-on: ubuntu-latest - timeout-minutes: 20 - env: - GH_TOKEN: ${{ github.token }} - CANARY_BRANCH: evalopsbot-review-canary - CANARY_REPO: evalops/.github - CLEANUP: ${{ github.event_name == 'workflow_dispatch' && inputs.cleanup || 'true' }} - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - with: - persist-credentials: false - - - name: Create canary review request - id: canary - shell: bash - run: | - set -euo pipefail - git config user.name "EvalOpsBot canary" - git config user.email "evalopsbot-canary@users.noreply.github.com" - gh auth setup-git - git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/${CANARY_REPO}.git" - git fetch origin main --depth=1 - git checkout -B "${CANARY_BRANCH}" origin/main - mkdir -p .github/evalopsbot-canary - cat > .github/evalopsbot-canary/review-request.md </dev/null - head_sha="$(git rev-parse HEAD)" - { - echo "pr_number=${pr_number}" - echo "head_sha=${head_sha}" - } >> "${GITHUB_OUTPUT}" - - - name: Wait for deep review signal - shell: bash - env: - PR_NUMBER: ${{ steps.canary.outputs.pr_number }} - HEAD_SHA: ${{ steps.canary.outputs.head_sha }} - run: | - set -euo pipefail - ruby -rjson -ropen3 -e ' - repo = ENV.fetch("CANARY_REPO") - sha = ENV.fetch("HEAD_SHA") - context = "evalops-pr-lens/meta-review" - 12.times do |attempt| - status_json, status_err, status = Open3.capture3("gh", "api", "repos/#{repo}/commits/#{sha}/status") - check_json, check_err, check_status = Open3.capture3("gh", "api", "repos/#{repo}/commits/#{sha}/check-runs?check_name=#{context}&per_page=100") - statuses = status.success? ? JSON.parse(status_json).fetch("statuses", []) : [] - checks = check_status.success? ? JSON.parse(check_json).fetch("check_runs", []) : [] - status_match = statuses.find { |row| row["context"] == context && row["state"] != "pending" } - check_match = checks.find { |row| row["name"] == context && row["status"] == "completed" } - if status_match || check_match - puts "observed #{context} for #{repo}@#{sha}" - exit 0 - end - warn "attempt #{attempt + 1}/12: no completed #{context} yet" - sleep 45 - end - abort "timed out waiting for #{context} on #{repo}@#{sha}" - ' - - - name: Close canary PR - if: ${{ always() && env.CLEANUP == 'true' && steps.canary.outputs.pr_number != '' }} - shell: bash - env: - PR_NUMBER: ${{ steps.canary.outputs.pr_number }} - run: | - set -euo pipefail - gh pr close "${PR_NUMBER}" \ - --repo "${CANARY_REPO}" \ - --comment "EvalOpsBot requested-review canary complete for ${GITHUB_RUN_ID}." \ - --delete-branch || true diff --git a/.github/workflows/evalopsbot-review-request-dispatch.yml b/.github/workflows/evalopsbot-review-request-dispatch.yml deleted file mode 100644 index d0de6e4..0000000 --- a/.github/workflows/evalopsbot-review-request-dispatch.yml +++ /dev/null @@ -1,92 +0,0 @@ -name: EvalOpsBot review request dispatch - -on: - schedule: - - cron: "17 * * * *" - workflow_dispatch: - inputs: - reviewer: - description: "GitHub login that should trigger deep review" - required: false - default: "EvalOpsBot" - dry_run: - description: "Discover matching PRs without dispatching deep review" - required: false - default: "false" - -permissions: - contents: read - -concurrency: - group: evalopsbot-review-request-dispatch - cancel-in-progress: false - -jobs: - dispatch: - runs-on: ubuntu-latest - timeout-minutes: 10 - env: - GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN }} - REVIEWER: ${{ github.event_name == 'workflow_dispatch' && inputs.reviewer || 'EvalOpsBot' }} - DRY_RUN: ${{ github.event_name == 'workflow_dispatch' && inputs.dry_run || 'false' }} - RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - - - name: Configure GitHub App token - shell: bash - env: - APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} - APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} - APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} - run: | - set -euo pipefail - if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then - token="$( - EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ - EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ - EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ - ruby .github/scripts/evalops-pr-lens-review.rb mint-app-token \ - --owner evalops \ - --repositories "${PR_LENS_APP_REPOSITORIES}" - )" - echo "::add-mask::${token}" - echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" - fi - - - name: Require cross-repo review token - shell: bash - run: | - set -euo pipefail - if [ -z "${GH_TOKEN}" ]; then - echo "::error::Set EVALOPS_PR_LENS_TOKEN with org PR search, status write, and repository_dispatch access." - exit 2 - fi - - - name: Dispatch requested reviews - id: dispatch - shell: bash - run: | - set -euo pipefail - args=( - dispatch-review-requests - --owner evalops - --reviewer "${REVIEWER}" - --limit 100 - --target-url "${RUN_URL}" - --output evalopsbot-review-request-dispatch.json - --github-output "${GITHUB_OUTPUT}" - ) - if [ "${DRY_RUN}" = "true" ]; then - args+=(--dry-run) - fi - ruby .github/scripts/evalops-pr-lens-review.rb "${args[@]}" - - - name: Upload dispatch ledger - uses: actions/upload-artifact@v4 - with: - name: evalopsbot-review-request-dispatch - path: evalopsbot-review-request-dispatch.json - if-no-files-found: error - retention-days: 30 diff --git a/.github/workflows/evalopsbot-review-request.yml b/.github/workflows/evalopsbot-review-request.yml deleted file mode 100644 index 0c207f4..0000000 --- a/.github/workflows/evalopsbot-review-request.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: EvalOpsBot requested review - -on: - pull_request_target: - types: [review_requested] - -permissions: - contents: read - -jobs: - dispatch: - if: ${{ github.event.requested_reviewer.login == 'EvalOpsBot' }} - runs-on: ubuntu-latest - timeout-minutes: 5 - env: - GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN }} - TARGET_REPO: ${{ github.repository }} - TARGET_PR_NUMBER: ${{ github.event.pull_request.number }} - HEAD_SHA: ${{ github.event.pull_request.head.sha }} - REQUESTED_REVIEWER: ${{ github.event.requested_reviewer.login }} - RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - steps: - - name: Skip when dispatch token is unavailable - if: ${{ env.GH_TOKEN == '' }} - run: | - echo "EVALOPS_PR_LENS_TOKEN is unavailable; scheduled dispatcher remains the fallback." - - - name: Dispatch deep review - if: ${{ env.GH_TOKEN != '' }} - shell: bash - run: | - set -euo pipefail - dispatch_payload="$( - jq -n \ - --arg target_repo "${TARGET_REPO}" \ - --arg target_pr "${TARGET_REPO}#${TARGET_PR_NUMBER}" \ - --arg requested_reviewer "${REQUESTED_REVIEWER}" \ - --arg source "repo-review-request-workflow" \ - --arg requester "${GITHUB_ACTOR}" \ - '{ - event_type: "evalopsbot-review-requested", - client_payload: { - target_repo: $target_repo, - target_pr: $target_pr, - requested_reviewer: $requested_reviewer, - source: $source, - requester: $requester - } - }' - )" - gh api --method POST repos/evalops/.github/dispatches --input - <<<"${dispatch_payload}" - - - name: Mark deep review queued - if: ${{ env.GH_TOKEN != '' }} - shell: bash - run: | - set -euo pipefail - status_payload="$( - jq -n \ - --arg state "pending" \ - --arg context "evalops-pr-lens/meta-review" \ - --arg description "Queued EvalOpsBot requested deep review" \ - --arg target_url "${RUN_URL}" \ - '{ - state: $state, - context: $context, - description: $description, - target_url: $target_url - }' - )" - gh api --method POST "repos/${TARGET_REPO}/statuses/${HEAD_SHA}" --input - <<<"${status_payload}" diff --git a/.github/workflows/evalopsbot-review-setup-audit.yml b/.github/workflows/evalopsbot-review-setup-audit.yml deleted file mode 100644 index ac4105b..0000000 --- a/.github/workflows/evalopsbot-review-setup-audit.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: EvalOpsBot review setup audit - -on: - schedule: - - cron: "19 16 * * 1" - workflow_dispatch: - -permissions: - contents: read - -concurrency: - group: evalopsbot-review-setup-audit - cancel-in-progress: false - -jobs: - audit: - runs-on: ubuntu-latest - timeout-minutes: 10 - env: - GH_TOKEN: ${{ secrets.EVALOPS_PR_LENS_TOKEN || secrets.EVALOPS_REVIEW_GUARD_TOKEN || secrets.EVALOPS_ORG_READ_TOKEN }} - PR_LENS_APP_REPOSITORIES: ".github,platform,deploy,maestro-internal,maestro,ensemble,diffscope,chat,cerebro" - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - - - name: Configure GitHub App token - shell: bash - env: - APP_ID: ${{ secrets.EVALOPS_PR_LENS_APP_ID }} - APP_PRIVATE_KEY: ${{ secrets.EVALOPS_PR_LENS_APP_PRIVATE_KEY }} - APP_INSTALLATION_ID: ${{ secrets.EVALOPS_PR_LENS_APP_INSTALLATION_ID }} - run: | - set -euo pipefail - if [ -n "${APP_ID}" ] && [ -n "${APP_PRIVATE_KEY}" ]; then - token="$( - EVALOPS_PR_LENS_APP_ID="${APP_ID}" \ - EVALOPS_PR_LENS_APP_PRIVATE_KEY="${APP_PRIVATE_KEY}" \ - EVALOPS_PR_LENS_APP_INSTALLATION_ID="${APP_INSTALLATION_ID}" \ - ruby .github/scripts/evalops-pr-lens-review.rb mint-app-token \ - --owner evalops \ - --repositories "${PR_LENS_APP_REPOSITORIES}" - )" - echo "::add-mask::${token}" - echo "GH_TOKEN=${token}" >> "${GITHUB_ENV}" - fi - - - name: Verify review request setup - shell: bash - run: | - set -euo pipefail - ruby .github/scripts/verify-evalopsbot-review-setup.rb \ - --output evalopsbot-review-setup-audit.json \ - --markdown-output "${GITHUB_STEP_SUMMARY}" - - - name: Upload audit ledger - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: evalopsbot-review-setup-audit - path: evalopsbot-review-setup-audit.json - if-no-files-found: error - retention-days: 30 diff --git a/.github/workflows/positioning-guardrail.yml b/.github/workflows/positioning-guardrail.yml deleted file mode 100644 index a1b0e5f..0000000 --- a/.github/workflows/positioning-guardrail.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: positioning-guardrail - -on: - push: - branches: [main] - paths: - - "profile/README.md" - - "README.md" - - "AGENTS.md" - - "SECURITY.md" - - "scripts/check-positioning.mjs" - - ".github/workflows/positioning-guardrail.yml" - pull_request: - paths: - - "profile/README.md" - - "README.md" - - "AGENTS.md" - - "SECURITY.md" - - "scripts/check-positioning.mjs" - - ".github/workflows/positioning-guardrail.yml" - -jobs: - positioning: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-node@v4 - with: - node-version: "20" - - run: node scripts/check-positioning.mjs diff --git a/.github/workflows/pysa.yml b/.github/workflows/pysa.yml deleted file mode 100644 index c6510ab..0000000 --- a/.github/workflows/pysa.yml +++ /dev/null @@ -1,131 +0,0 @@ -name: pysa - -on: - workflow_call: - inputs: - python_version: - description: "Python version used to install and run Pyre/Pysa" - required: false - type: string - default: "3.12" - runner_label: - description: "Runner label used for Pysa" - required: false - type: string - default: ubuntu-latest - working_directory: - description: "Repository-relative directory to analyze" - required: false - type: string - default: "." - source_directories: - description: "Comma-separated Python source directories for generated Pyre config" - required: false - type: string - default: "." - taint_models_path: - description: "Optional Pysa taint-model directory" - required: false - type: string - default: ".pysa" - requirements_file: - description: "Optional requirements file to install before analysis" - required: false - type: string - default: "" - setup_command: - description: "Optional shell command to install repo-specific dependencies" - required: false - type: string - default: "" - pyre_package: - description: "pip package spec for Pyre/Pysa" - required: false - type: string - default: "pyre-check" - -permissions: - contents: read - -jobs: - analyze: - runs-on: ${{ inputs.runner_label }} - timeout-minutes: 25 - defaults: - run: - working-directory: ${{ inputs.working_directory }} - env: - PYRE_PACKAGE: ${{ inputs.pyre_package }} - REQUIREMENTS_FILE: ${{ inputs.requirements_file }} - SETUP_COMMAND: ${{ inputs.setup_command }} - SOURCE_DIRECTORIES: ${{ inputs.source_directories }} - TAINT_MODELS_PATH: ${{ inputs.taint_models_path }} - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 - - - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python_version }} - - - name: Install dependencies - shell: bash - run: | - set -euo pipefail - python -m pip install --upgrade pip - if [ -n "${REQUIREMENTS_FILE}" ]; then - python -m pip install -r "${REQUIREMENTS_FILE}" - fi - if [ -n "${SETUP_COMMAND}" ]; then - bash -lc "${SETUP_COMMAND}" - fi - python -m pip install "${PYRE_PACKAGE}" - - - name: Ensure Pyre configuration - shell: bash - run: | - set -euo pipefail - if [ -f ".pyre_configuration" ]; then - echo "using repository Pyre configuration" - exit 0 - fi - python - <<'PY' - import json - import os - - sources = [ - item.strip() - for item in os.environ["SOURCE_DIRECTORIES"].split(",") - if item.strip() - ] - if not sources: - sources = ["."] - with open(".pyre_configuration", "w", encoding="utf-8") as fh: - json.dump({"source_directories": sources}, fh, indent=2) - fh.write("\n") - PY - - - name: Run Pysa - shell: bash - run: | - set -euo pipefail - mkdir -p pysa-results - args=(analyze --save-results-to pysa-results) - if [ -n "${TAINT_MODELS_PATH}" ]; then - if [ -d "${TAINT_MODELS_PATH}" ]; then - args+=(--taint-models-path "${TAINT_MODELS_PATH}") - else - echo "::warning::Pysa taint model path '${TAINT_MODELS_PATH}' does not exist; running with bundled models only." - fi - fi - pyre "${args[@]}" | tee pysa-output.txt - - - name: Upload Pysa output - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: pysa-output - path: | - ${{ inputs.working_directory }}/pysa-output.txt - ${{ inputs.working_directory }}/pysa-results/** - if-no-files-found: ignore - retention-days: 7 diff --git a/.github/workflows/review-feedback-backfill.yml b/.github/workflows/review-feedback-backfill.yml deleted file mode 100644 index 3e8411b..0000000 --- a/.github/workflows/review-feedback-backfill.yml +++ /dev/null @@ -1,97 +0,0 @@ -name: Review feedback 30-day backfill - -on: - schedule: - - cron: "43 11 * * 0" - workflow_dispatch: - inputs: - since_hours: - description: "Merged PR lookback window" - required: false - default: "720" - min_severity: - description: "Minimum severity to report" - required: false - default: "high" - pr_limit: - description: "Maximum merged PRs to inspect" - required: false - default: "250" - -permissions: - contents: read - issues: write - pull-requests: read - -jobs: - backfill: - runs-on: ubuntu-latest - timeout-minutes: 60 - env: - GH_TOKEN: ${{ secrets.EVALOPS_REVIEW_GUARD_TOKEN || secrets.EVALOPS_ORG_READ_TOKEN }} - SINCE_HOURS: ${{ inputs.since_hours || '720' }} - MIN_SEVERITY: ${{ inputs.min_severity || 'high' }} - PR_LIMIT: ${{ inputs.pr_limit || '250' }} - steps: - - uses: actions/checkout@v5 - - - name: Require org review guard token - run: | - if [ -z "${GH_TOKEN}" ]; then - echo "::error::Set secrets.EVALOPS_REVIEW_GUARD_TOKEN or secrets.EVALOPS_ORG_READ_TOKEN with org-wide PR read access before running the 30-day feedback backfill." - exit 2 - fi - - - name: Run review feedback backfill - id: backfill - run: | - set +e - ruby .github/scripts/sweep-recent-review-feedback.rb \ - --owner evalops \ - --since-hours "${SINCE_HOURS}" \ - --pr-limit "${PR_LIMIT}" \ - --min-severity "${MIN_SEVERITY}" \ - --progress \ - --json-output review-feedback-30d-ledger.json \ - --guardrail-backlog-output review-feedback-30d-guardrail-backlog.md \ - --guardrail-backlog-json-output review-feedback-30d-guardrail-backlog.json \ - --guardrail-lifecycle-json-output review-feedback-30d-guardrail-lifecycle.json \ - --weekly-report-issue-repo evalops/.github \ - > review-feedback-30d-summary.md - status=$? - cat review-feedback-30d-summary.md >> "${GITHUB_STEP_SUMMARY}" - { - echo "" - cat review-feedback-30d-guardrail-backlog.md - } >> "${GITHUB_STEP_SUMMARY}" - if [ "${status}" -eq 0 ]; then - exit 0 - fi - if [ "${status}" -eq 1 ]; then - echo "::notice::30-day review feedback backfill found candidate guardrail classes" - exit 0 - fi - exit "${status}" - - - name: Upload review feedback 30-day ledger - if: ${{ always() && steps.backfill.outcome != 'skipped' }} - uses: actions/upload-artifact@v4 - with: - name: review-feedback-30d-ledger - path: | - review-feedback-30d-summary.md - review-feedback-30d-ledger.json - if-no-files-found: error - retention-days: 90 - - - name: Upload review feedback 30-day guardrail backlog - if: ${{ always() && steps.backfill.outcome != 'skipped' }} - uses: actions/upload-artifact@v4 - with: - name: review-feedback-30d-guardrail-backlog - path: | - review-feedback-30d-guardrail-backlog.md - review-feedback-30d-guardrail-backlog.json - review-feedback-30d-guardrail-lifecycle.json - if-no-files-found: error - retention-days: 90 diff --git a/.github/workflows/review-feedback-sentinel.yml b/.github/workflows/review-feedback-sentinel.yml deleted file mode 100644 index 6068735..0000000 --- a/.github/workflows/review-feedback-sentinel.yml +++ /dev/null @@ -1,96 +0,0 @@ -name: Review feedback sentinel - -on: - schedule: - - cron: "17 */6 * * *" - workflow_dispatch: - inputs: - since_hours: - description: "Merged PR lookback window" - required: false - default: "72" - min_severity: - description: "Minimum severity to report" - required: false - default: "high" - pr_limit: - description: "Maximum merged PRs to inspect" - required: false - default: "100" - -permissions: - contents: read - issues: write - pull-requests: read - -jobs: - sweep: - runs-on: ubuntu-latest - timeout-minutes: 15 - env: - GH_TOKEN: ${{ secrets.EVALOPS_REVIEW_GUARD_TOKEN || secrets.EVALOPS_ORG_READ_TOKEN }} - SINCE_HOURS: ${{ inputs.since_hours || '72' }} - MIN_SEVERITY: ${{ inputs.min_severity || 'high' }} - PR_LIMIT: ${{ inputs.pr_limit || '100' }} - steps: - - uses: actions/checkout@v5 - - - name: Require org review guard token - run: | - if [ -z "${GH_TOKEN}" ]; then - echo "::error::Set secrets.EVALOPS_REVIEW_GUARD_TOKEN or secrets.EVALOPS_ORG_READ_TOKEN with org-wide PR read access before running the review feedback sentinel." - exit 2 - fi - - - name: Sweep recent merged PR feedback - id: sweep - run: | - set +e - ruby .github/scripts/sweep-recent-review-feedback.rb \ - --owner evalops \ - --since-hours "${SINCE_HOURS}" \ - --pr-limit "${PR_LIMIT}" \ - --min-severity "${MIN_SEVERITY}" \ - --issue-repo evalops/.github \ - --json-output review-feedback-ledger.json \ - --guardrail-backlog-output review-feedback-guardrail-backlog.md \ - --guardrail-backlog-json-output review-feedback-guardrail-backlog.json \ - --guardrail-issue-repo evalops/.github \ - --guardrail-repo-issues \ - --guardrail-lifecycle-json-output review-feedback-guardrail-lifecycle.json \ - > review-feedback-sentinel.md - status=$? - cat review-feedback-sentinel.md >> "${GITHUB_STEP_SUMMARY}" - { - echo "" - cat review-feedback-guardrail-backlog.md - } >> "${GITHUB_STEP_SUMMARY}" - if [ "${status}" -eq 0 ]; then - exit 0 - fi - if [ "${status}" -eq 1 ]; then - echo "::warning::recent unresolved review feedback found; issue was updated" - exit 0 - fi - exit "${status}" - - - name: Upload review feedback ledger - if: ${{ always() && steps.sweep.outcome != 'skipped' }} - uses: actions/upload-artifact@v4 - with: - name: review-feedback-ledger - path: review-feedback-ledger.json - if-no-files-found: error - retention-days: 30 - - - name: Upload review feedback guardrail backlog - if: ${{ always() && steps.sweep.outcome != 'skipped' }} - uses: actions/upload-artifact@v4 - with: - name: review-feedback-guardrail-backlog - path: | - review-feedback-guardrail-backlog.md - review-feedback-guardrail-backlog.json - review-feedback-guardrail-lifecycle.json - if-no-files-found: error - retention-days: 30 diff --git a/.github/workflows/review-thread-guard.yml b/.github/workflows/review-thread-guard.yml deleted file mode 100644 index 892ffc7..0000000 --- a/.github/workflows/review-thread-guard.yml +++ /dev/null @@ -1,99 +0,0 @@ -name: review-thread-guard - -on: - workflow_call: - inputs: - pr_number: - description: "Pull request number to inspect" - required: true - type: string - min_severity: - description: "Minimum severity to block on" - required: false - default: "high" - type: string - runner_label: - description: "Runner label used for the validation job" - required: false - type: string - default: ubuntu-latest - guard_ref: - description: "evalops/.github ref to checkout for guard scripts" - required: false - type: string - settle_seconds: - description: "Seconds to wait before checking review feedback so bot reviews can finish before auto-merge" - required: false - type: string - default: "90" - workflow_dispatch: - inputs: - repo: - description: "Repository to inspect, as owner/name" - required: false - type: string - pr_number: - description: "Pull request number to inspect" - required: true - type: string - min_severity: - description: "Minimum severity to block on" - required: false - default: "high" - type: choice - options: - - high - - p1 - - p0 - guard_ref: - description: "evalops/.github ref to checkout for guard scripts" - required: false - type: string - settle_seconds: - description: "Seconds to wait before checking review feedback so bot reviews can finish before auto-merge" - required: false - type: string - default: "90" - -permissions: - contents: read - pull-requests: read - -jobs: - unresolved-review-threads: - runs-on: ${{ inputs.runner_label || 'ubuntu-latest' }} - timeout-minutes: 5 - steps: - - uses: actions/checkout@v5 - with: - repository: evalops/.github - ref: ${{ inputs.guard_ref || github.workflow_sha }} - path: evalops-github - - - name: Let asynchronous review bots settle - env: - SETTLE_SECONDS: ${{ inputs.settle_seconds || '90' }} - run: | - set -euo pipefail - case "${SETTLE_SECONDS}" in - ''|*[!0-9]*) - echo "::error::settle_seconds must be a non-negative integer" - exit 2 - ;; - esac - if [ "${SETTLE_SECONDS}" -gt 0 ]; then - echo "Waiting ${SETTLE_SECONDS}s before checking review feedback." - sleep "${SETTLE_SECONDS}" - fi - - - name: Fail on unresolved high-priority review threads - env: - GH_TOKEN: ${{ github.token }} - TARGET_REPO: ${{ inputs.repo || github.repository }} - PR_NUMBER: ${{ inputs.pr_number }} - MIN_SEVERITY: ${{ inputs.min_severity || 'high' }} - run: | - ruby evalops-github/.github/scripts/check-pr-review-threads.rb \ - --repo "${TARGET_REPO}" \ - --pr "${PR_NUMBER}" \ - --min-severity "${MIN_SEVERITY}" diff --git a/.github/workflows/sync-labels.yml b/.github/workflows/sync-labels.yml deleted file mode 100644 index 0dcb324..0000000 --- a/.github/workflows/sync-labels.yml +++ /dev/null @@ -1,92 +0,0 @@ -name: sync-labels - -on: - pull_request: - paths: - - labels.yml - - .github/scripts/sync-labels.rb - - .github/workflows/sync-labels.yml - - test/sync_labels_test.rb - push: - branches: - - main - paths: - - labels.yml - - .github/scripts/sync-labels.rb - - .github/workflows/sync-labels.yml - schedule: - - cron: "41 9 * * 1" - workflow_dispatch: - inputs: - target_repos: - description: "Comma-separated repos, defaults to all active evalops repos" - required: false - default: "" - apply: - description: "Apply changes instead of dry-run" - required: false - type: boolean - default: false - -permissions: - contents: read - issues: write - pull-requests: write - -jobs: - sync: - runs-on: ubuntu-latest - timeout-minutes: 20 - env: - GH_TOKEN: ${{ secrets.EVALOPS_LABEL_SYNC_TOKEN || secrets.EVALOPS_ORG_WRITE_TOKEN || github.token }} - ORG_WRITE_TOKEN_PRESENT: ${{ secrets.EVALOPS_LABEL_SYNC_TOKEN != '' || secrets.EVALOPS_ORG_WRITE_TOKEN != '' }} - TARGET_REPOS: ${{ inputs.target_repos || '' }} - REQUESTED_APPLY: ${{ inputs.apply || false }} - steps: - - uses: actions/checkout@v5 - - - name: Validate canonical labels - run: ruby .github/scripts/sync-labels.rb --validate-only --labels labels.yml - - - name: Plan or apply label sync - env: - IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }} - run: | - set -euo pipefail - apply=false - if [ "${IS_PULL_REQUEST}" != "true" ]; then - apply=true - fi - if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${REQUESTED_APPLY}" != "true" ]; then - apply=false - fi - if [ "${apply}" = "true" ] && [ "${ORG_WRITE_TOKEN_PRESENT}" != "true" ]; then - echo "::error::Set EVALOPS_LABEL_SYNC_TOKEN or EVALOPS_ORG_WRITE_TOKEN with org-wide issues:write access before applying labels." - exit 2 - fi - - args=(--owner evalops --json-output label-sync-report.json --markdown-output label-sync-report.md) - if [ -n "${TARGET_REPOS}" ]; then - args+=(--repos "${TARGET_REPOS}") - fi - if [ "${apply}" = "true" ]; then - args+=(--apply) - else - args+=(--dry-run) - fi - ruby .github/scripts/sync-labels.rb "${args[@]}" - cat label-sync-report.md >> "${GITHUB_STEP_SUMMARY}" - - - name: Comment dry-run summary - if: ${{ github.event_name == 'pull_request' }} - run: gh pr comment "${{ github.event.pull_request.number }}" --repo evalops/.github --body-file label-sync-report.md - - - name: Upload label sync report - uses: actions/upload-artifact@v4 - with: - name: label-sync-report - path: | - label-sync-report.json - label-sync-report.md - if-no-files-found: error - retention-days: 30 diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index 8328dfb..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,35 +0,0 @@ -# Repository Instructions - -This repository provides org-level defaults for EvalOps repositories: default issue templates, PR templates, reusable workflow assets, community health files, and service catalog metadata. - -## Codex App Operating Rails - -- Treat changes here as org-wide process changes. Keep them small, explicit, and easy for downstream repos to override. -- Before editing templates, check live issues and PRs in `evalops/.github` so updates do not duplicate an existing convention effort. -- Template changes should improve evidence quality without making every PR or issue feel heavy. Prefer optional fields for specialized work and required fields only when missing them would block triage. -- When adding org-default guidance for agents, include concrete verification prompts: live `gh` checks, affected repos, generated artifacts, CI status, and release or deploy impact. -- Do not put repo-specific secrets, private environment details, or one-off local workflow assumptions in org defaults. - -## EvalOps-on-EvalOps Agent Practice - -- Start from live evidence: exact GitHub run, job, PR, log line, production signal, or runtime failure. Do not begin from repo guesses when `gh` can show current truth. -- Convert every actionable signal into either a merged PR or an accounted blocker. Accounted blockers must name the failing system, exact command or API, status, failure reason, and next unblocker. -- Prefer small action-factory PRs: one failure mode, one bounded fix, focused tests, clear rollback. -- Every PR must include `Summary`, `Test Plan`, and `Rollback`. Production or CI-failure PRs must also include the source signal URL or concrete runtime evidence. -- Never trust stale GitHub event payloads when enforcing gates. Refresh current PR metadata, current run state, and review-thread state from GitHub before deciding. -- After merge, verify the live follow-up surface. PR merge is not deployment proof. Check the relevant `main`, scheduled, dispatch, GitOps, Argo, or runtime workflow. -- A successful post-merge result may be resolved or transformed. If the original opaque or tooling failure moves to a real domain failure, record the new blocker explicitly. -- For EvalOps action-factory work, record shipped actions in Cerebro with lineage: repo, PR, branch, source signal, lane, checks, verifier artifact, policy flags, rollback, terminal state, and yield impact. -- Create independent verifier artifacts for shipped lanes under `outputs/evalops-action-factory/` when useful, especially for production or runtime fixes. -- No direct `main` mutation, no direct Kubernetes patching, and no `:latest` image tags. Use PRs and GitOps. -- Do not enable CodeQL, GitHub Code Scanning, SARIF upload, `github/codeql-action`, `upload-sarif`, or `security-events: write`. EvalOps does not use that surface. -- If touching GitHub Actions workflows that call Go, ensure Go is set up before the first `go run`, `go test`, `go build`, or `go install`, and add or prefer tests that enforce ordering. -- Use bounded one-shot polling only. Do not use `gh run watch`, `gh pr checks --watch`, or unbounded shell loops. - -## Verification - -- Validate YAML syntax for files under `.github/ISSUE_TEMPLATE/`. -- Inspect rendered Markdown for `.github/pull_request_template.md`. -- Run `ruby .github/scripts/validate-services-catalog.rb services.yaml` when touching the service catalog or catalog validation. -- Run `ruby -Itest -e 'ARGV.each { |path| require "./#{path}" }' test/*_test.rb` when touching helper scripts or validation rails. -- For default-template changes, create a PR and let repository maintainers review whether the org-wide wording is too broad. diff --git a/README.md b/README.md index 4ddba35..e5a63db 100644 --- a/README.md +++ b/README.md @@ -1,419 +1,25 @@ # evalops/.github -Org-level defaults for EvalOps repositories live here. Changes in this repo can -alter issue intake, pull request review prompts, reusable workflow behavior, -dependency update policy, and the public organization profile. - -Treat this repository as a small control plane: conventions should be explicit, -validated, and easy for downstream repos to adopt without copying private -operational assumptions. +Public organization defaults for EvalOps. This repository hosts the +organization profile and a set of operating-convention documents. ## What Lives Here | Path | Purpose | |---|---| -| `.github/ISSUE_TEMPLATE/` | Default issue forms for EvalOps repos that do not override them. | -| `.github/agent-mcp/` | Canonical EvalOps MCP client config templates for public repo rollout. | -| `.github/codex/hooks/` | Example Codex hook pack for local EvalOps agent guardrails. | -| `.github/pull_request_template.md` | Default PR evidence checklist. | -| `.github/workflows/` | Reusable or self-validating workflows owned by the org defaults repo. | -| `.github/workflow-templates/` | Workflow picker templates for downstream adoption. | -| `.github/contracts/` | Versioned org-default contracts and conformance expectations. | -| `.github/scripts/` | Small helper scripts used by reusable workflows and validation rails. | -| `profile/` | Public organization profile and operating conventions. | -| `labels.yml` | Canonical additive label taxonomy for EvalOps repositories. | -| `renovate-config.json` | Shared Renovate preset for dependency update policy. | -| `services.yaml` | Lightweight service catalog for ownership, topology, and runtime tiering. | - -## Maintainer Workflow - -1. Start from fresh `origin/main`. This repo is small, but its effects are - broad, so avoid stacking process changes on stale branches. -2. Check open issues and recent PRs in `evalops/.github` before adding a new - convention. If the change is really a downstream rollout, open tracking - issues in the owning repos instead of hiding the work here. -3. Keep defaults portable. Do not include repo-specific secrets, one-off runner - assumptions, or private environment details. -4. Pair every new convention with a validation path. Prefer a reusable workflow, - test, or script over prose-only policy when the rule can be checked. -5. Publish via PR and let downstream owners object if the wording or guardrail is - too broad. - -## Reusable Workflows - -### EvalOpsBot Review Requests - -Use the EvalOpsBot webhook relay as the primary bridge from GitHub review -requests to the deep PR lens workflow. The relay receives -`pull_request.review_requested`, filters for `requested_reviewer.login == -EvalOpsBot`, and dispatches `.github/workflows/evalops-pr-lens-review.yml` for -that exact `repo#PR`. - -`.github/workflows/evalopsbot-review-request-dispatch.yml` remains as the -hourly fallback. It searches open EvalOps PRs with -`review-requested:EvalOpsBot`, skips head SHAs that already have an -`evalops-pr-lens/meta-review` signal, marks new matches as pending, and -dispatches the same workflow contract. - -### Codex Workflow Templates - -Use the workflow templates under `.github/workflow-templates/` to add Codex -lanes to downstream repositories: - -- `codex-pr-review.yml` reviews PR diffs and posts focused findings. -- `codex-structured-pr-review.yml` reviews PR diffs with a JSON schema and - posts actionable findings as inline review comments. -- `review-thread-guard.yml` fails PRs that still have unresolved, non-outdated - high-priority review threads. -- `codex-ci-triage.yml` triages a specific failed Actions run. -- `codex-post-merge-verify.yml` checks default-branch health after merges. -- `codex-label-churn-audit.yml` audits PR label mutation loops. -- `pysa.yml` runs Pyre/Pysa taint analysis for Python repos. - -Each template expects an `OPENAI_API_KEY` repository secret. Repositories that -need stronger, repo-specific behavior should copy the matching prompt from -`.github/codex/prompts/` into their own `.github/codex/prompts/` directory and -point the workflow at that file. - -For deeper adoption patterns beyond PR comments, see -`profile/CODEX_HIGH_LEVERAGE_WORKFLOWS.md`. - -### Agent Authorship Labels - -Use `.github/workflows/agent-authorship-label.yml` to apply one authorship label -to each PR from commit trailers: - -- `agent-authored` -- `agent-assisted` -- `mixed-authorship` - -Downstream repos can adopt it from the workflow template picker or with: - -```yaml -name: Agent authorship labels - -on: - pull_request_target: - types: [opened, synchronize, reopened, ready_for_review, edited] - -permissions: - contents: read - pull-requests: write - issues: write - -jobs: - label: - uses: evalops/.github/.github/workflows/agent-authorship-label.yml@main -``` - -For production repos, pin the reusable workflow to a reviewed commit SHA and -pass the same SHA as `helper_ref`. That keeps the workflow and helper scripts on -one immutable revision. - -### Review Thread Guard - -Use `.github/workflow-templates/review-thread-guard.yml` on repos where review -threads should be merge blockers: - -```yaml -name: Review thread guard - -on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] - -permissions: - contents: read - pull-requests: read - -jobs: - unresolved-review-threads: - uses: evalops/.github/.github/workflows/review-thread-guard.yml@main - with: - pr_number: ${{ github.event.pull_request.number }} -``` - -The guard blocks unresolved, non-outdated review threads at `high` severity or -above by default. Use `workflow_dispatch` with `min_severity=p1` for repos that -only want release-blocking findings to fail. - -### Codex Rails Check - -Use `.github/workflows/codex-rails-check.yml` to validate repository operating -rails: - -- issue template YAML -- workflow and workflow-template YAML -- workflow template metadata -- org control-plane contract shape and evidence chain -- engineering-practices contract shape and live-audit entrypoint -- canonical `labels.yml` shape -- `AGENTS.md` presence and non-empty content -- skill frontmatter -- `services.yaml` catalog shape -- Ruby tests under `test/` - -The workflow can be called by downstream repos: - -```yaml -jobs: - codex-rails: - uses: evalops/.github/.github/workflows/codex-rails-check.yml@main - with: - require_agents: true -``` - -### Pysa Static Analysis - -Use `.github/workflows/pysa.yml` to add Pyre/Pysa taint analysis to Python -repositories. Downstream repos can adopt it from the workflow template picker or -with: - -```yaml -name: Pysa static analysis - -on: - pull_request: - paths: - - "**/*.py" - - "pyproject.toml" - - "requirements*.txt" - - ".pyre_configuration*" - - ".pysa/**" - workflow_dispatch: - -permissions: - contents: read - -jobs: - pysa: - uses: evalops/.github/.github/workflows/pysa.yml@main - with: - source_directories: "." - taint_models_path: ".pysa" -``` - -Repos with custom dependency bootstrapping can pass `requirements_file` or -`setup_command`. Repos without committed Pyre configuration get a minimal -generated `.pyre_configuration` from `source_directories`. - -## Service Catalog - -`services.yaml` is intentionally lightweight. It should answer: - -- which repo owns a service or tool -- which team is accountable for it -- whether it is critical, standard, or experimental -- where it runs -- which other catalog entries it depends on -- whether it consumes shared protobuf contracts - -Validate it locally with: - -```bash -ruby .github/scripts/validate-services-catalog.rb services.yaml -``` - -Use `depends_on` only for entries that also appear in `services.yaml`. Use -external links or notes in the owning repo for third-party dependencies. - -## Local Verification - -Before opening a PR from this repo, run the narrow checks that match the change: - -```bash -ruby .github/scripts/verify-org-control-plane-contract.rb \ - --json-output org-control-plane-contract-report.json \ - --markdown-output org-control-plane-contract-report.md -ruby -Itest -e 'ARGV.each { |path| require "./#{path}" }' test/*_test.rb -ruby .github/scripts/validate-services-catalog.rb services.yaml -git diff --check -``` - -If workflows changed and `actionlint` is available, run it on touched workflow -files. Then check the PR's live GitHub Actions results before merging. - -### Org Control Plane Contract - -The contract in `.github/contracts/org-control-plane.yml` turns the repo's -agent-facing defaults into explicit conformance requirements. It names the -correctness model, threat model, SLO dimensions, provenance IDs, and adversarial -fixtures for prompt, tool, and data poisoning. The verifier emits JSON and -Markdown reports with source digests so downstream agents can cite the exact -inputs and decisions behind an org-default change. - -See `profile/ORG_CONTROL_PLANE_CONTRACT.md` for the design note. - -### Engineering Practices Audit - -`.github/contracts/engineering-practices.yml` turns the current EvalOps -engineering-practice standard into an auditable contract. It covers org -rulesets, generated backlog lifecycle, release-train state, agent review, -security SLOs, repo operating rails, and evidence-first completion. - -Validate only the contract shape locally: - -```bash -ruby .github/scripts/audit-engineering-practices.rb --contract-only -``` - -Run the live audit with `gh` authenticated to EvalOps: - -```bash -ruby .github/scripts/audit-engineering-practices.rb \ - --json-output engineering-practices-audit.json \ - --markdown-output engineering-practices-audit.md -``` - -`.github/workflows/engineering-practices-audit.yml` validates the contract on -PRs and runs the live, non-mutating audit on schedule or manual dispatch with -`EVALOPS_ORG_READ_TOKEN`. - -### Label Taxonomy Sync - -`labels.yml` is the canonical EvalOps label set, seeded from -`evalops/platform`. `.github/workflows/sync-labels.yml` dry-runs on PRs and -comments a per-repo diff. On `main`, weekly schedule, or manual dispatch with -`apply=true`, it reconciles active `evalops/*` repos additively: missing labels -are created, matching names get color/description updates, and repo-local labels -are left alone. A repo can opt out by committing `.github/labels-sync.disabled`. - -Validate the taxonomy without touching GitHub: - -```bash -ruby .github/scripts/sync-labels.rb --validate-only --labels labels.yml -``` - -### Agent MCP Config Rollout - -The templates in `.github/agent-mcp/templates/` define the committed client -config for public repos: - -- `.mcp.json` for Claude Code and other MCP clients that read the common JSON - shape. -- `.codex/config.toml` for Codex. -- `.cursor/mcp.json` for Cursor. -- an `AGENTS.md` section explaining the EvalOps integration. -- `.gitignore` entries for local API-key fallbacks. - -Check or write those files in any repo checkout with: - -```bash -ruby .github/scripts/sync-agent-mcp-config.rb --workspace /path/to/repo --check -ruby .github/scripts/sync-agent-mcp-config.rb --workspace /path/to/repo --write -``` - -`.github/workflows/agent-mcp-config-rollout.yml` validates the templates on PRs. -Manual dispatch with `apply=true` and `EVALOPS_MCP_ROLLOUT_TOKEN` (or -`EVALOPS_ORG_WRITE_TOKEN`) opens rollout PRs against either the requested repos -or all active public `evalops/*` repos. - -### Codex Hook Guardrails - -`.github/scripts/evalops-codex-hook-guard.rb` implements warning-first local -guardrails for EvalOps agent work: session-start process reminders, dirty -worktree warnings before destructive git commands, and merge/readiness nudges -when review-thread evidence is missing. The example hook config is -`.github/codex/hooks/evalops-hooks.toml`. - -See `profile/CODEX_HOOK_GUARDRAILS.md` for install notes and limitations. - -### Strategy And Tooling Profiles - -`profile/GOVERN_EXISTING_AI_FLEET.md` records the current EvalOps positioning -thesis and concrete retrofit surfaces. `profile/TYPESCRIPT_TOOLING_STANDARD.md` -captures the `gts`/`wireit` standardization path, including pilot criteria and -non-goals. - -### Archived Dependabot Audit - -`.github/workflows/archived-dependabot-audit.yml` runs a read-only audit for -archived EvalOps repos that still have `.github/dependabot.yml` or open -Dependabot PRs. The pre-archive checklist in `profile/ARCHIVAL_RUNBOOK.md` -requires removing Dependabot config and clearing bot PRs before setting -`archived=true`. - -Run the audit locally with: - -```bash -ruby .github/scripts/audit-archived-dependabot.rb \ - --owner evalops \ - --json-output archived-dependabot-audit.json \ - --markdown-output archived-dependabot-audit.md -``` - -### EvalOps PR Lens Review - -`.github/workflows/evalops-pr-lens-review.yml` sweeps open PRs in -`evalops/platform`, `evalops/deploy`, and `evalops/maestro-internal` every two -hours, can be run manually for specific `repo#number` targets, and accepts -machine dispatches for on-demand EvalOpsBot review requests. It fans out one -reviewer per lens: - -- migration safety -- NATS contract drift -- Argo manifest skew -- IAM blast radius -- generated SDK delta -- eval regression risk - -Each lens writes a stable commit status context and best-effort Check Run named -`evalops-pr-lens/`. The meta-review step ranks findings by confidence, -updates `evalops-pr-lens/meta-review`, writes an operator summary to the workflow -run, and only posts a PR comment when findings clear the configured -high-confidence threshold. - -Required secrets in `evalops/.github`: - -- `EVALOPS_PR_LENS_TOKEN`: GitHub token with read/write access to the target - repos for statuses and PR comments. This is the fallback path. -- `EVALOPS_PR_LENS_APP_ID`, `EVALOPS_PR_LENS_APP_PRIVATE_KEY`, and - `EVALOPS_PR_LENS_APP_INSTALLATION_ID`: preferred GitHub App auth path for - dispatch, comments, statuses, and Checks. -- `ANTHROPIC_API_KEY` or `EVALOPS_ANTHROPIC_API_KEY`: Anthropic key for Opus - lens reviewers. -- `OPENAI_API_KEY` or `EVALOPS_OPENAI_API_KEY`: optional fallback when manually - dispatching with `provider=openai`. - -#### EvalOpsBot Review Requests - -`EvalOpsBot` review requests should enter through a small webhook relay, not -per-repository workflow copies. The relay should listen for GitHub -`pull_request` webhook deliveries where: - -- `action` is `review_requested` -- `requested_reviewer.login` is `EvalOpsBot` -- `repository.full_name` is an EvalOps repository -- `pull_request.number` is present +| `profile/README.md` | The public organization profile shown on github.com/evalops. | +| `profile/*.md` | EvalOps operating-convention notes (engineering practices, archival runbook, Codex workflow notes, tooling standards). | +| `SECURITY.md` | How to report a vulnerability in an EvalOps repository. | -`.github/scripts/evalopsbot-webhook-relay.rb` is the checked-in relay core for -that endpoint. It verifies `X-Hub-Signature-256` when `GITHUB_WEBHOOK_SECRET` -is set, ignores non-matching deliveries, and dispatches this repository's -review workflow: +## Org Automation -```bash -gh api --method POST repos/evalops/.github/dispatches --input - <<'JSON' -{ - "event_type": "evalopsbot-review-requested", - "client_payload": { - "target_repo": "evalops/deploy", - "target_pr": "deploy#1234", - "requested_reviewer": "EvalOpsBot" - } -} -JSON -``` +The org automation engine (review pipeline, guardrail workflows, helper +scripts, contracts, and the service catalog) lives in the private repository +`evalops/.github-private`. It was moved out of this public repository to reduce +public exposure of internal operational detail. Maintainers with access work in +`evalops/.github-private`. -The workflow also accepts `target_prs`, `target_repos`, `provider`, `model`, -`max_diff_bytes`, and `min_confidence` in `client_payload` for controlled -operator overrides. Keep the relay token scoped to dispatching workflows in -`evalops/.github`; the review workflow itself owns the cross-repo read/write -token and model-provider credentials. Lens-specific routing defaults live in -`.github/pr-lens-routing.yml`. +## Security -`.github/workflows/evalopsbot-review-canary.yml` creates a harmless canary PR, -requests review from `EvalOpsBot`, waits for the deep-review meta signal, and -then closes the canary PR. `.github/workflows/evalopsbot-review-setup-audit.yml` -checks the configured target repository list, fallback workflows, and selected -review secret coverage so onboarding drift is visible before a real review -request is missed. +See [`SECURITY.md`](SECURITY.md) to report a vulnerability. Report to +security@evalops.dev; do not open a public issue. diff --git a/labels.yml b/labels.yml deleted file mode 100644 index c9203d3..0000000 --- a/labels.yml +++ /dev/null @@ -1,78 +0,0 @@ -schema_version: evalops.labels.v1 -source_repo: evalops/platform -sync: - additive: true - opt_out_file: .github/labels-sync.disabled -labels: - - name: bug - description: Something is not working - color: d73a4a - - name: documentation - description: Improvements or additions to documentation - color: "0075ca" - - name: duplicate - description: This issue or pull request already exists - color: cfd3d7 - - name: enhancement - description: New feature or request - color: a2eeef - - name: good first issue - description: Good for newcomers - color: "7057ff" - - name: help wanted - description: Extra attention is needed - color: "008672" - - name: invalid - description: This does not seem right - color: e4e669 - - name: question - description: Further information is requested - color: d876e3 - - name: wontfix - description: This will not be worked on - color: ffffff - - name: dependencies - description: Pull requests that update a dependency file - color: "0366d6" - - name: go - description: Pull requests that update Go code - color: 16e2e2 - - name: javascript - description: Pull requests that update JavaScript code - color: "168700" - - name: sync - description: Crossover sync between standalone repos and _import/ mirrors - color: 1d76db - - name: operational - description: Operational practices, runbooks, and reliability - color: 0e8a16 - - name: architecture-review - description: Cross-service architecture review requested - color: "5319e7" - - name: security - description: Security vulnerabilities and hardening - color: d73a4a - - name: upstream-absorbed - description: Issue absorbed from an archived upstream service repository - color: 6f42c1 - - name: foundation - description: Unblocks multiple downstream issues; start here - color: "0052cc" - - name: agent-authored - description: All PR commits carry explicit EvalOps agent authorship trailers - color: 6f42c1 - - name: human-authored - description: No PR commits carry EvalOps agent authorship trailers - color: 0e8a16 - - name: mixed-authorship - description: Some PR commits carry explicit EvalOps agent trailers and some are untrailered - color: fbca04 - - name: agent-assisted - description: PR commits are assumed LLM-assisted but do not carry explicit EvalOps agent trailers - color: 1d76db - - name: "autorelease: pending" - description: Release automation has a pending release PR or tag action - color: ededed - - name: "autorelease: tagged" - description: Release automation has tagged the pending release - color: ededed diff --git a/renovate-config.json b/renovate-config.json deleted file mode 100644 index 83dc965..0000000 --- a/renovate-config.json +++ /dev/null @@ -1,100 +0,0 @@ -{ - "$schema": "https://docs.renovatebot.com/renovate-schema.json", - "description": "Shared Renovate policy for EvalOps repositories.", - "extends": [ - "config:recommended", - ":configMigration", - ":dependencyDashboard", - ":semanticCommits", - "helpers:pinGitHubActionDigests", - "helpers:githubDigestChangelogs", - "helpers:goXPackagesChangelogLink", - "helpers:goXPackagesNameLink" - ], - "timezone": "UTC", - "schedule": ["* 0-5 * * 1"], - "labels": ["dependencies"], - "prConcurrentLimit": 5, - "prHourlyLimit": 2, - "minimumReleaseAge": "3 days", - "vulnerabilityAlerts": { - "labels": ["dependencies", "security"], - "minimumReleaseAge": null, - "prCreation": "immediate", - "schedule": [], - "vulnerabilityFixStrategy": "lowest" - }, - "packageRules": [ - { - "description": "Require explicit approval for major updates.", - "matchUpdateTypes": ["major"], - "dependencyDashboardApproval": true, - "addLabels": ["major"] - }, - { - "description": "Require explicit approval for ESLint ecosystem updates that can change config export shape.", - "matchManagers": ["npm"], - "matchPackageNames": [ - "@eslint/**", - "@typescript-eslint/**", - "eslint", - "eslint-*", - "eslint-plugin-*", - "typescript-eslint" - ], - "matchUpdateTypes": ["minor", "major"], - "dependencyDashboardApproval": true, - "addLabels": ["eslint", "config-sensitive"] - }, - { - "description": "Automerge patch and digest maintenance after CI passes.", - "matchUpdateTypes": ["patch", "digest", "pin", "pinDigest"], - "automerge": true, - "automergeType": "pr" - }, - { - "description": "Batch non-major third-party Go module updates.", - "matchManagers": ["gomod"], - "matchPackageNames": ["*", "!github.com/evalops/**"], - "matchUpdateTypes": ["minor", "patch", "digest", "pin", "pinDigest"], - "groupName": "third-party Go modules", - "groupSlug": "third-party-go", - "addLabels": ["go"] - }, - { - "description": "Keep internal EvalOps Go module updates separate from third-party Go dependencies.", - "matchManagers": ["gomod"], - "matchPackageNames": ["github.com/evalops/**"], - "groupName": "evalops internal Go modules", - "groupSlug": "evalops-go", - "addLabels": ["go", "internal"] - }, - { - "description": "Track service-runtime bumps in their own PR for easier rollout review.", - "matchManagers": ["gomod"], - "matchPackageNames": [ - "github.com/evalops/service-runtime", - "github.com/evalops/service-runtime/**" - ], - "groupName": "service-runtime", - "groupSlug": "service-runtime", - "addLabels": ["go", "internal", "service-runtime"] - }, - { - "description": "Batch Terraform provider, module, and lockfile maintenance.", - "matchManagers": ["terraform", "terraform-version", "terragrunt", "terragrunt-version"], - "matchUpdateTypes": ["minor", "patch", "digest", "pin", "pinDigest"], - "groupName": "Terraform dependencies", - "groupSlug": "terraform", - "addLabels": ["terraform"] - }, - { - "description": "Batch GitHub Actions updates while pinning action digests.", - "matchManagers": ["github-actions"], - "matchUpdateTypes": ["minor", "patch", "digest", "pin", "pinDigest"], - "groupName": "GitHub Actions", - "groupSlug": "github-actions", - "addLabels": ["github-actions"] - } - ] -} diff --git a/scripts/check-positioning.mjs b/scripts/check-positioning.mjs deleted file mode 100644 index f2555d5..0000000 --- a/scripts/check-positioning.mjs +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env node -// Positioning guardrail for the EvalOps org profile and docs. -// -// Keeps the org front door on the visibility + governance + coverage frame and -// blocks regressions to the eval-era / execution-first framing the org moved -// away from (see evalops/hopper #192-197). Self-contained: no ripgrep or npm -// dependencies, runs on plain `node`. - -import { readFileSync, existsSync } from "node:fs"; - -const FILES = ["profile/README.md", "README.md", "AGENTS.md", "SECURITY.md"]; - -const banned = [ - // eval-era org tagline - "organi[sz]ational operating system", - "operating system for (ai )?agent", - "shipping accountable ai", - "evaluation, governance,? and observability", - // execution-first frame (mirrors the hopper positioning-frame guardrail) - "put (ai )?agents to work", - "operating layer", - "agents that actually work", - "from signal to done", - "governed agent work", - "proves? agents are trustworthy", -]; - -const re = new RegExp(banned.join("|"), "i"); - -let failed = false; -for (const file of FILES) { - if (!existsSync(file)) continue; - readFileSync(file, "utf8") - .split("\n") - .forEach((line, i) => { - if (re.test(line)) { - failed = true; - console.error(`positioning guardrail: ${file}:${i + 1}: ${line.trim()}`); - } - }); -} - -if (failed) { - console.error( - "\nOff-frame positioning copy found. Hold the visibility + governance + coverage frame.", - ); - process.exitCode = 1; -} else { - console.log("positioning guardrail passed"); -} diff --git a/services.yaml b/services.yaml deleted file mode 100644 index 081b706..0000000 --- a/services.yaml +++ /dev/null @@ -1,645 +0,0 @@ -# EvalOps Service Catalog -# Source of truth for service ownership, runtime tier, and topology. -# Updated manually or via automation when new services are created. - -services: - - # ────────────────────────────────────────────── - # Critical tier — core platform, must be up - # ────────────────────────────────────────────── - - identity: - description: "EvalOps identity and access plane" - team: platform-team - language: go - tier: critical - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/identity - - governance: - description: "Safety policies, PII, and data retention" - team: platform-team - language: go - tier: critical - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/governance - - audit: - description: "Tamper-evident event ingestion" - team: platform-team - language: go - tier: critical - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/audit - - approvals: - description: "Cross-surface action governance" - team: platform-team - language: go - tier: critical - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/approvals - - gate: - description: "Multi-protocol privileged access proxy" - team: platform-team - language: go - tier: critical - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/gate - - llm-gateway: - description: "Managed LLM gateway" - team: platform-team - language: go - tier: critical - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/llm-gateway - - keys: - description: "Provider credential vault" - team: platform-team - language: go - tier: critical - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/keys - - # ────────────────────────────────────────────── - # Standard tier — important but not critical path - # ────────────────────────────────────────────── - - meter: - description: "Usage metering and cost attribution" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/meter - - memory: - description: "Shared context and memory service" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/memory - - registry: - description: "Agent registry and capability mesh" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/registry - - traces: - description: "Agent execution tracing" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/traces - - objectives: - description: "Durable work tracking" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/objectives - - prompts: - description: "Prompt registry" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/prompts - - skills: - description: "Skill registry" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/skills - - connectors: - description: "Integration lifecycle and OAuth" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/connectors - - notifications: - description: "Multi-channel delivery routing" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/notifications - - entities: - description: "Cross-system identity correlation" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/entities - - compliance: - description: "Compliance evidence generator" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/compliance - - attribution: - description: "Revenue attribution" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/attribution - - pipeline: - description: "Internal CRM and revenue pipeline" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/pipeline - - parker: - description: "Internal HRIS" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/parker - - asb: - description: "Agents-first secret broker" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/asb - - agent-mcp: - description: "Unified MCP server for external agents" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/agent-mcp - - siphon: - description: "Webhook ingestion into NATS and ClickHouse" - team: platform-team - language: go - tier: standard - runtime: gke - depends_on: [proto, service-runtime] - proto_consumer: true - repo: evalops/siphon - - # ────────────────────────────────────────────── - # Products — frontend-team - # ────────────────────────────────────────────── - - console: - description: "Fleet dashboard" - team: frontend-team - language: typescript - tier: critical - runtime: gke - repo: evalops/console - - admin: - description: "Internal ops control plane" - team: frontend-team - language: typescript - tier: standard - runtime: gke - repo: evalops/admin - - maestro: - description: "Multi-model coding agent (public)" - team: frontend-team - language: typescript - tier: critical - runtime: gke - repo: evalops/maestro - - maestro-internal: - description: "Agentic coding assistant (internal)" - team: frontend-team - language: typescript - tier: critical - runtime: gke - repo: evalops/maestro-internal - - chat: - description: "AI chat platform" - team: frontend-team - language: typescript - tier: standard - runtime: gke - repo: evalops/chat - - ensemble: - description: "Open-source AI operating system" - team: frontend-team - language: typescript - tier: standard - runtime: gke - repo: evalops/ensemble - - # ────────────────────────────────────────────── - # Shared infrastructure — libraries and contracts - # ────────────────────────────────────────────── - - proto: - description: "Canonical protobuf definitions" - team: api-team - language: typescript - tier: critical - runtime: none - repo: evalops/proto - - service-runtime: - description: "Shared Go runtime helpers" - team: platform-team - language: go - tier: critical - runtime: none - repo: evalops/service-runtime - - deploy: - description: "GitOps delivery (K8s, Terraform, ArgoCD)" - team: sre-team - language: hcl - tier: critical - runtime: none - repo: evalops/deploy - - # ────────────────────────────────────────────── - # Data - # ────────────────────────────────────────────── - - dagster-user-code: - description: "Data pipelines" - team: data-team - language: typescript - tier: standard - runtime: gke - repo: evalops/dagster-user-code - - fermata: - description: "LLM evaluation platform" - team: qa-team - language: python - tier: standard - runtime: gke - repo: evalops/fermata - - cerebro: - description: "Entity intelligence engine" - team: security-team - language: go - tier: standard - runtime: gke - repo: evalops/cerebro - - # ────────────────────────────────────────────── - # Experimental and tools - # ────────────────────────────────────────────── - - hopper: - description: "Marketing site" - team: frontend-team - language: typescript - tier: experimental - runtime: standalone - repo: evalops/hopper - - lark: - description: "macOS desktop client for Claude" - team: frontend-team - language: typescript - tier: experimental - runtime: none - repo: evalops/lark - - mocktopus: - description: "Mock LLM APIs for testing" - team: qa-team - language: python - tier: experimental - runtime: none - repo: evalops/mocktopus - - mcp-firewall: - description: "MCP JSON-RPC firewall" - team: security-team - language: go - tier: experimental - runtime: none - repo: evalops/mcp-firewall - - mcp-openapi: - description: "OpenAPI to MCP bridge" - team: platform-team - language: typescript - tier: experimental - runtime: none - repo: evalops/mcp-openapi - - agent-harness: - description: "Unified agent SDK harness" - team: platform-team - language: python - tier: experimental - runtime: none - repo: evalops/agent-harness - - nimbus: - description: "Self-hosted CI for evals" - team: sre-team - language: python - tier: experimental - runtime: standalone - repo: evalops/nimbus - - explorer: - description: "Protobuf schema explorer" - team: api-team - language: typescript - tier: experimental - runtime: none - repo: evalops/explorer - - docs: - description: "Prospect and internal docs" - team: docs-team - language: tex - tier: experimental - runtime: none - repo: evalops/docs - - diagrams: - description: "Architecture diagrams" - team: docs-team - language: none - tier: experimental - runtime: none - repo: evalops/diagrams - - deliberate-reasoning-engine: - description: "Structured reasoning MCP" - team: platform-team - language: typescript - tier: experimental - runtime: none - repo: evalops/deliberate-reasoning-engine - - shared-memory-mcp: - description: "Shared memory MCP server" - team: platform-team - language: typescript - tier: experimental - runtime: none - repo: evalops/shared-memory-mcp - - keep: - description: "PoC zero-trust access stack" - team: security-team - language: go - tier: experimental - runtime: none - repo: evalops/keep - - fabric: - description: "Agent Slack reimagining" - team: frontend-team - language: typescript - tier: experimental - runtime: none - repo: evalops/fabric - - cadence: - description: "AI-native workspace" - team: frontend-team - language: typescript - tier: experimental - runtime: none - repo: evalops/cadence - - conductor: - description: "AI web companion" - team: frontend-team - language: typescript - tier: experimental - runtime: none - repo: evalops/conductor - - diffscope: - description: "Code review engine" - team: platform-team - language: rust - tier: experimental - runtime: none - repo: evalops/diffscope - - diffscope-web-poc: - description: "Diffscope web frontend" - team: frontend-team - language: javascript - tier: experimental - runtime: none - repo: evalops/diffscope-web-poc - - cerebro-frontend: - description: "Cerebro dashboard" - team: frontend-team - language: typescript - tier: experimental - runtime: none - repo: evalops/cerebro-frontend - - garak-skill: - description: "LLM vulnerability scanner skill" - team: security-team - language: none - tier: experimental - runtime: none - repo: evalops/garak-skill - - gemini-exfil-detector: - description: "Insider threat detection" - team: security-team - language: python - tier: experimental - runtime: none - repo: evalops/gemini-exfil-detector - - dspy-micro-agent: - description: "Minimal DSPy agent" - team: platform-team - language: python - tier: experimental - runtime: none - repo: evalops/dspy-micro-agent - - orbit-agent: - description: "Startup advisor agent" - team: platform-team - language: python - tier: experimental - runtime: none - repo: evalops/orbit-agent - - dspy-0to1-guide: - description: "DSPy guide" - team: platform-team - language: python - tier: experimental - runtime: none - repo: evalops/dspy-0to1-guide - - cognitive-dissonance-dspy: - description: "Multi-agent dissonance detection" - team: platform-team - language: python - tier: experimental - runtime: none - repo: evalops/cognitive-dissonance-dspy - - founder-email-optimizer: - description: "Email optimization" - team: platform-team - language: python - tier: experimental - runtime: none - repo: evalops/founder-email-optimizer - - dspy-advanced-prompting: - description: "Advanced prompting techniques" - team: platform-team - language: python - tier: experimental - runtime: none - repo: evalops/dspy-advanced-prompting - - folie-a-deux-dspy: - description: "LLM agreement training" - team: platform-team - language: python - tier: experimental - runtime: none - repo: evalops/folie-a-deux-dspy - - eval2otel: - description: "Eval results to OTel converter" - team: platform-team - language: typescript - tier: experimental - runtime: none - repo: evalops/eval2otel - - gam-workspace-config: - description: "Google Workspace config" - team: sre-team - language: shell - tier: experimental - runtime: none - repo: evalops/gam-workspace-config - - bandit-dspy: - description: "Security-aware LLM dev" - team: security-team - language: python - tier: experimental - runtime: none - repo: evalops/bandit-dspy - - override-cascade-dspy: - description: "Safety override cascade detection" - team: security-team - language: python - tier: experimental - runtime: none - repo: evalops/override-cascade-dspy - - agent-pm: - description: "AI product management orchestrator" - team: platform-team - language: python - tier: experimental - runtime: none - repo: evalops/agent-pm - - openclaw-safety-harness: - description: "Safety harness plugin" - team: security-team - language: typescript - tier: experimental - runtime: none - repo: evalops/openclaw-safety-harness - - deep-code-reasoning-mcp: - description: "Code analysis MCP server" - team: platform-team - language: typescript - tier: experimental - runtime: none - repo: evalops/deep-code-reasoning-mcp diff --git a/test/audit_archived_dependabot_test.rb b/test/audit_archived_dependabot_test.rb deleted file mode 100644 index 52984d5..0000000 --- a/test/audit_archived_dependabot_test.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true - -require "minitest/autorun" -require_relative "../.github/scripts/audit-archived-dependabot" - -class AuditArchivedDependabotTest < Minitest::Test - def test_parse_repos_normalizes_owner - assert_equal( - ["evalops/agent-mcp", "evalops/platform"], - EvalOpsArchivedDependabotAudit.parse_repos("agent-mcp,evalops/platform") - ) - end - - def test_markdown_report_lists_configs_and_prs - report = { - "generated_at" => "2026-05-15T12:00:00Z", - "owner" => "evalops", - "repo_count" => 1, - "repos_with_dependabot_config" => 1, - "open_dependabot_pr_count" => 2, - "repos" => [ - { - "repo" => "evalops/agent-mcp", - "dependabot_config_present" => true, - "open_dependabot_prs" => [ - { "number" => 46, "title" => "bump setup-go", "url" => "https://github.com/evalops/agent-mcp/pull/46" }, - { "number" => 47, "title" => "bump internal", "url" => "https://github.com/evalops/agent-mcp/pull/47" } - ] - } - ] - } - - markdown = EvalOpsArchivedDependabotAudit.markdown_report(report) - - assert_includes markdown, "Archived Dependabot Audit" - assert_includes markdown, "`evalops/agent-mcp`" - assert_includes markdown, "#46, #47" - end -end diff --git a/test/audit_engineering_practices_test.rb b/test/audit_engineering_practices_test.rb deleted file mode 100644 index 3f902af..0000000 --- a/test/audit_engineering_practices_test.rb +++ /dev/null @@ -1,385 +0,0 @@ -# frozen_string_literal: true - -require "json" -require "minitest/autorun" -require "time" -require_relative "../.github/scripts/audit-engineering-practices" - -class AuditEngineeringPracticesTest < Minitest::Test - def test_static_contract_passes_and_emits_source_evidence - contract = EvalOpsEngineeringPracticesAudit.load_contract(".github/contracts/engineering-practices.yml") - validation = EvalOpsEngineeringPracticesAudit.validate_contract(contract, root: Dir.pwd) - - assert_equal "pass", validation.fetch("status"), validation.fetch("errors").join("\n") - evidence = EvalOpsEngineeringPracticesAudit.evidence(contract, Dir.pwd) - assert evidence.all? { |item| item.fetch("sha256").match?(/\A[0-9a-f]{64}\z/) } - end - - def test_missing_required_practice_fails_static_validation - contract = EvalOpsEngineeringPracticesAudit.load_contract(".github/contracts/engineering-practices.yml") - contract["practices"].reject! { |practice| practice["id"] == "security-slo" } - - validation = EvalOpsEngineeringPracticesAudit.validate_contract(contract, root: Dir.pwd) - - assert_equal "fail", validation.fetch("status") - assert validation.fetch("errors").any? { |error| error.include?("missing required practices: security-slo") } - end - - def test_live_audit_reports_ruleset_rail_backlog_security_and_release_findings - contract = EvalOpsEngineeringPracticesAudit.load_contract(".github/contracts/engineering-practices.yml") - runner = FakeGhRunner.new - - report = EvalOpsEngineeringPracticesAudit.live_audit( - contract, - runner: runner, - root: Dir.pwd, - generated_at: Time.utc(2026, 5, 20, 4, 0, 0) - ) - - assert_equal "attention", report.fetch("status") - findings = report.fetch("findings") - assert findings.any? { |finding| finding.fetch("practice") == "org-rulesets" } - assert findings.any? { |finding| finding.fetch("practice") == "operating-rails" && finding.fetch("repo") == "evalops/platform" } - assert findings.any? { |finding| finding.fetch("practice") == "backlog-lifecycle" } - assert findings.any? { |finding| finding.fetch("practice") == "security-slo" } - assert findings.any? { |finding| finding.fetch("practice") == "release-train-state" } - refute findings.any? { |finding| finding.fetch("message").include?("CodeQL") } - - markdown = EvalOpsEngineeringPracticesAudit.markdown_report(report) - assert_includes markdown, "Engineering Practices Audit" - assert_includes markdown, "Missing Repo Rails" - assert_includes markdown, "No-CodeQL config" - JSON.parse(JSON.pretty_generate(report)) - end - - def test_required_status_ruleset_satisfies_critical_repo_policy - contract = EvalOpsEngineeringPracticesAudit.load_contract(".github/contracts/engineering-practices.yml") - contract["live_audit"]["sampled_repos"] = ["evalops/platform"] - contract["repo_tiers"]["critical"]["repos"] = ["evalops/platform"] - runner = RulesetPolicyGhRunner.new - - report = EvalOpsEngineeringPracticesAudit.live_audit( - contract, - runner: runner, - root: Dir.pwd, - generated_at: Time.utc(2026, 5, 20, 4, 0, 0) - ) - - policy = report.dig("live", "branch_protection").fetch(0) - assert_equal ["ci"], policy.fetch("ruleset_required_status_checks") - refute report.fetch("findings").any? { |finding| finding.fetch("practice") == "org-rulesets" } - end - - def test_codeql_drift_is_reported_without_fetching_code_scanning_alerts - contract = EvalOpsEngineeringPracticesAudit.load_contract(".github/contracts/engineering-practices.yml") - contract["live_audit"]["sampled_repos"] = ["evalops/platform"] - contract["repo_tiers"]["critical"]["repos"] = ["evalops/platform"] - runner = CodeqlDriftGhRunner.new - - report = EvalOpsEngineeringPracticesAudit.live_audit( - contract, - runner: runner, - root: Dir.pwd, - generated_at: Time.utc(2026, 5, 21, 18, 0, 0) - ) - - assert_equal "attention", report.fetch("status") - findings = report.fetch("findings") - assert findings.any? { |finding| finding.fetch("message").include?("CodeQL/default code-scanning baseline drifted") } - assert findings.any? { |finding| finding.fetch("message").include?("CodeQL or GitHub Code Scanning workflow references") } - assert findings.any? { |finding| finding.fetch("message").include?("CodeQL appears in branch protection") } - assert_equal 1, report.dig("live", "no_codeql", "required_check_matches").length - end - - class FakeGhRunner - def initialize - @files = { - "evalops/platform" => { - "AGENTS.md" => true, - ".github/workflows/review-thread-guard.yml" => true - }, - "evalops/deploy" => { - "AGENTS.md" => true, - ".github/CODEOWNERS" => true, - ".github/workflows/review-thread-guard.yml" => true, - ".github/workflows/evalopsbot-review-request.yml" => true, - ".github/workflows/codex-rails-check.yml" => true - } - } - end - - def call(args) - command = args.join(" ") - return json([]) if command == "api -X GET /orgs/evalops/rulesets" - return json(branch_protection(args)) if command.include?("/branches/main/protection") - return content_response(args) if command.include?("/contents/") - return search_response(args) if command.start_with?("api -X GET /search/issues") - return issue_list_response if command.start_with?("issue list") - return ["Closing because the sentinel no longer ranks this class.\n", "", true] if command.start_with?("issue view 69") - return [JSON.generate(dependabot_alert) + "\n", "", true] if command.include?("/dependabot/alerts") - return ["{}\n{}\n", "", true] if command.include?("/secret-scanning/alerts") - return code_security_defaults if command == "api -X GET /orgs/evalops/code-security/configurations/defaults" - return code_security_repositories if command.include?("/code-security/configurations/245233/repositories") - return json([]) if command.start_with?("search code ") - raise "audit must not fetch code scanning alerts" if command.include?("/code-scanning/alerts") - - json({}) - end - - private - - def json(value) - [JSON.generate(value), "", true] - end - - def branch_protection(args) - repo = args.find { |arg| arg.start_with?("/repos/") }.split("/")[2, 2].join("/") - return {} if repo == "evalops/platform" - - { - "required_status_checks" => { - "contexts" => ["ci"] - }, - "required_pull_request_reviews" => {}, - "enforce_admins" => { - "enabled" => true - } - } - end - - def content_response(args) - path = args.find { |arg| arg.start_with?("/repos/") } - parts = path.split("/") - repo = parts[2, 2].join("/") - file = parts[5, parts.length].join("/") - present = @files.fetch(repo, {}).fetch(file, false) - present ? json({ "path" => file }) : ["", "not found", false] - end - - def search_response(args) - query_arg = args.find { |arg| arg.start_with?("q=") }.to_s - count = if query_arg.include?("Hold prod-continuous") - 4 - elsif query_arg.include?("Guardrail candidate") - 2 - else - 0 - end - json({ "total_count" => count, "incomplete_results" => false }) - end - - def issue_list_response - json( - [ - { - "number" => 69, - "title" => "[codex] Guardrail backlog: Workflow shell footgun (workflow-shell-footgun)", - "updatedAt" => "2026-05-20T01:22:06Z" - } - ] - ) - end - - def dependabot_alert - { - "repository" => { - "full_name" => "evalops/platform" - }, - "security_vulnerability" => { - "severity" => "high" - } - } - end - - def code_security_defaults - json( - [ - { - "default_for_new_repos" => "all", - "configuration" => { - "id" => 245_233, - "name" => "EvalOps security baseline recommended", - "advanced_security" => "secret_protection", - "code_scanning_default_setup" => "disabled", - "dependency_graph_autosubmit_action" => "disabled" - } - } - ] - ) - end - - def code_security_repositories - repos = [ - "evalops/platform", - "evalops/deploy", - "evalops/ensemble", - "evalops/maestro-internal", - "evalops/maestro", - "evalops/cerebro", - "evalops/chat", - "evalops/.github", - "evalops/hopper", - "evalops/nimbus", - "evalops/kestrel" - ] - json(repos.map { |repo| { "repository" => { "full_name" => repo }, "status" => "enforced" } }) - end - end - - class RulesetPolicyGhRunner - def call(args) - command = args.join(" ") - return json([ruleset_summary]) if command == "api -X GET /orgs/evalops/rulesets" - return json(ruleset_detail) if command == "api -X GET /orgs/evalops/rulesets/1" - return json({}) if command.include?("/branches/main/protection") - return json({ "path" => "ok" }) if command.include?("/contents/") - return json({ "total_count" => 0, "incomplete_results" => false }) if command.start_with?("api -X GET /search/issues") - return code_security_defaults if command == "api -X GET /orgs/evalops/code-security/configurations/defaults" - return code_security_repositories if command.include?("/code-security/configurations/245233/repositories") - return json([]) if command.start_with?("search code ") - return json([]) if command.start_with?("issue list") - return ["", "", true] if command.start_with?("issue view") - return ["", "", true] if command.include?("/dependabot/alerts") - return ["", "", true] if command.include?("/secret-scanning/alerts") - - json({}) - end - - private - - def json(value) - [JSON.generate(value), "", true] - end - - def ruleset_summary - { - "id" => 1, - "name" => "EvalOps platform required checks (evaluate)", - "target" => "branch", - "enforcement" => "evaluate" - } - end - - def ruleset_detail - ruleset_summary.merge( - "conditions" => { - "repository_name" => { - "include" => ["platform"], - "exclude" => [] - }, - "ref_name" => { - "include" => ["~DEFAULT_BRANCH"], - "exclude" => [] - } - }, - "rules" => [ - { - "type" => "required_status_checks", - "parameters" => { - "required_status_checks" => [ - { - "context" => "ci" - } - ] - } - } - ] - ) - end - - def code_security_defaults - json( - [ - { - "default_for_new_repos" => "all", - "configuration" => { - "id" => 245_233, - "name" => "EvalOps security baseline recommended", - "advanced_security" => "secret_protection", - "code_scanning_default_setup" => "disabled", - "dependency_graph_autosubmit_action" => "disabled" - } - } - ] - ) - end - - def code_security_repositories - json([{ "repository" => { "full_name" => "evalops/platform" }, "status" => "enforced" }]) - end - end - - class CodeqlDriftGhRunner - def call(args) - command = args.join(" ") - return json([]) if command == "api -X GET /orgs/evalops/rulesets" - return branch_protection if command.include?("/branches/main/protection") - return json({ "path" => "ok" }) if command.include?("/contents/") - return json({ "total_count" => 0, "incomplete_results" => false }) if command.start_with?("api -X GET /search/issues") - return json([]) if command.start_with?("issue list") - return ["", "", true] if command.start_with?("issue view") - return ["", "", true] if command.include?("/dependabot/alerts") - return ["", "", true] if command.include?("/secret-scanning/alerts") - return code_security_defaults if command == "api -X GET /orgs/evalops/code-security/configurations/defaults" - return code_security_repositories if command.include?("/code-security/configurations/245233/repositories") - return code_search_match if command.start_with?("search code ") - raise "audit must not fetch code scanning alerts" if command.include?("/code-scanning/alerts") - - json({}) - end - - private - - def json(value) - [JSON.generate(value), "", true] - end - - def branch_protection - json( - { - "required_status_checks" => { - "contexts" => ["CodeQL"] - }, - "required_pull_request_reviews" => {}, - "enforce_admins" => { - "enabled" => true - } - } - ) - end - - def code_security_defaults - json( - [ - { - "default_for_new_repos" => "all", - "configuration" => { - "id" => 245_233, - "name" => "EvalOps security baseline recommended", - "advanced_security" => "enabled", - "code_scanning_default_setup" => "enabled", - "dependency_graph_autosubmit_action" => "disabled" - } - } - ] - ) - end - - def code_security_repositories - json([{ "repository" => { "full_name" => "evalops/platform" }, "status" => "enforced" }]) - end - - def code_search_match - json( - [ - { - "repository" => { - "nameWithOwner" => "evalops/platform" - }, - "path" => ".github/workflows/codeql.yml" - } - ] - ) - end - end -end diff --git a/test/check_pr_review_threads_test.rb b/test/check_pr_review_threads_test.rb deleted file mode 100644 index c927e82..0000000 --- a/test/check_pr_review_threads_test.rb +++ /dev/null @@ -1,341 +0,0 @@ -# frozen_string_literal: true - -require "minitest/autorun" -require_relative "../.github/scripts/check-pr-review-threads" - -class CheckPrReviewThreadsTest < Minitest::Test - def test_detects_priority_and_high_severity_markers - assert_equal "p1", EvalOpsReviewThreadGuard.severity("**P1 Badge** Stop the rollout") - assert_equal "high", EvalOpsReviewThreadGuard.severity("### Bug\n\n**High Severity**") - assert_equal "low", EvalOpsReviewThreadGuard.severity("**Low Severity**") - assert_equal "none", EvalOpsReviewThreadGuard.severity("nit: wording") - end - - def test_filters_unresolved_non_outdated_threads_at_threshold - payload = { - "data" => { - "repository" => { - "pullRequest" => { - "reviewThreads" => { - "nodes" => [ - thread("T1", resolved: false, outdated: false, body: "**P1 Badge** broken"), - thread("T2", resolved: false, outdated: true, body: "**High Severity** stale"), - thread("T3", resolved: true, outdated: false, body: "**High Severity** fixed"), - thread("T4", resolved: false, outdated: false, body: "**Low Severity** polish") - ] - } - } - } - } - } - - threads = EvalOpsReviewThreadGuard.unresolved_threads(payload, min_severity: "high") - - assert_equal ["T1"], threads.map { |thread| thread.fetch(:id) } - end - - def test_detects_severity_markers_on_later_thread_comments - payload = { - "data" => { - "repository" => { - "pullRequest" => { - "reviewThreads" => { - "nodes" => [ - thread( - "T1", - resolved: false, - outdated: false, - body: "initial note", - comments: [ - comment("initial note", url: "https://github.com/evalops/example/pull/1#discussion-1"), - comment("**High Severity** follow-up", url: "https://github.com/evalops/example/pull/1#discussion-2") - ] - ) - ] - } - } - } - } - } - - threads = EvalOpsReviewThreadGuard.unresolved_threads(payload, min_severity: "high") - - assert_equal ["T1"], threads.map { |thread| thread.fetch(:id) } - assert_equal "high", threads.first.fetch(:severity) - assert_equal "https://github.com/evalops/example/pull/1#discussion-2", threads.first.fetch(:url) - end - - def test_detects_top_level_pr_comment_severity_markers - payload = payload_with( - comments: [ - { - "author" => { "login" => "reviewer" }, - "body" => "**High Severity** release mirror can bypass review debt", - "url" => "https://github.com/evalops/example/pull/1#issuecomment-1" - } - ] - ) - - feedback = EvalOpsReviewThreadGuard.blocking_feedback(payload, min_severity: "high") - - assert_equal ["pr_comment"], feedback.map { |item| item.fetch(:kind) } - assert_equal "high", feedback.first.fetch(:severity) - end - - def test_detects_top_level_review_body_severity_markers - payload = payload_with( - head_ref_oid: "head-sha", - reviews: [ - { - "author" => { "login" => "reviewer" }, - "commit" => { "oid" => "head-sha" }, - "state" => "COMMENTED", - "body" => "**P1 Badge** paired public PR feedback is missing", - "url" => "https://github.com/evalops/example/pull/1#pullrequestreview-1" - } - ] - ) - - feedback = EvalOpsReviewThreadGuard.blocking_feedback(payload, min_severity: "high") - - assert_equal ["pr_review"], feedback.map { |item| item.fetch(:kind) } - assert_equal "p1", feedback.first.fetch(:severity) - end - - def test_skips_top_level_review_feedback_from_superseded_heads - payload = payload_with( - head_ref_oid: "new-head", - reviews: [ - { - "author" => { "login" => "chatgpt-codex-connector[bot]" }, - "commit" => { "oid" => "old-head" }, - "state" => "COMMENTED", - "body" => "**P1 Badge** stale feedback already fixed on the latest head", - "url" => "https://github.com/evalops/example/pull/1#pullrequestreview-1" - }, - { - "author" => { "login" => "reviewer" }, - "commit" => { "oid" => "new-head" }, - "state" => "COMMENTED", - "body" => "**High Severity** latest-head feedback still blocks", - "url" => "https://github.com/evalops/example/pull/1#pullrequestreview-2" - } - ] - ) - - feedback = EvalOpsReviewThreadGuard.blocking_feedback(payload, min_severity: "high") - - assert_equal ["https://github.com/evalops/example/pull/1#pullrequestreview-2"], feedback.map { |item| item.fetch(:url) } - end - - def test_skips_informational_bot_pr_summaries - payload = payload_with( - comments: [ - { - "author" => { "login" => "cursor" }, - "body" => "## PR Summary\n\n| Severity | Count |\n| --- | --- |\n| P0 | 0 |", - "url" => "https://github.com/evalops/example/pull/1#issuecomment-1" - }, - { - "author" => { "login" => "reviewer" }, - "body" => "**High Severity** release mirror can bypass review debt", - "url" => "https://github.com/evalops/example/pull/1#issuecomment-2" - } - ], - reviews: [ - { - "author" => { "login" => "cursor" }, - "state" => "COMMENTED", - "body" => "\n## Walkthrough\n\nMentions P1 as a summary bucket.", - "url" => "https://github.com/evalops/example/pull/1#pullrequestreview-1" - } - ] - ) - - feedback = EvalOpsReviewThreadGuard.blocking_feedback(payload, min_severity: "high") - - assert_equal ["pr_comment"], feedback.map { |item| item.fetch(:kind) } - assert_equal "https://github.com/evalops/example/pull/1#issuecomment-2", feedback.first.fetch(:url) - end - - def test_first_nonblank_line_normalizes_leading_blank_review_bodies - assert_equal "**P1 Badge** real feedback", EvalOpsReviewThreadGuard.first_nonblank_line("\n\n**P1 Badge** real feedback\n\nDetails") - end - - def test_can_include_outdated_threads_when_requested - payload = { - "data" => { - "repository" => { - "pullRequest" => { - "reviewThreads" => { - "nodes" => [ - thread("T1", resolved: false, outdated: true, body: "**High Severity** stale") - ] - } - } - } - } - } - - threads = EvalOpsReviewThreadGuard.unresolved_threads( - payload, - min_severity: "high", - include_outdated: true - ) - - assert_equal ["T1"], threads.map { |thread| thread.fetch(:id) } - end - - def test_merge_review_thread_nodes_is_nil_safe_for_partial_graphql_payloads - partial_payloads = [ - nil, - {}, - { "data" => nil }, - { "data" => {} }, - { "data" => { "repository" => nil } }, - { "data" => { "repository" => {} } }, - { "data" => { "repository" => { "pullRequest" => nil } } }, - { "data" => { "repository" => { "pullRequest" => {} } } }, - { "data" => { "repository" => { "pullRequest" => { "reviewThreads" => nil } } } } - ] - - partial_payloads.each do |payload| - merged = EvalOpsReviewThreadGuard.merge_review_thread_nodes(payload, [thread("T1", resolved: false, outdated: false, body: "**High Severity** broken")]) - - assert_equal ["T1"], merged.dig("data", "repository", "pullRequest", "reviewThreads", "nodes").map { |node| node.fetch("id") } - end - end - - def test_merge_review_thread_nodes_survives_random_sparse_payload_shapes - random = Random.new(12_345) - 250.times do - payload = random_payload(random, depth: 0) - nodes = random.rand(3).times.map do |index| - thread("T#{index}", resolved: false, outdated: false, body: "**High Severity** broken") - end - - merged = EvalOpsReviewThreadGuard.merge_review_thread_nodes(payload, nodes) - - assert_equal nodes, merged.dig("data", "repository", "pullRequest", "reviewThreads", "nodes") - assert_kind_of Hash, merged.dig("data", "repository", "pullRequest", "reviewThreads") - end - end - - def test_merge_pull_request_connections_sets_all_feedback_channels - merged = EvalOpsReviewThreadGuard.merge_pull_request_connections( - {}, - comments: [{ "body" => "**High Severity** top-level" }], - reviews: [{ "body" => "**P1 Badge** review" }], - review_threads: [thread("T1", resolved: false, outdated: false, body: "**High Severity** thread")] - ) - - pull_request = merged.dig("data", "repository", "pullRequest") - assert_equal ["**High Severity** top-level"], pull_request.dig("comments", "nodes").map { |node| node.fetch("body") } - assert_equal ["**P1 Badge** review"], pull_request.dig("reviews", "nodes").map { |node| node.fetch("body") } - assert_equal ["T1"], pull_request.dig("reviewThreads", "nodes").map { |node| node.fetch("id") } - end - - def test_fetch_connection_tail_uses_connection_specific_cursor - calls = [] - original = EvalOpsReviewThreadGuard.method(:fetch_graphql) - tail_payload = connection_payload("comments", [{ "body" => "**High Severity** later page" }], has_next: false) - EvalOpsReviewThreadGuard.define_singleton_method(:fetch_graphql) do |**kwargs| - calls << kwargs - tail_payload - end - - nodes = EvalOpsReviewThreadGuard.fetch_connection_tail( - owner: "evalops", - name: "example", - pr: 1, - query: "query", - connection_name: "comments", - first_connection: { - "nodes" => [{ "body" => "first page" }], - "pageInfo" => { "hasNextPage" => true, "endCursor" => "cursor-1" } - } - ) - - assert_equal [{ "body" => "**High Severity** later page" }], nodes - assert_equal ["cursor-1"], calls.map { |call| call.fetch(:cursor) } - ensure - EvalOpsReviewThreadGuard.define_singleton_method(:fetch_graphql) do |**kwargs| - original.call(**kwargs) - end - end - - private - - def payload_with(comments: [], reviews: [], threads: [], head_ref_oid: nil) - { - "data" => { - "repository" => { - "pullRequest" => { - "headRefOid" => head_ref_oid, - "comments" => { "nodes" => comments }, - "reviews" => { "nodes" => reviews }, - "reviewThreads" => { "nodes" => threads } - } - } - } - } - end - - def thread(id, resolved:, outdated:, body:, comments: nil) - { - "id" => id, - "isResolved" => resolved, - "isOutdated" => outdated, - "path" => "app/main.go", - "line" => 42, - "comments" => { - "nodes" => comments || [comment(body)] - } - } - end - - def comment(body, url: "https://github.com/evalops/example/pull/1#discussion") - { - "body" => body, - "url" => url - } - end - - def connection_payload(name, nodes, has_next:) - { - "data" => { - "repository" => { - "pullRequest" => { - name => { - "nodes" => nodes, - "pageInfo" => { - "hasNextPage" => has_next, - "endCursor" => has_next ? "next-cursor" : nil - } - } - } - } - } - } - end - - def random_payload(random, depth:) - return random_leaf(random) if depth > 4 - - case random.rand(5) - when 0 - nil - when 1 - random_leaf(random) - else - keys = %w[data repository pullRequest reviewThreads nodes pageInfo comments reviews body] - random.rand(0..4).times.each_with_object({}) do |_index, hash| - hash[keys.sample(random: random)] = random_payload(random, depth: depth + 1) - end - end - end - - def random_leaf(random) - [nil, true, false, random.rand(100), "value", []].sample(random: random) - end -end diff --git a/test/classify_agent_authorship_test.rb b/test/classify_agent_authorship_test.rb deleted file mode 100644 index eec8a3d..0000000 --- a/test/classify_agent_authorship_test.rb +++ /dev/null @@ -1,83 +0,0 @@ -# frozen_string_literal: true - -require "json" -require "minitest/autorun" -require "open3" -require "tempfile" - -class ClassifyAgentAuthorshipTest < Minitest::Test - ROOT = File.expand_path("..", __dir__) - SCRIPT = File.join(ROOT, ".github/scripts/classify-agent-authorship.rb") - - def test_untrailered_commits_are_agent_assisted - outputs = classify([{ "sha" => "abc", "message" => "fix: regular change" }]) - - assert_equal "agent-assisted", outputs.fetch("label") - assert_equal "1", outputs.fetch("total_commits") - assert_equal "0", outputs.fetch("agent_commits") - assert_equal "1", outputs.fetch("untrailered_commits") - assert_equal "0", outputs.fetch("incomplete_agent_commits") - end - - def test_complete_maestro_trailers_are_agent_authored - outputs = classify([{ "sha" => "abc", "message" => <<~MSG }]) - feat: ship change - - Co-Authored-By: Maestro - Maestro-Version: 2026.04.28 / gpt-5 - Maestro-Prompt-Id: prompt-123 - Maestro-Approvals-Id: approval-456 - MSG - - assert_equal "agent-authored", outputs.fetch("label") - assert_equal "1", outputs.fetch("agent_commits") - assert_equal "0", outputs.fetch("untrailered_commits") - assert_equal "0", outputs.fetch("incomplete_agent_commits") - end - - def test_mixed_authorship_and_incomplete_trailers_are_reported - outputs = classify( - [ - { "sha" => "abc", "message" => <<~MSG }, - feat: partial agent change - - Co-Authored-By: Maestro - Maestro-Version: 2026.04.28 / gpt-5 - MSG - { "sha" => "def", "message" => "docs: human follow-up" }, - ], - ) - - assert_equal "mixed-authorship", outputs.fetch("label") - assert_equal "1", outputs.fetch("agent_commits") - assert_equal "1", outputs.fetch("untrailered_commits") - assert_equal "1", outputs.fetch("incomplete_agent_commits") - end - - def test_github_output_file_gets_same_outputs - Tempfile.create("github-output") do |file| - outputs = classify( - [{ "sha" => "abc", "message" => "fix: regular change" }], - github_output: file.path, - ) - file_outputs = parse_outputs(File.read(file.path)) - - assert_equal outputs, file_outputs - end - end - - private - - def classify(commits, github_output: nil) - input = commits.map(&:to_json).join("\n") - args = ["ruby", SCRIPT] - args += ["--github-output", github_output] if github_output - stdout, stderr, status = Open3.capture3(*args, stdin_data: input) - assert status.success?, stderr - parse_outputs(stdout) - end - - def parse_outputs(text) - text.each_line(chomp: true).to_h { |line| line.split("=", 2) } - end -end diff --git a/test/evalops_codex_hook_guard_test.rb b/test/evalops_codex_hook_guard_test.rb deleted file mode 100644 index 0bc8184..0000000 --- a/test/evalops_codex_hook_guard_test.rb +++ /dev/null @@ -1,50 +0,0 @@ -# frozen_string_literal: true - -require "minitest/autorun" -require_relative "../.github/scripts/evalops-codex-hook-guard" - -class EvalOpsCodexHookGuardTest < Minitest::Test - def test_session_start_warns_inside_evalops_repo - message = EvalOpsCodexHookGuard.session_start_message( - cwd: "/Users/jonathanhaas/repos/platform", - remote_url: "git@github.com:evalops/platform.git" - ) - - assert_includes message, "EvalOps repo detected" - assert_includes message, "bounded one-shot GitHub polling" - end - - def test_pretool_git_guard_warns_for_destructive_command_in_dirty_tree - warning = EvalOpsCodexHookGuard.pretool_git_guard( - command: "git reset --hard origin/main", - status_text: " M README.md\n" - ) - - assert_includes warning, "Destructive git command" - end - - def test_pretool_git_guard_allows_clean_worktree - warning = EvalOpsCodexHookGuard.pretool_git_guard( - command: "git reset --hard origin/main", - status_text: "" - ) - - assert_nil warning - end - - def test_stop_readiness_warns_without_review_thread_evidence - warning = EvalOpsCodexHookGuard.stop_readiness_warning( - transcript: "The PR is ready to merge after tests passed." - ) - - assert_includes warning, "no recent review-thread" - end - - def test_stop_readiness_accepts_review_thread_evidence - warning = EvalOpsCodexHookGuard.stop_readiness_warning( - transcript: "Ready to merge after checking GraphQL reviewThreads and statusCheckRollup." - ) - - assert_nil warning - end -end diff --git a/test/evalops_pr_lens_review_test.rb b/test/evalops_pr_lens_review_test.rb deleted file mode 100644 index 11ae040..0000000 --- a/test/evalops_pr_lens_review_test.rb +++ /dev/null @@ -1,621 +0,0 @@ -# frozen_string_literal: true - -require "json" -require "minitest/autorun" -require "tmpdir" -require_relative "../.github/scripts/evalops-pr-lens-review" - -class EvalOpsPrLensReviewTest < Minitest::Test - def test_parse_pr_filter_requires_repo_when_multiple_repos - error = assert_raises(ArgumentError) do - EvalOpsPrLensReview.parse_pr_filter("2023", repos: %w[evalops/platform evalops/deploy]) - end - - assert_includes error.message, "require exactly one target repo" - end - - def test_parse_pr_filter_accepts_repo_number_pairs - filter = EvalOpsPrLensReview.parse_pr_filter( - "platform#2023,evalops/deploy#7", - repos: %w[evalops/platform evalops/deploy] - ) - - assert_equal [2023], filter.fetch("evalops/platform") - assert_equal [7], filter.fetch("evalops/deploy") - end - - def test_parse_pr_filter_accepts_bare_pr_number_for_single_repo_dispatch - filter = EvalOpsPrLensReview.parse_pr_filter( - "2023", - repos: %w[evalops/platform] - ) - - assert_equal [2023], filter.fetch("evalops/platform") - end - - def test_matrix_for_uses_stable_lens_contexts - prs = [ - { - "repo" => "evalops/platform", - "repo_slug" => "evalops-platform", - "number" => 2023, - "head_sha" => "abc123" - } - ] - - matrix = EvalOpsPrLensReview.matrix_for(prs, lenses: ["migration-safety"]) - - assert_equal 1, matrix.length - assert_equal "evalops-pr-lens/migration-safety", matrix.fetch(0).fetch("check_context") - assert_equal "abc123", matrix.fetch(0).fetch("head_sha") - end - - def test_matrix_for_uses_classified_lenses - prs = [ - { - "repo" => "evalops/deploy", - "repo_slug" => "evalops-deploy", - "number" => 10, - "head_sha" => "head", - "base_sha" => "base", - "base_ref" => "main", - "head_ref" => "branch", - "lenses" => %w[iam-blast-radius argo-manifest-skew] - } - ] - - matrix = EvalOpsPrLensReview.matrix_for(prs) - - assert_equal %w[iam-blast-radius argo-manifest-skew], matrix.map { |row| row.fetch("lens") } - assert_equal ["base"], matrix.map { |row| row.fetch("base_sha") }.uniq - end - - def test_lenses_for_paths_selects_targeted_review_lenses - lenses = EvalOpsPrLensReview.lenses_for_paths( - [ - ".github/workflows/release.yml", - "clusters/prod/kustomization.yaml", - "proto/platform/v1/service.proto" - ] - ) - - assert_includes lenses, "iam-blast-radius" - assert_includes lenses, "argo-manifest-skew" - assert_includes lenses, "nats-contract-drift" - assert_includes lenses, "generated-sdk-delta" - refute_includes lenses, "eval-regression-risk" - end - - def test_lenses_for_paths_skips_docs_only_prs - assert_empty EvalOpsPrLensReview.lenses_for_paths(["README.md", "docs/runbook.md"]) - end - - def test_review_context_helpers_scrub_invalid_utf8_bytes - invalid = "comment \xC3 body".b - - section = EvalOpsPrLensReview.list_section("Inline review comments", [" #{invalid} "]) - snippet = EvalOpsPrLensReview.short_text(invalid, max_bytes: 10) - - assert_includes section, "comment" - assert_predicate section, :valid_encoding? - assert_includes snippet, "...[truncated]" - assert_predicate snippet, :valid_encoding? - end - - def test_discover_open_prs_can_force_lenses_for_explicit_review_requests - pr = { - "number" => 103, - "title" => "Canary", - "html_url" => "https://github.com/evalops/.github/pull/103", - "draft" => false, - "head" => { "sha" => "head", "ref" => "evalopsbot-review-canary" }, - "base" => { "sha" => "base", "ref" => "main" } - } - api = lambda do |path, **_kwargs| - case path - when "repos/evalops/.github/pulls?state=open&per_page=100" - [pr] - when "repos/evalops/.github/pulls/103/files?per_page=100" - [{ "filename" => ".github/evalopsbot-canary/review-request.md" }] - else - flunk "unexpected gh api path #{path}" - end - end - - EvalOpsPrLensReview.stub(:gh_api_json, api) do - prs = EvalOpsPrLensReview.discover_open_prs( - repos: ["evalops/.github"], - pr_filter: { "evalops/.github" => [103] }, - force_lenses: %w[migration-safety iam-blast-radius] - ) - - assert_equal %w[migration-safety iam-blast-radius], prs.fetch(0).fetch("lenses") - end - end - - def test_normalize_lens_review_drops_invalid_findings - raw = { - "summary" => "Found one issue", - "confidence_score" => 0.9, - "findings" => [ - { - "title" => "Unsafe migration", - "body" => "The migration drops a column during a rolling deploy.", - "confidence_score" => 0.92, - "priority" => 1, - "code_location" => { - "path" => "./db/migrations/001.sql", - "line" => 12 - } - }, - { - "title" => "Missing body" - } - ] - } - - review = EvalOpsPrLensReview.normalize_lens_review( - raw, - repo: "evalops/platform", - pr: 2023, - lens: "migration-safety", - head_sha: "abc123" - ) - - assert_equal "evalops-pr-lens/migration-safety", review.fetch("check_id") - assert_equal 1, review.fetch("findings").length - assert_equal "db/migrations/001.sql", review.fetch("findings").fetch(0).dig("code_location", "path") - end - - def test_high_confidence_findings_filters_and_ranks_by_confidence - reviews = [ - { - "repo" => "evalops/platform", - "pr" => 2023, - "lens" => "migration-safety", - "head_sha" => "abc123", - "check_id" => "evalops-pr-lens/migration-safety", - "findings" => [ - finding("Lower", 0.83, 1, "a.go", 2), - finding("Higher", 0.95, 2, "b.go", 4), - finding("Low confidence", 0.7, 0, "c.go", 8) - ] - } - ] - - high = EvalOpsPrLensReview.high_confidence_findings(reviews, min_confidence: 0.82) - ranked = EvalOpsPrLensReview.dedupe_and_rank(high) - - assert_equal %w[Higher Lower], ranked.map { |finding| finding.fetch("title") } - end - - def test_dedupe_and_rank_merges_same_defect_across_nearby_lens_findings - findings = [ - finding("Workflow token can write every repo", 0.91, 1, ".github/workflows/release.yml", 22).merge( - "repo" => "evalops/deploy", - "pr" => 10, - "lens" => "iam-blast-radius", - "head_sha" => "abc123", - "check_id" => "evalops-pr-lens/iam-blast-radius" - ), - finding("Release workflow token writes every repository", 0.96, 1, ".github/workflows/release.yml", 24).merge( - "repo" => "evalops/deploy", - "pr" => 10, - "lens" => "migration-safety", - "head_sha" => "abc123", - "check_id" => "evalops-pr-lens/migration-safety" - ) - ] - - ranked = EvalOpsPrLensReview.dedupe_and_rank(findings) - - assert_equal 1, ranked.length - assert_equal "Release workflow token writes every repository", ranked.fetch(0).fetch("title") - end - - def test_post_status_also_attempts_check_run_without_breaking_status_publication - calls = [] - ok = Object.new - ok.define_singleton_method(:success?) { true } - capture = lambda do |_env, *command, stdin_data: nil| - calls << { command: command, stdin_data: stdin_data } - if command.include?("check-runs?check_name=evalops-pr-lens%2Fmeta-review&per_page=100") - [JSON.generate("check_runs" => []), "", ok] - else - ["{}", "", ok] - end - end - - Open3.stub(:capture3, capture) do - EvalOpsPrLensReview.post_status( - repo: "evalops/deploy", - sha: "abc123", - context: "evalops-pr-lens/meta-review", - state: "success", - description: "No high-confidence PR lens findings", - target_url: "https://github.com/evalops/.github/actions/runs/1" - ) - end - - assert calls.any? { |call| call.fetch(:command).include?("repos/evalops/deploy/statuses/abc123") } - check_create = calls.find { |call| call.fetch(:command).include?("repos/evalops/deploy/check-runs") } - assert check_create - body = JSON.parse(check_create.fetch(:stdin_data)) - assert_equal "evalops-pr-lens/meta-review", body.fetch("name") - assert_equal "completed", body.fetch("status") - assert_equal "success", body.fetch("conclusion") - end - - def test_github_app_jwt_uses_app_id_as_issuer - key = OpenSSL::PKey::RSA.generate(2048) - jwt = EvalOpsPrLensReview.github_app_jwt(app_id: "12345", private_key: key.to_pem, now: Time.utc(2026, 5, 20, 1, 2, 3)) - _header, payload, _signature = jwt.split(".") - decoded = JSON.parse(Base64.urlsafe_decode64(payload + ("=" * ((4 - payload.length % 4) % 4)))) - - assert_equal "12345", decoded.fetch("iss") - assert_equal Time.utc(2026, 5, 20, 1, 1, 3).to_i, decoded.fetch("iat") - end - - def test_lens_routing_config_overrides_default_review_options - Dir.mktmpdir do |dir| - config = File.join(dir, "routing.yml") - File.write( - config, - <<~YAML - defaults: - provider: anthropic - model: claude-opus-4-7 - max_diff_bytes: 180000 - lenses: - generated-sdk-delta: - model: claude-opus-4-7-generated - max_diff_bytes: 260000 - YAML - ) - - options = EvalOpsPrLensReview.effective_review_options( - lens: "generated-sdk-delta", - provider: "openai", - model: "gpt-5.2", - max_diff_bytes: 1000, - routing_config: config - ) - - assert_equal "anthropic", options.fetch(:provider) - assert_equal "claude-opus-4-7-generated", options.fetch(:model) - assert_equal 260000, options.fetch(:max_diff_bytes) - end - end - - def test_comment_body_contains_only_ranked_findings - findings = [ - finding("Unsafe IAM expansion", 0.94, 1, "infra/main.tf", 22).merge( - "repo" => "evalops/deploy", - "pr" => 10, - "lens" => "iam-blast-radius", - "head_sha" => "abc123", - "check_id" => "evalops-pr-lens/iam-blast-radius" - ) - ] - - body = EvalOpsPrLensReview.comment_body( - repo: "evalops/deploy", - pr: 10, - findings: findings, - min_confidence: 0.82, - target_url: "https://github.com/evalops/.github/actions/runs/1" - ) - - assert_includes body, EvalOpsPrLensReview::MARKER - assert_includes body, "High-confidence findings only" - assert_includes body, "`infra/main.tf:22`" - assert_includes body, "`evalops-pr-lens/iam-blast-radius`" - end - - def test_anthropic_request_omits_temperature_for_opus_4_7 - request_body = nil - fake_response = Struct.new(:body) do - def is_a?(klass) - klass == Net::HTTPSuccess || super - end - end.new(JSON.generate({ "content" => [{ "text" => "{\"findings\":[]}" }] })) - fake_http = Object.new - fake_http.define_singleton_method(:request) do |request| - request_body = JSON.parse(request.body) - fake_response - end - - http_start = ->(*_args, &block) { block.call(fake_http) } - Net::HTTP.stub(:start, http_start) do - EvalOpsPrLensReview.call_anthropic( - prompt: "Return JSON", - model: "claude-opus-4-7", - api_key: "test-key" - ) - end - - assert_equal "claude-opus-4-7", request_body.fetch("model") - refute_includes request_body.keys, "temperature" - end - - def test_gh_api_uses_input_flag_for_request_body - captured = nil - ok_status = Object.new - ok_status.define_singleton_method(:success?) { true } - capture = lambda do |env, *command, stdin_data: nil| - captured = { - env: env, - command: command, - stdin_data: stdin_data - } - ["{}", "", ok_status] - end - - Open3.stub(:capture3, capture) do - EvalOpsPrLensReview.gh_api( - "--method", - "POST", - "repos/evalops/.github/issues/1/comments", - input: JSON.generate(body: "hello"), - token: "test-token" - ) - end - - assert_equal "test-token", captured.fetch(:env).fetch("GH_TOKEN") - assert_equal( - ["gh", "api", "--method", "POST", "repos/evalops/.github/issues/1/comments", "--input", "-"], - captured.fetch(:command) - ) - assert_equal "{\"body\":\"hello\"}", captured.fetch(:stdin_data) - end - - def test_normalize_search_pull_requests_dedupes_review_requests - rows = [ - { - "repository" => { "nameWithOwner" => "evalops/deploy" }, - "number" => 3511, - "title" => "Runtime rollout aliases", - "url" => "https://github.com/evalops/deploy/pull/3511", - "isDraft" => false, - "updatedAt" => "2026-05-17T22:46:41Z" - }, - { - "repository" => { "nameWithOwner" => "evalops/deploy" }, - "number" => 3511, - "title" => "Duplicate search row", - "url" => "https://github.com/evalops/deploy/pull/3511", - "isDraft" => false - } - ] - - prs = EvalOpsPrLensReview.normalize_search_pull_requests(rows) - - assert_equal 1, prs.length - assert_equal "evalops/deploy", prs.fetch(0).fetch("repo") - assert_equal "evalops-deploy", prs.fetch(0).fetch("repo_slug") - assert_equal 3511, prs.fetch(0).fetch("number") - end - - def test_review_started_for_head_uses_meta_review_status_context - api = lambda do |*_args, **_kwargs| - { - "statuses" => [ - { "context" => "evalops-pr-lens/migration-safety" }, - { "context" => EvalOpsPrLensReview.meta_context } - ] - } - end - - EvalOpsPrLensReview.stub(:gh_api_json, api) do - assert EvalOpsPrLensReview.review_started_for_head?(repo: "evalops/deploy", head_sha: "abc123") - end - end - - def test_dispatch_requested_reviews_queues_and_marks_pending - candidate = { - "repo" => "evalops/deploy", - "repo_slug" => "evalops-deploy", - "number" => 3511, - "title" => "Runtime rollout aliases", - "url" => "https://github.com/evalops/deploy/pull/3511", - "draft" => false - } - dispatched = [] - marked = [] - - EvalOpsPrLensReview.stub(:review_requested_prs, ->(**_kwargs) { [candidate] }) do - EvalOpsPrLensReview.stub(:pr_head_sha, ->(**_kwargs) { "abc123" }) do - EvalOpsPrLensReview.stub(:review_started_for_head?, ->(**_kwargs) { false }) do - EvalOpsPrLensReview.stub(:dispatch_review_requested, ->(**kwargs) { dispatched << kwargs }) do - EvalOpsPrLensReview.stub(:mark_review_queued, ->(**kwargs) { marked << kwargs }) do - result = EvalOpsPrLensReview.dispatch_requested_reviews( - owner: "evalops", - reviewer: "EvalOpsBot", - limit: 100, - dry_run: false, - target_url: "https://github.com/evalops/.github/actions/runs/1" - ) - - assert_equal 1, result.fetch("dispatched_count") - assert_equal [{ repo: "evalops/deploy", pr: 3511, requested_reviewer: "EvalOpsBot" }], dispatched - assert_equal "abc123", marked.fetch(0).fetch(:head_sha) - end - end - end - end - end - end - - def test_build_lens_prompt_includes_review_context - pr_json = { - "title" => "Risky workflow", - "html_url" => "https://github.com/evalops/deploy/pull/1", - "draft" => false, - "base" => { - "ref" => "main", - "sha" => "base" - }, - "head" => { - "ref" => "branch", - "sha" => "head" - } - } - - prompt = EvalOpsPrLensReview.build_lens_prompt( - repo: "evalops/deploy", - pr: 1, - lens: "iam-blast-radius", - pr_json: pr_json, - file_summary: "modified\t.github/workflows/release.yml\t+10\t-2", - review_context: "Inline review comments:\n- cursor .github/workflows/release.yml:42: token now has write-all", - changed_files_text: "M\t.github/workflows/release.yml", - diff_text: "@@ workflow diff @@", - diff_truncated: false - ) - - assert_includes prompt, "Pull request context:" - assert_includes prompt, "token now has write-all" - assert_includes prompt, "Existing bot or human review comments are evidence" - end - - def test_lens_workflow_checks_out_pull_request_head_ref - workflow = File.read(File.expand_path("../.github/workflows/evalops-pr-lens-review.yml", __dir__)) - - assert_includes workflow, "Prepare target pull request head" - assert_includes workflow, "prepare-workspace" - refute_includes workflow, 'ref: refs/pull/${{ matrix.pr }}/merge' - end - - def test_prepare_workspace_writes_skipped_review_when_head_changes - Dir.mktmpdir do |dir| - output = File.join(dir, "lens-review.json") - github_output = File.join(dir, "github-output") - pr_json = { - "state" => "open", - "head" => { "sha" => "new-head" }, - "base" => { "sha" => "base", "ref" => "main" } - } - - EvalOpsPrLensReview.stub(:pr_metadata, ->(**_kwargs) { pr_json }) do - result = EvalOpsPrLensReview.prepare_workspace( - repo: "evalops/deploy", - pr: 10, - lens: "iam-blast-radius", - workspace: File.join(dir, "target"), - output: output, - github_output: github_output, - snapshot_head_sha: "old-head", - snapshot_base_sha: "base", - token: nil - ) - - review = JSON.parse(File.read(output)) - assert_equal true, result.fetch("skip") - assert_equal "skipped", review.fetch("status") - assert_equal "pull request head changed since discovery", review.fetch("skip_reason") - assert_includes File.read(github_output), "skip=true" - end - end - end - - def test_meta_review_marks_incomplete_coverage_when_expected_lens_artifact_is_missing - Dir.mktmpdir do |dir| - discovery_dir = File.join(dir, "pr-lens-discovery") - review_dir = File.join(dir, "pr-lens-evalops-deploy-10-iam-blast-radius") - FileUtils.mkdir_p(discovery_dir) - FileUtils.mkdir_p(review_dir) - File.write( - File.join(discovery_dir, "pr-lens-targets.json"), - JSON.pretty_generate( - [ - { - "repo" => "evalops/deploy", - "number" => 10, - "head_sha" => "head", - "lenses" => %w[iam-blast-radius argo-manifest-skew] - } - ] - ) - ) - File.write( - File.join(review_dir, "lens-review.json"), - JSON.pretty_generate( - { - "schema_version" => 1, - "repo" => "evalops/deploy", - "pr" => 10, - "lens" => "iam-blast-radius", - "check_id" => "evalops-pr-lens/iam-blast-radius", - "head_sha" => "head", - "findings" => [] - } - ) - ) - statuses = [] - - EvalOpsPrLensReview.stub(:run_url, "https://github.com/evalops/.github/actions/runs/1") do - EvalOpsPrLensReview.stub(:delete_marker_comments, ->(**_kwargs) {}) do - EvalOpsPrLensReview.stub(:post_status, ->(**kwargs) { statuses << kwargs }) do - result = EvalOpsPrLensReview.meta_review( - artifact_root: dir, - min_confidence: 0.82, - output: File.join(dir, "meta-review.json") - ) - - assert_equal 2, result.fetch("expected_reviews") - assert_equal 1, result.fetch("coverage").fetch(0).fetch("missing") - assert_equal "error", statuses.fetch(0).fetch(:state) - assert_includes statuses.fetch(0).fetch(:description), "coverage incomplete" - end - end - end - end - end - - def test_migration_safety_lens_covers_stateful_infra_rollouts - pr_json = { - "title" => "Buildfarm disk headroom", - "html_url" => "https://github.com/evalops/deploy/pull/2", - "draft" => false, - "base" => { - "ref" => "main", - "sha" => "base" - }, - "head" => { - "ref" => "branch", - "sha" => "head" - } - } - - prompt = EvalOpsPrLensReview.build_lens_prompt( - repo: "evalops/deploy", - pr: 2, - lens: "migration-safety", - pr_json: pr_json, - file_summary: "modified\tinfrastructure/gcp/stacks/60-bazel-remote-execution/main.tf\t+20\t-5", - review_context: "", - changed_files_text: "M\tinfrastructure/gcp/stacks/60-bazel-remote-execution/main.tf", - diff_text: "@@ terraform diff @@", - diff_truncated: false - ) - - assert_includes prompt, "stateful infrastructure migrations" - assert_includes prompt, "Terraform, startup scripts, disk/cache migrations" - assert_includes prompt, "destructive filesystem or cloud-resource cleanup" - end - - private - - def finding(title, confidence, priority, path, line) - { - "title" => title, - "body" => "Body for #{title}", - "confidence_score" => confidence, - "priority" => priority, - "code_location" => { - "path" => path, - "line" => line - } - } - end -end diff --git a/test/evalopsbot_webhook_relay_test.rb b/test/evalopsbot_webhook_relay_test.rb deleted file mode 100644 index cf737ea..0000000 --- a/test/evalopsbot_webhook_relay_test.rb +++ /dev/null @@ -1,63 +0,0 @@ -# frozen_string_literal: true - -require "json" -require "minitest/autorun" -require "openssl" -require_relative "../.github/scripts/evalopsbot-webhook-relay" - -class EvalOpsBotWebhookRelayTest < Minitest::Test - def test_dispatch_payload_for_evalopsbot_review_request - payload = EvalOpsBotWebhookRelay.dispatch_payload( - event_name: "pull_request", - reviewer: "EvalOpsBot", - delivery: "delivery-1", - body: JSON.generate( - "action" => "review_requested", - "requested_reviewer" => { "login" => "EvalOpsBot" }, - "repository" => { "full_name" => "evalops/deploy" }, - "pull_request" => { "number" => 3671 } - ) - ) - - assert_equal "evalopsbot-review-requested", payload.fetch("event_type") - assert_equal "evalops/deploy#3671", payload.dig("client_payload", "target_pr") - assert_equal "evalopsbot-webhook-relay", payload.dig("client_payload", "source") - end - - def test_dispatch_payload_skips_other_reviewers - result = EvalOpsBotWebhookRelay.dispatch_payload( - event_name: "pull_request", - reviewer: "EvalOpsBot", - body: JSON.generate( - "action" => "review_requested", - "requested_reviewer" => { "login" => "someone-else" }, - "repository" => { "full_name" => "evalops/deploy" }, - "pull_request" => { "number" => 1 } - ) - ) - - assert_equal true, result.fetch("skipped") - assert_includes result.fetch("reason"), "someone-else" - end - - def test_verify_signature_accepts_github_sha256_signature - body = JSON.generate("ok" => true) - signature = "sha256=#{OpenSSL::HMAC.hexdigest("SHA256", "secret", body)}" - - assert EvalOpsBotWebhookRelay.verify_signature!( - body: body, - signature: signature, - secret: "secret" - ) - end - - def test_verify_signature_rejects_mismatch - assert_raises RuntimeError do - EvalOpsBotWebhookRelay.verify_signature!( - body: "{}", - signature: "sha256=bad", - secret: "secret" - ) - end - end -end diff --git a/test/publish_codex_structured_review_test.rb b/test/publish_codex_structured_review_test.rb deleted file mode 100644 index 4f17bd9..0000000 --- a/test/publish_codex_structured_review_test.rb +++ /dev/null @@ -1,78 +0,0 @@ -# frozen_string_literal: true - -require "minitest/autorun" -require_relative "../.github/scripts/publish-codex-structured-review" - -class PublishCodexStructuredReviewTest < Minitest::Test - def test_normalize_path_strips_workspace_prefix - path = "/tmp/workspace/services/api/main.go" - - assert_equal( - "services/api/main.go", - CodexStructuredReview.normalize_path(path, workspace: "/tmp/workspace") - ) - end - - def test_finding_payload_builds_single_line_comment - finding = { - "title" => "Nil pointer on empty response", - "body" => "The new path dereferences response before checking nil.", - "confidence_score" => 0.87, - "priority" => 1, - "code_location" => { - "absolute_file_path" => "./internal/api/handler.go", - "line_range" => { - "start" => 42, - "end" => 42 - } - } - } - - payload = CodexStructuredReview.finding_payload(finding, commit: "abc123") - - assert_equal "abc123", payload.fetch(:commit_id) - assert_equal "internal/api/handler.go", payload.fetch(:path) - assert_equal 42, payload.fetch(:line) - assert_equal "RIGHT", payload.fetch(:side) - refute payload.key?(:start_line) - assert_includes payload.fetch(:body), "Priority: P1" - assert_includes payload.fetch(:body), "Confidence: 0.87" - end - - def test_finding_payload_orders_multiline_range - finding = { - "title" => "Range", - "body" => "Body", - "confidence_score" => 0.5, - "priority" => 2, - "code_location" => { - "absolute_file_path" => "pkg/foo.go", - "line_range" => { - "start" => 12, - "end" => 10 - } - } - } - - payload = CodexStructuredReview.finding_payload(finding, commit: "abc123") - - assert_equal 10, payload.fetch(:start_line) - assert_equal 12, payload.fetch(:line) - assert_equal "RIGHT", payload.fetch(:start_side) - end - - def test_summary_body_contains_marker_and_verdict - review = { - "overall_correctness" => "patch is incorrect", - "overall_explanation" => "One high-risk regression remains.", - "overall_confidence_score" => 0.91, - "findings" => [{ "title" => "Issue" }] - } - - body = CodexStructuredReview.summary_body(review) - - assert_includes body, CodexStructuredReview::MARKER - assert_includes body, "Verdict: patch is incorrect" - assert_includes body, "Findings: 1" - end -end diff --git a/test/sweep_recent_review_feedback_test.rb b/test/sweep_recent_review_feedback_test.rb deleted file mode 100644 index 20c2716..0000000 --- a/test/sweep_recent_review_feedback_test.rb +++ /dev/null @@ -1,958 +0,0 @@ -# frozen_string_literal: true - -require "json" -require "minitest/autorun" -require "time" -require_relative "../.github/scripts/sweep-recent-review-feedback" - -class SweepRecentReviewFeedbackTest < Minitest::Test - def test_ledger_json_emits_stable_machine_readable_feedback - generated_at = Time.utc(2026, 5, 10, 3, 0, 0) - items = [ - { - kind: "review_thread", - repo: "evalops/deploy", - pr_number: 2371, - pr_title: "test: harden A2A smoke metadata coverage", - pr_url: "https://github.com/evalops/deploy/pull/2371", - merged_at: "2026-05-10T02:50:06Z", - severity: "p1", - url: "https://github.com/evalops/deploy/pull/2371#discussion_r1", - path: "tests/preflight/test_agent_runtime_staging.py", - line: 1205, - is_outdated: false, - body: "\n**P1 Badge** correlation path can fall back to task id\n\nDetails" - }, - { - kind: "pr_review", - repo: "evalops/maestro-internal", - pr_number: 1885, - pr_title: "test: harden staged rollout coverage", - pr_url: "https://github.com/evalops/maestro-internal/pull/1885", - merged_at: "2026-05-10T02:48:17Z", - severity: "high", - url: "https://github.com/evalops/maestro-internal/pull/1885#pullrequestreview-1", - author: "reviewer", - state: "COMMENTED", - body: "**High Severity** hidden mode completion leaks" - } - ] - - ledger = EvalOpsReviewFeedbackSweep.ledger_json( - items, - owner: "evalops", - since: "2026-05-10", - min_severity: "high", - generated_at: generated_at - ) - - assert_equal "evalops.review_feedback_ledger.v1", ledger.fetch("schema_version") - assert_equal "2026-05-10T03:00:00Z", ledger.fetch("generated_at") - assert_equal "evalops", ledger.fetch("owner") - assert_equal "2026-05-10", ledger.fetch("merged_since") - assert_equal "high", ledger.fetch("min_severity") - assert_equal 2, ledger.fetch("finding_count") - - thread = ledger.fetch("findings").first - assert_equal "evalops/deploy", thread.fetch("repo") - assert_equal 2371, thread.fetch("pr_number") - assert_equal "review_thread", thread.fetch("feedback_class") - assert_equal "p1", thread.fetch("severity") - assert_equal "tests/preflight/test_agent_runtime_staging.py", thread.fetch("path") - assert_equal 1205, thread.fetch("line") - assert_equal false, thread.fetch("is_outdated") - assert_equal "**P1 Badge** correlation path can fall back to task id", thread.fetch("body_first_line") - assert_match(/\A[0-9a-f]{64}\z/, thread.fetch("body_sha256")) - - review = ledger.fetch("findings").last - assert_equal "top_level_pr_review", review.fetch("feedback_class") - assert_equal "reviewer", review.fetch("author") - assert_equal "COMMENTED", review.fetch("state") - refute review.key?("path") - - JSON.parse(JSON.pretty_generate(ledger)) - end - - def test_ledger_json_records_empty_sweeps - ledger = EvalOpsReviewFeedbackSweep.ledger_json( - [], - owner: "evalops", - since: "2026-05-10", - min_severity: "p1", - generated_at: Time.utc(2026, 5, 10, 3, 0, 0) - ) - - assert_equal 0, ledger.fetch("finding_count") - assert_equal [], ledger.fetch("findings") - end - - def test_feedback_items_progress_reports_search_and_pr_numbers - prs = [ - { - "repository" => { - "nameWithOwner" => "evalops/deploy" - }, - "number" => 2390, - "title" => "fix parser", - "url" => "https://github.com/evalops/deploy/pull/2390", - "closedAt" => "2026-05-10T05:00:00Z" - } - ] - payload = { - "repository" => { - "pullRequest" => { - "reviewThreads" => { - "nodes" => [] - }, - "comments" => { - "nodes" => [] - }, - "reviews" => { - "nodes" => [] - } - } - } - } - original_search = EvalOpsReviewFeedbackSweep.method(:search_recent_prs) - original_fetch = EvalOpsReviewThreadGuard.method(:fetch_payload) - EvalOpsReviewFeedbackSweep.define_singleton_method(:search_recent_prs) { |owner:, since:, limit:| prs } - EvalOpsReviewThreadGuard.define_singleton_method(:fetch_payload) { |repo:, pr:| payload } - - _stdout, stderr = capture_io do - assert_equal( - [], - EvalOpsReviewFeedbackSweep.feedback_items( - owner: "evalops", - since: "2026-04-10", - min_severity: "high", - pr_limit: 10, - progress: true - ) - ) - end - - assert_includes stderr, "review feedback sweep: inspecting 1 merged PRs since 2026-04-10" - assert_includes stderr, "review feedback sweep: 1/1 evalops/deploy#2390" - ensure - EvalOpsReviewFeedbackSweep.define_singleton_method(:search_recent_prs) do |owner:, since:, limit: 100| - original_search.call(owner: owner, since: since, limit: limit) - end - EvalOpsReviewThreadGuard.define_singleton_method(:fetch_payload) do |repo:, pr:| - original_fetch.call(repo: repo, pr: pr) - end - end - - def test_guardrail_backlog_ranks_recurring_feedback_classes - ledger = { - "schema_version" => "evalops.review_feedback_ledger.v1", - "owner" => "evalops", - "merged_since" => "2026-04-10", - "min_severity" => "high", - "finding_count" => 4, - "findings" => [ - { - "repo" => "evalops/platform", - "pr_number" => 1545, - "pr_title" => "proto: regenerate SDKs", - "feedback_url" => "https://github.com/evalops/platform/pull/1545#discussion_r1", - "path" => "proto/codex/v1/codex.proto", - "line" => 42, - "severity" => "p1", - "body_first_line" => "**P1 Badge** generated TypeScript SDK is stale" - }, - { - "repo" => "evalops/proto", - "pr_number" => 88, - "pr_title" => "buf: add meter event", - "feedback_url" => "https://github.com/evalops/proto/pull/88#discussion_r2", - "path" => "gen/go/meter/v1/event.pb.go", - "line" => 7, - "severity" => "high", - "body_first_line" => "**High Severity** generated Go output was not committed" - }, - { - "repo" => "evalops/deploy", - "pr_number" => 2137, - "pr_title" => "ci: tighten deploy workflow", - "feedback_url" => "https://github.com/evalops/deploy/pull/2137#discussion_r3", - "path" => ".github/workflows/deploy.yml", - "line" => 12, - "severity" => "high", - "body_first_line" => "**High Severity** workflow shell masks failed command" - }, - { - "repo" => "evalops/deploy", - "pr_number" => 2142, - "pr_title" => "test: add staging smoke", - "feedback_url" => "https://github.com/evalops/deploy/pull/2142#discussion_r4", - "path" => "tests/preflight/test_agent_runtime_staging.py", - "line" => 99, - "severity" => "medium", - "body_first_line" => "**Medium Severity** smoke evidence omits runtime metadata" - } - ] - } - - backlog = EvalOpsReviewFeedbackSweep.guardrail_backlog_json( - ledger, - generated_at: Time.utc(2026, 5, 10, 4, 30, 0) - ) - - assert_equal "evalops.review_feedback_guardrail_backlog.v1", backlog.fetch("schema_version") - assert_equal "evalops.review_feedback_ledger.v1", backlog.fetch("source_schema_version") - assert_equal "2026-05-10T04:30:00Z", backlog.fetch("generated_at") - assert_equal 4, backlog.fetch("source_finding_count") - assert_equal 3, backlog.fetch("class_count") - - first = backlog.fetch("classes").first - assert_equal "generated-contract-drift", first.fetch("key") - assert_equal 140, first.fetch("score") - assert_equal 2, first.fetch("finding_count") - assert_equal ["evalops/platform", "evalops/proto"], first.fetch("repos") - assert_equal "evalops/platform", first.fetch("sample_findings").first.fetch("repo") - assert_equal 2, first.fetch("finding_fingerprints").length - assert_match(/\A[0-9a-f]{64}\z/, first.fetch("finding_fingerprints").first) - assert_equal 1, first.fetch("repo_fingerprints").fetch("evalops/platform").length - assert_equal "evalops/proto", first.fetch("repo_sample_findings").fetch("evalops/proto").first.fetch("repo") - - markdown = EvalOpsReviewFeedbackSweep.guardrail_backlog_markdown(backlog) - assert_includes markdown, "# Review feedback guardrail backlog" - assert_includes markdown, "| 1 | `generated-contract-drift` Generated contract drift | 140 | 2 | evalops/platform, evalops/proto |" - assert_includes markdown, "" - - JSON.parse(JSON.pretty_generate(backlog)) - end - - def test_guardrail_backlog_keeps_security_secret_feedback_reachable - security_class = EvalOpsReviewFeedbackSweep.guardrail_class( - { - "repo" => "evalops/fathom", - "pr_title" => "notarization: configure credentials", - "path" => "scripts/bootstrap-notary-credentials.py", - "body_first_line" => "Credential secret can shadow the API key token", - "feedback_class" => "review_thread", - "kind" => "review_thread" - } - ) - config_class = EvalOpsReviewFeedbackSweep.guardrail_class( - { - "repo" => "evalops/deploy", - "pr_title" => "deploy: validate k8s selector YAML", - "path" => "k8s/ensemble/worker-deployment.yaml", - "body_first_line" => "Kubernetes desired-state selector is not validated", - "feedback_class" => "review_thread", - "kind" => "review_thread" - } - ) - - assert_equal "security-authz", security_class.fetch("key") - assert_equal "configuration-safety", config_class.fetch("key") - end - - def test_guardrail_backlog_classifies_parser_and_visual_capture_feedback - parser_class = EvalOpsReviewFeedbackSweep.guardrail_class( - { - "repo" => "evalops/deploy", - "pr_title" => "fix: harden Ensemble checksum guardrail parsing", - "path" => nil, - "body_first_line" => "Parse real CLI flags instead of substring matching", - "feedback_class" => "top_level_pr_comment", - "kind" => "pr_comment" - } - ) - visual_class = EvalOpsReviewFeedbackSweep.guardrail_class( - { - "repo" => "evalops/fathom", - "pr_title" => "capture: add native perception provider", - "path" => "macos/FathomCore/Sources/FathomCore/NativePerceptionProvider.swift", - "body_first_line" => "Visual sampler error prevents entire frame capture", - "feedback_class" => "review_thread", - "kind" => "review_thread" - } - ) - - assert_equal "parser-cli-contract", parser_class.fetch("key") - assert_equal "visual-capture-resilience", visual_class.fetch("key") - end - - def test_guardrail_backlog_records_empty_ledgers - backlog = EvalOpsReviewFeedbackSweep.guardrail_backlog_json( - { - "schema_version" => "evalops.review_feedback_ledger.v1", - "owner" => "evalops", - "merged_since" => "2026-04-10", - "min_severity" => "p1", - "finding_count" => 0, - "findings" => [] - }, - generated_at: Time.utc(2026, 5, 10, 4, 30, 0) - ) - - assert_equal 0, backlog.fetch("source_finding_count") - assert_equal 0, backlog.fetch("class_count") - assert_equal [], backlog.fetch("classes") - assert_includes EvalOpsReviewFeedbackSweep.guardrail_backlog_markdown(backlog), "No guardrail candidates found." - end - - def test_guardrail_issue_title_and_body_are_stable_lifecycle_artifacts - backlog = { - "schema_version" => "evalops.review_feedback_guardrail_backlog.v1", - "owner" => "evalops", - "generated_at" => "2026-05-10T05:40:00Z", - "merged_since" => "2026-04-10", - "min_severity" => "high", - "class_count" => 1, - "classes" => [ - { - "key" => "runtime-smoke-coverage", - "title" => "Runtime smoke coverage gap", - "score" => 100, - "finding_count" => 2, - "repo_count" => 1, - "repos" => ["evalops/platform"], - "recommended_guardrail" => "Add a smoke or preflight fixture that proves the runtime-visible behavior.", - "sample_findings" => [ - { - "repo" => "evalops/platform", - "pr_number" => 1676, - "feedback_url" => "https://github.com/evalops/platform/pull/1676#discussion_r1", - "path" => "internal/agentruntime/agentruntime/postgres_store.go", - "line" => 295, - "severity" => "p1", - "body_first_line" => "Roll back tx before loading idempotent receipt" - } - ] - } - ] - } - entry = backlog.fetch("classes").first - - assert_equal( - "[codex] Guardrail backlog: Runtime smoke coverage gap (runtime-smoke-coverage)", - EvalOpsReviewFeedbackSweep.guardrail_issue_title(entry) - ) - - body = EvalOpsReviewFeedbackSweep.guardrail_issue_body(backlog, entry) - assert_includes body, "" - assert_includes body, "- Class: `runtime-smoke-coverage`" - assert_includes body, "- Repos: `evalops/platform`" - assert_includes body, "Roll back tx before loading idempotent receipt" - assert_includes body, "## Finding fingerprints" - assert_match(/- `[0-9a-f]{64}`/, body) - assert_includes body, "The guardrail fails for at least one representative feedback shape listed above." - assert_includes body, "The issue is closed only after the guardrail has merged" - end - - def test_repo_guardrail_issue_title_and_body_route_feedback_to_source_repo - finding = { - "repo" => "evalops/platform", - "pr_number" => 1676, - "feedback_url" => "https://github.com/evalops/platform/pull/1676#discussion_r1", - "path" => "internal/agentruntime/agentruntime/postgres_store.go", - "line" => 295, - "severity" => "p1", - "body_first_line" => "Roll back tx before loading idempotent receipt" - } - fingerprint = EvalOpsReviewFeedbackSweep.guardrail_finding_fingerprint(finding) - backlog = { - "schema_version" => "evalops.review_feedback_guardrail_backlog.v1", - "owner" => "evalops", - "generated_at" => "2026-05-10T05:40:00Z", - "merged_since" => "2026-04-10", - "min_severity" => "high", - "class_count" => 1 - } - entry = { - "key" => "runtime-smoke-coverage", - "title" => "Runtime smoke coverage gap", - "score" => 100, - "finding_count" => 2, - "repos" => ["evalops/platform"], - "recommended_guardrail" => "Add a smoke or preflight fixture that proves the runtime-visible behavior.", - "repo_fingerprints" => { - "evalops/platform" => [fingerprint] - }, - "repo_sample_findings" => { - "evalops/platform" => [finding] - }, - "sample_findings" => [finding] - } - - assert_equal( - "[codex] Guardrail candidate: Runtime smoke coverage gap (runtime-smoke-coverage)", - EvalOpsReviewFeedbackSweep.repo_guardrail_issue_title(entry) - ) - - body = EvalOpsReviewFeedbackSweep.repo_guardrail_issue_body( - backlog, - entry, - repo: "evalops/platform", - org_issue_url: "https://github.com/evalops/.github/issues/49" - ) - assert_includes body, "" - assert_includes body, "- Repo: `evalops/platform`" - assert_includes body, "- Org tracker: https://github.com/evalops/.github/issues/49" - assert_includes body, "Representative feedback in this repo" - assert_includes body, "Roll back tx before loading idempotent receipt" - assert_includes body, "- `#{fingerprint}`" - end - - def test_guardrail_lifecycle_json_records_issue_actions - backlog = { - "schema_version" => "evalops.review_feedback_guardrail_backlog.v1", - "owner" => "evalops", - "merged_since" => "2026-04-10", - "min_severity" => "high", - "class_count" => 1, - "classes" => [] - } - lifecycle = EvalOpsReviewFeedbackSweep.guardrail_lifecycle_json( - backlog, - issue_results: [ - { - "class_key" => "runtime-smoke-coverage", - "title" => "[codex] Guardrail backlog: Runtime smoke coverage gap (runtime-smoke-coverage)", - "issue_number" => 49, - "issue_url" => "https://github.com/evalops/.github/issues/49", - "action" => "updated" - } - ], - generated_at: Time.utc(2026, 5, 10, 5, 45, 0) - ) - - assert_equal "evalops.review_feedback_guardrail_lifecycle.v1", lifecycle.fetch("schema_version") - assert_equal "evalops.review_feedback_guardrail_backlog.v1", lifecycle.fetch("source_schema_version") - assert_equal "2026-05-10T05:45:00Z", lifecycle.fetch("generated_at") - assert_equal 1, lifecycle.fetch("class_count") - assert_equal 1, lifecycle.fetch("issue_count") - assert_equal "updated", lifecycle.fetch("issues").first.fetch("action") - - JSON.parse(JSON.pretty_generate(lifecycle)) - end - - def test_weekly_guardrail_report_markdown_summarizes_candidates_and_prevented_classes - backlog = { - "schema_version" => "evalops.review_feedback_guardrail_backlog.v1", - "owner" => "evalops", - "merged_since" => "2026-04-10", - "min_severity" => "high", - "source_finding_count" => 3, - "class_count" => 2, - "classes" => [ - { - "key" => "generated-contract-drift", - "title" => "Generated contract drift", - "score" => 140, - "finding_count" => 2, - "repos" => ["evalops/platform", "evalops/proto"], - "repo_fingerprints" => { - "evalops/platform" => ["a" * 64], - "evalops/proto" => ["b" * 64] - }, - "recommended_guardrail" => "Add generated-output drift checks." - }, - { - "key" => "parser-cli-contract", - "title" => "Parser and CLI contract drift", - "score" => 80, - "finding_count" => 1, - "repos" => ["evalops/deploy"], - "repo_fingerprints" => { - "evalops/deploy" => ["c" * 64] - }, - "recommended_guardrail" => "Add parser-backed tests." - } - ] - } - lifecycle = { - "issues" => [ - { - "class_key" => "parser-cli-contract", - "issue_url" => "https://github.com/evalops/.github/issues/50", - "action" => "already_closed" - } - ] - } - ledger = { - "findings" => [ - { - "repo" => "evalops/platform", - "pr_title" => "proto: regenerate SDKs", - "path" => "proto/codex/v1/codex.proto", - "body_first_line" => "generated TypeScript SDK is stale", - "feedback_class" => "review_thread", - "kind" => "review_thread", - "merged_at" => "2026-05-09T06:00:00Z" - }, - { - "repo" => "evalops/proto", - "pr_title" => "buf: add meter event", - "path" => "gen/go/meter/v1/event.pb.go", - "body_first_line" => "generated Go output was not committed", - "feedback_class" => "review_thread", - "kind" => "review_thread", - "merged_at" => "2026-05-01T06:00:00Z" - }, - { - "repo" => "evalops/deploy", - "pr_title" => "fix: harden parser", - "path" => nil, - "body_first_line" => "Parse real CLI flags instead of substring matching", - "feedback_class" => "top_level_pr_comment", - "kind" => "pr_comment", - "merged_at" => "2026-05-08T06:00:00Z" - } - ] - } - - report = EvalOpsReviewFeedbackSweep.weekly_guardrail_report_markdown( - backlog, - lifecycle: lifecycle, - ledger: ledger, - generated_at: Time.utc(2026, 5, 10, 6, 15, 0) - ) - - assert_includes report, "# Weekly review feedback guardrail report" - assert_includes report, "" - assert_includes report, "| 1 | `generated-contract-drift` Generated contract drift | 140 | 2 | evalops/platform, evalops/proto |" - assert_includes report, "| evalops/platform | 1 |" - assert_includes report, "## Repeat-rate trend" - assert_includes report, "| `parser-cli-contract` | 1 | 0 | 1 | new |" - assert_includes report, "| `generated-contract-drift` | 1 | 1 | 0 | 0% |" - assert_includes report, "`parser-cli-contract` https://github.com/evalops/.github/issues/50" - assert_includes report, "`generated-contract-drift`: Add generated-output drift checks." - end - - def test_repeat_rate_metrics_buckets_findings_by_merged_at - ledger = { - "findings" => [ - { - "repo" => "evalops/deploy", - "pr_title" => "fix parser", - "body_first_line" => "Parse CLI flags", - "feedback_class" => "top_level_pr_comment", - "kind" => "pr_comment", - "merged_at" => "2026-05-09T00:00:00Z" - }, - { - "repo" => "evalops/deploy", - "pr_title" => "fix parser", - "body_first_line" => "Parse CLI args", - "feedback_class" => "top_level_pr_comment", - "kind" => "pr_comment", - "merged_at" => "2026-05-02T00:00:00Z" - }, - { - "repo" => "evalops/deploy", - "pr_title" => "old parser", - "body_first_line" => "Parse CLI command", - "feedback_class" => "top_level_pr_comment", - "kind" => "pr_comment", - "merged_at" => "2026-04-20T00:00:00Z" - } - ] - } - - metrics = EvalOpsReviewFeedbackSweep.repeat_rate_metrics( - ledger, - generated_at: Time.utc(2026, 5, 10, 0, 0, 0) - ) - - assert_equal 1, metrics.length - assert_equal "parser-cli-contract", metrics.first.fetch("class_key") - assert_equal 1, metrics.first.fetch("current_count") - assert_equal 1, metrics.first.fetch("previous_count") - assert_equal 0, metrics.first.fetch("delta") - assert_equal 0, metrics.first.fetch("change_percent") - end - - def test_weekly_guardrail_report_markdown_handles_empty_backlog - backlog = { - "schema_version" => "evalops.review_feedback_guardrail_backlog.v1", - "owner" => "evalops", - "merged_since" => "2026-04-10", - "min_severity" => "high", - "source_finding_count" => 0, - "class_count" => 0, - "classes" => [] - } - - report = EvalOpsReviewFeedbackSweep.weekly_guardrail_report_markdown(backlog) - - assert_includes report, "No guardrail candidates found in this window." - end - - def test_issue_number_from_url_extracts_github_issue_number - assert_equal 49, EvalOpsReviewFeedbackSweep.issue_number_from_url("https://github.com/evalops/.github/issues/49") - assert_nil EvalOpsReviewFeedbackSweep.issue_number_from_url("https://github.com/evalops/.github/pull/49") - end - - def test_guardrail_issue_key_from_title_extracts_stable_class_key - assert_equal( - "parser-cli-contract", - EvalOpsReviewFeedbackSweep.guardrail_issue_key_from_title("[codex] Guardrail backlog: Parser and CLI contract drift (parser-cli-contract)") - ) - assert_nil EvalOpsReviewFeedbackSweep.guardrail_issue_key_from_title("Parser and CLI contract drift (parser-cli-contract)") - end - - def test_upsert_guardrail_class_issue_keeps_closed_issue_closed_when_fingerprints_match - finding = { - "repo" => "evalops/deploy", - "pr_number" => 2390, - "feedback_url" => "https://github.com/evalops/deploy/pull/2390#issuecomment-1", - "path" => nil, - "line" => nil, - "severity" => "high", - "body_sha256" => Digest::SHA256.hexdigest("Parse real CLI flags instead of substring matching"), - "body_first_line" => "Parse real CLI flags instead of substring matching" - } - fingerprint = EvalOpsReviewFeedbackSweep.guardrail_finding_fingerprint(finding) - backlog = { - "generated_at" => "2026-05-10T05:40:00Z", - "merged_since" => "2026-05-09T05:40:00Z", - "min_severity" => "high", - "classes" => [] - } - entry = { - "key" => "parser-cli-contract", - "title" => "Parser and CLI contract drift", - "score" => 80, - "finding_count" => 1, - "repos" => ["evalops/deploy"], - "recommended_guardrail" => "Add parser-backed tests.", - "finding_fingerprints" => [fingerprint], - "sample_findings" => [finding] - } - issue = { - "number" => 50, - "title" => "[codex] Guardrail backlog: Parser and CLI contract drift (parser-cli-contract)", - "state" => "CLOSED", - "url" => "https://github.com/evalops/.github/issues/50", - "body" => "- `#{fingerprint}`" - } - handler = lambda do |args, _input| - if args[0, 2] == ["issue", "list"] - [JSON.generate([issue]), "", success_status] - else - flunk("unexpected gh call: #{args.inspect}") - end - end - - result = nil - calls = with_stubbed_gh(handler) do - result = EvalOpsReviewFeedbackSweep.upsert_guardrail_class_issue( - repo: "evalops/.github", - backlog: backlog, - entry: entry - ) - end - - assert_equal "already_closed", result.fetch("action") - assert_equal 50, result.fetch("issue_number") - assert_equal 1, calls.length - end - - def test_upsert_guardrail_class_issue_reopens_closed_issue_when_fingerprints_change - finding = { - "repo" => "evalops/deploy", - "pr_number" => 2391, - "feedback_url" => "https://github.com/evalops/deploy/pull/2391#issuecomment-1", - "path" => "scripts/check.py", - "line" => 17, - "severity" => "high", - "body_first_line" => "Parse command flags with the parser" - } - backlog = { - "generated_at" => "2026-05-10T05:40:00Z", - "merged_since" => "2026-05-09T05:40:00Z", - "min_severity" => "high", - "classes" => [] - } - entry = { - "key" => "parser-cli-contract", - "title" => "Parser and CLI contract drift", - "score" => 80, - "finding_count" => 1, - "repos" => ["evalops/deploy"], - "recommended_guardrail" => "Add parser-backed tests.", - "sample_findings" => [finding] - } - issue = { - "number" => 50, - "title" => "[codex] Guardrail backlog: Parser and CLI contract drift (parser-cli-contract)", - "state" => "CLOSED", - "url" => "https://github.com/evalops/.github/issues/50", - "body" => "- `#{Digest::SHA256.hexdigest("old")}`" - } - handler = lambda do |args, input| - if args[0, 2] == ["issue", "list"] - [JSON.generate([issue]), "", success_status] - elsif args[0, 3] == ["issue", "reopen", "50"] - ["", "", success_status] - elsif args[0, 3] == ["issue", "edit", "50"] - assert_includes input, "## Finding fingerprints" - ["", "", success_status] - else - flunk("unexpected gh call: #{args.inspect}") - end - end - - result = nil - calls = with_stubbed_gh(handler) do - result = EvalOpsReviewFeedbackSweep.upsert_guardrail_class_issue( - repo: "evalops/.github", - backlog: backlog, - entry: entry - ) - end - - assert_equal "reopened", result.fetch("action") - assert_equal ["issue", "reopen", "50"], calls[1].first(3) - assert_equal ["issue", "edit", "50"], calls[2].first(3) - end - - def test_upsert_repo_guardrail_issue_keeps_closed_issue_closed_when_fingerprints_match - finding = { - "repo" => "evalops/deploy", - "pr_number" => 2390, - "feedback_url" => "https://github.com/evalops/deploy/pull/2390#issuecomment-1", - "path" => nil, - "line" => nil, - "severity" => "high", - "body_first_line" => "Parse real CLI flags instead of substring matching" - } - fingerprint = EvalOpsReviewFeedbackSweep.guardrail_finding_fingerprint(finding) - backlog = { - "generated_at" => "2026-05-10T05:40:00Z", - "merged_since" => "2026-05-09T05:40:00Z", - "min_severity" => "high" - } - entry = { - "key" => "parser-cli-contract", - "title" => "Parser and CLI contract drift", - "score" => 80, - "finding_count" => 1, - "repos" => ["evalops/deploy"], - "recommended_guardrail" => "Add parser-backed tests.", - "repo_fingerprints" => { - "evalops/deploy" => [fingerprint] - }, - "repo_sample_findings" => { - "evalops/deploy" => [finding] - }, - "sample_findings" => [finding] - } - issue = { - "number" => 2400, - "title" => "[codex] Guardrail candidate: Parser and CLI contract drift (parser-cli-contract)", - "state" => "CLOSED", - "url" => "https://github.com/evalops/deploy/issues/2400", - "body" => "- `#{fingerprint}`" - } - handler = lambda do |args, _input| - if args[0, 2] == ["issue", "list"] - assert_equal "evalops/deploy", args[args.index("--repo") + 1] - [JSON.generate([issue]), "", success_status] - else - flunk("unexpected gh call: #{args.inspect}") - end - end - - result = nil - calls = with_stubbed_gh(handler) do - result = EvalOpsReviewFeedbackSweep.upsert_repo_guardrail_issue( - repo: "evalops/deploy", - backlog: backlog, - entry: entry, - org_issue_url: "https://github.com/evalops/.github/issues/50" - ) - end - - assert_equal "repo", result.fetch("scope") - assert_equal "evalops/deploy", result.fetch("repo") - assert_equal "already_closed", result.fetch("action") - assert_equal 1, calls.length - end - - def test_upsert_repo_guardrail_issues_creates_one_issue_per_source_repo - platform_finding = { - "repo" => "evalops/platform", - "pr_number" => 1545, - "feedback_url" => "https://github.com/evalops/platform/pull/1545#discussion_r1", - "path" => "proto/codex/v1/codex.proto", - "line" => 42, - "severity" => "p1", - "body_first_line" => "generated TypeScript SDK is stale" - } - proto_finding = { - "repo" => "evalops/proto", - "pr_number" => 88, - "feedback_url" => "https://github.com/evalops/proto/pull/88#discussion_r2", - "path" => "gen/go/meter/v1/event.pb.go", - "line" => 7, - "severity" => "high", - "body_first_line" => "generated Go output was not committed" - } - backlog = { - "generated_at" => "2026-05-10T05:40:00Z", - "merged_since" => "2026-04-10", - "min_severity" => "high", - "classes" => [ - { - "key" => "generated-contract-drift", - "title" => "Generated contract drift", - "score" => 140, - "finding_count" => 2, - "repos" => ["evalops/platform", "evalops/proto"], - "recommended_guardrail" => "Add generated-output drift checks.", - "repo_sample_findings" => { - "evalops/platform" => [platform_finding], - "evalops/proto" => [proto_finding] - }, - "sample_findings" => [platform_finding, proto_finding] - } - ] - } - created_urls = { - "evalops/platform" => "https://github.com/evalops/platform/issues/1", - "evalops/proto" => "https://github.com/evalops/proto/issues/2" - } - handler = lambda do |args, input| - if args[0, 2] == ["issue", "list"] - [JSON.generate([]), "", success_status] - elsif args[0, 2] == ["issue", "create"] - repo = args[args.index("--repo") + 1] - assert_includes input, "https://github.com/evalops/.github/issues/77" - [created_urls.fetch(repo), "", success_status] - else - flunk("unexpected gh call: #{args.inspect}") - end - end - - results = nil - with_stubbed_gh(handler) do - results = EvalOpsReviewFeedbackSweep.upsert_repo_guardrail_issues( - backlog: backlog, - org_issue_results: [ - { - "class_key" => "generated-contract-drift", - "issue_url" => "https://github.com/evalops/.github/issues/77" - } - ] - ) - end - - assert_equal ["evalops/platform", "evalops/proto"], results.map { |result| result.fetch("repo") } - assert_equal ["created", "created"], results.map { |result| result.fetch("action") } - end - - def test_upsert_repo_guardrail_issues_skips_classes_already_closed_at_org_level - backlog = { - "classes" => [ - { - "key" => "parser-cli-contract", - "title" => "Parser and CLI contract drift", - "repos" => ["evalops/deploy"], - "sample_findings" => [] - } - ] - } - - results = nil - with_stubbed_gh(lambda { |args, _input| flunk("unexpected gh call: #{args.inspect}") }) do - results = EvalOpsReviewFeedbackSweep.upsert_repo_guardrail_issues( - backlog: backlog, - org_issue_results: [ - { - "class_key" => "parser-cli-contract", - "issue_url" => "https://github.com/evalops/.github/issues/50", - "action" => "already_closed" - } - ] - ) - end - - assert_equal [], results - end - - def test_close_stale_guardrail_class_issues_closes_only_missing_classes - backlog = { - "classes" => [ - { - "key" => "parser-cli-contract" - } - ] - } - list_payload = [ - { - "number" => 48, - "title" => "[codex] Guardrail backlog: Other feedback (other-feedback)", - "url" => "https://github.com/evalops/.github/issues/48" - }, - { - "number" => 49, - "title" => "[codex] Guardrail backlog: Parser and CLI contract drift (parser-cli-contract)", - "url" => "https://github.com/evalops/.github/issues/49" - } - ] - - handler = lambda do |args, _input| - if args[0, 2] == ["issue", "list"] - [JSON.generate(list_payload), "", success_status] - elsif args[0, 3] == ["issue", "close", "48"] - ["", "", success_status] - else - flunk("unexpected gh call: #{args.inspect}") - end - end - results = nil - - calls = with_stubbed_gh(handler) do - results = EvalOpsReviewFeedbackSweep.close_stale_guardrail_class_issues(repo: "evalops/.github", backlog: backlog) - end - - assert_equal 1, results.length - assert_equal "other-feedback", results.first.fetch("class_key") - assert_equal "closed_stale", results.first.fetch("action") - assert_equal ["issue", "close", "48"], calls.last.first(3) - end - - def test_body_first_line_skips_codex_review_boilerplate - body = <<~BODY - - ### Codex Review - - https://github.com/evalops/platform/blob/abc/internal/agentruntime/store.go#L10-L12 - **![P1 Badge](https://img.shields.io/badge/P1-orange?style=flat) Roll back tx before loading idempotent receipt** - - Details. - BODY - - assert_equal "Roll back tx before loading idempotent receipt", EvalOpsReviewFeedbackSweep.body_first_line(body) - end - - private - - def success_status - Object.new.tap do |status| - def status.success? - true - end - end - end - - def with_stubbed_gh(handler) - original = EvalOpsReviewFeedbackSweep.method(:gh) - calls = [] - EvalOpsReviewFeedbackSweep.define_singleton_method(:gh) do |*args, input: nil| - calls << args - handler.call(args, input) - end - yield calls - calls - ensure - EvalOpsReviewFeedbackSweep.define_singleton_method(:gh) do |*args, input: nil| - original.call(*args, input: input) - end - end -end diff --git a/test/sync_agent_mcp_config_test.rb b/test/sync_agent_mcp_config_test.rb deleted file mode 100644 index f3f79ea..0000000 --- a/test/sync_agent_mcp_config_test.rb +++ /dev/null @@ -1,71 +0,0 @@ -# frozen_string_literal: true - -require "fileutils" -require "minitest/autorun" -require "tmpdir" -require_relative "../.github/scripts/sync-agent-mcp-config" - -class SyncAgentMcpConfigTest < Minitest::Test - TEMPLATE_DIR = File.expand_path("../.github/agent-mcp/templates", __dir__) - - def test_plan_creates_expected_agent_mcp_files - Dir.mktmpdir do |workspace| - plan = EvalOpsAgentMcpConfig.plan(workspace: workspace, template_dir: TEMPLATE_DIR) - - assert_equal( - [".codex/config.toml", ".cursor/mcp.json", ".gitignore", ".mcp.json", "AGENTS.md"], - plan.map { |file| file.fetch("path") }.sort - ) - assert plan.all? { |file| file.fetch("status") == "create" } - end - end - - def test_write_is_idempotent_and_preserves_existing_agents_text - Dir.mktmpdir do |workspace| - File.write(File.join(workspace, "AGENTS.md"), "# Repo Rails\n\nKeep tests green.\n") - - EvalOpsAgentMcpConfig.write_files(workspace: workspace, template_dir: TEMPLATE_DIR) - first_agents = File.read(File.join(workspace, "AGENTS.md")) - EvalOpsAgentMcpConfig.write_files(workspace: workspace, template_dir: TEMPLATE_DIR) - second_agents = File.read(File.join(workspace, "AGENTS.md")) - - assert_equal first_agents, second_agents - assert_includes first_agents, "# Repo Rails" - assert_includes first_agents, "## EvalOps Integration" - assert_equal "in_sync", EvalOpsAgentMcpConfig.plan(workspace: workspace, template_dir: TEMPLATE_DIR).first.fetch("status") - end - end - - def test_gitignore_fragment_adds_env_without_duplicates - existing = "*.log\n.env\n" - fragment = ".env\n.env.local\n" - - merged = EvalOpsAgentMcpConfig.merge_gitignore(existing, fragment) - - assert_equal "*.log\n.env\n\n.env.local\n", merged - end - - def test_markdown_report_lists_file_actions - report = { - "generated_at" => "2026-05-15T12:00:00Z", - "write" => false, - "totals" => { - "create" => 1, - "update" => 0, - "in_sync" => 4 - }, - "files" => [ - { - "path" => ".mcp.json", - "status" => "create", - "bytes" => 120 - } - ] - } - - markdown = EvalOpsAgentMcpConfig.markdown_report(report) - - assert_includes markdown, "EvalOps Agent MCP Config Report" - assert_includes markdown, "`.mcp.json`" - end -end diff --git a/test/sync_labels_test.rb b/test/sync_labels_test.rb deleted file mode 100644 index f7107c4..0000000 --- a/test/sync_labels_test.rb +++ /dev/null @@ -1,98 +0,0 @@ -# frozen_string_literal: true - -require "minitest/autorun" -require_relative "../.github/scripts/sync-labels" - -class SyncLabelsTest < Minitest::Test - def test_labels_yml_is_valid - config = EvalOpsLabelSync.load_config("labels.yml") - assert_equal [], EvalOpsLabelSync.validation_errors(config) - assert_equal "evalops.labels.v1", config.fetch("schema_version") - assert config.fetch("labels").any? { |label| label.fetch("name") == "architecture-review" } - end - - def test_plan_repo_is_additive_and_updates_matching_labels - desired = [ - { - "name" => "architecture-review", - "description" => "Cross-service architecture review requested", - "color" => "5319e7" - }, - { - "name" => "security", - "description" => "Security vulnerabilities and hardening", - "color" => "d73a4a" - } - ] - existing = [ - { - "name" => "architecture-review", - "description" => "Old description", - "color" => "000000" - }, - { - "name" => "repo-local", - "description" => "Do not delete me", - "color" => "cccccc" - } - ] - - plan = EvalOpsLabelSync.plan_repo( - repo: "evalops/example", - desired_labels: desired, - existing_labels: existing - ) - - assert_equal "planned", plan.fetch("status") - assert_equal ["security"], plan.fetch("additions").map { |label| label.fetch("name") } - assert_equal ["architecture-review"], plan.fetch("updates").map { |label| label.fetch("name") } - refute_includes plan.to_s, "repo-local" - end - - def test_opted_out_repo_is_skipped - plan = EvalOpsLabelSync.plan_repo( - repo: "evalops/example", - desired_labels: [{ "name" => "security", "description" => "Security", "color" => "d73a4a" }], - existing_labels: [], - opted_out: true - ) - - assert_equal "skipped", plan.fetch("status") - assert_equal ["opted out"], plan.fetch("skips") - end - - def test_label_names_are_escaped_as_path_components - assert_equal "autorelease%3A%20pending", EvalOpsLabelSync.path_component_escape("autorelease: pending") - assert_equal "area%2Fplatform", EvalOpsLabelSync.path_component_escape("area/platform") - end - - def test_markdown_report_summarizes_repo_diffs - report = { - "generated_at" => "2026-05-15T12:00:00Z", - "dry_run" => true, - "label_count" => 2, - "target_count" => 1, - "totals" => { - "additions" => 1, - "updates" => 1, - "errors" => 0 - }, - "repos" => [ - { - "repo" => "evalops/example", - "status" => "planned", - "additions" => [{}], - "updates" => [{}], - "skips" => [], - "errors" => [] - } - ] - } - - markdown = EvalOpsLabelSync.markdown_report(report) - - assert_includes markdown, "EvalOps Label Sync Report" - assert_includes markdown, "`evalops/example`" - assert_includes markdown, "| `evalops/example` | planned | 1 | 1 | |" - end -end diff --git a/test/validate_services_catalog_test.rb b/test/validate_services_catalog_test.rb deleted file mode 100644 index 03f0d0a..0000000 --- a/test/validate_services_catalog_test.rb +++ /dev/null @@ -1,107 +0,0 @@ -# frozen_string_literal: true - -require "minitest/autorun" -require "open3" -require "tempfile" -require "yaml" - -class ValidateServicesCatalogTest < Minitest::Test - ROOT = File.expand_path("..", __dir__) - SCRIPT = File.join(ROOT, ".github/scripts/validate-services-catalog.rb") - - def test_current_catalog_is_valid - stdout, stderr, status = Open3.capture3("ruby", SCRIPT, File.join(ROOT, "services.yaml")) - - assert status.success?, stderr - assert_match(/ok .*services\.yaml \(71 services\)/, stdout) - end - - def test_unknown_dependency_fails - catalog = minimal_catalog - catalog["services"]["identity"]["depends_on"] = ["missing-service"] - - stdout, stderr, status = run_validator(catalog) - - refute status.success?, stdout - assert_match(/identity: depends_on references unknown service "missing-service"/, stderr) - end - - def test_duplicate_repo_fails - catalog = minimal_catalog - catalog["services"]["proto"]["repo"] = "evalops/identity" - - stdout, stderr, status = run_validator(catalog) - - refute status.success?, stdout - assert_match(/proto: repo duplicates identity/, stderr) - end - - def test_invalid_enum_values_fail - catalog = minimal_catalog - catalog["services"]["identity"]["tier"] = "urgent" - catalog["services"]["identity"]["runtime"] = "laptop" - - stdout, stderr, status = run_validator(catalog) - - refute status.success?, stdout - assert_match(/identity: tier must be one of critical, standard, experimental/, stderr) - assert_match(/identity: runtime must be one of gke, none, standalone/, stderr) - end - - def test_proto_consumer_is_validated_even_when_depends_on_type_is_invalid - catalog = minimal_catalog - catalog["services"]["identity"]["depends_on"] = "proto" - catalog["services"]["identity"]["proto_consumer"] = "yes" - - stdout, stderr, status = run_validator(catalog) - - refute status.success?, stdout - assert_match(/identity: depends_on must be a list when present/, stderr) - assert_match(/identity: proto_consumer must be true or false when present/, stderr) - end - - def test_proto_consumers_must_depend_on_proto - catalog = minimal_catalog - catalog["services"]["identity"]["depends_on"] = [] - - stdout, stderr, status = run_validator(catalog) - - refute status.success?, stdout - assert_match(/identity: proto_consumer services must include proto in depends_on/, stderr) - end - - private - - def run_validator(catalog) - Tempfile.create(["services", ".yaml"]) do |file| - file.write(YAML.dump(catalog)) - file.flush - return Open3.capture3("ruby", SCRIPT, file.path) - end - end - - def minimal_catalog - { - "services" => { - "identity" => { - "description" => "Identity service", - "team" => "platform-team", - "language" => "go", - "tier" => "critical", - "runtime" => "gke", - "depends_on" => ["proto"], - "proto_consumer" => true, - "repo" => "evalops/identity", - }, - "proto" => { - "description" => "Shared protobuf contracts", - "team" => "api-team", - "language" => "typescript", - "tier" => "critical", - "runtime" => "none", - "repo" => "evalops/proto", - }, - }, - } - end -end diff --git a/test/verify_evalopsbot_review_setup_test.rb b/test/verify_evalopsbot_review_setup_test.rb deleted file mode 100644 index 85597f4..0000000 --- a/test/verify_evalopsbot_review_setup_test.rb +++ /dev/null @@ -1,42 +0,0 @@ -# frozen_string_literal: true - -require "minitest/autorun" -require_relative "../.github/scripts/verify-evalopsbot-review-setup" - -class VerifyEvalOpsBotReviewSetupTest < Minitest::Test - def test_contract_passes_offline - contract = EvalOpsBotReviewSetup.load_contract(".github/evalopsbot-review-targets.yml") - report = EvalOpsBotReviewSetup.verify(contract, live: false, generated_at: Time.utc(2026, 5, 20, 12, 0, 0)) - - assert_equal "pass", report.fetch("status") - assert_equal "EvalOpsBot", report.fetch("reviewer") - assert_equal 8, report.fetch("target_repository_count") - assert_includes report.fetch("central_workflows"), ".github/workflows/evalopsbot-review-canary.yml" - end - - def test_contract_requires_evalopsbot_reviewer - contract = EvalOpsBotReviewSetup.load_contract(".github/evalopsbot-review-targets.yml") - contract["reviewer"] = "someone-else" - - report = EvalOpsBotReviewSetup.verify(contract, live: false) - - assert_equal "fail", report.fetch("status") - assert_includes report.fetch("errors"), "reviewer must be EvalOpsBot" - end - - def test_markdown_report_surfaces_errors - report = { - "status" => "fail", - "reviewer" => "EvalOpsBot", - "central_repo" => "evalops/.github", - "target_repository_count" => 8, - "errors" => ["evalops/deploy missing"], - "warnings" => [] - } - - markdown = EvalOpsBotReviewSetup.markdown_report(report) - - assert_includes markdown, "Status: `fail`" - assert_includes markdown, "evalops/deploy missing" - end -end diff --git a/test/verify_org_control_plane_contract_test.rb b/test/verify_org_control_plane_contract_test.rb deleted file mode 100644 index 1220d3b..0000000 --- a/test/verify_org_control_plane_contract_test.rb +++ /dev/null @@ -1,96 +0,0 @@ -# frozen_string_literal: true - -require "fileutils" -require "minitest/autorun" -require "tmpdir" -require "yaml" -require_relative "../.github/scripts/verify-org-control-plane-contract" - -class VerifyOrgControlPlaneContractTest < Minitest::Test - def test_repo_contract_passes_and_emits_evidence - contract = EvalOpsOrgControlPlaneContract.load_contract(".github/contracts/org-control-plane.yml") - report = EvalOpsOrgControlPlaneContract.verify( - contract, - root: Dir.pwd, - generated_at: Time.utc(2026, 5, 15, 12, 0, 0) - ) - - assert_equal "pass", report.fetch("status") - assert_equal "evalops.github.org-defaults", report.fetch("contract_id") - assert_operator report.dig("metrics", "requirements_checked"), :>=, 4 - assert_equal 1, report.dig("metrics", "github_security_configuration") - assert_operator report.dig("metrics", "adversarial_fixtures"), :>=, 3 - assert report.fetch("evidence").all? { |item| item.fetch("sha256").match?(/\A[0-9a-f]{64}\z/) } - - markdown = EvalOpsOrgControlPlaneContract.markdown_report(report) - assert_includes markdown, "Org Control Plane Contract Report" - assert_includes markdown, "Status: `pass`" - end - - def test_missing_source_record_fails_closed - Dir.mktmpdir do |root| - write_minimal_repo(root) - contract = EvalOpsOrgControlPlaneContract.load_contract(".github/contracts/org-control-plane.yml") - contract["provenance"]["source_records"].first["path"] = "missing.md" - - report = EvalOpsOrgControlPlaneContract.verify(contract, root: root) - - assert_equal "fail", report.fetch("status") - assert_includes report.fetch("errors"), "missing.md does not exist" - end - end - - def test_adversarial_fixture_must_fail_closed_or_degrade_safely - Dir.mktmpdir do |root| - write_minimal_repo(root) - contract = EvalOpsOrgControlPlaneContract.load_contract(".github/contracts/org-control-plane.yml") - contract["adversarial_fixtures"].first["expected_outcome"] = "pass" - - report = EvalOpsOrgControlPlaneContract.verify(contract, root: root) - - assert_equal "fail", report.fetch("status") - assert report.fetch("errors").any? { |error| error.include?("adversarial expected_outcome") } - end - end - - def test_codeql_org_defaults_must_stay_disabled - contract = EvalOpsOrgControlPlaneContract.load_contract(".github/contracts/org-control-plane.yml") - contract["github_security_configuration"]["required_settings"]["code_scanning_default_setup"] = "enabled" - - report = EvalOpsOrgControlPlaneContract.verify(contract, root: Dir.pwd) - - assert_equal "fail", report.fetch("status") - assert_includes( - report.fetch("errors"), - "github_security_configuration.required_settings.code_scanning_default_setup must be disabled" - ) - end - - private - - def write_minimal_repo(root) - paths = [ - "AGENTS.md", - "README.md", - "services.yaml", - ".github/scripts/verify-org-control-plane-contract.rb", - ".github/scripts/validate-services-catalog.rb", - ".github/scripts/sweep-recent-review-feedback.rb", - ".github/scripts/audit-engineering-practices.rb", - ".github/workflows/codex-rails-check.yml", - ".github/workflows/engineering-practices-audit.yml", - ".github/workflows/review-feedback-sentinel.yml", - "profile/ENGINEERING_PRACTICES.md", - "test/verify_org_control_plane_contract_test.rb", - "test/audit_engineering_practices_test.rb", - "test/validate_services_catalog_test.rb", - "test/sweep_recent_review_feedback_test.rb", - "test/evalops_pr_lens_review_test.rb" - ] - paths.each do |path| - absolute = File.join(root, path) - FileUtils.mkdir_p(File.dirname(absolute)) - File.write(absolute, "#{path}\n") - end - end -end diff --git a/test/workflow_pr_ref_guard_test.rb b/test/workflow_pr_ref_guard_test.rb deleted file mode 100644 index 6d66cb7..0000000 --- a/test/workflow_pr_ref_guard_test.rb +++ /dev/null @@ -1,75 +0,0 @@ -# frozen_string_literal: true - -require "minitest/autorun" -require "yaml" - -class WorkflowPrRefGuardTest < Minitest::Test - def test_review_workflows_do_not_depend_on_synthetic_pull_request_merge_refs - offenders = [] - workflow_paths.each do |path| - File.readlines(path, chomp: true).each_with_index do |line, index| - next unless line.match?(%r{refs/pull/.*/merge}) - - offenders << "#{relative_path(path)}:#{index + 1}: #{line.strip}" - end - end - - assert_empty( - offenders, - "Synthetic PR merge refs disappear for open conflicting PRs. " \ - "Review automation should check out refs/pull//head, then fetch base/head SHAs for diffs.\n" \ - "#{offenders.join("\n")}" - ) - end - - def test_upload_artifact_steps_set_retention_days - offenders = [] - workflow_paths.each do |path| - data = YAML.safe_load(File.read(path), aliases: true) || {} - jobs = data.fetch("jobs", {}) || {} - jobs.each do |job_name, job| - Array(job && job["steps"]).each_with_index do |step, index| - next unless step.is_a?(Hash) && step["uses"].to_s.include?("actions/upload-artifact") - - with = step["with"].is_a?(Hash) ? step["with"] : {} - next if with.key?("retention-days") - - offenders << "#{relative_path(path)} #{job_name} step #{index + 1}" - end - end - end - - assert_empty( - offenders, - "Every upload-artifact step must set retention-days so diagnostic artifacts do not silently keep the repo default.\n" \ - "#{offenders.join("\n")}" - ) - end - - def test_agent_authorship_label_apply_is_best_effort_on_token_denial - workflow = File.read(File.join(root, ".github", "workflows", "agent-authorship-label.yml")) - - assert_includes workflow, "Skipping authorship label apply" - assert_match(/Bad credentials\|HTTP 401\|Resource not accessible\|HTTP 403/, workflow) - assert_operator( - workflow.index("Apply authorship label"), - :<, - workflow.index("Check required Maestro trailers"), - "The required trailer gate should still run after best-effort label application.", - ) - end - - private - - def root - File.expand_path("..", __dir__) - end - - def workflow_paths - Dir.glob(File.join(root, ".github", "{workflows,workflow-templates}", "*.{yml,yaml}")).sort - end - - def relative_path(path) - path.delete_prefix("#{root}/") - end -end