From 6f3e5f33d6568f153ac6b67391e074b9b39ecd45 Mon Sep 17 00:00:00 2001 From: omit-test Date: Sat, 23 May 2026 00:10:32 +0200 Subject: [PATCH 1/6] Add guided simplification review enforcement --- .vibe/skills/specfact-code-review/SKILL.md | 7 +- openspec/CHANGE_ORDER.md | 1 + .../.openspec.yaml | 2 + .../TDD_EVIDENCE.md | 61 ++++++ .../design.md | 70 +++++++ .../proposal.md | 46 +++++ .../guided-simplification-review/spec.md | 63 ++++++ .../specs/review-finding-model/spec.md | 18 ++ .../specs/review-run-command/spec.md | 25 +++ .../tasks.md | 52 +++++ .../specfact-code-review/module-package.yaml | 5 +- .../skills/specfact-code-review/SKILL.md | 12 +- .../src/specfact_code_review/rules/updater.py | 11 +- .../src/specfact_code_review/run/commands.py | 145 ++++++++++++++ .../src/specfact_code_review/run/findings.py | 153 ++++++++++++++- .../src/specfact_code_review/run/runner.py | 10 +- .../tools/ai_bloat_runner.py | 179 +++++++++++++++--- .../tools/semgrep_runner.py | 78 +++++++- packages/specfact-project/module-package.yaml | 5 +- .../resources/prompts/specfact.08-simplify.md | 46 ++++- skills/specfact-code-review/SKILL.md | 7 +- .../specfact_code_review/run/test_commands.py | 118 ++++++++++++ .../specfact_code_review/run/test_findings.py | 82 ++++++++ .../specfact_code_review/run/test_runner.py | 71 +++++++ .../tools/test_ai_bloat_runner.py | 42 +++- .../tools/test_semgrep_runner.py | 97 ++++++++++ tests/unit/test_guided_simplify_resources.py | 39 ++++ 27 files changed, 1385 insertions(+), 60 deletions(-) create mode 100644 openspec/changes/code-review-12-guided-simplification-enforcement/.openspec.yaml create mode 100644 openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md create mode 100644 openspec/changes/code-review-12-guided-simplification-enforcement/design.md create mode 100644 openspec/changes/code-review-12-guided-simplification-enforcement/proposal.md create mode 100644 openspec/changes/code-review-12-guided-simplification-enforcement/specs/guided-simplification-review/spec.md create mode 100644 openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-finding-model/spec.md create mode 100644 openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-run-command/spec.md create mode 100644 openspec/changes/code-review-12-guided-simplification-enforcement/tasks.md create mode 100644 tests/unit/test_guided_simplify_resources.py diff --git a/.vibe/skills/specfact-code-review/SKILL.md b/.vibe/skills/specfact-code-review/SKILL.md index e4794759..61bba0c9 100644 --- a/.vibe/skills/specfact-code-review/SKILL.md +++ b/.vibe/skills/specfact-code-review/SKILL.md @@ -6,10 +6,15 @@ allowed-tools: [] # House Rules - AI Coding Context (v4) -Updated: 2026-03-30 | Module: nold-ai/specfact-code-review +Updated: 2026-05-22 | Module: nold-ai/specfact-code-review ## DO +- For simplification queues, run `specfact code review run --scope changed --focus simplify --json --out .specfact/code-review-simplify.json` +- Ask for walkthrough level when interactive: vibe coder, junior developer, senior/pro, or headless agent; auto-adjust if obvious +- Interpret `guidance_kind`: `safe_mechanical` may apply after local safety checks, `needs_tests` requires tests first, `design_judgment` needs human choice, `preserve` means keep and log `preserve_reason` +- Log each simplification action as recommended, applied, kept, skipped, failed, with evidence of improvement or preserved contract +- In headless mode, process one file at a time and emit an action table: file, line, rule, guidance_kind, recommended_action, action_status, evidence - Ask whether tests should be included before repo-wide review; default to excluding tests unless test changes are the target - Use intention-revealing names; avoid placeholder public names like data/process/handle - Keep functions under 120 LOC, shallow nesting, and <= 5 parameters (KISS) diff --git a/openspec/CHANGE_ORDER.md b/openspec/CHANGE_ORDER.md index bf2cb1bc..4291b675 100644 --- a/openspec/CHANGE_ORDER.md +++ b/openspec/CHANGE_ORDER.md @@ -102,6 +102,7 @@ The architecture pillar remains active because `architecture-02-well-architected | codebase + project-runtime | 02 | codebase-import-runtime-hardening | [#235](https://github.com/nold-ai/specfact-cli-modules/issues/235) | Parent Feature: [#234](https://github.com/nold-ai/specfact-cli-modules/issues/234); Epic: [#162](https://github.com/nold-ai/specfact-cli-modules/issues/162); no known blockers | | code-review + project | 03 | code-review-ai-bloat-detection | [#269](https://github.com/nold-ai/specfact-cli-modules/issues/269) | Parent Feature: [#175](https://github.com/nold-ai/specfact-cli-modules/issues/175); Epic: [#162](https://github.com/nold-ai/specfact-cli-modules/issues/162); no known blockers | | code-review + project | 04 | code-review-11-simplification-feedback-loop | [#276](https://github.com/nold-ai/specfact-cli-modules/issues/276) | Parent Feature: [#275](https://github.com/nold-ai/specfact-cli-modules/issues/275); Epic: [#162](https://github.com/nold-ai/specfact-cli-modules/issues/162); blocked by `code-review-ai-bloat-detection` / [#269](https://github.com/nold-ai/specfact-cli-modules/issues/269) | +| code-review + project | 05 | code-review-12-guided-simplification-enforcement | [#286](https://github.com/nold-ai/specfact-cli-modules/issues/286) | Parent Feature: [#275](https://github.com/nold-ai/specfact-cli-modules/issues/275); Epic: [#162](https://github.com/nold-ai/specfact-cli-modules/issues/162); blocked by `code-review-11-simplification-feedback-loop` / [#276](https://github.com/nold-ai/specfact-cli-modules/issues/276) | ### Documentation restructure diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/.openspec.yaml b/openspec/changes/code-review-12-guided-simplification-enforcement/.openspec.yaml new file mode 100644 index 00000000..4a1c6774 --- /dev/null +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-22 diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md b/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md new file mode 100644 index 00000000..44439603 --- /dev/null +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md @@ -0,0 +1,61 @@ +# TDD Evidence: code-review-12-guided-simplification-enforcement + +## Failing Before + +- `hatch run pytest tests/unit/specfact_code_review/run/test_findings.py tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py tests/unit/specfact_code_review/run/test_runner.py tests/unit/test_guided_simplify_resources.py -q` + - Result: failed as expected before implementation. + - Evidence: 18 failed, 64 passed. + - Missing contract areas: guided finding fields, preserve validation, schema 1.2 summary, classifier guidance kinds, simplify enforce behavior, and prompt/skill walkthrough policy. + +## Passing After + +- `hatch run pytest tests/unit/specfact_code_review/run/test_commands.py tests/unit/specfact_code_review/run/test_findings.py tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py tests/unit/specfact_code_review/run/test_runner.py tests/unit/specfact_code_review/rules/test_updater.py::test_default_skill_content_stays_within_line_budget tests/unit/specfact_code_review/rules/test_updater.py::test_load_bundled_skill_content_returns_valid_structure_when_available tests/unit/test_guided_simplify_resources.py -q` + - Result: 116 passed. +- `hatch run pytest tests/unit/specfact_code_review/run/test_commands.py tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py -q` + - Result after final cleanup: 50 passed. +- `hatch run contract-test` + - Result: 742 passed, 2 warnings. +- `hatch run smart-test` + - Result: 742 passed, 2 warnings. +- `hatch run type-check` + - Result: 0 errors, 0 warnings, 0 notes. +- `hatch run lint` + - Result: 10.00/10. +- `hatch run yaml-lint` + - Result: validated 6 manifests and `registry/index.json`. +- `hatch run check-bundle-imports` + - Result: import boundary check passed. +- `hatch run validate-prompt-commands` + - Result: prompt command validation passed with no findings. +- `hatch run verify-modules-signature --payload-from-filesystem --enforce-version-bump --version-check-base origin/dev` + - Result: verified 6 module manifests. +- `hatch run specfact code review run --bug-hunt --json --out .specfact/code-review.json --scope changed` + - Result: PASS, CI exit 0, score 115, 0 findings. +- `openspec validate code-review-12-guided-simplification-enforcement --strict` + - Result: valid. + +## Local Dev-Link Validation + +- Linked live modules with: + - `hatch run link-dev-module specfact-code-review --force` + - `hatch run link-dev-module specfact-project --force` +- Verified runtime precedence through CLI output: project-scope `.specfact/modules/specfact-code-review` and `.specfact/modules/specfact-project` shadow user-scope copies. +- Current changed-scope mode checks: + - Default: PASS, schema `1.0`, score 115, 0 findings. + - Bug-hunt: PASS, schema `1.0`, score 115, 0 findings. + - Simplify shadow: PASS, schema `1.0`, score 115, 0 findings. + - Simplify enforce: PASS, schema `1.0`, score 115, 0 findings. +- Guided fixture checks: + - Simplify shadow on fixture: PASS, schema `1.2`, 3 findings, summary counts `safe_mechanical=1`, `needs_tests=1`, `preserve=1`. + - Simplify enforce on same fixture: FAIL, schema `1.2`, `ci_exit_code=1`, blocked only by the unresolved safe-mechanical recommendation. + - Simplify enforce with `--fix` on safe-mechanical fixture: rewrote `return sum(values)`, then PASS. Remaining Semgrep wrapper finding now carries `design_judgment` guidance and schema `1.2`. +- Prompt/skill dry run with subagent: + - Confirmed walkthrough levels, `guidance_kind` policy, no batch edits, and action status requirements. + - Found and fixed gaps: missing-report explanation, headless batching language, and concrete action-log shape. +- Final dev-link bug-hunt with extended targeted-test timeout: + - `SPECFACT_ALLOW_UNSIGNED=1 SPECFACT_CODE_REVIEW_TARGETED_TEST_TIMEOUT=300 hatch run specfact code review run --bug-hunt --json --out .specfact/tmp-local-dev-link-review/changed-bughunt-clean-final.json --scope changed` + - Result: PASS, CI exit 0, score 115, 0 findings. + +## Signing Note + +`hatch run sign-modules --changed-only --payload-from-filesystem --bump-version patch --base-ref origin/dev` failed locally because no private signing key was available. I reran with `--allow-unsigned`, which bumped affected module versions and refreshed filesystem checksums. Cryptographic signature restoration remains an approval-time signing step. diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/design.md b/openspec/changes/code-review-12-guided-simplification-enforcement/design.md new file mode 100644 index 00000000..71f3c1d6 --- /dev/null +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/design.md @@ -0,0 +1,70 @@ +## Context + +The current simplify flow emits useful candidates but does not always tell an LLM what to do safely. The most important missing distinction is between accidental structure and meaningful structure. Examples: + +- `@require(lambda ...)` and `@ensure(lambda ...)` are contract expressions, not pass-through bloat. +- Optional parameters on abstract adapters may preserve protocol compatibility even if a concrete implementation does not use them. +- Small wrappers can be domain predicates, public compatibility boundaries, or CLI affordances. +- Long low-complexity functions may be readable orchestration rather than bloat. + +The review output must encode these distinctions deterministically so an AI IDE or headless agent can act without guessing. + +## Goals / Non-Goals + +**Goals:** + +- Add action-oriented guidance to simplify findings while keeping JSON backward-compatible. +- Make safe mechanical cleanup enforceable and optionally fixable. +- Keep judgment-heavy and preserve-worthy patterns out of automatic cleanup. +- Make `/specfact.08-simplify` interactive and adaptive to user experience level. +- Make the `specfact-code-review` skill give LLMs the same cleanup policy in IDE and CLI contexts. + +**Non-Goals:** + +- No LLM or embedding classifier inside the CLI. +- No automatic refactor for design-judgment findings. +- No breaking removal of existing simplification metadata fields. +- No claim that findings prove AI authorship. + +## Decisions + +### Decision 1: Extend finding guidance instead of adding a second artifact + +The JSON report remains the source of truth. Each simplification finding can carry: + +- `guidance_kind`: `safe_mechanical`, `needs_tests`, `design_judgment`, or `preserve`; +- `recommended_action`: `remove`, `inline`, `collapse`, `deduplicate`, `make_required`, `keep`, or `inspect`; +- `clean_code_principle`: `kiss`, `dry`, `yagni`, `contracts`, `api_stability`, or `readability`; +- `rationale`, `safety_checks`, `preserve_reason`, and `action_status`; +- optional before/after evidence and improvement metrics after an auto-applied safe fix. + +Existing fields such as `confidence`, `rewrite_hint`, `canonical_pattern`, `intent_key`, and `related_locations` remain valid. + +### Decision 2: Classify before enforcing + +`--focus simplify` should emit all relevant guidance kinds, but only `safe_mechanical` findings may become enforceable. `needs_tests`, `design_judgment`, and `preserve` remain non-blocking and non-autofix. + +### Decision 3: Preserve meaningful patterns explicitly + +False-positive-prone contexts are not merely suppressed; they should produce `preserve` findings when that helps the user understand why cleanup is not recommended. Contract lambdas, abstract/protocol adapter params, public compatibility wrappers, CLI boundary wrappers, and domain-named predicates should include a `preserve_reason`. + +### Decision 4: Prompt and skill are part of the product + +The slash prompt and skill are the interactive delivery surface for the target audience. They must ask for or infer walkthrough level and adjust wording: + +- vibe coder: teaching flow, one finding at a time; +- junior developer: principle, risk, test, proposed edit; +- senior/pro: concise grouped triage; +- headless agent: deterministic JSON-first behavior with no broad refactors. + +### Decision 5: Evidence must show outcome, not just recommendation + +When `--fix` applies a safe rewrite, the report should record what was recommended, what changed, what remains, and the improvement. For manual prompt flows, the prompt should summarize accepted, skipped, kept, and still-present findings after rerunning review. + +## Risks / Trade-offs + +- **Over-enforcement:** Limit blocking to `safe_mechanical` only. +- **Autofix risk:** Restrict `--fix` to small AST-safe rewrites with post-review evidence. +- **Prompt overwhelm:** Start with walkthrough level and group by guidance kind and intent. +- **Schema churn:** Keep all fields optional and validate legacy reports. +- **False-positive confusion:** Prefer explicit `preserve` with reason over silent disappearance where a finding would otherwise be tempting to fix. diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/proposal.md b/openspec/changes/code-review-12-guided-simplification-enforcement/proposal.md new file mode 100644 index 00000000..df9e3fe7 --- /dev/null +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/proposal.md @@ -0,0 +1,46 @@ +## Why + +`code-review-11-simplification-feedback-loop` made simplification findings richer, but the output is still closer to a senior developer's radar than a junior-safe workflow. Users and LLM agents need findings that say whether a cleanup is safe to apply, needs tests, requires design judgment, or should be preserved because it encodes a contract, public API boundary, compatibility shim, or domain intent. + +This follow-up turns `specfact code review run --focus simplify` into a guided clean-code workflow for interactive IDE users and headless AI agents. It keeps meaningful contracts and extension points intact while making truly mechanical AI-bloat cleanup clear enough for non-senior developers and LLMs to interpret correctly. + +## What Changes + +- Extend simplification findings with senior-readable guidance: guidance kind, recommended action, rationale, clean-code principle, safety checks, preserve reason, action status, and before/after improvement evidence. +- Classify simplification findings into `safe_mechanical`, `needs_tests`, `design_judgment`, and `preserve` so agents do not blindly remove meaningful patterns. +- Make `--focus simplify --mode enforce` fail only on unresolved `safe_mechanical` findings. +- Make `--focus simplify --fix` apply only deterministic safe-mechanical rewrites and record what was applied, still recommended, kept, skipped, or failed. +- Upgrade `/specfact.08-simplify` and the `specfact-code-review` skill into adaptive walkthrough surfaces for vibe-coder, junior, senior/pro, and headless-agent usage. +- Keep review JSON backward-compatible and use it as the authoritative evidence artifact. + +## Capabilities + +### New Capabilities + +- `guided-simplification-review`: Senior-grade guidance and evidence for simplify-focused code review workflows. + +### Modified Capabilities + +- `review-finding-model`: Add optional action-oriented simplification guidance fields. +- `review-run-command`: Add guided enforce/fix behavior for simplify focus. +- `code-review-simplification-feedback`: Upgrade metadata from advisory hints to actionable, LLM-safe guidance. +- `house-rules-skill`: Align the installed skill with the new simplify decision policy. + +## Impact + +- **Affected bundles:** `packages/specfact-code-review` and `packages/specfact-project`. +- **Affected interfaces:** `.specfact/code-review.json` receives additive optional guidance metadata and report summary fields; existing required fields remain compatible. +- **Affected prompt resources:** `packages/specfact-project/resources/prompts/specfact.08-simplify.md`. +- **Affected skill resources:** `packages/specfact-code-review/src/specfact_code_review/resources/skills/specfact-code-review/SKILL.md`, `skills/specfact-code-review/SKILL.md`, and IDE skill copies. +- **Release impact:** module package version bumps and signature refresh are required when packaged resources or manifests change. + +## Source Tracking + + +- **Modules Epic:** [#162](https://github.com/nold-ai/specfact-cli-modules/issues/162) +- **Parent Feature:** [#275](https://github.com/nold-ai/specfact-cli-modules/issues/275) +- **GitHub Issue:** [#286](https://github.com/nold-ai/specfact-cli-modules/issues/286) +- **Repository:** nold-ai/specfact-cli-modules +- **Prior Baseline:** [#276](https://github.com/nold-ai/specfact-cli-modules/issues/276) / `code-review-11-simplification-feedback-loop` +- **Last Synced Status:** proposed +- **Sanitized:** false diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/specs/guided-simplification-review/spec.md b/openspec/changes/code-review-12-guided-simplification-enforcement/specs/guided-simplification-review/spec.md new file mode 100644 index 00000000..c2c713e8 --- /dev/null +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/specs/guided-simplification-review/spec.md @@ -0,0 +1,63 @@ +## ADDED Requirements + +### Requirement: Simplification findings classify cleanup safety + +Simplify-focused review findings SHALL classify each simplification candidate into a guidance kind that tells developers and LLM agents how to act safely. + +#### Scenario: Finding describes safe mechanical cleanup + +- **WHEN** a deterministic simplification rule identifies behavior-preserving cleanup +- **THEN** the finding SHALL include `guidance_kind="safe_mechanical"` +- **AND** it SHALL include `recommended_action`, `rationale`, `clean_code_principle`, and `safety_checks` +- **AND** the recommended action SHALL be specific enough for an LLM to explain or apply without inferring intent from the free-form message + +#### Scenario: Finding preserves meaningful structure + +- **WHEN** a candidate occurs in a meaningful contract, interface, public compatibility, CLI boundary, or domain predicate context +- **THEN** the finding SHALL use `guidance_kind="preserve"` or `guidance_kind="design_judgment"` +- **AND** `preserve` findings SHALL include a `preserve_reason` +- **AND** the finding SHALL NOT be eligible for automatic cleanup + +### Requirement: Guided simplification reports summarize recommendations and outcomes + +Review reports containing guided simplification findings SHALL summarize what was recommended, applied, kept, skipped, failed, and still present. + +#### Scenario: Report contains guidance summary + +- **WHEN** a simplify-focused run emits guided simplification findings +- **THEN** the report SHALL include a `simplification_summary` +- **AND** the summary SHALL count findings by `guidance_kind` +- **AND** it SHALL count findings by `action_status` when status is present +- **AND** it SHALL include the number of blocking simplification findings under simplify enforcement + +#### Scenario: Auto-fix records improvement evidence + +- **WHEN** `--focus simplify --fix` applies a safe mechanical rewrite +- **THEN** the resulting report SHALL indicate that the finding was applied or cleared +- **AND** it SHALL record before/after references or improvement evidence sufficient for an LLM to summarize what changed + +### Requirement: Interactive simplify prompt adapts to user level + +The `/specfact.08-simplify` prompt SHALL adapt guidance depth and confirmation behavior to the user's walkthrough level. + +#### Scenario: Prompt asks for walkthrough level + +- **WHEN** the prompt starts without an explicit level argument +- **THEN** it SHALL ask whether the user wants vibe-coder, junior developer, senior/pro, or headless-agent guidance +- **AND** it SHALL explain the practical difference between those levels before proceeding + +#### Scenario: Headless mode stays conservative + +- **WHEN** the prompt or skill is used in headless-agent mode +- **THEN** it SHALL default to review-only behavior unless the user explicitly requested safe automatic application +- **AND** it SHALL apply only findings marked safe for automatic cleanup + +### Requirement: Skill carries the guided simplify decision policy + +The `specfact-code-review` skill SHALL guide LLMs to interpret simplify-focused findings consistently across IDE and CLI contexts. + +#### Scenario: Skill explains action policy + +- **WHEN** an LLM uses the `specfact-code-review` skill to act on simplify findings +- **THEN** the skill SHALL instruct it to apply `safe_mechanical`, test `needs_tests`, inspect `design_judgment`, and keep `preserve` +- **AND** it SHALL prohibit treating AI-bloat findings as proof of AI authorship diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-finding-model/spec.md b/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-finding-model/spec.md new file mode 100644 index 00000000..fe645a7a --- /dev/null +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-finding-model/spec.md @@ -0,0 +1,18 @@ +## MODIFIED Requirements + +### Requirement: ReviewFinding schema supports additive simplification metadata + +The `ReviewFinding` model SHALL accept optional simplification metadata while preserving the existing governed finding fields and category/severity validation. The report schema version SHALL advance additively when simplification metadata or guided simplification metadata is emitted. + +#### Scenario: Simplification metadata validates on a finding + +- **WHEN** a `ReviewFinding` payload includes existing simplification metadata such as `confidence`, `rewrite_hint`, `canonical_pattern`, `intent_key`, `estimated_deletion_lines`, or `related_locations` +- **THEN** model validation SHALL accept the payload when the original required fields are valid +- **AND** `related_locations` SHALL use stable file and line references compatible with existing evidence references + +#### Scenario: Guided simplification metadata validates on a finding + +- **WHEN** a `ReviewFinding` payload includes `guidance_kind`, `recommended_action`, `clean_code_principle`, `rationale`, `safety_checks`, `preserve_reason`, `action_status`, `before_ref`, `after_ref`, or `improvement` +- **THEN** model validation SHALL accept the payload when the original required fields are valid +- **AND** a finding with `guidance_kind="preserve"` SHALL require a non-empty `preserve_reason` +- **AND** legacy finding payloads SHALL remain valid without any guided simplification fields diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-run-command/spec.md b/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-run-command/spec.md new file mode 100644 index 00000000..de441252 --- /dev/null +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-run-command/spec.md @@ -0,0 +1,25 @@ +## MODIFIED Requirements + +### Requirement: Review run supports simplify focus + +The `specfact code review run` command SHALL accept `--focus simplify` as a targeted review focus for simplification feedback. The focus SHALL retain findings that belong in the simplification queue and SHALL classify them with actionable guidance. + +#### Scenario: Simplify focus emits guided simplification queue + +- **WHEN** `specfact code review run --focus simplify --json --out .specfact/code-review.json` completes +- **THEN** the JSON report SHALL retain simplification-focused findings +- **AND** retained findings SHOULD include guidance metadata for actionability, preservation, or design judgment +- **AND** the report SHALL include a simplification summary when guided findings are present + +#### Scenario: Simplify enforce blocks only safe mechanical debt + +- **WHEN** `specfact code review run --focus simplify --mode enforce` runs +- **THEN** the process SHALL fail only when unresolved findings with `guidance_kind="safe_mechanical"` remain +- **AND** findings classified as `needs_tests`, `design_judgment`, or `preserve` SHALL NOT make the run fail + +#### Scenario: Simplify fix applies only safe mechanical rewrites + +- **WHEN** `specfact code review run --focus simplify --fix` runs +- **THEN** automatic rewrites SHALL be limited to deterministic safe-mechanical findings +- **AND** the command SHALL rerun review after applying rewrites +- **AND** the JSON report SHALL record applied, failed, and still-recommended outcomes diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/tasks.md b/openspec/changes/code-review-12-guided-simplification-enforcement/tasks.md new file mode 100644 index 00000000..eceed778 --- /dev/null +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/tasks.md @@ -0,0 +1,52 @@ +## 1. GitHub readiness and OpenSpec setup + +- [x] 1.1 Create OpenSpec change `code-review-12-guided-simplification-enforcement`. +- [x] 1.2 Create GitHub issue [#286](https://github.com/nold-ai/specfact-cli-modules/issues/286), link it under Feature [#275](https://github.com/nold-ai/specfact-cli-modules/issues/275), and label it with `enhancement`, `codebase`, `openspec`, and `change-proposal`. +- [x] 1.3 Confirm issue project assignment, open/Todo state, parent linkage, and source tracking. +- [x] 1.4 Add `openspec/CHANGE_ORDER.md` row as order 05, blocked by [#276](https://github.com/nold-ai/specfact-cli-modules/issues/276). +- [x] 1.5 Validate the OpenSpec change with `openspec validate code-review-12-guided-simplification-enforcement --strict`. + +## 2. Spec-first failing tests + +- [x] 2.1 Add model tests for guidance fields, legacy compatibility, preserve reason validation, and simplification summary serialization. +- [x] 2.2 Add classifier tests for `safe_mechanical`, `needs_tests`, `design_judgment`, and `preserve` cases. +- [x] 2.3 Add CLI tests proving `--focus simplify --mode enforce` fails only on unresolved safe-mechanical findings. +- [x] 2.4 Add CLI tests proving `--focus simplify --fix` applies only deterministic safe-mechanical rewrites and records action status/evidence. +- [x] 2.5 Add prompt contract tests for walkthrough-level selection, adaptive guidance, headless defaults, and confirmation rules. +- [x] 2.6 Add skill tests or resource checks proving the packaged and repo-local skill carry the same simplify policy. +- [x] 2.7 Record failing-before evidence in `TDD_EVIDENCE.md`. + +## 3. Review model and guidance metadata + +- [x] 3.1 Extend `ReviewFinding` with optional guided simplification fields. +- [x] 3.2 Add `ReviewReport.simplification_summary` with counts by guidance kind and action status. +- [x] 3.3 Ensure legacy reports still validate and existing scoring/blocking behavior remains unchanged outside simplify enforcement. +- [x] 3.4 Add helper predicates for safe-mechanical and auto-fix eligibility. + +## 4. Simplification classifier and preserve policy + +- [x] 4.1 Classify existing simplification rules into guidance kinds with deterministic rationale and safety checks. +- [x] 4.2 Reclassify abstract params and meaningful wrappers as `preserve` or `design_judgment`, with contract-preservation guidance documented for prompt/skill users. +- [x] 4.3 Keep long low-complexity and duplicate-shape signals out of automatic cleanup unless stronger metadata proves safe. +- [x] 4.4 Ensure terminal and JSON output make recommended action and preserve reason obvious. + +## 5. Enforce/fix workflow + +- [x] 5.1 Make `--focus simplify --mode enforce` fail only when unresolved safe-mechanical findings remain. +- [x] 5.2 Implement conservative safe-mechanical auto-fix support for deterministic rewrites only. +- [x] 5.3 Re-run review after auto-fix and record applied/failed/still-recommended outcomes. +- [x] 5.4 Preserve non-autofix behavior for `needs_tests`, `design_judgment`, and `preserve`. + +## 6. Prompt and skill interaction flow + +- [x] 6.1 Update `/specfact.08-simplify` to ask for or infer walkthrough level: vibe coder, junior developer, senior/pro, or headless agent. +- [x] 6.2 Adapt explanation depth, grouping, confirmation, and headless behavior by walkthrough level. +- [x] 6.3 Update `specfact-code-review` skill copies and packaged skill resource with the same decision policy. +- [x] 6.4 Update docs for guided simplify findings, preserve classifications, enforce/fix behavior, and evidence summaries. + +## 7. Packaging, signatures, and verification + +- [x] 7.1 Bump affected module versions when packaged resources change. +- [x] 7.2 Refresh affected module manifest integrity checksums; cryptographic signing key was unavailable locally, so approval-time signing remains required. +- [x] 7.3 Re-run targeted tests and record passing evidence in `TDD_EVIDENCE.md`. +- [x] 7.4 Run required gates for touched scope: `hatch run format`, `hatch run type-check`, `hatch run lint`, `hatch run yaml-lint`, `hatch run check-bundle-imports`, `hatch run verify-modules-signature --payload-from-filesystem --enforce-version-bump`, `hatch run contract-test`, relevant `hatch run smart-test`, relevant `hatch run test`, and `hatch run specfact code review run --bug-hunt --json --out .specfact/code-review.json --scope changed`. diff --git a/packages/specfact-code-review/module-package.yaml b/packages/specfact-code-review/module-package.yaml index c03993af..cda2823d 100644 --- a/packages/specfact-code-review/module-package.yaml +++ b/packages/specfact-code-review/module-package.yaml @@ -1,5 +1,5 @@ name: nold-ai/specfact-code-review -version: 0.47.20 +version: 0.47.21 commands: - code tier: official @@ -23,5 +23,4 @@ description: Official SpecFact code review bundle package. category: codebase bundle_group_command: code integrity: - checksum: sha256:ae75a5054d1034292e69ff7d55a02ce9f88f731644e13e73fa8d49ce576f8084 - signature: 5HjXdkL5IDLvbYN/qU7JOyGaxvEy8yLWF1J0qQQGfxs29bfHp6LR9u+Ay6wEEiqbHkgSnTYNY8Zb4Ljb6dXQBQ== + checksum: sha256:145b823d75e35c71c4d9a3bfdb60a9b1385da8bad274b8a4d2d3b040f7539d83 diff --git a/packages/specfact-code-review/src/specfact_code_review/resources/skills/specfact-code-review/SKILL.md b/packages/specfact-code-review/src/specfact_code_review/resources/skills/specfact-code-review/SKILL.md index f34f2653..7e3464be 100644 --- a/packages/specfact-code-review/src/specfact_code_review/resources/skills/specfact-code-review/SKILL.md +++ b/packages/specfact-code-review/src/specfact_code_review/resources/skills/specfact-code-review/SKILL.md @@ -6,13 +6,15 @@ allowed-tools: [] # House Rules - AI Coding Context / SpecFact Code Review Skill (v2) -Updated: 2026-05-21 | Module: nold-ai/specfact-code-review - +Updated: 2026-05-22 | Module: nold-ai/specfact-code-review ## DO - - Use this skill when asked to run, interpret, or act on SpecFact code review in Codex CLI or another AI IDE - Treat `specfact code review run --help` as authoritative; self-heal stale options by checking help before changing workflow - For simplification queues, run `specfact code review run --scope changed --focus simplify --json --out .specfact/code-review-simplify.json` +- Ask for walkthrough level when interactive: vibe coder, junior developer, senior/pro, or headless agent; auto-adjust if obvious +- Interpret `guidance_kind`: `safe_mechanical` may apply after local safety checks, `needs_tests` requires tests first, `design_judgment` needs human choice, `preserve` means keep and log `preserve_reason` +- Log each simplification action as recommended, applied, kept, skipped, failed, with evidence of improvement or preserved contract +- In headless mode, process one file at a time and emit an action table: file, line, rule, guidance_kind, recommended_action, action_status, evidence - For merge-quality review, run `specfact code review run --scope changed --bug-hunt --json --out .specfact/code-review.json` - Ask whether tests should be included before repo-wide review; default to excluding tests unless test changes are the target - Use intention-revealing names; avoid placeholder public names like data/process/handle @@ -23,9 +25,7 @@ Updated: 2026-05-21 | Module: nold-ai/specfact-code-review - Add @require/@ensure (icontract) + @beartype to all new public APIs - Run hatch run contract-test-contracts before any commit - Write the test file BEFORE the feature file (TDD-first) - ## DON'T - - Don't copy prompt templates into AI IDEs when this installed skill can carry the reusable workflow guidance - Don't treat simplification findings as AI-authorship proof or apply batch rewrites without explicit user approval - Don't enable known noisy findings unless you explicitly want strict/full review output @@ -34,7 +34,5 @@ Updated: 2026-05-21 | Module: nold-ai/specfact-code-review - Don't mix read + write in the same method or call `repository.*` and `http_client.*` together - Don't import at module level if it triggers network calls - Don't hardcode secrets; use env vars via pydantic.BaseSettings - ## TOP VIOLATIONS (auto-updated by specfact code review rules update) - diff --git a/packages/specfact-code-review/src/specfact_code_review/rules/updater.py b/packages/specfact-code-review/src/specfact_code_review/rules/updater.py index 2296a0ec..3a99a22d 100644 --- a/packages/specfact-code-review/src/specfact_code_review/rules/updater.py +++ b/packages/specfact-code-review/src/specfact_code_review/rules/updater.py @@ -34,6 +34,12 @@ "before changing workflow", "- For simplification queues, run `specfact code review run --scope changed --focus simplify --json " "--out .specfact/code-review-simplify.json`", + "- Ask for walkthrough level when interactive: vibe coder, junior developer, senior/pro, or headless agent; " + "auto-adjust if obvious", + "- Interpret `guidance_kind`: `safe_mechanical` may apply after local safety checks, `needs_tests` requires " + "tests first, `design_judgment` needs human choice, `preserve` means keep and log `preserve_reason`", + "- Log each simplification action as recommended, applied, kept, skipped, failed, with evidence of improvement " + "or preserved contract", "- For merge-quality review, run `specfact code review run --scope changed --bug-hunt --json " "--out .specfact/code-review.json`", "- Verify an active OpenSpec change covers the requested scope and follow the sequence: spec delta " @@ -159,17 +165,12 @@ def default_skill_content(*, updated_on: date | None = None) -> str: f"description: {DEFAULT_DESCRIPTION}", "allowed-tools: []", "---", - "", f"{TITLE_PREFIX} (v1)", - "", f"Updated: {stamp} | Module: {MODULE_LABEL}", - "", "## DO", *DEFAULT_DO_RULES, - "", "## DON'T", *DEFAULT_DONT_RULES, - "", TOP_VIOLATIONS_HEADER, TOP_VIOLATIONS_MARKER, ] diff --git a/packages/specfact-code-review/src/specfact_code_review/run/commands.py b/packages/specfact-code-review/src/specfact_code_review/run/commands.py index b1f028e2..a70fd861 100644 --- a/packages/specfact-code-review/src/specfact_code_review/run/commands.py +++ b/packages/specfact-code-review/src/specfact_code_review/run/commands.py @@ -2,6 +2,7 @@ from __future__ import annotations +import ast import subprocess import sys from collections import defaultdict @@ -279,6 +280,141 @@ def _apply_fixes(files: list[Path]) -> None: raise RuntimeError(f"Auto-fix command failed: {' '.join(command)}: {error_output}") +def _apply_simplification_fixes(report: ReviewReport) -> int: + """Apply deterministic safe-mechanical simplification rewrites from a report.""" + applied = 0 + for finding in report.findings: + if not finding.is_safe_mechanical_simplification() or not finding.fixable: + continue + if finding.rule == "ai-bloat.redundant-intermediate": + applied += int(_apply_redundant_intermediate_fix(finding)) + elif finding.rule == "ai-bloat.verbose-bool-return": + applied += int(_apply_verbose_bool_return_fix(finding)) + return applied + + +def _apply_redundant_intermediate_fix(finding: ReviewFinding) -> bool: + parsed = _parsed_finding_source(finding) + if parsed is None: + return False + file_path, source, tree = parsed + for function_node in _iter_functions(tree): + for index, stmt in enumerate(function_node.body[:-1]): + next_stmt = function_node.body[index + 1] + if not _matches_redundant_intermediate(stmt, next_stmt, finding.line): + continue + expression = ast.get_source_segment(source, stmt.value) + if expression is None: + return False + return _replace_line_range( + file_path, + source, + start_line=stmt.lineno, + end_line=next_stmt.end_lineno or next_stmt.lineno, + replacement=f"{_indent_for_line(source, stmt.lineno)}return {expression}", + ) + return False + + +def _apply_verbose_bool_return_fix(finding: ReviewFinding) -> bool: + parsed = _parsed_finding_source(finding) + if parsed is None: + return False + file_path, source, tree = parsed + for function_node in _iter_functions(tree): + for index, stmt in enumerate(function_node.body[:-1]): + next_stmt = function_node.body[index + 1] + expression = _verbose_bool_replacement_expression(source, stmt, next_stmt, finding.line) + if expression is None: + continue + return _replace_line_range( + file_path, + source, + start_line=stmt.lineno, + end_line=next_stmt.end_lineno or next_stmt.lineno, + replacement=f"{_indent_for_line(source, stmt.lineno)}return {expression}", + ) + return False + + +def _parsed_finding_source(finding: ReviewFinding) -> tuple[Path, str, ast.Module] | None: + file_path = Path(finding.file) + try: + source = file_path.read_text(encoding="utf-8") + tree = ast.parse(source, filename=str(file_path)) + except (OSError, SyntaxError, UnicodeDecodeError): + return None + return file_path, source, tree + + +def _matches_redundant_intermediate(stmt: ast.stmt, next_stmt: ast.stmt, line: int) -> bool: + if stmt.lineno != line or not isinstance(stmt, ast.Assign): + return False + if len(stmt.targets) != 1 or not isinstance(stmt.targets[0], ast.Name): + return False + return ( + isinstance(next_stmt, ast.Return) + and isinstance(next_stmt.value, ast.Name) + and next_stmt.value.id == stmt.targets[0].id + ) + + +def _verbose_bool_replacement_expression( + source: str, + stmt: ast.stmt, + next_stmt: ast.stmt, + line: int, +) -> str | None: + if stmt.lineno != line or not isinstance(stmt, ast.If): + return None + predicate = ast.get_source_segment(source, stmt.test) + if predicate is None or len(stmt.body) != 1 or stmt.orelse: + return None + first_value = _return_bool_constant(stmt.body[0]) + second_value = _return_bool_constant(next_stmt) + return ( + None + if first_value is None or second_value is None or first_value == second_value + else _bool_expr(predicate, first_value) + ) + + +def _bool_expr(predicate: str, first_value: bool) -> str: + return predicate if first_value else f"not ({predicate})" + + +def _iter_functions(tree: ast.AST) -> list[ast.FunctionDef | ast.AsyncFunctionDef]: + return [node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef)] + + +def _return_bool_constant(stmt: ast.stmt) -> bool | None: + if isinstance(stmt, ast.Return) and isinstance(stmt.value, ast.Constant) and isinstance(stmt.value.value, bool): + return stmt.value.value + return None + + +def _indent_for_line(source: str, line_number: int) -> str: + line = source.splitlines()[line_number - 1] + return line[: len(line) - len(line.lstrip())] + + +def _replace_line_range( + file_path: Path, + source: str, + *, + start_line: int, + end_line: int, + replacement: str, +) -> bool: + lines = source.splitlines() + if start_line < 1 or end_line < start_line or end_line > len(lines): + return False + lines[start_line - 1 : end_line] = [replacement] + trailing_newline = "\n" if source.endswith("\n") else "" + file_path.write_text("\n".join(lines) + trailing_newline, encoding="utf-8") + return True + + def _render_report(report: ReviewReport) -> None: grouped: dict[str, list[ReviewFinding]] = defaultdict(list) for finding in report.findings: @@ -376,6 +512,9 @@ def _run_review_with_status( ) report = _run_review_once(files, base) if flags.fix: + if flags.review_focus == "simplify": + status.update("Applying safe mechanical simplification fixes...") + _apply_simplification_fixes(report) status.update("Applying Ruff autofixes...") _apply_fixes(files) status.update("Re-running review after autofixes...") @@ -395,6 +534,12 @@ def _run_review_once(files: list[Path], flags: _ReviewLoopFlags) -> ReviewReport focus=flags.review_focus, ) if flags.fix: + if flags.review_focus == "simplify": + if flags.progress_callback is not None: + flags.progress_callback("Applying safe mechanical simplification fixes...") + else: + progress_console.print("[dim]Applying safe mechanical simplification fixes...[/dim]") + _apply_simplification_fixes(report) if flags.progress_callback is not None: flags.progress_callback("Applying Ruff autofixes...") else: diff --git a/packages/specfact-code-review/src/specfact_code_review/run/findings.py b/packages/specfact-code-review/src/specfact_code_review/run/findings.py index 9a312464..6b6dc770 100644 --- a/packages/specfact-code-review/src/specfact_code_review/run/findings.py +++ b/packages/specfact-code-review/src/specfact_code_review/run/findings.py @@ -27,6 +27,8 @@ "ai_bloat", ) VALID_SEVERITIES = ("error", "warning", "info") +GUIDANCE_KINDS = ("safe_mechanical", "needs_tests", "design_judgment", "preserve") +ACTION_STATUSES = ("recommended", "applied", "kept", "skipped", "failed") PASS = "PASS" PASS_WITH_ADVISORY = "PASS_WITH_ADVISORY" FAIL = "FAIL" @@ -111,14 +113,97 @@ class ReviewFinding(BaseModel): default=None, description="Optional related source locations for grouped simplification candidates.", ) + guidance_kind: Literal["safe_mechanical", "needs_tests", "design_judgment", "preserve"] | None = Field( + default=None, + description="Guided simplification action class.", + ) + recommended_action: ( + Literal[ + "remove", + "inline", + "collapse", + "deduplicate", + "make_required", + "keep", + "inspect", + ] + | None + ) = Field(default=None, description="Recommended simplification action.") + clean_code_principle: ( + Literal[ + "kiss", + "dry", + "yagni", + "contracts", + "api_stability", + "readability", + ] + | None + ) = Field(default=None, description="Primary clean-code principle behind the recommendation.") + rationale: str | None = Field(default=None, description="Why the recommendation is meaningful.") + safety_checks: list[str] | None = Field( + default=None, + description="Concrete checks an agent or developer must satisfy before applying the change.", + ) + preserve_reason: str | None = Field( + default=None, + description="Why a preserve recommendation should be kept despite apparent bloat.", + ) + action_status: Literal["recommended", "applied", "kept", "skipped", "failed"] | None = Field( + default=None, + description="Lifecycle status for recommended simplification work.", + ) + before_ref: EvidenceRef | None = Field(default=None, description="Evidence reference before an applied action.") + after_ref: EvidenceRef | None = Field(default=None, description="Evidence reference after an applied action.") + improvement: str | None = Field(default=None, description="Evidence-backed improvement summary.") - @field_validator("tool", "rule", "file", "message", "rewrite_hint", "canonical_pattern", "intent_key") + @field_validator( + "tool", + "rule", + "file", + "message", + "rewrite_hint", + "canonical_pattern", + "intent_key", + "rationale", + "preserve_reason", + "improvement", + ) @classmethod def _validate_non_empty_text(cls, value: str | None) -> str | None: if value is not None and not value.strip(): raise ValueError("value must not be empty") return value + @field_validator("safety_checks") + @classmethod + def _validate_safety_checks(cls, value: list[str] | None) -> list[str] | None: + if value is None: + return value + if not value: + raise ValueError("safety_checks must not be empty when provided") + if any(not item.strip() for item in value): + raise ValueError("safety_checks entries must not be empty") + return value + + @model_validator(mode="after") + def _validate_guided_metadata(self) -> ReviewFinding: + if self.guidance_kind is None: + return self + if self.recommended_action is None: + raise ValueError("recommended_action is required when guidance_kind is present") + if self.clean_code_principle is None: + raise ValueError("clean_code_principle is required when guidance_kind is present") + if self.rationale is None: + raise ValueError("rationale is required when guidance_kind is present") + if self.safety_checks is None: + raise ValueError("safety_checks is required when guidance_kind is present") + if self.action_status is None: + raise ValueError("action_status is required when guidance_kind is present") + if self.guidance_kind == "preserve" and self.preserve_reason is None: + raise ValueError("preserve_reason is required for preserve guidance") + return self + @beartype @ensure(lambda result: isinstance(result, bool)) def has_simplification_metadata(self) -> bool: @@ -132,9 +217,31 @@ def has_simplification_metadata(self) -> bool: self.intent_key, self.estimated_deletion_lines, self.related_locations, + self.guidance_kind, + self.recommended_action, + self.clean_code_principle, + self.rationale, + self.safety_checks, + self.preserve_reason, + self.action_status, + self.before_ref, + self.after_ref, + self.improvement, ) ) + @beartype + @ensure(lambda result: isinstance(result, bool)) + def has_guided_simplification_metadata(self) -> bool: + """Return whether this finding carries agent-action simplification metadata.""" + return self.guidance_kind is not None + + @beartype + @ensure(lambda result: isinstance(result, bool)) + def is_safe_mechanical_simplification(self) -> bool: + """Return whether the finding is an unresolved safe mechanical simplification.""" + return self.guidance_kind == "safe_mechanical" and self.action_status in {None, "recommended", "failed"} + @beartype @ensure(lambda result: isinstance(result, bool)) def simplification_metadata_is_deterministic(self) -> bool: @@ -155,6 +262,16 @@ def is_blocking(self) -> bool: return self.severity == "error" and not self.fixable +class SimplificationSummary(BaseModel): + """Aggregate evidence for guided simplification review runs.""" + + by_guidance_kind: dict[str, int] = Field(default_factory=dict) + by_action_status: dict[str, int] = Field(default_factory=dict) + blocking_simplification_count: int = Field(default=0, ge=0) + applied_count: int = Field(default=0, ge=0) + kept_count: int = Field(default=0, ge=0) + + class ReviewReport(BaseModel): """Governance-aligned evidence envelope for code review results.""" @@ -170,6 +287,10 @@ class ReviewReport(BaseModel): reward_delta: int | None = Field(default=None, description="Reward delta derived from score - 80.") findings: list[ReviewFinding] = Field(default_factory=list, description="Structured review findings.") summary: str = Field(..., description="Human-readable review summary.") + simplification_summary: SimplificationSummary | None = Field( + default=None, + description="Aggregate simplification guidance and action-status evidence.", + ) house_rules_updates: list[str] = Field(default_factory=list, description="Suggested house-rules updates.") @field_validator("schema_version", "run_id", "summary") @@ -188,7 +309,13 @@ def _normalize_timestamp(cls, value: datetime) -> datetime: @model_validator(mode="after") def _derive_governance_fields(self) -> ReviewReport: - if any(finding.has_simplification_metadata() for finding in self.findings): + if self.simplification_summary is None: + self.simplification_summary = _build_simplification_summary(self.findings) + if self.simplification_summary is not None or any( + finding.has_guided_simplification_metadata() for finding in self.findings + ): + self.schema_version = "1.2" + elif any(finding.has_simplification_metadata() for finding in self.findings): self.schema_version = "1.1" blocking_error_present = any(finding.is_blocking() for finding in self.findings) self.reward_delta = self.score - 80 @@ -213,3 +340,25 @@ def _derive_governance_fields(self) -> ReviewReport: def has_blocking_findings(self) -> bool: """Return whether the report contains any blocking findings.""" return any(finding.is_blocking() for finding in self.findings) + + +def _build_simplification_summary(findings: list[ReviewFinding]) -> SimplificationSummary | None: + guided = [finding for finding in findings if finding.has_guided_simplification_metadata()] + if not guided: + return None + by_guidance_kind: dict[str, int] = {} + by_action_status: dict[str, int] = {} + for finding in guided: + if finding.guidance_kind is not None: + by_guidance_kind[finding.guidance_kind] = by_guidance_kind.get(finding.guidance_kind, 0) + 1 + if finding.action_status is not None: + by_action_status[finding.action_status] = by_action_status.get(finding.action_status, 0) + 1 + return SimplificationSummary( + by_guidance_kind=by_guidance_kind, + by_action_status=by_action_status, + blocking_simplification_count=sum( + finding.is_safe_mechanical_simplification() and finding.action_status == "recommended" for finding in guided + ), + applied_count=by_action_status.get("applied", 0), + kept_count=by_action_status.get("kept", 0), + ) diff --git a/packages/specfact-code-review/src/specfact_code_review/run/runner.py b/packages/specfact-code-review/src/specfact_code_review/run/runner.py index 19b51ab0..0f72cbfd 100644 --- a/packages/specfact-code-review/src/specfact_code_review/run/runner.py +++ b/packages/specfact-code-review/src/specfact_code_review/run/runner.py @@ -102,9 +102,7 @@ def _relative_to(candidate: Path, source_root: Path) -> Path | None: def _expected_test_path(source_file: Path) -> Path | None: relative_path = _source_relative_path(source_file) - if relative_path is None: - return None - return Path("tests/unit") / relative_path.parent / f"test_{relative_path.name}" + return None if relative_path is None else Path("tests/unit") / relative_path.parent / f"test_{relative_path.name}" def _coverage_for_source(source_file: Path, payload: dict[str, object]) -> float | None: @@ -653,4 +651,10 @@ def run_review( ) if review_options.review_mode == "shadow": return report.model_copy(update={"ci_exit_code": 0}) + if ( + review_options.focus == "simplify" + and report.simplification_summary is not None + and report.simplification_summary.blocking_simplification_count > 0 + ): + return report.model_copy(update={"overall_verdict": "FAIL", "ci_exit_code": 1}) return report diff --git a/packages/specfact-code-review/src/specfact_code_review/tools/ai_bloat_runner.py b/packages/specfact-code-review/src/specfact_code_review/tools/ai_bloat_runner.py index 7b24dce4..378a1772 100644 --- a/packages/specfact-code-review/src/specfact_code_review/tools/ai_bloat_runner.py +++ b/packages/specfact-code-review/src/specfact_code_review/tools/ai_bloat_runner.py @@ -5,6 +5,7 @@ import ast from dataclasses import dataclass from pathlib import Path +from typing import Literal from beartype import beartype from icontract import ensure, require @@ -26,6 +27,13 @@ class _SimplificationCandidate: canonical_pattern: str rewrite_hint: str estimated_deletion_lines: int + guidance_kind: Literal["safe_mechanical", "needs_tests", "design_judgment", "preserve"] + recommended_action: Literal["remove", "inline", "collapse", "deduplicate", "make_required", "keep", "inspect"] + clean_code_principle: Literal["kiss", "dry", "yagni", "contracts", "api_stability", "readability"] + rationale: str + safety_checks: tuple[str, ...] + preserve_reason: str | None = None + fixable: bool = False def _iter_functions(tree: ast.AST) -> list[ast.FunctionDef | ast.AsyncFunctionDef]: @@ -41,11 +49,18 @@ def _simplification_finding(candidate: _SimplificationCandidate) -> ReviewFindin file=str(candidate.file_path), line=candidate.line, message=candidate.message, - fixable=False, + fixable=candidate.fixable, confidence="high", rewrite_hint=candidate.rewrite_hint, canonical_pattern=candidate.canonical_pattern, estimated_deletion_lines=candidate.estimated_deletion_lines, + guidance_kind=candidate.guidance_kind, + recommended_action=candidate.recommended_action, + clean_code_principle=candidate.clean_code_principle, + rationale=candidate.rationale, + safety_checks=list(candidate.safety_checks), + preserve_reason=candidate.preserve_reason, + action_status="recommended", ) @@ -109,6 +124,20 @@ def _has_none_branch(function_node: ast.FunctionDef | ast.AsyncFunctionDef, name return any(_is_none_check_for_name(node, name) for node in ast.walk(function_node)) +def _decorator_name(node: ast.AST) -> str | None: + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute): + return node.attr + if isinstance(node, ast.Call): + return _decorator_name(node.func) + return None + + +def _has_decorator(function_node: ast.FunctionDef | ast.AsyncFunctionDef, decorator_name: str) -> bool: + return any(_decorator_name(decorator) == decorator_name for decorator in function_node.decorator_list) + + def _unused_optional_param_findings( file_path: Path, function_node: ast.FunctionDef | ast.AsyncFunctionDef ) -> list[ReviewFinding]: @@ -118,6 +147,7 @@ def _unused_optional_param_findings( ast.Module(body=function_node.body, type_ignores=[]), arg.arg ): continue + preserve_signature = _has_decorator(function_node, "abstractmethod") findings.append( ReviewFinding( category="ai_bloat", @@ -131,6 +161,28 @@ def _unused_optional_param_findings( "remove the default or make the parameter required." ), fixable=False, + confidence="high", + rewrite_hint=( + "Keep the compatibility signature." if preserve_signature else "Make the parameter required." + ), + canonical_pattern="unused-optional-param", + estimated_deletion_lines=0 if preserve_signature else 1, + guidance_kind="preserve" if preserve_signature else "design_judgment", + recommended_action="keep" if preserve_signature else "make_required", + clean_code_principle="api_stability" if preserve_signature else "yagni", + rationale=( + "Abstract signatures can define an implementation contract." + if preserve_signature + else "The optional default advertises a branch that the function never implements." + ), + safety_checks=[ + "confirm no implementation depends on the advertised optional branch", + "confirm public callers are not relying on the default", + ], + preserve_reason=( + "abstract method signature can be an implementation contract" if preserve_signature else None + ), + action_status="recommended", ) ) return findings @@ -159,7 +211,20 @@ def _dead_branch_findings( file=str(file_path), line=stmt.lineno, message="Branch duplicates a prior terminal guard and is unreachable in this local flow.", - fixable=False, + fixable=True, + confidence="high", + rewrite_hint="Remove the duplicate terminal branch.", + canonical_pattern="duplicate-terminal-guard", + estimated_deletion_lines=max(1, (stmt.end_lineno or stmt.lineno) - stmt.lineno + 1), + guidance_kind="safe_mechanical", + recommended_action="remove", + clean_code_principle="kiss", + rationale="The branch repeats an earlier terminal guard in the same local function body.", + safety_checks=[ + "same guard expression already returned earlier", + "duplicate branch has no side effects", + ], + action_status="recommended", ) ) if _terminal_return(stmt.body): @@ -207,6 +272,23 @@ def _loc_vs_complexity_findings( "look for a stdlib or comprehension collapse." ), fixable=False, + confidence="medium", + rewrite_hint=( + "Inspect for a behavior-preserving collapse; keep if the expanded form carries domain clarity." + ), + canonical_pattern="loc-vs-complexity", + estimated_deletion_lines=max(1, loc // 3), + guidance_kind="design_judgment", + recommended_action="inspect", + clean_code_principle="readability", + rationale=( + "The function is long for its local branch complexity, but the right simplification depends on intent." + ), + safety_checks=[ + "identify the domain contract before changing shape", + "require tests around the collapsed behavior", + ], + action_status="recommended", ) ] @@ -229,13 +311,14 @@ def _assigned_empty_collection_name(stmt: ast.stmt) -> str | None: elif isinstance(stmt, ast.AnnAssign): target = stmt.target value = stmt.value - if not isinstance(target, ast.Name) or not isinstance(value, ast.List | ast.Dict | ast.Set): - return None - if isinstance(value, ast.List | ast.Set) and value.elts: - return None - if isinstance(value, ast.Dict) and value.keys: - return None - return target.id + collection_is_empty = (isinstance(value, ast.List | ast.Set) and not value.elts) or ( + isinstance(value, ast.Dict) and not value.keys + ) + return ( + target.id + if isinstance(target, ast.Name) and isinstance(value, ast.List | ast.Dict | ast.Set) and collection_is_empty + else None + ) def _loaded_name_count(node: ast.AST, name: str) -> int: @@ -274,6 +357,12 @@ def _redundant_intermediate_findings( canonical_pattern="one-use-temporary", rewrite_hint="Inline the one-use temporary into the return statement.", estimated_deletion_lines=1, + guidance_kind="safe_mechanical", + recommended_action="inline", + clean_code_principle="kiss", + rationale="The assigned name is read only by the immediately following return.", + safety_checks=("same expression is returned", "temporary has no later reads"), + fixable=True, ) ) ) @@ -298,6 +387,14 @@ def _manual_accumulator_loop_findings( canonical_pattern="manual-accumulator-loop", rewrite_hint="Replace the accumulator loop with a comprehension or direct collection constructor.", estimated_deletion_lines=3, + guidance_kind="needs_tests", + recommended_action="collapse", + clean_code_principle="kiss", + rationale="The loop appears structural, but iterator behavior and ordering need test coverage.", + safety_checks=( + "targeted tests cover ordering and empty input", + "no side effects are hidden in the loop", + ), ) ) ) @@ -310,11 +407,15 @@ def _manual_accumulator_name(function_node: ast.FunctionDef | ast.AsyncFunctionD return None loop = function_node.body[index + 1] return_stmt = function_node.body[index + 2] if index + 2 < len(function_node.body) else None - if not _returns_accumulator(return_stmt, accumulator): - return None - if not isinstance(loop, ast.For) or len(loop.body) != 1 or not isinstance(loop.body[0], ast.Expr): - return None - return accumulator if _loop_appends_to_accumulator(loop.body[0].value, accumulator) else None + return ( + accumulator + if _returns_accumulator(return_stmt, accumulator) + and isinstance(loop, ast.For) + and len(loop.body) == 1 + and isinstance(loop.body[0], ast.Expr) + and _loop_appends_to_accumulator(loop.body[0].value, accumulator) + else None + ) def _returns_accumulator(stmt: ast.stmt | None, accumulator: str) -> bool: @@ -359,6 +460,15 @@ def _verbose_bool_return_findings( canonical_pattern="verbose-bool-return", rewrite_hint="Return the predicate directly, negating it if needed.", estimated_deletion_lines=2, + guidance_kind="safe_mechanical", + recommended_action="collapse", + clean_code_principle="kiss", + rationale="Both branches return opposite boolean constants for one predicate.", + safety_checks=( + "branch bodies return only bool constants", + "predicate expression has no duplicated side effect", + ), + fixable=True, ) ) ) @@ -386,6 +496,11 @@ def _redundant_none_branch_findings( canonical_pattern="redundant-none-branch", rewrite_hint="Consider collapsing the None guard into the expression or caller contract.", estimated_deletion_lines=2, + guidance_kind="needs_tests", + recommended_action="collapse", + clean_code_principle="kiss", + rationale="The guard may be redundant, but None semantics often encode a contract boundary.", + safety_checks=("tests cover None input", "callers do not depend on early-return timing"), ) ) ) @@ -414,6 +529,12 @@ def _pass_through_try_except_findings( canonical_pattern="pass-through-try-except", rewrite_hint="Remove the pass-through try/except unless it adds domain context.", estimated_deletion_lines=2, + guidance_kind="safe_mechanical", + recommended_action="remove", + clean_code_principle="kiss", + rationale="The handler immediately re-raises without adding context or cleanup.", + safety_checks=("handler contains only a bare raise", "try block has no else or finally body"), + fixable=True, ) ) ) @@ -429,11 +550,9 @@ def _constant_equality_return(stmt: ast.stmt) -> str | None: return None if not isinstance(stmt.test.ops[0], ast.Eq): return None - if not isinstance(stmt.test.left, ast.Name) or len(stmt.test.comparators) != 1: - return None - if not isinstance(stmt.test.comparators[0], ast.Constant): - return None - return stmt.test.left.id + left = stmt.test.left + comparator = stmt.test.comparators[0] if len(stmt.test.comparators) == 1 else None + return left.id if isinstance(left, ast.Name) and isinstance(comparator, ast.Constant) else None def _table_lookup_match_count(function_node: ast.FunctionDef | ast.AsyncFunctionDef) -> int: @@ -468,6 +587,11 @@ def _table_lookup_candidate_findings( canonical_pattern="table-lookup-candidate", rewrite_hint="Consider replacing repeated equality returns with a lookup table plus default.", estimated_deletion_lines=max(1, matches - 1), + guidance_kind="needs_tests", + recommended_action="collapse", + clean_code_principle="dry", + rationale="Repeated constant equality branches often encode a data table.", + safety_checks=("tests cover known keys and fallback", "preserve branch order if values overlap"), ) ) ] @@ -492,6 +616,11 @@ def _stdlib_replacement_candidate_findings( canonical_pattern="stdlib-replacement-candidate", rewrite_hint="Consider a standard helper such as max, min, any, all, sum, or dict.fromkeys.", estimated_deletion_lines=3, + guidance_kind="needs_tests", + recommended_action="collapse", + clean_code_principle="kiss", + rationale="The loop resembles a standard aggregation helper but edge-case semantics need proof.", + safety_checks=("tests cover empty input", "tests cover tie or None behavior"), ) ) ] @@ -515,9 +644,7 @@ def _stdlib_replacement_candidate(function_node: ast.FunctionDef | ast.AsyncFunc def _none_initializer_name(stmt: ast.stmt) -> str | None: name = _assigned_name(stmt) - if name is None or not isinstance(stmt, ast.Assign) or not _is_none_constant(stmt.value): - return None - return name + return None if name is None or not isinstance(stmt, ast.Assign) or not _is_none_constant(stmt.value) else name def _loop_updates_name(stmt: ast.stmt, name: str) -> bool: @@ -572,6 +699,14 @@ def _wrapper_chain_findings(file_path: Path, tree: ast.Module) -> list[ReviewFin canonical_pattern="wrapper-chain", rewrite_hint="Collapse the wrapper chain or keep only the compatibility boundary.", estimated_deletion_lines=max(1, _function_loc(function_node) - 1), + guidance_kind="design_judgment", + recommended_action="inspect", + clean_code_principle="dry", + rationale="Pass-through wrappers may be bloat or deliberate API compatibility boundaries.", + safety_checks=( + "confirm whether either function is public API", + "keep wrappers that encode compatibility", + ), ) ) ) diff --git a/packages/specfact-code-review/src/specfact_code_review/tools/semgrep_runner.py b/packages/specfact-code-review/src/specfact_code_review/tools/semgrep_runner.py index 258c2251..1bd063f6 100644 --- a/packages/specfact-code-review/src/specfact_code_review/tools/semgrep_runner.py +++ b/packages/specfact-code-review/src/specfact_code_review/tools/semgrep_runner.py @@ -6,6 +6,7 @@ import os import subprocess import tempfile +from dataclasses import dataclass from pathlib import Path from typing import Literal, cast @@ -37,6 +38,54 @@ SemgrepCategory = Literal["clean_code", "architecture", "naming", "ai_bloat"] BugSemgrepCategory = Literal["security", "clean_code"] + +@dataclass(frozen=True) +class _AiBloatGuidance: + guidance_kind: Literal["safe_mechanical", "needs_tests", "design_judgment", "preserve"] + recommended_action: Literal["remove", "inline", "collapse", "deduplicate", "make_required", "keep", "inspect"] + clean_code_principle: Literal["kiss", "dry", "yagni", "contracts", "api_stability", "readability"] + rationale: str + safety_checks: list[str] + + +AI_BLOAT_GUIDANCE: dict[str, _AiBloatGuidance] = { + "ai-bloat.manual-loop-comprehension": _AiBloatGuidance( + guidance_kind="needs_tests", + recommended_action="collapse", + clean_code_principle="kiss", + rationale="The loop appears structural, but iterator behavior and ordering need test coverage.", + safety_checks=["targeted tests cover ordering and empty input", "no side effects are hidden in the loop"], + ), + "ai-bloat.passthrough-lambda": _AiBloatGuidance( + guidance_kind="design_judgment", + recommended_action="inspect", + clean_code_principle="readability", + rationale="A pass-through lambda can be noise, but it may also document callback shape at a boundary.", + safety_checks=["confirm the callable signature is unchanged", "keep if the lambda documents API intent"], + ), + "ai-bloat.identity-try-except": _AiBloatGuidance( + guidance_kind="safe_mechanical", + recommended_action="remove", + clean_code_principle="kiss", + rationale="The handler immediately re-raises without adding context or cleanup.", + safety_checks=["handler contains only a bare raise", "try block has no else or finally body"], + ), + "ai-bloat.none-then-none": _AiBloatGuidance( + guidance_kind="needs_tests", + recommended_action="collapse", + clean_code_principle="kiss", + rationale="The None branch may be redundant, but None semantics often encode a contract boundary.", + safety_checks=["tests cover None input", "callers do not depend on early-return timing"], + ), + "ai-bloat.single-call-wrapper": _AiBloatGuidance( + guidance_kind="design_judgment", + recommended_action="inspect", + clean_code_principle="dry", + rationale="A single-call wrapper may be bloat or a deliberate compatibility boundary.", + safety_checks=["confirm whether the wrapper is public API", "keep wrappers that encode compatibility"], + ), +} + BUG_RULE_CATEGORY: dict[str, BugSemgrepCategory] = { "specfact-bugs-eval-exec": "security", "specfact-bugs-os-system": "security", @@ -202,6 +251,7 @@ def find_semgrep_ai_bloat_config( def _run_semgrep_command( files: list[Path], *, bundle_root: Path | None, config_file: Path | list[Path] ) -> subprocess.CompletedProcess[str]: + del bundle_root config_files = config_file if isinstance(config_file, list) else [config_file] config_args = [arg for path in config_files for arg in ("--config", str(path))] with tempfile.TemporaryDirectory(prefix="semgrep-home-") as temp_home: @@ -319,16 +369,42 @@ def _finding_from_result(item: dict[str, object], *, allowed_paths: set[str]) -> category = _category_for_rule(rule) if category is None: return None + if category == "ai_bloat": + return _ai_bloat_finding_from_result(rule=rule, filename=filename, line=line, message=message) return ReviewFinding( category=category, - severity="info" if category == "ai_bloat" else "warning", + severity="warning", + tool="semgrep", + rule=rule, + file=filename, + line=line, + message=message, + fixable=False, + ) + + +def _ai_bloat_finding_from_result(*, rule: str, filename: str, line: int, message: str) -> ReviewFinding: + guidance = AI_BLOAT_GUIDANCE[rule] + return ReviewFinding( + category="ai_bloat", + severity="info", tool="semgrep", rule=rule, file=filename, line=line, message=message, fixable=False, + confidence="high", + canonical_pattern=rule.removeprefix("ai-bloat."), + rewrite_hint=message, + estimated_deletion_lines=1, + guidance_kind=guidance.guidance_kind, + recommended_action=guidance.recommended_action, + clean_code_principle=guidance.clean_code_principle, + rationale=guidance.rationale, + safety_checks=guidance.safety_checks, + action_status="recommended", ) diff --git a/packages/specfact-project/module-package.yaml b/packages/specfact-project/module-package.yaml index 5ce4ff85..8085bc78 100644 --- a/packages/specfact-project/module-package.yaml +++ b/packages/specfact-project/module-package.yaml @@ -1,5 +1,5 @@ name: nold-ai/specfact-project -version: 0.41.12 +version: 0.41.13 commands: - project - plan @@ -27,5 +27,4 @@ core_compatibility: '>=0.40.0,<1.0.0' description: Official SpecFact project bundle package. bundle_group_command: project integrity: - checksum: sha256:cbae2549ae56e7271c4f4cd0d7140cf2d14ba23b4b49ad2a9afb802275b63833 - signature: Y95kcv6OTjCwwZrBy7R0XktoN1z/Y4T4QNCQk+LPIxkrvACvSuM3bOFiVsSIBfi2b+Jz1tHV9uKIPOOT69lLBw== + checksum: sha256:d3b3554ea11460c4f06dd6b99f2c7c6903c23edc06df8e0177665d3137988ae8 diff --git a/packages/specfact-project/resources/prompts/specfact.08-simplify.md b/packages/specfact-project/resources/prompts/specfact.08-simplify.md index 352b28e3..f50e8f18 100644 --- a/packages/specfact-project/resources/prompts/specfact.08-simplify.md +++ b/packages/specfact-project/resources/prompts/specfact.08-simplify.md @@ -18,13 +18,22 @@ You **MUST** consider the user input before proceeding (if not empty). ## Purpose -Simplify advisory `ai_bloat` and metadata-backed simplification findings from `.specfact/code-review-simplify.json` using the IDE's edit tools with explicit user confirmation for every change. +Simplify `ai_bloat` and metadata-backed simplification findings from `.specfact/code-review-simplify.json` using the IDE's edit tools, user-level guidance, and evidence for every recommendation, applied change, and kept false positive. **Quick:** `/specfact.08-simplify` ## Guidance Character -Act as a conservative code-review simplification assistant. Use the Code Review bundle's deterministic findings as evidence, explain one cleanup at a time, and keep the user in control. Do not infer AI authorship, do not chase broad refactors, and do not edit without explicit confirmation. +Act as a conservative code-review simplification assistant. Use the Code Review bundle's deterministic findings as evidence, explain one cleanup at a time, and keep the user in control. Do not infer AI authorship and do not chase broad refactors. + +Before walking findings, ask for the walkthrough level unless the user already specified it: + +- `vibe coder`: explain why the finding matters, what to check, and what will change in plain language. +- `junior developer`: explain the clean-code principle, the safety checks, and the exact edit. +- `senior/pro`: keep guidance concise and focus on contract risk, blast radius, and verification. +- `headless agent`: do not ask interactive questions; choose the safest flow from metadata and write a concise action log. + +Auto-adjust if the conversation makes the level obvious. ## CLI Grounding @@ -47,23 +56,40 @@ Read `.specfact/code-review-simplify.json`. If it is missing, ask the user to ru specfact code review run --scope changed --focus simplify --json --out .specfact/code-review-simplify.json ``` -If the report contains no findings where `category == "ai_bloat"` and no findings with simplification metadata such as `intent_key`, `rewrite_hint`, or `canonical_pattern`, report that there are no simplification candidates and stop without editing files. +Explain that this report is the evidence file: it lists candidate cleanups, the safety checks, and the preserve reasons the assistant must use before touching code. Do not edit files until the report exists. + +If the report contains no findings where `category == "ai_bloat"` and no findings with simplification metadata such as `intent_key`, `rewrite_hint`, `canonical_pattern`, or `guidance_kind`, report that there are no simplification candidates and stop without editing files. ### Step 2: Group Candidates Group findings by `intent_key` first when present, then by file or domain and rule. For each candidate, inspect the referenced source location, inspect any related locations from `related_locations`, and capture small surrounding snippets before proposing a rewrite. +Use `guidance_kind` as the action contract: + +- `safe_mechanical`: local, high-confidence cleanup; can be applied after checking the listed `safety_checks`. +- `needs_tests`: only apply after targeted tests exist or are added for the behavior. +- `design_judgment`: explain tradeoffs and ask before editing. +- `preserve`: keep by default; record the `preserve_reason` as a false-positive or intentional-pattern note. + ### Step 3: Confirm Each Rewrite For each candidate: -1. Show the file, line, rule, current snippet, and related locations when present. -2. Explain the simplification in one sentence. -3. Draft the replacement. -4. Ask the user to choose: accept, reject, skip, or explain. -5. Apply only accepted edits with the IDE edit tool. +1. Show file, line, rule, `guidance_kind`, `recommended_action`, clean-code principle, current snippet, and related locations. +2. Explain the rationale and the required `safety_checks` at the selected walkthrough level. +3. Draft the replacement or preserve decision. +4. Ask the user to choose: accept, reject, skip, or explain; use `keep` as the reject reason for `preserve` findings. In `headless agent` mode, apply only `safe_mechanical` items whose safety checks are locally provable. +5. Record `action_status` as one of: recommended, applied, kept, skipped, failed. + +Never batch multiple files into one confirmation in interactive mode. +Apply only accepted edits. + +In `headless agent` mode, process candidates one file at a time and write this action log: + +| file | line | rule | guidance_kind | recommended_action | action_status | evidence | +| --- | ---: | --- | --- | --- | --- | --- | -Never apply edits automatically. Never batch multiple files into one confirmation. +Use the evidence column for removed findings, required tests, skipped safety checks, or preserved contracts. ### Step 4: Re-run Review @@ -73,7 +99,7 @@ After accepted edits are applied, suggest: specfact code review run --scope changed --focus simplify --json --out .specfact/code-review-simplify.json ``` -Compare the new report with the prior findings and summarize which `ai_bloat` or metadata-backed simplification candidates were cleared, skipped, or still present. +Compare the new report with the prior findings and summarize which `ai_bloat` or metadata-backed simplification candidates were recommended, applied, kept, skipped, failed, cleared, or still present. Include evidence of improvement such as removed findings, estimated deletion lines, simpler control flow, or preserved contracts. ## Verification diff --git a/skills/specfact-code-review/SKILL.md b/skills/specfact-code-review/SKILL.md index e4794759..61bba0c9 100644 --- a/skills/specfact-code-review/SKILL.md +++ b/skills/specfact-code-review/SKILL.md @@ -6,10 +6,15 @@ allowed-tools: [] # House Rules - AI Coding Context (v4) -Updated: 2026-03-30 | Module: nold-ai/specfact-code-review +Updated: 2026-05-22 | Module: nold-ai/specfact-code-review ## DO +- For simplification queues, run `specfact code review run --scope changed --focus simplify --json --out .specfact/code-review-simplify.json` +- Ask for walkthrough level when interactive: vibe coder, junior developer, senior/pro, or headless agent; auto-adjust if obvious +- Interpret `guidance_kind`: `safe_mechanical` may apply after local safety checks, `needs_tests` requires tests first, `design_judgment` needs human choice, `preserve` means keep and log `preserve_reason` +- Log each simplification action as recommended, applied, kept, skipped, failed, with evidence of improvement or preserved contract +- In headless mode, process one file at a time and emit an action table: file, line, rule, guidance_kind, recommended_action, action_status, evidence - Ask whether tests should be included before repo-wide review; default to excluding tests unless test changes are the target - Use intention-revealing names; avoid placeholder public names like data/process/handle - Keep functions under 120 LOC, shallow nesting, and <= 5 parameters (KISS) diff --git a/tests/unit/specfact_code_review/run/test_commands.py b/tests/unit/specfact_code_review/run/test_commands.py index a67fa014..472d99d7 100644 --- a/tests/unit/specfact_code_review/run/test_commands.py +++ b/tests/unit/specfact_code_review/run/test_commands.py @@ -28,6 +28,37 @@ def _report(*, score: int = 85) -> ReviewReport: ) +def _safe_mechanical_report(file_path: Path, *, line: int, rule: str) -> ReviewReport: + return ReviewReport( + run_id="review-run-001", + timestamp=datetime(2026, 3, 16, tzinfo=UTC), + score=85, + findings=[ + ReviewFinding( + category="ai_bloat", + severity="info", + tool="ast", + rule=rule, + file=str(file_path), + line=line, + message="Safe mechanical simplification.", + fixable=True, + confidence="high", + rewrite_hint="Apply the local rewrite.", + canonical_pattern="safe-mechanical", + estimated_deletion_lines=1, + guidance_kind="safe_mechanical", + recommended_action="inline" if rule == "ai-bloat.redundant-intermediate" else "collapse", + clean_code_principle="kiss", + rationale="The rewrite is local and behavior-preserving.", + safety_checks=["pattern shape is exact"], + action_status="recommended", + ) + ], + summary="Review command test report.", + ) + + def _write_repo_file(repo_root: Path, relative_path: str, *, content: str = "VALUE = 1\n") -> Path: file_path = repo_root / relative_path file_path.parent.mkdir(parents=True, exist_ok=True) @@ -272,6 +303,93 @@ def fake_run_review(files: list[Path], **kwargs: Any) -> ReviewReport: assert recorded == {"files": [package_file], "focus": "simplify"} +def test_apply_simplification_fixes_inlines_redundant_intermediate(tmp_path: Path) -> None: + target = tmp_path / "sample.py" + target.write_text( + "def total(values: list[int]) -> int:\n result = sum(values)\n return result\n", + encoding="utf-8", + ) + + applied = run_commands._apply_simplification_fixes( + _safe_mechanical_report(target, line=2, rule="ai-bloat.redundant-intermediate") + ) + + assert applied == 1 + assert target.read_text(encoding="utf-8") == "def total(values: list[int]) -> int:\n return sum(values)\n" + + +def test_apply_simplification_fixes_skips_non_safe_guidance(tmp_path: Path) -> None: + target = tmp_path / "sample.py" + source = "def total(values: list[int]) -> int:\n result = []\n return result\n" + target.write_text(source, encoding="utf-8") + report = _safe_mechanical_report(target, line=2, rule="ai-bloat.redundant-intermediate") + report.findings[0].guidance_kind = "needs_tests" + + applied = run_commands._apply_simplification_fixes(report) + + assert applied == 0 + assert target.read_text(encoding="utf-8") == source + + +def test_apply_simplification_fixes_collapses_verbose_bool_return(tmp_path: Path) -> None: + target = tmp_path / "sample.py" + target.write_text( + "def allowed(role: str) -> bool:\n if role == 'admin':\n return True\n return False\n", + encoding="utf-8", + ) + + applied = run_commands._apply_simplification_fixes( + _safe_mechanical_report(target, line=2, rule="ai-bloat.verbose-bool-return") + ) + + assert applied == 1 + assert target.read_text(encoding="utf-8") == "def allowed(role: str) -> bool:\n return role == 'admin'\n" + + +def test_apply_simplification_fixes_skips_when_source_no_longer_matches(tmp_path: Path) -> None: + target = tmp_path / "sample.py" + source = "def total(values: list[int]) -> int:\n result = sum(values)\n return result + 1\n" + target.write_text(source, encoding="utf-8") + + applied = run_commands._apply_simplification_fixes( + _safe_mechanical_report(target, line=2, rule="ai-bloat.redundant-intermediate") + ) + + assert applied == 0 + assert target.read_text(encoding="utf-8") == source + + +def test_run_review_once_applies_simplification_fixes_before_rerun(monkeypatch: Any, tmp_path: Path) -> None: + target = tmp_path / "sample.py" + target.write_text( + "def total(values: list[int]) -> int:\n result = sum(values)\n return result\n", + encoding="utf-8", + ) + reports = [ + _safe_mechanical_report(target, line=2, rule="ai-bloat.redundant-intermediate"), + _report(), + ] + monkeypatch.setattr("specfact_code_review.run.commands.run_review", lambda files, **kwargs: reports.pop(0)) + monkeypatch.setattr("specfact_code_review.run.commands._apply_fixes", lambda files: None) + + report = run_commands._run_review_once( + [target], + run_commands._ReviewLoopFlags( + no_tests=True, + include_noise=False, + fix=True, + progress_callback=None, + bug_hunt=False, + review_mode="enforce", + review_level=None, + review_focus="simplify", + ), + ) + + assert report.findings == [] + assert target.read_text(encoding="utf-8") == "def total(values: list[int]) -> int:\n return sum(values)\n" + + def test_run_command_rejects_unknown_keyword_override() -> None: with pytest.raises(run_commands.RunCommandError, match="Unexpected keyword arguments: unknown"): run_commands.run_command([], unknown=True) diff --git a/tests/unit/specfact_code_review/run/test_findings.py b/tests/unit/specfact_code_review/run/test_findings.py index a6906103..0afeb6a0 100644 --- a/tests/unit/specfact_code_review/run/test_findings.py +++ b/tests/unit/specfact_code_review/run/test_findings.py @@ -39,6 +39,16 @@ class ReviewFindingPayload(TypedDict, total=False): intent_key: str estimated_deletion_lines: int related_locations: list[EvidenceRef] + guidance_kind: Literal["safe_mechanical", "needs_tests", "design_judgment", "preserve"] + recommended_action: Literal["remove", "inline", "collapse", "deduplicate", "make_required", "keep", "inspect"] + clean_code_principle: Literal["kiss", "dry", "yagni", "contracts", "api_stability", "readability"] + rationale: str + safety_checks: list[str] + preserve_reason: str + action_status: Literal["recommended", "applied", "kept", "skipped", "failed"] + before_ref: EvidenceRef + after_ref: EvidenceRef + improvement: str def _finding_data(**overrides: Unpack[ReviewFindingPayload]) -> ReviewFindingPayload: @@ -105,6 +115,45 @@ def test_review_finding_marks_deterministic_simplification_metadata() -> None: assert finding.simplification_metadata_is_deterministic() +def test_review_finding_accepts_guided_simplification_metadata() -> None: + finding = ReviewFinding( + **_finding_data( + category="ai_bloat", + severity="info", + rule="ai-bloat.redundant-intermediate", + confidence="high", + rewrite_hint="Inline the one-use temporary into the return statement.", + canonical_pattern="one-use-temporary", + estimated_deletion_lines=1, + guidance_kind="safe_mechanical", + recommended_action="inline", + clean_code_principle="kiss", + rationale="The local variable is assigned once and read only by the following return.", + safety_checks=["same expression is returned", "temporary has no later reads"], + action_status="recommended", + ) + ) + + assert finding.has_guided_simplification_metadata() + assert finding.is_safe_mechanical_simplification() + + +def test_review_finding_rejects_preserve_guidance_without_preserve_reason() -> None: + with pytest.raises(ValidationError): + ReviewFinding( + **_finding_data( + category="ai_bloat", + severity="info", + guidance_kind="preserve", + recommended_action="keep", + clean_code_principle="api_stability", + rationale="The optional argument is part of a public extension contract.", + safety_checks=["public compatibility boundary checked"], + action_status="recommended", + ) + ) + + def test_review_finding_rejects_partial_simplification_metadata_as_nondeterministic() -> None: finding = ReviewFinding( **_finding_data( @@ -229,6 +278,39 @@ def test_review_report_uses_schema_1_1_when_simplification_metadata_is_present() assert report.ci_exit_code == 0 +def test_review_report_uses_schema_1_2_and_summary_when_guided_metadata_is_present() -> None: + report = ReviewReport( + run_id="run-guided-simplify", + timestamp=datetime(2026, 3, 11, tzinfo=UTC), + score=85, + findings=[ + ReviewFinding( + **_finding_data( + category="ai_bloat", + severity="info", + confidence="high", + rewrite_hint="Inline the one-use temporary into the return statement.", + canonical_pattern="one-use-temporary", + estimated_deletion_lines=1, + guidance_kind="safe_mechanical", + recommended_action="inline", + clean_code_principle="kiss", + rationale="The local variable is assigned once and read only by the following return.", + safety_checks=["same expression is returned", "temporary has no later reads"], + action_status="recommended", + ) + ) + ], + summary="Guided simplification advisories.", + ) + + assert report.schema_version == "1.2" + assert report.simplification_summary is not None + assert report.simplification_summary.by_guidance_kind == {"safe_mechanical": 1} + assert report.simplification_summary.by_action_status == {"recommended": 1} + assert report.simplification_summary.blocking_simplification_count == 1 + + def test_review_report_maps_pass_with_advisory_verdict() -> None: report = ReviewReport( run_id="run-002", diff --git a/tests/unit/specfact_code_review/run/test_runner.py b/tests/unit/specfact_code_review/run/test_runner.py index f9337550..370b362b 100644 --- a/tests/unit/specfact_code_review/run/test_runner.py +++ b/tests/unit/specfact_code_review/run/test_runner.py @@ -58,6 +58,7 @@ def _simplification_finding( *, category: Literal["ai_bloat", "dry", "kiss"] = "ai_bloat", confidence: Literal["low", "medium", "high"] = "high", + guidance_kind: Literal["safe_mechanical", "needs_tests", "design_judgment", "preserve"] | None = None, ) -> ReviewFinding: return ReviewFinding( category=category, @@ -73,6 +74,13 @@ def _simplification_finding( canonical_pattern="manual-accumulator-loop", intent_key="score-review", estimated_deletion_lines=3, + guidance_kind=guidance_kind, + recommended_action="collapse" if guidance_kind is not None else None, + clean_code_principle="kiss" if guidance_kind is not None else None, + rationale="The repeated loop shape can be expressed directly.", + safety_checks=["targeted tests cover the surrounding behavior"], + action_status="recommended" if guidance_kind is not None else None, + preserve_reason="The wrapper is a compatibility boundary." if guidance_kind == "preserve" else None, ) @@ -215,6 +223,69 @@ def test_run_review_simplify_focus_keeps_only_simplification_queue(monkeypatch: assert report.overall_verdict == "PASS" +def test_run_review_simplify_enforce_fails_only_safe_mechanical_recommendations(monkeypatch: MonkeyPatch) -> None: + monkeypatch.setattr("specfact_code_review.run.runner.run_ruff", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_radon", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_semgrep", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_semgrep_bugs", lambda files: []) + monkeypatch.setattr( + "specfact_code_review.run.runner.run_ai_bloat", + lambda files: [ + _simplification_finding(category="ai_bloat", guidance_kind="safe_mechanical"), + _simplification_finding(category="ai_bloat", guidance_kind="needs_tests"), + _simplification_finding(category="ai_bloat", guidance_kind="preserve"), + ], + ) + monkeypatch.setattr("specfact_code_review.run.runner.run_ast_clean_code", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_basedpyright", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_pylint", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_contract_check", lambda files, **_: []) + monkeypatch.setattr("specfact_code_review.run.runner._evaluate_tdd_gate", lambda files: ([], None)) + + report = run_review( + [Path("packages/specfact-code-review/src/specfact_code_review/run/scorer.py")], + no_tests=True, + focus="simplify", + review_mode="enforce", + ) + + assert report.schema_version == "1.2" + assert report.overall_verdict == "FAIL" + assert report.ci_exit_code == 1 + assert report.simplification_summary is not None + assert report.simplification_summary.blocking_simplification_count == 1 + + +def test_run_review_simplify_enforce_passes_design_and_preserve_guidance(monkeypatch: MonkeyPatch) -> None: + monkeypatch.setattr("specfact_code_review.run.runner.run_ruff", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_radon", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_semgrep", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_semgrep_bugs", lambda files: []) + monkeypatch.setattr( + "specfact_code_review.run.runner.run_ai_bloat", + lambda files: [ + _simplification_finding(category="ai_bloat", guidance_kind="design_judgment"), + _simplification_finding(category="ai_bloat", guidance_kind="preserve"), + ], + ) + monkeypatch.setattr("specfact_code_review.run.runner.run_ast_clean_code", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_basedpyright", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_pylint", lambda files: []) + monkeypatch.setattr("specfact_code_review.run.runner.run_contract_check", lambda files, **_: []) + monkeypatch.setattr("specfact_code_review.run.runner._evaluate_tdd_gate", lambda files: ([], None)) + + report = run_review( + [Path("packages/specfact-code-review/src/specfact_code_review/run/scorer.py")], + no_tests=True, + focus="simplify", + review_mode="enforce", + ) + + assert report.overall_verdict == "PASS" + assert report.simplification_summary is not None + assert report.simplification_summary.blocking_simplification_count == 0 + + def test_run_review_simplify_focus_preserves_tool_errors(monkeypatch: MonkeyPatch) -> None: monkeypatch.setattr("specfact_code_review.run.runner.run_ruff", lambda files: []) monkeypatch.setattr("specfact_code_review.run.runner.run_radon", lambda files: []) diff --git a/tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py b/tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py index 12b01209..54086c85 100644 --- a/tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py +++ b/tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py @@ -30,6 +30,8 @@ def greet(name: str, prefix: Optional[str] = None) -> str: assert {finding.rule for finding in findings} == {"ai-bloat.unused-optional-param"} assert findings[0].category == "ai_bloat" assert findings[0].severity == "info" + assert findings[0].guidance_kind == "design_judgment" + assert findings[0].recommended_action == "make_required" def test_optional_param_with_none_branch_is_not_flagged(tmp_path: Path) -> None: @@ -72,7 +74,11 @@ def test_loc_vs_complexity_flags_long_linear_function(tmp_path: Path) -> None: lines.append(" return result") target = _write(tmp_path, "\n".join(lines)) - assert {finding.rule for finding in run_ai_bloat([target])} == {"ai-bloat.loc-vs-complexity"} + findings = run_ai_bloat([target]) + + assert {finding.rule for finding in findings} == {"ai-bloat.loc-vs-complexity"} + assert findings[0].guidance_kind == "design_judgment" + assert findings[0].recommended_action == "inspect" def test_redundant_intermediate_flags_assign_then_immediate_return(tmp_path: Path) -> None: @@ -85,7 +91,12 @@ def total(values: list[int]) -> int: """, ) - assert {finding.rule for finding in run_ai_bloat([target])} == {"ai-bloat.redundant-intermediate"} + findings = run_ai_bloat([target]) + + assert {finding.rule for finding in findings} == {"ai-bloat.redundant-intermediate"} + assert findings[0].guidance_kind == "safe_mechanical" + assert findings[0].recommended_action == "inline" + assert findings[0].fixable is True @pytest.mark.parametrize( @@ -191,6 +202,33 @@ def test_expanded_simplification_patterns_emit_metadata( assert matching[0].canonical_pattern == expected_pattern assert matching[0].rewrite_hint assert matching[0].estimated_deletion_lines is not None + assert matching[0].guidance_kind in {"safe_mechanical", "needs_tests", "design_judgment", "preserve"} + assert matching[0].recommended_action is not None + assert matching[0].clean_code_principle is not None + assert matching[0].rationale + assert matching[0].safety_checks + + +def test_abstract_optional_param_is_preserve_guidance(tmp_path: Path) -> None: + target = _write( + tmp_path, + """ +from abc import ABC, abstractmethod + + +class Provider(ABC): + @abstractmethod + def fetch(self, key: str, timeout: int | None = None) -> str: + raise NotImplementedError +""", + ) + + findings = run_ai_bloat([target]) + + assert {finding.rule for finding in findings} == {"ai-bloat.unused-optional-param"} + assert findings[0].guidance_kind == "preserve" + assert findings[0].recommended_action == "keep" + assert findings[0].preserve_reason == "abstract method signature can be an implementation contract" def test_redundant_intermediate_ignores_reused_names(tmp_path: Path) -> None: diff --git a/tests/unit/specfact_code_review/tools/test_semgrep_runner.py b/tests/unit/specfact_code_review/tools/test_semgrep_runner.py index 8fe60524..2cdbcda4 100644 --- a/tests/unit/specfact_code_review/tools/test_semgrep_runner.py +++ b/tests/unit/specfact_code_review/tools/test_semgrep_runner.py @@ -10,8 +10,11 @@ from pytest import MonkeyPatch from specfact_code_review.tools.semgrep_runner import ( + _parse_semgrep_results, _run_semgrep_command, _snip_stderr_tail, + find_semgrep_ai_bloat_config, + find_semgrep_bugs_config, find_semgrep_config, run_semgrep, run_semgrep_bugs, @@ -160,6 +163,11 @@ def test_run_semgrep_maps_ai_bloat_rules_to_info_findings(tmp_path: Path, monkey assert findings[0].category == "ai_bloat" assert findings[0].severity == "info" assert findings[0].rule == "ai-bloat.single-call-wrapper" + assert findings[0].guidance_kind == "design_judgment" + assert findings[0].recommended_action == "inspect" + assert findings[0].clean_code_principle == "dry" + assert findings[0].rationale + assert findings[0].safety_checks def test_run_semgrep_filters_findings_to_requested_files(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: @@ -260,6 +268,15 @@ def test_run_semgrep_ignores_unsupported_rules(tmp_path: Path, monkeypatch: Monk assert not findings +def test_run_semgrep_returns_empty_for_no_files() -> None: + assert run_semgrep([]) == [] + + +def test_parse_semgrep_results_rejects_non_list_results() -> None: + with pytest.raises(ValueError, match="semgrep results must be a list"): + _parse_semgrep_results({"results": {}}) + + def test_find_semgrep_config_with_explicit_bundle_root(tmp_path: Path) -> None: root = tmp_path / "bundle" (root / ".semgrep").mkdir(parents=True) @@ -267,6 +284,30 @@ def test_find_semgrep_config_with_explicit_bundle_root(tmp_path: Path) -> None: assert find_semgrep_config(bundle_root=root) == root / ".semgrep" / "clean_code.yaml" +def test_find_semgrep_config_with_explicit_bundle_root_reports_missing_config(tmp_path: Path) -> None: + with pytest.raises(FileNotFoundError): + find_semgrep_config(bundle_root=tmp_path) + + +def test_find_semgrep_bugs_config_with_explicit_bundle_root(tmp_path: Path) -> None: + root = tmp_path / "bundle" + (root / ".semgrep").mkdir(parents=True) + (root / ".semgrep" / "bugs.yaml").write_text("rules: []\n", encoding="utf-8") + + assert find_semgrep_bugs_config(bundle_root=root) == root / ".semgrep" / "bugs.yaml" + assert find_semgrep_bugs_config(bundle_root=tmp_path / "missing") is None + + +def test_find_semgrep_ai_bloat_config_with_explicit_bundle_root(tmp_path: Path) -> None: + root = tmp_path / "bundle" + rules = root / "resources" / "semgrep-rules" + rules.mkdir(parents=True) + (rules / "ai-bloat.yaml").write_text("rules: []\n", encoding="utf-8") + + assert find_semgrep_ai_bloat_config(bundle_root=root) == rules / "ai-bloat.yaml" + assert find_semgrep_ai_bloat_config(bundle_root=tmp_path / "missing") is None + + def test_snip_stderr_tail_keeps_last_chars() -> None: """Long stderr should retain the suffix (most recent diagnostics), not the prefix.""" long = "UNIQUE_HEAD_MARKER" + ("A" * 5000) + "END_OF_ERROR" @@ -301,6 +342,62 @@ def test_run_semgrep_bugs_returns_empty_when_semgrep_cli_missing(tmp_path: Path, assert run_semgrep_bugs([target], bundle_root=bundle) == [] +def test_run_semgrep_bugs_returns_empty_for_no_files() -> None: + assert run_semgrep_bugs([]) == [] + + +def test_run_semgrep_bugs_maps_security_and_clean_code_findings(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: + bundle = tmp_path / "bundle" + (bundle / ".semgrep").mkdir(parents=True) + (bundle / ".semgrep" / "bugs.yaml").write_text("rules: []\n", encoding="utf-8") + file_path = tmp_path / "target.py" + payload = { + "results": [ + { + "check_id": "specfact-bugs-eval-exec", + "path": str(file_path), + "start": {"line": 2}, + "extra": {"message": "Avoid eval.", "severity": "ERROR"}, + }, + { + "check_id": "specfact-bugs-useless-comparison", + "path": str(file_path), + "start": {"line": 3}, + "extra": {"message": "Comparison is always true."}, + }, + ] + } + monkeypatch.setattr( + subprocess, + "run", + Mock(return_value=completed_process("semgrep", stdout=json.dumps(payload), returncode=1)), + ) + + findings = run_semgrep_bugs([file_path], bundle_root=bundle) + + assert [(finding.category, finding.severity, finding.rule) for finding in findings] == [ + ("security", "error", "specfact-bugs-eval-exec"), + ("clean_code", "warning", "specfact-bugs-useless-comparison"), + ] + + +def test_run_semgrep_bugs_returns_tool_error_for_invalid_payload(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: + bundle = tmp_path / "bundle" + (bundle / ".semgrep").mkdir(parents=True) + (bundle / ".semgrep" / "bugs.yaml").write_text("rules: []\n", encoding="utf-8") + file_path = tmp_path / "target.py" + monkeypatch.setattr( + subprocess, + "run", + Mock(return_value=completed_process("semgrep", stdout=json.dumps({"results": [{}]}), returncode=1)), + ) + + findings = run_semgrep_bugs([file_path], bundle_root=bundle) + + assert len(findings) == 1 + assert findings[0].category == "tool_error" + + def test_run_semgrep_retries_after_transient_parse_failure(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: file_path = tmp_path / "target.py" payload = { diff --git a/tests/unit/test_guided_simplify_resources.py b/tests/unit/test_guided_simplify_resources.py new file mode 100644 index 00000000..a0522914 --- /dev/null +++ b/tests/unit/test_guided_simplify_resources.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] +PROMPT = REPO_ROOT / "packages/specfact-project/resources/prompts/specfact.08-simplify.md" +SKILL = ( + REPO_ROOT / "packages/specfact-code-review/src/specfact_code_review/resources/skills/specfact-code-review/SKILL.md" +) + + +def test_simplify_prompt_guides_interactive_walkthrough_levels() -> None: + text = PROMPT.read_text(encoding="utf-8") + + assert "vibe coder" in text + assert "junior developer" in text + assert "senior/pro" in text + assert "headless agent" in text + assert "safe_mechanical" in text + assert "needs_tests" in text + assert "design_judgment" in text + assert "preserve" in text + assert "recommended, applied, kept, skipped, failed" in text + assert "this report is the evidence file" in text + assert "| file | line | rule | guidance_kind | recommended_action | action_status | evidence |" in text + + +def test_code_review_skill_teaches_llms_how_to_apply_simplification_guidance() -> None: + text = SKILL.read_text(encoding="utf-8") + + assert "Ask for walkthrough level" in text + assert "safe_mechanical" in text + assert "needs_tests" in text + assert "design_judgment" in text + assert "preserve" in text + assert "recommended, applied, kept, skipped, failed" in text + assert "In headless mode, process one file at a time" in text + assert "file, line, rule, guidance_kind, recommended_action, action_status, evidence" in text From 849d97503bfeae6dfdad62ad4c6acb98c9bf9aa0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 22 May 2026 22:16:03 +0000 Subject: [PATCH 2/6] chore(modules): ci sign changed modules --- packages/specfact-code-review/module-package.yaml | 1 + packages/specfact-project/module-package.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/packages/specfact-code-review/module-package.yaml b/packages/specfact-code-review/module-package.yaml index cda2823d..f0ba11e2 100644 --- a/packages/specfact-code-review/module-package.yaml +++ b/packages/specfact-code-review/module-package.yaml @@ -24,3 +24,4 @@ category: codebase bundle_group_command: code integrity: checksum: sha256:145b823d75e35c71c4d9a3bfdb60a9b1385da8bad274b8a4d2d3b040f7539d83 + signature: q7cPJXSkBPnuTPKBuOPKl0x0z0dzcCfgqwqirHEjJoO/MvHrXpMS5M8uxGBxlPYYaowXbEa1Arfmkbe6oLKzAA== diff --git a/packages/specfact-project/module-package.yaml b/packages/specfact-project/module-package.yaml index 8085bc78..0f057ac1 100644 --- a/packages/specfact-project/module-package.yaml +++ b/packages/specfact-project/module-package.yaml @@ -28,3 +28,4 @@ description: Official SpecFact project bundle package. bundle_group_command: project integrity: checksum: sha256:d3b3554ea11460c4f06dd6b99f2c7c6903c23edc06df8e0177665d3137988ae8 + signature: uiHUSMHNJyPQxMx+VQqoptOgUuUG1dB91J/YMYM5rPYIFGHmdfuQBQt+pdKKFy0H2vBfO7yDgDQ3jZz7JObhCQ== From 51c09e4a673cb0178e9789870e1e38061b97a872 Mon Sep 17 00:00:00 2001 From: omit-test Date: Sat, 23 May 2026 22:20:21 +0200 Subject: [PATCH 3/6] Fix review findings --- .../TDD_EVIDENCE.md | 14 +- .../specs/review-run-command/spec.md | 2 +- .../specfact-code-review/module-package.yaml | 5 +- .../src/specfact_code_review/rules/updater.py | 2 + .../src/specfact_code_review/run/commands.py | 101 +++++++++++-- .../src/specfact_code_review/run/findings.py | 13 +- packages/specfact-project/module-package.yaml | 5 +- .../rules/test_updater.py | 2 + .../specfact_code_review/run/test_commands.py | 136 +++++++++++++++--- .../specfact_code_review/run/test_findings.py | 41 ++++++ .../specfact_code_review/run/test_runner.py | 19 ++- tests/unit/test_guided_simplify_resources.py | 26 ++-- 12 files changed, 308 insertions(+), 58 deletions(-) diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md b/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md index 44439603..81e8d4b5 100644 --- a/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md @@ -6,15 +6,23 @@ - Result: failed as expected before implementation. - Evidence: 18 failed, 64 passed. - Missing contract areas: guided finding fields, preserve validation, schema 1.2 summary, classifier guidance kinds, simplify enforce behavior, and prompt/skill walkthrough policy. +- `hatch run pytest tests/unit/specfact_code_review/run/test_findings.py tests/unit/specfact_code_review/run/test_commands.py tests/unit/specfact_code_review/rules/test_updater.py tests/unit/test_guided_simplify_resources.py -q` + - Result: failed as expected before PR review fixes. + - Evidence: 6 failed, 87 passed. + - Missing contract areas: orphan guided-field validation, failed safe-mechanical blocking counts, missing deterministic safe-mechanical fixers, bottom-up rewrite ordering, and headless action-table defaults. ## Passing After - `hatch run pytest tests/unit/specfact_code_review/run/test_commands.py tests/unit/specfact_code_review/run/test_findings.py tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py tests/unit/specfact_code_review/run/test_runner.py tests/unit/specfact_code_review/rules/test_updater.py::test_default_skill_content_stays_within_line_budget tests/unit/specfact_code_review/rules/test_updater.py::test_load_bundled_skill_content_returns_valid_structure_when_available tests/unit/test_guided_simplify_resources.py -q` - Result: 116 passed. +- `hatch run pytest tests/unit/specfact_code_review/run/test_findings.py tests/unit/specfact_code_review/run/test_commands.py tests/unit/specfact_code_review/rules/test_updater.py tests/unit/test_guided_simplify_resources.py -q` + - Result after PR review fixes: 93 passed. +- `hatch run pytest tests/unit/specfact_code_review/run/test_findings.py tests/unit/specfact_code_review/run/test_commands.py tests/unit/specfact_code_review/run/test_runner.py tests/unit/specfact_code_review/rules/test_updater.py tests/unit/test_guided_simplify_resources.py -q` + - Result after strict metadata fallout fix: 125 passed. - `hatch run pytest tests/unit/specfact_code_review/run/test_commands.py tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py -q` - Result after final cleanup: 50 passed. - `hatch run contract-test` - - Result: 742 passed, 2 warnings. + - Result after PR review fixes: 758 passed, 2 warnings. - `hatch run smart-test` - Result: 742 passed, 2 warnings. - `hatch run type-check` @@ -28,9 +36,9 @@ - `hatch run validate-prompt-commands` - Result: prompt command validation passed with no findings. - `hatch run verify-modules-signature --payload-from-filesystem --enforce-version-bump --version-check-base origin/dev` - - Result: verified 6 module manifests. + - Result after PR review fixes: verified 6 module manifests. - `hatch run specfact code review run --bug-hunt --json --out .specfact/code-review.json --scope changed` - - Result: PASS, CI exit 0, score 115, 0 findings. + - Result after PR review fixes: PASS, CI exit 0, score 115, 0 findings. - `openspec validate code-review-12-guided-simplification-enforcement --strict` - Result: valid. diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-run-command/spec.md b/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-run-command/spec.md index de441252..78218e07 100644 --- a/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-run-command/spec.md +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/specs/review-run-command/spec.md @@ -8,7 +8,7 @@ The `specfact code review run` command SHALL accept `--focus simplify` as a targ - **WHEN** `specfact code review run --focus simplify --json --out .specfact/code-review.json` completes - **THEN** the JSON report SHALL retain simplification-focused findings -- **AND** retained findings SHOULD include guidance metadata for actionability, preservation, or design judgment +- **AND** retained findings SHALL include guidance metadata for actionability, preservation, or design judgment - **AND** the report SHALL include a simplification summary when guided findings are present #### Scenario: Simplify enforce blocks only safe mechanical debt diff --git a/packages/specfact-code-review/module-package.yaml b/packages/specfact-code-review/module-package.yaml index f0ba11e2..03905db0 100644 --- a/packages/specfact-code-review/module-package.yaml +++ b/packages/specfact-code-review/module-package.yaml @@ -1,5 +1,5 @@ name: nold-ai/specfact-code-review -version: 0.47.21 +version: 0.47.22 commands: - code tier: official @@ -23,5 +23,4 @@ description: Official SpecFact code review bundle package. category: codebase bundle_group_command: code integrity: - checksum: sha256:145b823d75e35c71c4d9a3bfdb60a9b1385da8bad274b8a4d2d3b040f7539d83 - signature: q7cPJXSkBPnuTPKBuOPKl0x0z0dzcCfgqwqirHEjJoO/MvHrXpMS5M8uxGBxlPYYaowXbEa1Arfmkbe6oLKzAA== + checksum: sha256:33646f1efa3af2c99276838b51dafd0279fa12bedb8726d468da34dccc3f978f diff --git a/packages/specfact-code-review/src/specfact_code_review/rules/updater.py b/packages/specfact-code-review/src/specfact_code_review/rules/updater.py index 3a99a22d..38c36b4e 100644 --- a/packages/specfact-code-review/src/specfact_code_review/rules/updater.py +++ b/packages/specfact-code-review/src/specfact_code_review/rules/updater.py @@ -40,6 +40,8 @@ "tests first, `design_judgment` needs human choice, `preserve` means keep and log `preserve_reason`", "- Log each simplification action as recommended, applied, kept, skipped, failed, with evidence of improvement " "or preserved contract", + "- In headless mode, process one file at a time and emit an action table: file, line, rule, guidance_kind, " + "recommended_action, action_status, evidence", "- For merge-quality review, run `specfact code review run --scope changed --bug-hunt --json " "--out .specfact/code-review.json`", "- Verify an active OpenSpec change covers the requested scope and follow the sequence: spec delta " diff --git a/packages/specfact-code-review/src/specfact_code_review/run/commands.py b/packages/specfact-code-review/src/specfact_code_review/run/commands.py index a70fd861..21944e48 100644 --- a/packages/specfact-code-review/src/specfact_code_review/run/commands.py +++ b/packages/specfact-code-review/src/specfact_code_review/run/commands.py @@ -282,17 +282,76 @@ def _apply_fixes(files: list[Path]) -> None: def _apply_simplification_fixes(report: ReviewReport) -> int: """Apply deterministic safe-mechanical simplification rewrites from a report.""" + fixers: dict[str, Callable[[ReviewFinding], bool]] = { + "ai-bloat.dead-branch": _apply_dead_branch_fix, + "ai-bloat.pass-through-try-except": _apply_pass_through_try_except_fix, + "ai-bloat.redundant-intermediate": _apply_redundant_intermediate_fix, + "ai-bloat.verbose-bool-return": _apply_verbose_bool_return_fix, + } applied = 0 - for finding in report.findings: - if not finding.is_safe_mechanical_simplification() or not finding.fixable: + for finding in _fixable_simplifications_by_stable_line_order(report.findings): + fixer = fixers.get(finding.rule) + if fixer is None: continue - if finding.rule == "ai-bloat.redundant-intermediate": - applied += int(_apply_redundant_intermediate_fix(finding)) - elif finding.rule == "ai-bloat.verbose-bool-return": - applied += int(_apply_verbose_bool_return_fix(finding)) + applied += int(fixer(finding)) return applied +def _fixable_simplifications_by_stable_line_order(findings: list[ReviewFinding]) -> list[ReviewFinding]: + indexed_findings = [ + (index, finding) + for index, finding in enumerate(findings) + if finding.is_safe_mechanical_simplification() and finding.fixable + ] + return [finding for _, finding in sorted(indexed_findings, key=lambda item: (item[1].file, -item[1].line, item[0]))] + + +def _apply_dead_branch_fix(finding: ReviewFinding) -> bool: + parsed = _parsed_finding_source(finding) + if parsed is None: + return False + file_path, source, tree = parsed + for function_node in _iter_functions(tree): + prior_terminal_tests: set[str] = set() + for stmt in function_node.body: + if not isinstance(stmt, ast.If): + continue + test_key = ast.dump(stmt.test, include_attributes=False) + if stmt.lineno == finding.line and test_key in prior_terminal_tests and _terminal_return(stmt.body): + return _replace_line_range( + file_path, + source, + start_line=stmt.lineno, + end_line=stmt.end_lineno or stmt.lineno, + replacement=[], + ) + if _terminal_return(stmt.body): + prior_terminal_tests.add(test_key) + return False + + +def _apply_pass_through_try_except_fix(finding: ReviewFinding) -> bool: + parsed = _parsed_finding_source(finding) + if parsed is None: + return False + file_path, source, tree = parsed + for function_node in _iter_functions(tree): + for stmt in function_node.body: + if stmt.lineno != finding.line or not isinstance(stmt, ast.Try) or not _is_pass_through_try_except(stmt): + continue + replacement = _dedented_try_body_lines(source, stmt) + if replacement is None: + return False + return _replace_line_range( + file_path, + source, + start_line=stmt.lineno, + end_line=stmt.end_lineno or stmt.lineno, + replacement=replacement, + ) + return False + + def _apply_redundant_intermediate_fix(finding: ReviewFinding) -> bool: parsed = _parsed_finding_source(finding) if parsed is None: @@ -379,6 +438,31 @@ def _verbose_bool_replacement_expression( ) +def _is_pass_through_try_except(stmt: ast.stmt) -> bool: + if not isinstance(stmt, ast.Try) or stmt.orelse or stmt.finalbody or len(stmt.handlers) != 1: + return False + handler = stmt.handlers[0] + return len(handler.body) == 1 and isinstance(handler.body[0], ast.Raise) and handler.body[0].exc is None + + +def _terminal_return(body: list[ast.stmt]) -> bool: + return bool(body) and isinstance(body[-1], ast.Return) + + +def _dedented_try_body_lines(source: str, stmt: ast.Try) -> list[str] | None: + if not stmt.body: + return None + lines = source.splitlines() + start_line = stmt.body[0].lineno + end_line = stmt.handlers[0].lineno - 1 + try_indent = _indent_for_line(source, stmt.lineno) + body_indent = _indent_for_line(source, start_line) + if len(body_indent) <= len(try_indent): + return None + body_lines = lines[start_line - 1 : end_line] + return [try_indent + line[len(body_indent) :] if line.startswith(body_indent) else line for line in body_lines] + + def _bool_expr(predicate: str, first_value: bool) -> str: return predicate if first_value else f"not ({predicate})" @@ -404,12 +488,13 @@ def _replace_line_range( *, start_line: int, end_line: int, - replacement: str, + replacement: str | list[str], ) -> bool: lines = source.splitlines() if start_line < 1 or end_line < start_line or end_line > len(lines): return False - lines[start_line - 1 : end_line] = [replacement] + replacement_lines = [replacement] if isinstance(replacement, str) else replacement + lines[start_line - 1 : end_line] = replacement_lines trailing_newline = "\n" if source.endswith("\n") else "" file_path.write_text("\n".join(lines) + trailing_newline, encoding="utf-8") return True diff --git a/packages/specfact-code-review/src/specfact_code_review/run/findings.py b/packages/specfact-code-review/src/specfact_code_review/run/findings.py index 6b6dc770..46237073 100644 --- a/packages/specfact-code-review/src/specfact_code_review/run/findings.py +++ b/packages/specfact-code-review/src/specfact_code_review/run/findings.py @@ -188,7 +188,17 @@ def _validate_safety_checks(cls, value: list[str] | None) -> list[str] | None: @model_validator(mode="after") def _validate_guided_metadata(self) -> ReviewFinding: + guided_fields = ( + self.recommended_action, + self.clean_code_principle, + self.rationale, + self.safety_checks, + self.action_status, + self.preserve_reason, + ) if self.guidance_kind is None: + if any(value is not None for value in guided_fields): + raise ValueError("guidance_kind is required when guided metadata fields are present") return self if self.recommended_action is None: raise ValueError("recommended_action is required when guidance_kind is present") @@ -357,7 +367,8 @@ def _build_simplification_summary(findings: list[ReviewFinding]) -> Simplificati by_guidance_kind=by_guidance_kind, by_action_status=by_action_status, blocking_simplification_count=sum( - finding.is_safe_mechanical_simplification() and finding.action_status == "recommended" for finding in guided + finding.is_safe_mechanical_simplification() and finding.action_status in {"recommended", "failed"} + for finding in guided ), applied_count=by_action_status.get("applied", 0), kept_count=by_action_status.get("kept", 0), diff --git a/packages/specfact-project/module-package.yaml b/packages/specfact-project/module-package.yaml index 0f057ac1..dd6f58ac 100644 --- a/packages/specfact-project/module-package.yaml +++ b/packages/specfact-project/module-package.yaml @@ -1,5 +1,5 @@ name: nold-ai/specfact-project -version: 0.41.13 +version: 0.41.14 commands: - project - plan @@ -27,5 +27,4 @@ core_compatibility: '>=0.40.0,<1.0.0' description: Official SpecFact project bundle package. bundle_group_command: project integrity: - checksum: sha256:d3b3554ea11460c4f06dd6b99f2c7c6903c23edc06df8e0177665d3137988ae8 - signature: uiHUSMHNJyPQxMx+VQqoptOgUuUG1dB91J/YMYM5rPYIFGHmdfuQBQt+pdKKFy0H2vBfO7yDgDQ3jZz7JObhCQ== + checksum: sha256:4f7ccf3917bd5e8aac6d7147246c61a2cf7045bbe584fc4735f857c4fac95465 diff --git a/tests/unit/specfact_code_review/rules/test_updater.py b/tests/unit/specfact_code_review/rules/test_updater.py index 8c214740..7cc6e5d1 100644 --- a/tests/unit/specfact_code_review/rules/test_updater.py +++ b/tests/unit/specfact_code_review/rules/test_updater.py @@ -137,6 +137,8 @@ def test_default_skill_content_stays_within_line_budget() -> None: assert "allowed-tools: []" in skill assert "Codex CLI" in skill assert "--focus simplify" in skill + assert "In headless mode, process one file at a time" in skill + assert "file, line, rule, guidance_kind, recommended_action, action_status, evidence" in skill def test_render_cursor_rule_uses_cursor_metadata_and_skill_body() -> None: diff --git a/tests/unit/specfact_code_review/run/test_commands.py b/tests/unit/specfact_code_review/run/test_commands.py index 472d99d7..6db56279 100644 --- a/tests/unit/specfact_code_review/run/test_commands.py +++ b/tests/unit/specfact_code_review/run/test_commands.py @@ -3,7 +3,7 @@ import subprocess from datetime import UTC, datetime from pathlib import Path -from typing import Any +from typing import Any, Literal import pytest from typer.testing import CliRunner @@ -16,6 +16,13 @@ runner = CliRunner() REPO_ROOT = Path(__file__).resolve().parents[4] FIXTURE_FILE = REPO_ROOT / "tests/fixtures/review/clean_module.py" +SafeMechanicalAction = Literal["remove", "inline", "collapse"] +SAFE_MECHANICAL_ACTIONS: dict[str, SafeMechanicalAction] = { + "ai-bloat.dead-branch": "remove", + "ai-bloat.pass-through-try-except": "remove", + "ai-bloat.redundant-intermediate": "inline", + "ai-bloat.verbose-bool-return": "collapse", +} def _report(*, score: int = 85) -> ReviewReport: @@ -28,33 +35,35 @@ def _report(*, score: int = 85) -> ReviewReport: ) +def _safe_mechanical_finding(file_path: Path, *, line: int, rule: str) -> ReviewFinding: + return ReviewFinding( + category="ai_bloat", + severity="info", + tool="ast", + rule=rule, + file=str(file_path), + line=line, + message="Safe mechanical simplification.", + fixable=True, + confidence="high", + rewrite_hint="Apply the local rewrite.", + canonical_pattern="safe-mechanical", + estimated_deletion_lines=1, + guidance_kind="safe_mechanical", + recommended_action=SAFE_MECHANICAL_ACTIONS[rule], + clean_code_principle="kiss", + rationale="The rewrite is local and behavior-preserving.", + safety_checks=["pattern shape is exact"], + action_status="recommended", + ) + + def _safe_mechanical_report(file_path: Path, *, line: int, rule: str) -> ReviewReport: return ReviewReport( run_id="review-run-001", timestamp=datetime(2026, 3, 16, tzinfo=UTC), score=85, - findings=[ - ReviewFinding( - category="ai_bloat", - severity="info", - tool="ast", - rule=rule, - file=str(file_path), - line=line, - message="Safe mechanical simplification.", - fixable=True, - confidence="high", - rewrite_hint="Apply the local rewrite.", - canonical_pattern="safe-mechanical", - estimated_deletion_lines=1, - guidance_kind="safe_mechanical", - recommended_action="inline" if rule == "ai-bloat.redundant-intermediate" else "collapse", - clean_code_principle="kiss", - rationale="The rewrite is local and behavior-preserving.", - safety_checks=["pattern shape is exact"], - action_status="recommended", - ) - ], + findings=[_safe_mechanical_finding(file_path, line=line, rule=rule)], summary="Review command test report.", ) @@ -346,6 +355,87 @@ def test_apply_simplification_fixes_collapses_verbose_bool_return(tmp_path: Path assert target.read_text(encoding="utf-8") == "def allowed(role: str) -> bool:\n return role == 'admin'\n" +def test_apply_simplification_fixes_removes_dead_branch(tmp_path: Path) -> None: + target = tmp_path / "sample.py" + target.write_text( + "def classify(value: int) -> str:\n" + " if value > 10:\n" + " return 'large'\n" + " if value > 10:\n" + " return 'still large'\n" + " return 'small'\n", + encoding="utf-8", + ) + + applied = run_commands._apply_simplification_fixes( + _safe_mechanical_report(target, line=4, rule="ai-bloat.dead-branch") + ) + + assert applied == 1 + assert target.read_text(encoding="utf-8") == ( + "def classify(value: int) -> str:\n if value > 10:\n return 'large'\n return 'small'\n" + ) + + +def test_apply_simplification_fixes_removes_pass_through_try_except(tmp_path: Path) -> None: + target = tmp_path / "sample.py" + target.write_text( + "def parse(raw: str) -> object:\n" + " try:\n" + " return parse_json(raw)\n" + " except Exception:\n" + " raise\n", + encoding="utf-8", + ) + + applied = run_commands._apply_simplification_fixes( + _safe_mechanical_report(target, line=2, rule="ai-bloat.pass-through-try-except") + ) + + assert applied == 1 + assert target.read_text(encoding="utf-8") == "def parse(raw: str) -> object:\n return parse_json(raw)\n" + + +def test_apply_simplification_fixes_uses_bottom_up_line_order(tmp_path: Path) -> None: + target = tmp_path / "sample.py" + target.write_text( + "def total(values: list[int]) -> int:\n" + " result = sum(values)\n" + " return result\n" + "\n" + "def classify(value: int) -> str:\n" + " if value > 10:\n" + " return 'large'\n" + " if value > 10:\n" + " return 'still large'\n" + " return 'small'\n", + encoding="utf-8", + ) + report = ReviewReport( + run_id="review-run-001", + timestamp=datetime(2026, 3, 16, tzinfo=UTC), + score=85, + findings=[ + _safe_mechanical_finding(target, line=2, rule="ai-bloat.redundant-intermediate"), + _safe_mechanical_finding(target, line=8, rule="ai-bloat.dead-branch"), + ], + summary="Review command test report.", + ) + + applied = run_commands._apply_simplification_fixes(report) + + assert applied == 2 + assert target.read_text(encoding="utf-8") == ( + "def total(values: list[int]) -> int:\n" + " return sum(values)\n" + "\n" + "def classify(value: int) -> str:\n" + " if value > 10:\n" + " return 'large'\n" + " return 'small'\n" + ) + + def test_apply_simplification_fixes_skips_when_source_no_longer_matches(tmp_path: Path) -> None: target = tmp_path / "sample.py" source = "def total(values: list[int]) -> int:\n result = sum(values)\n return result + 1\n" diff --git a/tests/unit/specfact_code_review/run/test_findings.py b/tests/unit/specfact_code_review/run/test_findings.py index 0afeb6a0..9f9eb155 100644 --- a/tests/unit/specfact_code_review/run/test_findings.py +++ b/tests/unit/specfact_code_review/run/test_findings.py @@ -154,6 +154,17 @@ def test_review_finding_rejects_preserve_guidance_without_preserve_reason() -> N ) +def test_review_finding_rejects_guided_fields_without_guidance_kind() -> None: + with pytest.raises(ValidationError, match="guidance_kind is required"): + ReviewFinding( + **_finding_data( + category="ai_bloat", + severity="info", + recommended_action="remove", + ) + ) + + def test_review_finding_rejects_partial_simplification_metadata_as_nondeterministic() -> None: finding = ReviewFinding( **_finding_data( @@ -311,6 +322,36 @@ def test_review_report_uses_schema_1_2_and_summary_when_guided_metadata_is_prese assert report.simplification_summary.blocking_simplification_count == 1 +def test_review_report_counts_failed_safe_mechanical_findings_as_blocking() -> None: + report = ReviewReport( + run_id="run-guided-simplify", + timestamp=datetime(2026, 3, 11, tzinfo=UTC), + score=85, + findings=[ + ReviewFinding( + **_finding_data( + category="ai_bloat", + severity="info", + confidence="high", + rewrite_hint="Remove the duplicate terminal branch.", + canonical_pattern="duplicate-terminal-guard", + estimated_deletion_lines=1, + guidance_kind="safe_mechanical", + recommended_action="remove", + clean_code_principle="kiss", + rationale="The branch repeats an earlier terminal guard.", + safety_checks=["same guard expression already returned earlier"], + action_status="failed", + ) + ) + ], + summary="Guided simplification advisories.", + ) + + assert report.simplification_summary is not None + assert report.simplification_summary.blocking_simplification_count == 1 + + def test_review_report_maps_pass_with_advisory_verdict() -> None: report = ReviewReport( run_id="run-002", diff --git a/tests/unit/specfact_code_review/run/test_runner.py b/tests/unit/specfact_code_review/run/test_runner.py index 370b362b..792b92ad 100644 --- a/tests/unit/specfact_code_review/run/test_runner.py +++ b/tests/unit/specfact_code_review/run/test_runner.py @@ -60,6 +60,18 @@ def _simplification_finding( confidence: Literal["low", "medium", "high"] = "high", guidance_kind: Literal["safe_mechanical", "needs_tests", "design_judgment", "preserve"] | None = None, ) -> ReviewFinding: + guided_fields = ( + { + "recommended_action": "collapse", + "clean_code_principle": "kiss", + "rationale": "The repeated loop shape can be expressed directly.", + "safety_checks": ["targeted tests cover the surrounding behavior"], + "action_status": "recommended", + "preserve_reason": "The wrapper is a compatibility boundary." if guidance_kind == "preserve" else None, + } + if guidance_kind is not None + else {} + ) return ReviewFinding( category=category, severity="info", @@ -75,12 +87,7 @@ def _simplification_finding( intent_key="score-review", estimated_deletion_lines=3, guidance_kind=guidance_kind, - recommended_action="collapse" if guidance_kind is not None else None, - clean_code_principle="kiss" if guidance_kind is not None else None, - rationale="The repeated loop shape can be expressed directly.", - safety_checks=["targeted tests cover the surrounding behavior"], - action_status="recommended" if guidance_kind is not None else None, - preserve_reason="The wrapper is a compatibility boundary." if guidance_kind == "preserve" else None, + **guided_fields, ) diff --git a/tests/unit/test_guided_simplify_resources.py b/tests/unit/test_guided_simplify_resources.py index a0522914..bcacec4a 100644 --- a/tests/unit/test_guided_simplify_resources.py +++ b/tests/unit/test_guided_simplify_resources.py @@ -8,6 +8,11 @@ SKILL = ( REPO_ROOT / "packages/specfact-code-review/src/specfact_code_review/resources/skills/specfact-code-review/SKILL.md" ) +SKILL_COPIES = ( + SKILL, + REPO_ROOT / "skills/specfact-code-review/SKILL.md", + REPO_ROOT / ".vibe/skills/specfact-code-review/SKILL.md", +) def test_simplify_prompt_guides_interactive_walkthrough_levels() -> None: @@ -27,13 +32,14 @@ def test_simplify_prompt_guides_interactive_walkthrough_levels() -> None: def test_code_review_skill_teaches_llms_how_to_apply_simplification_guidance() -> None: - text = SKILL.read_text(encoding="utf-8") - - assert "Ask for walkthrough level" in text - assert "safe_mechanical" in text - assert "needs_tests" in text - assert "design_judgment" in text - assert "preserve" in text - assert "recommended, applied, kept, skipped, failed" in text - assert "In headless mode, process one file at a time" in text - assert "file, line, rule, guidance_kind, recommended_action, action_status, evidence" in text + for skill_path in SKILL_COPIES: + text = skill_path.read_text(encoding="utf-8") + + assert "Ask for walkthrough level" in text + assert "safe_mechanical" in text + assert "needs_tests" in text + assert "design_judgment" in text + assert "preserve" in text + assert "recommended, applied, kept, skipped, failed" in text + assert "In headless mode, process one file at a time" in text + assert "file, line, rule, guidance_kind, recommended_action, action_status, evidence" in text From 17d6c5c883df9109560fd93f60cbd51b521591d3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 23 May 2026 20:23:39 +0000 Subject: [PATCH 4/6] chore(modules): ci sign changed modules --- packages/specfact-code-review/module-package.yaml | 1 + packages/specfact-project/module-package.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/packages/specfact-code-review/module-package.yaml b/packages/specfact-code-review/module-package.yaml index 03905db0..1484cc3f 100644 --- a/packages/specfact-code-review/module-package.yaml +++ b/packages/specfact-code-review/module-package.yaml @@ -24,3 +24,4 @@ category: codebase bundle_group_command: code integrity: checksum: sha256:33646f1efa3af2c99276838b51dafd0279fa12bedb8726d468da34dccc3f978f + signature: V4HxJAx7SoAsO8TCyKliFJUWnkrGsDEQSmVZJ3r4cdTRKiUBw7vQkXKdumGo/l3pSaRHLvB9aS80dBv6HlVjBw== diff --git a/packages/specfact-project/module-package.yaml b/packages/specfact-project/module-package.yaml index dd6f58ac..fadb8dca 100644 --- a/packages/specfact-project/module-package.yaml +++ b/packages/specfact-project/module-package.yaml @@ -28,3 +28,4 @@ description: Official SpecFact project bundle package. bundle_group_command: project integrity: checksum: sha256:4f7ccf3917bd5e8aac6d7147246c61a2cf7045bbe584fc4735f857c4fac95465 + signature: tp2QqgHGSQ2e9mXrUuSQhxkD40tkTPTn3R3Hj+iSeKX/O/hMo3dIRHeYD5iIHwYJ+aih7w3eBLeLlURim3rGCA== From 44dd42328568abf1c82acb93e64b93fd2bf122d8 Mon Sep 17 00:00:00 2001 From: omit-test Date: Sat, 23 May 2026 22:37:55 +0200 Subject: [PATCH 5/6] Fix additional critical code review findings --- .../TDD_EVIDENCE.md | 8 ++++++- .../specfact-code-review/module-package.yaml | 2 +- .../src/specfact_code_review/run/commands.py | 7 +++++- .../src/specfact_code_review/run/findings.py | 3 +++ .../specfact_code_review/run/test_commands.py | 22 +++++++++++++++++++ .../specfact_code_review/run/test_findings.py | 18 +++++++++++++++ 6 files changed, 57 insertions(+), 3 deletions(-) diff --git a/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md b/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md index 81e8d4b5..94e4fb6d 100644 --- a/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md +++ b/openspec/changes/code-review-12-guided-simplification-enforcement/TDD_EVIDENCE.md @@ -10,6 +10,10 @@ - Result: failed as expected before PR review fixes. - Evidence: 6 failed, 87 passed. - Missing contract areas: orphan guided-field validation, failed safe-mechanical blocking counts, missing deterministic safe-mechanical fixers, bottom-up rewrite ordering, and headless action-table defaults. +- `hatch run pytest tests/unit/specfact_code_review/run/test_commands.py::test_apply_simplification_fixes_keeps_dead_branch_with_else tests/unit/specfact_code_review/run/test_findings.py::test_review_finding_rejects_guided_evidence_fields_without_guidance_kind -q` + - Result: failed as expected before follow-up PR review fixes. + - Evidence: 4 failed. + - Missing contract areas: dead-branch fixer skipped no-else guard and guided evidence fields required `guidance_kind`. ## Passing After @@ -21,6 +25,8 @@ - Result after strict metadata fallout fix: 125 passed. - `hatch run pytest tests/unit/specfact_code_review/run/test_commands.py tests/unit/specfact_code_review/tools/test_ai_bloat_runner.py -q` - Result after final cleanup: 50 passed. +- `hatch run pytest tests/unit/specfact_code_review/run/test_commands.py tests/unit/specfact_code_review/run/test_findings.py -q` + - Result after follow-up PR review fixes: 77 passed. - `hatch run contract-test` - Result after PR review fixes: 758 passed, 2 warnings. - `hatch run smart-test` @@ -38,7 +44,7 @@ - `hatch run verify-modules-signature --payload-from-filesystem --enforce-version-bump --version-check-base origin/dev` - Result after PR review fixes: verified 6 module manifests. - `hatch run specfact code review run --bug-hunt --json --out .specfact/code-review.json --scope changed` - - Result after PR review fixes: PASS, CI exit 0, score 115, 0 findings. + - Result after follow-up PR review fixes: PASS, CI exit 0, score 120, 0 findings. - `openspec validate code-review-12-guided-simplification-enforcement --strict` - Result: valid. diff --git a/packages/specfact-code-review/module-package.yaml b/packages/specfact-code-review/module-package.yaml index 03905db0..fd8308c3 100644 --- a/packages/specfact-code-review/module-package.yaml +++ b/packages/specfact-code-review/module-package.yaml @@ -23,4 +23,4 @@ description: Official SpecFact code review bundle package. category: codebase bundle_group_command: code integrity: - checksum: sha256:33646f1efa3af2c99276838b51dafd0279fa12bedb8726d468da34dccc3f978f + checksum: sha256:23295ada1b0f046cbb3317c250d60f86733cb1dc7b3c0baf2d386ce5444fe207 diff --git a/packages/specfact-code-review/src/specfact_code_review/run/commands.py b/packages/specfact-code-review/src/specfact_code_review/run/commands.py index 21944e48..1fa6bc73 100644 --- a/packages/specfact-code-review/src/specfact_code_review/run/commands.py +++ b/packages/specfact-code-review/src/specfact_code_review/run/commands.py @@ -317,7 +317,12 @@ def _apply_dead_branch_fix(finding: ReviewFinding) -> bool: if not isinstance(stmt, ast.If): continue test_key = ast.dump(stmt.test, include_attributes=False) - if stmt.lineno == finding.line and test_key in prior_terminal_tests and _terminal_return(stmt.body): + if ( + stmt.lineno == finding.line + and test_key in prior_terminal_tests + and _terminal_return(stmt.body) + and not stmt.orelse + ): return _replace_line_range( file_path, source, diff --git a/packages/specfact-code-review/src/specfact_code_review/run/findings.py b/packages/specfact-code-review/src/specfact_code_review/run/findings.py index 46237073..a62a71c5 100644 --- a/packages/specfact-code-review/src/specfact_code_review/run/findings.py +++ b/packages/specfact-code-review/src/specfact_code_review/run/findings.py @@ -195,6 +195,9 @@ def _validate_guided_metadata(self) -> ReviewFinding: self.safety_checks, self.action_status, self.preserve_reason, + self.before_ref, + self.after_ref, + self.improvement, ) if self.guidance_kind is None: if any(value is not None for value in guided_fields): diff --git a/tests/unit/specfact_code_review/run/test_commands.py b/tests/unit/specfact_code_review/run/test_commands.py index 6db56279..42e869cd 100644 --- a/tests/unit/specfact_code_review/run/test_commands.py +++ b/tests/unit/specfact_code_review/run/test_commands.py @@ -377,6 +377,28 @@ def test_apply_simplification_fixes_removes_dead_branch(tmp_path: Path) -> None: ) +def test_apply_simplification_fixes_keeps_dead_branch_with_else(tmp_path: Path) -> None: + target = tmp_path / "sample.py" + source = ( + "def classify(value: int) -> str:\n" + " if value > 10:\n" + " return 'large'\n" + " if value > 10:\n" + " return 'still large'\n" + " else:\n" + " return 'fallback'\n" + " return 'small'\n" + ) + target.write_text(source, encoding="utf-8") + + applied = run_commands._apply_simplification_fixes( + _safe_mechanical_report(target, line=4, rule="ai-bloat.dead-branch") + ) + + assert applied == 0 + assert target.read_text(encoding="utf-8") == source + + def test_apply_simplification_fixes_removes_pass_through_try_except(tmp_path: Path) -> None: target = tmp_path / "sample.py" target.write_text( diff --git a/tests/unit/specfact_code_review/run/test_findings.py b/tests/unit/specfact_code_review/run/test_findings.py index 9f9eb155..78e591be 100644 --- a/tests/unit/specfact_code_review/run/test_findings.py +++ b/tests/unit/specfact_code_review/run/test_findings.py @@ -165,6 +165,24 @@ def test_review_finding_rejects_guided_fields_without_guidance_kind() -> None: ) +@pytest.mark.parametrize( + "field_payload", + [ + cast(ReviewFindingPayload, {"before_ref": EvidenceRef(path="src/example.py", start_line=10, end_line=12)}), + cast(ReviewFindingPayload, {"after_ref": EvidenceRef(path="src/example.py", start_line=10, end_line=10)}), + cast(ReviewFindingPayload, {"improvement": "Removed one redundant branch."}), + ], +) +def test_review_finding_rejects_guided_evidence_fields_without_guidance_kind( + field_payload: ReviewFindingPayload, +) -> None: + finding_payload = _finding_data(category="ai_bloat", severity="info") + finding_payload.update(field_payload) + + with pytest.raises(ValidationError, match="guidance_kind is required"): + ReviewFinding(**finding_payload) + + def test_review_finding_rejects_partial_simplification_metadata_as_nondeterministic() -> None: finding = ReviewFinding( **_finding_data( From a9c10307da5d589ebbe011f0ed45ac42cfe578f3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 23 May 2026 20:42:30 +0000 Subject: [PATCH 6/6] chore(modules): ci sign changed modules --- packages/specfact-code-review/module-package.yaml | 1 + packages/specfact-project/module-package.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/packages/specfact-code-review/module-package.yaml b/packages/specfact-code-review/module-package.yaml index e742e2c2..6c20194f 100644 --- a/packages/specfact-code-review/module-package.yaml +++ b/packages/specfact-code-review/module-package.yaml @@ -24,3 +24,4 @@ category: codebase bundle_group_command: code integrity: checksum: sha256:43d0b97fa0545409aaa6b0922c3df22bc3625052c88ff7a34c002745b0f01c1d + signature: yhVIa7Izi3NxrPEZAcOfeXmv5Yqj9mesCbrsd8eFltzxh5ECbLLqCr7Cxiv6p7MOmXLSDzhTWbJpM1hLNIgqCg== diff --git a/packages/specfact-project/module-package.yaml b/packages/specfact-project/module-package.yaml index 34040283..fb55ab0b 100644 --- a/packages/specfact-project/module-package.yaml +++ b/packages/specfact-project/module-package.yaml @@ -28,3 +28,4 @@ description: Official SpecFact project bundle package. bundle_group_command: project integrity: checksum: sha256:1dea73c218b64924a003e14e155c25d238ff62d00b6b70cd8a92cbfc4e9834fa + signature: ybXH/L4RAztljN/nnD/R6EFVt4T5HS0FSFb3dKfmixzQLzbQZ7xjj95sY4Iu9qZywuLfd7V6f3s48mu5QPLMAQ==