From 50060d7b2986ea3a74a610e82c3080e595ed2cce Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 20 Apr 2026 21:56:41 +0500 Subject: [PATCH 01/32] refactor(app): global and comprehensive refactoring (Stage #1). --- benchmarks/baselines/reference-cp313.json | 120 + benchmarks/run_benchmark.py | 117 +- codeclone.baseline.json | 16 +- codeclone/_cli_args.py | 456 --- codeclone/_cli_config.py | 303 -- codeclone/_cli_paths.py | 47 - codeclone/_cli_reports.py | 150 - codeclone/analysis/__init__.py | 30 + codeclone/analysis/_module_walk.py | 553 +++ codeclone/{ => analysis}/cfg.py | 2 +- codeclone/{ => analysis}/cfg_model.py | 0 codeclone/analysis/class_metrics.py | 54 + codeclone/analysis/fingerprint.py | 85 + .../{normalize.py => analysis/normalizer.py} | 2 +- codeclone/analysis/parser.py | 219 ++ codeclone/analysis/units.py | 310 ++ codeclone/baseline/__init__.py | 25 + .../baseline/_metrics_baseline_contract.py | 100 + .../baseline/_metrics_baseline_payload.py | 245 ++ .../baseline/_metrics_baseline_validation.py | 569 ++++ .../clone_baseline.py} | 421 +-- codeclone/baseline/diff.py | 111 + codeclone/baseline/metrics_baseline.py | 458 +++ codeclone/baseline/trust.py | 302 ++ codeclone/{blocks.py => blocks/__init__.py} | 8 +- codeclone/cache.py | 2803 ---------------- codeclone/cache/__init__.py | 273 ++ codeclone/cache/_canonicalize.py | 432 +++ codeclone/cache/_validators.py | 228 ++ codeclone/cache/_wire_decode.py | 692 ++++ codeclone/cache/_wire_encode.py | 252 ++ codeclone/cache/_wire_helpers.py | 307 ++ codeclone/cache/entries.py | 470 +++ codeclone/{cache_io.py => cache/integrity.py} | 23 +- .../projection.py} | 56 +- codeclone/cache/store.py | 616 ++++ codeclone/cache/versioning.py | 136 + codeclone/cache_paths.py | 49 - codeclone/cli.py | 1741 ---------- codeclone/config/__init__.py | 83 + codeclone/config/argparse_builder.py | 106 + codeclone/config/pyproject_loader.py | 216 ++ codeclone/config/resolver.py | 91 + codeclone/config/spec.py | 769 +++++ 
.../{contracts.py => contracts/__init__.py} | 27 + codeclone/{ => contracts}/errors.py | 11 + codeclone/contracts/schemas.py | 85 + codeclone/core/__init__.py | 61 + codeclone/core/_types.py | 315 ++ codeclone/core/api_surface_payload.py | 98 + codeclone/core/bootstrap.py | 41 + codeclone/core/coverage_payload.py | 173 + codeclone/core/discovery.py | 198 ++ codeclone/core/discovery_cache.py | 363 ++ codeclone/core/metrics_payload.py | 312 ++ codeclone/core/parallelism.py | 334 ++ codeclone/core/pipeline.py | 343 ++ codeclone/core/reporting.py | 255 ++ codeclone/core/worker.py | 166 + codeclone/extractor.py | 1149 ------- codeclone/findings/__init__.py | 21 + codeclone/findings/clones/__init__.py | 9 + codeclone/{ => findings/clones}/grouping.py | 2 +- codeclone/findings/ids.py | 31 + codeclone/findings/structural/__init__.py | 21 + .../structural/detectors.py} | 6 +- codeclone/fingerprint.py | 24 - codeclone/main.py | 15 + .../__init__.py} | 0 codeclone/metrics/__init__.py | 8 + codeclone/metrics/_base.py | 62 + codeclone/metrics/complexity.py | 2 +- codeclone/metrics/coverage_join.py | 2 +- codeclone/metrics/overloaded_modules.py | 2 +- codeclone/metrics/registry.py | 673 ++++ codeclone/metrics_baseline.py | 1317 -------- codeclone/{paths.py => paths/__init__.py} | 2 +- codeclone/pipeline.py | 2773 --------------- .../{qualnames.py => qualnames/__init__.py} | 0 codeclone/report/__init__.py | 8 +- codeclone/report/derived.py | 2 +- codeclone/report/document/__init__.py | 64 + codeclone/report/document/_common.py | 407 +++ codeclone/report/document/_design_groups.py | 394 +++ codeclone/report/document/_findings_groups.py | 606 ++++ codeclone/report/document/builder.py | 114 + codeclone/report/document/derived.py | 425 +++ codeclone/report/document/findings.py | 245 ++ codeclone/report/document/integrity.py | 91 + codeclone/report/document/inventory.py | 223 ++ codeclone/report/document/metrics.py | 701 ++++ codeclone/report/explain.py | 2 +- 
codeclone/report/findings.py | 2 +- codeclone/report/gates/__init__.py | 39 + codeclone/report/gates/evaluator.py | 695 ++++ .../gates/reasons.py} | 0 .../html/__init__.py} | 10 +- .../{_html_report => report/html}/_context.py | 12 +- .../_assemble.py => report/html/assemble.py} | 39 +- .../html/assets}/__init__.py | 0 .../html/assets/css.py} | 2 - .../{_html_js.py => report/html/assets/js.py} | 0 codeclone/report/html/primitives/__init__.py | 5 + .../html/primitives/data_attrs.py} | 2 +- .../html/primitives/escape.py} | 0 .../html/primitives/filters.py} | 2 +- codeclone/report/html/sections/__init__.py | 5 + .../html/sections}/_clones.py | 36 +- .../html/sections}/_coupling.py | 13 +- .../html/sections}/_coverage_join.py | 11 +- .../html/sections}/_dead_code.py | 13 +- .../html/sections}/_dependencies.py | 13 +- .../html/sections}/_meta.py | 10 +- .../html/sections}/_overview.py | 11 +- .../html/sections}/_structural.py | 29 +- .../html/sections}/_suggestions.py | 24 +- codeclone/report/html/widgets/__init__.py | 5 + .../html/widgets/badges.py} | 7 +- .../html/widgets/components.py} | 9 +- .../html/widgets/glossary.py} | 2 +- .../html/widgets/icons.py} | 0 .../html/widgets/snippets.py} | 2 +- .../html/widgets/tables.py} | 8 +- .../_tabs.py => report/html/widgets/tabs.py} | 2 +- codeclone/report/json_contract.py | 2959 +---------------- codeclone/report/markdown.py | 629 +--- codeclone/report/overview.py | 6 +- codeclone/report/renderers/__init__.py | 19 + codeclone/report/renderers/json.py | 18 + codeclone/report/renderers/markdown.py | 628 ++++ codeclone/report/renderers/sarif.py | 974 ++++++ codeclone/report/renderers/text.py | 884 +++++ codeclone/report/sarif.py | 982 +----- codeclone/report/serialize.py | 893 +---- codeclone/report/suggestions.py | 4 +- codeclone/{scanner.py => scanner/__init__.py} | 2 +- codeclone/suppressions.py | 6 +- codeclone/surfaces/__init__.py | 4 + codeclone/surfaces/cli/__init__.py | 5 + .../cli/baseline_state.py} | 81 +- 
codeclone/surfaces/cli/changed_scope.py | 220 ++ .../{_cli_rich.py => surfaces/cli/console.py} | 97 +- codeclone/surfaces/cli/main.py | 1324 ++++++++ .../cli/report_meta.py} | 69 +- codeclone/surfaces/cli/reports_output.py | 324 ++ .../cli/runtime.py} | 55 +- codeclone/surfaces/cli/state.py | 25 + .../cli/summary.py} | 119 +- codeclone/surfaces/cli/types.py | 34 + codeclone/surfaces/mcp/__init__.py | 58 + codeclone/surfaces/mcp/__main__.py | 9 + codeclone/surfaces/mcp/payloads.py | 56 + .../{mcp_server.py => surfaces/mcp/server.py} | 56 +- codeclone/surfaces/mcp/service.py | 261 ++ .../mcp/session.py} | 304 +- codeclone/surfaces/mcp/tools/__init__.py | 33 + codeclone/surfaces/mcp/tools/_base.py | 44 + codeclone/surfaces/mcp/tools/analyze.py | 28 + codeclone/surfaces/mcp/tools/checks.py | 36 + codeclone/surfaces/mcp/tools/compare.py | 16 + codeclone/surfaces/mcp/tools/findings.py | 36 + codeclone/surfaces/mcp/tools/gates.py | 21 + codeclone/surfaces/mcp/tools/help.py | 16 + codeclone/surfaces/mcp/tools/hotspots.py | 21 + codeclone/surfaces/mcp/tools/pr.py | 16 + .../surfaces/mcp/tools/report_section.py | 16 + codeclone/surfaces/mcp/tools/runs.py | 25 + .../__init__.py} | 6 +- codeclone/{_html_report => utils}/__init__.py | 6 +- codeclone/{_coerce.py => utils/coerce.py} | 0 codeclone/{_git_diff.py => utils/git_diff.py} | 0 codeclone/{_json_io.py => utils/json_io.py} | 0 .../schema_validation.py} | 2 +- pyproject.toml | 41 +- scripts/gen_options_doc.py | 38 + tests/_ast_metrics_helpers.py | 4 +- tests/_contract_snapshots.py | 18 + tests/_import_graph.py | 31 + .../fixtures/contract_snapshots/cli_help.txt | 197 ++ .../contract_snapshots/mcp_tool_schemas.json | 1541 +++++++++ .../public_api_surface.json | 99 + tests/test_architecture.py | 110 +- tests/test_baseline.py | 69 +- tests/test_benchmark.py | 72 + tests/test_blocks.py | 2 +- tests/test_cache.py | 18 +- tests/test_cfg.py | 10 +- tests/test_cfg_model.py | 2 +- tests/test_cli_config.py | 102 +- 
tests/test_cli_help_snapshot.py | 25 + tests/test_cli_inprocess.py | 62 +- tests/test_cli_smoke.py | 2 +- tests/test_cli_unit.py | 94 +- tests/test_coerce.py | 2 +- tests/test_core_branch_coverage.py | 46 +- tests/test_detector_golden.py | 6 +- tests/test_extractor.py | 203 +- tests/test_fingerprint.py | 2 +- tests/test_gating.py | 218 ++ tests/test_golden_v2.py | 28 +- tests/test_html_report.py | 28 +- tests/test_html_report_helpers.py | 36 +- ..._main_guard.py => test_main_entrypoint.py} | 15 +- tests/test_mcp_server.py | 4 +- ...py => test_mcp_server_main_guard_runpy.py} | 9 +- tests/test_mcp_service.py | 55 +- tests/test_mcp_tool_schema_snapshot.py | 28 + tests/test_metrics_baseline.py | 114 +- tests/test_metrics_modules.py | 2 +- tests/test_metrics_registry.py | 22 + tests/test_normalize.py | 6 +- tests/test_options_spec_coverage.py | 113 + tests/test_pipeline_metrics.py | 83 +- tests/test_pipeline_process.py | 34 +- tests/test_public_api_surface.py | 33 + tests/test_renderer_isolation.py | 35 + tests/test_report.py | 10 +- tests/test_report_branch_invariants.py | 12 +- tests/test_report_contract_coverage.py | 2 +- tests/test_scanner_extra.py | 2 +- tests/test_security.py | 9 +- tests/test_segments.py | 2 +- tests/test_structural_findings.py | 8 +- tests/test_target_module_map_imports.py | 100 + uv.lock | 278 +- 225 files changed, 25173 insertions(+), 17573 deletions(-) create mode 100644 benchmarks/baselines/reference-cp313.json delete mode 100644 codeclone/_cli_args.py delete mode 100644 codeclone/_cli_config.py delete mode 100644 codeclone/_cli_paths.py delete mode 100644 codeclone/_cli_reports.py create mode 100644 codeclone/analysis/__init__.py create mode 100644 codeclone/analysis/_module_walk.py rename codeclone/{ => analysis}/cfg.py (99%) rename codeclone/{ => analysis}/cfg_model.py (100%) create mode 100644 codeclone/analysis/class_metrics.py create mode 100644 codeclone/analysis/fingerprint.py rename codeclone/{normalize.py => analysis/normalizer.py} 
(99%) create mode 100644 codeclone/analysis/parser.py create mode 100644 codeclone/analysis/units.py create mode 100644 codeclone/baseline/__init__.py create mode 100644 codeclone/baseline/_metrics_baseline_contract.py create mode 100644 codeclone/baseline/_metrics_baseline_payload.py create mode 100644 codeclone/baseline/_metrics_baseline_validation.py rename codeclone/{baseline.py => baseline/clone_baseline.py} (54%) create mode 100644 codeclone/baseline/diff.py create mode 100644 codeclone/baseline/metrics_baseline.py create mode 100644 codeclone/baseline/trust.py rename codeclone/{blocks.py => blocks/__init__.py} (95%) delete mode 100644 codeclone/cache.py create mode 100644 codeclone/cache/__init__.py create mode 100644 codeclone/cache/_canonicalize.py create mode 100644 codeclone/cache/_validators.py create mode 100644 codeclone/cache/_wire_decode.py create mode 100644 codeclone/cache/_wire_encode.py create mode 100644 codeclone/cache/_wire_helpers.py create mode 100644 codeclone/cache/entries.py rename codeclone/{cache_io.py => cache/integrity.py} (80%) rename codeclone/{cache_segments.py => cache/projection.py} (82%) create mode 100644 codeclone/cache/store.py create mode 100644 codeclone/cache/versioning.py delete mode 100644 codeclone/cache_paths.py delete mode 100644 codeclone/cli.py create mode 100644 codeclone/config/__init__.py create mode 100644 codeclone/config/argparse_builder.py create mode 100644 codeclone/config/pyproject_loader.py create mode 100644 codeclone/config/resolver.py create mode 100644 codeclone/config/spec.py rename codeclone/{contracts.py => contracts/__init__.py} (75%) rename codeclone/{ => contracts}/errors.py (85%) create mode 100644 codeclone/contracts/schemas.py create mode 100644 codeclone/core/__init__.py create mode 100644 codeclone/core/_types.py create mode 100644 codeclone/core/api_surface_payload.py create mode 100644 codeclone/core/bootstrap.py create mode 100644 codeclone/core/coverage_payload.py create mode 100644 
codeclone/core/discovery.py create mode 100644 codeclone/core/discovery_cache.py create mode 100644 codeclone/core/metrics_payload.py create mode 100644 codeclone/core/parallelism.py create mode 100644 codeclone/core/pipeline.py create mode 100644 codeclone/core/reporting.py create mode 100644 codeclone/core/worker.py delete mode 100644 codeclone/extractor.py create mode 100644 codeclone/findings/__init__.py create mode 100644 codeclone/findings/clones/__init__.py rename codeclone/{ => findings/clones}/grouping.py (98%) create mode 100644 codeclone/findings/ids.py create mode 100644 codeclone/findings/structural/__init__.py rename codeclone/{structural_findings.py => findings/structural/detectors.py} (99%) delete mode 100644 codeclone/fingerprint.py create mode 100644 codeclone/main.py rename codeclone/{meta_markers.py => meta_markers/__init__.py} (100%) create mode 100644 codeclone/metrics/_base.py create mode 100644 codeclone/metrics/registry.py delete mode 100644 codeclone/metrics_baseline.py rename codeclone/{paths.py => paths/__init__.py} (98%) delete mode 100644 codeclone/pipeline.py rename codeclone/{qualnames.py => qualnames/__init__.py} (100%) create mode 100644 codeclone/report/document/__init__.py create mode 100644 codeclone/report/document/_common.py create mode 100644 codeclone/report/document/_design_groups.py create mode 100644 codeclone/report/document/_findings_groups.py create mode 100644 codeclone/report/document/builder.py create mode 100644 codeclone/report/document/derived.py create mode 100644 codeclone/report/document/findings.py create mode 100644 codeclone/report/document/integrity.py create mode 100644 codeclone/report/document/inventory.py create mode 100644 codeclone/report/document/metrics.py create mode 100644 codeclone/report/gates/__init__.py create mode 100644 codeclone/report/gates/evaluator.py rename codeclone/{_cli_gating.py => report/gates/reasons.py} (100%) rename codeclone/{html_report.py => report/html/__init__.py} (66%) 
rename codeclone/{_html_report => report/html}/_context.py (97%) rename codeclone/{_html_report/_assemble.py => report/html/assemble.py} (93%) rename codeclone/{_html_report/_sections => report/html/assets}/__init__.py (100%) rename codeclone/{_html_css.py => report/html/assets/css.py} (99%) rename codeclone/{_html_js.py => report/html/assets/js.py} (100%) create mode 100644 codeclone/report/html/primitives/__init__.py rename codeclone/{_html_data_attrs.py => report/html/primitives/data_attrs.py} (96%) rename codeclone/{_html_escape.py => report/html/primitives/escape.py} (100%) rename codeclone/{_html_filters.py => report/html/primitives/filters.py} (97%) create mode 100644 codeclone/report/html/sections/__init__.py rename codeclone/{_html_report/_sections => report/html/sections}/_clones.py (97%) rename codeclone/{_html_report/_sections => report/html/sections}/_coupling.py (97%) rename codeclone/{_html_report/_sections => report/html/sections}/_coverage_join.py (96%) rename codeclone/{_html_report/_sections => report/html/sections}/_dead_code.py (94%) rename codeclone/{_html_report/_sections => report/html/sections}/_dependencies.py (98%) rename codeclone/{_html_report/_sections => report/html/sections}/_meta.py (98%) rename codeclone/{_html_report/_sections => report/html/sections}/_overview.py (99%) rename codeclone/{_html_report/_sections => report/html/sections}/_structural.py (96%) rename codeclone/{_html_report/_sections => report/html/sections}/_suggestions.py (95%) create mode 100644 codeclone/report/html/widgets/__init__.py rename codeclone/{_html_badges.py => report/html/widgets/badges.py} (98%) rename codeclone/{_html_report/_components.py => report/html/widgets/components.py} (95%) rename codeclone/{_html_report/_glossary.py => report/html/widgets/glossary.py} (99%) rename codeclone/{_html_report/_icons.py => report/html/widgets/icons.py} (100%) rename codeclone/{_html_snippets.py => report/html/widgets/snippets.py} (99%) rename 
codeclone/{_html_report/_tables.py => report/html/widgets/tables.py} (95%) rename codeclone/{_html_report/_tabs.py => report/html/widgets/tabs.py} (97%) create mode 100644 codeclone/report/renderers/__init__.py create mode 100644 codeclone/report/renderers/json.py create mode 100644 codeclone/report/renderers/markdown.py create mode 100644 codeclone/report/renderers/sarif.py create mode 100644 codeclone/report/renderers/text.py rename codeclone/{scanner.py => scanner/__init__.py} (98%) create mode 100644 codeclone/surfaces/__init__.py create mode 100644 codeclone/surfaces/cli/__init__.py rename codeclone/{_cli_baselines.py => surfaces/cli/baseline_state.py} (86%) create mode 100644 codeclone/surfaces/cli/changed_scope.py rename codeclone/{_cli_rich.py => surfaces/cli/console.py} (58%) create mode 100644 codeclone/surfaces/cli/main.py rename codeclone/{_cli_meta.py => surfaces/cli/report_meta.py} (68%) create mode 100644 codeclone/surfaces/cli/reports_output.py rename codeclone/{_cli_runtime.py => surfaces/cli/runtime.py} (82%) create mode 100644 codeclone/surfaces/cli/state.py rename codeclone/{_cli_summary.py => surfaces/cli/summary.py} (68%) create mode 100644 codeclone/surfaces/cli/types.py create mode 100644 codeclone/surfaces/mcp/__init__.py create mode 100644 codeclone/surfaces/mcp/__main__.py create mode 100644 codeclone/surfaces/mcp/payloads.py rename codeclone/{mcp_server.py => surfaces/mcp/server.py} (95%) create mode 100644 codeclone/surfaces/mcp/service.py rename codeclone/{mcp_service.py => surfaces/mcp/session.py} (96%) create mode 100644 codeclone/surfaces/mcp/tools/__init__.py create mode 100644 codeclone/surfaces/mcp/tools/_base.py create mode 100644 codeclone/surfaces/mcp/tools/analyze.py create mode 100644 codeclone/surfaces/mcp/tools/checks.py create mode 100644 codeclone/surfaces/mcp/tools/compare.py create mode 100644 codeclone/surfaces/mcp/tools/findings.py create mode 100644 codeclone/surfaces/mcp/tools/gates.py create mode 100644 
codeclone/surfaces/mcp/tools/help.py create mode 100644 codeclone/surfaces/mcp/tools/hotspots.py create mode 100644 codeclone/surfaces/mcp/tools/pr.py create mode 100644 codeclone/surfaces/mcp/tools/report_section.py create mode 100644 codeclone/surfaces/mcp/tools/runs.py rename codeclone/{ui_messages.py => ui_messages/__init__.py} (99%) rename codeclone/{_html_report => utils}/__init__.py (69%) rename codeclone/{_coerce.py => utils/coerce.py} (100%) rename codeclone/{_git_diff.py => utils/git_diff.py} (100%) rename codeclone/{_json_io.py => utils/json_io.py} (100%) rename codeclone/{_schema_validation.py => utils/schema_validation.py} (95%) create mode 100644 scripts/gen_options_doc.py create mode 100644 tests/_contract_snapshots.py create mode 100644 tests/_import_graph.py create mode 100644 tests/fixtures/contract_snapshots/cli_help.txt create mode 100644 tests/fixtures/contract_snapshots/mcp_tool_schemas.json create mode 100644 tests/fixtures/contract_snapshots/public_api_surface.json create mode 100644 tests/test_cli_help_snapshot.py create mode 100644 tests/test_gating.py rename tests/{test_cli_main_guard.py => test_main_entrypoint.py} (56%) rename tests/{test_cli_main_guard_runpy.py => test_mcp_server_main_guard_runpy.py} (50%) create mode 100644 tests/test_mcp_tool_schema_snapshot.py create mode 100644 tests/test_metrics_registry.py create mode 100644 tests/test_options_spec_coverage.py create mode 100644 tests/test_public_api_surface.py create mode 100644 tests/test_renderer_isolation.py create mode 100644 tests/test_target_module_map_imports.py diff --git a/benchmarks/baselines/reference-cp313.json b/benchmarks/baselines/reference-cp313.json new file mode 100644 index 0000000..f8089ce --- /dev/null +++ b/benchmarks/baselines/reference-cp313.json @@ -0,0 +1,120 @@ +{ + "benchmark_schema_version": "1.0", + "tool": { + "name": "codeclone", + "version": "2.0.0b5", + "python_tag": "cp313" + }, + "config": { + "target": "", + "runs": 3, + "warmups": 1, + 
"python_executable": "" + }, + "environment": { + "platform": "macOS-15.7.5-arm64-arm-64bit-Mach-O", + "machine": "arm64", + "python_version": "3.13.12", + "python_implementation": "CPython", + "python_tag": "cp313", + "cpu_count": 10, + "cpu_affinity_count": null, + "container_detected": false, + "cgroup_cpu_max": null, + "cgroup_memory_max": null, + "timestamp_utc": "2026-04-17T13:45:19Z" + }, + "scenarios": [ + { + "name": "cold_full", + "mode": "cold", + "extra_args": [], + "warmups": 1, + "runs": 3, + "deterministic": true, + "digest": "0e4366a01ad8a0db646c9a984e92fa913166119541e296387d963c5ae4301bc9", + "timings_seconds": [ + 1.0560423749998336, + 1.0907688339998458, + 1.0867978750002294 + ], + "stats_seconds": { + "min": 1.0560423749998336, + "max": 1.0907688339998458, + "mean": 1.0778696946666362, + "median": 1.0867978750002294, + "p95": 1.090371738099884, + "stdev": 0.015519150357364984 + }, + "inventory_sample": { + "found": 180, + "analyzed": 180, + "cached": 0, + "skipped": 0 + } + }, + { + "name": "warm_full", + "mode": "warm", + "extra_args": [], + "warmups": 1, + "runs": 3, + "deterministic": true, + "digest": "55ea63867ffdd599784d10cd0c86d15ba0944e128d62d4d6cb8e68ce8779ea2e", + "timings_seconds": [ + 0.2863777919997119, + 0.2806324170001062, + 0.27757904200007033 + ], + "stats_seconds": { + "min": 0.27757904200007033, + "max": 0.2863777919997119, + "mean": 0.28152975033329614, + "median": 0.2806324170001062, + "p95": 0.2858032544997513, + "stdev": 0.003647684719762983 + }, + "inventory_sample": { + "found": 180, + "analyzed": 0, + "cached": 180, + "skipped": 0 + } + }, + { + "name": "warm_clones_only", + "mode": "warm", + "extra_args": [ + "--skip-metrics" + ], + "warmups": 1, + "runs": 3, + "deterministic": true, + "digest": "8e2fbaf49e9f577b89348aa54fc8f7d6866c9c8213ff1e69d831edc2f663d907", + "timings_seconds": [ + 0.2363325830001486, + 0.22605108300012944, + 0.21571508300030473 + ], + "stats_seconds": { + "min": 0.21571508300030473, + "max": 
0.2363325830001486, + "mean": 0.2260329163335276, + "median": 0.22605108300012944, + "p95": 0.23530443300014667, + "stdev": 0.00841706893091738 + }, + "inventory_sample": { + "found": 180, + "analyzed": 0, + "cached": 180, + "skipped": 0 + } + } + ], + "comparisons": { + "warm_full_speedup_vs_cold_full": 3.8726740360854963, + "warm_clones_only_speedup_vs_warm_full": 1.2414557509549535 + }, + "generated_at_utc": "2026-04-17T13:45:19Z" +} diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index b77c96b..ad41e07 100755 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -15,6 +15,7 @@ import subprocess import sys import time +from collections.abc import Mapping, Sequence from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path @@ -240,6 +241,14 @@ def _validate_inventory_sample( ) +def _print_bulleted_lines(header: str, lines: Sequence[str]) -> None: + if not lines: + return + print(header) + for line in lines: + print(f"- {line}") + + def _scenario_result( *, scenario: Scenario, @@ -393,6 +402,67 @@ def _median_for(name: str) -> float | None: return comparisons +def _load_benchmark_payload(path: Path) -> dict[str, object]: + payload_obj: object = json.loads(path.read_text(encoding="utf-8")) + if not isinstance(payload_obj, dict): + raise RuntimeError(f"benchmark payload is not an object: {path}") + return payload_obj + + +def _scenario_medians(payload: Mapping[str, object]) -> dict[str, float]: + scenarios_obj = payload.get("scenarios") + if not isinstance(scenarios_obj, list): + raise RuntimeError("benchmark payload is missing a scenarios list") + + medians: dict[str, float] = {} + for item in scenarios_obj: + if not isinstance(item, dict): + raise RuntimeError("benchmark scenario entry is not an object") + name = item.get("name") + stats = item.get("stats_seconds") + if not isinstance(name, str) or not isinstance(stats, dict): + raise RuntimeError("benchmark scenario entry is missing 
name/stats_seconds") + median = stats.get("median") + if not isinstance(median, (int, float)): + raise RuntimeError(f"benchmark scenario {name} is missing median timing") + medians[name] = float(median) + return medians + + +def _timing_regressions( + *, + current_payload: Mapping[str, object], + baseline_payload: Mapping[str, object], + max_regression_pct: float, +) -> list[str]: + current_medians = _scenario_medians(current_payload) + baseline_medians = _scenario_medians(baseline_payload) + + missing = sorted(set(baseline_medians) - set(current_medians)) + if missing: + raise RuntimeError( + "benchmark payload is missing baseline scenario(s): " + ", ".join(missing) + ) + + regressions: list[str] = [] + for name, baseline_median in sorted(baseline_medians.items()): + if baseline_median <= 0: + raise RuntimeError( + f"baseline scenario {name} has non-positive median: {baseline_median}" + ) + current_median = current_medians[name] + allowed_median = baseline_median * (1.0 + (max_regression_pct / 100.0)) + if current_median <= allowed_median: + continue + regression_pct = ((current_median - baseline_median) / baseline_median) * 100.0 + regressions.append( + f"{name}: median {current_median:.4f}s exceeds baseline " + f"{baseline_median:.4f}s by {regression_pct:.2f}% " + f"(allowed {max_regression_pct:.2f}%)" + ) + return regressions + + def _parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description=( @@ -440,6 +510,18 @@ def _parse_args() -> argparse.Namespace: default=sys.executable, help="Python executable used to invoke codeclone CLI", ) + parser.add_argument( + "--baseline", + type=Path, + default=None, + help="Existing benchmark JSON used for per-scenario median regression checks.", + ) + parser.add_argument( + "--max-regression-pct", + type=float, + default=5.0, + help="Allowed per-scenario median slowdown versus --baseline.", + ) return parser.parse_args() @@ -449,6 +531,8 @@ def main() -> int: raise SystemExit("--runs must be > 0") if 
args.warmups < 0: raise SystemExit("--warmups must be >= 0") + if args.max_regression_pct < 0: + raise SystemExit("--max-regression-pct must be >= 0") target = args.target.resolve() if not target.exists(): raise SystemExit(f"target does not exist: {target}") @@ -501,6 +585,22 @@ def main() -> int: .replace("+00:00", "Z"), } + regressions: list[str] = [] + baseline_path = args.baseline.resolve() if args.baseline is not None else None + if baseline_path is not None: + baseline_payload = _load_benchmark_payload(baseline_path) + regressions = _timing_regressions( + current_payload=payload, + baseline_payload=baseline_payload, + max_regression_pct=args.max_regression_pct, + ) + payload["baseline_comparison"] = { + "baseline_path": str(baseline_path), + "max_regression_pct": args.max_regression_pct, + "status": "regression" if regressions else "ok", + "regressions": regressions, + } + args.output.parent.mkdir(parents=True, exist_ok=True) tmp_output = args.output.with_suffix(args.output.suffix + ".tmp") rendered = json.dumps(payload, ensure_ascii=False, indent=2) @@ -522,12 +622,19 @@ def main() -> int: f"p95={p95_s:.4f}s stdev={stdev_s:.4f}s " f"digest={scenario['digest']}" ) - if comparisons: - print("ratios:") - for name, value in sorted(comparisons.items()): - print(f"- {name}={value:.3f}x") + _print_bulleted_lines( + "ratios:", + [f"{name}={value:.3f}x" for name, value in sorted(comparisons.items())], + ) + if baseline_path is not None: + print(f"baseline={baseline_path}") + print(f"max_regression_pct={args.max_regression_pct:.2f}") + if regressions: + _print_bulleted_lines("regressions:", regressions) + else: + print("baseline_status=ok") print(f"output={args.output}") - return 0 + return 1 if regressions else 0 if __name__ == "__main__": diff --git a/codeclone.baseline.json b/codeclone.baseline.json index b4656f8..839cc18 100644 --- a/codeclone.baseline.json +++ b/codeclone.baseline.json @@ -2,14 +2,14 @@ "meta": { "generator": { "name": "codeclone", - "version": 
"2.0.0b5" + "version": "2.0.0b6" }, "schema_version": "2.1", "fingerprint_version": "1", "python_tag": "cp313", - "created_at": "2026-04-13T13:10:37Z", + "created_at": "2026-04-20T16:54:58Z", "payload_sha256": "07a383c1d0974593c83ac30430aec9b99d89fe50f640a9b3b433658e0bd029e8", - "metrics_payload_sha256": "122ee5d2d3dc2d4e9553b1d440c0314515dcb60cc79ada264b13c39c6ba18e04" + "metrics_payload_sha256": "2b2885a8e2a35a58b25e0136a21d30d57b4a72136a09bf6db3d0ac5b043b2e32" }, "clones": { "functions": [], @@ -23,9 +23,13 @@ "max_cohesion": 3, "low_cohesion_classes": [], "dependency_cycles": [], - "dependency_max_depth": 11, + "dependency_max_depth": 13, "dead_code_items": [], - "health_score": 89, - "health_grade": "B" + "health_score": 88, + "health_grade": "B", + "typing_param_permille": 1000, + "typing_return_permille": 999, + "docstring_permille": 37, + "typing_any_count": 51 } } diff --git a/codeclone/_cli_args.py b/codeclone/_cli_args.py deleted file mode 100644 index 7ad4c95..0000000 --- a/codeclone/_cli_args.py +++ /dev/null @@ -1,456 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import argparse -import sys -from typing import NoReturn - -from . import ui_messages as ui -from .contracts import ( - DEFAULT_COHESION_THRESHOLD, - DEFAULT_COMPLEXITY_THRESHOLD, - DEFAULT_COUPLING_THRESHOLD, - DEFAULT_HEALTH_THRESHOLD, - ExitCode, - cli_help_epilog, -) - -DEFAULT_ROOT = "." 
-DEFAULT_MIN_LOC = 10 -DEFAULT_MIN_STMT = 6 -DEFAULT_BLOCK_MIN_LOC = 20 -DEFAULT_BLOCK_MIN_STMT = 8 -DEFAULT_SEGMENT_MIN_LOC = 20 -DEFAULT_SEGMENT_MIN_STMT = 10 -DEFAULT_PROCESSES = 4 -DEFAULT_MAX_CACHE_SIZE_MB = 50 -DEFAULT_MAX_BASELINE_SIZE_MB = 5 - -DEFAULT_BASELINE_PATH = "codeclone.baseline.json" -DEFAULT_HTML_REPORT_PATH = ".cache/codeclone/report.html" -DEFAULT_JSON_REPORT_PATH = ".cache/codeclone/report.json" -DEFAULT_MARKDOWN_REPORT_PATH = ".cache/codeclone/report.md" -DEFAULT_SARIF_REPORT_PATH = ".cache/codeclone/report.sarif" -DEFAULT_TEXT_REPORT_PATH = ".cache/codeclone/report.txt" - - -class _ArgumentParser(argparse.ArgumentParser): - def error(self, message: str) -> NoReturn: - self.print_usage(sys.stderr) - self.exit( - int(ExitCode.CONTRACT_ERROR), - f"CONTRACT ERROR: {message}\n", - ) - - -class _HelpFormatter(argparse.RawTextHelpFormatter): - """Product-oriented help formatter extension point.""" - - -def _add_optional_path_argument( - group: argparse._ArgumentGroup, - *, - flag: str, - dest: str, - help_text: str, - default: str | None = None, - const: str | None = None, - metavar: str = "FILE", -) -> None: - group.add_argument( - flag, - dest=dest, - nargs="?", - metavar=metavar, - default=default, - const=const, - help=help_text, - ) - - -def _add_bool_optional_argument( - group: argparse._ArgumentGroup, - *, - flag: str, - help_text: str, - default: bool = False, -) -> None: - group.add_argument( - flag, - action=argparse.BooleanOptionalAction, - default=default, - help=help_text, - ) - - -def build_parser(version: str) -> _ArgumentParser: - ap = _ArgumentParser( - prog="codeclone", - description="Structural code quality analysis for Python.", - add_help=False, - formatter_class=_HelpFormatter, - epilog=cli_help_epilog(), - ) - - target_group = ap.add_argument_group("Target") - target_group.add_argument( - "root", - nargs="?", - default=DEFAULT_ROOT, - help=ui.HELP_ROOT, - ) - - analysis_group = ap.add_argument_group("Analysis") - 
analysis_group.add_argument( - "--min-loc", - type=int, - default=DEFAULT_MIN_LOC, - help=ui.HELP_MIN_LOC, - ) - analysis_group.add_argument( - "--min-stmt", - type=int, - default=DEFAULT_MIN_STMT, - help=ui.HELP_MIN_STMT, - ) - # Block/segment thresholds are advanced tuning: configurable via - # pyproject.toml only (no CLI flags). Defaults live on the namespace - # so apply_pyproject_config_overrides can override them. - ap.set_defaults( - block_min_loc=DEFAULT_BLOCK_MIN_LOC, - block_min_stmt=DEFAULT_BLOCK_MIN_STMT, - segment_min_loc=DEFAULT_SEGMENT_MIN_LOC, - segment_min_stmt=DEFAULT_SEGMENT_MIN_STMT, - golden_fixture_paths=(), - ) - analysis_group.add_argument( - "--processes", - type=int, - default=DEFAULT_PROCESSES, - help=ui.HELP_PROCESSES, - ) - _add_bool_optional_argument( - analysis_group, - flag="--changed-only", - help_text=ui.HELP_CHANGED_ONLY, - ) - analysis_group.add_argument( - "--diff-against", - default=None, - metavar="GIT_REF", - help=ui.HELP_DIFF_AGAINST, - ) - analysis_group.add_argument( - "--paths-from-git-diff", - default=None, - metavar="GIT_REF", - help=ui.HELP_PATHS_FROM_GIT_DIFF, - ) - _add_optional_path_argument( - analysis_group, - flag="--cache-path", - dest="cache_path", - default=None, - const=None, - help_text=ui.HELP_CACHE_PATH, - ) - _add_optional_path_argument( - analysis_group, - flag="--cache-dir", - dest="cache_path", - default=None, - const=None, - help_text=ui.HELP_CACHE_DIR_LEGACY, - ) - analysis_group.add_argument( - "--max-cache-size-mb", - type=int, - default=DEFAULT_MAX_CACHE_SIZE_MB, - metavar="MB", - help=ui.HELP_MAX_CACHE_SIZE_MB, - ) - - baselines_ci_group = ap.add_argument_group("Baselines and CI") - _add_optional_path_argument( - baselines_ci_group, - flag="--baseline", - dest="baseline", - default=DEFAULT_BASELINE_PATH, - const=DEFAULT_BASELINE_PATH, - help_text=ui.HELP_BASELINE, - ) - baselines_ci_group.add_argument( - "--max-baseline-size-mb", - type=int, - default=DEFAULT_MAX_BASELINE_SIZE_MB, - metavar="MB", 
- help=ui.HELP_MAX_BASELINE_SIZE_MB, - ) - _add_bool_optional_argument( - baselines_ci_group, - flag="--update-baseline", - help_text=ui.HELP_UPDATE_BASELINE, - ) - _add_optional_path_argument( - baselines_ci_group, - flag="--metrics-baseline", - dest="metrics_baseline", - default=DEFAULT_BASELINE_PATH, - const=DEFAULT_BASELINE_PATH, - help_text=ui.HELP_METRICS_BASELINE, - ) - _add_bool_optional_argument( - baselines_ci_group, - flag="--update-metrics-baseline", - help_text=ui.HELP_UPDATE_METRICS_BASELINE, - ) - _add_bool_optional_argument( - baselines_ci_group, - flag="--ci", - help_text=ui.HELP_CI, - ) - _add_bool_optional_argument( - baselines_ci_group, - flag="--api-surface", - help_text=ui.HELP_API_SURFACE, - ) - baselines_ci_group.add_argument( - "--coverage", - dest="coverage_xml", - metavar="FILE", - default=None, - help=ui.HELP_COVERAGE, - ) - - quality_group = ap.add_argument_group("Quality gates") - _add_bool_optional_argument( - quality_group, - flag="--fail-on-new", - help_text=ui.HELP_FAIL_ON_NEW, - ) - _add_bool_optional_argument( - quality_group, - flag="--fail-on-new-metrics", - help_text=ui.HELP_FAIL_ON_NEW_METRICS, - ) - quality_group.add_argument( - "--fail-threshold", - type=int, - default=-1, - metavar="MAX_CLONES", - help=ui.HELP_FAIL_THRESHOLD, - ) - quality_group.add_argument( - "--fail-complexity", - type=int, - nargs="?", - const=DEFAULT_COMPLEXITY_THRESHOLD, - default=-1, - metavar="CC_MAX", - help=ui.HELP_FAIL_COMPLEXITY, - ) - quality_group.add_argument( - "--fail-coupling", - type=int, - nargs="?", - const=DEFAULT_COUPLING_THRESHOLD, - default=-1, - metavar="CBO_MAX", - help=ui.HELP_FAIL_COUPLING, - ) - quality_group.add_argument( - "--fail-cohesion", - type=int, - nargs="?", - const=DEFAULT_COHESION_THRESHOLD, - default=-1, - metavar="LCOM4_MAX", - help=ui.HELP_FAIL_COHESION, - ) - _add_bool_optional_argument( - quality_group, - flag="--fail-cycles", - help_text=ui.HELP_FAIL_CYCLES, - ) - _add_bool_optional_argument( - quality_group, 
- flag="--fail-dead-code", - help_text=ui.HELP_FAIL_DEAD_CODE, - ) - quality_group.add_argument( - "--fail-health", - type=int, - nargs="?", - const=DEFAULT_HEALTH_THRESHOLD, - default=-1, - metavar="SCORE_MIN", - help=ui.HELP_FAIL_HEALTH, - ) - _add_bool_optional_argument( - quality_group, - flag="--fail-on-typing-regression", - help_text=ui.HELP_FAIL_ON_TYPING_REGRESSION, - ) - _add_bool_optional_argument( - quality_group, - flag="--fail-on-docstring-regression", - help_text=ui.HELP_FAIL_ON_DOCSTRING_REGRESSION, - ) - _add_bool_optional_argument( - quality_group, - flag="--fail-on-api-break", - help_text=ui.HELP_FAIL_ON_API_BREAK, - ) - _add_bool_optional_argument( - quality_group, - flag="--fail-on-untested-hotspots", - help_text=ui.HELP_FAIL_ON_UNTESTED_HOTSPOTS, - ) - quality_group.add_argument( - "--min-typing-coverage", - type=int, - default=-1, - metavar="PERCENT", - help=ui.HELP_MIN_TYPING_COVERAGE, - ) - quality_group.add_argument( - "--min-docstring-coverage", - type=int, - default=-1, - metavar="PERCENT", - help=ui.HELP_MIN_DOCSTRING_COVERAGE, - ) - quality_group.add_argument( - "--coverage-min", - type=int, - default=50, - metavar="PERCENT", - help=ui.HELP_COVERAGE_MIN, - ) - - stages_group = ap.add_argument_group("Analysis stages") - _add_bool_optional_argument( - stages_group, - flag="--skip-metrics", - help_text=ui.HELP_SKIP_METRICS, - ) - _add_bool_optional_argument( - stages_group, - flag="--skip-dead-code", - help_text=ui.HELP_SKIP_DEAD_CODE, - ) - _add_bool_optional_argument( - stages_group, - flag="--skip-dependencies", - help_text=ui.HELP_SKIP_DEPENDENCIES, - ) - - reporting_group = ap.add_argument_group("Reporting") - _add_optional_path_argument( - reporting_group, - flag="--html", - dest="html_out", - const=DEFAULT_HTML_REPORT_PATH, - help_text=ui.HELP_HTML, - ) - _add_optional_path_argument( - reporting_group, - flag="--json", - dest="json_out", - const=DEFAULT_JSON_REPORT_PATH, - help_text=ui.HELP_JSON, - ) - _add_optional_path_argument( - 
reporting_group, - flag="--md", - dest="md_out", - const=DEFAULT_MARKDOWN_REPORT_PATH, - help_text=ui.HELP_MD, - ) - _add_optional_path_argument( - reporting_group, - flag="--sarif", - dest="sarif_out", - const=DEFAULT_SARIF_REPORT_PATH, - help_text=ui.HELP_SARIF, - ) - _add_optional_path_argument( - reporting_group, - flag="--text", - dest="text_out", - const=DEFAULT_TEXT_REPORT_PATH, - help_text=ui.HELP_TEXT, - ) - _add_bool_optional_argument( - reporting_group, - flag="--timestamped-report-paths", - help_text=ui.HELP_TIMESTAMPED_REPORT_PATHS, - ) - - ui_group = ap.add_argument_group("Output and UI") - _add_bool_optional_argument( - ui_group, - flag="--open-html-report", - help_text=ui.HELP_OPEN_HTML_REPORT, - ) - ui_group.add_argument( - "--no-progress", - dest="no_progress", - action="store_true", - help=ui.HELP_NO_PROGRESS, - ) - ui_group.add_argument( - "--progress", - dest="no_progress", - action="store_false", - help=ui.HELP_PROGRESS, - ) - ui_group.add_argument( - "--no-color", - dest="no_color", - action="store_true", - help=ui.HELP_NO_COLOR, - ) - ui_group.add_argument( - "--color", - dest="no_color", - action="store_false", - help=ui.HELP_COLOR, - ) - ui_group.set_defaults(no_progress=False, no_color=False) - _add_bool_optional_argument( - ui_group, - flag="--quiet", - help_text=ui.HELP_QUIET, - ) - _add_bool_optional_argument( - ui_group, - flag="--verbose", - help_text=ui.HELP_VERBOSE, - ) - _add_bool_optional_argument( - ui_group, - flag="--debug", - help_text=ui.HELP_DEBUG, - ) - - general_group = ap.add_argument_group("General") - general_group.add_argument( - "-h", - "--help", - action="help", - help="Show this help message and exit.", - ) - general_group.add_argument( - "--version", - action="version", - version=ui.version_output(version), - help=ui.HELP_VERSION, - ) - - return ap diff --git a/codeclone/_cli_config.py b/codeclone/_cli_config.py deleted file mode 100644 index b17ba43..0000000 --- a/codeclone/_cli_config.py +++ /dev/null @@ -1,303 
+0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import importlib -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import TYPE_CHECKING, Final - -from .golden_fixtures import ( - GoldenFixturePatternError, - normalize_golden_fixture_patterns, -) - -if TYPE_CHECKING: - import argparse - from collections.abc import Mapping, Sequence - - -class ConfigValidationError(ValueError): - """Raised when pyproject.toml contains invalid CodeClone configuration.""" - - -@dataclass(frozen=True, slots=True) -class _ConfigKeySpec: - expected_type: type[object] - allow_none: bool = False - expected_name: str | None = None - - -_CONFIG_KEY_SPECS: Final[dict[str, _ConfigKeySpec]] = { - "min_loc": _ConfigKeySpec(int), - "min_stmt": _ConfigKeySpec(int), - "block_min_loc": _ConfigKeySpec(int), - "block_min_stmt": _ConfigKeySpec(int), - "segment_min_loc": _ConfigKeySpec(int), - "segment_min_stmt": _ConfigKeySpec(int), - "processes": _ConfigKeySpec(int), - "cache_path": _ConfigKeySpec(str, allow_none=True), - "max_cache_size_mb": _ConfigKeySpec(int), - "baseline": _ConfigKeySpec(str), - "max_baseline_size_mb": _ConfigKeySpec(int), - "update_baseline": _ConfigKeySpec(bool), - "fail_on_new": _ConfigKeySpec(bool), - "fail_threshold": _ConfigKeySpec(int), - "ci": _ConfigKeySpec(bool), - "fail_complexity": _ConfigKeySpec(int), - "fail_coupling": _ConfigKeySpec(int), - "fail_cohesion": _ConfigKeySpec(int), - "fail_cycles": _ConfigKeySpec(bool), - "fail_dead_code": _ConfigKeySpec(bool), - "fail_health": _ConfigKeySpec(int), - "fail_on_new_metrics": _ConfigKeySpec(bool), - "api_surface": _ConfigKeySpec(bool), - "coverage_xml": _ConfigKeySpec(str, allow_none=True), - 
"fail_on_typing_regression": _ConfigKeySpec(bool), - "fail_on_docstring_regression": _ConfigKeySpec(bool), - "fail_on_api_break": _ConfigKeySpec(bool), - "fail_on_untested_hotspots": _ConfigKeySpec(bool), - "min_typing_coverage": _ConfigKeySpec(int), - "min_docstring_coverage": _ConfigKeySpec(int), - "coverage_min": _ConfigKeySpec(int), - "update_metrics_baseline": _ConfigKeySpec(bool), - "metrics_baseline": _ConfigKeySpec(str), - "skip_metrics": _ConfigKeySpec(bool), - "skip_dead_code": _ConfigKeySpec(bool), - "skip_dependencies": _ConfigKeySpec(bool), - "golden_fixture_paths": _ConfigKeySpec(list, expected_name="list[str]"), - "html_out": _ConfigKeySpec(str, allow_none=True), - "json_out": _ConfigKeySpec(str, allow_none=True), - "md_out": _ConfigKeySpec(str, allow_none=True), - "sarif_out": _ConfigKeySpec(str, allow_none=True), - "text_out": _ConfigKeySpec(str, allow_none=True), - "no_progress": _ConfigKeySpec(bool), - "no_color": _ConfigKeySpec(bool), - "quiet": _ConfigKeySpec(bool), - "verbose": _ConfigKeySpec(bool), - "debug": _ConfigKeySpec(bool), -} -_PATH_CONFIG_KEYS: Final[frozenset[str]] = frozenset( - { - "cache_path", - "baseline", - "metrics_baseline", - "coverage_xml", - "html_out", - "json_out", - "md_out", - "sarif_out", - "text_out", - } -) - - -def collect_explicit_cli_dests( - parser: argparse.ArgumentParser, - *, - argv: Sequence[str], -) -> set[str]: - option_to_dest: dict[str, str] = {} - for action in parser._actions: - for option in action.option_strings: - option_to_dest[option] = action.dest - - explicit: set[str] = set() - for token in argv: - if token == "--": - break - if not token.startswith("-"): - continue - option = token.split("=", maxsplit=1)[0] - dest = option_to_dest.get(option) - if dest is not None: - explicit.add(dest) - return explicit - - -def load_pyproject_config(root_path: Path) -> dict[str, object]: - config_path = root_path / "pyproject.toml" - if not config_path.exists(): - return {} - - payload: object - try: - 
payload = _load_toml(config_path) - except OSError as exc: - raise ConfigValidationError( - f"Cannot read pyproject.toml at {config_path}: {exc}" - ) from exc - except ValueError as exc: - raise ConfigValidationError(f"Invalid TOML in {config_path}: {exc}") from exc - - if not isinstance(payload, dict): - raise ConfigValidationError( - f"Invalid pyproject payload at {config_path}: root must be object" - ) - - tool_obj = payload.get("tool") - if tool_obj is None: - return {} - if not isinstance(tool_obj, dict): - raise ConfigValidationError( - f"Invalid pyproject payload at {config_path}: 'tool' must be object" - ) - - codeclone_obj = tool_obj.get("codeclone") - if codeclone_obj is None: - return {} - if not isinstance(codeclone_obj, dict): - raise ConfigValidationError( - "Invalid pyproject payload at " - f"{config_path}: 'tool.codeclone' must be object" - ) - - unknown = sorted(set(codeclone_obj.keys()) - set(_CONFIG_KEY_SPECS)) - if unknown: - raise ConfigValidationError( - "Unknown key(s) in tool.codeclone: " + ", ".join(unknown) - ) - - validated: dict[str, object] = {} - for key in sorted(codeclone_obj.keys()): - value = _validate_config_value( - key=key, - value=codeclone_obj[key], - ) - validated[key] = _normalize_path_config_value( - key=key, - value=value, - root_path=root_path, - ) - return validated - - -def apply_pyproject_config_overrides( - *, - args: argparse.Namespace, - config_values: Mapping[str, object], - explicit_cli_dests: set[str], -) -> None: - for key, value in config_values.items(): - if key in explicit_cli_dests: - continue - setattr(args, key, value) - - -def _validate_config_value(*, key: str, value: object) -> object: - spec = _CONFIG_KEY_SPECS[key] - if value is None: - if spec.allow_none: - return None - raise ConfigValidationError( - "Invalid value type for tool.codeclone." 
- f"{key}: expected {spec.expected_name or spec.expected_type.__name__}" - ) - - expected_type = spec.expected_type - if expected_type is bool: - return _validated_config_instance( - key=key, - value=value, - expected_type=bool, - expected_name="bool", - ) - - if expected_type is int: - return _validated_config_instance( - key=key, - value=value, - expected_type=int, - expected_name="int", - reject_bool=True, - ) - - if expected_type is str: - return _validated_config_instance( - key=key, - value=value, - expected_type=str, - expected_name="str", - ) - if expected_type is list: - return _validated_string_list(key=key, value=value) - - raise ConfigValidationError(f"Unsupported config key spec for tool.codeclone.{key}") - - -def _validated_config_instance( - *, - key: str, - value: object, - expected_type: type[object], - expected_name: str, - reject_bool: bool = False, -) -> object: - if isinstance(value, expected_type) and ( - not reject_bool or not isinstance(value, bool) - ): - return value - raise ConfigValidationError( - f"Invalid value type for tool.codeclone.{key}: expected {expected_name}" - ) - - -def _validated_string_list(*, key: str, value: object) -> tuple[str, ...]: - if not isinstance(value, list): - raise ConfigValidationError( - f"Invalid value type for tool.codeclone.{key}: expected list[str]" - ) - if not all(isinstance(item, str) for item in value): - raise ConfigValidationError( - f"Invalid value type for tool.codeclone.{key}: expected list[str]" - ) - try: - return normalize_golden_fixture_patterns(value) - except GoldenFixturePatternError as exc: - raise ConfigValidationError(str(exc)) from exc - - -def _load_toml(path: Path) -> object: - if sys.version_info >= (3, 11): - import tomllib - - with path.open("rb") as config_file: - return tomllib.load(config_file) - else: - try: - tomli_module = importlib.import_module("tomli") - except ModuleNotFoundError as exc: - raise ConfigValidationError( - "Python 3.10 requires dependency 'tomli' to read 
pyproject.toml." - ) from exc - - load_fn = getattr(tomli_module, "load", None) - if not callable(load_fn): - raise ConfigValidationError( - "Invalid 'tomli' module: missing callable 'load'." - ) - - with path.open("rb") as config_file: - return load_fn(config_file) - - -def _normalize_path_config_value( - *, - key: str, - value: object, - root_path: Path, -) -> object: - if key not in _PATH_CONFIG_KEYS: - return value - if not isinstance(value, str): - return value - - path = Path(value).expanduser() - if path.is_absolute(): - return str(path) - return str(root_path / path) diff --git a/codeclone/_cli_paths.py b/codeclone/_cli_paths.py deleted file mode 100644 index 3577dc0..0000000 --- a/codeclone/_cli_paths.py +++ /dev/null @@ -1,47 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import sys -from pathlib import Path -from typing import TYPE_CHECKING, Protocol - -from .contracts import ExitCode -from .ui_messages import fmt_contract_error - -if TYPE_CHECKING: - from collections.abc import Callable - - -class _Printer(Protocol): - def print(self, *objects: object, **kwargs: object) -> None: ... 
- - -def _validate_output_path( - path: str, - *, - expected_suffix: str, - label: str, - console: _Printer, - invalid_message: Callable[..., str], - invalid_path_message: Callable[..., str], -) -> Path: - out = Path(path).expanduser() - if out.suffix.lower() != expected_suffix: - console.print( - fmt_contract_error( - invalid_message(label=label, path=out, expected_suffix=expected_suffix) - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - try: - return out.resolve() - except OSError as e: - console.print( - fmt_contract_error(invalid_path_message(label=label, path=out, error=e)) - ) - sys.exit(ExitCode.CONTRACT_ERROR) diff --git a/codeclone/_cli_reports.py b/codeclone/_cli_reports.py deleted file mode 100644 index 126879c..0000000 --- a/codeclone/_cli_reports.py +++ /dev/null @@ -1,150 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import sys -import webbrowser -from pathlib import Path -from typing import Protocol - -from . import ui_messages as ui -from .contracts import ExitCode - -__all__ = ["write_report_outputs"] - - -class _PrinterLike(Protocol): - def print(self, *objects: object, **kwargs: object) -> None: ... 
- - -class _QuietArgs(Protocol): - quiet: bool - - -def _path_attr(obj: object, name: str) -> Path | None: - value = getattr(obj, name, None) - return value if isinstance(value, Path) else None - - -def _text_attr(obj: object, name: str) -> str | None: - value = getattr(obj, name, None) - return value if isinstance(value, str) else None - - -def _write_report_output( - *, - out: Path, - content: str, - label: str, - console: _PrinterLike, -) -> None: - try: - out.parent.mkdir(parents=True, exist_ok=True) - out.write_text(content, "utf-8") - except OSError as exc: - console.print( - ui.fmt_contract_error( - ui.fmt_report_write_failed(label=label, path=out, error=exc) - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - - -def _open_html_report_in_browser(*, path: Path) -> None: - if not webbrowser.open_new_tab(path.as_uri()): - raise OSError("no browser handler available") - - -def write_report_outputs( - *, - args: _QuietArgs, - output_paths: object, - report_artifacts: object, - console: _PrinterLike, - open_html_report: bool = False, -) -> str | None: - html_report_path: str | None = None - saved_reports: list[tuple[str, Path]] = [] - html_path = _path_attr(output_paths, "html") - json_path = _path_attr(output_paths, "json") - md_path = _path_attr(output_paths, "md") - sarif_path = _path_attr(output_paths, "sarif") - text_path = _path_attr(output_paths, "text") - html_report = _text_attr(report_artifacts, "html") - json_report = _text_attr(report_artifacts, "json") - md_report = _text_attr(report_artifacts, "md") - sarif_report = _text_attr(report_artifacts, "sarif") - text_report = _text_attr(report_artifacts, "text") - - if html_path and html_report is not None: - out = html_path - _write_report_output( - out=out, - content=html_report, - label="HTML", - console=console, - ) - html_report_path = str(out) - saved_reports.append(("HTML", out)) - - if json_path and json_report is not None: - out = json_path - _write_report_output( - out=out, - content=json_report, - 
label="JSON", - console=console, - ) - saved_reports.append(("JSON", out)) - - if md_path and md_report is not None: - out = md_path - _write_report_output( - out=out, - content=md_report, - label="Markdown", - console=console, - ) - saved_reports.append(("Markdown", out)) - - if sarif_path and sarif_report is not None: - out = sarif_path - _write_report_output( - out=out, - content=sarif_report, - label="SARIF", - console=console, - ) - saved_reports.append(("SARIF", out)) - - if text_path and text_report is not None: - out = text_path - _write_report_output( - out=out, - content=text_report, - label="text", - console=console, - ) - saved_reports.append(("Text", out)) - - if saved_reports and not args.quiet: - cwd = Path.cwd() - console.print() - for label, path in saved_reports: - try: - display = path.relative_to(cwd) - except ValueError: - display = path - console.print(f" [bold]{label} report saved:[/bold] [dim]{display}[/dim]") - - if open_html_report and html_path is not None: - try: - _open_html_report_in_browser(path=html_path) - except Exception as exc: - console.print(ui.fmt_html_report_open_failed(path=html_path, error=exc)) - - return html_report_path diff --git a/codeclone/analysis/__init__.py b/codeclone/analysis/__init__.py new file mode 100644 index 0000000..6938ec3 --- /dev/null +++ b/codeclone/analysis/__init__.py @@ -0,0 +1,30 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from .cfg import CFG, CFGBuilder +from .fingerprint import bucket_loc, sha1 +from .normalizer import AstNormalizer, NormalizationConfig, stmt_hashes + + +def __getattr__(name: str) -> object: + if name == "extract_units_and_stats_from_source": + from .units import extract_units_and_stats_from_source + + return extract_units_and_stats_from_source + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +__all__ = [ + "CFG", + "AstNormalizer", + "CFGBuilder", + "NormalizationConfig", + "bucket_loc", + "extract_units_and_stats_from_source", + "sha1", + "stmt_hashes", +] diff --git a/codeclone/analysis/_module_walk.py b/codeclone/analysis/_module_walk.py new file mode 100644 index 0000000..2eef8ba --- /dev/null +++ b/codeclone/analysis/_module_walk.py @@ -0,0 +1,553 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import ast +import tokenize +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Literal, NamedTuple + +from .. 
import qualnames as _qualnames +from ..models import DeadCandidate, ModuleDep +from ..suppressions import ( + DeclarationTarget, + bind_suppressions_to_declarations, + build_suppression_index, + extract_suppression_directives, + suppression_target_key, +) +from .class_metrics import _node_line_span +from .parser import ( + _build_declaration_token_index, + _declaration_end_line, + _DeclarationTokenIndexKey, + _source_tokens, +) + +if TYPE_CHECKING: + from collections.abc import Mapping + + from ..suppressions import SuppressionTargetKey + + +_NamedDeclarationNode = _qualnames.FunctionNode | ast.ClassDef +_PROTOCOL_MODULE_NAMES = frozenset({"typing", "typing_extensions"}) + + +def _resolve_import_target( + module_name: str, + import_node: ast.ImportFrom, +) -> str: + if import_node.level <= 0: + return import_node.module or "" + + parent_parts = module_name.split(".") + keep = max(0, len(parent_parts) - import_node.level) + prefix = parent_parts[:keep] + if import_node.module: + return ".".join([*prefix, import_node.module]) + return ".".join(prefix) + + +@dataclass(slots=True) +class _ModuleWalkState: + import_names: set[str] = field(default_factory=set) + deps: list[ModuleDep] = field(default_factory=list) + referenced_names: set[str] = field(default_factory=set) + imported_symbol_bindings: dict[str, set[str]] = field(default_factory=dict) + imported_module_aliases: dict[str, str] = field(default_factory=dict) + name_nodes: list[ast.Name] = field(default_factory=list) + attr_nodes: list[ast.Attribute] = field(default_factory=list) + protocol_symbol_aliases: set[str] = field(default_factory=lambda: {"Protocol"}) + protocol_module_aliases: set[str] = field( + default_factory=lambda: set(_PROTOCOL_MODULE_NAMES) + ) + + +def _append_module_dep( + *, + module_name: str, + target: str, + import_type: Literal["import", "from_import"], + line: int, + state: _ModuleWalkState, +) -> None: + state.deps.append( + ModuleDep( + source=module_name, + target=target, + 
import_type=import_type, + line=line, + ) + ) + + +def _collect_import_node( + *, + node: ast.Import, + module_name: str, + state: _ModuleWalkState, + collect_referenced_names: bool, +) -> None: + line = int(getattr(node, "lineno", 0)) + for alias in node.names: + alias_name = alias.asname or alias.name.split(".", 1)[0] + state.import_names.add(alias_name) + _append_module_dep( + module_name=module_name, + target=alias.name, + import_type="import", + line=line, + state=state, + ) + if collect_referenced_names: + state.imported_module_aliases[alias_name] = alias.name + if alias.name in _PROTOCOL_MODULE_NAMES: + state.protocol_module_aliases.add(alias_name) + + +def _dotted_expr_name(expr: ast.expr) -> str | None: + if isinstance(expr, ast.Name): + return expr.id + if isinstance(expr, ast.Attribute): + prefix = _dotted_expr_name(expr.value) + if prefix is None: + return None + return f"{prefix}.{expr.attr}" + return None + + +def _collect_import_from_node( + *, + node: ast.ImportFrom, + module_name: str, + state: _ModuleWalkState, + collect_referenced_names: bool, +) -> None: + target = _resolve_import_target(module_name, node) + if target: + state.import_names.add(target.split(".", 1)[0]) + _append_module_dep( + module_name=module_name, + target=target, + import_type="from_import", + line=int(getattr(node, "lineno", 0)), + state=state, + ) + + if node.module in _PROTOCOL_MODULE_NAMES: + for alias in node.names: + if alias.name == "Protocol": + state.protocol_symbol_aliases.add(alias.asname or alias.name) + + if not collect_referenced_names or not target: + return + + for alias in node.names: + if alias.name == "*": + continue + alias_name = alias.asname or alias.name + state.imported_symbol_bindings.setdefault(alias_name, set()).add( + f"{target}:{alias.name}" + ) + + +def _collect_load_reference_node( + *, + node: ast.AST, + state: _ModuleWalkState, +) -> None: + if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load): + state.referenced_names.add(node.id) 
+ state.name_nodes.append(node) + return + if isinstance(node, ast.Attribute) and isinstance(node.ctx, ast.Load): + state.referenced_names.add(node.attr) + state.attr_nodes.append(node) + + +def _is_protocol_class( + class_node: ast.ClassDef, + *, + protocol_symbol_aliases: frozenset[str], + protocol_module_aliases: frozenset[str], +) -> bool: + for base in class_node.bases: + base_name = _dotted_expr_name(base) + if base_name is None: + continue + if base_name in protocol_symbol_aliases: + return True + if "." in base_name and base_name.rsplit(".", 1)[-1] == "Protocol": + module_alias = base_name.rsplit(".", 1)[0] + if module_alias in protocol_module_aliases: + return True + return False + + +def _is_non_runtime_candidate(node: _qualnames.FunctionNode) -> bool: + for decorator in node.decorator_list: + name = _dotted_expr_name(decorator) + if name is None: + continue + terminal = name.rsplit(".", 1)[-1] + if terminal in {"overload", "abstractmethod"}: + return True + return False + + +def _dead_candidate_kind(local_name: str) -> Literal["function", "method"]: + return "method" if "." in local_name else "function" + + +def _should_skip_dead_candidate( + local_name: str, + node: _qualnames.FunctionNode, + *, + protocol_class_qualnames: set[str], +) -> bool: + if _is_non_runtime_candidate(node): + return True + if "." 
not in local_name: + return False + owner_qualname = local_name.rsplit(".", 1)[0] + return owner_qualname in protocol_class_qualnames + + +def _build_dead_candidate( + *, + module_name: str, + local_name: str, + node: _NamedDeclarationNode, + filepath: str, + kind: Literal["class", "function", "method"], + suppression_index: Mapping[SuppressionTargetKey, tuple[str, ...]], + start_line: int, + end_line: int, +) -> DeadCandidate: + qualname = f"{module_name}:{local_name}" + return DeadCandidate( + qualname=qualname, + local_name=node.name, + filepath=filepath, + start_line=start_line, + end_line=end_line, + kind=kind, + suppressed_rules=suppression_index.get( + suppression_target_key( + filepath=filepath, + qualname=qualname, + start_line=start_line, + end_line=end_line, + kind=kind, + ), + (), + ), + ) + + +def _dead_candidate_for_unit( + *, + module_name: str, + local_name: str, + node: _qualnames.FunctionNode, + filepath: str, + suppression_index: Mapping[SuppressionTargetKey, tuple[str, ...]], + protocol_class_qualnames: set[str], +) -> DeadCandidate | None: + span = _node_line_span(node) + if span is None: + return None + if _should_skip_dead_candidate( + local_name, + node, + protocol_class_qualnames=protocol_class_qualnames, + ): + return None + start, end = span + return _build_dead_candidate( + module_name=module_name, + local_name=local_name, + node=node, + filepath=filepath, + kind=_dead_candidate_kind(local_name), + suppression_index=suppression_index, + start_line=start, + end_line=end, + ) + + +def _resolve_referenced_qualnames( + *, + module_name: str, + collector: _qualnames.QualnameCollector, + state: _ModuleWalkState, +) -> frozenset[str]: + top_level_class_by_name = { + class_qualname: class_qualname + for class_qualname, _class_node in collector.class_nodes + if "." not in class_qualname + } + local_method_qualnames = frozenset( + f"{module_name}:{local_name}" + for local_name, _node in collector.units + if "." 
class _ModuleWalkResult(NamedTuple):
    """Aggregated artifacts of one full-module AST walk.

    Produced by _collect_module_walk_data; consumers pick the fields they
    need so the tree is only traversed once.
    """

    # Local names introduced by import statements.
    import_names: frozenset[str]
    # Inter-module dependency edges, deterministically sorted.
    module_deps: tuple[ModuleDep, ...]
    # Bare names seen in Load context (empty when collection was disabled).
    referenced_names: frozenset[str]
    # Qualified names resolved from the referenced names.
    referenced_qualnames: frozenset[str]
    # Local aliases that refer to Protocol (symbol- and module-level).
    protocol_symbol_aliases: frozenset[str]
    protocol_module_aliases: frozenset[str]


def _collect_module_walk_data(
    *,
    tree: ast.AST,
    module_name: str,
    collector: _qualnames.QualnameCollector,
    collect_referenced_names: bool,
) -> _ModuleWalkResult:
    """Single ast.walk that collects imports, deps, names, qualnames & protocol aliases.

    Reduces the hot path to one tree walk plus one local qualname resolution phase.
    """
    state = _ModuleWalkState()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            _collect_import_node(
                node=node,
                module_name=module_name,
                state=state,
                collect_referenced_names=collect_referenced_names,
            )
        elif isinstance(node, ast.ImportFrom):
            _collect_import_from_node(
                node=node,
                module_name=module_name,
                state=state,
                collect_referenced_names=collect_referenced_names,
            )
        elif collect_referenced_names:
            # Non-import nodes only matter when reference tracking is on.
            _collect_load_reference_node(node=node, state=state)

    # Sort edges so downstream output and hashes are deterministic.
    deps_sorted = tuple(
        sorted(
            state.deps,
            key=lambda dep: (dep.source, dep.target, dep.import_type, dep.line),
        )
    )
    resolved = (
        _resolve_referenced_qualnames(
            module_name=module_name,
            collector=collector,
            state=state,
        )
        if collect_referenced_names
        else frozenset()
    )

    return _ModuleWalkResult(
        import_names=frozenset(state.import_names),
        module_deps=deps_sorted,
        referenced_names=frozenset(state.referenced_names),
        referenced_qualnames=resolved,
        protocol_symbol_aliases=frozenset(state.protocol_symbol_aliases),
        protocol_module_aliases=frozenset(state.protocol_module_aliases),
    )


def _collect_dead_candidates(
    *,
    filepath: str,
    module_name: str,
    collector: _qualnames.QualnameCollector,
    protocol_symbol_aliases: frozenset[str] = frozenset({"Protocol"}),
    protocol_module_aliases: frozenset[str] = frozenset(
        {"typing", "typing_extensions"}
    ),
    suppression_rules_by_target: Mapping[SuppressionTargetKey, tuple[str, ...]]
    | None = None,
) -> tuple[DeadCandidate, ...]:
    """Collect dead-code candidates for every unit and class in the module.

    Protocol classes are identified first so unit candidates can treat their
    members specially (handled inside _dead_candidate_for_unit). The result
    is sorted by (filepath, start, end, qualname) for deterministic output.
    """
    protocol_class_qualnames = {
        class_qualname
        for class_qualname, class_node in collector.class_nodes
        if _is_protocol_class(
            class_node,
            protocol_symbol_aliases=protocol_symbol_aliases,
            protocol_module_aliases=protocol_module_aliases,
        )
    }

    candidates: list[DeadCandidate] = []
    suppression_index = (
        suppression_rules_by_target if suppression_rules_by_target is not None else {}
    )
    for local_name, node in collector.units:
        candidate = _dead_candidate_for_unit(
            module_name=module_name,
            local_name=local_name,
            node=node,
            filepath=filepath,
            suppression_index=suppression_index,
            protocol_class_qualnames=protocol_class_qualnames,
        )
        if candidate is not None:
            candidates.append(candidate)

    # Classes are always candidates when they carry a usable line span.
    for class_qualname, class_node in collector.class_nodes:
        span = _node_line_span(class_node)
        if span is not None:
            start, end = span
            candidates.append(
                _build_dead_candidate(
                    module_name=module_name,
                    local_name=class_qualname,
                    node=class_node,
                    filepath=filepath,
                    kind="class",
                    suppression_index=suppression_index,
                    start_line=start,
                    end_line=end,
                )
            )

    return tuple(
        sorted(
            candidates,
            key=lambda item: (
                item.filepath,
                item.start_line,
                item.end_line,
                item.qualname,
            ),
        )
    )


def _collect_declaration_targets(
    *,
    filepath: str,
    module_name: str,
    collector: _qualnames.QualnameCollector,
    source_tokens: tuple[tokenize.TokenInfo, ...] = (),
    source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None,
    include_inline_lines: bool = False,
) -> tuple[DeclarationTarget, ...]:
    """Build DeclarationTarget records for all functions, methods and classes.

    When include_inline_lines is true, each target additionally records the
    line where its declaration header ends (for inline suppression binding);
    otherwise that field is left as None to skip the token-level work.
    """
    declarations: list[DeclarationTarget] = []
    # A dotted local name marks a method; plain names are functions.
    declaration_specs: list[
        tuple[str, ast.AST, Literal["function", "method", "class"]]
    ] = [
        (
            local_name,
            node,
            "method" if "." in local_name else "function",
        )
        for local_name, node in collector.units
    ]
    declaration_specs.extend(
        (class_qualname, class_node, "class")
        for class_qualname, class_node in collector.class_nodes
    )

    for qualname_suffix, node, kind in declaration_specs:
        start = int(getattr(node, "lineno", 0))
        end = int(getattr(node, "end_lineno", 0))
        if start > 0 and end > 0:
            declaration_end_line = (
                _declaration_end_line(
                    node,
                    source_tokens=source_tokens,
                    source_token_index=source_token_index,
                )
                if include_inline_lines
                else None
            )
            declarations.append(
                DeclarationTarget(
                    filepath=filepath,
                    qualname=f"{module_name}:{qualname_suffix}",
                    start_line=start,
                    end_line=end,
                    kind=kind,
                    declaration_end_line=declaration_end_line,
                )
            )

    return tuple(
        sorted(
            declarations,
            key=lambda item: (
                item.filepath,
                item.start_line,
                item.end_line,
                item.qualname,
                item.kind,
            ),
        )
    )


def _build_suppression_index_for_source(
    *,
    source: str,
    filepath: str,
    module_name: str,
    collector: _qualnames.QualnameCollector,
) -> Mapping[SuppressionTargetKey, tuple[str, ...]]:
    """Bind suppression directives found in *source* to their declarations.

    Returns an empty mapping when the file has no directives, so callers pay
    nothing on the common path. Tokenization is only performed when at least
    one directive uses inline binding.
    """
    suppression_directives = extract_suppression_directives(source)
    if not suppression_directives:
        return {}

    needs_inline_binding = any(
        directive.binding == "inline" for directive in suppression_directives
    )
    source_tokens: tuple[tokenize.TokenInfo, ...] = ()
    source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None
    if needs_inline_binding:
        source_tokens = _source_tokens(source)
        if source_tokens:
            source_token_index = _build_declaration_token_index(source_tokens)

    declaration_targets = _collect_declaration_targets(
        filepath=filepath,
        module_name=module_name,
        collector=collector,
        source_tokens=source_tokens,
        source_token_index=source_token_index,
        include_inline_lines=needs_inline_binding,
    )
    suppression_bindings = bind_suppressions_to_declarations(
        directives=suppression_directives,
        declarations=declaration_targets,
    )
    return build_suppression_index(suppression_bindings)
= () + source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None + if needs_inline_binding: + source_tokens = _source_tokens(source) + if source_tokens: + source_token_index = _build_declaration_token_index(source_tokens) + + declaration_targets = _collect_declaration_targets( + filepath=filepath, + module_name=module_name, + collector=collector, + source_tokens=source_tokens, + source_token_index=source_token_index, + include_inline_lines=needs_inline_binding, + ) + suppression_bindings = bind_suppressions_to_declarations( + directives=suppression_directives, + declarations=declaration_targets, + ) + return build_suppression_index(suppression_bindings) diff --git a/codeclone/cfg.py b/codeclone/analysis/cfg.py similarity index 99% rename from codeclone/cfg.py rename to codeclone/analysis/cfg.py index f10811f..67838da 100644 --- a/codeclone/cfg.py +++ b/codeclone/analysis/cfg.py @@ -10,8 +10,8 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Protocol, cast +from ..meta_markers import CFG_META_PREFIX from .cfg_model import CFG, Block -from .meta_markers import CFG_META_PREFIX if TYPE_CHECKING: from collections.abc import Iterable diff --git a/codeclone/cfg_model.py b/codeclone/analysis/cfg_model.py similarity index 100% rename from codeclone/cfg_model.py rename to codeclone/analysis/cfg_model.py diff --git a/codeclone/analysis/class_metrics.py b/codeclone/analysis/class_metrics.py new file mode 100644 index 0000000..2d28d84 --- /dev/null +++ b/codeclone/analysis/class_metrics.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
def _node_line_span(node: ast.AST) -> tuple[int, int] | None:
    """Return the 1-based (start, end) line span of *node*, or None.

    None is returned for nodes that carry no positive lineno/end_lineno
    (e.g. synthetic nodes without source positions).
    """
    first_line = int(getattr(node, "lineno", 0))
    last_line = int(getattr(node, "end_lineno", 0))
    if first_line > 0 and last_line > 0:
        return first_line, last_line
    return None


def _class_metrics_for_node(
    *,
    module_name: str,
    class_qualname: str,
    class_node: ast.ClassDef,
    filepath: str,
    module_import_names: set[str],
    module_class_names: set[str],
) -> ClassMetrics | None:
    """Compute coupling/cohesion metrics for one class definition.

    Returns None when the node carries no usable line span. CBO and LCOM4
    are delegated to the metrics package; this function only assembles the
    ClassMetrics record with derived risk levels.
    """
    lines = _node_line_span(class_node)
    if lines is None:
        return None
    first_line, last_line = lines

    cbo_value, coupled = compute_cbo(
        class_node,
        module_import_names=module_import_names,
        module_class_names=module_class_names,
    )
    lcom4_value, methods, instance_vars = compute_lcom4(class_node)

    return ClassMetrics(
        qualname=f"{module_name}:{class_qualname}",
        filepath=filepath,
        start_line=first_line,
        end_line=last_line,
        cbo=cbo_value,
        lcom4=lcom4_value,
        method_count=methods,
        instance_var_count=instance_vars,
        risk_coupling=coupling_risk(cbo_value),
        risk_cohesion=cohesion_risk(lcom4_value),
        coupled_classes=coupled,
    )
def sha1(s: str) -> str:
    """Hex SHA-1 digest of *s* encoded as UTF-8."""
    digest = hashlib.sha1(s.encode("utf-8"))
    return digest.hexdigest()


def bucket_loc(loc: int) -> str:
    """Map a LOC count onto a coarse size-bucket label.

    Bucketing helps avoid grouping wildly different sizes if desired.
    """
    for upper_bound, label in ((20, "0-19"), (50, "20-49"), (100, "50-99")):
        if loc < upper_bound:
            return label
    return "100+"


def _cfg_fingerprint_and_complexity(
    node: _qualnames.FunctionNode,
    cfg: NormalizationConfig,
    qualname: str,
) -> tuple[str, int]:
    """Fingerprint a function's control flow and report its complexity.

    Steps:
      1. Build a Control Flow Graph (CFG) from the function.
      2. Normalize each CFG block's statements per *cfg* (names, constants,
         annotations are abstracted).
      3. Render a canonical "BLOCK[i]:...|SUCCESSORS:..." string per block,
         with blocks and successors ordered by id.
      4. SHA-1 the joined representation.

    Two functions with identical control flow and normalized statements thus
    share a fingerprint even when variable names or constants differ.

    Args:
        node: Function AST node to fingerprint.
        cfg: Normalization configuration (what to ignore).
        qualname: Qualified name for logging/debugging.

    Returns:
        (40-character hex SHA-1 hash, cyclomatic complexity of the CFG).
    """
    flow_graph = CFGBuilder().build(qualname, node)
    normalizer = AstNormalizer(cfg)

    pieces: list[str] = []
    for basic_block in sorted(flow_graph.blocks, key=lambda b: b.id):
        successor_ids = ",".join(
            str(succ.id)
            for succ in sorted(basic_block.successors, key=lambda s: s.id)
        )
        block_dump = normalized_ast_dump_from_list(
            basic_block.statements,
            cfg,
            normalizer=normalizer,
        )
        pieces.append(f"BLOCK[{basic_block.id}]:{block_dump}|SUCCESSORS:{successor_ids}")
    return sha1("|".join(pieces)), cyclomatic_complexity(flow_graph)


_CFG_FINGERPRINT_AND_COMPLEXITY_IMPL = _cfg_fingerprint_and_complexity
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

import ast
import io
import math
import os
import signal
import tokenize
from contextlib import contextmanager
from typing import TYPE_CHECKING

from ..contracts.errors import ParseError

if TYPE_CHECKING:
    from collections.abc import Iterator, Mapping

# Wall-clock budget (seconds) for parsing a single source file.
PARSE_TIMEOUT_SECONDS = 5


class _ParseTimeoutError(Exception):
    """Raised by the SIGALRM handler; converted to ParseError at the API edge."""

    pass


# (line, column, token-string) key for fast declaration-keyword lookup.
_DeclarationTokenIndexKey = tuple[int, int, str]
_DECLARATION_TOKEN_STRINGS = frozenset({"def", "async", "class"})


def _consumed_cpu_seconds(resource_module: object) -> float:
    """Return consumed CPU seconds for the current process."""
    try:
        usage = resource_module.getrusage(  # type: ignore[attr-defined]
            resource_module.RUSAGE_SELF  # type: ignore[attr-defined]
        )
        return float(usage.ru_utime) + float(usage.ru_stime)
    except Exception:
        # Best effort: if getrusage is unavailable, report zero usage.
        return 0.0


@contextmanager
def _parse_limits(timeout_s: int) -> Iterator[None]:
    """Bound the enclosed work with a SIGALRM timer plus a CPU rlimit backstop.

    No-op on non-POSIX platforms or when timeout_s is non-positive. The
    previous SIGALRM handler, the timer, and (when changed) the RLIMIT_CPU
    values are restored on exit.
    """
    if os.name != "posix" or timeout_s <= 0:
        yield
        return

    old_handler = signal.getsignal(signal.SIGALRM)

    def _timeout_handler(_signum: int, _frame: object) -> None:
        raise _ParseTimeoutError("AST parsing timeout")

    old_limits: tuple[int, int] | None = None
    try:
        signal.signal(signal.SIGALRM, _timeout_handler)
        signal.setitimer(signal.ITIMER_REAL, timeout_s)

        try:
            import resource

            old_limits = resource.getrlimit(resource.RLIMIT_CPU)
            soft, hard = old_limits
            # RLIMIT_CPU counts total process CPU, so the new soft limit
            # must sit above what has already been consumed.
            consumed_cpu_s = _consumed_cpu_seconds(resource)
            desired_soft = max(1, timeout_s + math.ceil(consumed_cpu_s))
            if soft == resource.RLIM_INFINITY:
                candidate_soft = desired_soft
            else:
                # Never reduce finite soft limits and avoid immediate SIGXCPU
                # when the process already consumed more CPU than timeout_s.
                candidate_soft = max(soft, desired_soft)
            if hard == resource.RLIM_INFINITY:
                new_soft = candidate_soft
            else:
                new_soft = min(max(1, hard), candidate_soft)
            # Never lower hard limit: raising it back may be disallowed for
            # unprivileged processes and can lead to process termination later.
            resource.setrlimit(resource.RLIMIT_CPU, (new_soft, hard))
        except Exception:
            # If resource is unavailable or cannot be set, rely on alarm only.
            pass

        yield
    finally:
        # Disarm the timer before restoring the previous handler.
        signal.setitimer(signal.ITIMER_REAL, 0)
        signal.signal(signal.SIGALRM, old_handler)
        if old_limits is not None:
            try:
                import resource

                resource.setrlimit(resource.RLIMIT_CPU, old_limits)
            except Exception:
                pass


_PARSE_LIMITS_IMPL = _parse_limits


def _parse_with_limits(source: str, timeout_s: int) -> ast.AST:
    """Parse *source* under _parse_limits, mapping timeouts to ParseError."""
    try:
        with _parse_limits(timeout_s):
            return ast.parse(source)
    except _ParseTimeoutError as e:
        raise ParseError(str(e)) from e


_PARSE_WITH_LIMITS_IMPL = _parse_with_limits


def _source_tokens(source: str) -> tuple[tokenize.TokenInfo, ...]:
    """Tokenize *source*; an untokenizable file yields the empty tuple."""
    try:
        return tuple(tokenize.generate_tokens(io.StringIO(source).readline))
    except tokenize.TokenError:
        return ()


_SOURCE_TOKENS_IMPL = _source_tokens


def _declaration_token_name(node: ast.AST) -> str:
    """Keyword token starting *node*'s declaration: "class", "async" or "def".

    Async functions start with the "async" token, not "def".
    """
    if isinstance(node, ast.ClassDef):
        return "class"
    if isinstance(node, ast.AsyncFunctionDef):
        return "async"
    return "def"


def _declaration_token_index(
    *,
    source_tokens: tuple[tokenize.TokenInfo, ...],
    start_line: int,
    start_col: int,
    declaration_token: str,
    source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None,
) -> int | None:
    """Locate the stream index of a declaration keyword token.

    Uses the precomputed index when provided (O(1)); otherwise scans
    linearly. Returns None when no NAME token with the expected string
    starts at (start_line, start_col).
    """
    if source_token_index is not None:
        return source_token_index.get((start_line, start_col, declaration_token))
    for idx, token in enumerate(source_tokens):
        if token.start != (start_line, start_col):
            continue
        if token.type == tokenize.NAME and token.string == declaration_token:
            return idx
    return None


def _build_declaration_token_index(
    source_tokens: tuple[tokenize.TokenInfo, ...],
) -> Mapping[_DeclarationTokenIndexKey, int]:
    """Index declaration keywords by (line, col, string) for O(1) lookups."""
    indexed: dict[_DeclarationTokenIndexKey, int] = {}
    for idx, token in enumerate(source_tokens):
        if token.type == tokenize.NAME and token.string in _DECLARATION_TOKEN_STRINGS:
            indexed[(token.start[0], token.start[1], token.string)] = idx
    return indexed


def _scan_declaration_colon_line(
    *,
    source_tokens: tuple[tokenize.TokenInfo, ...],
    start_index: int,
) -> int | None:
    """Line of the header-terminating ":" after token *start_index*, or None.

    Bracket nesting is tracked so colons inside parameter annotations or
    default values are skipped; a NEWLINE outside brackets aborts the scan.
    """
    nesting = 0
    for token in source_tokens[start_index + 1 :]:
        if token.type == tokenize.OP:
            if token.string in "([{":
                nesting += 1
                continue
            if token.string in ")]}":
                if nesting > 0:
                    nesting -= 1
                continue
            if token.string == ":" and nesting == 0:
                return token.start[0]
        if token.type == tokenize.NEWLINE and nesting == 0:
            return None
    return None


def _fallback_declaration_end_line(node: ast.AST, *, start_line: int) -> int:
    """Approximate the header end as the line before the first body statement."""
    body = getattr(node, "body", None)
    if not isinstance(body, list) or not body:
        return start_line

    first_body_line = int(getattr(body[0], "lineno", 0))
    if first_body_line <= 0 or first_body_line == start_line:
        return start_line
    return max(start_line, first_body_line - 1)


def _declaration_end_line(
    node: ast.AST,
    *,
    source_tokens: tuple[tokenize.TokenInfo, ...],
    source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None,
) -> int:
    """Last line of a declaration header (through its ":"), 0 if unknown.

    Prefers an exact token scan; falls back to the AST approximation when
    the declaration keyword cannot be located in the token stream.
    """
    start_line = int(getattr(node, "lineno", 0))
    start_col = int(getattr(node, "col_offset", 0))
    if start_line <= 0:
        return 0

    declaration_token = _declaration_token_name(node)
    start_index = _declaration_token_index(
        source_tokens=source_tokens,
        start_line=start_line,
        start_col=start_col,
        declaration_token=declaration_token,
        source_token_index=source_token_index,
    )
    if start_index is None:
        return _fallback_declaration_end_line(node, start_line=start_line)

    colon_line = _scan_declaration_colon_line(
        source_tokens=source_tokens,
        start_index=start_index,
    )
    if colon_line is not None:
        return colon_line
    return _fallback_declaration_end_line(node, start_line=start_line)
source_tokens=source_tokens, + start_index=start_index, + ) + if colon_line is not None: + return colon_line + return _fallback_declaration_end_line(node, start_line=start_line) diff --git a/codeclone/analysis/units.py b/codeclone/analysis/units.py new file mode 100644 index 0000000..fc5ce5b --- /dev/null +++ b/codeclone/analysis/units.py @@ -0,0 +1,310 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import ast +from hashlib import sha1 as _sha1 + +from .. import qualnames as _qualnames +from ..blocks import extract_blocks, extract_segments +from ..contracts.errors import ParseError +from ..findings.structural.detectors import scan_function_structure +from ..metrics import risk_level +from ..metrics.adoption import collect_module_adoption +from ..metrics.api_surface import collect_module_api_surface +from ..models import ( + BlockUnit, + ClassMetrics, + FileMetrics, + SegmentUnit, + SourceStats, + StructuralFindingGroup, + Unit, +) +from ..paths import is_test_filepath +from ._module_walk import ( + _build_suppression_index_for_source, + _collect_dead_candidates, + _collect_module_walk_data, +) +from .class_metrics import _class_metrics_for_node, _node_line_span +from .fingerprint import _cfg_fingerprint_and_complexity, bucket_loc +from .normalizer import NormalizationConfig, stmt_hashes +from .parser import PARSE_TIMEOUT_SECONDS, _parse_with_limits + +__all__ = ["extract_units_and_stats_from_source"] + + +def _stmt_count(node: ast.AST) -> int: + body = getattr(node, "body", None) + return len(body) if isinstance(body, list) else 0 + + +_STMT_COUNT_IMPL = _stmt_count + + +def _raw_source_hash_for_range( + source_lines: list[str], + start_line: int, + end_line: int, +) -> str: + window = 
def _raw_source_hash_for_range(
    source_lines: list[str],
    start_line: int,
    end_line: int,
) -> str:
    """Whitespace-insensitive SHA-1 over source lines start_line..end_line."""
    window = "".join(source_lines[start_line - 1 : end_line]).strip()
    no_space = "".join(window.split())
    return _sha1(no_space.encode("utf-8")).hexdigest()


def _eligible_unit_shape(
    node: _qualnames.FunctionNode,
    *,
    min_loc: int,
    min_stmt: int,
) -> tuple[int, int, int, int] | None:
    """Return (start, end, loc, stmt_count) when the unit meets thresholds.

    None means the node has no usable span or is below the LOC/statement
    minimums and should be skipped entirely.
    """
    span = _node_line_span(node)
    if span is None:
        return None
    start, end = span
    if end < start:
        return None
    loc = end - start + 1
    stmt_count = _stmt_count(node)
    if loc < min_loc or stmt_count < min_stmt:
        return None
    return start, end, loc, stmt_count


def extract_units_and_stats_from_source(
    source: str,
    filepath: str,
    module_name: str,
    cfg: NormalizationConfig,
    min_loc: int,
    min_stmt: int,
    *,
    block_min_loc: int = 20,
    block_min_stmt: int = 8,
    segment_min_loc: int = 20,
    segment_min_stmt: int = 10,
    collect_structural_findings: bool = True,
    collect_api_surface: bool = False,
    api_include_private_modules: bool = False,
) -> tuple[
    list[Unit],
    list[BlockUnit],
    list[SegmentUnit],
    SourceStats,
    FileMetrics,
    list[StructuralFindingGroup],
]:
    """Analyze one module's source and produce all per-file artifacts.

    Orchestrates parsing (with timeout), the single-pass module walk, unit
    fingerprinting, block/segment extraction, class metrics, dead-code
    candidates, adoption coverage, and (optionally) the API surface.

    Raises:
        ParseError: when the source cannot be parsed, times out, or does
            not yield a module AST root.
    """
    try:
        tree = _parse_with_limits(source, PARSE_TIMEOUT_SECONDS)
    except SyntaxError as e:
        raise ParseError(f"Failed to parse {filepath}: {e}") from e
    if not isinstance(tree, ast.Module):
        raise ParseError(f"Failed to parse {filepath}: expected module AST root")

    collector = _qualnames.QualnameCollector()
    collector.visit(tree)
    source_lines = source.splitlines()
    source_line_count = len(source_lines)

    # Test files skip reference collection (see collect_referenced_names).
    is_test_file = is_test_filepath(filepath)

    # Single-pass AST walk replaces 3 separate functions / 4 walks.
    _walk = _collect_module_walk_data(
        tree=tree,
        module_name=module_name,
        collector=collector,
        collect_referenced_names=not is_test_file,
    )
    import_names = _walk.import_names
    module_deps = _walk.module_deps
    referenced_names = _walk.referenced_names
    referenced_qualnames = _walk.referenced_qualnames
    protocol_symbol_aliases = _walk.protocol_symbol_aliases
    protocol_module_aliases = _walk.protocol_module_aliases

    suppression_index = _build_suppression_index_for_source(
        source=source,
        filepath=filepath,
        module_name=module_name,
        collector=collector,
    )
    class_names = frozenset(class_node.name for _, class_node in collector.class_nodes)
    module_import_names = set(import_names)
    module_class_names = set(class_names)
    class_metrics: list[ClassMetrics] = []

    units: list[Unit] = []
    block_units: list[BlockUnit] = []
    segment_units: list[SegmentUnit] = []
    structural_findings: list[StructuralFindingGroup] = []

    for local_name, node in collector.units:
        unit_shape = _eligible_unit_shape(
            node,
            min_loc=min_loc,
            min_stmt=min_stmt,
        )
        if unit_shape is None:
            continue
        start, end, loc, stmt_count = unit_shape

        qualname = f"{module_name}:{local_name}"
        fingerprint, complexity = _cfg_fingerprint_and_complexity(node, cfg, qualname)
        structure_facts = scan_function_structure(
            node,
            filepath,
            qualname,
            collect_findings=collect_structural_findings,
        )
        depth = structure_facts.nesting_depth
        risk = risk_level(complexity)
        raw_hash = _raw_source_hash_for_range(source_lines, start, end)

        units.append(
            Unit(
                qualname=qualname,
                filepath=filepath,
                start_line=start,
                end_line=end,
                loc=loc,
                stmt_count=stmt_count,
                fingerprint=fingerprint,
                loc_bucket=bucket_loc(loc),
                cyclomatic_complexity=complexity,
                nesting_depth=depth,
                risk=risk,
                raw_hash=raw_hash,
                entry_guard_count=structure_facts.entry_guard_count,
                entry_guard_terminal_profile=(
                    structure_facts.entry_guard_terminal_profile
                ),
                entry_guard_has_side_effect_before=(
                    structure_facts.entry_guard_has_side_effect_before
                ),
                terminal_kind=structure_facts.terminal_kind,
                try_finally_profile=structure_facts.try_finally_profile,
                side_effect_order_profile=structure_facts.side_effect_order_profile,
            )
        )

        # __init__ bodies are excluded from block-level clone detection.
        needs_blocks = (
            not local_name.endswith("__init__")
            and loc >= block_min_loc
            and stmt_count >= block_min_stmt
        )
        needs_segments = loc >= segment_min_loc and stmt_count >= segment_min_stmt

        if needs_blocks or needs_segments:
            # Statement hashes are shared by both extraction paths.
            body = getattr(node, "body", None)
            hashes: list[str] | None = None
            if isinstance(body, list):
                hashes = stmt_hashes(body, cfg)

            if needs_blocks:
                block_units.extend(
                    extract_blocks(
                        node,
                        filepath=filepath,
                        qualname=qualname,
                        cfg=cfg,
                        block_size=4,
                        max_blocks=15,
                        precomputed_hashes=hashes,
                    )
                )

            if needs_segments:
                segment_units.extend(
                    extract_segments(
                        node,
                        filepath=filepath,
                        qualname=qualname,
                        cfg=cfg,
                        window_size=6,
                        max_segments=60,
                        precomputed_hashes=hashes,
                    )
                )

        if collect_structural_findings:
            structural_findings.extend(structure_facts.structural_findings)

    for class_qualname, class_node in collector.class_nodes:
        class_metric = _class_metrics_for_node(
            module_name=module_name,
            class_qualname=class_qualname,
            class_node=class_node,
            filepath=filepath,
            module_import_names=module_import_names,
            module_class_names=module_class_names,
        )
        if class_metric is not None:
            class_metrics.append(class_metric)

    dead_candidates = _collect_dead_candidates(
        filepath=filepath,
        module_name=module_name,
        collector=collector,
        protocol_symbol_aliases=protocol_symbol_aliases,
        protocol_module_aliases=protocol_module_aliases,
        suppression_rules_by_target=suppression_index,
    )

    # Deterministic ordering for stable reports and cache keys.
    sorted_class_metrics = tuple(
        sorted(
            class_metrics,
            key=lambda item: (
                item.filepath,
                item.start_line,
                item.end_line,
                item.qualname,
            ),
        )
    )
    typing_coverage, docstring_coverage = collect_module_adoption(
        tree=tree,
        module_name=module_name,
        filepath=filepath,
        collector=collector,
        imported_names=import_names,
    )
    api_surface = None
    if collect_api_surface:
        api_surface = collect_module_api_surface(
            tree=tree,
            module_name=module_name,
            filepath=filepath,
            collector=collector,
            imported_names=import_names,
            include_private_modules=api_include_private_modules,
        )

    return (
        units,
        block_units,
        segment_units,
        SourceStats(
            lines=source_line_count,
            functions=collector.function_count,
            methods=collector.method_count,
            classes=collector.class_count,
        ),
        FileMetrics(
            class_metrics=sorted_class_metrics,
            module_deps=module_deps,
            dead_candidates=dead_candidates,
            referenced_names=referenced_names,
            import_names=import_names,
            class_names=class_names,
            referenced_qualnames=referenced_qualnames,
            typing_coverage=typing_coverage,
            docstring_coverage=docstring_coverage,
            api_surface=api_surface,
        ),
        structural_findings,
    )
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

# Public re-exports of the baseline package (codeclone/baseline/__init__.py).
from .clone_baseline import Baseline
from .trust import (
    BASELINE_GENERATOR,
    BASELINE_UNTRUSTED_STATUSES,
    MAX_BASELINE_SIZE_BYTES,
    BaselineStatus,
    coerce_baseline_status,
    current_python_tag,
)

__all__ = [
    "BASELINE_GENERATOR",
    "BASELINE_UNTRUSTED_STATUSES",
    "MAX_BASELINE_SIZE_BYTES",
    "Baseline",
    "BaselineStatus",
    "coerce_baseline_status",
    "current_python_tag",
]

# --- codeclone/baseline/_metrics_baseline_contract.py ----------------------
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

from enum import Enum
from typing import Final

METRICS_BASELINE_GENERATOR: Final = "codeclone"
# Hard cap on a baseline file's size before it is rejected as too large.
MAX_METRICS_BASELINE_SIZE_BYTES: Final = 5 * 1024 * 1024


class MetricsBaselineStatus(str, Enum):
    """Load/validation outcome for a metrics baseline file."""

    OK = "ok"
    MISSING = "missing"
    TOO_LARGE = "too_large"
    INVALID_JSON = "invalid_json"
    INVALID_TYPE = "invalid_type"
    MISSING_FIELDS = "missing_fields"
    MISMATCH_SCHEMA_VERSION = "mismatch_schema_version"
    MISMATCH_PYTHON_VERSION = "mismatch_python_version"
    GENERATOR_MISMATCH = "generator_mismatch"
    INTEGRITY_MISSING = "integrity_missing"
    INTEGRITY_FAILED = "integrity_failed"


# Every status except OK marks a baseline that must not be trusted.
METRICS_BASELINE_UNTRUSTED_STATUSES: Final[frozenset[MetricsBaselineStatus]] = (
    frozenset(
        {
            MetricsBaselineStatus.MISSING,
            MetricsBaselineStatus.TOO_LARGE,
            MetricsBaselineStatus.INVALID_JSON,
            MetricsBaselineStatus.INVALID_TYPE,
            MetricsBaselineStatus.MISSING_FIELDS,
            MetricsBaselineStatus.MISMATCH_SCHEMA_VERSION,
            MetricsBaselineStatus.MISMATCH_PYTHON_VERSION,
            MetricsBaselineStatus.GENERATOR_MISMATCH,
            MetricsBaselineStatus.INTEGRITY_MISSING,
            MetricsBaselineStatus.INTEGRITY_FAILED,
        }
    )
)

# Schema key sets consumed by the validation module.
_TOP_LEVEL_REQUIRED_KEYS = frozenset({"meta", "metrics"})
_TOP_LEVEL_ALLOWED_KEYS = _TOP_LEVEL_REQUIRED_KEYS | frozenset(
    {"clones", "api_surface"}
)
_META_REQUIRED_KEYS = frozenset(
    {"generator", "schema_version", "python_tag", "created_at", "payload_sha256"}
)
_METRICS_REQUIRED_KEYS = frozenset(
    {
        "max_complexity",
        "high_risk_functions",
        "max_coupling",
        "high_coupling_classes",
        "max_cohesion",
        "low_cohesion_classes",
        "dependency_cycles",
        "dependency_max_depth",
        "dead_code_items",
        "health_score",
        "health_grade",
    }
)
# Adoption-coverage keys are optional for backward compatibility with
# baselines written before those metrics existed.
_METRICS_OPTIONAL_KEYS = frozenset(
    {
        "typing_param_permille",
        "typing_return_permille",
        "docstring_permille",
        "typing_any_count",
    }
)
_METRICS_PAYLOAD_SHA256_KEY = "metrics_payload_sha256"
_API_SURFACE_PAYLOAD_SHA256_KEY = "api_surface_payload_sha256"


def coerce_metrics_baseline_status(
    raw_status: str | MetricsBaselineStatus | None,
) -> MetricsBaselineStatus:
    """Normalize raw status input; unknown or non-string values degrade to
    INVALID_TYPE rather than raising."""
    if isinstance(raw_status, MetricsBaselineStatus):
        return raw_status
    if isinstance(raw_status, str):
        try:
            return MetricsBaselineStatus(raw_status)
        except ValueError:
            return MetricsBaselineStatus.INVALID_TYPE
    return MetricsBaselineStatus.INVALID_TYPE


__all__ = [
    "MAX_METRICS_BASELINE_SIZE_BYTES",
    "METRICS_BASELINE_GENERATOR",
    "METRICS_BASELINE_UNTRUSTED_STATUSES",
    "MetricsBaselineStatus",
    "coerce_metrics_baseline_status",
]
def snapshot_from_project_metrics(project_metrics: ProjectMetrics) -> MetricsSnapshot:
    """Condense a full ProjectMetrics into the comparable baseline snapshot.

    Collections are deduplicated and sorted so two runs over the same code
    produce byte-identical snapshots.
    """
    cycle_set = {tuple(cycle) for cycle in project_metrics.dependency_cycles}
    dead_items = {item.qualname for item in project_metrics.dead_code}
    return MetricsSnapshot(
        max_complexity=int(project_metrics.complexity_max),
        high_risk_functions=tuple(sorted(set(project_metrics.high_risk_functions))),
        max_coupling=int(project_metrics.coupling_max),
        high_coupling_classes=tuple(sorted(set(project_metrics.high_risk_classes))),
        max_cohesion=int(project_metrics.cohesion_max),
        low_cohesion_classes=tuple(sorted(set(project_metrics.low_cohesion_classes))),
        dependency_cycles=tuple(sorted(cycle_set)),
        dependency_max_depth=int(project_metrics.dependency_max_depth),
        dead_code_items=tuple(sorted(dead_items)),
        health_score=int(project_metrics.health.total),
        health_grade=project_metrics.health.grade,
        typing_param_permille=_permille(
            project_metrics.typing_param_annotated,
            project_metrics.typing_param_total,
        ),
        typing_return_permille=_permille(
            project_metrics.typing_return_annotated,
            project_metrics.typing_return_total,
        ),
        docstring_permille=_permille(
            project_metrics.docstring_public_documented,
            project_metrics.docstring_public_total,
        ),
        typing_any_count=int(project_metrics.typing_any_count),
    )


def _permille(numerator: int, denominator: int) -> int:
    """Ratio as integer parts-per-thousand; 0 when the denominator is empty."""
    if denominator <= 0:
        return 0
    return round((1000.0 * float(numerator)) / float(denominator))


def _canonical_json(payload: object) -> str:
    """Serialize with sorted keys so derived hashes are order-independent."""
    return orjson.dumps(payload, option=orjson.OPT_SORT_KEYS).decode("utf-8")


def _snapshot_payload(
    snapshot: MetricsSnapshot,
    *,
    include_adoption: bool = True,
) -> dict[str, object]:
    """JSON-ready mapping of the snapshot; adoption keys only when requested."""
    payload: dict[str, object] = {
        "max_complexity": int(snapshot.max_complexity),
        "high_risk_functions": list(snapshot.high_risk_functions),
        "max_coupling": int(snapshot.max_coupling),
        "high_coupling_classes": list(snapshot.high_coupling_classes),
        "max_cohesion": int(snapshot.max_cohesion),
        "low_cohesion_classes": list(snapshot.low_cohesion_classes),
        "dependency_cycles": [list(cycle) for cycle in snapshot.dependency_cycles],
        "dependency_max_depth": int(snapshot.dependency_max_depth),
        "dead_code_items": list(snapshot.dead_code_items),
        "health_score": int(snapshot.health_score),
        "health_grade": snapshot.health_grade,
    }
    if include_adoption:
        payload["typing_param_permille"] = int(snapshot.typing_param_permille)
        payload["typing_return_permille"] = int(snapshot.typing_return_permille)
        payload["docstring_permille"] = int(snapshot.docstring_permille)
        payload["typing_any_count"] = int(snapshot.typing_any_count)
    return payload


def _compute_payload_sha256(
    snapshot: MetricsSnapshot,
    *,
    include_adoption: bool = True,
) -> str:
    """SHA-256 of the canonical JSON form of the metrics payload."""
    body = _snapshot_payload(snapshot, include_adoption=include_adoption)
    return hashlib.sha256(_canonical_json(body).encode("utf-8")).hexdigest()


def _has_coverage_adoption_snapshot(metrics_obj: dict[str, object]) -> bool:
    """True when all three adoption-coverage keys are present."""
    required = (
        "typing_param_permille",
        "typing_return_permille",
        "docstring_permille",
    )
    return all(key in metrics_obj for key in required)


def _api_surface_snapshot_payload(
    snapshot: ApiSurfaceSnapshot,
    *,
    root: Path | None = None,
    legacy_qualname: bool = False,
) -> dict[str, object]:
    """JSON-ready view of the API-surface snapshot, deterministically ordered.

    With legacy_qualname the symbol key is "qualname" (full form); otherwise
    it is "local_name" with the module prefix stripped.
    """
    modules_payload: list[dict[str, object]] = []
    for mod in sorted(snapshot.modules, key=lambda item: (item.filepath, item.module)):
        symbols_payload: list[dict[str, object]] = []
        for sym in sorted(mod.symbols, key=lambda item: item.qualname):
            if legacy_qualname:
                name_key, name_value = "qualname", sym.qualname
            else:
                name_key = "local_name"
                name_value = _local_name_from_qualname(
                    module=mod.module,
                    qualname=sym.qualname,
                )
            symbols_payload.append(
                {
                    name_key: name_value,
                    "kind": sym.kind,
                    "start_line": sym.start_line,
                    "end_line": sym.end_line,
                    "params": [
                        {
                            "name": arg.name,
                            "kind": arg.kind,
                            "has_default": arg.has_default,
                            "annotation_hash": arg.annotation_hash,
                        }
                        for arg in sym.params
                    ],
                    "returns_hash": sym.returns_hash,
                    "exported_via": sym.exported_via,
                }
            )
        modules_payload.append(
            {
                "module": mod.module,
                "filepath": wire_filepath_from_runtime(mod.filepath, root=root),
                "all_declared": list(mod.all_declared or ()),
                "symbols": symbols_payload,
            }
        )
    return {"modules": modules_payload}
list(module.all_declared or ()), + "symbols": [ + { + ("qualname" if legacy_qualname else "local_name"): ( + symbol.qualname + if legacy_qualname + else _local_name_from_qualname( + module=module.module, + qualname=symbol.qualname, + ) + ), + "kind": symbol.kind, + "start_line": symbol.start_line, + "end_line": symbol.end_line, + "params": [ + { + "name": param.name, + "kind": param.kind, + "has_default": param.has_default, + "annotation_hash": param.annotation_hash, + } + for param in symbol.params + ], + "returns_hash": symbol.returns_hash, + "exported_via": symbol.exported_via, + } + for symbol in sorted( + module.symbols, + key=lambda item: item.qualname, + ) + ], + } + for module in sorted( + snapshot.modules, + key=lambda item: (item.filepath, item.module), + ) + ] + } + + +def _compute_api_surface_payload_sha256( + snapshot: ApiSurfaceSnapshot, + *, + root: Path | None = None, +) -> str: + canonical = _canonical_json(_api_surface_snapshot_payload(snapshot, root=root)) + return hashlib.sha256(canonical.encode("utf-8")).hexdigest() + + +def _compute_legacy_api_surface_payload_sha256( + snapshot: ApiSurfaceSnapshot, + *, + root: Path | None = None, +) -> str: + canonical = _canonical_json( + _api_surface_snapshot_payload(snapshot, root=root, legacy_qualname=True) + ) + return hashlib.sha256(canonical.encode("utf-8")).hexdigest() + + +def _compose_api_surface_qualname(*, module: str, local_name: str) -> str: + return f"{module}:{local_name}" + + +def _local_name_from_qualname(*, module: str, qualname: str) -> str: + prefix = f"{module}:" + if qualname.startswith(prefix): + return qualname[len(prefix) :] + return qualname + + +def _build_payload( + *, + snapshot: MetricsSnapshot, + schema_version: str, + python_tag: str, + generator_name: str, + generator_version: str, + created_at: str, + include_adoption: bool = True, + api_surface_snapshot: ApiSurfaceSnapshot | None = None, + api_surface_root: Path | None = None, +) -> dict[str, Any]: + payload_sha256 = 
_compute_payload_sha256( + snapshot, + include_adoption=include_adoption, + ) + payload: dict[str, Any] = { + "meta": { + "generator": { + "name": generator_name, + "version": generator_version, + }, + "schema_version": schema_version, + "python_tag": python_tag, + "created_at": created_at, + "payload_sha256": payload_sha256, + }, + "metrics": _snapshot_payload( + snapshot, + include_adoption=include_adoption, + ), + } + if api_surface_snapshot is not None: + payload["meta"][_API_SURFACE_PAYLOAD_SHA256_KEY] = ( + _compute_api_surface_payload_sha256( + api_surface_snapshot, + root=api_surface_root, + ) + ) + payload["api_surface"] = _api_surface_snapshot_payload( + api_surface_snapshot, + root=api_surface_root, + ) + return payload + + +__all__ = [ + "snapshot_from_project_metrics", +] diff --git a/codeclone/baseline/_metrics_baseline_validation.py b/codeclone/baseline/_metrics_baseline_validation.py new file mode 100644 index 0000000..831991d --- /dev/null +++ b/codeclone/baseline/_metrics_baseline_validation.py @@ -0,0 +1,569 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from json import JSONDecodeError +from pathlib import Path +from typing import Any, Literal, cast + +from ..cache.projection import runtime_filepath_from_wire +from ..contracts import BASELINE_SCHEMA_VERSION +from ..contracts.errors import BaselineValidationError +from ..models import ( + ApiParamSpec, + ApiSurfaceSnapshot, + MetricsSnapshot, + ModuleApiSurface, + PublicSymbol, +) +from ..utils.json_io import read_json_object as _read_json_object +from ..utils.json_io import ( + write_json_document_atomically as _write_json_document_atomically, +) +from ..utils.schema_validation import validate_top_level_structure +from ._metrics_baseline_contract import ( + _METRICS_PAYLOAD_SHA256_KEY, + _TOP_LEVEL_ALLOWED_KEYS, + _TOP_LEVEL_REQUIRED_KEYS, + MetricsBaselineStatus, +) +from ._metrics_baseline_payload import _compose_api_surface_qualname + + +def _is_compatible_metrics_schema( + *, + baseline_version: str | None, + expected_version: str, +) -> bool: + if baseline_version is None: + return False + baseline_major_minor = _parse_major_minor(baseline_version) + expected_major_minor = _parse_major_minor(expected_version) + if baseline_major_minor is None or expected_major_minor is None: + return baseline_version == expected_version + baseline_major, baseline_minor = baseline_major_minor + expected_major, expected_minor = expected_major_minor + return baseline_major == expected_major and baseline_minor <= expected_minor + + +def _parse_major_minor(version: str) -> tuple[int, int] | None: + parts = version.split(".") + if len(parts) != 2 or not all(part.isdigit() for part in parts): + return None + return int(parts[0]), int(parts[1]) + + +def _atomic_write_json(path: Path, payload: dict[str, object]) -> None: + _write_json_document_atomically( + path, + payload, + indent=True, + trailing_newline=True, + ) + + +def _load_json_object(path: Path) -> dict[str, 
Any]: + try: + return _read_json_object(path) + except OSError as e: + raise BaselineValidationError( + f"Cannot read metrics baseline file at {path}: {e}", + status=MetricsBaselineStatus.INVALID_JSON, + ) from e + except JSONDecodeError as e: + raise BaselineValidationError( + f"Corrupted metrics baseline file at {path}: {e}", + status=MetricsBaselineStatus.INVALID_JSON, + ) from e + except TypeError: + raise BaselineValidationError( + f"Metrics baseline payload must be an object at {path}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) from None + + +def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> None: + validate_top_level_structure( + payload, + path=path, + required_keys=_TOP_LEVEL_REQUIRED_KEYS, + allowed_keys=_TOP_LEVEL_ALLOWED_KEYS, + schema_label="metrics baseline", + missing_status=MetricsBaselineStatus.MISSING_FIELDS, + extra_status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _validate_required_keys( + payload: dict[str, Any], + required: frozenset[str], + *, + path: Path, +) -> None: + missing = required - set(payload.keys()) + if missing: + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: missing required fields: {', '.join(sorted(missing))}", + status=MetricsBaselineStatus.MISSING_FIELDS, + ) + + +def _validate_exact_keys( + payload: dict[str, Any], + required: frozenset[str], + *, + path: Path, +) -> None: + extra = set(payload.keys()) - set(required) + if extra: + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: unexpected fields: {', '.join(sorted(extra))}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _require_str(payload: dict[str, Any], key: str, *, path: Path) -> str: + value = payload.get(key) + if isinstance(value, str): + return value + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _extract_metrics_payload_sha256( + 
payload: dict[str, Any], + *, + path: Path, +) -> str: + direct = payload.get(_METRICS_PAYLOAD_SHA256_KEY) + if isinstance(direct, str): + return direct + return _require_str(payload, "payload_sha256", path=path) + + +def _extract_optional_payload_sha256( + payload: dict[str, Any], + *, + key: str, +) -> str | None: + value = payload.get(key) + return value if isinstance(value, str) else None + + +def _require_int(payload: dict[str, Any], key: str, *, path: Path) -> int: + value = payload.get(key) + if isinstance(value, bool): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be int", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if isinstance(value, int): + return value + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be int", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _optional_require_str( + payload: dict[str, Any], + key: str, + *, + path: Path, +) -> str | None: + value = payload.get(key) + if value is None: + return None + if isinstance(value, str): + return value + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _require_str_list(payload: dict[str, Any], key: str, *, path: Path) -> list[str]: + value = payload.get(key) + if not isinstance(value, list): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if not all(isinstance(item, str) for item in value): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: {key!r} must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + return value + + +def _parse_cycles( + payload: dict[str, Any], + *, + key: str, + path: Path, +) -> tuple[tuple[str, ...], ...]: + value = payload.get(key) + if not isinstance(value, list): + raise BaselineValidationError( + 
f"Invalid metrics baseline schema at {path}: {key!r} must be list", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + cycles: list[tuple[str, ...]] = [] + for cycle in value: + if not isinstance(cycle, list): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: {key!r} cycle item must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if not all(isinstance(item, str) for item in cycle): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: {key!r} cycle item must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + cycles.append(tuple(cycle)) + return tuple(sorted(set(cycles))) + + +def _parse_generator( + meta: dict[str, Any], + *, + path: Path, +) -> tuple[str, str | None]: + generator = meta.get("generator") + if isinstance(generator, str): + version_value = meta.get("generator_version") + if version_value is None: + version_value = meta.get("codeclone_version") + if version_value is None: + return generator, None + if not isinstance(version_value, str): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: generator_version must be str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + return generator, version_value + + if isinstance(generator, dict): + allowed_keys = {"name", "version"} + extra = set(generator.keys()) - allowed_keys + if extra: + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + f"unexpected generator keys: {', '.join(sorted(extra))}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + name = generator.get("name") + version = generator.get("version") + if not isinstance(name, str): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: generator.name must be str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if version is not None and not isinstance(version, str): + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: 
generator.version must be str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + return name, version if isinstance(version, str) else None + + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: generator must be object or str", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _require_embedded_clone_baseline_payload( + payload: dict[str, Any], + *, + path: Path, +) -> tuple[dict[str, Any], dict[str, Any]]: + meta_obj = payload.get("meta") + clones_obj = payload.get("clones") + if not isinstance(meta_obj, dict): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: 'meta' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if not isinstance(clones_obj, dict): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: 'clones' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + _require_str(meta_obj, "payload_sha256", path=path) + _require_str(meta_obj, "python_tag", path=path) + _require_str(meta_obj, "created_at", path=path) + functions = clones_obj.get("functions") + blocks = clones_obj.get("blocks") + if not isinstance(functions, list) or not all( + isinstance(item, str) for item in functions + ): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: 'clones.functions' must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if not isinstance(blocks, list) or not all( + isinstance(item, str) for item in blocks + ): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: 'clones.blocks' must be list[str]", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + return meta_obj, clones_obj + + +def _resolve_embedded_schema_version(meta: dict[str, Any], *, path: Path) -> str: + raw_version = _require_str(meta, "schema_version", path=path) + parts = raw_version.split(".") + if len(parts) not in {2, 3} or not all(part.isdigit() for part in parts): + raise BaselineValidationError( + "Invalid baseline schema at " + f"{path}: 
'schema_version' must be semver string", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + major = int(parts[0]) + if major >= 2: + return raw_version + return BASELINE_SCHEMA_VERSION + + +def _parse_snapshot( + payload: dict[str, Any], + *, + path: Path, +) -> MetricsSnapshot: + grade = _require_str(payload, "health_grade", path=path) + if grade not in {"A", "B", "C", "D", "F"}: + raise BaselineValidationError( + "Invalid metrics baseline schema at " + f"{path}: 'health_grade' must be one of A/B/C/D/F", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + return MetricsSnapshot( + max_complexity=_require_int(payload, "max_complexity", path=path), + high_risk_functions=tuple( + sorted(set(_require_str_list(payload, "high_risk_functions", path=path))) + ), + max_coupling=_require_int(payload, "max_coupling", path=path), + high_coupling_classes=tuple( + sorted(set(_require_str_list(payload, "high_coupling_classes", path=path))) + ), + max_cohesion=_require_int(payload, "max_cohesion", path=path), + low_cohesion_classes=tuple( + sorted(set(_require_str_list(payload, "low_cohesion_classes", path=path))) + ), + dependency_cycles=_parse_cycles(payload, key="dependency_cycles", path=path), + dependency_max_depth=_require_int(payload, "dependency_max_depth", path=path), + dead_code_items=tuple( + sorted(set(_require_str_list(payload, "dead_code_items", path=path))) + ), + health_score=_require_int(payload, "health_score", path=path), + health_grade=cast("Literal['A', 'B', 'C', 'D', 'F']", grade), + typing_param_permille=_optional_int( + payload, + "typing_param_permille", + path=path, + ), + typing_return_permille=_optional_int( + payload, + "typing_return_permille", + path=path, + ), + docstring_permille=_optional_int(payload, "docstring_permille", path=path), + typing_any_count=_optional_int(payload, "typing_any_count", path=path), + ) + + +def _optional_int(payload: dict[str, Any], key: str, *, path: Path) -> int: + value = payload.get(key) + if value is None: + return 
0 + return _require_int(payload, key, path=path) + + +def _parse_api_surface_snapshot( + payload: object, + *, + path: Path, + root: Path | None = None, +) -> ApiSurfaceSnapshot | None: + if payload is None: + return None + if not isinstance(payload, dict): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: 'api_surface' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + raw_modules = payload.get("modules", []) + if not isinstance(raw_modules, list): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + "'api_surface.modules' must be list", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + modules: list[ModuleApiSurface] = [] + for raw_module in raw_modules: + if not isinstance(raw_module, dict): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + "api surface module must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + module = _require_str(raw_module, "module", path=path) + wire_filepath = _require_str(raw_module, "filepath", path=path) + filepath = runtime_filepath_from_wire(wire_filepath, root=root) + all_declared = _require_str_list_or_none(raw_module, "all_declared", path=path) + raw_symbols = raw_module.get("symbols", []) + if not isinstance(raw_symbols, list): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + "api surface symbols must be list", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + symbols: list[PublicSymbol] = [] + for raw_symbol in raw_symbols: + if not isinstance(raw_symbol, dict): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + "api surface symbol must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + local_name = _optional_require_str(raw_symbol, "local_name", path=path) + legacy_qualname = _optional_require_str(raw_symbol, "qualname", path=path) + if local_name is None and legacy_qualname is None: + raise BaselineValidationError( + 
f"Invalid metrics baseline schema at {path}: " + "api surface symbol requires 'local_name' or 'qualname'", + status=MetricsBaselineStatus.MISSING_FIELDS, + ) + qualname = ( + legacy_qualname + if local_name is None + else _compose_api_surface_qualname( + module=module, + local_name=local_name, + ) + ) + kind = _require_str(raw_symbol, "kind", path=path) + exported_via = _require_str(raw_symbol, "exported_via", path=path) + params_raw = raw_symbol.get("params", []) + if not isinstance(params_raw, list): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + "api surface params must be list", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + params: list[ApiParamSpec] = [] + for raw_param in params_raw: + if not isinstance(raw_param, dict): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + "api param must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + name = _require_str(raw_param, "name", path=path) + param_kind = _require_str(raw_param, "kind", path=path) + has_default = raw_param.get("has_default") + annotation_hash = _optional_require_str( + raw_param, + "annotation_hash", + path=path, + ) + if not isinstance(has_default, bool): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + "api param 'has_default' must be bool", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + params.append( + ApiParamSpec( + name=name, + kind=cast( + ( + "Literal['pos_only', 'pos_or_kw', " + "'vararg', 'kw_only', 'kwarg']" + ), + param_kind, + ), + has_default=has_default, + annotation_hash=annotation_hash or "", + ) + ) + symbols.append( + PublicSymbol( + qualname=qualname or "", + kind=cast( + "Literal['function', 'class', 'method', 'constant']", + kind, + ), + start_line=_require_int(raw_symbol, "start_line", path=path), + end_line=_require_int(raw_symbol, "end_line", path=path), + params=tuple(params), + returns_hash=_optional_require_str( + raw_symbol, + "returns_hash", + 
path=path, + ) + or "", + exported_via=cast("Literal['all', 'name']", exported_via), + ) + ) + modules.append( + ModuleApiSurface( + module=module, + filepath=filepath, + symbols=tuple(sorted(symbols, key=lambda item: item.qualname)), + all_declared=tuple(all_declared) if all_declared is not None else None, + ) + ) + return ApiSurfaceSnapshot( + modules=tuple(sorted(modules, key=lambda item: (item.filepath, item.module))) + ) + + +def _require_str_list_or_none( + payload: dict[str, Any], + key: str, + *, + path: Path, +) -> list[str] | None: + value = payload.get(key) + if value is None: + return None + return _require_str_list(payload, key, path=path) + + +__all__ = [ + "_atomic_write_json", + "_extract_metrics_payload_sha256", + "_is_compatible_metrics_schema", + "_load_json_object", + "_optional_require_str", + "_parse_api_surface_snapshot", + "_parse_cycles", + "_parse_generator", + "_parse_snapshot", + "_require_embedded_clone_baseline_payload", + "_require_int", + "_require_str", + "_require_str_list", + "_resolve_embedded_schema_version", + "_validate_exact_keys", + "_validate_required_keys", + "_validate_top_level_structure", +] diff --git a/codeclone/baseline.py b/codeclone/baseline/clone_baseline.py similarity index 54% rename from codeclone/baseline.py rename to codeclone/baseline/clone_baseline.py index c16c08c..4fc8ee5 100644 --- a/codeclone/baseline.py +++ b/codeclone/baseline/clone_baseline.py @@ -6,83 +6,26 @@ from __future__ import annotations -import hashlib import hmac import re -import sys -from datetime import datetime, timezone -from enum import Enum -from json import JSONDecodeError from pathlib import Path -from typing import TYPE_CHECKING, Any, Final +from typing import TYPE_CHECKING, Any -import orjson - -from . 
import __version__ -from ._json_io import read_json_object as _read_json_object -from ._json_io import write_json_document_atomically as _write_json_document_atomically -from ._schema_validation import validate_top_level_structure -from .contracts import ( +from .. import __version__ +from ..contracts import ( BASELINE_FINGERPRINT_VERSION, BASELINE_SCHEMA_VERSION, ) -from .errors import BaselineValidationError - -if TYPE_CHECKING: - from collections.abc import Collection, Mapping - -# Any: baseline JSON parsing/serialization boundary. Values are validated -# and narrowed before entering compatibility/integrity checks. - -BASELINE_GENERATOR = "codeclone" -_BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR = {1: 0, 2: 1} -MAX_BASELINE_SIZE_BYTES = 5 * 1024 * 1024 - - -class BaselineStatus(str, Enum): - OK = "ok" - MISSING = "missing" - TOO_LARGE = "too_large" - INVALID_JSON = "invalid_json" - INVALID_TYPE = "invalid_type" - MISSING_FIELDS = "missing_fields" - MISMATCH_SCHEMA_VERSION = "mismatch_schema_version" - MISMATCH_FINGERPRINT_VERSION = "mismatch_fingerprint_version" - MISMATCH_PYTHON_VERSION = "mismatch_python_version" - GENERATOR_MISMATCH = "generator_mismatch" - INTEGRITY_MISSING = "integrity_missing" - INTEGRITY_FAILED = "integrity_failed" - - -BASELINE_UNTRUSTED_STATUSES: Final[frozenset[BaselineStatus]] = frozenset( - { - BaselineStatus.MISSING, - BaselineStatus.TOO_LARGE, - BaselineStatus.INVALID_JSON, - BaselineStatus.INVALID_TYPE, - BaselineStatus.MISSING_FIELDS, - BaselineStatus.MISMATCH_SCHEMA_VERSION, - BaselineStatus.MISMATCH_FINGERPRINT_VERSION, - BaselineStatus.MISMATCH_PYTHON_VERSION, - BaselineStatus.GENERATOR_MISMATCH, - BaselineStatus.INTEGRITY_MISSING, - BaselineStatus.INTEGRITY_FAILED, - } +from ..contracts.errors import BaselineValidationError +from ..utils.json_io import ( + write_json_document_atomically as _write_json_document_atomically, ) +from ..utils.schema_validation import validate_top_level_structure +from . 
import trust as _trust +from .diff import diff_clone_groups - -def coerce_baseline_status( - raw_status: str | BaselineStatus | None, -) -> BaselineStatus: - if isinstance(raw_status, BaselineStatus): - return raw_status - if isinstance(raw_status, str): - try: - return BaselineStatus(raw_status) - except ValueError: - return BaselineStatus.INVALID_TYPE - return BaselineStatus.INVALID_TYPE - +if TYPE_CHECKING: + from collections.abc import Mapping _TOP_LEVEL_REQUIRED_KEYS = {"meta", "clones"} _TOP_LEVEL_OPTIONAL_KEYS = {"metrics", "api_surface"} @@ -98,7 +41,6 @@ def coerce_baseline_status( _CLONES_REQUIRED_KEYS = {"functions", "blocks"} _FUNCTION_ID_RE = re.compile(r"^[0-9a-f]{40}\|(?:\d+-\d+|\d+\+)$") _BLOCK_ID_RE = re.compile(r"^[0-9a-f]{40}\|[0-9a-f]{40}\|[0-9a-f]{40}\|[0-9a-f]{40}$") -_UTC_ISO8601_Z_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$") class Baseline: @@ -138,37 +80,37 @@ def load( except OSError as e: raise BaselineValidationError( f"Cannot stat baseline file at {self.path}: {e}", - status=BaselineStatus.INVALID_TYPE, + status=_trust.BaselineStatus.INVALID_TYPE, ) from e if not exists: return size_limit = ( - MAX_BASELINE_SIZE_BYTES if max_size_bytes is None else max_size_bytes + _trust.MAX_BASELINE_SIZE_BYTES if max_size_bytes is None else max_size_bytes ) - size = _safe_stat_size(self.path) + size = _trust._safe_stat_size(self.path) if size > size_limit: raise BaselineValidationError( "Baseline file is too large " f"({size} bytes, max {size_limit} bytes) at {self.path}. 
" "Increase --max-baseline-size-mb or regenerate baseline.", - status=BaselineStatus.TOO_LARGE, + status=_trust.BaselineStatus.TOO_LARGE, ) if preloaded_payload is None: - payload = _load_json_object(self.path) + payload = _trust._load_json_object(self.path) else: if not isinstance(preloaded_payload, dict): raise BaselineValidationError( f"Baseline payload must be an object at {self.path}", - status=BaselineStatus.INVALID_TYPE, + status=_trust.BaselineStatus.INVALID_TYPE, ) payload = preloaded_payload if _is_legacy_baseline_payload(payload): raise BaselineValidationError( "Baseline format is legacy (<=1.3.x) and must be regenerated. " "Please run --update-baseline.", - status=BaselineStatus.MISSING_FIELDS, + status=_trust.BaselineStatus.MISSING_FIELDS, ) _validate_top_level_structure(payload, path=self.path) @@ -178,21 +120,28 @@ def load( if not isinstance(meta_obj, dict): raise BaselineValidationError( f"Invalid baseline schema at {self.path}: 'meta' must be object", - status=BaselineStatus.INVALID_TYPE, + status=_trust.BaselineStatus.INVALID_TYPE, ) if not isinstance(clones_obj, dict): raise BaselineValidationError( f"Invalid baseline schema at {self.path}: 'clones' must be object", - status=BaselineStatus.INVALID_TYPE, + status=_trust.BaselineStatus.INVALID_TYPE, ) _validate_required_keys(meta_obj, _META_REQUIRED_KEYS, path=self.path) _validate_required_keys(clones_obj, _CLONES_REQUIRED_KEYS, path=self.path) _validate_exact_clone_keys(clones_obj, path=self.path) - generator, generator_version = _parse_generator_meta(meta_obj, path=self.path) - schema_version = _require_semver_str(meta_obj, "schema_version", path=self.path) - schema_major, _, _ = _parse_semver( + generator, generator_version = _trust._parse_generator_meta( + meta_obj, + path=self.path, + ) + schema_version = _trust._require_semver_str( + meta_obj, + "schema_version", + path=self.path, + ) + schema_major, _, _ = _trust._parse_semver( schema_version, key="schema_version", path=self.path, @@ 
-201,22 +150,28 @@ def load( raise BaselineValidationError( f"Invalid baseline schema at {self.path}: " "top-level 'metrics' requires baseline schema >= 2.0.", - status=BaselineStatus.MISMATCH_SCHEMA_VERSION, + status=_trust.BaselineStatus.MISMATCH_SCHEMA_VERSION, ) - fingerprint_version = _require_str( - meta_obj, "fingerprint_version", path=self.path + fingerprint_version = _trust._require_str( + meta_obj, + "fingerprint_version", + path=self.path, ) - python_tag = _require_python_tag(meta_obj, "python_tag", path=self.path) - created_at = _require_utc_iso8601_z(meta_obj, "created_at", path=self.path) - payload_sha256 = _require_str(meta_obj, "payload_sha256", path=self.path) + python_tag = _trust._require_python_tag(meta_obj, "python_tag", path=self.path) + created_at = _trust._require_utc_iso8601_z( + meta_obj, + "created_at", + path=self.path, + ) + payload_sha256 = _trust._require_str(meta_obj, "payload_sha256", path=self.path) - function_ids = _require_sorted_unique_ids( + function_ids = _trust._require_sorted_unique_ids( clones_obj, "functions", pattern=_FUNCTION_ID_RE, path=self.path, ) - block_ids = _require_sorted_unique_ids( + block_ids = _trust._require_sorted_unique_ids( clones_obj, "blocks", pattern=_BLOCK_ID_RE, @@ -298,60 +253,63 @@ def save(self) -> None: self.payload_sha256 = payload_sha256 def verify_compatibility(self, *, current_python_tag: str) -> None: - if self.generator != BASELINE_GENERATOR: + if self.generator != _trust.BASELINE_GENERATOR: raise BaselineValidationError( "Baseline generator mismatch: expected 'codeclone'.", - status=BaselineStatus.GENERATOR_MISMATCH, + status=_trust.BaselineStatus.GENERATOR_MISMATCH, ) if self.schema_version is None: raise BaselineValidationError( "Baseline schema version is missing.", - status=BaselineStatus.MISSING_FIELDS, + status=_trust.BaselineStatus.MISSING_FIELDS, ) if self.fingerprint_version is None: raise BaselineValidationError( "Baseline fingerprint version is missing.", - 
status=BaselineStatus.MISSING_FIELDS, + status=_trust.BaselineStatus.MISSING_FIELDS, ) if self.python_tag is None: raise BaselineValidationError( "Baseline python_tag is missing.", - status=BaselineStatus.MISSING_FIELDS, + status=_trust.BaselineStatus.MISSING_FIELDS, ) - schema_major, schema_minor, _ = _parse_semver( - self.schema_version, key="schema_version", path=self.path + schema_major, schema_minor, _ = _trust._parse_semver( + self.schema_version, + key="schema_version", + path=self.path, ) - max_minor = _BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR.get(schema_major) + max_minor = _trust._BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR.get(schema_major) if max_minor is None: supported = ",".join( - str(major) for major in sorted(_BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR) + str(major) + for major in sorted(_trust._BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR) ) raise BaselineValidationError( "Baseline schema version mismatch: " f"baseline={self.schema_version}, " f"supported_majors={supported}.", - status=BaselineStatus.MISMATCH_SCHEMA_VERSION, + status=_trust.BaselineStatus.MISMATCH_SCHEMA_VERSION, ) if schema_minor > max_minor: raise BaselineValidationError( "Baseline schema version is newer than supported: " f"baseline={self.schema_version}, " f"max={schema_major}.{max_minor}.", - status=BaselineStatus.MISMATCH_SCHEMA_VERSION, + status=_trust.BaselineStatus.MISMATCH_SCHEMA_VERSION, ) if self.fingerprint_version != BASELINE_FINGERPRINT_VERSION: raise BaselineValidationError( "Baseline fingerprint version mismatch: " f"baseline={self.fingerprint_version}, " f"expected={BASELINE_FINGERPRINT_VERSION}.", - status=BaselineStatus.MISMATCH_FINGERPRINT_VERSION, + status=_trust.BaselineStatus.MISMATCH_FINGERPRINT_VERSION, ) if self.python_tag != current_python_tag: raise BaselineValidationError( "Baseline python tag mismatch: " f"baseline={self.python_tag}, current={current_python_tag}.", - status=BaselineStatus.MISMATCH_PYTHON_VERSION, + status=_trust.BaselineStatus.MISMATCH_PYTHON_VERSION, ) 
self.verify_integrity() @@ -359,36 +317,36 @@ def verify_integrity(self) -> None: if not isinstance(self.payload_sha256, str): raise BaselineValidationError( "Baseline integrity payload hash is missing.", - status=BaselineStatus.INTEGRITY_MISSING, + status=_trust.BaselineStatus.INTEGRITY_MISSING, ) if len(self.payload_sha256) != 64: raise BaselineValidationError( "Baseline integrity payload hash is missing.", - status=BaselineStatus.INTEGRITY_MISSING, + status=_trust.BaselineStatus.INTEGRITY_MISSING, ) try: int(self.payload_sha256, 16) except ValueError as e: raise BaselineValidationError( "Baseline integrity payload hash is missing.", - status=BaselineStatus.INTEGRITY_MISSING, + status=_trust.BaselineStatus.INTEGRITY_MISSING, ) from e if self.schema_version is None: raise BaselineValidationError( "Baseline schema version is missing for integrity validation.", - status=BaselineStatus.MISSING_FIELDS, + status=_trust.BaselineStatus.MISSING_FIELDS, ) if self.fingerprint_version is None: raise BaselineValidationError( "Baseline fingerprint version is missing for integrity validation.", - status=BaselineStatus.MISSING_FIELDS, + status=_trust.BaselineStatus.MISSING_FIELDS, ) if self.python_tag is None: raise BaselineValidationError( "Baseline python_tag is missing for integrity validation.", - status=BaselineStatus.MISSING_FIELDS, + status=_trust.BaselineStatus.MISSING_FIELDS, ) - expected = _compute_payload_sha256( + expected = _trust._compute_payload_sha256( functions=self.functions, blocks=self.blocks, fingerprint_version=self.fingerprint_version, @@ -397,7 +355,7 @@ def verify_integrity(self) -> None: if not hmac.compare_digest(self.payload_sha256, expected): raise BaselineValidationError( "Baseline integrity check failed: payload_sha256 mismatch.", - status=BaselineStatus.INTEGRITY_FAILED, + status=_trust.BaselineStatus.INTEGRITY_FAILED, ) @staticmethod @@ -413,21 +371,24 @@ def from_groups( baseline = Baseline(path) baseline.functions = set(func_groups.keys()) 
baseline.blocks = set(block_groups.keys()) - baseline.generator = BASELINE_GENERATOR + baseline.generator = _trust.BASELINE_GENERATOR baseline.schema_version = schema_version or BASELINE_SCHEMA_VERSION baseline.fingerprint_version = ( fingerprint_version or BASELINE_FINGERPRINT_VERSION ) - baseline.python_tag = python_tag or current_python_tag() + baseline.python_tag = python_tag or _trust.current_python_tag() baseline.generator_version = generator_version or __version__ return baseline def diff( self, func_groups: Mapping[str, object], block_groups: Mapping[str, object] ) -> tuple[set[str], set[str]]: - new_funcs = set(func_groups.keys()) - self.functions - new_blocks = set(block_groups.keys()) - self.blocks - return new_funcs, new_blocks + return diff_clone_groups( + known_functions=self.functions, + known_blocks=self.blocks, + func_groups=func_groups, + block_groups=block_groups, + ) def _atomic_write_json(path: Path, payload: dict[str, Any]) -> None: @@ -439,36 +400,6 @@ def _atomic_write_json(path: Path, payload: dict[str, Any]) -> None: ) -def _safe_stat_size(path: Path) -> int: - try: - return path.stat().st_size - except OSError as e: - raise BaselineValidationError( - f"Cannot stat baseline file at {path}: {e}", - status=BaselineStatus.INVALID_TYPE, - ) from e - - -def _load_json_object(path: Path) -> dict[str, Any]: - try: - return _read_json_object(path) - except OSError as e: - raise BaselineValidationError( - f"Cannot read baseline file at {path}: {e}", - status=BaselineStatus.INVALID_JSON, - ) from e - except JSONDecodeError as e: - raise BaselineValidationError( - f"Corrupted baseline file at {path}: {e}", - status=BaselineStatus.INVALID_JSON, - ) from e - except TypeError: - raise BaselineValidationError( - f"Baseline payload must be an object at {path}", - status=BaselineStatus.INVALID_TYPE, - ) from None - - def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> None: validate_top_level_structure( payload, @@ -476,8 +407,8 @@ 
def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> Non required_keys=_TOP_LEVEL_REQUIRED_KEYS, allowed_keys=_TOP_LEVEL_ALLOWED_KEYS, schema_label="baseline", - missing_status=BaselineStatus.MISSING_FIELDS, - extra_status=BaselineStatus.INVALID_TYPE, + missing_status=_trust.BaselineStatus.MISSING_FIELDS, + extra_status=_trust.BaselineStatus.INVALID_TYPE, ) @@ -489,7 +420,7 @@ def _validate_required_keys( raise BaselineValidationError( f"Invalid baseline schema at {path}: missing required fields: " f"{', '.join(sorted(missing))}", - status=BaselineStatus.MISSING_FIELDS, + status=_trust.BaselineStatus.MISSING_FIELDS, ) @@ -500,7 +431,7 @@ def _validate_exact_clone_keys(clones: dict[str, Any], *, path: Path) -> None: raise BaselineValidationError( f"Invalid baseline schema at {path}: unexpected clone keys: " f"{', '.join(sorted(extra))}", - status=BaselineStatus.INVALID_TYPE, + status=_trust.BaselineStatus.INVALID_TYPE, ) @@ -512,7 +443,7 @@ def _preserve_embedded_metrics( path: Path, ) -> tuple[dict[str, Any] | None, str | None, dict[str, Any] | None, str | None]: try: - payload = _load_json_object(path) + payload = _trust._load_json_object(path) except BaselineValidationError: return None, None, None, None metrics_obj = payload.get("metrics") @@ -545,45 +476,6 @@ def _preserve_embedded_metrics( ) -def _parse_generator_meta( - meta_obj: dict[str, Any], *, path: Path -) -> tuple[str, str | None]: - raw_generator = meta_obj.get("generator") - - if isinstance(raw_generator, str): - generator_version = _optional_str(meta_obj, "generator_version", path=path) - if generator_version is None: - # Legacy alias for baselines produced before generator_version rename. 
- generator_version = _optional_str(meta_obj, "codeclone_version", path=path) - return raw_generator, generator_version - - if isinstance(raw_generator, dict): - allowed_keys = {"name", "version"} - extra = set(raw_generator.keys()) - allowed_keys - if extra: - raise BaselineValidationError( - f"Invalid baseline schema at {path}: unexpected generator keys: " - f"{', '.join(sorted(extra))}", - status=BaselineStatus.INVALID_TYPE, - ) - generator_name = _require_str(raw_generator, "name", path=path) - generator_version = _optional_str(raw_generator, "version", path=path) - - if generator_version is None: - generator_version = _optional_str(meta_obj, "generator_version", path=path) - if generator_version is None: - generator_version = _optional_str( - meta_obj, "codeclone_version", path=path - ) - - return generator_name, generator_version - - raise BaselineValidationError( - f"Invalid baseline schema at {path}: 'generator' must be string or object", - status=BaselineStatus.INVALID_TYPE, - ) - - def _baseline_payload( *, functions: set[str], @@ -595,16 +487,16 @@ def _baseline_payload( generator_version: str | None, created_at: str | None, ) -> dict[str, Any]: - resolved_generator = generator or BASELINE_GENERATOR + resolved_generator = generator or _trust.BASELINE_GENERATOR resolved_schema = schema_version or BASELINE_SCHEMA_VERSION resolved_fingerprint = fingerprint_version or BASELINE_FINGERPRINT_VERSION - resolved_python_tag = python_tag or current_python_tag() + resolved_python_tag = python_tag or _trust.current_python_tag() resolved_generator_version = generator_version or __version__ - resolved_created_at = created_at or _utc_now_z() + resolved_created_at = created_at or _trust._utc_now_z() sorted_functions = sorted(functions) sorted_blocks = sorted(blocks) - payload_sha256 = _compute_payload_sha256( + payload_sha256 = _trust._compute_payload_sha256( functions=sorted_functions, blocks=sorted_blocks, fingerprint_version=resolved_fingerprint, @@ -630,138 +522,11 
@@ def _baseline_payload( } -def _compute_payload_sha256( - *, - functions: Collection[str], - blocks: Collection[str], - fingerprint_version: str, - python_tag: str, -) -> str: - canonical = { - "blocks": sorted(blocks), - "fingerprint_version": fingerprint_version, - "functions": sorted(functions), - "python_tag": python_tag, - } - serialized = orjson.dumps(canonical, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(serialized).hexdigest() - - -def current_python_tag() -> str: - """Return the interpreter compatibility tag as an immutable string.""" - impl = sys.implementation.name - major, minor = sys.version_info[:2] - prefix = "cp" if impl == "cpython" else impl[:2] - return f"{prefix}{major}{minor}" - - -def _utc_now_z() -> str: - return ( - datetime.now(timezone.utc).replace(microsecond=0).strftime("%Y-%m-%dT%H:%M:%SZ") - ) - - -def _require_str(obj: dict[str, Any], key: str, *, path: Path) -> str: - value = obj.get(key) - if not isinstance(value, str): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' must be string", - status=BaselineStatus.INVALID_TYPE, - ) - return value - - -def _optional_str(obj: dict[str, Any], key: str, *, path: Path) -> str | None: - value = obj.get(key) - if value is None: - return None - if not isinstance(value, str): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' must be string", - status=BaselineStatus.INVALID_TYPE, - ) - return value - - -def _require_semver_str(obj: dict[str, Any], key: str, *, path: Path) -> str: - value = _require_str(obj, key, path=path) - _parse_semver(value, key=key, path=path) - return value - - -def _parse_semver(value: str, *, key: str, path: Path) -> tuple[int, int, int]: - parts = value.split(".") - if len(parts) not in {2, 3} or not all(part.isdigit() for part in parts): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' must be semver string", - status=BaselineStatus.INVALID_TYPE, - ) - if len(parts) == 
2: - major, minor = int(parts[0]), int(parts[1]) - patch = 0 - else: - major, minor, patch = int(parts[0]), int(parts[1]), int(parts[2]) - return major, minor, patch - - -def _require_python_tag(obj: dict[str, Any], key: str, *, path: Path) -> str: - value = _require_str(obj, key, path=path) - if not re.fullmatch(r"[a-z]{2}\d{2,3}", value): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' must look like 'cp313'", - status=BaselineStatus.INVALID_TYPE, - ) - return value - - -def _require_utc_iso8601_z(obj: dict[str, Any], key: str, *, path: Path) -> str: - value = _require_str(obj, key, path=path) - if not _UTC_ISO8601_Z_RE.fullmatch(value): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' must be UTC ISO-8601 with Z", - status=BaselineStatus.INVALID_TYPE, - ) - try: - datetime( - int(value[0:4]), - int(value[5:7]), - int(value[8:10]), - int(value[11:13]), - int(value[14:16]), - int(value[17:19]), - tzinfo=timezone.utc, - ) - except ValueError as e: - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' must be UTC ISO-8601 with Z", - status=BaselineStatus.INVALID_TYPE, - ) from e - return value - - -def _require_sorted_unique_ids( - obj: dict[str, Any], key: str, *, pattern: re.Pattern[str], path: Path -) -> list[str]: - value = obj.get(key) - if not isinstance(value, list): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' must be list[str]", - status=BaselineStatus.INVALID_TYPE, - ) - if not all(isinstance(item, str) for item in value): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' must be list[str]", - status=BaselineStatus.INVALID_TYPE, - ) - values = list(value) - if values != sorted(values) or len(values) != len(set(values)): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' must be sorted and unique", - status=BaselineStatus.INVALID_TYPE, - ) - if not all(pattern.fullmatch(item) 
for item in values): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: '{key}' has invalid id format", - status=BaselineStatus.INVALID_TYPE, - ) - return values +__all__ = [ + "_BLOCK_ID_RE", + "_FUNCTION_ID_RE", + "Baseline", + "_atomic_write_json", + "_baseline_payload", + "_preserve_embedded_metrics", +] diff --git a/codeclone/baseline/diff.py b/codeclone/baseline/diff.py new file mode 100644 index 0000000..8c6ca2c --- /dev/null +++ b/codeclone/baseline/diff.py @@ -0,0 +1,111 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Mapping, Set + +from ..metrics.api_surface import compare_api_surfaces +from ..models import ( + ApiBreakingChange, + ApiSurfaceSnapshot, + MetricsDiff, + MetricsSnapshot, +) + + +def diff_clone_groups( + *, + known_functions: Set[str], + known_blocks: Set[str], + func_groups: Mapping[str, object], + block_groups: Mapping[str, object], +) -> tuple[set[str], set[str]]: + new_funcs = set(func_groups.keys()) - known_functions + new_blocks = set(block_groups.keys()) - known_blocks + return new_funcs, new_blocks + + +def diff_metrics( + *, + baseline_snapshot: MetricsSnapshot | None, + current_snapshot: MetricsSnapshot, + baseline_api_surface: ApiSurfaceSnapshot | None, + current_api_surface: ApiSurfaceSnapshot | None, +) -> MetricsDiff: + snapshot = baseline_snapshot or MetricsSnapshot( + max_complexity=0, + high_risk_functions=(), + max_coupling=0, + high_coupling_classes=(), + max_cohesion=0, + low_cohesion_classes=(), + dependency_cycles=(), + dependency_max_depth=0, + dead_code_items=(), + health_score=0, + health_grade="F", + typing_param_permille=0, + 
typing_return_permille=0, + docstring_permille=0, + typing_any_count=0, + ) + + new_high_risk_functions = tuple( + sorted( + set(current_snapshot.high_risk_functions) + - set(snapshot.high_risk_functions) + ) + ) + new_high_coupling_classes = tuple( + sorted( + set(current_snapshot.high_coupling_classes) + - set(snapshot.high_coupling_classes) + ) + ) + new_cycles = tuple( + sorted( + set(current_snapshot.dependency_cycles) - set(snapshot.dependency_cycles) + ) + ) + new_dead_code = tuple( + sorted(set(current_snapshot.dead_code_items) - set(snapshot.dead_code_items)) + ) + + if baseline_api_surface is None: + added_api_symbols: tuple[str, ...] = () + api_breaking_changes: tuple[ApiBreakingChange, ...] = () + else: + added_api_symbols, api_breaking_changes = compare_api_surfaces( + baseline=baseline_api_surface, + current=current_api_surface, + strict_types=False, + ) + + return MetricsDiff( + new_high_risk_functions=new_high_risk_functions, + new_high_coupling_classes=new_high_coupling_classes, + new_cycles=new_cycles, + new_dead_code=new_dead_code, + health_delta=current_snapshot.health_score - snapshot.health_score, + typing_param_permille_delta=( + current_snapshot.typing_param_permille - snapshot.typing_param_permille + ), + typing_return_permille_delta=( + current_snapshot.typing_return_permille - snapshot.typing_return_permille + ), + docstring_permille_delta=( + current_snapshot.docstring_permille - snapshot.docstring_permille + ), + new_api_symbols=added_api_symbols, + new_api_breaking_changes=api_breaking_changes, + ) + + +__all__ = ["diff_clone_groups", "diff_metrics"] diff --git a/codeclone/baseline/metrics_baseline.py b/codeclone/baseline/metrics_baseline.py new file mode 100644 index 0000000..e2afa0e --- /dev/null +++ b/codeclone/baseline/metrics_baseline.py @@ -0,0 +1,458 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hmac +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING, Any, cast + +from .. import __version__ +from ..contracts import BASELINE_SCHEMA_VERSION, METRICS_BASELINE_SCHEMA_VERSION +from ..contracts.errors import BaselineValidationError +from ..models import ApiSurfaceSnapshot, MetricsDiff, MetricsSnapshot, ProjectMetrics +from ._metrics_baseline_contract import ( + _API_SURFACE_PAYLOAD_SHA256_KEY, + _META_REQUIRED_KEYS, + _METRICS_OPTIONAL_KEYS, + _METRICS_PAYLOAD_SHA256_KEY, + _METRICS_REQUIRED_KEYS, + MAX_METRICS_BASELINE_SIZE_BYTES, + METRICS_BASELINE_GENERATOR, + METRICS_BASELINE_UNTRUSTED_STATUSES, + MetricsBaselineStatus, + coerce_metrics_baseline_status, +) +from ._metrics_baseline_payload import ( + _build_payload, + _compute_api_surface_payload_sha256, + _compute_legacy_api_surface_payload_sha256, + _compute_payload_sha256, + _has_coverage_adoption_snapshot, + snapshot_from_project_metrics, +) +from ._metrics_baseline_validation import ( + _atomic_write_json, + _extract_metrics_payload_sha256, + _extract_optional_payload_sha256, + _is_compatible_metrics_schema, + _load_json_object, + _optional_require_str, + _parse_api_surface_snapshot, + _parse_generator, + _parse_snapshot, + _require_embedded_clone_baseline_payload, + _require_str, + _resolve_embedded_schema_version, + _validate_exact_keys, + _validate_required_keys, + _validate_top_level_structure, +) +from .diff import diff_metrics +from .trust import current_python_tag + +if TYPE_CHECKING: + from collections.abc import Mapping + + +def _now_utc_z() -> str: + return ( + datetime.now(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z") + ) + + +class MetricsBaseline: + __slots__ = ( + 
"api_surface_payload_sha256", + "api_surface_snapshot", + "created_at", + "generator_name", + "generator_version", + "has_coverage_adoption_snapshot", + "is_embedded_in_clone_baseline", + "path", + "payload_sha256", + "python_tag", + "schema_version", + "snapshot", + ) + + def __init__(self, path: str | Path) -> None: + self.path = Path(path) + self.generator_name: str | None = None + self.generator_version: str | None = None + self.schema_version: str | None = None + self.python_tag: str | None = None + self.created_at: str | None = None + self.payload_sha256: str | None = None + self.snapshot: MetricsSnapshot | None = None + self.has_coverage_adoption_snapshot = False + self.api_surface_payload_sha256: str | None = None + self.api_surface_snapshot: ApiSurfaceSnapshot | None = None + self.is_embedded_in_clone_baseline = False + + def load( + self, + *, + max_size_bytes: int | None = None, + preloaded_payload: dict[str, object] | None = None, + ) -> None: + try: + exists = self.path.exists() + except OSError as e: + raise BaselineValidationError( + f"Cannot stat metrics baseline file at {self.path}: {e}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) from e + if not exists: + return + + size_limit = ( + MAX_METRICS_BASELINE_SIZE_BYTES + if max_size_bytes is None + else max_size_bytes + ) + try: + file_size = self.path.stat().st_size + except OSError as e: + raise BaselineValidationError( + f"Cannot stat metrics baseline file at {self.path}: {e}", + status=MetricsBaselineStatus.INVALID_TYPE, + ) from e + if file_size > size_limit: + raise BaselineValidationError( + "Metrics baseline file is too large " + f"({file_size} bytes, max {size_limit} bytes) at {self.path}.", + status=MetricsBaselineStatus.TOO_LARGE, + ) + + if preloaded_payload is None: + payload = _load_json_object(self.path) + else: + if not isinstance(preloaded_payload, dict): + raise BaselineValidationError( + f"Metrics baseline payload must be an object at {self.path}", + 
status=MetricsBaselineStatus.INVALID_TYPE, + ) + payload = preloaded_payload + + _validate_top_level_structure(payload, path=self.path) + self.is_embedded_in_clone_baseline = "clones" in payload + + meta_obj = payload.get("meta") + metrics_obj = payload.get("metrics") + if not isinstance(meta_obj, dict): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {self.path}: " + "'meta' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + if not isinstance(metrics_obj, dict): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {self.path}: " + "'metrics' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + _validate_required_keys(meta_obj, _META_REQUIRED_KEYS, path=self.path) + _validate_required_keys(metrics_obj, _METRICS_REQUIRED_KEYS, path=self.path) + _validate_exact_keys( + metrics_obj, + _METRICS_REQUIRED_KEYS | _METRICS_OPTIONAL_KEYS, + path=self.path, + ) + + generator_name, generator_version = _parse_generator(meta_obj, path=self.path) + self.generator_name = generator_name + self.generator_version = generator_version + self.schema_version = _require_str(meta_obj, "schema_version", path=self.path) + self.python_tag = _require_str(meta_obj, "python_tag", path=self.path) + self.created_at = _require_str(meta_obj, "created_at", path=self.path) + self.payload_sha256 = _extract_metrics_payload_sha256( + meta_obj, + path=self.path, + ) + self.api_surface_payload_sha256 = _extract_optional_payload_sha256( + meta_obj, + key=_API_SURFACE_PAYLOAD_SHA256_KEY, + ) + self.snapshot = _parse_snapshot(metrics_obj, path=self.path) + self.has_coverage_adoption_snapshot = _has_coverage_adoption_snapshot( + metrics_obj + ) + self.api_surface_snapshot = _parse_api_surface_snapshot( + payload.get("api_surface"), + path=self.path, + root=self.path.parent, + ) + + def save(self) -> None: + if self.snapshot is None: + raise BaselineValidationError( + "Metrics baseline snapshot is missing.", + 
status=MetricsBaselineStatus.MISSING_FIELDS, + ) + + payload = _build_payload( + snapshot=self.snapshot, + schema_version=self.schema_version or METRICS_BASELINE_SCHEMA_VERSION, + python_tag=self.python_tag or current_python_tag(), + generator_name=self.generator_name or METRICS_BASELINE_GENERATOR, + generator_version=self.generator_version or __version__, + created_at=self.created_at or _now_utc_z(), + include_adoption=self.has_coverage_adoption_snapshot, + api_surface_snapshot=self.api_surface_snapshot, + api_surface_root=self.path.parent, + ) + payload_meta = cast("Mapping[str, Any]", payload["meta"]) + payload_metrics_hash = _require_str( + cast("dict[str, Any]", payload_meta), + "payload_sha256", + path=self.path, + ) + payload_api_surface_hash = _optional_require_str( + cast("dict[str, Any]", payload_meta), + _API_SURFACE_PAYLOAD_SHA256_KEY, + path=self.path, + ) + + existing: dict[str, Any] | None = None + try: + if self.path.exists(): + loaded = _load_json_object(self.path) + if "clones" in loaded: + existing = loaded + except BaselineValidationError as e: + raise BaselineValidationError( + f"Cannot read existing baseline file at {self.path}: {e}", + status=MetricsBaselineStatus.INVALID_JSON, + ) from e + + if existing is not None: + existing_meta, clones_obj = _require_embedded_clone_baseline_payload( + existing, + path=self.path, + ) + merged_schema_version = _resolve_embedded_schema_version( + existing_meta, + path=self.path, + ) + merged_meta = dict(existing_meta) + merged_meta["schema_version"] = merged_schema_version + merged_meta[_METRICS_PAYLOAD_SHA256_KEY] = payload_metrics_hash + if payload_api_surface_hash is None: + merged_meta.pop(_API_SURFACE_PAYLOAD_SHA256_KEY, None) + else: + merged_meta[_API_SURFACE_PAYLOAD_SHA256_KEY] = payload_api_surface_hash + merged_payload: dict[str, object] = { + "meta": merged_meta, + "clones": clones_obj, + "metrics": payload["metrics"], + } + api_surface_payload = payload.get("api_surface") + if 
api_surface_payload is not None: + merged_payload["api_surface"] = api_surface_payload + self.path.parent.mkdir(parents=True, exist_ok=True) + _atomic_write_json(self.path, merged_payload) + self.is_embedded_in_clone_baseline = True + self.schema_version = merged_schema_version + self.python_tag = _require_str(merged_meta, "python_tag", path=self.path) + self.created_at = _require_str(merged_meta, "created_at", path=self.path) + self.payload_sha256 = _require_str( + merged_meta, + _METRICS_PAYLOAD_SHA256_KEY, + path=self.path, + ) + self.api_surface_payload_sha256 = _optional_require_str( + merged_meta, + _API_SURFACE_PAYLOAD_SHA256_KEY, + path=self.path, + ) + self.generator_name, self.generator_version = _parse_generator( + merged_meta, + path=self.path, + ) + return + + self.path.parent.mkdir(parents=True, exist_ok=True) + _atomic_write_json(self.path, payload) + self.is_embedded_in_clone_baseline = False + self.schema_version = _require_str( + cast("dict[str, Any]", payload_meta), + "schema_version", + path=self.path, + ) + self.python_tag = _require_str( + cast("dict[str, Any]", payload_meta), + "python_tag", + path=self.path, + ) + self.created_at = _require_str( + cast("dict[str, Any]", payload_meta), + "created_at", + path=self.path, + ) + self.payload_sha256 = payload_metrics_hash + self.api_surface_payload_sha256 = payload_api_surface_hash + + def verify_compatibility(self, *, runtime_python_tag: str) -> None: + if self.generator_name != METRICS_BASELINE_GENERATOR: + raise BaselineValidationError( + "Metrics baseline generator mismatch: expected 'codeclone'.", + status=MetricsBaselineStatus.GENERATOR_MISMATCH, + ) + expected_schema = ( + BASELINE_SCHEMA_VERSION + if self.is_embedded_in_clone_baseline + else METRICS_BASELINE_SCHEMA_VERSION + ) + if not _is_compatible_metrics_schema( + baseline_version=self.schema_version, + expected_version=expected_schema, + ): + raise BaselineValidationError( + "Metrics baseline schema version mismatch: " + 
f"baseline={self.schema_version}, " + f"expected={expected_schema}.", + status=MetricsBaselineStatus.MISMATCH_SCHEMA_VERSION, + ) + if self.python_tag != runtime_python_tag: + raise BaselineValidationError( + "Metrics baseline python tag mismatch: " + f"baseline={self.python_tag}, current={runtime_python_tag}.", + status=MetricsBaselineStatus.MISMATCH_PYTHON_VERSION, + ) + self.verify_integrity() + + def verify_integrity(self) -> None: + if self.snapshot is None: + raise BaselineValidationError( + "Metrics baseline snapshot is missing.", + status=MetricsBaselineStatus.MISSING_FIELDS, + ) + if not isinstance(self.payload_sha256, str) or len(self.payload_sha256) != 64: + raise BaselineValidationError( + "Metrics baseline integrity payload hash is missing.", + status=MetricsBaselineStatus.INTEGRITY_MISSING, + ) + + expected = _compute_payload_sha256( + self.snapshot, + include_adoption=self.has_coverage_adoption_snapshot, + ) + if not hmac.compare_digest(self.payload_sha256, expected): + raise BaselineValidationError( + "Metrics baseline integrity check failed: payload_sha256 mismatch.", + status=MetricsBaselineStatus.INTEGRITY_FAILED, + ) + + if self.api_surface_snapshot is None: + return + if ( + not isinstance(self.api_surface_payload_sha256, str) + or len(self.api_surface_payload_sha256) != 64 + ): + raise BaselineValidationError( + "Metrics baseline API surface integrity payload hash is missing.", + status=MetricsBaselineStatus.INTEGRITY_MISSING, + ) + + expected_api = _compute_api_surface_payload_sha256( + self.api_surface_snapshot, + root=self.path.parent, + ) + legacy_absolute_expected_api = _compute_api_surface_payload_sha256( + self.api_surface_snapshot + ) + legacy_expected_api = _compute_legacy_api_surface_payload_sha256( + self.api_surface_snapshot, + root=self.path.parent, + ) + legacy_absolute_qualname_expected_api = ( + _compute_legacy_api_surface_payload_sha256(self.api_surface_snapshot) + ) + if not ( + 
hmac.compare_digest(self.api_surface_payload_sha256, expected_api) + or hmac.compare_digest( + self.api_surface_payload_sha256, + legacy_absolute_expected_api, + ) + or hmac.compare_digest( + self.api_surface_payload_sha256, + legacy_expected_api, + ) + or hmac.compare_digest( + self.api_surface_payload_sha256, + legacy_absolute_qualname_expected_api, + ) + ): + raise BaselineValidationError( + "Metrics baseline integrity check failed: " + "api_surface payload_sha256 mismatch.", + status=MetricsBaselineStatus.INTEGRITY_FAILED, + ) + + @staticmethod + def from_project_metrics( + *, + project_metrics: ProjectMetrics, + path: str | Path, + schema_version: str | None = None, + python_tag: str | None = None, + generator_version: str | None = None, + include_adoption: bool = True, + include_api_surface: bool = True, + ) -> MetricsBaseline: + baseline = MetricsBaseline(path) + baseline.generator_name = METRICS_BASELINE_GENERATOR + baseline.generator_version = generator_version or __version__ + baseline.schema_version = schema_version or METRICS_BASELINE_SCHEMA_VERSION + baseline.python_tag = python_tag or current_python_tag() + baseline.created_at = _now_utc_z() + baseline.snapshot = snapshot_from_project_metrics(project_metrics) + baseline.payload_sha256 = _compute_payload_sha256( + baseline.snapshot, + include_adoption=include_adoption, + ) + baseline.has_coverage_adoption_snapshot = include_adoption + baseline.api_surface_snapshot = ( + project_metrics.api_surface if include_api_surface else None + ) + baseline.api_surface_payload_sha256 = ( + _compute_api_surface_payload_sha256( + baseline.api_surface_snapshot, + root=baseline.path.parent, + ) + if baseline.api_surface_snapshot is not None + else None + ) + return baseline + + def diff(self, current: ProjectMetrics) -> MetricsDiff: + return diff_metrics( + baseline_snapshot=self.snapshot, + current_snapshot=snapshot_from_project_metrics(current), + baseline_api_surface=self.api_surface_snapshot, + 
current_api_surface=current.api_surface, + ) + + +__all__ = [ + "BASELINE_SCHEMA_VERSION", + "MAX_METRICS_BASELINE_SIZE_BYTES", + "METRICS_BASELINE_GENERATOR", + "METRICS_BASELINE_SCHEMA_VERSION", + "METRICS_BASELINE_UNTRUSTED_STATUSES", + "MetricsBaseline", + "MetricsBaselineStatus", + "coerce_metrics_baseline_status", + "current_python_tag", + "snapshot_from_project_metrics", +] diff --git a/codeclone/baseline/trust.py b/codeclone/baseline/trust.py new file mode 100644 index 0000000..df54168 --- /dev/null +++ b/codeclone/baseline/trust.py @@ -0,0 +1,302 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import re +import sys +from datetime import datetime, timezone +from enum import Enum +from json import JSONDecodeError +from pathlib import Path +from typing import TYPE_CHECKING, Any, Final + +import orjson + +from ..contracts.errors import BaselineValidationError +from ..utils.json_io import read_json_object as _read_json_object + +if TYPE_CHECKING: + from collections.abc import Collection + +BASELINE_GENERATOR = "codeclone" +_BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR = {1: 0, 2: 1} +MAX_BASELINE_SIZE_BYTES = 5 * 1024 * 1024 +_UTC_ISO8601_Z_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$") + + +class BaselineStatus(str, Enum): + OK = "ok" + MISSING = "missing" + TOO_LARGE = "too_large" + INVALID_JSON = "invalid_json" + INVALID_TYPE = "invalid_type" + MISSING_FIELDS = "missing_fields" + MISMATCH_SCHEMA_VERSION = "mismatch_schema_version" + MISMATCH_FINGERPRINT_VERSION = "mismatch_fingerprint_version" + MISMATCH_PYTHON_VERSION = "mismatch_python_version" + GENERATOR_MISMATCH = "generator_mismatch" + INTEGRITY_MISSING = "integrity_missing" + INTEGRITY_FAILED = "integrity_failed" + 
+ +BASELINE_UNTRUSTED_STATUSES: Final[frozenset[BaselineStatus]] = frozenset( + { + BaselineStatus.MISSING, + BaselineStatus.TOO_LARGE, + BaselineStatus.INVALID_JSON, + BaselineStatus.INVALID_TYPE, + BaselineStatus.MISSING_FIELDS, + BaselineStatus.MISMATCH_SCHEMA_VERSION, + BaselineStatus.MISMATCH_FINGERPRINT_VERSION, + BaselineStatus.MISMATCH_PYTHON_VERSION, + BaselineStatus.GENERATOR_MISMATCH, + BaselineStatus.INTEGRITY_MISSING, + BaselineStatus.INTEGRITY_FAILED, + } +) + + +def coerce_baseline_status( + raw_status: str | BaselineStatus | None, +) -> BaselineStatus: + if isinstance(raw_status, BaselineStatus): + return raw_status + if isinstance(raw_status, str): + try: + return BaselineStatus(raw_status) + except ValueError: + return BaselineStatus.INVALID_TYPE + return BaselineStatus.INVALID_TYPE + + +def _safe_stat_size(path: Path) -> int: + try: + return path.stat().st_size + except OSError as e: + raise BaselineValidationError( + f"Cannot stat baseline file at {path}: {e}", + status=BaselineStatus.INVALID_TYPE, + ) from e + + +def _load_json_object(path: Path) -> dict[str, Any]: + try: + return _read_json_object(path) + except OSError as e: + raise BaselineValidationError( + f"Cannot read baseline file at {path}: {e}", + status=BaselineStatus.INVALID_JSON, + ) from e + except JSONDecodeError as e: + raise BaselineValidationError( + f"Corrupted baseline file at {path}: {e}", + status=BaselineStatus.INVALID_JSON, + ) from e + except TypeError: + raise BaselineValidationError( + f"Baseline payload must be an object at {path}", + status=BaselineStatus.INVALID_TYPE, + ) from None + + +def _parse_generator_meta( + meta_obj: dict[str, Any], *, path: Path +) -> tuple[str, str | None]: + raw_generator = meta_obj.get("generator") + + if isinstance(raw_generator, str): + generator_version = _optional_str(meta_obj, "generator_version", path=path) + if generator_version is None: + generator_version = _optional_str(meta_obj, "codeclone_version", path=path) + return 
raw_generator, generator_version + + if isinstance(raw_generator, dict): + allowed_keys = {"name", "version"} + extra = set(raw_generator.keys()) - allowed_keys + if extra: + raise BaselineValidationError( + f"Invalid baseline schema at {path}: unexpected generator keys: " + f"{', '.join(sorted(extra))}", + status=BaselineStatus.INVALID_TYPE, + ) + generator_name = _require_str(raw_generator, "name", path=path) + generator_version = _optional_str(raw_generator, "version", path=path) + + if generator_version is None: + generator_version = _optional_str(meta_obj, "generator_version", path=path) + if generator_version is None: + generator_version = _optional_str( + meta_obj, "codeclone_version", path=path + ) + + return generator_name, generator_version + + raise BaselineValidationError( + f"Invalid baseline schema at {path}: 'generator' must be string or object", + status=BaselineStatus.INVALID_TYPE, + ) + + +def _compute_payload_sha256( + *, + functions: Collection[str], + blocks: Collection[str], + fingerprint_version: str, + python_tag: str, +) -> str: + canonical = { + "blocks": sorted(blocks), + "fingerprint_version": fingerprint_version, + "functions": sorted(functions), + "python_tag": python_tag, + } + serialized = orjson.dumps(canonical, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(serialized).hexdigest() + + +def current_python_tag() -> str: + """Return the interpreter compatibility tag as an immutable string.""" + impl = sys.implementation.name + major, minor = sys.version_info[:2] + prefix = "cp" if impl == "cpython" else impl[:2] + return f"{prefix}{major}{minor}" + + +def _utc_now_z() -> str: + return ( + datetime.now(timezone.utc).replace(microsecond=0).strftime("%Y-%m-%dT%H:%M:%SZ") + ) + + +def _require_str(obj: dict[str, Any], key: str, *, path: Path) -> str: + value = obj.get(key) + if not isinstance(value, str): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' must be string", + 
status=BaselineStatus.INVALID_TYPE, + ) + return value + + +def _optional_str(obj: dict[str, Any], key: str, *, path: Path) -> str | None: + value = obj.get(key) + if value is None: + return None + if not isinstance(value, str): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' must be string", + status=BaselineStatus.INVALID_TYPE, + ) + return value + + +def _require_semver_str(obj: dict[str, Any], key: str, *, path: Path) -> str: + value = _require_str(obj, key, path=path) + _parse_semver(value, key=key, path=path) + return value + + +def _parse_semver(value: str, *, key: str, path: Path) -> tuple[int, int, int]: + parts = value.split(".") + if len(parts) not in {2, 3} or not all(part.isdigit() for part in parts): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' must be semver string", + status=BaselineStatus.INVALID_TYPE, + ) + if len(parts) == 2: + major, minor = int(parts[0]), int(parts[1]) + patch = 0 + else: + major, minor, patch = int(parts[0]), int(parts[1]), int(parts[2]) + return major, minor, patch + + +def _require_python_tag(obj: dict[str, Any], key: str, *, path: Path) -> str: + value = _require_str(obj, key, path=path) + if not re.fullmatch(r"[a-z]{2}\d{2,3}", value): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' must look like 'cp313'", + status=BaselineStatus.INVALID_TYPE, + ) + return value + + +def _require_utc_iso8601_z(obj: dict[str, Any], key: str, *, path: Path) -> str: + value = _require_str(obj, key, path=path) + if not _UTC_ISO8601_Z_RE.fullmatch(value): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' must be UTC ISO-8601 with Z", + status=BaselineStatus.INVALID_TYPE, + ) + try: + datetime( + int(value[0:4]), + int(value[5:7]), + int(value[8:10]), + int(value[11:13]), + int(value[14:16]), + int(value[17:19]), + tzinfo=timezone.utc, + ) + except ValueError as e: + raise BaselineValidationError( + f"Invalid baseline 
schema at {path}: '{key}' must be UTC ISO-8601 with Z", + status=BaselineStatus.INVALID_TYPE, + ) from e + return value + + +def _require_sorted_unique_ids( + obj: dict[str, Any], key: str, *, pattern: re.Pattern[str], path: Path +) -> list[str]: + value = obj.get(key) + if not isinstance(value, list): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' must be list[str]", + status=BaselineStatus.INVALID_TYPE, + ) + if not all(isinstance(item, str) for item in value): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' must be list[str]", + status=BaselineStatus.INVALID_TYPE, + ) + values = list(value) + if values != sorted(values) or len(values) != len(set(values)): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' must be sorted and unique", + status=BaselineStatus.INVALID_TYPE, + ) + if not all(pattern.fullmatch(item) for item in values): + raise BaselineValidationError( + f"Invalid baseline schema at {path}: '{key}' has invalid id format", + status=BaselineStatus.INVALID_TYPE, + ) + return values + + +__all__ = [ + "BASELINE_GENERATOR", + "BASELINE_UNTRUSTED_STATUSES", + "MAX_BASELINE_SIZE_BYTES", + "_BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR", + "BaselineStatus", + "_compute_payload_sha256", + "_load_json_object", + "_optional_str", + "_parse_generator_meta", + "_parse_semver", + "_require_python_tag", + "_require_semver_str", + "_require_sorted_unique_ids", + "_require_str", + "_require_utc_iso8601_z", + "_safe_stat_size", + "_utc_now_z", + "coerce_baseline_status", + "current_python_tag", +] diff --git a/codeclone/blocks.py b/codeclone/blocks/__init__.py similarity index 95% rename from codeclone/blocks.py rename to codeclone/blocks/__init__.py index 9089ff1..d998021 100644 --- a/codeclone/blocks.py +++ b/codeclone/blocks/__init__.py @@ -8,15 +8,15 @@ from typing import TYPE_CHECKING -from .fingerprint import sha1 -from .models import BlockUnit, SegmentUnit -from .normalize import 
stmt_hashes +from ..analysis.fingerprint import sha1 +from ..analysis.normalizer import stmt_hashes +from ..models import BlockUnit, SegmentUnit if TYPE_CHECKING: import ast from collections.abc import Sequence - from .normalize import NormalizationConfig + from ..analysis.normalizer import NormalizationConfig __all__ = ["BlockUnit", "SegmentUnit", "extract_blocks", "extract_segments"] diff --git a/codeclone/cache.py b/codeclone/cache.py deleted file mode 100644 index 282cf66..0000000 --- a/codeclone/cache.py +++ /dev/null @@ -1,2803 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import os -from collections.abc import Collection -from enum import Enum -from json import JSONDecodeError -from pathlib import Path -from typing import TYPE_CHECKING, Literal, TypedDict, TypeGuard, TypeVar, cast - -from .baseline import current_python_tag -from .cache_io import ( - as_int_or_none as _cache_as_int, -) -from .cache_io import ( - as_object_list as _cache_as_list, -) -from .cache_io import ( - as_str_dict as _cache_as_str_dict, -) -from .cache_io import ( - as_str_or_none as _cache_as_str, -) -from .cache_io import ( - read_json_document, - sign_cache_payload, - verify_cache_payload_signature, - write_json_document_atomically, -) -from .cache_paths import runtime_filepath_from_wire, wire_filepath_from_runtime -from .cache_segments import ( - SegmentReportProjection as _SegmentReportProjection, -) -from .cache_segments import ( - build_segment_report_projection as _build_segment_report_projection, -) -from .cache_segments import ( - decode_segment_report_projection, - encode_segment_report_projection, -) -from .contracts import BASELINE_FINGERPRINT_VERSION, CACHE_VERSION -from .errors import CacheError 
-from .models import ( - BlockGroupItem, - BlockUnit, - ClassMetrics, - DeadCandidate, - FileMetrics, - FunctionGroupItem, - ModuleApiSurface, - ModuleDep, - ModuleDocstringCoverage, - ModuleTypingCoverage, - SegmentGroupItem, - SegmentUnit, - StructuralFindingGroup, - StructuralFindingOccurrence, - Unit, -) -from .structural_findings import normalize_structural_finding_group - -if TYPE_CHECKING: - from collections.abc import Callable, Mapping, Sequence - -SegmentReportProjection = _SegmentReportProjection -build_segment_report_projection = _build_segment_report_projection -_as_str = _cache_as_str -_as_int = _cache_as_int -_as_list = _cache_as_list -_as_str_dict = _cache_as_str_dict - -MAX_CACHE_SIZE_BYTES = 50 * 1024 * 1024 -LEGACY_CACHE_SECRET_FILENAME = ".cache_secret" -_DEFAULT_WIRE_UNIT_FLOW_PROFILES = ( - 0, - "none", - False, - "fallthrough", - "none", - "none", -) - - -class CacheStatus(str, Enum): - OK = "ok" - MISSING = "missing" - TOO_LARGE = "too_large" - UNREADABLE = "unreadable" - INVALID_JSON = "invalid_json" - INVALID_TYPE = "invalid_type" - VERSION_MISMATCH = "version_mismatch" - PYTHON_TAG_MISMATCH = "python_tag_mismatch" - FINGERPRINT_MISMATCH = "mismatch_fingerprint_version" - ANALYSIS_PROFILE_MISMATCH = "analysis_profile_mismatch" - INTEGRITY_FAILED = "integrity_failed" - - -class FileStat(TypedDict): - mtime_ns: int - size: int - - -class SourceStatsDict(TypedDict): - lines: int - functions: int - methods: int - classes: int - - -UnitDict = FunctionGroupItem -BlockDict = BlockGroupItem -SegmentDict = SegmentGroupItem - - -class ClassMetricsDictBase(TypedDict): - qualname: str - filepath: str - start_line: int - end_line: int - cbo: int - lcom4: int - method_count: int - instance_var_count: int - risk_coupling: str - risk_cohesion: str - - -class ClassMetricsDict(ClassMetricsDictBase, total=False): - coupled_classes: list[str] - - -class ModuleDepDict(TypedDict): - source: str - target: str - import_type: str - line: int - - -class 
DeadCandidateDictBase(TypedDict): - qualname: str - local_name: str - filepath: str - start_line: int - end_line: int - kind: str - - -class DeadCandidateDict(DeadCandidateDictBase, total=False): - suppressed_rules: list[str] - - -class ModuleTypingCoverageDict(TypedDict): - module: str - filepath: str - callable_count: int - params_total: int - params_annotated: int - returns_total: int - returns_annotated: int - any_annotation_count: int - - -class ModuleDocstringCoverageDict(TypedDict): - module: str - filepath: str - public_symbol_total: int - public_symbol_documented: int - - -class ApiParamSpecDict(TypedDict): - name: str - kind: str - has_default: bool - annotation_hash: str - - -class PublicSymbolDict(TypedDict): - qualname: str - kind: str - start_line: int - end_line: int - params: list[ApiParamSpecDict] - returns_hash: str - exported_via: str - - -class ModuleApiSurfaceDict(TypedDict): - module: str - filepath: str - all_declared: list[str] - symbols: list[PublicSymbolDict] - - -class StructuralFindingOccurrenceDict(TypedDict): - qualname: str - start: int - end: int - - -class StructuralFindingGroupDict(TypedDict): - finding_kind: str - finding_key: str - signature: dict[str, str] - items: list[StructuralFindingOccurrenceDict] - - -class CacheEntryBase(TypedDict): - stat: FileStat - units: list[UnitDict] - blocks: list[BlockDict] - segments: list[SegmentDict] - - -class CacheEntry(CacheEntryBase, total=False): - source_stats: SourceStatsDict - class_metrics: list[ClassMetricsDict] - module_deps: list[ModuleDepDict] - dead_candidates: list[DeadCandidateDict] - referenced_names: list[str] - referenced_qualnames: list[str] - import_names: list[str] - class_names: list[str] - typing_coverage: ModuleTypingCoverageDict - docstring_coverage: ModuleDocstringCoverageDict - api_surface: ModuleApiSurfaceDict - structural_findings: list[StructuralFindingGroupDict] - - -class AnalysisProfile(TypedDict): - min_loc: int - min_stmt: int - block_min_loc: int - 
block_min_stmt: int - segment_min_loc: int - segment_min_stmt: int - collect_api_surface: bool - - -class CacheData(TypedDict): - version: str - python_tag: str - fingerprint_version: str - analysis_profile: AnalysisProfile - files: dict[str, CacheEntry] - - -def _normalize_cached_structural_group( - group: StructuralFindingGroupDict, - *, - filepath: str, -) -> StructuralFindingGroupDict | None: - signature = dict(group["signature"]) - finding_kind = group["finding_kind"] - finding_key = group["finding_key"] - normalized = normalize_structural_finding_group( - StructuralFindingGroup( - finding_kind=finding_kind, - finding_key=finding_key, - signature=signature, - items=tuple( - StructuralFindingOccurrence( - finding_kind=finding_kind, - finding_key=finding_key, - file_path=filepath, - qualname=item["qualname"], - start=item["start"], - end=item["end"], - signature=signature, - ) - for item in group["items"] - ), - ) - ) - if normalized is None: - return None - return StructuralFindingGroupDict( - finding_kind=normalized.finding_kind, - finding_key=normalized.finding_key, - signature=dict(normalized.signature), - items=[ - StructuralFindingOccurrenceDict( - qualname=item.qualname, - start=item.start, - end=item.end, - ) - for item in normalized.items - ], - ) - - -def _normalize_cached_structural_groups( - groups: Sequence[StructuralFindingGroupDict], - *, - filepath: str, -) -> list[StructuralFindingGroupDict]: - normalized = [ - candidate - for candidate in ( - _normalize_cached_structural_group(group, filepath=filepath) - for group in groups - ) - if candidate is not None - ] - normalized.sort(key=lambda group: (-len(group["items"]), group["finding_key"])) - return normalized - - -_DecodedItemT = TypeVar("_DecodedItemT") -_ValidatedItemT = TypeVar("_ValidatedItemT") - - -class Cache: - __slots__ = ( - "_canonical_runtime_paths", - "_dirty", - "analysis_profile", - "cache_schema_version", - "data", - "fingerprint_version", - "legacy_secret_warning", - 
"load_status", - "load_warning", - "max_size_bytes", - "path", - "root", - "segment_report_projection", - ) - - _CACHE_VERSION = CACHE_VERSION - - def __init__( - self, - path: str | Path, - *, - root: str | Path | None = None, - max_size_bytes: int | None = None, - min_loc: int = 10, - min_stmt: int = 6, - block_min_loc: int = 20, - block_min_stmt: int = 8, - segment_min_loc: int = 20, - segment_min_stmt: int = 10, - collect_api_surface: bool = False, - ): - self.path = Path(path) - self.root = _resolve_root(root) - self.fingerprint_version = BASELINE_FINGERPRINT_VERSION - self.analysis_profile: AnalysisProfile = { - "min_loc": min_loc, - "min_stmt": min_stmt, - "block_min_loc": block_min_loc, - "block_min_stmt": block_min_stmt, - "segment_min_loc": segment_min_loc, - "segment_min_stmt": segment_min_stmt, - "collect_api_surface": collect_api_surface, - } - self.data: CacheData = _empty_cache_data( - version=self._CACHE_VERSION, - python_tag=current_python_tag(), - fingerprint_version=self.fingerprint_version, - analysis_profile=self.analysis_profile, - ) - self._canonical_runtime_paths: set[str] = set() - self.legacy_secret_warning = self._detect_legacy_secret_warning() - self.cache_schema_version: str | None = None - self.load_status = CacheStatus.MISSING - self.load_warning: str | None = self.legacy_secret_warning - self.max_size_bytes = ( - MAX_CACHE_SIZE_BYTES if max_size_bytes is None else max_size_bytes - ) - self.segment_report_projection: SegmentReportProjection | None = None - self._dirty: bool = True # new cache is dirty until loaded from disk - - def _detect_legacy_secret_warning(self) -> str | None: - secret_path = self.path.parent / LEGACY_CACHE_SECRET_FILENAME - try: - if secret_path.exists(): - return ( - f"Legacy cache secret file detected at {secret_path}; " - "delete this obsolete file." 
- ) - except OSError as e: - return f"Legacy cache secret check failed: {e}" - return None - - def _set_load_warning(self, message: str | None) -> None: - warning = message - if warning is None: - warning = self.legacy_secret_warning - elif self.legacy_secret_warning: - warning = f"{warning}\n{self.legacy_secret_warning}" - self.load_warning = warning - - def _ignore_cache( - self, - message: str, - *, - status: CacheStatus, - schema_version: str | None = None, - ) -> None: - self._set_load_warning(message) - self.load_status = status - self.cache_schema_version = schema_version - self.data = _empty_cache_data( - version=self._CACHE_VERSION, - python_tag=current_python_tag(), - fingerprint_version=self.fingerprint_version, - analysis_profile=self.analysis_profile, - ) - self._canonical_runtime_paths = set() - self.segment_report_projection = None - - def _reject_cache_load( - self, - message: str, - *, - status: CacheStatus, - schema_version: str | None = None, - ) -> CacheData | None: - self._ignore_cache( - message, - status=status, - schema_version=schema_version, - ) - return None - - def _reject_invalid_cache_format( - self, - *, - schema_version: str | None = None, - ) -> CacheData | None: - return self._reject_cache_load( - "Cache format invalid; ignoring cache.", - status=CacheStatus.INVALID_TYPE, - schema_version=schema_version, - ) - - def _reject_version_mismatch(self, version: str) -> CacheData | None: - return self._reject_cache_load( - f"Cache version mismatch (found {version}); ignoring cache.", - status=CacheStatus.VERSION_MISMATCH, - schema_version=version, - ) - - def load(self) -> None: - try: - exists = self.path.exists() - except OSError as e: - self._ignore_cache( - f"Cache unreadable; ignoring cache: {e}", - status=CacheStatus.UNREADABLE, - ) - return - - if not exists: - self._set_load_warning(None) - self.load_status = CacheStatus.MISSING - self.cache_schema_version = None - self._canonical_runtime_paths = set() - 
self.segment_report_projection = None - return - - try: - size = self.path.stat().st_size - if size > self.max_size_bytes: - self._ignore_cache( - "Cache file too large " - f"({size} bytes, max {self.max_size_bytes}); ignoring cache.", - status=CacheStatus.TOO_LARGE, - ) - return - - raw_obj = read_json_document(self.path) - parsed = self._load_and_validate(raw_obj) - if parsed is None: - return - self.data = parsed - self._canonical_runtime_paths = set(parsed["files"].keys()) - self.load_status = CacheStatus.OK - self._set_load_warning(None) - self._dirty = False # freshly loaded — nothing to persist - - except OSError as e: - self._ignore_cache( - f"Cache unreadable; ignoring cache: {e}", - status=CacheStatus.UNREADABLE, - ) - except JSONDecodeError: - self._ignore_cache( - "Cache corrupted; ignoring cache.", - status=CacheStatus.INVALID_JSON, - ) - - def _load_and_validate(self, raw_obj: object) -> CacheData | None: - raw = _as_str_dict(raw_obj) - if raw is None: - return self._reject_invalid_cache_format() - - # Legacy cache format: top-level {version, files, _signature}. 
- legacy_version = _as_str(raw.get("version")) - if legacy_version is not None: - return self._reject_version_mismatch(legacy_version) - - version = _as_str(raw.get("v")) - if version is None: - return self._reject_invalid_cache_format() - - if version != self._CACHE_VERSION: - return self._reject_version_mismatch(version) - - sig = _as_str(raw.get("sig")) - payload_obj = raw.get("payload") - payload = _as_str_dict(payload_obj) - if sig is None or payload is None: - return self._reject_invalid_cache_format(schema_version=version) - - if not verify_cache_payload_signature(payload, sig): - return self._reject_cache_load( - "Cache signature mismatch; ignoring cache.", - status=CacheStatus.INTEGRITY_FAILED, - schema_version=version, - ) - - runtime_tag = current_python_tag() - py_tag = _as_str(payload.get("py")) - if py_tag is None: - return self._reject_invalid_cache_format(schema_version=version) - - if py_tag != runtime_tag: - return self._reject_cache_load( - "Cache python tag mismatch " - f"(found {py_tag}, expected {runtime_tag}); ignoring cache.", - status=CacheStatus.PYTHON_TAG_MISMATCH, - schema_version=version, - ) - - fp_version = _as_str(payload.get("fp")) - if fp_version is None: - return self._reject_invalid_cache_format(schema_version=version) - - if fp_version != self.fingerprint_version: - return self._reject_cache_load( - "Cache fingerprint version mismatch " - f"(found {fp_version}, expected {self.fingerprint_version}); " - "ignoring cache.", - status=CacheStatus.FINGERPRINT_MISMATCH, - schema_version=version, - ) - - analysis_profile = _as_analysis_profile(payload.get("ap")) - if analysis_profile is None: - return self._reject_invalid_cache_format(schema_version=version) - - if analysis_profile != self.analysis_profile: - return self._reject_cache_load( - "Cache analysis profile mismatch " - f"(found min_loc={analysis_profile['min_loc']}, " - f"min_stmt={analysis_profile['min_stmt']}, " - "collect_api_surface=" - 
f"{str(analysis_profile['collect_api_surface']).lower()}; " - f"expected min_loc={self.analysis_profile['min_loc']}, " - f"min_stmt={self.analysis_profile['min_stmt']}, " - "collect_api_surface=" - f"{str(self.analysis_profile['collect_api_surface']).lower()}); " - "ignoring cache.", - status=CacheStatus.ANALYSIS_PROFILE_MISMATCH, - schema_version=version, - ) - - files_obj = payload.get("files") - files_dict = _as_str_dict(files_obj) - if files_dict is None: - return self._reject_invalid_cache_format(schema_version=version) - - parsed_files: dict[str, CacheEntry] = {} - for wire_path, file_entry_obj in files_dict.items(): - runtime_path = runtime_filepath_from_wire(wire_path, root=self.root) - parsed_entry = self._decode_entry(file_entry_obj, runtime_path) - if parsed_entry is None: - return self._reject_invalid_cache_format(schema_version=version) - parsed_files[runtime_path] = _canonicalize_cache_entry(parsed_entry) - self.segment_report_projection = decode_segment_report_projection( - payload.get("sr"), - root=self.root, - ) - - self.cache_schema_version = version - return CacheData( - version=self._CACHE_VERSION, - python_tag=runtime_tag, - fingerprint_version=self.fingerprint_version, - analysis_profile=self.analysis_profile, - files=parsed_files, - ) - - def save(self) -> None: - if not self._dirty: - return - try: - wire_files: dict[str, object] = {} - wire_map = { - rp: wire_filepath_from_runtime(rp, root=self.root) - for rp in self.data["files"] - } - for runtime_path in sorted(self.data["files"], key=wire_map.__getitem__): - entry = self.get_file_entry(runtime_path) - if entry is None: - continue - wire_files[wire_map[runtime_path]] = self._encode_entry(entry) - - payload: dict[str, object] = { - "py": current_python_tag(), - "fp": self.fingerprint_version, - "ap": self.analysis_profile, - "files": wire_files, - } - segment_projection = encode_segment_report_projection( - self.segment_report_projection, - root=self.root, - ) - if segment_projection is 
not None: - payload["sr"] = segment_projection - signed_doc = { - "v": self._CACHE_VERSION, - "payload": payload, - "sig": sign_cache_payload(payload), - } - write_json_document_atomically(self.path, signed_doc) - self._dirty = False - - self.data["version"] = self._CACHE_VERSION - self.data["python_tag"] = current_python_tag() - self.data["fingerprint_version"] = self.fingerprint_version - self.data["analysis_profile"] = self.analysis_profile - - except OSError as e: - raise CacheError(f"Failed to save cache: {e}") from e - - @staticmethod - def _decode_entry(value: object, filepath: str) -> CacheEntry | None: - return _decode_wire_file_entry(value, filepath) - - @staticmethod - def _encode_entry(entry: CacheEntry) -> dict[str, object]: - return _encode_wire_file_entry(entry) - - def _store_canonical_file_entry( - self, - *, - runtime_path: str, - canonical_entry: CacheEntry, - ) -> CacheEntry: - previous_entry = self.data["files"].get(runtime_path) - was_canonical = runtime_path in self._canonical_runtime_paths - self.data["files"][runtime_path] = canonical_entry - self._canonical_runtime_paths.add(runtime_path) - if not was_canonical or previous_entry != canonical_entry: - self._dirty = True - return canonical_entry - - def get_file_entry(self, filepath: str) -> CacheEntry | None: - runtime_lookup_key = filepath - entry_obj = self.data["files"].get(runtime_lookup_key) - if entry_obj is None: - wire_key = wire_filepath_from_runtime(filepath, root=self.root) - runtime_lookup_key = runtime_filepath_from_wire(wire_key, root=self.root) - entry_obj = self.data["files"].get(runtime_lookup_key) - - if entry_obj is None: - return None - - if runtime_lookup_key in self._canonical_runtime_paths: - if _is_canonical_cache_entry(entry_obj): - return entry_obj - self._canonical_runtime_paths.discard(runtime_lookup_key) - - if not isinstance(entry_obj, dict): - return None - entry = entry_obj - - required = {"stat", "units", "blocks", "segments"} - if not 
required.issubset(entry.keys()): - return None - - stat = _as_file_stat_dict(entry.get("stat")) - units = _as_typed_unit_list(entry.get("units")) - blocks = _as_typed_block_list(entry.get("blocks")) - segments = _as_typed_segment_list(entry.get("segments")) - if stat is None or units is None or blocks is None or segments is None: - return None - - optional_sections = _decode_optional_cache_sections(entry) - if optional_sections is None: - return None - ( - class_metrics_raw, - module_deps_raw, - dead_candidates_raw, - referenced_names_raw, - referenced_qualnames_raw, - import_names_raw, - class_names_raw, - typing_coverage_raw, - docstring_coverage_raw, - api_surface_raw, - source_stats, - structural_findings, - ) = optional_sections - - entry_to_canonicalize: CacheEntry = _attach_optional_cache_sections( - CacheEntry( - stat=stat, - units=units, - blocks=blocks, - segments=segments, - class_metrics=class_metrics_raw, - module_deps=module_deps_raw, - dead_candidates=dead_candidates_raw, - referenced_names=referenced_names_raw, - referenced_qualnames=referenced_qualnames_raw, - import_names=import_names_raw, - class_names=class_names_raw, - ), - typing_coverage=typing_coverage_raw, - docstring_coverage=docstring_coverage_raw, - api_surface=api_surface_raw, - source_stats=source_stats, - structural_findings=structural_findings, - ) - canonical_entry = _canonicalize_cache_entry(entry_to_canonicalize) - return self._store_canonical_file_entry( - runtime_path=runtime_lookup_key, - canonical_entry=canonical_entry, - ) - - def put_file_entry( - self, - filepath: str, - stat_sig: FileStat, - units: list[Unit], - blocks: list[BlockUnit], - segments: list[SegmentUnit], - *, - source_stats: SourceStatsDict | None = None, - file_metrics: FileMetrics | None = None, - structural_findings: list[StructuralFindingGroup] | None = None, - ) -> None: - runtime_path = runtime_filepath_from_wire( - wire_filepath_from_runtime(filepath, root=self.root), - root=self.root, - ) - - unit_rows 
= [_unit_dict_from_model(unit, runtime_path) for unit in units] - block_rows = [_block_dict_from_model(block, runtime_path) for block in blocks] - segment_rows = [ - _segment_dict_from_model(segment, runtime_path) for segment in segments - ] - - ( - class_metrics_rows, - module_dep_rows, - dead_candidate_rows, - referenced_names, - referenced_qualnames, - import_names, - class_names, - typing_coverage, - docstring_coverage, - api_surface, - ) = _new_optional_metrics_payload() - if file_metrics is not None: - class_metrics_rows = [ - _class_metrics_dict_from_model(metric, runtime_path) - for metric in file_metrics.class_metrics - ] - module_dep_rows = [ - _module_dep_dict_from_model(dep) for dep in file_metrics.module_deps - ] - dead_candidate_rows = [ - _dead_candidate_dict_from_model(candidate, runtime_path) - for candidate in file_metrics.dead_candidates - ] - referenced_names = sorted(set(file_metrics.referenced_names)) - referenced_qualnames = sorted(set(file_metrics.referenced_qualnames)) - import_names = sorted(set(file_metrics.import_names)) - class_names = sorted(set(file_metrics.class_names)) - typing_coverage = _typing_coverage_dict_from_model( - file_metrics.typing_coverage, - filepath=runtime_path, - ) - docstring_coverage = _docstring_coverage_dict_from_model( - file_metrics.docstring_coverage, - filepath=runtime_path, - ) - api_surface = _api_surface_dict_from_model( - file_metrics.api_surface, - filepath=runtime_path, - ) - - source_stats_payload = source_stats or SourceStatsDict( - lines=0, - functions=0, - methods=0, - classes=0, - ) - entry_dict = CacheEntry( - stat=stat_sig, - source_stats=source_stats_payload, - units=unit_rows, - blocks=block_rows, - segments=segment_rows, - class_metrics=class_metrics_rows, - module_deps=module_dep_rows, - dead_candidates=dead_candidate_rows, - referenced_names=referenced_names, - referenced_qualnames=referenced_qualnames, - import_names=import_names, - class_names=class_names, - ) - if typing_coverage is not 
None: - entry_dict["typing_coverage"] = typing_coverage - if docstring_coverage is not None: - entry_dict["docstring_coverage"] = docstring_coverage - if api_surface is not None: - entry_dict["api_surface"] = api_surface - if structural_findings is not None: - entry_dict["structural_findings"] = _normalize_cached_structural_groups( - [ - _structural_group_dict_from_model(group) - for group in structural_findings - ], - filepath=runtime_path, - ) - canonical_entry = _canonicalize_cache_entry(entry_dict) - self._store_canonical_file_entry( - runtime_path=runtime_path, - canonical_entry=canonical_entry, - ) - - -def file_stat_signature(path: str) -> FileStat: - st = os.stat(path) - return FileStat( - mtime_ns=st.st_mtime_ns, - size=st.st_size, - ) - - -def _empty_cache_data( - *, - version: str, - python_tag: str, - fingerprint_version: str, - analysis_profile: AnalysisProfile, -) -> CacheData: - return CacheData( - version=version, - python_tag=python_tag, - fingerprint_version=fingerprint_version, - analysis_profile=analysis_profile, - files={}, - ) - - -def _as_risk_literal(value: object) -> Literal["low", "medium", "high"] | None: - match value: - case "low": - return "low" - case "medium": - return "medium" - case "high": - return "high" - case _: - return None - - -def _new_optional_metrics_payload() -> tuple[ - list[ClassMetricsDict], - list[ModuleDepDict], - list[DeadCandidateDict], - list[str], - list[str], - list[str], - list[str], - ModuleTypingCoverageDict | None, - ModuleDocstringCoverageDict | None, - ModuleApiSurfaceDict | None, -]: - return [], [], [], [], [], [], [], None, None, None - - -def _unit_dict_from_model(unit: Unit, filepath: str) -> UnitDict: - return FunctionGroupItem( - qualname=unit.qualname, - filepath=filepath, - start_line=unit.start_line, - end_line=unit.end_line, - loc=unit.loc, - stmt_count=unit.stmt_count, - fingerprint=unit.fingerprint, - loc_bucket=unit.loc_bucket, - cyclomatic_complexity=unit.cyclomatic_complexity, - 
nesting_depth=unit.nesting_depth, - risk=unit.risk, - raw_hash=unit.raw_hash, - entry_guard_count=unit.entry_guard_count, - entry_guard_terminal_profile=unit.entry_guard_terminal_profile, - entry_guard_has_side_effect_before=unit.entry_guard_has_side_effect_before, - terminal_kind=unit.terminal_kind, - try_finally_profile=unit.try_finally_profile, - side_effect_order_profile=unit.side_effect_order_profile, - ) - - -def _block_dict_from_model(block: BlockUnit, filepath: str) -> BlockDict: - return BlockGroupItem( - block_hash=block.block_hash, - filepath=filepath, - qualname=block.qualname, - start_line=block.start_line, - end_line=block.end_line, - size=block.size, - ) - - -def _segment_dict_from_model(segment: SegmentUnit, filepath: str) -> SegmentDict: - return SegmentGroupItem( - segment_hash=segment.segment_hash, - segment_sig=segment.segment_sig, - filepath=filepath, - qualname=segment.qualname, - start_line=segment.start_line, - end_line=segment.end_line, - size=segment.size, - ) - - -def _typing_coverage_dict_from_model( - coverage: ModuleTypingCoverage | None, - *, - filepath: str, -) -> ModuleTypingCoverageDict | None: - if coverage is None: - return None - return ModuleTypingCoverageDict( - module=coverage.module, - filepath=filepath, - callable_count=coverage.callable_count, - params_total=coverage.params_total, - params_annotated=coverage.params_annotated, - returns_total=coverage.returns_total, - returns_annotated=coverage.returns_annotated, - any_annotation_count=coverage.any_annotation_count, - ) - - -def _docstring_coverage_dict_from_model( - coverage: ModuleDocstringCoverage | None, - *, - filepath: str, -) -> ModuleDocstringCoverageDict | None: - if coverage is None: - return None - return ModuleDocstringCoverageDict( - module=coverage.module, - filepath=filepath, - public_symbol_total=coverage.public_symbol_total, - public_symbol_documented=coverage.public_symbol_documented, - ) - - -def _api_surface_dict_from_model( - surface: ModuleApiSurface | 
None, - *, - filepath: str, -) -> ModuleApiSurfaceDict | None: - if surface is None: - return None - return ModuleApiSurfaceDict( - module=surface.module, - filepath=filepath, - all_declared=list(surface.all_declared or ()), - symbols=[ - PublicSymbolDict( - qualname=symbol.qualname, - kind=symbol.kind, - start_line=symbol.start_line, - end_line=symbol.end_line, - params=[ - ApiParamSpecDict( - name=param.name, - kind=param.kind, - has_default=param.has_default, - annotation_hash=param.annotation_hash, - ) - for param in symbol.params - ], - returns_hash=symbol.returns_hash, - exported_via=symbol.exported_via, - ) - for symbol in surface.symbols - ], - ) - - -def _class_metrics_dict_from_model( - metric: ClassMetrics, - filepath: str, -) -> ClassMetricsDict: - return ClassMetricsDict( - qualname=metric.qualname, - filepath=filepath, - start_line=metric.start_line, - end_line=metric.end_line, - cbo=metric.cbo, - lcom4=metric.lcom4, - method_count=metric.method_count, - instance_var_count=metric.instance_var_count, - risk_coupling=metric.risk_coupling, - risk_cohesion=metric.risk_cohesion, - coupled_classes=sorted(set(metric.coupled_classes)), - ) - - -def _module_dep_dict_from_model(dep: ModuleDep) -> ModuleDepDict: - return ModuleDepDict( - source=dep.source, - target=dep.target, - import_type=dep.import_type, - line=dep.line, - ) - - -def _dead_candidate_dict_from_model( - candidate: DeadCandidate, - filepath: str, -) -> DeadCandidateDict: - result = DeadCandidateDict( - qualname=candidate.qualname, - local_name=candidate.local_name, - filepath=filepath, - start_line=candidate.start_line, - end_line=candidate.end_line, - kind=candidate.kind, - ) - if candidate.suppressed_rules: - result["suppressed_rules"] = sorted(set(candidate.suppressed_rules)) - return result - - -def _structural_occurrence_dict_from_model( - occurrence: StructuralFindingOccurrence, -) -> StructuralFindingOccurrenceDict: - return StructuralFindingOccurrenceDict( - qualname=occurrence.qualname, 
def _structural_group_dict_from_model(
    group: StructuralFindingGroup,
) -> StructuralFindingGroupDict:
    """Convert a structural-finding group model into its serializable dict."""
    occurrences = [
        _structural_occurrence_dict_from_model(item) for item in group.items
    ]
    return StructuralFindingGroupDict(
        finding_kind=group.finding_kind,
        finding_key=group.finding_key,
        signature=dict(group.signature),
        items=occurrences,
    )


def _as_file_stat_dict(value: object) -> FileStat | None:
    """Validate *value* as a file-stat mapping; None on shape mismatch."""
    if not _is_file_stat_dict(value):
        return None
    mapping = cast("Mapping[str, object]", value)
    mtime_ns = mapping.get("mtime_ns")
    size = mapping.get("size")
    if isinstance(mtime_ns, int) and isinstance(size, int):
        return FileStat(mtime_ns=mtime_ns, size=size)
    return None


def _as_source_stats_dict(value: object) -> SourceStatsDict | None:
    """Validate *value* as source-stats counts; None on shape mismatch.

    Fix: the per-field int checks previously used ``assert``, which raises
    AssertionError on malformed cache data and is stripped entirely under
    ``-O`` (letting bad values through). A shape mismatch now returns None,
    matching the sibling validator ``_as_file_stat_dict``.
    """
    if not _is_source_stats_dict(value):
        return None
    mapping = cast("Mapping[str, object]", value)
    lines = mapping.get("lines")
    functions = mapping.get("functions")
    methods = mapping.get("methods")
    classes = mapping.get("classes")
    if not (
        isinstance(lines, int)
        and isinstance(functions, int)
        and isinstance(methods, int)
        and isinstance(classes, int)
    ):
        return None
    return SourceStatsDict(
        lines=lines,
        functions=functions,
        methods=methods,
        classes=classes,
    )


def _as_typed_list(
    value: object,
    *,
    predicate: Callable[[object], bool],
) -> list[_ValidatedItemT] | None:
    """Return *value* as a typed list when every element passes *predicate*."""
    if not isinstance(value, list):
        return None
    if all(predicate(item) for item in value):
        return cast("list[_ValidatedItemT]", value)
    return None


def _as_typed_unit_list(value: object) -> list[UnitDict] | None:
    """Typed-list validator for unit dicts."""
    return _as_typed_list(value, predicate=_is_unit_dict)


def _as_typed_block_list(value: object) -> list[BlockDict] | None:
    """Typed-list validator for block dicts."""
    return _as_typed_list(value, predicate=_is_block_dict)


def _as_typed_segment_list(value: object) -> list[SegmentDict] | None:
    """Typed-list validator for segment dicts."""
    return _as_typed_list(value, predicate=_is_segment_dict)


def _as_typed_class_metrics_list(value: object) -> list[ClassMetricsDict] | None:
    """Typed-list validator for class-metric dicts."""
    return _as_typed_list(value, predicate=_is_class_metrics_dict)


def _as_typed_dead_candidates_list(
    value: object,
) -> list[DeadCandidateDict] | None:
    """Typed-list validator for dead-candidate dicts."""
    return _as_typed_list(value, predicate=_is_dead_candidate_dict)


def _as_typed_module_deps_list(value: object) -> list[ModuleDepDict] | None:
    """Typed-list validator for module-dependency dicts."""
    return _as_typed_list(value, predicate=_is_module_dep_dict)


def _as_typed_string_list(value: object) -> list[str] | None:
    """Typed-list validator for plain string lists."""
    return _as_typed_list(value, predicate=lambda item: isinstance(item, str))


def _as_module_typing_coverage_dict(
    value: object,
) -> ModuleTypingCoverageDict | None:
    """Narrow *value* to a typing-coverage dict, or None."""
    if _is_module_typing_coverage_dict(value):
        return cast("ModuleTypingCoverageDict", value)
    return None


def _as_module_docstring_coverage_dict(
    value: object,
) -> ModuleDocstringCoverageDict | None:
    """Narrow *value* to a docstring-coverage dict, or None."""
    if _is_module_docstring_coverage_dict(value):
        return cast("ModuleDocstringCoverageDict", value)
    return None


def _as_module_api_surface_dict(value: object) -> ModuleApiSurfaceDict | None:
    """Narrow *value* to an api-surface dict, or None."""
    if _is_module_api_surface_dict(value):
        return cast("ModuleApiSurfaceDict", value)
    return None


def _normalized_optional_string_list(value: object) -> list[str] | None:
    """Sorted, de-duplicated string list; None when empty or not a str list."""
    items = _as_typed_string_list(value)
    if not items:
        return None
    return sorted(set(items))


def _is_canonical_cache_entry(value: object) -> TypeGuard[CacheEntry]:
    """TypeGuard: *value* is a dict with the canonical cache-entry layout."""
    return isinstance(value, dict) and _has_cache_entry_container_shape(value)


def _has_cache_entry_container_shape(entry: Mapping[str, object]) -> bool:
    """Check the container-level layout of a canonical cache entry.

    Only the container types are verified here; per-item validation happens
    in the typed-list decoders above.
    """
    if not {"stat", "units", "blocks", "segments"}.issubset(entry.keys()):
        return False
    if not isinstance(entry.get("stat"), dict):
        return False
    for list_key in ("units", "blocks", "segments"):
        if not isinstance(entry.get(list_key), list):
            return False
    source_stats = entry.get("source_stats")
    if source_stats is not None and not _is_source_stats_dict(source_stats):
        return False
    optional_list_keys = (
        "class_metrics",
        "module_deps",
        "dead_candidates",
        "referenced_names",
        "referenced_qualnames",
        "import_names",
        "class_names",
        "structural_findings",
    )
    # Absent optional sections default to [] and therefore pass the check.
    for key in optional_list_keys:
        if not isinstance(entry.get(key, []), list):
            return False
    typing_coverage = entry.get("typing_coverage")
    if typing_coverage is not None and not _is_module_typing_coverage_dict(
        typing_coverage
    ):
        return False
    docstring_coverage = entry.get("docstring_coverage")
    if docstring_coverage is not None and not _is_module_docstring_coverage_dict(
        docstring_coverage
    ):
        return False
    api_surface = entry.get("api_surface")
    return api_surface is None or _is_module_api_surface_dict(api_surface)


def _decode_optional_cache_sections(
    entry: Mapping[str, object],
) -> (
    tuple[
        list[ClassMetricsDict],
        list[ModuleDepDict],
        list[DeadCandidateDict],
        list[str],
        list[str],
        list[str],
        list[str],
        ModuleTypingCoverageDict | None,
        ModuleDocstringCoverageDict | None,
        ModuleApiSurfaceDict | None,
        SourceStatsDict | None,
        list[StructuralFindingGroupDict] | None,
    ]
    | None
):
    """Decode the optional cache-entry sections.

    Returns None when any of the mandatory list sections is malformed; the
    trailing coverage/api/stats/findings entries are individually optional.
    """
    class_metrics = _as_typed_class_metrics_list(entry.get("class_metrics", []))
    module_deps = _as_typed_module_deps_list(entry.get("module_deps", []))
    dead_candidates = _as_typed_dead_candidates_list(
        entry.get("dead_candidates", [])
    )
    referenced_names = _as_typed_string_list(entry.get("referenced_names", []))
    referenced_qualnames = _as_typed_string_list(
        entry.get("referenced_qualnames", [])
    )
    import_names = _as_typed_string_list(entry.get("import_names", []))
    class_names = _as_typed_string_list(entry.get("class_names", []))
    mandatory = (
        class_metrics,
        module_deps,
        dead_candidates,
        referenced_names,
        referenced_qualnames,
        import_names,
        class_names,
    )
    if any(section is None for section in mandatory):
        return None
    structural_raw = entry.get("structural_findings")
    return (
        class_metrics,
        module_deps,
        dead_candidates,
        referenced_names,
        referenced_qualnames,
        import_names,
        class_names,
        _as_module_typing_coverage_dict(entry.get("typing_coverage")),
        _as_module_docstring_coverage_dict(entry.get("docstring_coverage")),
        _as_module_api_surface_dict(entry.get("api_surface")),
        _as_source_stats_dict(entry.get("source_stats")),
        structural_raw if isinstance(structural_raw, list) else None,
    )
def _attach_optional_cache_sections(
    entry: CacheEntry,
    *,
    typing_coverage: ModuleTypingCoverageDict | None = None,
    docstring_coverage: ModuleDocstringCoverageDict | None = None,
    api_surface: ModuleApiSurfaceDict | None = None,
    source_stats: SourceStatsDict | None = None,
    structural_findings: list[StructuralFindingGroupDict] | None = None,
) -> CacheEntry:
    """Attach every non-None optional section to *entry* and return it."""
    if typing_coverage is not None:
        entry["typing_coverage"] = typing_coverage
    if docstring_coverage is not None:
        entry["docstring_coverage"] = docstring_coverage
    if api_surface is not None:
        entry["api_surface"] = api_surface
    if source_stats is not None:
        entry["source_stats"] = source_stats
    if structural_findings is not None:
        entry["structural_findings"] = structural_findings
    return entry


def _canonical_api_surface(api_surface: ModuleApiSurfaceDict) -> ModuleApiSurfaceDict:
    """Return an api-surface section with deterministically ordered contents."""
    ordered_symbols = sorted(
        api_surface["symbols"],
        key=lambda sym: (
            sym["qualname"],
            sym["kind"],
            sym["start_line"],
            sym["end_line"],
        ),
    )
    normalized_symbols = [
        PublicSymbolDict(
            qualname=sym["qualname"],
            kind=sym["kind"],
            start_line=sym["start_line"],
            end_line=sym["end_line"],
            params=[
                ApiParamSpecDict(
                    name=param["name"],
                    kind=param["kind"],
                    has_default=param["has_default"],
                    annotation_hash=param["annotation_hash"],
                )
                for param in sym.get("params", [])
            ],
            returns_hash=sym.get("returns_hash", ""),
            exported_via=sym.get("exported_via", "name"),
        )
        for sym in ordered_symbols
    ]
    return ModuleApiSurfaceDict(
        module=api_surface["module"],
        filepath=api_surface["filepath"],
        all_declared=sorted(set(api_surface.get("all_declared", []))),
        symbols=normalized_symbols,
    )


def _canonicalize_cache_entry(entry: CacheEntry) -> CacheEntry:
    """Rewrite *entry* into the canonical form: sorted, de-duplicated sections.

    Canonical ordering makes encoded entries byte-stable across runs, so the
    on-disk cache only changes when the underlying analysis results change.
    """
    class_metrics = sorted(
        entry["class_metrics"],
        key=lambda cm: (cm["start_line"], cm["end_line"], cm["qualname"]),
    )
    for metric in class_metrics:
        coupled = metric.get("coupled_classes", [])
        if coupled:
            metric["coupled_classes"] = sorted(set(coupled))

    module_deps = sorted(
        entry["module_deps"],
        key=lambda dep: (
            dep["source"],
            dep["target"],
            dep["import_type"],
            dep["line"],
        ),
    )

    dead_candidates: list[DeadCandidateDict] = []
    for candidate in entry["dead_candidates"]:
        raw_rules = candidate.get("suppressed_rules", [])
        normalized = DeadCandidateDict(
            qualname=candidate["qualname"],
            local_name=candidate["local_name"],
            filepath=candidate["filepath"],
            start_line=candidate["start_line"],
            end_line=candidate["end_line"],
            kind=candidate["kind"],
        )
        # Only a valid, non-empty rule list is carried over.
        if _is_string_list(raw_rules):
            rules = sorted(set(raw_rules))
            if rules:
                normalized["suppressed_rules"] = rules
        dead_candidates.append(normalized)
    dead_candidates.sort(
        key=lambda dc: (
            dc["start_line"],
            dc["end_line"],
            dc["qualname"],
            dc["local_name"],
            dc["kind"],
            tuple(dc.get("suppressed_rules", [])),
        )
    )

    result: CacheEntry = {
        "stat": entry["stat"],
        "units": entry["units"],
        "blocks": entry["blocks"],
        "segments": entry["segments"],
        "class_metrics": class_metrics,
        "module_deps": module_deps,
        "dead_candidates": dead_candidates,
        "referenced_names": sorted(set(entry["referenced_names"])),
        "referenced_qualnames": sorted(set(entry.get("referenced_qualnames", []))),
        "import_names": sorted(set(entry["import_names"])),
        "class_names": sorted(set(entry["class_names"])),
    }

    typing_coverage = entry.get("typing_coverage")
    if typing_coverage is not None:
        result["typing_coverage"] = ModuleTypingCoverageDict(
            module=typing_coverage["module"],
            filepath=typing_coverage["filepath"],
            callable_count=typing_coverage["callable_count"],
            params_total=typing_coverage["params_total"],
            params_annotated=typing_coverage["params_annotated"],
            returns_total=typing_coverage["returns_total"],
            returns_annotated=typing_coverage["returns_annotated"],
            any_annotation_count=typing_coverage["any_annotation_count"],
        )

    docstring_coverage = entry.get("docstring_coverage")
    if docstring_coverage is not None:
        result["docstring_coverage"] = ModuleDocstringCoverageDict(
            module=docstring_coverage["module"],
            filepath=docstring_coverage["filepath"],
            public_symbol_total=docstring_coverage["public_symbol_total"],
            public_symbol_documented=docstring_coverage["public_symbol_documented"],
        )

    api_surface = entry.get("api_surface")
    if api_surface is not None:
        result["api_surface"] = _canonical_api_surface(api_surface)

    structural_findings = entry.get("structural_findings")
    if structural_findings is not None:
        result["structural_findings"] = structural_findings

    source_stats = entry.get("source_stats")
    if source_stats is not None:
        result["source_stats"] = source_stats
    return result


def _decode_wire_qualname_span(
    row: list[object],
) -> tuple[str, int, int] | None:
    """Decode (qualname, start, end) from the first three wire columns."""
    qualname = _as_str(row[0])
    start_line = _as_int(row[1])
    end_line = _as_int(row[2])
    if qualname is None or start_line is None or end_line is None:
        return None
    return qualname, start_line, end_line
def _decode_wire_qualname_span_size(
    row: list[object],
) -> tuple[str, int, int, int] | None:
    """Decode (qualname, start, end, size) from the first four wire columns."""
    span = _decode_wire_qualname_span(row)
    if span is None:
        return None
    size = _as_int(row[3])
    if size is None:
        return None
    qualname, start_line, end_line = span
    return qualname, start_line, end_line, size


def _as_analysis_profile(value: object) -> AnalysisProfile | None:
    """Decode an analysis profile mapping; None on missing keys or bad types."""
    obj = _as_str_dict(value)
    if obj is None:
        return None

    threshold_keys = (
        "min_loc",
        "min_stmt",
        "block_min_loc",
        "block_min_stmt",
        "segment_min_loc",
        "segment_min_stmt",
    )
    # A proper subset of the required keys means at least one is missing.
    if set(obj.keys()) < set(threshold_keys):
        return None

    thresholds = tuple(_as_int(obj.get(key)) for key in threshold_keys)
    collect_raw = obj.get("collect_api_surface", False)
    if any(item is None for item in thresholds) or not isinstance(collect_raw, bool):
        return None
    (
        min_loc,
        min_stmt,
        block_min_loc,
        block_min_stmt,
        segment_min_loc,
        segment_min_stmt,
    ) = thresholds
    return AnalysisProfile(
        min_loc=min_loc,
        min_stmt=min_stmt,
        block_min_loc=block_min_loc,
        block_min_stmt=block_min_stmt,
        segment_min_loc=segment_min_loc,
        segment_min_stmt=segment_min_stmt,
        collect_api_surface=collect_raw,
    )


def _decode_wire_stat(obj: dict[str, object]) -> FileStat | None:
    """Decode the mandatory "st" pair [mtime_ns, size]."""
    pair = _as_list(obj.get("st"))
    if pair is None or len(pair) != 2:
        return None
    mtime_ns = _as_int(pair[0])
    size = _as_int(pair[1])
    if mtime_ns is None or size is None:
        return None
    return FileStat(mtime_ns=mtime_ns, size=size)


def _decode_optional_wire_source_stats(
    *,
    obj: dict[str, object],
) -> SourceStatsDict | None:
    """Decode the optional "ss" row of four non-negative counters."""
    row = _decode_optional_wire_row(obj=obj, key="ss", expected_len=4)
    if row is None:
        return None
    counts = _decode_wire_int_fields(row, 0, 1, 2, 3)
    if counts is None or min(counts) < 0:
        return None
    lines, functions, methods, classes = counts
    return SourceStatsDict(
        lines=lines,
        functions=functions,
        methods=methods,
        classes=classes,
    )


def _decode_optional_wire_items(
    *,
    obj: dict[str, object],
    key: str,
    decode_item: Callable[[object], _DecodedItemT | None],
) -> list[_DecodedItemT] | None:
    """Decode an optional homogeneous list; [] when absent, None when bad."""
    raw = obj.get(key)
    if raw is None:
        return []
    rows = _as_list(raw)
    if rows is None:
        return None
    decoded: list[_DecodedItemT] = []
    for raw_item in rows:
        item = decode_item(raw_item)
        if item is None:
            return None
        decoded.append(item)
    return decoded


def _decode_optional_wire_items_for_filepath(
    *,
    obj: dict[str, object],
    key: str,
    filepath: str,
    decode_item: Callable[[object, str], _DecodedItemT | None],
) -> list[_DecodedItemT] | None:
    """Like _decode_optional_wire_items, threading *filepath* to the decoder."""
    raw = obj.get(key)
    if raw is None:
        return []
    rows = _as_list(raw)
    if rows is None:
        return None
    decoded: list[_DecodedItemT] = []
    for raw_item in rows:
        item = decode_item(raw_item, filepath)
        if item is None:
            return None
        decoded.append(item)
    return decoded


def _decode_optional_wire_row(
    *,
    obj: dict[str, object],
    key: str,
    expected_len: int,
) -> list[object] | None:
    """Fetch *key* as a fixed-length row; None when absent or malformed."""
    raw = obj.get(key)
    if raw is None:
        return None
    row = _as_list(raw)
    if row is not None and len(row) == expected_len:
        return row
    return None


def _decode_optional_wire_names(
    *,
    obj: dict[str, object],
    key: str,
) -> list[str] | None:
    """Decode an optional list of strings; [] when absent, None when bad."""
    raw = obj.get(key)
    if raw is None:
        return []
    names = _as_list(raw)
    if names is None or any(not isinstance(name, str) for name in names):
        return None
    return [str(name) for name in names]


def _decode_optional_wire_coupled_classes(
    *,
    obj: dict[str, object],
    key: str,
) -> dict[str, list[str]] | None:
    """Decode the qualname -> coupled-class-names table; {} when absent."""
    raw = obj.get(key)
    if raw is None:
        return {}

    rows = _as_list(raw)
    if rows is None:
        return None

    table: dict[str, list[str]] = {}
    for raw_row in rows:
        pair = _as_list(raw_row)
        if pair is None or len(pair) != 2:
            return None
        qualname = _as_str(pair[0])
        names = _as_list(pair[1])
        if qualname is None or names is None:
            return None
        if any(not isinstance(name, str) for name in names):
            return None
        # Drop empty names, de-duplicate, keep a stable order.
        table[qualname] = sorted({str(name) for name in names if str(name)})

    return table


def _decode_wire_file_entry(value: object, filepath: str) -> CacheEntry | None:
    """Decode one wire-format file entry into a canonical CacheEntry.

    Any malformed section makes the whole entry undecodable (returns None),
    which forces a re-parse of the file instead of trusting partial data.
    """
    obj = _as_str_dict(value)
    if obj is None:
        return None

    stat = _decode_wire_stat(obj)
    if stat is None:
        return None
    source_stats = _decode_optional_wire_source_stats(obj=obj)
    file_sections = _decode_wire_file_sections(obj=obj, filepath=filepath)
    if file_sections is None:
        return None
    (
        units,
        blocks,
        segments,
        class_metrics,
        module_deps,
        dead_candidates,
    ) = file_sections
    name_sections = _decode_wire_name_sections(obj=obj)
    if name_sections is None:
        return None
    (
        referenced_names,
        referenced_qualnames,
        import_names,
        class_names,
    ) = name_sections
    typing_coverage = _decode_optional_wire_typing_coverage(obj=obj, filepath=filepath)
    docstring_coverage = _decode_optional_wire_docstring_coverage(
        obj=obj,
        filepath=filepath,
    )
    api_surface = _decode_optional_wire_api_surface(obj=obj, filepath=filepath)
    coupled_classes_map = _decode_optional_wire_coupled_classes(obj=obj, key="cc")
    if coupled_classes_map is None:
        return None

    # Re-join the separately encoded coupled-class lists onto their metrics.
    for metric in class_metrics:
        names = coupled_classes_map.get(metric["qualname"], [])
        if names:
            metric["coupled_classes"] = names

    # Distinguish "key absent" from "key present but empty": only a present
    # "sf" key yields a (possibly empty) normalized findings list.
    has_structural_findings = "sf" in obj
    structural_findings = _decode_wire_structural_findings_optional(obj)
    if structural_findings is None:
        return None

    return _attach_optional_cache_sections(
        CacheEntry(
            stat=stat,
            units=units,
            blocks=blocks,
            segments=segments,
            class_metrics=class_metrics,
            module_deps=module_deps,
            dead_candidates=dead_candidates,
            referenced_names=referenced_names,
            referenced_qualnames=referenced_qualnames,
            import_names=import_names,
            class_names=class_names,
        ),
        typing_coverage=typing_coverage,
        docstring_coverage=docstring_coverage,
        api_surface=api_surface,
        source_stats=source_stats,
        structural_findings=(
            _normalize_cached_structural_groups(structural_findings, filepath=filepath)
            if has_structural_findings
            else None
        ),
    )
def _decode_wire_file_sections(
    *,
    obj: dict[str, object],
    filepath: str,
) -> (
    tuple[
        list[UnitDict],
        list[BlockDict],
        list[SegmentDict],
        list[ClassMetricsDict],
        list[ModuleDepDict],
        list[DeadCandidateDict],
    ]
    | None
):
    """Decode the per-file wire sections (u/b/s/cm/md/dc); None when any is bad."""
    units = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="u",
        filepath=filepath,
        decode_item=_decode_wire_unit,
    )
    blocks = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="b",
        filepath=filepath,
        decode_item=_decode_wire_block,
    )
    segments = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="s",
        filepath=filepath,
        decode_item=_decode_wire_segment,
    )
    class_metrics = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="cm",
        filepath=filepath,
        decode_item=_decode_wire_class_metric,
    )
    # Module deps carry their own source path, so no filepath threading here.
    module_deps = _decode_optional_wire_items(
        obj=obj,
        key="md",
        decode_item=_decode_wire_module_dep,
    )
    dead_candidates = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="dc",
        filepath=filepath,
        decode_item=_decode_wire_dead_candidate,
    )
    sections = (units, blocks, segments, class_metrics, module_deps, dead_candidates)
    if any(section is None for section in sections):
        return None
    return units, blocks, segments, class_metrics, module_deps, dead_candidates


def _decode_wire_name_sections(
    *,
    obj: dict[str, object],
) -> tuple[list[str], list[str], list[str], list[str]] | None:
    """Decode the four name-list sections (rn/rq/in/cn); None when any is bad."""
    referenced_names = _decode_optional_wire_names(obj=obj, key="rn")
    referenced_qualnames = _decode_optional_wire_names(obj=obj, key="rq")
    import_names = _decode_optional_wire_names(obj=obj, key="in")
    class_names = _decode_optional_wire_names(obj=obj, key="cn")
    sections = (referenced_names, referenced_qualnames, import_names, class_names)
    if any(section is None for section in sections):
        return None
    return referenced_names, referenced_qualnames, import_names, class_names


def _decode_optional_wire_typing_coverage(
    *,
    obj: dict[str, object],
    filepath: str,
) -> ModuleTypingCoverageDict | None:
    """Decode the optional "tc" row: [module, six counters]."""
    decoded = _decode_optional_wire_module_ints(
        obj=obj,
        key="tc",
        expected_len=7,
        int_indexes=(1, 2, 3, 4, 5, 6),
    )
    if decoded is None:
        return None
    module, counters = decoded
    (
        callable_count,
        params_total,
        params_annotated,
        returns_total,
        returns_annotated,
        any_annotation_count,
    ) = counters
    return ModuleTypingCoverageDict(
        module=module,
        filepath=filepath,
        callable_count=callable_count,
        params_total=params_total,
        params_annotated=params_annotated,
        returns_total=returns_total,
        returns_annotated=returns_annotated,
        any_annotation_count=any_annotation_count,
    )


def _decode_optional_wire_docstring_coverage(
    *,
    obj: dict[str, object],
    filepath: str,
) -> ModuleDocstringCoverageDict | None:
    """Decode the optional "dg" row: [module, total, documented]."""
    decoded = _decode_optional_wire_module_ints(
        obj=obj,
        key="dg",
        expected_len=3,
        int_indexes=(1, 2),
    )
    if decoded is None:
        return None
    module, counts = decoded
    public_symbol_total, public_symbol_documented = counts
    return ModuleDocstringCoverageDict(
        module=module,
        filepath=filepath,
        public_symbol_total=public_symbol_total,
        public_symbol_documented=public_symbol_documented,
    )


def _decode_optional_wire_api_surface(
    *,
    obj: dict[str, object],
    filepath: str,
) -> ModuleApiSurfaceDict | None:
    """Decode the optional "as" row: [module, all_declared, symbols]."""
    row = _decode_optional_wire_row(obj=obj, key="as", expected_len=3)
    if row is None:
        return None
    module = _as_str(row[0])
    # Reuse the names decoder by wrapping the nested list in a one-key dict.
    all_declared = _decode_optional_wire_names(obj={"ad": row[1]}, key="ad")
    symbols_raw = _as_list(row[2])
    if module is None or all_declared is None or symbols_raw is None:
        return None
    symbols: list[PublicSymbolDict] = []
    for raw_symbol in symbols_raw:
        symbol = _decode_wire_api_surface_symbol(raw_symbol)
        if symbol is None:
            return None
        symbols.append(symbol)
    return ModuleApiSurfaceDict(
        module=module,
        filepath=filepath,
        all_declared=sorted(set(all_declared)),
        symbols=symbols,
    )


def _decode_optional_wire_module_ints(
    *,
    obj: dict[str, object],
    key: str,
    expected_len: int,
    int_indexes: tuple[int, ...],
) -> tuple[str, tuple[int, ...]] | None:
    """Decode a row shaped as [module, ints...]; None when absent or bad."""
    row = _decode_optional_wire_row(obj=obj, key=key, expected_len=expected_len)
    if row is None:
        return None
    module = _as_str(row[0])
    ints = _decode_wire_int_fields(row, *int_indexes)
    if module is None or ints is None:
        return None
    return module, ints


def _decode_wire_api_surface_symbol(
    value: object,
) -> PublicSymbolDict | None:
    """Decode one public-symbol row:
    [qualname, kind, start, end, exported_via, returns_hash, params]."""
    row = _decode_wire_row(value, valid_lengths={7})
    if row is None:
        return None
    strings = _decode_wire_str_fields(row, 0, 1, 4, 5)
    ints = _decode_wire_int_fields(row, 2, 3)
    raw_params = _as_list(row[6])
    if strings is None or ints is None or raw_params is None:
        return None
    qualname, kind, exported_via, returns_hash = strings
    start_line, end_line = ints
    params: list[ApiParamSpecDict] = []
    for raw_param in raw_params:
        param = _decode_wire_api_param_spec(raw_param)
        if param is None:
            return None
        params.append(param)
    return PublicSymbolDict(
        qualname=qualname,
        kind=kind,
        start_line=start_line,
        end_line=end_line,
        params=params,
        returns_hash=returns_hash,
        exported_via=exported_via,
    )
def _decode_wire_api_param_spec(
    value: object,
) -> ApiParamSpecDict | None:
    """Decode one parameter row: [name, kind, has_default, annotation_hash]."""
    row = _decode_wire_row(value, valid_lengths={4})
    if row is None:
        return None
    strings = _decode_wire_str_fields(row, 0, 1, 3)
    flags = _decode_wire_int_fields(row, 2)
    if strings is None or flags is None:
        return None
    name, param_kind, annotation_hash = strings
    (has_default_raw,) = flags
    return ApiParamSpecDict(
        name=name,
        kind=param_kind,
        has_default=bool(has_default_raw),
        annotation_hash=annotation_hash,
    )


def _decode_wire_structural_findings_optional(
    obj: dict[str, object],
) -> list[StructuralFindingGroupDict] | None:
    """Decode optional 'sf' wire key.

    Returns [] if absent, None on invalid format.
    """
    raw = obj.get("sf")
    if raw is None:
        return []
    raw_groups = _as_list(raw)
    if raw_groups is None:
        return None
    groups: list[StructuralFindingGroupDict] = []
    for raw_group in raw_groups:
        decoded = _decode_wire_structural_group(raw_group)
        if decoded is None:
            return None
        groups.append(decoded)
    return groups


def _decode_wire_row(
    value: object,
    *,
    valid_lengths: Collection[int],
) -> list[object] | None:
    """Narrow *value* to a list whose length is one of *valid_lengths*."""
    row = _as_list(value)
    if row is not None and len(row) in valid_lengths:
        return row
    return None


def _decode_wire_named_span(
    value: object,
    *,
    valid_lengths: Collection[int],
) -> tuple[list[object], str, int, int] | None:
    """Decode a row whose first three columns are (qualname, start, end)."""
    row = _decode_wire_row(value, valid_lengths=valid_lengths)
    if row is None:
        return None
    span = _decode_wire_qualname_span(row)
    if span is None:
        return None
    qualname, start_line, end_line = span
    return row, qualname, start_line, end_line


def _decode_wire_named_sized_span(
    value: object,
    *,
    valid_lengths: Collection[int],
) -> tuple[list[object], str, int, int, int] | None:
    """Decode a row whose first four columns are (qualname, start, end, size)."""
    row = _decode_wire_row(value, valid_lengths=valid_lengths)
    if row is None:
        return None
    sized_span = _decode_wire_qualname_span_size(row)
    if sized_span is None:
        return None
    qualname, start_line, end_line, size = sized_span
    return row, qualname, start_line, end_line, size


def _decode_wire_int_fields(
    row: list[object],
    *indexes: int,
) -> tuple[int, ...] | None:
    """Read ints at *indexes*; None when any selected column is not an int."""
    decoded: list[int] = []
    for index in indexes:
        number = _as_int(row[index])
        if number is None:
            return None
        decoded.append(number)
    return tuple(decoded)


def _decode_wire_str_fields(
    row: list[object],
    *indexes: int,
) -> tuple[str, ...] | None:
    """Read strings at *indexes*; None when any selected column is not a str."""
    decoded: list[str] = []
    for index in indexes:
        text = _as_str(row[index])
        if text is None:
            return None
        decoded.append(text)
    return tuple(decoded)


def _decode_wire_unit_core_fields(
    row: list[object],
) -> tuple[int, int, str, str, int, int, Literal["low", "medium", "high"], str] | None:
    """Decode the always-present unit columns (sizes, hashes, risk label)."""
    ints = _decode_wire_int_fields(row, 3, 4, 7, 8)
    strings = _decode_wire_str_fields(row, 5, 6, 10)
    risk = _as_risk_literal(row[9])
    if ints is None or strings is None or risk is None:
        return None
    loc, stmt_count, cyclomatic_complexity, nesting_depth = ints
    fingerprint, loc_bucket, raw_hash = strings
    return (
        loc,
        stmt_count,
        fingerprint,
        loc_bucket,
        cyclomatic_complexity,
        nesting_depth,
        risk,
        raw_hash,
    )


def _decode_wire_unit_flow_profiles(
    row: list[object],
) -> tuple[int, str, bool, str, str, str] | None:
    """Decode the six flow-profile columns of a 17-column unit row.

    Rows without the flow columns (the shorter 11-column layout) fall back
    to the module-level defaults.
    """
    if len(row) != 17:
        return _DEFAULT_WIRE_UNIT_FLOW_PROFILES

    guard_count = _as_int(row[11])
    guard_terminal_profile = _as_str(row[12])
    guard_side_effect_flag = _as_int(row[13])
    terminal_kind = _as_str(row[14])
    try_finally_profile = _as_str(row[15])
    side_effect_order_profile = _as_str(row[16])
    if (
        guard_count is None
        or guard_terminal_profile is None
        or guard_side_effect_flag is None
        or terminal_kind is None
        or try_finally_profile is None
        or side_effect_order_profile is None
    ):
        return None
    # Clamp/normalize: counts never negative, empty strings get the defaults.
    return (
        max(0, guard_count),
        guard_terminal_profile or "none",
        guard_side_effect_flag != 0,
        terminal_kind or "fallthrough",
        try_finally_profile or "none",
        side_effect_order_profile or "none",
    )


def _decode_wire_class_metric_fields(
    row: list[object],
) -> tuple[int, int, int, int, str, str] | None:
    """Decode cbo/lcom4/counts and the two risk labels of a class-metric row."""
    ints = _decode_wire_int_fields(row, 3, 4, 5, 6)
    strings = _decode_wire_str_fields(row, 7, 8)
    if ints is None or strings is None:
        return None
    cbo, lcom4, method_count, instance_var_count = ints
    risk_coupling, risk_cohesion = strings
    return (
        cbo,
        lcom4,
        method_count,
        instance_var_count,
        risk_coupling,
        risk_cohesion,
    )


def _decode_wire_structural_group(value: object) -> StructuralFindingGroupDict | None:
    """Decode one structural-finding group row: [kind, key, signature, items]."""
    row = _decode_wire_row(value, valid_lengths={4})
    if row is None:
        return None
    strings = _decode_wire_str_fields(row, 0, 1)
    raw_items = _as_list(row[3])
    signature = _decode_wire_structural_signature(row[2])
    if strings is None or raw_items is None or signature is None:
        return None
    finding_kind, finding_key = strings
    occurrences: list[StructuralFindingOccurrenceDict] = []
    for raw_item in raw_items:
        occurrence = _decode_wire_structural_occurrence(raw_item)
        if occurrence is None:
            return None
        occurrences.append(occurrence)
    return StructuralFindingGroupDict(
        finding_kind=finding_kind,
        finding_key=finding_key,
        signature=signature,
        items=occurrences,
    )


def _decode_wire_structural_signature(value: object) -> dict[str, str] | None:
    """Decode a signature encoded as a list of [key, value] string pairs."""
    raw_pairs = _as_list(value)
    if raw_pairs is None:
        return None
    signature: dict[str, str] = {}
    for raw_pair in raw_pairs:
        pair = _as_list(raw_pair)
        if pair is None or len(pair) != 2:
            return None
        key = _as_str(pair[0])
        val = _as_str(pair[1])
        if key is None or val is None:
            return None
        signature[key] = val
    return signature


def _decode_wire_structural_occurrence(
    value: object,
) -> StructuralFindingOccurrenceDict | None:
    """Decode one occurrence row: [qualname, start, end]."""
    row = _as_list(value)
    if row is None or len(row) != 3:
        return None
    qualname = _as_str(row[0])
    start = _as_int(row[1])
    end = _as_int(row[2])
    if qualname is None or start is None or end is None:
        return None
    return StructuralFindingOccurrenceDict(
        qualname=qualname,
        start=start,
        end=end,
    )
def _decode_wire_unit(value: object, filepath: str) -> UnitDict | None:
    """Decode one unit row (11 columns, or 17 with flow profiles)."""
    named = _decode_wire_named_span(value, valid_lengths={11, 17})
    if named is None:
        return None
    row, qualname, start_line, end_line = named
    core = _decode_wire_unit_core_fields(row)
    flow = _decode_wire_unit_flow_profiles(row)
    if core is None or flow is None:
        return None
    (
        loc,
        stmt_count,
        fingerprint,
        loc_bucket,
        cyclomatic_complexity,
        nesting_depth,
        risk,
        raw_hash,
    ) = core
    (
        entry_guard_count,
        entry_guard_terminal_profile,
        entry_guard_has_side_effect_before,
        terminal_kind,
        try_finally_profile,
        side_effect_order_profile,
    ) = flow
    return FunctionGroupItem(
        qualname=qualname,
        filepath=filepath,
        start_line=start_line,
        end_line=end_line,
        loc=loc,
        stmt_count=stmt_count,
        fingerprint=fingerprint,
        loc_bucket=loc_bucket,
        cyclomatic_complexity=cyclomatic_complexity,
        nesting_depth=nesting_depth,
        risk=risk,
        raw_hash=raw_hash,
        entry_guard_count=entry_guard_count,
        entry_guard_terminal_profile=entry_guard_terminal_profile,
        entry_guard_has_side_effect_before=entry_guard_has_side_effect_before,
        terminal_kind=terminal_kind,
        try_finally_profile=try_finally_profile,
        side_effect_order_profile=side_effect_order_profile,
    )


def _decode_wire_block(value: object, filepath: str) -> BlockDict | None:
    """Decode one block row: [qualname, start, end, size, block_hash]."""
    named = _decode_wire_named_sized_span(value, valid_lengths={5})
    if named is None:
        return None
    row, qualname, start_line, end_line, size = named
    block_hash = _as_str(row[4])
    if block_hash is None:
        return None
    return BlockGroupItem(
        block_hash=block_hash,
        filepath=filepath,
        qualname=qualname,
        start_line=start_line,
        end_line=end_line,
        size=size,
    )


def _decode_wire_segment(value: object, filepath: str) -> SegmentDict | None:
    """Decode one segment row: [qualname, start, end, size, hash, sig]."""
    named = _decode_wire_named_sized_span(value, valid_lengths={6})
    if named is None:
        return None
    row, qualname, start_line, end_line, size = named
    segment_hash = _as_str(row[4])
    segment_sig = _as_str(row[5])
    if segment_hash is None or segment_sig is None:
        return None
    return SegmentGroupItem(
        segment_hash=segment_hash,
        segment_sig=segment_sig,
        filepath=filepath,
        qualname=qualname,
        start_line=start_line,
        end_line=end_line,
        size=size,
    )


def _decode_wire_class_metric(
    value: object,
    filepath: str,
) -> ClassMetricsDict | None:
    """Decode one class-metric row (9 columns) into a ClassMetricsDict."""
    named = _decode_wire_named_span(value, valid_lengths={9})
    if named is None:
        return None
    row, qualname, start_line, end_line = named
    fields = _decode_wire_class_metric_fields(row)
    if fields is None:
        return None
    cbo, lcom4, method_count, instance_var_count, risk_coupling, risk_cohesion = fields
    return ClassMetricsDict(
        qualname=qualname,
        filepath=filepath,
        start_line=start_line,
        end_line=end_line,
        cbo=cbo,
        lcom4=lcom4,
        method_count=method_count,
        instance_var_count=instance_var_count,
        risk_coupling=risk_coupling,
        risk_cohesion=risk_cohesion,
    )


def _decode_wire_module_dep(value: object) -> ModuleDepDict | None:
    """Decode one module-dependency row: [source, target, import_type, line]."""
    row = _as_list(value)
    if row is None or len(row) != 4:
        return None
    source = _as_str(row[0])
    target = _as_str(row[1])
    import_type = _as_str(row[2])
    line = _as_int(row[3])
    if source is None or target is None or import_type is None or line is None:
        return None
    return ModuleDepDict(
        source=source,
        target=target,
        import_type=import_type,
        line=line,
    )


def _decode_wire_dead_candidate(
    value: object,
    filepath: str,
) -> DeadCandidateDict | None:
    """Decode one dead-code candidate row.

    Five mandatory columns; an optional sixth column carries the list of
    suppressed rule names, which is de-duplicated and sorted on decode.
    """
    row = _decode_wire_row(value, valid_lengths={5, 6})
    if row is None:
        return None
    strings = _decode_wire_str_fields(row, 0, 1, 4)
    ints = _decode_wire_int_fields(row, 2, 3)
    suppressed_rules: list[str] = []
    if len(row) == 6:
        raw_rules = _as_list(row[5])
        if raw_rules is None or any(
            not isinstance(rule, str) for rule in raw_rules
        ):
            return None
        suppressed_rules = sorted({str(rule) for rule in raw_rules if str(rule)})
    if strings is None or ints is None:
        return None
    qualname, local_name, kind = strings
    start_line, end_line = ints
    candidate = DeadCandidateDict(
        qualname=qualname,
        local_name=local_name,
        filepath=filepath,
        start_line=start_line,
        end_line=end_line,
        kind=kind,
    )
    if suppressed_rules:
        candidate["suppressed_rules"] = suppressed_rules
    return candidate
rule in raw_rules): - return None - suppressed_rules = sorted({str(rule) for rule in raw_rules if str(rule)}) - if str_fields is None or int_fields is None: - return None - qualname, local_name, kind = str_fields - start_line, end_line = int_fields - decoded = DeadCandidateDict( - qualname=qualname, - local_name=local_name, - filepath=filepath, - start_line=start_line, - end_line=end_line, - kind=kind, - ) - if suppressed_rules: - decoded["suppressed_rules"] = suppressed_rules - return decoded - - -def _encode_wire_file_entry(entry: CacheEntry) -> dict[str, object]: - wire: dict[str, object] = { - "st": [entry["stat"]["mtime_ns"], entry["stat"]["size"]], - } - source_stats = entry.get("source_stats") - if source_stats is not None: - wire["ss"] = [ - source_stats["lines"], - source_stats["functions"], - source_stats["methods"], - source_stats["classes"], - ] - - units = sorted( - entry["units"], - key=lambda unit: ( - unit["qualname"], - unit["start_line"], - unit["end_line"], - unit["fingerprint"], - ), - ) - if units: - wire["u"] = [ - [ - unit["qualname"], - unit["start_line"], - unit["end_line"], - unit["loc"], - unit["stmt_count"], - unit["fingerprint"], - unit["loc_bucket"], - unit.get("cyclomatic_complexity", 1), - unit.get("nesting_depth", 0), - unit.get("risk", "low"), - unit.get("raw_hash", ""), - unit.get("entry_guard_count", 0), - unit.get("entry_guard_terminal_profile", "none"), - 1 if unit.get("entry_guard_has_side_effect_before", False) else 0, - unit.get("terminal_kind", "fallthrough"), - unit.get("try_finally_profile", "none"), - unit.get("side_effect_order_profile", "none"), - ] - for unit in units - ] - - blocks = sorted( - entry["blocks"], - key=lambda block: ( - block["qualname"], - block["start_line"], - block["end_line"], - block["block_hash"], - ), - ) - if blocks: - wire["b"] = [ - [ - block["qualname"], - block["start_line"], - block["end_line"], - block["size"], - block["block_hash"], - ] - for block in blocks - ] - - segments = sorted( - 
entry["segments"], - key=lambda segment: ( - segment["qualname"], - segment["start_line"], - segment["end_line"], - segment["segment_hash"], - ), - ) - if segments: - wire["s"] = [ - [ - segment["qualname"], - segment["start_line"], - segment["end_line"], - segment["size"], - segment["segment_hash"], - segment["segment_sig"], - ] - for segment in segments - ] - - class_metrics = sorted( - entry["class_metrics"], - key=lambda metric: ( - metric["start_line"], - metric["end_line"], - metric["qualname"], - ), - ) - if class_metrics: - coupled_classes_rows: list[list[object]] = [] - - def _append_coupled_classes_row(metric: ClassMetricsDict) -> None: - coupled_classes = _normalized_optional_string_list( - metric.get("coupled_classes", []) - ) - if coupled_classes: - coupled_classes_rows.append([metric["qualname"], coupled_classes]) - - wire["cm"] = [ - [ - metric["qualname"], - metric["start_line"], - metric["end_line"], - metric["cbo"], - metric["lcom4"], - metric["method_count"], - metric["instance_var_count"], - metric["risk_coupling"], - metric["risk_cohesion"], - ] - for metric in class_metrics - ] - for metric in class_metrics: - _append_coupled_classes_row(metric) - if coupled_classes_rows: - wire["cc"] = coupled_classes_rows - - module_deps = sorted( - entry["module_deps"], - key=lambda dep: (dep["source"], dep["target"], dep["import_type"], dep["line"]), - ) - if module_deps: - wire["md"] = [ - [ - dep["source"], - dep["target"], - dep["import_type"], - dep["line"], - ] - for dep in module_deps - ] - - dead_candidates = sorted( - entry["dead_candidates"], - key=lambda candidate: ( - candidate["start_line"], - candidate["end_line"], - candidate["qualname"], - candidate["local_name"], - candidate["kind"], - ), - ) - if dead_candidates: - # Dead candidates are stored inside a per-file cache entry, so the - # filepath is implicit and does not need to be repeated in every row. 
- encoded_dead_candidates: list[list[object]] = [] - for candidate in dead_candidates: - encoded = [ - candidate["qualname"], - candidate["local_name"], - candidate["start_line"], - candidate["end_line"], - candidate["kind"], - ] - suppressed_rules = candidate.get("suppressed_rules", []) - normalized_rules = _normalized_optional_string_list(suppressed_rules) - if normalized_rules: - encoded.append(normalized_rules) - encoded_dead_candidates.append(encoded) - wire["dc"] = encoded_dead_candidates - - if entry["referenced_names"]: - wire["rn"] = sorted(set(entry["referenced_names"])) - if entry.get("referenced_qualnames"): - wire["rq"] = sorted(set(entry["referenced_qualnames"])) - if entry["import_names"]: - wire["in"] = sorted(set(entry["import_names"])) - if entry["class_names"]: - wire["cn"] = sorted(set(entry["class_names"])) - typing_coverage = entry.get("typing_coverage") - if typing_coverage is not None: - wire["tc"] = [ - typing_coverage["module"], - typing_coverage["callable_count"], - typing_coverage["params_total"], - typing_coverage["params_annotated"], - typing_coverage["returns_total"], - typing_coverage["returns_annotated"], - typing_coverage["any_annotation_count"], - ] - docstring_coverage = entry.get("docstring_coverage") - if docstring_coverage is not None: - wire["dg"] = [ - docstring_coverage["module"], - docstring_coverage["public_symbol_total"], - docstring_coverage["public_symbol_documented"], - ] - api_surface = entry.get("api_surface") - if api_surface is not None: - wire["as"] = [ - api_surface["module"], - sorted(set(api_surface.get("all_declared", []))), - [ - [ - symbol["qualname"], - symbol["kind"], - symbol["start_line"], - symbol["end_line"], - symbol.get("exported_via", "name"), - symbol.get("returns_hash", ""), - [ - [ - param["name"], - param["kind"], - 1 if param["has_default"] else 0, - param.get("annotation_hash", ""), - ] - for param in symbol.get("params", []) - ], - ] - for symbol in api_surface["symbols"] - ], - ] - - if 
"structural_findings" in entry: - sf = entry.get("structural_findings", []) - wire["sf"] = [ - [ - group["finding_kind"], - group["finding_key"], - sorted(group["signature"].items()), - [ - [item["qualname"], item["start"], item["end"]] - for item in group["items"] - ], - ] - for group in sf - ] - - return wire - - -def _resolve_root(root: str | Path | None) -> Path | None: - if root is None: - return None - try: - return Path(root).resolve(strict=False) - except OSError: - return None - - -def _is_file_stat_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - return isinstance(value.get("mtime_ns"), int) and isinstance(value.get("size"), int) - - -def _is_source_stats_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - lines = value.get("lines") - functions = value.get("functions") - methods = value.get("methods") - classes = value.get("classes") - return ( - isinstance(lines, int) - and lines >= 0 - and isinstance(functions, int) - and functions >= 0 - and isinstance(methods, int) - and methods >= 0 - and isinstance(classes, int) - and classes >= 0 - ) - - -def _is_unit_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - string_keys = ("qualname", "filepath", "fingerprint", "loc_bucket") - int_keys = ("start_line", "end_line", "loc", "stmt_count") - if not _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys): - return False - cyclomatic_complexity = value.get("cyclomatic_complexity", 1) - nesting_depth = value.get("nesting_depth", 0) - risk = value.get("risk", "low") - raw_hash = value.get("raw_hash", "") - return ( - isinstance(cyclomatic_complexity, int) - and isinstance(nesting_depth, int) - and isinstance(risk, str) - and risk in {"low", "medium", "high"} - and isinstance(raw_hash, str) - and isinstance(value.get("entry_guard_count", 0), int) - and isinstance(value.get("entry_guard_terminal_profile", "none"), str) - and 
isinstance(value.get("entry_guard_has_side_effect_before", False), bool) - and isinstance(value.get("terminal_kind", "fallthrough"), str) - and isinstance(value.get("try_finally_profile", "none"), str) - and isinstance(value.get("side_effect_order_profile", "none"), str) - ) - - -def _is_block_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - string_keys = ("block_hash", "filepath", "qualname") - int_keys = ("start_line", "end_line", "size") - return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) - - -def _is_segment_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - string_keys = ("segment_hash", "segment_sig", "filepath", "qualname") - int_keys = ("start_line", "end_line", "size") - return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) - - -def _is_module_typing_coverage_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - string_keys = ("module", "filepath") - int_keys = ( - "callable_count", - "params_total", - "params_annotated", - "returns_total", - "returns_annotated", - "any_annotation_count", - ) - return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) - - -def _is_module_docstring_coverage_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - string_keys = ("module", "filepath") - int_keys = ("public_symbol_total", "public_symbol_documented") - return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) - - -def _is_api_param_spec_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - return ( - isinstance(value.get("name"), str) - and isinstance(value.get("kind"), str) - and isinstance(value.get("has_default"), bool) - and isinstance(value.get("annotation_hash", ""), str) - ) - - -def _is_public_symbol_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - if not _has_typed_fields( - value, - 
string_keys=("qualname", "kind", "exported_via"), - int_keys=("start_line", "end_line"), - ): - return False - params = value.get("params", []) - return ( - isinstance(value.get("returns_hash", ""), str) - and isinstance( - params, - list, - ) - and all(_is_api_param_spec_dict(item) for item in params) - ) - - -def _is_module_api_surface_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - all_declared = value.get("all_declared", []) - symbols = value.get("symbols", []) - return ( - isinstance(value.get("module"), str) - and isinstance(value.get("filepath"), str) - and _is_string_list(all_declared) - and isinstance(symbols, list) - and all(_is_public_symbol_dict(item) for item in symbols) - ) - - -def _is_class_metrics_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - if not _has_typed_fields( - value, - string_keys=( - "qualname", - "filepath", - "risk_coupling", - "risk_cohesion", - ), - int_keys=( - "start_line", - "end_line", - "cbo", - "lcom4", - "method_count", - "instance_var_count", - ), - ): - return False - - coupled_classes = value.get("coupled_classes") - if coupled_classes is None: - return True - return _is_string_list(coupled_classes) - - -def _is_module_dep_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - return _has_typed_fields( - value, - string_keys=("source", "target", "import_type"), - int_keys=("line",), - ) - - -def _is_dead_candidate_dict(value: object) -> bool: - if not isinstance(value, dict): - return False - if not _has_typed_fields( - value, - string_keys=("qualname", "local_name", "filepath", "kind"), - int_keys=("start_line", "end_line"), - ): - return False - suppressed_rules = value.get("suppressed_rules") - if suppressed_rules is None: - return True - return _is_string_list(suppressed_rules) - - -def _is_string_list(value: object) -> bool: - return isinstance(value, list) and all(isinstance(item, str) for item in value) - - -def 
_has_typed_fields( - value: Mapping[str, object], - *, - string_keys: Sequence[str], - int_keys: Sequence[str], -) -> bool: - return all(isinstance(value.get(key), str) for key in string_keys) and all( - isinstance(value.get(key), int) for key in int_keys - ) diff --git a/codeclone/cache/__init__.py b/codeclone/cache/__init__.py new file mode 100644 index 0000000..bf55c08 --- /dev/null +++ b/codeclone/cache/__init__.py @@ -0,0 +1,273 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from ._canonicalize import ( + _as_file_stat_dict, + _as_module_api_surface_dict, + _as_module_docstring_coverage_dict, + _as_module_typing_coverage_dict, + _as_source_stats_dict, + _attach_optional_cache_sections, + _canonicalize_cache_entry, + _decode_optional_cache_sections, + _has_cache_entry_container_shape, + _is_canonical_cache_entry, + _normalized_optional_string_list, +) +from ._validators import ( + _has_typed_fields, + _is_api_param_spec_dict, + _is_block_dict, + _is_class_metrics_dict, + _is_dead_candidate_dict, + _is_file_stat_dict, + _is_module_api_surface_dict, + _is_module_dep_dict, + _is_module_docstring_coverage_dict, + _is_module_typing_coverage_dict, + _is_public_symbol_dict, + _is_segment_dict, + _is_source_stats_dict, + _is_string_list, + _is_unit_dict, +) +from ._wire_decode import ( + _decode_optional_wire_api_surface, + _decode_optional_wire_docstring_coverage, + _decode_optional_wire_module_ints, + _decode_optional_wire_source_stats, + _decode_optional_wire_typing_coverage, + _decode_wire_api_param_spec, + _decode_wire_api_surface_symbol, + _decode_wire_block, + _decode_wire_class_metric, + _decode_wire_dead_candidate, + _decode_wire_file_entry, + _decode_wire_file_sections, + 
_decode_wire_module_dep, + _decode_wire_name_sections, + _decode_wire_segment, + _decode_wire_stat, + _decode_wire_structural_findings_optional, + _decode_wire_structural_group, + _decode_wire_structural_occurrence, + _decode_wire_structural_signature, + _decode_wire_unit, +) +from ._wire_encode import _encode_wire_file_entry +from ._wire_helpers import ( + _decode_optional_wire_coupled_classes, + _decode_optional_wire_names, + _decode_wire_class_metric_fields, + _decode_wire_int_fields, + _decode_wire_named_sized_span, + _decode_wire_named_span, + _decode_wire_qualname_span, + _decode_wire_qualname_span_size, + _decode_wire_row, + _decode_wire_str_fields, + _decode_wire_unit_core_fields, + _decode_wire_unit_flow_profiles, +) +from .entries import ( + ApiParamSpecDict, + BlockDict, + CacheEntry, + CacheEntryBase, + ClassMetricsDict, + DeadCandidateDict, + FileStat, + ModuleApiSurfaceDict, + ModuleDepDict, + ModuleDocstringCoverageDict, + ModuleTypingCoverageDict, + PublicSymbolDict, + SegmentDict, + SourceStatsDict, + StructuralFindingGroupDict, + StructuralFindingOccurrenceDict, + UnitDict, + _api_surface_dict_from_model, + _as_risk_literal, + _block_dict_from_model, + _class_metrics_dict_from_model, + _dead_candidate_dict_from_model, + _docstring_coverage_dict_from_model, + _module_dep_dict_from_model, + _new_optional_metrics_payload, + _normalize_cached_structural_group, + _normalize_cached_structural_groups, + _segment_dict_from_model, + _structural_group_dict_from_model, + _structural_occurrence_dict_from_model, + _typing_coverage_dict_from_model, + _unit_dict_from_model, +) +from .integrity import ( + as_int_or_none, + as_object_list, + as_str_dict, + as_str_or_none, + canonical_json, + read_json_document, + sign_cache_payload, + verify_cache_payload_signature, + write_json_document_atomically, +) +from .projection import ( + SegmentReportProjection, + build_segment_report_projection, + decode_segment_report_projection, + encode_segment_report_projection, + 
runtime_filepath_from_wire, + wire_filepath_from_runtime, +) +from .store import Cache, file_stat_signature +from .versioning import ( + _DEFAULT_WIRE_UNIT_FLOW_PROFILES, + CACHE_VERSION, + LEGACY_CACHE_SECRET_FILENAME, + MAX_CACHE_SIZE_BYTES, + AnalysisProfile, + CacheData, + CacheStatus, + _as_analysis_profile, + _empty_cache_data, + _resolve_root, +) + +_as_str = as_str_or_none +_as_int = as_int_or_none +_as_list = as_object_list +_as_str_dict = as_str_dict + +__all__ = [ + "CACHE_VERSION", + "LEGACY_CACHE_SECRET_FILENAME", + "MAX_CACHE_SIZE_BYTES", + "_DEFAULT_WIRE_UNIT_FLOW_PROFILES", + "AnalysisProfile", + "ApiParamSpecDict", + "BlockDict", + "Cache", + "CacheData", + "CacheEntry", + "CacheEntryBase", + "CacheStatus", + "ClassMetricsDict", + "DeadCandidateDict", + "FileStat", + "ModuleApiSurfaceDict", + "ModuleDepDict", + "ModuleDocstringCoverageDict", + "ModuleTypingCoverageDict", + "PublicSymbolDict", + "SegmentDict", + "SegmentReportProjection", + "SourceStatsDict", + "StructuralFindingGroupDict", + "StructuralFindingOccurrenceDict", + "UnitDict", + "_api_surface_dict_from_model", + "_as_analysis_profile", + "_as_file_stat_dict", + "_as_int", + "_as_list", + "_as_module_api_surface_dict", + "_as_module_docstring_coverage_dict", + "_as_module_typing_coverage_dict", + "_as_risk_literal", + "_as_source_stats_dict", + "_as_str", + "_as_str_dict", + "_attach_optional_cache_sections", + "_block_dict_from_model", + "_canonicalize_cache_entry", + "_class_metrics_dict_from_model", + "_dead_candidate_dict_from_model", + "_decode_optional_cache_sections", + "_decode_optional_wire_api_surface", + "_decode_optional_wire_coupled_classes", + "_decode_optional_wire_docstring_coverage", + "_decode_optional_wire_module_ints", + "_decode_optional_wire_names", + "_decode_optional_wire_source_stats", + "_decode_optional_wire_typing_coverage", + "_decode_wire_api_param_spec", + "_decode_wire_api_surface_symbol", + "_decode_wire_block", + "_decode_wire_class_metric", + 
"_decode_wire_class_metric_fields", + "_decode_wire_dead_candidate", + "_decode_wire_file_entry", + "_decode_wire_file_sections", + "_decode_wire_int_fields", + "_decode_wire_module_dep", + "_decode_wire_name_sections", + "_decode_wire_named_sized_span", + "_decode_wire_named_span", + "_decode_wire_qualname_span", + "_decode_wire_qualname_span_size", + "_decode_wire_row", + "_decode_wire_segment", + "_decode_wire_stat", + "_decode_wire_str_fields", + "_decode_wire_structural_findings_optional", + "_decode_wire_structural_group", + "_decode_wire_structural_occurrence", + "_decode_wire_structural_signature", + "_decode_wire_unit", + "_decode_wire_unit_core_fields", + "_decode_wire_unit_flow_profiles", + "_docstring_coverage_dict_from_model", + "_empty_cache_data", + "_encode_wire_file_entry", + "_has_cache_entry_container_shape", + "_has_typed_fields", + "_is_api_param_spec_dict", + "_is_block_dict", + "_is_canonical_cache_entry", + "_is_class_metrics_dict", + "_is_dead_candidate_dict", + "_is_file_stat_dict", + "_is_module_api_surface_dict", + "_is_module_dep_dict", + "_is_module_docstring_coverage_dict", + "_is_module_typing_coverage_dict", + "_is_public_symbol_dict", + "_is_segment_dict", + "_is_source_stats_dict", + "_is_string_list", + "_is_unit_dict", + "_module_dep_dict_from_model", + "_new_optional_metrics_payload", + "_normalize_cached_structural_group", + "_normalize_cached_structural_groups", + "_normalized_optional_string_list", + "_resolve_root", + "_segment_dict_from_model", + "_structural_group_dict_from_model", + "_structural_occurrence_dict_from_model", + "_typing_coverage_dict_from_model", + "_unit_dict_from_model", + "as_int_or_none", + "as_object_list", + "as_str_dict", + "as_str_or_none", + "build_segment_report_projection", + "canonical_json", + "decode_segment_report_projection", + "encode_segment_report_projection", + "file_stat_signature", + "read_json_document", + "runtime_filepath_from_wire", + "sign_cache_payload", + 
"verify_cache_payload_signature", + "wire_filepath_from_runtime", + "write_json_document_atomically", +] diff --git a/codeclone/cache/_canonicalize.py b/codeclone/cache/_canonicalize.py new file mode 100644 index 0000000..e6d8048 --- /dev/null +++ b/codeclone/cache/_canonicalize.py @@ -0,0 +1,432 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Callable, Mapping +from typing import TypeGuard, TypeVar, cast + +from ._validators import ( + _is_block_dict, + _is_class_metrics_dict, + _is_dead_candidate_dict, + _is_file_stat_dict, + _is_module_api_surface_dict, + _is_module_dep_dict, + _is_module_docstring_coverage_dict, + _is_module_typing_coverage_dict, + _is_segment_dict, + _is_source_stats_dict, + _is_string_list, + _is_unit_dict, +) +from .entries import ( + ApiParamSpecDict, + BlockDict, + CacheEntry, + ClassMetricsDict, + DeadCandidateDict, + FileStat, + ModuleApiSurfaceDict, + ModuleDepDict, + ModuleDocstringCoverageDict, + ModuleTypingCoverageDict, + PublicSymbolDict, + SegmentDict, + SourceStatsDict, + StructuralFindingGroupDict, + UnitDict, +) + +_ValidatedItemT = TypeVar("_ValidatedItemT") + + +def _as_file_stat_dict(value: object) -> FileStat | None: + if not _is_file_stat_dict(value): + return None + obj = cast("Mapping[str, object]", value) + mtime_ns = obj.get("mtime_ns") + size = obj.get("size") + if not isinstance(mtime_ns, int) or not isinstance(size, int): + return None + return FileStat(mtime_ns=mtime_ns, size=size) + + +def _as_source_stats_dict(value: object) -> SourceStatsDict | None: + if not _is_source_stats_dict(value): + return None + obj = cast("Mapping[str, object]", value) + lines = obj.get("lines") + functions = obj.get("functions") + methods 
= obj.get("methods") + classes = obj.get("classes") + assert isinstance(lines, int) + assert isinstance(functions, int) + assert isinstance(methods, int) + assert isinstance(classes, int) + return SourceStatsDict( + lines=lines, + functions=functions, + methods=methods, + classes=classes, + ) + + +def _as_typed_list( + value: object, + *, + predicate: Callable[[object], bool], +) -> list[_ValidatedItemT] | None: + if not isinstance(value, list): + return None + if not all(predicate(item) for item in value): + return None + return cast("list[_ValidatedItemT]", value) + + +def _as_typed_unit_list(value: object) -> list[UnitDict] | None: + return _as_typed_list(value, predicate=_is_unit_dict) + + +def _as_typed_block_list(value: object) -> list[BlockDict] | None: + return _as_typed_list(value, predicate=_is_block_dict) + + +def _as_typed_segment_list(value: object) -> list[SegmentDict] | None: + return _as_typed_list(value, predicate=_is_segment_dict) + + +def _as_typed_class_metrics_list(value: object) -> list[ClassMetricsDict] | None: + return _as_typed_list(value, predicate=_is_class_metrics_dict) + + +def _as_typed_dead_candidates_list( + value: object, +) -> list[DeadCandidateDict] | None: + return _as_typed_list(value, predicate=_is_dead_candidate_dict) + + +def _as_typed_module_deps_list(value: object) -> list[ModuleDepDict] | None: + return _as_typed_list(value, predicate=_is_module_dep_dict) + + +def _as_typed_string_list(value: object) -> list[str] | None: + return _as_typed_list(value, predicate=lambda item: isinstance(item, str)) + + +def _as_module_typing_coverage_dict( + value: object, +) -> ModuleTypingCoverageDict | None: + if not _is_module_typing_coverage_dict(value): + return None + return cast("ModuleTypingCoverageDict", value) + + +def _as_module_docstring_coverage_dict( + value: object, +) -> ModuleDocstringCoverageDict | None: + if not _is_module_docstring_coverage_dict(value): + return None + return cast("ModuleDocstringCoverageDict", value) + 
+ +def _as_module_api_surface_dict(value: object) -> ModuleApiSurfaceDict | None: + if not _is_module_api_surface_dict(value): + return None + return cast("ModuleApiSurfaceDict", value) + + +def _normalized_optional_string_list(value: object) -> list[str] | None: + items = _as_typed_string_list(value) + if not items: + return None + return sorted(set(items)) + + +def _is_canonical_cache_entry(value: object) -> TypeGuard[CacheEntry]: + return isinstance(value, dict) and _has_cache_entry_container_shape(value) + + +def _has_cache_entry_container_shape(entry: Mapping[str, object]) -> bool: + required = {"stat", "units", "blocks", "segments"} + if not required.issubset(entry.keys()): + return False + if not isinstance(entry.get("stat"), dict): + return False + if not isinstance(entry.get("units"), list): + return False + if not isinstance(entry.get("blocks"), list): + return False + if not isinstance(entry.get("segments"), list): + return False + source_stats = entry.get("source_stats") + if source_stats is not None and not _is_source_stats_dict(source_stats): + return False + optional_list_keys = ( + "class_metrics", + "module_deps", + "dead_candidates", + "referenced_names", + "referenced_qualnames", + "import_names", + "class_names", + "structural_findings", + ) + if not all(isinstance(entry.get(key, []), list) for key in optional_list_keys): + return False + typing_coverage = entry.get("typing_coverage") + if typing_coverage is not None and not _is_module_typing_coverage_dict( + typing_coverage + ): + return False + docstring_coverage = entry.get("docstring_coverage") + if docstring_coverage is not None and not _is_module_docstring_coverage_dict( + docstring_coverage + ): + return False + api_surface = entry.get("api_surface") + return api_surface is None or _is_module_api_surface_dict(api_surface) + + +def _decode_optional_cache_sections( + entry: Mapping[str, object], +) -> ( + tuple[ + list[ClassMetricsDict], + list[ModuleDepDict], + list[DeadCandidateDict], + 
list[str], + list[str], + list[str], + list[str], + ModuleTypingCoverageDict | None, + ModuleDocstringCoverageDict | None, + ModuleApiSurfaceDict | None, + SourceStatsDict | None, + list[StructuralFindingGroupDict] | None, + ] + | None +): + class_metrics_raw = _as_typed_class_metrics_list(entry.get("class_metrics", [])) + module_deps_raw = _as_typed_module_deps_list(entry.get("module_deps", [])) + dead_candidates_raw = _as_typed_dead_candidates_list( + entry.get("dead_candidates", []) + ) + referenced_names_raw = _as_typed_string_list(entry.get("referenced_names", [])) + referenced_qualnames_raw = _as_typed_string_list( + entry.get("referenced_qualnames", []) + ) + import_names_raw = _as_typed_string_list(entry.get("import_names", [])) + class_names_raw = _as_typed_string_list(entry.get("class_names", [])) + if ( + class_metrics_raw is None + or module_deps_raw is None + or dead_candidates_raw is None + or referenced_names_raw is None + or referenced_qualnames_raw is None + or import_names_raw is None + or class_names_raw is None + ): + return None + typing_coverage_raw = _as_module_typing_coverage_dict(entry.get("typing_coverage")) + docstring_coverage_raw = _as_module_docstring_coverage_dict( + entry.get("docstring_coverage") + ) + api_surface_raw = _as_module_api_surface_dict(entry.get("api_surface")) + source_stats = _as_source_stats_dict(entry.get("source_stats")) + structural_findings = entry.get("structural_findings") + typed_structural_findings = ( + structural_findings if isinstance(structural_findings, list) else None + ) + return ( + class_metrics_raw, + module_deps_raw, + dead_candidates_raw, + referenced_names_raw, + referenced_qualnames_raw, + import_names_raw, + class_names_raw, + typing_coverage_raw, + docstring_coverage_raw, + api_surface_raw, + source_stats, + typed_structural_findings, + ) + + +def _attach_optional_cache_sections( + entry: CacheEntry, + *, + typing_coverage: ModuleTypingCoverageDict | None = None, + docstring_coverage: 
ModuleDocstringCoverageDict | None = None, + api_surface: ModuleApiSurfaceDict | None = None, + source_stats: SourceStatsDict | None = None, + structural_findings: list[StructuralFindingGroupDict] | None = None, +) -> CacheEntry: + if typing_coverage is not None: + entry["typing_coverage"] = typing_coverage + if docstring_coverage is not None: + entry["docstring_coverage"] = docstring_coverage + if api_surface is not None: + entry["api_surface"] = api_surface + if source_stats is not None: + entry["source_stats"] = source_stats + if structural_findings is not None: + entry["structural_findings"] = structural_findings + return entry + + +def _canonicalize_cache_entry(entry: CacheEntry) -> CacheEntry: + class_metrics_sorted = sorted( + entry["class_metrics"], + key=lambda item: ( + item["start_line"], + item["end_line"], + item["qualname"], + ), + ) + for metric in class_metrics_sorted: + coupled_classes = metric.get("coupled_classes", []) + if coupled_classes: + metric["coupled_classes"] = sorted(set(coupled_classes)) + + module_deps_sorted = sorted( + entry["module_deps"], + key=lambda item: ( + item["source"], + item["target"], + item["import_type"], + item["line"], + ), + ) + dead_candidates_normalized: list[DeadCandidateDict] = [] + for candidate in entry["dead_candidates"]: + suppressed_rules = candidate.get("suppressed_rules", []) + normalized_candidate = DeadCandidateDict( + qualname=candidate["qualname"], + local_name=candidate["local_name"], + filepath=candidate["filepath"], + start_line=candidate["start_line"], + end_line=candidate["end_line"], + kind=candidate["kind"], + ) + if _is_string_list(suppressed_rules): + normalized_rules = sorted(set(suppressed_rules)) + if normalized_rules: + normalized_candidate["suppressed_rules"] = normalized_rules + dead_candidates_normalized.append(normalized_candidate) + + dead_candidates_sorted = sorted( + dead_candidates_normalized, + key=lambda item: ( + item["start_line"], + item["end_line"], + item["qualname"], + 
item["local_name"], + item["kind"], + tuple(item.get("suppressed_rules", [])), + ), + ) + + result: CacheEntry = { + "stat": entry["stat"], + "units": entry["units"], + "blocks": entry["blocks"], + "segments": entry["segments"], + "class_metrics": class_metrics_sorted, + "module_deps": module_deps_sorted, + "dead_candidates": dead_candidates_sorted, + "referenced_names": sorted(set(entry["referenced_names"])), + "referenced_qualnames": sorted(set(entry.get("referenced_qualnames", []))), + "import_names": sorted(set(entry["import_names"])), + "class_names": sorted(set(entry["class_names"])), + } + typing_coverage = entry.get("typing_coverage") + if typing_coverage is not None: + result["typing_coverage"] = ModuleTypingCoverageDict( + module=typing_coverage["module"], + filepath=typing_coverage["filepath"], + callable_count=typing_coverage["callable_count"], + params_total=typing_coverage["params_total"], + params_annotated=typing_coverage["params_annotated"], + returns_total=typing_coverage["returns_total"], + returns_annotated=typing_coverage["returns_annotated"], + any_annotation_count=typing_coverage["any_annotation_count"], + ) + docstring_coverage = entry.get("docstring_coverage") + if docstring_coverage is not None: + result["docstring_coverage"] = ModuleDocstringCoverageDict( + module=docstring_coverage["module"], + filepath=docstring_coverage["filepath"], + public_symbol_total=docstring_coverage["public_symbol_total"], + public_symbol_documented=docstring_coverage["public_symbol_documented"], + ) + api_surface = entry.get("api_surface") + if api_surface is not None: + symbols = sorted( + api_surface["symbols"], + key=lambda item: ( + item["qualname"], + item["kind"], + item["start_line"], + item["end_line"], + ), + ) + normalized_symbols = [ + PublicSymbolDict( + qualname=symbol["qualname"], + kind=symbol["kind"], + start_line=symbol["start_line"], + end_line=symbol["end_line"], + params=[ + ApiParamSpecDict( + name=param["name"], + kind=param["kind"], + 
has_default=param["has_default"], + annotation_hash=param["annotation_hash"], + ) + for param in symbol.get("params", []) + ], + returns_hash=symbol.get("returns_hash", ""), + exported_via=symbol.get("exported_via", "name"), + ) + for symbol in symbols + ] + result["api_surface"] = ModuleApiSurfaceDict( + module=api_surface["module"], + filepath=api_surface["filepath"], + all_declared=sorted(set(api_surface.get("all_declared", []))), + symbols=normalized_symbols, + ) + structural_findings = entry.get("structural_findings") + if structural_findings is not None: + result["structural_findings"] = structural_findings + source_stats = entry.get("source_stats") + if source_stats is not None: + result["source_stats"] = source_stats + return result + + +__all__ = [ + "_as_file_stat_dict", + "_as_module_api_surface_dict", + "_as_module_docstring_coverage_dict", + "_as_module_typing_coverage_dict", + "_as_source_stats_dict", + "_as_typed_block_list", + "_as_typed_class_metrics_list", + "_as_typed_dead_candidates_list", + "_as_typed_module_deps_list", + "_as_typed_segment_list", + "_as_typed_string_list", + "_as_typed_unit_list", + "_attach_optional_cache_sections", + "_canonicalize_cache_entry", + "_decode_optional_cache_sections", + "_has_cache_entry_container_shape", + "_is_canonical_cache_entry", + "_normalized_optional_string_list", +] diff --git a/codeclone/cache/_validators.py b/codeclone/cache/_validators.py new file mode 100644 index 0000000..7cfc433 --- /dev/null +++ b/codeclone/cache/_validators.py @@ -0,0 +1,228 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

from collections.abc import Mapping, Sequence


def _is_strict_int(value: object) -> bool:
    """True when *value* is an ``int`` but not a ``bool``.

    ``bool`` is a subclass of ``int`` in Python, so a bare
    ``isinstance(value, int)`` would accept ``True``/``False`` for fields
    such as ``mtime_ns`` or ``start_line``.  Cache integrity checks must
    reject that: integer wire fields are always written as real ints.
    """
    return isinstance(value, int) and not isinstance(value, bool)


def _is_file_stat_dict(value: object) -> bool:
    """True when *value* has the FileStat shape (mtime_ns/size ints)."""
    if not isinstance(value, dict):
        return False
    return _is_strict_int(value.get("mtime_ns")) and _is_strict_int(value.get("size"))


def _is_source_stats_dict(value: object) -> bool:
    """True when *value* has the SourceStatsDict shape (non-negative counts)."""
    if not isinstance(value, dict):
        return False
    return all(
        _is_strict_int(count) and count >= 0
        for count in (
            value.get("lines"),
            value.get("functions"),
            value.get("methods"),
            value.get("classes"),
        )
    )


def _is_unit_dict(value: object) -> bool:
    """True when *value* has the UnitDict shape.

    Newer fields (complexity, flow profiles) are optional and validated
    against their documented defaults; ``risk`` must be one of the three
    known levels.
    """
    if not isinstance(value, dict):
        return False
    string_keys = ("qualname", "filepath", "fingerprint", "loc_bucket")
    int_keys = ("start_line", "end_line", "loc", "stmt_count")
    if not _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys):
        return False
    cyclomatic_complexity = value.get("cyclomatic_complexity", 1)
    nesting_depth = value.get("nesting_depth", 0)
    risk = value.get("risk", "low")
    raw_hash = value.get("raw_hash", "")
    return (
        _is_strict_int(cyclomatic_complexity)
        and _is_strict_int(nesting_depth)
        and isinstance(risk, str)
        and risk in {"low", "medium", "high"}
        and isinstance(raw_hash, str)
        and _is_strict_int(value.get("entry_guard_count", 0))
        and isinstance(value.get("entry_guard_terminal_profile", "none"), str)
        # This flag really is a bool, unlike the counters above.
        and isinstance(value.get("entry_guard_has_side_effect_before", False), bool)
        and isinstance(value.get("terminal_kind", "fallthrough"), str)
        and isinstance(value.get("try_finally_profile", "none"), str)
        and isinstance(value.get("side_effect_order_profile", "none"), str)
    )


def _is_block_dict(value: object) -> bool:
    """True when *value* has the BlockDict shape."""
    if not isinstance(value, dict):
        return False
    string_keys = ("block_hash", "filepath", "qualname")
    int_keys = ("start_line", "end_line", "size")
    return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys)


def _is_segment_dict(value: object) -> bool:
    """True when *value* has the SegmentDict shape."""
    if not isinstance(value, dict):
        return False
    string_keys = ("segment_hash", "segment_sig", "filepath", "qualname")
    int_keys = ("start_line", "end_line", "size")
    return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys)


def _is_module_typing_coverage_dict(value: object) -> bool:
    """True when *value* has the ModuleTypingCoverageDict shape."""
    if not isinstance(value, dict):
        return False
    string_keys = ("module", "filepath")
    int_keys = (
        "callable_count",
        "params_total",
        "params_annotated",
        "returns_total",
        "returns_annotated",
        "any_annotation_count",
    )
    return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys)


def _is_module_docstring_coverage_dict(value: object) -> bool:
    """True when *value* has the ModuleDocstringCoverageDict shape."""
    if not isinstance(value, dict):
        return False
    string_keys = ("module", "filepath")
    int_keys = ("public_symbol_total", "public_symbol_documented")
    return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys)


def _is_api_param_spec_dict(value: object) -> bool:
    """True when *value* has the ApiParamSpecDict shape.

    ``has_default`` is intentionally a strict ``bool`` check, and
    ``annotation_hash`` is optional (defaults to an empty string).
    """
    if not isinstance(value, dict):
        return False
    return (
        isinstance(value.get("name"), str)
        and isinstance(value.get("kind"), str)
        and isinstance(value.get("has_default"), bool)
        and isinstance(value.get("annotation_hash", ""), str)
    )


def _is_public_symbol_dict(value: object) -> bool:
    """True when *value* has the PublicSymbolDict shape (params validated too)."""
    if not isinstance(value, dict):
        return False
    if not _has_typed_fields(
        value,
        string_keys=("qualname", "kind", "exported_via"),
        int_keys=("start_line", "end_line"),
    ):
        return False
    params = value.get("params", [])
    return (
        isinstance(value.get("returns_hash", ""), str)
        and isinstance(params, list)
        and all(_is_api_param_spec_dict(item) for item in params)
    )


def _is_module_api_surface_dict(value: object) -> bool:
    """True when *value* has the ModuleApiSurfaceDict shape."""
    if not isinstance(value, dict):
        return False
    all_declared = value.get("all_declared", [])
    symbols = value.get("symbols", [])
    return (
        isinstance(value.get("module"), str)
        and isinstance(value.get("filepath"), str)
        and _is_string_list(all_declared)
        and isinstance(symbols, list)
        and all(_is_public_symbol_dict(item) for item in symbols)
    )


def _is_class_metrics_dict(value: object) -> bool:
    """True when *value* has the ClassMetricsDict shape.

    ``coupled_classes`` is optional; when present it must be a list of
    strings.
    """
    if not isinstance(value, dict):
        return False
    if not _has_typed_fields(
        value,
        string_keys=(
            "qualname",
            "filepath",
            "risk_coupling",
            "risk_cohesion",
        ),
        int_keys=(
            "start_line",
            "end_line",
            "cbo",
            "lcom4",
            "method_count",
            "instance_var_count",
        ),
    ):
        return False

    coupled_classes = value.get("coupled_classes")
    if coupled_classes is None:
        return True
    return _is_string_list(coupled_classes)


def _is_module_dep_dict(value: object) -> bool:
    """True when *value* has the ModuleDepDict shape."""
    if not isinstance(value, dict):
        return False
    return _has_typed_fields(
        value,
        string_keys=("source", "target", "import_type"),
        int_keys=("line",),
    )


def _is_dead_candidate_dict(value: object) -> bool:
    """True when *value* has the DeadCandidateDict shape.

    ``suppressed_rules`` is optional; when present it must be a list of
    strings.
    """
    if not isinstance(value, dict):
        return False
    if not _has_typed_fields(
        value,
        string_keys=("qualname", "local_name", "filepath", "kind"),
        int_keys=("start_line", "end_line"),
    ):
        return False
    suppressed_rules = value.get("suppressed_rules")
    if suppressed_rules is None:
        return True
    return _is_string_list(suppressed_rules)


def _is_string_list(value: object) -> bool:
    """True when *value* is a list whose items are all strings."""
    return isinstance(value, list) and all(isinstance(item, str) for item in value)


def _has_typed_fields(
    value: Mapping[str, object],
    *,
    string_keys: Sequence[str],
    int_keys: Sequence[str],
) -> bool:
    """True when every *string_keys* entry maps to str and every *int_keys*
    entry maps to a strict (non-bool) int."""
    return all(isinstance(value.get(key), str) for key in string_keys) and all(
        _is_strict_int(value.get(key)) for key in int_keys
    )


__all__ = [
    "_has_typed_fields",
    "_is_api_param_spec_dict",
    "_is_block_dict",
    "_is_class_metrics_dict",
    "_is_dead_candidate_dict",
    "_is_file_stat_dict",
    "_is_module_api_surface_dict",
    "_is_module_dep_dict",
    "_is_module_docstring_coverage_dict",
    "_is_module_typing_coverage_dict",
    "_is_public_symbol_dict",
    "_is_segment_dict",
    "_is_source_stats_dict",
    "_is_strict_int",
    "_is_string_list",
    "_is_unit_dict",
]
    _as_str_dict,
)
from .integrity import (
    as_str_or_none as _as_str,
)


def _decode_wire_stat(obj: dict[str, object]) -> FileStat | None:
    """Decode the mandatory "st" row ``[mtime_ns, size]`` into a FileStat.

    Returns None when the row is missing, not a 2-list, or not int-typed;
    the caller treats that as a fatal decode failure for the whole entry.
    """
    stat_list = _as_list(obj.get("st"))
    if stat_list is None or len(stat_list) != 2:
        return None
    mtime_ns = _as_int(stat_list[0])
    size = _as_int(stat_list[1])
    if mtime_ns is None or size is None:
        return None
    return FileStat(mtime_ns=mtime_ns, size=size)


def _decode_optional_wire_source_stats(
    *,
    obj: dict[str, object],
) -> SourceStatsDict | None:
    """Decode the optional "ss" row ``[lines, functions, methods, classes]``.

    Returns None both when the key is absent and when the row is malformed
    or contains negative counts; the caller simply omits the section in
    either case (a bad "ss" row never fails the whole entry).
    """
    row = _decode_optional_wire_row(obj=obj, key="ss", expected_len=4)
    if row is None:
        return None
    counts = _decode_wire_int_fields(row, 0, 1, 2, 3)
    if counts is None:
        return None
    lines, functions, methods, classes = counts
    if any(value < 0 for value in counts):
        return None
    return SourceStatsDict(
        lines=lines,
        functions=functions,
        methods=methods,
        classes=classes,
    )


def _decode_wire_file_entry(value: object, filepath: str) -> CacheEntry | None:
    """Decode one wire-format per-file object into a CacheEntry.

    *filepath* is not stored in the wire rows; it is re-injected here into
    every per-file item.  Mandatory sections (stat, list sections, name
    sections, coupled-classes map, structural findings) fail the whole
    entry when malformed; coverage/api-surface/source-stats sections are
    tolerant and are silently dropped instead.
    """
    obj = _as_str_dict(value)
    if obj is None:
        return None

    stat = _decode_wire_stat(obj)
    if stat is None:
        return None
    source_stats = _decode_optional_wire_source_stats(obj=obj)
    file_sections = _decode_wire_file_sections(obj=obj, filepath=filepath)
    if file_sections is None:
        return None
    (
        units,
        blocks,
        segments,
        class_metrics,
        module_deps,
        dead_candidates,
    ) = file_sections
    name_sections = _decode_wire_name_sections(obj=obj)
    if name_sections is None:
        return None
    (
        referenced_names,
        referenced_qualnames,
        import_names,
        class_names,
    ) = name_sections
    typing_coverage = _decode_optional_wire_typing_coverage(obj=obj, filepath=filepath)
    docstring_coverage = _decode_optional_wire_docstring_coverage(
        obj=obj,
        filepath=filepath,
    )
    api_surface = _decode_optional_wire_api_surface(obj=obj, filepath=filepath)
    coupled_classes_map = _decode_optional_wire_coupled_classes(obj=obj, key="cc")
    if coupled_classes_map is None:
        return None

    # Re-attach the separately-encoded "cc" rows onto their class metrics.
    for metric in class_metrics:
        names = coupled_classes_map.get(metric["qualname"], [])
        if names:
            metric["coupled_classes"] = names

    # Distinguish "sf" present (possibly empty) from "sf" absent (legacy
    # entry): only a present key yields a structural_findings section.
    has_structural_findings = "sf" in obj
    structural_findings = _decode_wire_structural_findings_optional(obj)
    if structural_findings is None:
        return None

    return _attach_optional_cache_sections(
        CacheEntry(
            stat=stat,
            units=units,
            blocks=blocks,
            segments=segments,
            class_metrics=class_metrics,
            module_deps=module_deps,
            dead_candidates=dead_candidates,
            referenced_names=referenced_names,
            referenced_qualnames=referenced_qualnames,
            import_names=import_names,
            class_names=class_names,
        ),
        typing_coverage=typing_coverage,
        docstring_coverage=docstring_coverage,
        api_surface=api_surface,
        source_stats=source_stats,
        structural_findings=(
            _normalize_cached_structural_groups(structural_findings, filepath=filepath)
            if has_structural_findings
            else None
        ),
    )


def _decode_wire_file_sections(
    *,
    obj: dict[str, object],
    filepath: str,
) -> (
    tuple[
        list[UnitDict],
        list[BlockDict],
        list[SegmentDict],
        list[ClassMetricsDict],
        list[ModuleDepDict],
        list[DeadCandidateDict],
    ]
    | None
):
    """Decode the six list sections ("u", "b", "s", "cm", "md", "dc").

    A missing key decodes to an empty list; any malformed row fails the
    whole group (None).  Module deps ("md") carry their own paths, so they
    do not take *filepath*.
    """
    units = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="u",
        filepath=filepath,
        decode_item=_decode_wire_unit,
    )
    blocks = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="b",
        filepath=filepath,
        decode_item=_decode_wire_block,
    )
    segments = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="s",
        filepath=filepath,
        decode_item=_decode_wire_segment,
    )
    class_metrics = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="cm",
        filepath=filepath,
        decode_item=_decode_wire_class_metric,
    )
    module_deps = _decode_optional_wire_items(
        obj=obj,
        key="md",
        decode_item=_decode_wire_module_dep,
    )
    dead_candidates = _decode_optional_wire_items_for_filepath(
        obj=obj,
        key="dc",
        filepath=filepath,
        decode_item=_decode_wire_dead_candidate,
    )
    if (
        units is None
        or blocks is None
        or segments is None
        or class_metrics is None
        or module_deps is None
        or dead_candidates is None
    ):
        return None
    return (
        units,
        blocks,
        segments,
        class_metrics,
        module_deps,
        dead_candidates,
    )


def _decode_wire_name_sections(
    *,
    obj: dict[str, object],
) -> tuple[list[str], list[str], list[str], list[str]] | None:
    """Decode the four string-list sections ("rn", "rq", "in", "cn")."""
    referenced_names = _decode_optional_wire_names(obj=obj, key="rn")
    referenced_qualnames = _decode_optional_wire_names(obj=obj, key="rq")
    import_names = _decode_optional_wire_names(obj=obj, key="in")
    class_names = _decode_optional_wire_names(obj=obj, key="cn")
    if (
        referenced_names is None
        or referenced_qualnames is None
        or import_names is None
        or class_names is None
    ):
        return None
    return (
        referenced_names,
        referenced_qualnames,
        import_names,
        class_names,
    )


def _decode_optional_wire_typing_coverage(
    *,
    obj: dict[str, object],
    filepath: str,
) -> ModuleTypingCoverageDict | None:
    """Decode the optional "tc" row ``[module, 6 counters]``; None = omit."""
    module_and_ints = _decode_optional_wire_module_ints(
        obj=obj,
        key="tc",
        expected_len=7,
        int_indexes=(1, 2, 3, 4, 5, 6),
    )
    if module_and_ints is None:
        return None
    module, ints = module_and_ints
    (
        callable_count,
        params_total,
        params_annotated,
        returns_total,
        returns_annotated,
        any_annotation_count,
    ) = ints
    return ModuleTypingCoverageDict(
        module=module,
        filepath=filepath,
        callable_count=callable_count,
        params_total=params_total,
        params_annotated=params_annotated,
        returns_total=returns_total,
        returns_annotated=returns_annotated,
        any_annotation_count=any_annotation_count,
    )


def _decode_optional_wire_docstring_coverage(
    *,
    obj: dict[str, object],
    filepath: str,
) -> ModuleDocstringCoverageDict | None:
    """Decode the optional "dg" row ``[module, total, documented]``."""
    module_and_counts = _decode_optional_wire_module_ints(
        obj=obj,
        key="dg",
        expected_len=3,
        int_indexes=(1, 2),
    )
    if module_and_counts is None:
        return None
    module, counts = module_and_counts
    public_symbol_total, public_symbol_documented = counts
    return ModuleDocstringCoverageDict(
        module=module,
        filepath=filepath,
        public_symbol_total=public_symbol_total,
        public_symbol_documented=public_symbol_documented,
    )


def _decode_optional_wire_api_surface(
    *,
    obj: dict[str, object],
    filepath: str,
) -> ModuleApiSurfaceDict | None:
    """Decode the optional "as" row ``[module, all_declared, symbols]``."""
    row = _decode_optional_wire_row(obj=obj, key="as", expected_len=3)
    if row is None:
        return None
    module = _as_str(row[0])
    # Wrap row[1] in a throwaway mapping so the shared names decoder can
    # validate it as a string list.
    all_declared = _decode_optional_wire_names(obj={"ad": row[1]}, key="ad")
    symbols_raw = _as_list(row[2])
    if module is None or all_declared is None or symbols_raw is None:
        return None
    symbols: list[PublicSymbolDict] = []
    for symbol_raw in symbols_raw:
        decoded_symbol = _decode_wire_api_surface_symbol(symbol_raw)
        if decoded_symbol is None:
            return None
        symbols.append(decoded_symbol)
    return ModuleApiSurfaceDict(
        module=module,
        filepath=filepath,
        all_declared=sorted(set(all_declared)),
        symbols=symbols,
    )


def _decode_optional_wire_module_ints(
    *,
    obj: dict[str, object],
    key: str,
    expected_len: int,
    int_indexes: tuple[int, ...],
) -> tuple[str, tuple[int, ...]] | None:
    """Decode an optional row shaped ``[module, int, int, ...]``."""
    row = _decode_optional_wire_row(obj=obj, key=key, expected_len=expected_len)
    if row is None:
        return None
    module = _as_str(row[0])
    ints = _decode_wire_int_fields(row, *int_indexes)
    if module is None or ints is None:
        return None
    return module, ints


def _decode_wire_api_surface_symbol(
    value: object,
) -> PublicSymbolDict | None:
    """Decode one 7-element symbol row.

    Row layout (matches the encoder): [qualname, kind, start_line,
    end_line, exported_via, returns_hash, params].
    """
    symbol_row = _decode_wire_row(value, valid_lengths={7})
    if symbol_row is None:
        return None
    str_fields = _decode_wire_str_fields(symbol_row, 0, 1, 4, 5)
    int_fields = _decode_wire_int_fields(symbol_row, 2, 3)
    params_raw = _as_list(symbol_row[6])
    if str_fields is None or int_fields is None or params_raw is None:
        return None
    qualname, kind, exported_via, returns_hash = str_fields
    start_line, end_line = int_fields
    params: list[ApiParamSpecDict] = []
    for param_raw in params_raw:
        decoded_param = _decode_wire_api_param_spec(param_raw)
        if decoded_param is None:
            return None
        params.append(decoded_param)
    return PublicSymbolDict(
        qualname=qualname,
        kind=kind,
        start_line=start_line,
        end_line=end_line,
        params=params,
        returns_hash=returns_hash,
        exported_via=exported_via,
    )


def _decode_wire_api_param_spec(
    value: object,
) -> ApiParamSpecDict | None:
    """Decode one 4-element param row [name, kind, has_default(0/1), hash]."""
    param_row = _decode_wire_row(value, valid_lengths={4})
    if param_row is None:
        return None
    str_fields = _decode_wire_str_fields(param_row, 0, 1, 3)
    int_fields = _decode_wire_int_fields(param_row, 2)
    if str_fields is None or int_fields is None:
        return None
    name, param_kind, annotation_hash = str_fields
    (has_default_raw,) = int_fields
    return ApiParamSpecDict(
        name=name,
        kind=param_kind,
        has_default=bool(has_default_raw),
        annotation_hash=annotation_hash,
    )


def _decode_wire_structural_findings_optional(
    obj: dict[str, object],
) -> list[StructuralFindingGroupDict] | None:
    """Decode the optional "sf" groups; absent key decodes to []."""
    raw = obj.get("sf")
    if raw is None:
        return []
    groups_raw = _as_list(raw)
    if groups_raw is None:
        return None
    groups: list[StructuralFindingGroupDict] = []
    for group_raw in groups_raw:
        group = _decode_wire_structural_group(group_raw)
        if group is None:
            return None
        groups.append(group)
    return groups


def _decode_wire_structural_group(value: object) -> StructuralFindingGroupDict | None:
    """Decode one 4-element group row [kind, key, signature, items]."""
    group_row = _decode_wire_row(value, valid_lengths={4})
    if group_row is None:
        return None
    str_fields = _decode_wire_str_fields(group_row, 0, 1)
    items_raw = _as_list(group_row[3])
    signature = _decode_wire_structural_signature(group_row[2])
    if str_fields is None or items_raw is None or signature is None:
        return None
    finding_kind, finding_key = str_fields
    items: list[StructuralFindingOccurrenceDict] = []
    for item_raw in items_raw:
        item = _decode_wire_structural_occurrence(item_raw)
        if item is None:
            return None
        items.append(item)
    return StructuralFindingGroupDict(
        finding_kind=finding_kind,
        finding_key=finding_key,
        signature=signature,
        items=items,
    )


def _decode_wire_structural_signature(value: object) -> dict[str, str] | None:
    """Decode a signature encoded as a list of [key, value] string pairs."""
    sig_raw = _as_list(value)
    if sig_raw is None:
        return None
    signature: dict[str, str] = {}
    for pair in sig_raw:
        pair_list = _as_list(pair)
        if pair_list is None or len(pair_list) != 2:
            return None
        key = _as_str(pair_list[0])
        val = _as_str(pair_list[1])
        if key is None or val is None:
            return None
        signature[key] = val
    return signature


def _decode_wire_structural_occurrence(
    value: object,
) -> StructuralFindingOccurrenceDict | None:
    """Decode one 3-element occurrence row [qualname, start, end]."""
    item_list = _as_list(value)
    if item_list is None or len(item_list) != 3:
        return None
    qualname = _as_str(item_list[0])
    start = _as_int(item_list[1])
    end = _as_int(item_list[2])
    if qualname is None or start is None or end is None:
        return None
    return StructuralFindingOccurrenceDict(
        qualname=qualname,
        start=start,
        end=end,
    )


def _decode_wire_unit(value: object, filepath: str) -> UnitDict | None:
    """Decode one unit row into a FunctionGroupItem bound to *filepath*.

    NOTE(review): valid_lengths accepts 11 or 17 — 17 is the current full
    row; 11 presumably predates the six flow-profile columns, with the
    defaults supplied by `_decode_wire_unit_core_fields` /
    `_decode_wire_unit_flow_profiles` (defined in _wire_helpers) — confirm.
    """
    decoded = _decode_wire_named_span(value, valid_lengths={11, 17})
    if decoded is None:
        return None
    row, qualname, start_line, end_line = decoded
    core_fields = _decode_wire_unit_core_fields(row)
    flow_profiles = _decode_wire_unit_flow_profiles(row)
    if core_fields is None or flow_profiles is None:
        return None
    (
        loc,
        stmt_count,
        fingerprint,
        loc_bucket,
        cyclomatic_complexity,
        nesting_depth,
        risk,
        raw_hash,
    ) = core_fields
    (
        entry_guard_count,
        entry_guard_terminal_profile,
        entry_guard_has_side_effect_before,
        terminal_kind,
        try_finally_profile,
        side_effect_order_profile,
    ) = flow_profiles
    return FunctionGroupItem(
        qualname=qualname,
        filepath=filepath,
        start_line=start_line,
        end_line=end_line,
        loc=loc,
        stmt_count=stmt_count,
        fingerprint=fingerprint,
        loc_bucket=loc_bucket,
        cyclomatic_complexity=cyclomatic_complexity,
        nesting_depth=nesting_depth,
        risk=risk,
        raw_hash=raw_hash,
        entry_guard_count=entry_guard_count,
        entry_guard_terminal_profile=entry_guard_terminal_profile,
        entry_guard_has_side_effect_before=entry_guard_has_side_effect_before,
        terminal_kind=terminal_kind,
        try_finally_profile=try_finally_profile,
        side_effect_order_profile=side_effect_order_profile,
    )


def _decode_wire_block(value: object, filepath: str) -> BlockDict | None:
    """Decode one 5-element block row [qualname, start, end, size, hash]."""
    decoded = _decode_wire_named_sized_span(value, valid_lengths={5})
    if decoded is None:
        return None
    row, qualname, start_line, end_line, size = decoded
    block_hash = _as_str(row[4])
    if block_hash is None:
        return None

    return BlockGroupItem(
        block_hash=block_hash,
        filepath=filepath,
        qualname=qualname,
        start_line=start_line,
        end_line=end_line,
        size=size,
    )


def _decode_wire_segment(value: object, filepath: str) -> SegmentDict | None:
    """Decode one 6-element segment row [..., size, hash, sig]."""
    decoded = _decode_wire_named_sized_span(value, valid_lengths={6})
    if decoded is None:
        return None
    row, qualname, start_line, end_line, size = decoded
    segment_hash = _as_str(row[4])
    segment_sig = _as_str(row[5])
    if segment_hash is None or segment_sig is None:
        return None

    return SegmentGroupItem(
        segment_hash=segment_hash,
        segment_sig=segment_sig,
        filepath=filepath,
        qualname=qualname,
        start_line=start_line,
        end_line=end_line,
        size=size,
    )


def _decode_wire_class_metric(
    value: object,
    filepath: str,
) -> ClassMetricsDict | None:
    """Decode one 9-element class-metric row; coupled classes are attached
    later from the separate "cc" section by `_decode_wire_file_entry`."""
    decoded = _decode_wire_named_span(value, valid_lengths={9})
    if decoded is None:
        return None
    row, qualname, start_line, end_line = decoded
    metric_fields = _decode_wire_class_metric_fields(row)
    if metric_fields is None:
        return None
    cbo, lcom4, method_count, instance_var_count, risk_coupling, risk_cohesion = (
        metric_fields
    )
    return ClassMetricsDict(
        qualname=qualname,
        filepath=filepath,
        start_line=start_line,
        end_line=end_line,
        cbo=cbo,
        lcom4=lcom4,
        method_count=method_count,
        instance_var_count=instance_var_count,
        risk_coupling=risk_coupling,
        risk_cohesion=risk_cohesion,
    )


def _decode_wire_module_dep(value: object) -> ModuleDepDict | None:
    """Decode one 4-element dep row [source, target, import_type, line]."""
    row = _as_list(value)
    if row is None or len(row) != 4:
        return None
    source = _as_str(row[0])
    target = _as_str(row[1])
    import_type = _as_str(row[2])
    line = _as_int(row[3])
    if source is None or target is None or import_type is None or line is None:
        return None
    return ModuleDepDict(
        source=source,
        target=target,
        import_type=import_type,
        line=line,
    )


def _decode_wire_dead_candidate(
    value: object,
    filepath: str,
) -> DeadCandidateDict | None:
    """Decode one dead-candidate row.

    Length 5 is the base row; length 6 adds an optional suppressed-rules
    list (deduplicated, sorted, empty strings dropped), which is only set
    on the result when non-empty.
    """
    row = _decode_wire_row(value, valid_lengths={5, 6})
    if row is None:
        return None
    str_fields = _decode_wire_str_fields(row, 0, 1, 4)
    int_fields = _decode_wire_int_fields(row, 2, 3)
    suppressed_rules: list[str] | None = []
    if len(row) == 6:
        raw_rules = _as_list(row[5])
        if raw_rules is None or not all(isinstance(rule, str) for rule in raw_rules):
            return None
        suppressed_rules = sorted({str(rule) for rule in raw_rules if str(rule)})
    if str_fields is None or int_fields is None:
        return None
    qualname, local_name, kind = str_fields
    start_line, end_line = int_fields
    decoded = DeadCandidateDict(
        qualname=qualname,
        local_name=local_name,
        filepath=filepath,
        start_line=start_line,
        end_line=end_line,
        kind=kind,
    )
    if suppressed_rules:
        decoded["suppressed_rules"] = suppressed_rules
    return decoded


__all__ = [
    "_decode_optional_wire_api_surface",
    "_decode_optional_wire_coupled_classes",
    "_decode_optional_wire_docstring_coverage",
    "_decode_optional_wire_items",
    "_decode_optional_wire_items_for_filepath",
    "_decode_optional_wire_module_ints",
    "_decode_optional_wire_names",
    "_decode_optional_wire_row",
    "_decode_optional_wire_source_stats",
    "_decode_optional_wire_typing_coverage",
    "_decode_wire_api_param_spec",
    "_decode_wire_api_surface_symbol",
    "_decode_wire_block",
    "_decode_wire_class_metric",
    "_decode_wire_class_metric_fields",
    "_decode_wire_dead_candidate",
    "_decode_wire_file_entry",
    "_decode_wire_file_sections",
    "_decode_wire_int_fields",
    "_decode_wire_module_dep",
    "_decode_wire_name_sections",
    "_decode_wire_named_sized_span",
    "_decode_wire_named_span",
    "_decode_wire_qualname_span",
    "_decode_wire_qualname_span_size",
    "_decode_wire_row",
    "_decode_wire_segment",
    "_decode_wire_stat",
    "_decode_wire_str_fields",
    "_decode_wire_structural_findings_optional",
    "_decode_wire_structural_group",
    "_decode_wire_structural_occurrence",
    "_decode_wire_structural_signature",
    "_decode_wire_unit",
    "_decode_wire_unit_core_fields",
    "_decode_wire_unit_flow_profiles",
]
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

from ._canonicalize import _normalized_optional_string_list
from .entries import CacheEntry, ClassMetricsDict


def _encode_wire_file_entry(entry: CacheEntry) -> dict[str, object]:
    """Encode a CacheEntry into the compact wire mapping.

    The output is canonical: every section is sorted and empty sections
    are omitted, so do not reorder the key inserts or the sort keys —
    insertion order and item order define the serialized bytes.  Per-item
    ``filepath`` fields are not encoded; the decoder re-injects them.

    Short key legend: st=stat, ss=source stats, u=units, b=blocks,
    s=segments, cm=class metrics, cc=coupled classes, md=module deps,
    dc=dead candidates, rn/rq/in/cn=name lists, tc=typing coverage,
    dg=docstring coverage, as=api surface, sf=structural findings.
    """
    wire: dict[str, object] = {
        "st": [entry["stat"]["mtime_ns"], entry["stat"]["size"]],
    }
    source_stats = entry.get("source_stats")
    if source_stats is not None:
        wire["ss"] = [
            source_stats["lines"],
            source_stats["functions"],
            source_stats["methods"],
            source_stats["classes"],
        ]

    units = sorted(
        entry["units"],
        key=lambda unit: (
            unit["qualname"],
            unit["start_line"],
            unit["end_line"],
            unit["fingerprint"],
        ),
    )
    if units:
        # 17-column unit rows; .get() defaults keep older entries encodable.
        wire["u"] = [
            [
                unit["qualname"],
                unit["start_line"],
                unit["end_line"],
                unit["loc"],
                unit["stmt_count"],
                unit["fingerprint"],
                unit["loc_bucket"],
                unit.get("cyclomatic_complexity", 1),
                unit.get("nesting_depth", 0),
                unit.get("risk", "low"),
                unit.get("raw_hash", ""),
                unit.get("entry_guard_count", 0),
                unit.get("entry_guard_terminal_profile", "none"),
                1 if unit.get("entry_guard_has_side_effect_before", False) else 0,
                unit.get("terminal_kind", "fallthrough"),
                unit.get("try_finally_profile", "none"),
                unit.get("side_effect_order_profile", "none"),
            ]
            for unit in units
        ]

    blocks = sorted(
        entry["blocks"],
        key=lambda block: (
            block["qualname"],
            block["start_line"],
            block["end_line"],
            block["block_hash"],
        ),
    )
    if blocks:
        wire["b"] = [
            [
                block["qualname"],
                block["start_line"],
                block["end_line"],
                block["size"],
                block["block_hash"],
            ]
            for block in blocks
        ]

    segments = sorted(
        entry["segments"],
        key=lambda segment: (
            segment["qualname"],
            segment["start_line"],
            segment["end_line"],
            segment["segment_hash"],
        ),
    )
    if segments:
        wire["s"] = [
            [
                segment["qualname"],
                segment["start_line"],
                segment["end_line"],
                segment["size"],
                segment["segment_hash"],
                segment["segment_sig"],
            ]
            for segment in segments
        ]

    class_metrics = sorted(
        entry["class_metrics"],
        key=lambda metric: (
            metric["start_line"],
            metric["end_line"],
            metric["qualname"],
        ),
    )
    if class_metrics:
        # Coupled classes go into a separate "cc" section keyed by
        # qualname; the decoder joins them back onto the "cm" rows.
        coupled_classes_rows: list[list[object]] = []

        def _append_coupled_classes_row(metric: ClassMetricsDict) -> None:
            # One [qualname, names] row per metric with non-empty names.
            coupled_classes = _normalized_optional_string_list(
                metric.get("coupled_classes", [])
            )
            if coupled_classes:
                coupled_classes_rows.append([metric["qualname"], coupled_classes])

        wire["cm"] = [
            [
                metric["qualname"],
                metric["start_line"],
                metric["end_line"],
                metric["cbo"],
                metric["lcom4"],
                metric["method_count"],
                metric["instance_var_count"],
                metric["risk_coupling"],
                metric["risk_cohesion"],
            ]
            for metric in class_metrics
        ]
        for metric in class_metrics:
            _append_coupled_classes_row(metric)
        if coupled_classes_rows:
            wire["cc"] = coupled_classes_rows

    module_deps = sorted(
        entry["module_deps"],
        key=lambda dep: (dep["source"], dep["target"], dep["import_type"], dep["line"]),
    )
    if module_deps:
        wire["md"] = [
            [
                dep["source"],
                dep["target"],
                dep["import_type"],
                dep["line"],
            ]
            for dep in module_deps
        ]

    dead_candidates = sorted(
        entry["dead_candidates"],
        key=lambda candidate: (
            candidate["start_line"],
            candidate["end_line"],
            candidate["qualname"],
            candidate["local_name"],
            candidate["kind"],
        ),
    )
    if dead_candidates:
        encoded_dead_candidates: list[list[object]] = []
        for candidate in dead_candidates:
            encoded = [
                candidate["qualname"],
                candidate["local_name"],
                candidate["start_line"],
                candidate["end_line"],
                candidate["kind"],
            ]
            # Optional 6th element, only appended when non-empty — the
            # decoder accepts rows of length 5 or 6.
            suppressed_rules = candidate.get("suppressed_rules", [])
            normalized_rules = _normalized_optional_string_list(suppressed_rules)
            if normalized_rules:
                encoded.append(normalized_rules)
            encoded_dead_candidates.append(encoded)
        wire["dc"] = encoded_dead_candidates

    if entry["referenced_names"]:
        wire["rn"] = sorted(set(entry["referenced_names"]))
    if entry.get("referenced_qualnames"):
        wire["rq"] = sorted(set(entry["referenced_qualnames"]))
    if entry["import_names"]:
        wire["in"] = sorted(set(entry["import_names"]))
    if entry["class_names"]:
        wire["cn"] = sorted(set(entry["class_names"]))
    typing_coverage = entry.get("typing_coverage")
    if typing_coverage is not None:
        wire["tc"] = [
            typing_coverage["module"],
            typing_coverage["callable_count"],
            typing_coverage["params_total"],
            typing_coverage["params_annotated"],
            typing_coverage["returns_total"],
            typing_coverage["returns_annotated"],
            typing_coverage["any_annotation_count"],
        ]
    docstring_coverage = entry.get("docstring_coverage")
    if docstring_coverage is not None:
        wire["dg"] = [
            docstring_coverage["module"],
            docstring_coverage["public_symbol_total"],
            docstring_coverage["public_symbol_documented"],
        ]
    api_surface = entry.get("api_surface")
    if api_surface is not None:
        # Symbol row layout mirrors _decode_wire_api_surface_symbol:
        # [qualname, kind, start, end, exported_via, returns_hash, params].
        wire["as"] = [
            api_surface["module"],
            sorted(set(api_surface.get("all_declared", []))),
            [
                [
                    symbol["qualname"],
                    symbol["kind"],
                    symbol["start_line"],
                    symbol["end_line"],
                    symbol.get("exported_via", "name"),
                    symbol.get("returns_hash", ""),
                    [
                        [
                            param["name"],
                            param["kind"],
                            1 if param["has_default"] else 0,
                            param.get("annotation_hash", ""),
                        ]
                        for param in symbol.get("params", [])
                    ],
                ]
                for symbol in api_surface["symbols"]
            ],
        ]

    # "sf" is written whenever the key exists, even for an empty list, so
    # the decoder can tell "analyzed, no findings" from a legacy entry.
    if "structural_findings" in entry:
        structural_findings = entry.get("structural_findings", [])
        wire["sf"] = [
            [
                group["finding_kind"],
                group["finding_key"],
                sorted(group["signature"].items()),
                [
                    [item["qualname"], item["start"], item["end"]]
                    for item in group["items"]
                ],
            ]
            for group in structural_findings
        ]

    return wire


__all__ = ["_encode_wire_file_entry"]
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

"""Low-level decoders for the compact cache wire format.

Every helper returns ``None`` to signal a malformed value; callers treat that
as "reject the entry".  The ``_decode_optional_*`` helpers additionally map a
*missing* key to an empty container, so "section absent" stays distinguishable
from "section corrupt" (except ``_decode_optional_wire_row``, where both map
to ``None`` by design).
"""

from __future__ import annotations

from collections.abc import Callable, Collection
from typing import Literal, TypeVar

from .entries import _as_risk_literal
from .integrity import (
    as_int_or_none as _as_int,
)
from .integrity import (
    as_object_list as _as_list,
)
from .integrity import (
    as_str_or_none as _as_str,
)
from .versioning import _DEFAULT_WIRE_UNIT_FLOW_PROFILES

# Item type produced by a caller-supplied per-item decode callback.
_DecodedItemT = TypeVar("_DecodedItemT")


def _decode_wire_qualname_span(
    row: list[object],
) -> tuple[str, int, int] | None:
    """Decode ``[qualname, start_line, end_line]``; ``None`` if any field is bad."""
    qualname = _as_str(row[0])
    start_line = _as_int(row[1])
    end_line = _as_int(row[2])
    if qualname is None or start_line is None or end_line is None:
        return None
    return qualname, start_line, end_line


def _decode_wire_qualname_span_size(
    row: list[object],
) -> tuple[str, int, int, int] | None:
    """Decode ``[qualname, start_line, end_line, size]``; ``None`` on failure."""
    qualname_span = _decode_wire_qualname_span(row)
    if qualname_span is None:
        return None
    size = _as_int(row[3])
    if size is None:
        return None
    qualname, start_line, end_line = qualname_span
    return qualname, start_line, end_line, size


def _decode_optional_wire_items(
    *,
    obj: dict[str, object],
    key: str,
    decode_item: Callable[[object], _DecodedItemT | None],
) -> list[_DecodedItemT] | None:
    """Decode an optional list section under *key*.

    Missing key -> ``[]``; a non-list value or any item that fails
    *decode_item* -> ``None`` (whole section rejected).
    """
    raw_items = obj.get(key)
    if raw_items is None:
        return []
    wire_items = _as_list(raw_items)
    if wire_items is None:
        return None
    decoded_items: list[_DecodedItemT] = []
    for wire_item in wire_items:
        decoded = decode_item(wire_item)
        if decoded is None:
            return None
        decoded_items.append(decoded)
    return decoded_items


def _decode_optional_wire_items_for_filepath(
    *,
    obj: dict[str, object],
    key: str,
    filepath: str,
    decode_item: Callable[[object, str], _DecodedItemT | None],
) -> list[_DecodedItemT] | None:
    """Like :func:`_decode_optional_wire_items`, threading *filepath* to items."""
    raw_items = obj.get(key)
    if raw_items is None:
        return []
    wire_items = _as_list(raw_items)
    if wire_items is None:
        return None
    decoded_items: list[_DecodedItemT] = []
    for wire_item in wire_items:
        decoded = decode_item(wire_item, filepath)
        if decoded is None:
            return None
        decoded_items.append(decoded)
    return decoded_items


def _decode_optional_wire_row(
    *,
    obj: dict[str, object],
    key: str,
    expected_len: int,
) -> list[object] | None:
    """Return the row under *key* only if it is a list of exactly *expected_len*.

    NOTE: here a missing key and a malformed row both yield ``None``; the
    caller decides whether absence is acceptable.
    """
    raw = obj.get(key)
    if raw is None:
        return None
    row = _as_list(raw)
    if row is None or len(row) != expected_len:
        return None
    return row


def _decode_optional_wire_names(
    *,
    obj: dict[str, object],
    key: str,
) -> list[str] | None:
    """Decode an optional list of strings; missing -> ``[]``, non-str -> ``None``."""
    raw_names = obj.get(key)
    if raw_names is None:
        return []
    names = _as_list(raw_names)
    if names is None or not all(isinstance(name, str) for name in names):
        return None
    return [str(name) for name in names]


def _decode_optional_wire_coupled_classes(
    *,
    obj: dict[str, object],
    key: str,
) -> dict[str, list[str]] | None:
    """Decode an optional ``[[qualname, [names...]], ...]`` mapping section.

    Missing key -> ``{}``.  Name lists are deduplicated, sorted, and empty
    strings are dropped; any structural defect rejects the whole section.
    """
    raw = obj.get(key)
    if raw is None:
        return {}

    rows = _as_list(raw)
    if rows is None:
        return None

    decoded: dict[str, list[str]] = {}
    for wire_row in rows:
        row = _as_list(wire_row)
        if row is None or len(row) != 2:
            return None
        qualname = _as_str(row[0])
        names = _as_list(row[1])
        if qualname is None or names is None:
            return None
        if not all(isinstance(name, str) for name in names):
            return None
        decoded[qualname] = sorted({str(name) for name in names if str(name)})

    return decoded


def _decode_wire_row(
    value: object,
    *,
    valid_lengths: Collection[int],
) -> list[object] | None:
    """Accept *value* as a row only if it is a list whose length is allowed."""
    row = _as_list(value)
    if row is None or len(row) not in valid_lengths:
        return None
    return row
def _decode_wire_named_span(
    value: object,
    *,
    valid_lengths: Collection[int],
) -> tuple[list[object], str, int, int] | None:
    """Validate a row and decode its leading qualname span.

    Returns the raw row alongside the decoded fields so callers can keep
    reading positional fields past index 2.
    """
    row = _decode_wire_row(value, valid_lengths=valid_lengths)
    if row is None:
        return None
    span = _decode_wire_qualname_span(row)
    if span is None:
        return None
    qualname, start_line, end_line = span
    return row, qualname, start_line, end_line


def _decode_wire_named_sized_span(
    value: object,
    *,
    valid_lengths: Collection[int],
) -> tuple[list[object], str, int, int, int] | None:
    """Like :func:`_decode_wire_named_span` but also decodes the size at index 3."""
    row = _decode_wire_row(value, valid_lengths=valid_lengths)
    if row is None:
        return None
    span = _decode_wire_qualname_span_size(row)
    if span is None:
        return None
    qualname, start_line, end_line, size = span
    return row, qualname, start_line, end_line, size


def _decode_wire_int_fields(
    row: list[object],
    *indexes: int,
) -> tuple[int, ...] | None:
    """Decode the given positional fields as ints; ``None`` if any is not an int."""
    values: list[int] = []
    for index in indexes:
        value = _as_int(row[index])
        if value is None:
            return None
        values.append(value)
    return tuple(values)


def _decode_wire_str_fields(
    row: list[object],
    *indexes: int,
) -> tuple[str, ...] | None:
    """Decode the given positional fields as strs; ``None`` if any is not a str."""
    values: list[str] = []
    for index in indexes:
        value = _as_str(row[index])
        if value is None:
            return None
        values.append(value)
    return tuple(values)


def _decode_wire_unit_core_fields(
    row: list[object],
) -> tuple[int, int, str, str, int, int, Literal["low", "medium", "high"], str] | None:
    """Decode the fixed core metric fields of a unit row.

    Positional contract: ints at 3/4/7/8 (loc, stmt_count, cyclomatic
    complexity, nesting depth), strs at 5/6/10 (fingerprint, loc_bucket,
    raw_hash), risk literal at 9.
    """
    int_fields = _decode_wire_int_fields(row, 3, 4, 7, 8)
    str_fields = _decode_wire_str_fields(row, 5, 6, 10)
    risk = _as_risk_literal(row[9])
    if int_fields is None or str_fields is None or risk is None:
        return None
    loc, stmt_count, cyclomatic_complexity, nesting_depth = int_fields
    fingerprint, loc_bucket, raw_hash = str_fields
    return (
        loc,
        stmt_count,
        fingerprint,
        loc_bucket,
        cyclomatic_complexity,
        nesting_depth,
        risk,
        raw_hash,
    )


def _decode_wire_unit_flow_profiles(
    row: list[object],
) -> tuple[int, str, bool, str, str, str] | None:
    """Decode flow-profile fields at indexes 11-16 of a 17-element unit row.

    Rows that are not exactly 17 long fall back to the module-level default
    profile tuple (presumably rows written before these fields existed —
    TODO confirm against the wire encoder).  Empty strings are mapped to
    their sentinel defaults and the guard count is clamped non-negative.
    """
    if len(row) != 17:
        return _DEFAULT_WIRE_UNIT_FLOW_PROFILES

    parsed_entry_guard_count = _as_int(row[11])
    parsed_entry_guard_terminal_profile = _as_str(row[12])
    parsed_entry_guard_has_side_effect_before = _as_int(row[13])
    parsed_terminal_kind = _as_str(row[14])
    parsed_try_finally_profile = _as_str(row[15])
    parsed_side_effect_order_profile = _as_str(row[16])
    if (
        parsed_entry_guard_count is None
        or parsed_entry_guard_terminal_profile is None
        or parsed_entry_guard_has_side_effect_before is None
        or parsed_terminal_kind is None
        or parsed_try_finally_profile is None
        or parsed_side_effect_order_profile is None
    ):
        return None
    return (
        max(0, parsed_entry_guard_count),
        parsed_entry_guard_terminal_profile or "none",
        parsed_entry_guard_has_side_effect_before != 0,
        parsed_terminal_kind or "fallthrough",
        parsed_try_finally_profile or "none",
        parsed_side_effect_order_profile or "none",
    )


def _decode_wire_class_metric_fields(
    row: list[object],
) -> tuple[int, int, int, int, str, str] | None:
    """Decode class-metric fields: ints at 3-6, risk strings at 7-8."""
    int_fields = _decode_wire_int_fields(row, 3, 4, 5, 6)
    str_fields = _decode_wire_str_fields(row, 7, 8)
    if int_fields is None or str_fields is None:
        return None
    cbo, lcom4, method_count, instance_var_count = int_fields
    risk_coupling, risk_cohesion = str_fields
    return (
        cbo,
        lcom4,
        method_count,
        instance_var_count,
        risk_coupling,
        risk_cohesion,
    )


__all__ = [
    "_decode_optional_wire_coupled_classes",
    "_decode_optional_wire_items",
    "_decode_optional_wire_items_for_filepath",
    "_decode_optional_wire_names",
    "_decode_optional_wire_row",
    "_decode_wire_class_metric_fields",
    "_decode_wire_int_fields",
    "_decode_wire_named_sized_span",
    "_decode_wire_named_span",
    "_decode_wire_qualname_span",
    "_decode_wire_qualname_span_size",
    "_decode_wire_row",
    "_decode_wire_str_fields",
    "_decode_wire_unit_core_fields",
    "_decode_wire_unit_flow_profiles",
]
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

"""TypedDict schema for cache entries plus model -> dict converters.

The ``*Dict`` classes describe the JSON-serializable shape of a per-file
cache entry; the ``*_from_model`` helpers project the runtime dataclasses
from ``codeclone.models`` into that shape.
"""

from __future__ import annotations

from collections.abc import Sequence
from typing import Literal, TypedDict

from ..findings.structural.detectors import normalize_structural_finding_group
from ..models import (
    BlockGroupItem,
    BlockUnit,
    ClassMetrics,
    DeadCandidate,
    FunctionGroupItem,
    ModuleApiSurface,
    ModuleDep,
    ModuleDocstringCoverage,
    ModuleTypingCoverage,
    SegmentGroupItem,
    SegmentUnit,
    StructuralFindingGroup,
    StructuralFindingOccurrence,
    Unit,
)


class FileStat(TypedDict):
    """File identity used for cache freshness checks."""

    mtime_ns: int
    size: int


class SourceStatsDict(TypedDict):
    """Coarse per-file source counters."""

    lines: int
    functions: int
    methods: int
    classes: int


# Cache rows reuse the report group-item shapes directly.
UnitDict = FunctionGroupItem
BlockDict = BlockGroupItem
SegmentDict = SegmentGroupItem


class ClassMetricsDictBase(TypedDict):
    """Required class-metric fields (coupling/cohesion measurements)."""

    qualname: str
    filepath: str
    start_line: int
    end_line: int
    cbo: int
    lcom4: int
    method_count: int
    instance_var_count: int
    risk_coupling: str
    risk_cohesion: str


class ClassMetricsDict(ClassMetricsDictBase, total=False):
    """Class metrics with the optional coupled-class name list."""

    coupled_classes: list[str]


class ModuleDepDict(TypedDict):
    """One module-level import edge."""

    source: str
    target: str
    import_type: str
    line: int


class DeadCandidateDictBase(TypedDict):
    """Required fields of a dead-code candidate."""

    qualname: str
    local_name: str
    filepath: str
    start_line: int
    end_line: int
    kind: str


class DeadCandidateDict(DeadCandidateDictBase, total=False):
    """Dead-code candidate with optional suppression rule names."""

    suppressed_rules: list[str]


class ModuleTypingCoverageDict(TypedDict):
    """Per-module type-annotation coverage counters."""

    module: str
    filepath: str
    callable_count: int
    params_total: int
    params_annotated: int
    returns_total: int
    returns_annotated: int
    any_annotation_count: int


class ModuleDocstringCoverageDict(TypedDict):
    """Per-module docstring coverage counters."""

    module: str
    filepath: str
    public_symbol_total: int
    public_symbol_documented: int


class ApiParamSpecDict(TypedDict):
    """One parameter of a public API symbol."""

    name: str
    kind: str
    has_default: bool
    annotation_hash: str


class PublicSymbolDict(TypedDict):
    """One exported symbol of a module's API surface."""

    qualname: str
    kind: str
    start_line: int
    end_line: int
    params: list[ApiParamSpecDict]
    returns_hash: str
    exported_via: str


class ModuleApiSurfaceDict(TypedDict):
    """A module's declared API surface."""

    module: str
    filepath: str
    all_declared: list[str]
    symbols: list[PublicSymbolDict]


class StructuralFindingOccurrenceDict(TypedDict):
    """One occurrence of a structural finding within a file."""

    qualname: str
    start: int
    end: int


class StructuralFindingGroupDict(TypedDict):
    """A group of structural-finding occurrences sharing a signature."""

    finding_kind: str
    finding_key: str
    signature: dict[str, str]
    items: list[StructuralFindingOccurrenceDict]


class _FileEntryBase(TypedDict):
    """Mandatory sections of a per-file cache entry."""

    stat: FileStat
    units: list[UnitDict]
    blocks: list[BlockDict]
    segments: list[SegmentDict]


class _FileEntryV25(_FileEntryBase, total=False):
    """Full per-file cache entry; all metric sections are optional."""

    source_stats: SourceStatsDict
    class_metrics: list[ClassMetricsDict]
    module_deps: list[ModuleDepDict]
    dead_candidates: list[DeadCandidateDict]
    referenced_names: list[str]
    referenced_qualnames: list[str]
    import_names: list[str]
    class_names: list[str]
    typing_coverage: ModuleTypingCoverageDict
    docstring_coverage: ModuleDocstringCoverageDict
    api_surface: ModuleApiSurfaceDict
    structural_findings: list[StructuralFindingGroupDict]


# Public aliases for the current entry schema.
CacheEntryBase = _FileEntryBase
CacheEntry = _FileEntryV25


def _normalize_cached_structural_group(
    group: StructuralFindingGroupDict,
    *,
    filepath: str,
) -> StructuralFindingGroupDict | None:
    """Re-run detector normalization on a cached group.

    Round-trips the dict through the runtime model so the shared
    ``normalize_structural_finding_group`` rules apply; returns ``None``
    when the normalizer rejects the group.
    """
    signature = dict(group["signature"])
    finding_kind = group["finding_kind"]
    finding_key = group["finding_key"]
    normalized = normalize_structural_finding_group(
        StructuralFindingGroup(
            finding_kind=finding_kind,
            finding_key=finding_key,
            signature=signature,
            items=tuple(
                StructuralFindingOccurrence(
                    finding_kind=finding_kind,
                    finding_key=finding_key,
                    file_path=filepath,
                    qualname=item["qualname"],
                    start=item["start"],
                    end=item["end"],
                    signature=signature,
                )
                for item in group["items"]
            ),
        )
    )
    if normalized is None:
        return None
    return StructuralFindingGroupDict(
        finding_kind=normalized.finding_kind,
        finding_key=normalized.finding_key,
        signature=dict(normalized.signature),
        items=[
            StructuralFindingOccurrenceDict(
                qualname=item.qualname,
                start=item.start,
                end=item.end,
            )
            for item in normalized.items
        ],
    )


def _normalize_cached_structural_groups(
    groups: Sequence[StructuralFindingGroupDict],
    *,
    filepath: str,
) -> list[StructuralFindingGroupDict]:
    """Normalize each cached group, dropping rejects; sort largest-first."""
    normalized = [
        candidate
        for candidate in (
            _normalize_cached_structural_group(group, filepath=filepath)
            for group in groups
        )
        if candidate is not None
    ]
    normalized.sort(key=lambda group: (-len(group["items"]), group["finding_key"]))
    return normalized


def _as_risk_literal(value: object) -> Literal["low", "medium", "high"] | None:
    """Narrow *value* to a risk literal; anything else -> ``None``."""
    match value:
        case "low":
            return "low"
        case "medium":
            return "medium"
        case "high":
            return "high"
        case _:
            return None


def _new_optional_metrics_payload() -> tuple[
    list[ClassMetricsDict],
    list[ModuleDepDict],
    list[DeadCandidateDict],
    list[str],
    list[str],
    list[str],
    list[str],
    ModuleTypingCoverageDict | None,
    ModuleDocstringCoverageDict | None,
    ModuleApiSurfaceDict | None,
]:
    """Return a fresh all-empty tuple for the optional metric sections."""
    return [], [], [], [], [], [], [], None, None, None


def _unit_dict_from_model(unit: Unit, filepath: str) -> UnitDict:
    """Project a ``Unit`` model into its cache-row dict."""
    return FunctionGroupItem(
        qualname=unit.qualname,
        filepath=filepath,
        start_line=unit.start_line,
        end_line=unit.end_line,
        loc=unit.loc,
        stmt_count=unit.stmt_count,
        fingerprint=unit.fingerprint,
        loc_bucket=unit.loc_bucket,
        cyclomatic_complexity=unit.cyclomatic_complexity,
        nesting_depth=unit.nesting_depth,
        risk=unit.risk,
        raw_hash=unit.raw_hash,
        entry_guard_count=unit.entry_guard_count,
        entry_guard_terminal_profile=unit.entry_guard_terminal_profile,
        entry_guard_has_side_effect_before=unit.entry_guard_has_side_effect_before,
        terminal_kind=unit.terminal_kind,
        try_finally_profile=unit.try_finally_profile,
        side_effect_order_profile=unit.side_effect_order_profile,
    )


def _block_dict_from_model(block: BlockUnit, filepath: str) -> BlockDict:
    """Project a ``BlockUnit`` model into its cache-row dict."""
    return BlockGroupItem(
        block_hash=block.block_hash,
        filepath=filepath,
        qualname=block.qualname,
        start_line=block.start_line,
        end_line=block.end_line,
        size=block.size,
    )


def _segment_dict_from_model(segment: SegmentUnit, filepath: str) -> SegmentDict:
    """Project a ``SegmentUnit`` model into its cache-row dict."""
    return SegmentGroupItem(
        segment_hash=segment.segment_hash,
        segment_sig=segment.segment_sig,
        filepath=filepath,
        qualname=segment.qualname,
        start_line=segment.start_line,
        end_line=segment.end_line,
        size=segment.size,
    )


def _typing_coverage_dict_from_model(
    coverage: ModuleTypingCoverage | None,
    *,
    filepath: str,
) -> ModuleTypingCoverageDict | None:
    """Project typing coverage into its dict; pass ``None`` through."""
    if coverage is None:
        return None
    return ModuleTypingCoverageDict(
        module=coverage.module,
        filepath=filepath,
        callable_count=coverage.callable_count,
        params_total=coverage.params_total,
        params_annotated=coverage.params_annotated,
        returns_total=coverage.returns_total,
        returns_annotated=coverage.returns_annotated,
        any_annotation_count=coverage.any_annotation_count,
    )


def _docstring_coverage_dict_from_model(
    coverage: ModuleDocstringCoverage | None,
    *,
    filepath: str,
) -> ModuleDocstringCoverageDict | None:
    """Project docstring coverage into its dict; pass ``None`` through."""
    if coverage is None:
        return None
    return ModuleDocstringCoverageDict(
        module=coverage.module,
        filepath=filepath,
        public_symbol_total=coverage.public_symbol_total,
        public_symbol_documented=coverage.public_symbol_documented,
    )


def _api_surface_dict_from_model(
    surface: ModuleApiSurface | None,
    *,
    filepath: str,
) -> ModuleApiSurfaceDict | None:
    """Project an API surface (symbols and parameters) into its dict form."""
    if surface is None:
        return None
    return ModuleApiSurfaceDict(
        module=surface.module,
        filepath=filepath,
        all_declared=list(surface.all_declared or ()),
        symbols=[
            PublicSymbolDict(
                qualname=symbol.qualname,
                kind=symbol.kind,
                start_line=symbol.start_line,
                end_line=symbol.end_line,
                params=[
                    ApiParamSpecDict(
                        name=param.name,
                        kind=param.kind,
                        has_default=param.has_default,
                        annotation_hash=param.annotation_hash,
                    )
                    for param in symbol.params
                ],
                returns_hash=symbol.returns_hash,
                exported_via=symbol.exported_via,
            )
            for symbol in surface.symbols
        ],
    )


def _class_metrics_dict_from_model(
    metric: ClassMetrics,
    filepath: str,
) -> ClassMetricsDict:
    """Project class metrics; coupled-class names are deduped and sorted."""
    return ClassMetricsDict(
        qualname=metric.qualname,
        filepath=filepath,
        start_line=metric.start_line,
        end_line=metric.end_line,
        cbo=metric.cbo,
        lcom4=metric.lcom4,
        method_count=metric.method_count,
        instance_var_count=metric.instance_var_count,
        risk_coupling=metric.risk_coupling,
        risk_cohesion=metric.risk_cohesion,
        coupled_classes=sorted(set(metric.coupled_classes)),
    )


def _module_dep_dict_from_model(dep: ModuleDep) -> ModuleDepDict:
    """Project a module dependency edge into its dict form."""
    return ModuleDepDict(
        source=dep.source,
        target=dep.target,
        import_type=dep.import_type,
        line=dep.line,
    )


def _dead_candidate_dict_from_model(
    candidate: DeadCandidate,
    filepath: str,
) -> DeadCandidateDict:
    """Project a dead-code candidate; suppression rules only when present."""
    result = DeadCandidateDict(
        qualname=candidate.qualname,
        local_name=candidate.local_name,
        filepath=filepath,
        start_line=candidate.start_line,
        end_line=candidate.end_line,
        kind=candidate.kind,
    )
    if candidate.suppressed_rules:
        result["suppressed_rules"] = sorted(set(candidate.suppressed_rules))
    return result


def _structural_occurrence_dict_from_model(
    occurrence: StructuralFindingOccurrence,
) -> StructuralFindingOccurrenceDict:
    """Project one structural-finding occurrence into its dict form."""
    return StructuralFindingOccurrenceDict(
        qualname=occurrence.qualname,
        start=occurrence.start,
        end=occurrence.end,
    )


def _structural_group_dict_from_model(
    group: StructuralFindingGroup,
) -> StructuralFindingGroupDict:
    """Project a structural-finding group into its dict form."""
    return StructuralFindingGroupDict(
        finding_kind=group.finding_kind,
        finding_key=group.finding_key,
        signature=dict(group.signature),
        items=[
            _structural_occurrence_dict_from_model(occurrence)
            for occurrence in group.items
        ],
    )


__all__ = [
    "ApiParamSpecDict",
    "BlockDict",
    "CacheEntry",
    "CacheEntryBase",
    "ClassMetricsDict",
    "DeadCandidateDict",
    "FileStat",
    "ModuleApiSurfaceDict",
    "ModuleDepDict",
    "ModuleDocstringCoverageDict",
    "ModuleTypingCoverageDict",
    "PublicSymbolDict",
    "SegmentDict",
    "SourceStatsDict",
    "StructuralFindingGroupDict",
    "StructuralFindingOccurrenceDict",
    "UnitDict",
    "_api_surface_dict_from_model",
    "_as_risk_literal",
    "_block_dict_from_model",
    "_class_metrics_dict_from_model",
    "_dead_candidate_dict_from_model",
    "_docstring_coverage_dict_from_model",
    "_module_dep_dict_from_model",
    "_new_optional_metrics_payload",
    "_normalize_cached_structural_group",
    "_normalize_cached_structural_groups",
    "_segment_dict_from_model",
    "_structural_group_dict_from_model",
    "_structural_occurrence_dict_from_model",
    "_typing_coverage_dict_from_model",
    "_unit_dict_from_model",
]
"as_str_dict", + "as_str_or_none", + "canonical_json", + "read_json_document", + "sign_cache_payload", + "verify_cache_payload_signature", + "write_json_document_atomically", +] diff --git a/codeclone/cache_segments.py b/codeclone/cache/projection.py similarity index 82% rename from codeclone/cache_segments.py rename to codeclone/cache/projection.py index a771e51..49b2db3 100644 --- a/codeclone/cache_segments.py +++ b/codeclone/cache/projection.py @@ -10,18 +10,57 @@ from pathlib import Path from typing import TypedDict -from .cache_io import ( +from ..models import SegmentGroupItem +from .integrity import ( as_int_or_none, as_object_list, as_str_dict, as_str_or_none, ) -from .cache_paths import runtime_filepath_from_wire, wire_filepath_from_runtime -from .models import SegmentGroupItem SegmentDict = SegmentGroupItem +def wire_filepath_from_runtime( + runtime_filepath: str, + *, + root: Path | None, +) -> str: + runtime_path = Path(runtime_filepath) + if root is None: + return runtime_path.as_posix() + + try: + relative = runtime_path.relative_to(root) + return relative.as_posix() + except ValueError: + pass + + try: + relative = runtime_path.resolve().relative_to(root.resolve()) + return relative.as_posix() + except OSError: + return runtime_path.as_posix() + except ValueError: + return runtime_path.as_posix() + + +def runtime_filepath_from_wire( + wire_filepath: str, + *, + root: Path | None, +) -> str: + wire_path = Path(wire_filepath) + if root is None or wire_path.is_absolute(): + return str(wire_path) + + combined = root / wire_path + try: + return str(combined.resolve(strict=False)) + except OSError: + return str(combined) + + class SegmentReportProjection(TypedDict): digest: str suppressed: int @@ -182,3 +221,14 @@ def encode_segment_report_projection( "s": max(0, int(projection["suppressed"])), "g": groups_rows, } + + +__all__ = [ + "SegmentDict", + "SegmentReportProjection", + "build_segment_report_projection", + "decode_segment_report_projection", + 
"encode_segment_report_projection", + "runtime_filepath_from_wire", + "wire_filepath_from_runtime", +] diff --git a/codeclone/cache/store.py b/codeclone/cache/store.py new file mode 100644 index 0000000..0ad7612 --- /dev/null +++ b/codeclone/cache/store.py @@ -0,0 +1,616 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +import sys +from json import JSONDecodeError +from pathlib import Path + +from ..baseline.trust import current_python_tag +from ..contracts import BASELINE_FINGERPRINT_VERSION, CACHE_VERSION +from ..contracts.errors import CacheError +from ..models import BlockUnit, FileMetrics, SegmentUnit, StructuralFindingGroup, Unit +from ._canonicalize import ( + _as_file_stat_dict, + _as_typed_block_list, + _as_typed_segment_list, + _as_typed_unit_list, + _attach_optional_cache_sections, + _canonicalize_cache_entry, + _decode_optional_cache_sections, + _is_canonical_cache_entry, +) +from ._wire_decode import _decode_wire_file_entry +from ._wire_encode import _encode_wire_file_entry +from .entries import ( + CacheEntry, + FileStat, + SourceStatsDict, + _api_surface_dict_from_model, + _block_dict_from_model, + _class_metrics_dict_from_model, + _dead_candidate_dict_from_model, + _docstring_coverage_dict_from_model, + _module_dep_dict_from_model, + _new_optional_metrics_payload, + _normalize_cached_structural_groups, + _segment_dict_from_model, + _structural_group_dict_from_model, + _typing_coverage_dict_from_model, + _unit_dict_from_model, +) +from .integrity import ( + as_str_dict as _as_str_dict, +) +from .integrity import ( + as_str_or_none as _as_str, +) +from .integrity import ( + read_json_document, + sign_cache_payload, + verify_cache_payload_signature, + 
write_json_document_atomically, +) +from .projection import ( + SegmentReportProjection, + decode_segment_report_projection, + encode_segment_report_projection, + runtime_filepath_from_wire, + wire_filepath_from_runtime, +) +from .versioning import ( + LEGACY_CACHE_SECRET_FILENAME, + MAX_CACHE_SIZE_BYTES, + AnalysisProfile, + CacheData, + CacheStatus, + _as_analysis_profile, + _empty_cache_data, + _resolve_root, +) + + +def _default_max_cache_size_bytes() -> int: + public_module = sys.modules.get("codeclone.cache") + if public_module is not None: + candidate = getattr(public_module, "MAX_CACHE_SIZE_BYTES", MAX_CACHE_SIZE_BYTES) + if isinstance(candidate, int): + return candidate + return MAX_CACHE_SIZE_BYTES + + +class Cache: + __slots__ = ( + "_canonical_runtime_paths", + "_dirty", + "analysis_profile", + "cache_schema_version", + "data", + "fingerprint_version", + "legacy_secret_warning", + "load_status", + "load_warning", + "max_size_bytes", + "path", + "root", + "segment_report_projection", + ) + + _CACHE_VERSION = CACHE_VERSION + + def __init__( + self, + path: str | Path, + *, + root: str | Path | None = None, + max_size_bytes: int | None = None, + min_loc: int = 10, + min_stmt: int = 6, + block_min_loc: int = 20, + block_min_stmt: int = 8, + segment_min_loc: int = 20, + segment_min_stmt: int = 10, + collect_api_surface: bool = False, + ): + self.path = Path(path) + self.root = _resolve_root(root) + self.fingerprint_version = BASELINE_FINGERPRINT_VERSION + self.analysis_profile: AnalysisProfile = { + "min_loc": min_loc, + "min_stmt": min_stmt, + "block_min_loc": block_min_loc, + "block_min_stmt": block_min_stmt, + "segment_min_loc": segment_min_loc, + "segment_min_stmt": segment_min_stmt, + "collect_api_surface": collect_api_surface, + } + self.data: CacheData = _empty_cache_data( + version=self._CACHE_VERSION, + python_tag=current_python_tag(), + fingerprint_version=self.fingerprint_version, + analysis_profile=self.analysis_profile, + ) + 
self._canonical_runtime_paths: set[str] = set() + self.legacy_secret_warning = self._detect_legacy_secret_warning() + self.cache_schema_version: str | None = None + self.load_status = CacheStatus.MISSING + self.load_warning: str | None = self.legacy_secret_warning + self.max_size_bytes = ( + _default_max_cache_size_bytes() + if max_size_bytes is None + else max_size_bytes + ) + self.segment_report_projection: SegmentReportProjection | None = None + self._dirty: bool = True + + def _detect_legacy_secret_warning(self) -> str | None: + secret_path = self.path.parent / LEGACY_CACHE_SECRET_FILENAME + try: + if secret_path.exists(): + return ( + f"Legacy cache secret file detected at {secret_path}; " + "delete this obsolete file." + ) + except OSError as exc: + return f"Legacy cache secret check failed: {exc}" + return None + + def _set_load_warning(self, message: str | None) -> None: + warning = message + if warning is None: + warning = self.legacy_secret_warning + elif self.legacy_secret_warning: + warning = f"{warning}\n{self.legacy_secret_warning}" + self.load_warning = warning + + def _ignore_cache( + self, + message: str, + *, + status: CacheStatus, + schema_version: str | None = None, + ) -> None: + self._set_load_warning(message) + self.load_status = status + self.cache_schema_version = schema_version + self.data = _empty_cache_data( + version=self._CACHE_VERSION, + python_tag=current_python_tag(), + fingerprint_version=self.fingerprint_version, + analysis_profile=self.analysis_profile, + ) + self._canonical_runtime_paths = set() + self.segment_report_projection = None + + def _reject_cache_load( + self, + message: str, + *, + status: CacheStatus, + schema_version: str | None = None, + ) -> CacheData | None: + self._ignore_cache( + message, + status=status, + schema_version=schema_version, + ) + return None + + def _reject_invalid_cache_format( + self, + *, + schema_version: str | None = None, + ) -> CacheData | None: + return self._reject_cache_load( + "Cache 
format invalid; ignoring cache.", + status=CacheStatus.INVALID_TYPE, + schema_version=schema_version, + ) + + def _reject_version_mismatch(self, version: str) -> CacheData | None: + return self._reject_cache_load( + f"Cache version mismatch (found {version}); ignoring cache.", + status=CacheStatus.VERSION_MISMATCH, + schema_version=version, + ) + + def load(self) -> None: + try: + exists = self.path.exists() + except OSError as exc: + self._ignore_cache( + f"Cache unreadable; ignoring cache: {exc}", + status=CacheStatus.UNREADABLE, + ) + return + + if not exists: + self._set_load_warning(None) + self.load_status = CacheStatus.MISSING + self.cache_schema_version = None + self._canonical_runtime_paths = set() + self.segment_report_projection = None + return + + try: + size = self.path.stat().st_size + if size > self.max_size_bytes: + self._ignore_cache( + "Cache file too large " + f"({size} bytes, max {self.max_size_bytes}); ignoring cache.", + status=CacheStatus.TOO_LARGE, + ) + return + + raw_obj = read_json_document(self.path) + parsed = self._load_and_validate(raw_obj) + if parsed is None: + return + self.data = parsed + self._canonical_runtime_paths = set(parsed["files"].keys()) + self.load_status = CacheStatus.OK + self._set_load_warning(None) + self._dirty = False + except OSError as exc: + self._ignore_cache( + f"Cache unreadable; ignoring cache: {exc}", + status=CacheStatus.UNREADABLE, + ) + except JSONDecodeError: + self._ignore_cache( + "Cache corrupted; ignoring cache.", + status=CacheStatus.INVALID_JSON, + ) + + def _load_and_validate(self, raw_obj: object) -> CacheData | None: + raw = _as_str_dict(raw_obj) + if raw is None: + return self._reject_invalid_cache_format() + + legacy_version = _as_str(raw.get("version")) + if legacy_version is not None: + return self._reject_version_mismatch(legacy_version) + + version = _as_str(raw.get("v")) + if version is None: + return self._reject_invalid_cache_format() + + if version != self._CACHE_VERSION: + return 
self._reject_version_mismatch(version) + + sig = _as_str(raw.get("sig")) + payload = _as_str_dict(raw.get("payload")) + if sig is None or payload is None: + return self._reject_invalid_cache_format(schema_version=version) + + if not verify_cache_payload_signature(payload, sig): + return self._reject_cache_load( + "Cache signature mismatch; ignoring cache.", + status=CacheStatus.INTEGRITY_FAILED, + schema_version=version, + ) + + runtime_tag = current_python_tag() + py_tag = _as_str(payload.get("py")) + if py_tag is None: + return self._reject_invalid_cache_format(schema_version=version) + + if py_tag != runtime_tag: + return self._reject_cache_load( + "Cache python tag mismatch " + f"(found {py_tag}, expected {runtime_tag}); ignoring cache.", + status=CacheStatus.PYTHON_TAG_MISMATCH, + schema_version=version, + ) + + fp_version = _as_str(payload.get("fp")) + if fp_version is None: + return self._reject_invalid_cache_format(schema_version=version) + + if fp_version != self.fingerprint_version: + return self._reject_cache_load( + "Cache fingerprint version mismatch " + f"(found {fp_version}, expected {self.fingerprint_version}); " + "ignoring cache.", + status=CacheStatus.FINGERPRINT_MISMATCH, + schema_version=version, + ) + + analysis_profile = _as_analysis_profile(payload.get("ap")) + if analysis_profile is None: + return self._reject_invalid_cache_format(schema_version=version) + + if analysis_profile != self.analysis_profile: + return self._reject_cache_load( + "Cache analysis profile mismatch " + f"(found min_loc={analysis_profile['min_loc']}, " + f"min_stmt={analysis_profile['min_stmt']}, " + "collect_api_surface=" + f"{str(analysis_profile['collect_api_surface']).lower()}; " + f"expected min_loc={self.analysis_profile['min_loc']}, " + f"min_stmt={self.analysis_profile['min_stmt']}, " + "collect_api_surface=" + f"{str(self.analysis_profile['collect_api_surface']).lower()}); " + "ignoring cache.", + status=CacheStatus.ANALYSIS_PROFILE_MISMATCH, + 
schema_version=version, + ) + + files_dict = _as_str_dict(payload.get("files")) + if files_dict is None: + return self._reject_invalid_cache_format(schema_version=version) + + parsed_files: dict[str, CacheEntry] = {} + for wire_path, file_entry_obj in files_dict.items(): + runtime_path = runtime_filepath_from_wire(wire_path, root=self.root) + parsed_entry = self._decode_entry(file_entry_obj, runtime_path) + if parsed_entry is None: + return self._reject_invalid_cache_format(schema_version=version) + parsed_files[runtime_path] = _canonicalize_cache_entry(parsed_entry) + self.segment_report_projection = decode_segment_report_projection( + payload.get("sr"), + root=self.root, + ) + + self.cache_schema_version = version + return CacheData( + version=self._CACHE_VERSION, + python_tag=runtime_tag, + fingerprint_version=self.fingerprint_version, + analysis_profile=self.analysis_profile, + files=parsed_files, + ) + + def save(self) -> None: + if not self._dirty: + return + try: + wire_files: dict[str, object] = {} + wire_map = { + runtime_path: wire_filepath_from_runtime(runtime_path, root=self.root) + for runtime_path in self.data["files"] + } + for runtime_path in sorted(self.data["files"], key=wire_map.__getitem__): + entry = self.get_file_entry(runtime_path) + if entry is None: + continue + wire_files[wire_map[runtime_path]] = self._encode_entry(entry) + + payload: dict[str, object] = { + "py": current_python_tag(), + "fp": self.fingerprint_version, + "ap": self.analysis_profile, + "files": wire_files, + } + segment_projection = encode_segment_report_projection( + self.segment_report_projection, + root=self.root, + ) + if segment_projection is not None: + payload["sr"] = segment_projection + signed_doc = { + "v": self._CACHE_VERSION, + "payload": payload, + "sig": sign_cache_payload(payload), + } + write_json_document_atomically(self.path, signed_doc) + self._dirty = False + + self.data["version"] = self._CACHE_VERSION + self.data["python_tag"] = current_python_tag() + 
self.data["fingerprint_version"] = self.fingerprint_version + self.data["analysis_profile"] = self.analysis_profile + except OSError as exc: + raise CacheError(f"Failed to save cache: {exc}") from exc + + @staticmethod + def _decode_entry(value: object, filepath: str) -> CacheEntry | None: + return _decode_wire_file_entry(value, filepath) + + @staticmethod + def _encode_entry(entry: CacheEntry) -> dict[str, object]: + return _encode_wire_file_entry(entry) + + def _store_canonical_file_entry( + self, + *, + runtime_path: str, + canonical_entry: CacheEntry, + ) -> CacheEntry: + previous_entry = self.data["files"].get(runtime_path) + was_canonical = runtime_path in self._canonical_runtime_paths + self.data["files"][runtime_path] = canonical_entry + self._canonical_runtime_paths.add(runtime_path) + if not was_canonical or previous_entry != canonical_entry: + self._dirty = True + return canonical_entry + + def get_file_entry(self, filepath: str) -> CacheEntry | None: + runtime_lookup_key = filepath + entry_obj = self.data["files"].get(runtime_lookup_key) + if entry_obj is None: + wire_key = wire_filepath_from_runtime(filepath, root=self.root) + runtime_lookup_key = runtime_filepath_from_wire(wire_key, root=self.root) + entry_obj = self.data["files"].get(runtime_lookup_key) + + if entry_obj is None: + return None + + if runtime_lookup_key in self._canonical_runtime_paths: + if _is_canonical_cache_entry(entry_obj): + return entry_obj + self._canonical_runtime_paths.discard(runtime_lookup_key) + + if not isinstance(entry_obj, dict): + return None + + stat = _as_file_stat_dict(entry_obj.get("stat")) + units = _as_typed_unit_list(entry_obj.get("units")) + blocks = _as_typed_block_list(entry_obj.get("blocks")) + segments = _as_typed_segment_list(entry_obj.get("segments")) + if stat is None or units is None or blocks is None or segments is None: + return None + + optional_sections = _decode_optional_cache_sections(entry_obj) + if optional_sections is None: + return None + ( + 
class_metrics_raw, + module_deps_raw, + dead_candidates_raw, + referenced_names_raw, + referenced_qualnames_raw, + import_names_raw, + class_names_raw, + typing_coverage_raw, + docstring_coverage_raw, + api_surface_raw, + source_stats, + structural_findings, + ) = optional_sections + + entry_to_canonicalize: CacheEntry = _attach_optional_cache_sections( + CacheEntry( + stat=stat, + units=units, + blocks=blocks, + segments=segments, + class_metrics=class_metrics_raw, + module_deps=module_deps_raw, + dead_candidates=dead_candidates_raw, + referenced_names=referenced_names_raw, + referenced_qualnames=referenced_qualnames_raw, + import_names=import_names_raw, + class_names=class_names_raw, + ), + typing_coverage=typing_coverage_raw, + docstring_coverage=docstring_coverage_raw, + api_surface=api_surface_raw, + source_stats=source_stats, + structural_findings=structural_findings, + ) + canonical_entry = _canonicalize_cache_entry(entry_to_canonicalize) + return self._store_canonical_file_entry( + runtime_path=runtime_lookup_key, + canonical_entry=canonical_entry, + ) + + def put_file_entry( + self, + filepath: str, + stat_sig: FileStat, + units: list[Unit], + blocks: list[BlockUnit], + segments: list[SegmentUnit], + *, + source_stats: SourceStatsDict | None = None, + file_metrics: FileMetrics | None = None, + structural_findings: list[StructuralFindingGroup] | None = None, + ) -> None: + runtime_path = runtime_filepath_from_wire( + wire_filepath_from_runtime(filepath, root=self.root), + root=self.root, + ) + + unit_rows = [_unit_dict_from_model(unit, runtime_path) for unit in units] + block_rows = [_block_dict_from_model(block, runtime_path) for block in blocks] + segment_rows = [ + _segment_dict_from_model(segment, runtime_path) for segment in segments + ] + + ( + class_metrics_rows, + module_dep_rows, + dead_candidate_rows, + referenced_names, + referenced_qualnames, + import_names, + class_names, + typing_coverage, + docstring_coverage, + api_surface, + ) = 
_new_optional_metrics_payload() + if file_metrics is not None: + class_metrics_rows = [ + _class_metrics_dict_from_model(metric, runtime_path) + for metric in file_metrics.class_metrics + ] + module_dep_rows = [ + _module_dep_dict_from_model(dep) for dep in file_metrics.module_deps + ] + dead_candidate_rows = [ + _dead_candidate_dict_from_model(candidate, runtime_path) + for candidate in file_metrics.dead_candidates + ] + referenced_names = sorted(set(file_metrics.referenced_names)) + referenced_qualnames = sorted(set(file_metrics.referenced_qualnames)) + import_names = sorted(set(file_metrics.import_names)) + class_names = sorted(set(file_metrics.class_names)) + typing_coverage = _typing_coverage_dict_from_model( + file_metrics.typing_coverage, + filepath=runtime_path, + ) + docstring_coverage = _docstring_coverage_dict_from_model( + file_metrics.docstring_coverage, + filepath=runtime_path, + ) + api_surface = _api_surface_dict_from_model( + file_metrics.api_surface, + filepath=runtime_path, + ) + + source_stats_payload = source_stats or SourceStatsDict( + lines=0, + functions=0, + methods=0, + classes=0, + ) + entry_dict = CacheEntry( + stat=stat_sig, + source_stats=source_stats_payload, + units=unit_rows, + blocks=block_rows, + segments=segment_rows, + class_metrics=class_metrics_rows, + module_deps=module_dep_rows, + dead_candidates=dead_candidate_rows, + referenced_names=referenced_names, + referenced_qualnames=referenced_qualnames, + import_names=import_names, + class_names=class_names, + ) + if typing_coverage is not None: + entry_dict["typing_coverage"] = typing_coverage + if docstring_coverage is not None: + entry_dict["docstring_coverage"] = docstring_coverage + if api_surface is not None: + entry_dict["api_surface"] = api_surface + if structural_findings is not None: + entry_dict["structural_findings"] = _normalize_cached_structural_groups( + [ + _structural_group_dict_from_model(group) + for group in structural_findings + ], + filepath=runtime_path, + ) 
+ canonical_entry = _canonicalize_cache_entry(entry_dict) + self._store_canonical_file_entry( + runtime_path=runtime_path, + canonical_entry=canonical_entry, + ) + + +def file_stat_signature(path: str) -> FileStat: + stat_result = os.stat(path) + return FileStat( + mtime_ns=stat_result.st_mtime_ns, + size=stat_result.st_size, + ) + + +__all__ = ["Cache", "file_stat_signature"] diff --git a/codeclone/cache/versioning.py b/codeclone/cache/versioning.py new file mode 100644 index 0000000..d204994 --- /dev/null +++ b/codeclone/cache/versioning.py @@ -0,0 +1,136 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from enum import Enum +from pathlib import Path +from typing import TypedDict + +from ..contracts import CACHE_VERSION +from ..contracts.schemas import AnalysisProfile +from .entries import CacheEntry +from .integrity import as_int_or_none, as_str_dict + +MAX_CACHE_SIZE_BYTES = 50 * 1024 * 1024 +LEGACY_CACHE_SECRET_FILENAME = ".cache_secret" +_DEFAULT_WIRE_UNIT_FLOW_PROFILES = ( + 0, + "none", + False, + "fallthrough", + "none", + "none", +) + + +class CacheStatus(str, Enum): + OK = "ok" + MISSING = "missing" + TOO_LARGE = "too_large" + UNREADABLE = "unreadable" + INVALID_JSON = "invalid_json" + INVALID_TYPE = "invalid_type" + VERSION_MISMATCH = "version_mismatch" + PYTHON_TAG_MISMATCH = "python_tag_mismatch" + FINGERPRINT_MISMATCH = "mismatch_fingerprint_version" + ANALYSIS_PROFILE_MISMATCH = "analysis_profile_mismatch" + INTEGRITY_FAILED = "integrity_failed" + + +class CacheData(TypedDict): + version: str + python_tag: str + fingerprint_version: str + analysis_profile: AnalysisProfile + files: dict[str, CacheEntry] + + +def _empty_cache_data( + *, + version: str = CACHE_VERSION, + python_tag: 
str, + fingerprint_version: str, + analysis_profile: AnalysisProfile, +) -> CacheData: + return CacheData( + version=version, + python_tag=python_tag, + fingerprint_version=fingerprint_version, + analysis_profile=analysis_profile, + files={}, + ) + + +def _as_analysis_profile(value: object) -> AnalysisProfile | None: + obj = as_str_dict(value) + if obj is None: + return None + + required = { + "min_loc", + "min_stmt", + "block_min_loc", + "block_min_stmt", + "segment_min_loc", + "segment_min_stmt", + } + if set(obj.keys()) < required: + return None + + min_loc = as_int_or_none(obj.get("min_loc")) + min_stmt = as_int_or_none(obj.get("min_stmt")) + block_min_loc = as_int_or_none(obj.get("block_min_loc")) + block_min_stmt = as_int_or_none(obj.get("block_min_stmt")) + segment_min_loc = as_int_or_none(obj.get("segment_min_loc")) + segment_min_stmt = as_int_or_none(obj.get("segment_min_stmt")) + collect_api_surface_raw = obj.get("collect_api_surface", False) + collect_api_surface = ( + collect_api_surface_raw if isinstance(collect_api_surface_raw, bool) else None + ) + if ( + min_loc is None + or min_stmt is None + or block_min_loc is None + or block_min_stmt is None + or segment_min_loc is None + or segment_min_stmt is None + or collect_api_surface is None + ): + return None + + return AnalysisProfile( + min_loc=min_loc, + min_stmt=min_stmt, + block_min_loc=block_min_loc, + block_min_stmt=block_min_stmt, + segment_min_loc=segment_min_loc, + segment_min_stmt=segment_min_stmt, + collect_api_surface=collect_api_surface, + ) + + +def _resolve_root(root: str | Path | None) -> Path | None: + if root is None: + return None + try: + return Path(root).resolve(strict=False) + except OSError: + return None + + +__all__ = [ + "CACHE_VERSION", + "LEGACY_CACHE_SECRET_FILENAME", + "MAX_CACHE_SIZE_BYTES", + "_DEFAULT_WIRE_UNIT_FLOW_PROFILES", + "AnalysisProfile", + "CacheData", + "CacheStatus", + "_as_analysis_profile", + "_empty_cache_data", + "_resolve_root", +] diff --git 
a/codeclone/cache_paths.py b/codeclone/cache_paths.py deleted file mode 100644 index 8de7c63..0000000 --- a/codeclone/cache_paths.py +++ /dev/null @@ -1,49 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from pathlib import Path - - -def wire_filepath_from_runtime( - runtime_filepath: str, - *, - root: Path | None, -) -> str: - runtime_path = Path(runtime_filepath) - if root is None: - return runtime_path.as_posix() - - try: - relative = runtime_path.relative_to(root) - return relative.as_posix() - except ValueError: - pass - - try: - relative = runtime_path.resolve().relative_to(root.resolve()) - return relative.as_posix() - except OSError: - return runtime_path.as_posix() - except ValueError: - return runtime_path.as_posix() - - -def runtime_filepath_from_wire( - wire_filepath: str, - *, - root: Path | None, -) -> str: - wire_path = Path(wire_filepath) - if root is None or wire_path.is_absolute(): - return str(wire_path) - - combined = root / wire_path - try: - return str(combined.resolve(strict=False)) - except OSError: - return str(combined) diff --git a/codeclone/cli.py b/codeclone/cli.py deleted file mode 100644 index 09ac8c5..0000000 --- a/codeclone/cli.py +++ /dev/null @@ -1,1741 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import os -import subprocess -import sys -import time -from collections.abc import Mapping, Sequence -from dataclasses import dataclass -from pathlib import Path -from typing import TYPE_CHECKING, Literal, Protocol, cast - -from . import __version__, _coerce -from . import ui_messages as ui -from ._cli_args import build_parser -from ._cli_baselines import ( - CloneBaselineState as _CloneBaselineStateImpl, -) -from ._cli_baselines import ( - MetricsBaselineSectionProbe as _MetricsBaselineSectionProbeImpl, -) -from ._cli_baselines import ( - MetricsBaselineState as _MetricsBaselineStateImpl, -) -from ._cli_baselines import ( - probe_metrics_baseline_section as _probe_metrics_baseline_section_impl, -) -from ._cli_baselines import ( - resolve_clone_baseline_state as _resolve_clone_baseline_state_impl, -) -from ._cli_baselines import ( - resolve_metrics_baseline_state as _resolve_metrics_baseline_state_impl, -) -from ._cli_config import ( - ConfigValidationError, - apply_pyproject_config_overrides, - collect_explicit_cli_dests, - load_pyproject_config, -) -from ._cli_gating import ( - parse_metric_reason_entry as _parse_metric_reason_entry_impl, -) -from ._cli_gating import ( - print_gating_failure_block as _print_gating_failure_block_impl, -) -from ._cli_paths import _validate_output_path -from ._cli_reports import ( - write_report_outputs as _write_report_outputs_impl, -) -from ._cli_rich import ( - PlainConsole as _PlainConsole, -) -from ._cli_rich import ( - make_console as _make_rich_console, -) -from ._cli_rich import ( - make_plain_console as _make_plain_console_impl, -) -from ._cli_rich import ( - print_banner as _print_banner_impl, -) -from ._cli_rich import ( - rich_progress_symbols as _rich_progress_symbols_impl, -) -from ._cli_runtime import ( - configure_metrics_mode as _configure_metrics_mode_impl, -) -from ._cli_runtime import ( - 
metrics_computed as _metrics_computed_impl, -) -from ._cli_runtime import ( - print_failed_files as _print_failed_files_impl, -) -from ._cli_runtime import ( - resolve_cache_path as _resolve_cache_path_impl, -) -from ._cli_runtime import ( - resolve_cache_status as _resolve_cache_status_impl, -) -from ._cli_runtime import ( - validate_numeric_args as _validate_numeric_args_impl, -) -from ._cli_summary import ( - ChangedScopeSnapshot, - MetricsSnapshot, - _print_changed_scope, - _print_metrics, - _print_summary, -) -from ._git_diff import validate_git_diff_ref -from .baseline import Baseline -from .cache import Cache, CacheStatus, build_segment_report_projection -from .contracts import ( - DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - ISSUES_URL, - ExitCode, -) -from .errors import CacheError - -if TYPE_CHECKING: - from argparse import Namespace - from collections.abc import Callable, Mapping, Sequence - from types import ModuleType - - from rich.console import Console as RichConsole - from rich.progress import BarColumn as RichBarColumn - from rich.progress import Progress as RichProgress - from rich.progress import SpinnerColumn as RichSpinnerColumn - from rich.progress import TextColumn as RichTextColumn - from rich.progress import TimeElapsedColumn as RichTimeElapsedColumn - - from ._cli_baselines import _BaselineArgs as _BaselineArgsLike - from ._cli_gating import _GatingArgs as _GatingArgsLike - from ._cli_reports import _QuietArgs as _QuietArgsLike - from ._cli_runtime import _RuntimeArgs as _RuntimeArgsLike - from .models import MetricsDiff - from .normalize import NormalizationConfig - from .pipeline import ( - AnalysisResult, - BootstrapResult, - DiscoveryResult, - GatingResult, - ReportArtifacts, - ) - from .pipeline import ( - OutputPaths as PipelineOutputPaths, - ) - from .pipeline import ( - ProcessingResult as PipelineProcessingResult, - ) - -MAX_FILE_SIZE = 10 * 
1024 * 1024 -__all__ = [ - "MAX_FILE_SIZE", - "ExitCode", - "ProcessingResult", - "analyze", - "bootstrap", - "discover", - "gate", - "main", - "process", - "process_file", - "report", -] - -# Lazy singleton for pipeline module — deferred import to keep CLI startup fast. -# Tests monkeypatch this via _pipeline_module() to inject mocks. -_PIPELINE_MODULE: ModuleType | None = None - - -def _pipeline_module() -> ModuleType: - global _PIPELINE_MODULE - if _PIPELINE_MODULE is None: - from . import pipeline as _pipeline - - _PIPELINE_MODULE = _pipeline - return _PIPELINE_MODULE - - -@dataclass(frozen=True, slots=True) -class OutputPaths: - html: Path | None = None - json: Path | None = None - text: Path | None = None - md: Path | None = None - sarif: Path | None = None - - -@dataclass(frozen=True, slots=True) -class ProcessingResult: - filepath: str - success: bool - error: str | None = None - units: list[object] | None = None - blocks: list[object] | None = None - segments: list[object] | None = None - lines: int = 0 - functions: int = 0 - methods: int = 0 - classes: int = 0 - stat: Mapping[str, int] | None = None - error_kind: str | None = None - file_metrics: object | None = None - structural_findings: list[object] | None = None - - -@dataclass(frozen=True, slots=True) -class ChangedCloneGate: - changed_paths: tuple[str, ...] - new_func: frozenset[str] - new_block: frozenset[str] - total_clone_groups: int - findings_total: int - findings_new: int - findings_known: int - - -_as_mapping = _coerce.as_mapping -_as_int = _coerce.as_int -_as_sequence = _coerce.as_sequence - - -def _validate_changed_scope_args(*, args: Namespace) -> str | None: - if args.diff_against and args.paths_from_git_diff: - console.print( - ui.fmt_contract_error( - "Use --diff-against or --paths-from-git-diff, not both." 
- ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - if args.paths_from_git_diff: - args.changed_only = True - return str(args.paths_from_git_diff) - if args.diff_against and not args.changed_only: - console.print(ui.fmt_contract_error("--diff-against requires --changed-only.")) - sys.exit(ExitCode.CONTRACT_ERROR) - if args.changed_only and not args.diff_against: - console.print( - ui.fmt_contract_error( - "--changed-only requires --diff-against or --paths-from-git-diff." - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - return str(args.diff_against) if args.diff_against else None - - -def _normalize_changed_paths( - *, - root_path: Path, - paths: Sequence[str], -) -> tuple[str, ...]: - normalized: set[str] = set() - for raw_path in paths: - candidate = raw_path.strip() - if not candidate: - continue - candidate_path = Path(candidate) - try: - absolute_path = ( - candidate_path.resolve() - if candidate_path.is_absolute() - else (root_path / candidate_path).resolve() - ) - except OSError as exc: - console.print( - ui.fmt_contract_error( - f"Unable to resolve changed path '{candidate}': {exc}" - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - try: - relative_path = absolute_path.relative_to(root_path) - except ValueError: - console.print( - ui.fmt_contract_error( - f"Changed path '{candidate}' is outside the scan root." 
- ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - cleaned = str(relative_path).replace("\\", "/").strip("/") - if cleaned: - normalized.add(cleaned) - return tuple(sorted(normalized)) - - -def _git_diff_changed_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ...]: - try: - validated_ref = validate_git_diff_ref(git_diff_ref) - except ValueError as exc: - console.print(ui.fmt_contract_error(str(exc))) - sys.exit(ExitCode.CONTRACT_ERROR) - try: - completed = subprocess.run( - ["git", "diff", "--name-only", validated_ref, "--"], - cwd=str(root_path), - check=True, - capture_output=True, - text=True, - timeout=30, - ) - except ( - FileNotFoundError, - subprocess.CalledProcessError, - subprocess.TimeoutExpired, - ) as exc: - console.print( - ui.fmt_contract_error( - "Unable to resolve changed files from git diff ref " - f"'{validated_ref}': {exc}" - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - lines = [line.strip() for line in completed.stdout.splitlines() if line.strip()] - return _normalize_changed_paths(root_path=root_path, paths=lines) - - -def _path_matches(relative_path: str, changed_paths: Sequence[str]) -> bool: - return any( - relative_path == candidate or relative_path.startswith(candidate + "/") - for candidate in changed_paths - ) - - -def _flatten_report_findings( - report_document: Mapping[str, object], -) -> list[dict[str, object]]: - findings = _as_mapping(report_document.get("findings")) - groups = _as_mapping(findings.get("groups")) - clone_groups = _as_mapping(groups.get("clones")) - return [ - *[ - dict(_as_mapping(item)) - for item in _as_sequence(clone_groups.get("functions")) - ], - *[dict(_as_mapping(item)) for item in _as_sequence(clone_groups.get("blocks"))], - *[ - dict(_as_mapping(item)) - for item in _as_sequence(clone_groups.get("segments")) - ], - *[ - dict(_as_mapping(item)) - for item in _as_sequence( - _as_mapping(groups.get("structural")).get("groups") - ) - ], - *[ - dict(_as_mapping(item)) - for item in 
_as_sequence(_as_mapping(groups.get("dead_code")).get("groups")) - ], - *[ - dict(_as_mapping(item)) - for item in _as_sequence(_as_mapping(groups.get("design")).get("groups")) - ], - ] - - -def _finding_touches_changed_paths( - finding: Mapping[str, object], - *, - changed_paths: Sequence[str], -) -> bool: - for item in _as_sequence(finding.get("items")): - relative_path = str(_as_mapping(item).get("relative_path", "")).strip() - if relative_path and _path_matches(relative_path, changed_paths): - return True - return False - - -def _changed_clone_gate_from_report( - report_document: Mapping[str, object], - *, - changed_paths: Sequence[str], -) -> ChangedCloneGate: - findings = [ - finding - for finding in _flatten_report_findings(report_document) - if _finding_touches_changed_paths(finding, changed_paths=changed_paths) - ] - clone_findings = [ - finding - for finding in findings - if str(finding.get("family", "")).strip() == "clone" - and str(finding.get("category", "")).strip() in {"function", "block"} - ] - new_func = frozenset( - str(finding.get("id", "")) - for finding in clone_findings - if str(finding.get("category", "")).strip() == "function" - and str(finding.get("novelty", "")).strip() == "new" - ) - new_block = frozenset( - str(finding.get("id", "")) - for finding in clone_findings - if str(finding.get("category", "")).strip() == "block" - and str(finding.get("novelty", "")).strip() == "new" - ) - findings_new = sum( - 1 for finding in findings if str(finding.get("novelty", "")).strip() == "new" - ) - findings_known = sum( - 1 for finding in findings if str(finding.get("novelty", "")).strip() == "known" - ) - return ChangedCloneGate( - changed_paths=tuple(changed_paths), - new_func=new_func, - new_block=new_block, - total_clone_groups=len(clone_findings), - findings_total=len(findings), - findings_new=findings_new, - findings_known=findings_known, - ) - - -def process_file( - filepath: str, - root: str, - cfg: NormalizationConfig, - min_loc: int, - 
min_stmt: int, - collect_structural_findings: bool = True, -) -> ProcessingResult: - pipeline_mod = _pipeline_module() - result = pipeline_mod.process_file( - filepath, - root, - cfg, - min_loc, - min_stmt, - collect_structural_findings, - ) - return cast("ProcessingResult", result) - - -def bootstrap( - *, - args: Namespace, - root: Path, - output_paths: PipelineOutputPaths | OutputPaths, - cache_path: Path, -) -> BootstrapResult: - return cast( - "BootstrapResult", - _pipeline_module().bootstrap( - args=args, - root=root, - output_paths=output_paths, - cache_path=cache_path, - ), - ) - - -def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult: - return cast("DiscoveryResult", _pipeline_module().discover(boot=boot, cache=cache)) - - -def process( - *, - boot: BootstrapResult, - discovery: DiscoveryResult, - cache: Cache, - on_advance: Callable[[], None] | None = None, - on_worker_error: Callable[[str], None] | None = None, - on_parallel_fallback: Callable[[Exception], None] | None = None, -) -> PipelineProcessingResult: - return cast( - "PipelineProcessingResult", - _pipeline_module().process( - boot=boot, - discovery=discovery, - cache=cache, - on_advance=on_advance, - on_worker_error=on_worker_error, - on_parallel_fallback=on_parallel_fallback, - ), - ) - - -def analyze( - *, - boot: BootstrapResult, - discovery: DiscoveryResult, - processing: PipelineProcessingResult, -) -> AnalysisResult: - return cast( - "AnalysisResult", - _pipeline_module().analyze( - boot=boot, - discovery=discovery, - processing=processing, - ), - ) - - -def report( - *, - boot: BootstrapResult, - discovery: DiscoveryResult, - processing: PipelineProcessingResult, - analysis: AnalysisResult, - report_meta: Mapping[str, object], - new_func: set[str], - new_block: set[str], - html_builder: Callable[..., str] | None = None, - metrics_diff: MetricsDiff | None = None, - coverage_adoption_diff_available: bool = False, - api_surface_diff_available: bool = False, - 
include_report_document: bool = False, -) -> ReportArtifacts: - return cast( - "ReportArtifacts", - _pipeline_module().report( - boot=boot, - discovery=discovery, - processing=processing, - analysis=analysis, - report_meta=report_meta, - new_func=new_func, - new_block=new_block, - html_builder=html_builder, - metrics_diff=metrics_diff, - coverage_adoption_diff_available=coverage_adoption_diff_available, - api_surface_diff_available=api_surface_diff_available, - include_report_document=include_report_document, - ), - ) - - -def gate( - *, - boot: BootstrapResult, - analysis: AnalysisResult, - new_func: set[str], - new_block: set[str], - metrics_diff: MetricsDiff | None, -) -> GatingResult: - return cast( - "GatingResult", - _pipeline_module().gate( - boot=boot, - analysis=analysis, - new_func=new_func, - new_block=new_block, - metrics_diff=metrics_diff, - ), - ) - - -class _PrinterLike(Protocol): - def print(self, *objects: object, **kwargs: object) -> None: ... - - -LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser() -ReportPathOrigin = Literal["default", "explicit"] - - -def _rich_progress_symbols() -> tuple[ - type[RichProgress], - type[RichSpinnerColumn], - type[RichTextColumn], - type[RichBarColumn], - type[RichTimeElapsedColumn], -]: - return _rich_progress_symbols_impl() - - -def _make_console(*, no_color: bool) -> RichConsole: - return _make_rich_console( - no_color=no_color, - width=ui.CLI_LAYOUT_MAX_WIDTH, - ) - - -def _print_verbose_clone_hashes( - console: _PrinterLike, - *, - label: str, - clone_hashes: set[str], -) -> None: - if not clone_hashes: - return - console.print(f"\n {label}:") - for clone_hash in sorted(clone_hashes): - console.print(f" - {clone_hash}") - - -def _make_plain_console() -> _PlainConsole: - return _make_plain_console_impl() - - -console: RichConsole | _PlainConsole = _make_plain_console() - - -def _parse_metric_reason_entry(reason: str) -> tuple[str, str]: - return _parse_metric_reason_entry_impl(reason) - - 
-def _print_gating_failure_block( - *, - code: str, - entries: Sequence[tuple[str, object]], - args: Namespace, -) -> None: - _print_gating_failure_block_impl( - console=cast("_PrinterLike", console), - code=code, - entries=list(entries), - args=cast("_GatingArgsLike", cast(object, args)), - ) - - -def build_html_report(*args: object, **kwargs: object) -> str: - # Lazy import avoids pulling HTML renderer in non-HTML CLI runs. - from .html_report import build_html_report as _build_html_report - - html_builder: Callable[..., str] = _build_html_report - return html_builder(*args, **kwargs) - - -_CloneBaselineState = _CloneBaselineStateImpl -_MetricsBaselineState = _MetricsBaselineStateImpl -_MetricsBaselineSectionProbe = _MetricsBaselineSectionProbeImpl - - -def print_banner(*, root: Path | None = None) -> None: - _print_banner_impl( - console=cast("_PrinterLike", console), - banner_title=ui.banner_title(__version__), - project_name=(root.name if root is not None else None), - root_display=(str(root) if root is not None else None), - ) - - -def _is_debug_enabled( - *, - argv: Sequence[str] | None = None, - environ: Mapping[str, str] | None = None, -) -> bool: - args = list(sys.argv[1:] if argv is None else argv) - debug_from_flag = any(arg == "--debug" for arg in args) - env = os.environ if environ is None else environ - debug_from_env = env.get("CODECLONE_DEBUG") == "1" - return debug_from_flag or debug_from_env - - -def _report_path_origins(argv: Sequence[str]) -> dict[str, ReportPathOrigin | None]: - origins: dict[str, ReportPathOrigin | None] = { - "html": None, - "json": None, - "md": None, - "sarif": None, - "text": None, - } - flag_to_field = { - "--html": "html", - "--json": "json", - "--md": "md", - "--sarif": "sarif", - "--text": "text", - } - index = 0 - while index < len(argv): - token = argv[index] - if token == "--": - break - if "=" in token: - flag, _value = token.split("=", maxsplit=1) - field_name = flag_to_field.get(flag) - if field_name is not 
None: - origins[field_name] = "explicit" - index += 1 - continue - field_name = flag_to_field.get(token) - if field_name is None: - index += 1 - continue - next_token = argv[index + 1] if index + 1 < len(argv) else None - if next_token is None or next_token.startswith("-"): - origins[field_name] = "default" - index += 1 - continue - origins[field_name] = "explicit" - index += 2 - return origins - - -def _report_path_timestamp_slug(report_generated_at_utc: str) -> str: - return report_generated_at_utc.replace("-", "").replace(":", "") - - -def _timestamped_report_path(path: Path, *, report_generated_at_utc: str) -> Path: - suffix = path.suffix - stem = path.name[: -len(suffix)] if suffix else path.name - return path.with_name( - f"{stem}-{_report_path_timestamp_slug(report_generated_at_utc)}{suffix}" - ) - - -def _resolve_output_paths( - args: Namespace, - *, - report_path_origins: Mapping[str, ReportPathOrigin | None], - report_generated_at_utc: str, -) -> OutputPaths: - printer = cast("_PrinterLike", console) - resolved: dict[str, Path | None] = { - "html": None, - "json": None, - "md": None, - "sarif": None, - "text": None, - } - output_specs = ( - ("html", "html_out", ".html", "HTML"), - ("json", "json_out", ".json", "JSON"), - ("md", "md_out", ".md", "Markdown"), - ("sarif", "sarif_out", ".sarif", "SARIF"), - ("text", "text_out", ".txt", "text"), - ) - - for field_name, arg_name, expected_suffix, label in output_specs: - raw_value = getattr(args, arg_name, None) - if not raw_value: - continue - path = _validate_output_path( - raw_value, - expected_suffix=expected_suffix, - label=label, - console=printer, - invalid_message=ui.fmt_invalid_output_extension, - invalid_path_message=ui.fmt_invalid_output_path, - ) - if ( - args.timestamped_report_paths - and report_path_origins.get(field_name) == "default" - ): - path = _timestamped_report_path( - path, - report_generated_at_utc=report_generated_at_utc, - ) - resolved[field_name] = path - - return OutputPaths( - 
html=resolved["html"], - json=resolved["json"], - text=resolved["text"], - md=resolved["md"], - sarif=resolved["sarif"], - ) - - -def _validate_report_ui_flags(*, args: Namespace, output_paths: OutputPaths) -> None: - if args.open_html_report and output_paths.html is None: - console.print(ui.fmt_contract_error(ui.ERR_OPEN_HTML_REPORT_REQUIRES_HTML)) - sys.exit(ExitCode.CONTRACT_ERROR) - - if args.timestamped_report_paths and not any( - ( - output_paths.html, - output_paths.json, - output_paths.md, - output_paths.sarif, - output_paths.text, - ) - ): - console.print( - ui.fmt_contract_error(ui.ERR_TIMESTAMPED_REPORT_PATHS_REQUIRES_REPORT) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - - -def _resolve_cache_path(*, root_path: Path, args: Namespace, from_args: bool) -> Path: - return _resolve_cache_path_impl( - root_path=root_path, - args=cast("_RuntimeArgsLike", cast(object, args)), - from_args=from_args, - legacy_cache_path=LEGACY_CACHE_PATH, - console=cast("_PrinterLike", console), - ) - - -def _validate_numeric_args(args: Namespace) -> bool: - return _validate_numeric_args_impl(cast("_RuntimeArgsLike", cast(object, args))) - - -def _configure_metrics_mode(*, args: Namespace, metrics_baseline_exists: bool) -> None: - _configure_metrics_mode_impl( - args=cast("_RuntimeArgsLike", cast(object, args)), - metrics_baseline_exists=metrics_baseline_exists, - console=cast("_PrinterLike", console), - ) - - -def _print_failed_files(failed_files: Sequence[str]) -> None: - _print_failed_files_impl( - failed_files=tuple(failed_files), - console=cast("_PrinterLike", console), - ) - - -def _metrics_computed(args: Namespace) -> tuple[str, ...]: - return _metrics_computed_impl(cast("_RuntimeArgsLike", cast(object, args))) - - -def _probe_metrics_baseline_section(path: Path) -> _MetricsBaselineSectionProbe: - return _probe_metrics_baseline_section_impl(path) - - -def _resolve_clone_baseline_state( - *, - args: Namespace, - baseline_path: Path, - baseline_exists: bool, - analysis: 
AnalysisResult, - shared_baseline_payload: dict[str, object] | None = None, -) -> _CloneBaselineState: - return _resolve_clone_baseline_state_impl( - args=cast("_BaselineArgsLike", cast(object, args)), - baseline_path=baseline_path, - baseline_exists=baseline_exists, - func_groups=analysis.func_groups, - block_groups=analysis.block_groups, - codeclone_version=__version__, - console=cast("_PrinterLike", console), - shared_baseline_payload=shared_baseline_payload, - ) - - -def _resolve_metrics_baseline_state( - *, - args: Namespace, - metrics_baseline_path: Path, - metrics_baseline_exists: bool, - baseline_updated_path: Path | None, - analysis: AnalysisResult, - shared_baseline_payload: dict[str, object] | None = None, -) -> _MetricsBaselineState: - return _resolve_metrics_baseline_state_impl( - args=cast("_BaselineArgsLike", cast(object, args)), - metrics_baseline_path=metrics_baseline_path, - metrics_baseline_exists=metrics_baseline_exists, - baseline_updated_path=baseline_updated_path, - project_metrics=analysis.project_metrics, - console=cast("_PrinterLike", console), - shared_baseline_payload=shared_baseline_payload, - ) - - -def _resolve_cache_status(cache: Cache) -> tuple[CacheStatus, str | None]: - return _resolve_cache_status_impl(cache) - - -def _cache_update_segment_projection(cache: Cache, analysis: AnalysisResult) -> None: - if not hasattr(cache, "segment_report_projection"): - return - new_projection = build_segment_report_projection( - digest=analysis.segment_groups_raw_digest, - suppressed=analysis.suppressed_segment_groups, - groups=analysis.segment_groups, - ) - if new_projection != cache.segment_report_projection: - cache.segment_report_projection = new_projection - cache._dirty = True - - -def _run_analysis_stages( - *, - args: Namespace, - boot: BootstrapResult, - cache: Cache, -) -> tuple[DiscoveryResult, PipelineProcessingResult, AnalysisResult]: - def _require_rich_console( - value: RichConsole | _PlainConsole, - ) -> RichConsole: - if 
isinstance(value, _PlainConsole): - raise RuntimeError("Rich console is required when progress UI is enabled.") - return value - - use_status = not args.quiet and not args.no_progress - try: - if use_status: - with console.status(ui.STATUS_DISCOVERING, spinner="dots"): - discovery_result = discover(boot=boot, cache=cache) - else: - discovery_result = discover(boot=boot, cache=cache) - except OSError as exc: - console.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=exc))) - sys.exit(ExitCode.CONTRACT_ERROR) - - for warning in discovery_result.skipped_warnings: - console.print(f"[warning]{warning}[/warning]") - - total_files = len(discovery_result.files_to_process) - if total_files > 0 and not args.quiet and args.no_progress: - console.print(ui.fmt_processing_changed(total_files)) - - if total_files > 0 and not args.no_progress: - ( - progress_cls, - spinner_column_cls, - text_column_cls, - bar_column_cls, - time_elapsed_column_cls, - ) = _rich_progress_symbols() - - with progress_cls( - spinner_column_cls(), - text_column_cls("[progress.description]{task.description}"), - bar_column_cls(), - text_column_cls("[progress.percentage]{task.percentage:>3.0f}%"), - time_elapsed_column_cls(), - console=_require_rich_console(console), - ) as progress_ui: - task_id = progress_ui.add_task( - f"Analyzing {total_files} files...", - total=total_files, - ) - processing_result = process( - boot=boot, - discovery=discovery_result, - cache=cache, - on_advance=lambda: progress_ui.advance(task_id), - on_worker_error=lambda reason: console.print( - ui.fmt_worker_failed(reason) - ), - on_parallel_fallback=lambda exc: console.print( - ui.fmt_parallel_fallback(exc) - ), - ) - else: - processing_result = process( - boot=boot, - discovery=discovery_result, - cache=cache, - on_worker_error=( - (lambda reason: console.print(ui.fmt_batch_item_failed(reason))) - if args.no_progress - else (lambda reason: console.print(ui.fmt_worker_failed(reason))) - ), - on_parallel_fallback=lambda 
exc: console.print( - ui.fmt_parallel_fallback(exc) - ), - ) - - _print_failed_files(processing_result.failed_files) - # Keep unreadable-source diagnostics visible in normal mode even if - # failed_files was filtered/empty due upstream transport differences. - if not processing_result.failed_files and processing_result.source_read_failures: - _print_failed_files(processing_result.source_read_failures) - - if use_status: - with console.status(ui.STATUS_GROUPING, spinner="dots"): - analysis_result = analyze( - boot=boot, - discovery=discovery_result, - processing=processing_result, - ) - _cache_update_segment_projection(cache, analysis_result) - try: - cache.save() - except CacheError as exc: - console.print(ui.fmt_cache_save_failed(exc)) - else: - analysis_result = analyze( - boot=boot, - discovery=discovery_result, - processing=processing_result, - ) - _cache_update_segment_projection(cache, analysis_result) - try: - cache.save() - except CacheError as exc: - console.print(ui.fmt_cache_save_failed(exc)) - - coverage_join = getattr(analysis_result, "coverage_join", None) - if ( - coverage_join is not None - and coverage_join.status != "ok" - and coverage_join.invalid_reason - ): - console.print(ui.fmt_coverage_join_ignored(coverage_join.invalid_reason)) - - return discovery_result, processing_result, analysis_result - - -def _write_report_outputs( - *, - args: Namespace, - output_paths: OutputPaths, - report_artifacts: ReportArtifacts, - open_html_report: bool = False, -) -> str | None: - return _write_report_outputs_impl( - args=cast("_QuietArgsLike", cast(object, args)), - output_paths=output_paths, - report_artifacts=report_artifacts, - console=cast("_PrinterLike", console), - open_html_report=open_html_report, - ) - - -def _enforce_gating( - *, - args: Namespace, - boot: BootstrapResult, - analysis: AnalysisResult, - processing: PipelineProcessingResult, - source_read_contract_failure: bool, - baseline_failure_code: ExitCode | None, - 
metrics_baseline_failure_code: ExitCode | None, - new_func: set[str], - new_block: set[str], - metrics_diff: MetricsDiff | None, - html_report_path: str | None, - clone_threshold_total: int | None = None, -) -> None: - if source_read_contract_failure: - console.print( - ui.fmt_contract_error( - ui.fmt_unreadable_source_in_gating( - count=len(processing.source_read_failures) - ) - ) - ) - for failure in processing.source_read_failures[:10]: - console.print(f" • {failure}") - if len(processing.source_read_failures) > 10: - console.print(f" ... and {len(processing.source_read_failures) - 10} more") - sys.exit(ExitCode.CONTRACT_ERROR) - - if baseline_failure_code is not None: - console.print(ui.fmt_contract_error(ui.ERR_BASELINE_GATING_REQUIRES_TRUSTED)) - sys.exit(baseline_failure_code) - - if metrics_baseline_failure_code is not None: - console.print( - ui.fmt_contract_error( - "Metrics baseline is untrusted or missing for requested metrics gating." - ) - ) - sys.exit(metrics_baseline_failure_code) - - if bool(getattr(args, "fail_on_untested_hotspots", False)): - if analysis.coverage_join is None: - console.print( - ui.fmt_contract_error( - "--fail-on-untested-hotspots requires --coverage." 
- ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - if analysis.coverage_join.status != "ok": - detail = analysis.coverage_join.invalid_reason or "invalid coverage input" - console.print( - ui.fmt_contract_error( - "Coverage gating requires a valid Cobertura XML input.\n" - f"Reason: {detail}" - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - - gate_result = gate( - boot=boot, - analysis=analysis, - new_func=new_func, - new_block=new_block, - metrics_diff=metrics_diff, - ) - if clone_threshold_total is not None: - reasons = [ - reason - for reason in gate_result.reasons - if not reason.startswith("clone:threshold:") - ] - if 0 <= args.fail_threshold < clone_threshold_total: - reasons.append( - f"clone:threshold:{clone_threshold_total}:{args.fail_threshold}" - ) - gate_result = cast( - "GatingResult", - _pipeline_module().GatingResult( - exit_code=( - int(ExitCode.GATING_FAILURE) if reasons else int(ExitCode.SUCCESS) - ), - reasons=tuple(reasons), - ), - ) - - metric_reasons = [ - reason[len("metric:") :] - for reason in gate_result.reasons - if reason.startswith("metric:") - ] - if metric_reasons: - _print_gating_failure_block( - code="metrics", - entries=[_parse_metric_reason_entry(reason) for reason in metric_reasons], - args=args, - ) - sys.exit(ExitCode.GATING_FAILURE) - - if "clone:new" in gate_result.reasons: - default_report = Path(".cache/codeclone/report.html") - resolved_html_report_path = html_report_path - if resolved_html_report_path is None and default_report.exists(): - resolved_html_report_path = str(default_report) - - clone_entries: list[tuple[str, object]] = [ - ("new_function_clone_groups", len(new_func)), - ("new_block_clone_groups", len(new_block)), - ] - if resolved_html_report_path: - clone_entries.append(("report", resolved_html_report_path)) - clone_entries.append(("accept", "codeclone . 
--update-baseline")) - _print_gating_failure_block( - code="new-clones", - entries=clone_entries, - args=args, - ) - - if args.verbose: - _print_verbose_clone_hashes( - cast("_PrinterLike", console), - label="Function clone hashes", - clone_hashes=new_func, - ) - _print_verbose_clone_hashes( - cast("_PrinterLike", console), - label="Block clone hashes", - clone_hashes=new_block, - ) - - sys.exit(ExitCode.GATING_FAILURE) - - threshold_reason = next( - ( - reason - for reason in gate_result.reasons - if reason.startswith("clone:threshold:") - ), - None, - ) - if threshold_reason is not None: - _, _, total_raw, threshold_raw = threshold_reason.split(":", maxsplit=3) - total = int(total_raw) - threshold = int(threshold_raw) - _print_gating_failure_block( - code="threshold", - entries=( - ("clone_groups_total", total), - ("clone_groups_limit", threshold), - ), - args=args, - ) - sys.exit(ExitCode.GATING_FAILURE) - - -def _main_impl() -> None: - global console - - run_started_at = time.monotonic() - from ._cli_meta import _build_report_meta, _current_report_timestamp_utc - - analysis_started_at_utc = _current_report_timestamp_utc() - ap = build_parser(__version__) - - def _resolve_runtime_path_arg( - *, - root_path: Path, - raw_path: str, - from_cli: bool, - ) -> Path: - candidate_path = Path(raw_path).expanduser() - if from_cli or candidate_path.is_absolute(): - return candidate_path.resolve() - return (root_path / candidate_path).resolve() - - def _prepare_run_inputs() -> tuple[ - Namespace, - Path, - Path, - bool, - Path, - bool, - OutputPaths, - Path, - dict[str, object] | None, - tuple[str, ...], - str, - str, - ]: - global console - raw_argv = tuple(sys.argv[1:]) - explicit_cli_dests = collect_explicit_cli_dests(ap, argv=raw_argv) - report_path_origins = _report_path_origins(raw_argv) - report_generated_at_utc = _current_report_timestamp_utc() - cache_path_from_args = any( - arg in {"--cache-dir", "--cache-path"} - or arg.startswith(("--cache-dir=", 
"--cache-path=")) - for arg in sys.argv - ) - baseline_path_from_args = any( - arg == "--baseline" or arg.startswith("--baseline=") for arg in sys.argv - ) - metrics_path_from_args = any( - arg == "--metrics-baseline" or arg.startswith("--metrics-baseline=") - for arg in sys.argv - ) - args = ap.parse_args() - - try: - root_path = Path(args.root).resolve() - if not root_path.exists(): - console.print( - ui.fmt_contract_error(ui.ERR_ROOT_NOT_FOUND.format(path=root_path)) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - except OSError as exc: - console.print( - ui.fmt_contract_error(ui.ERR_INVALID_ROOT_PATH.format(error=exc)) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - - try: - pyproject_config = load_pyproject_config(root_path) - except ConfigValidationError as exc: - console.print(ui.fmt_contract_error(str(exc))) - sys.exit(ExitCode.CONTRACT_ERROR) - apply_pyproject_config_overrides( - args=args, - config_values=pyproject_config, - explicit_cli_dests=explicit_cli_dests, - ) - git_diff_ref = _validate_changed_scope_args(args=args) - changed_paths = ( - _git_diff_changed_paths(root_path=root_path, git_diff_ref=git_diff_ref) - if git_diff_ref is not None - else () - ) - if args.debug: - os.environ["CODECLONE_DEBUG"] = "1" - - if args.ci: - args.fail_on_new = True - args.no_color = True - args.quiet = True - - console = ( - _make_plain_console() - if args.quiet - else _make_console(no_color=args.no_color) - ) - - if not _validate_numeric_args(args): - console.print( - ui.fmt_contract_error( - "Size limits must be non-negative integers (MB), " - "threshold flags must be >= 0 or -1, and coverage thresholds " - "must be between 0 and 100." 
- ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - - baseline_arg_path = Path(args.baseline).expanduser() - try: - baseline_path = _resolve_runtime_path_arg( - root_path=root_path, - raw_path=args.baseline, - from_cli=baseline_path_from_args, - ) - baseline_exists = baseline_path.exists() - except OSError as exc: - console.print( - ui.fmt_contract_error( - ui.fmt_invalid_baseline_path(path=baseline_arg_path, error=exc) - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - - shared_baseline_payload: dict[str, object] | None = None - default_metrics_baseline = ap.get_default("metrics_baseline") - metrics_path_overridden = metrics_path_from_args or ( - args.metrics_baseline != default_metrics_baseline - ) - metrics_baseline_arg_path = Path( - args.metrics_baseline if metrics_path_overridden else args.baseline - ).expanduser() - try: - metrics_baseline_path = _resolve_runtime_path_arg( - root_path=root_path, - raw_path=( - args.metrics_baseline if metrics_path_overridden else args.baseline - ), - from_cli=metrics_path_from_args, - ) - if metrics_baseline_path == baseline_path: - probe = _probe_metrics_baseline_section(metrics_baseline_path) - metrics_baseline_exists = probe.has_metrics_section - shared_baseline_payload = probe.payload - else: - metrics_baseline_exists = metrics_baseline_path.exists() - except OSError as exc: - console.print( - ui.fmt_contract_error( - ui.fmt_invalid_baseline_path( - path=metrics_baseline_arg_path, - error=exc, - ) - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - - if ( - args.update_baseline - and not args.skip_metrics - and not args.update_metrics_baseline - ): - args.update_metrics_baseline = True - _configure_metrics_mode( - args=args, - metrics_baseline_exists=metrics_baseline_exists, - ) - if ( - args.update_metrics_baseline - and metrics_baseline_path == baseline_path - and not baseline_exists - and not args.update_baseline - ): - # Unified baseline needs clone payload before metrics can be embedded. 
- args.update_baseline = True - - if args.quiet: - args.no_progress = True - - if not args.quiet: - print_banner(root=root_path) - - output_paths = _resolve_output_paths( - args, - report_path_origins=report_path_origins, - report_generated_at_utc=report_generated_at_utc, - ) - _validate_report_ui_flags(args=args, output_paths=output_paths) - cache_path = _resolve_cache_path( - root_path=root_path, - args=args, - from_args=cache_path_from_args, - ) - return ( - args, - root_path, - baseline_path, - baseline_exists, - metrics_baseline_path, - metrics_baseline_exists, - output_paths, - cache_path, - shared_baseline_payload, - changed_paths, - analysis_started_at_utc, - report_generated_at_utc, - ) - - ( - args, - root_path, - baseline_path, - baseline_exists, - metrics_baseline_path, - metrics_baseline_exists, - output_paths, - cache_path, - shared_baseline_payload, - changed_paths, - analysis_started_at_utc, - report_generated_at_utc, - ) = _prepare_run_inputs() - - cache = Cache( - cache_path, - root=root_path, - max_size_bytes=args.max_cache_size_mb * 1024 * 1024, - min_loc=args.min_loc, - min_stmt=args.min_stmt, - block_min_loc=args.block_min_loc, - block_min_stmt=args.block_min_stmt, - segment_min_loc=args.segment_min_loc, - segment_min_stmt=args.segment_min_stmt, - collect_api_surface=bool(args.api_surface), - ) - cache.load() - if cache.load_warning: - console.print(f"[warning]{cache.load_warning}[/warning]") - - boot = bootstrap( - args=args, - root=root_path, - output_paths=output_paths, - cache_path=cache_path, - ) - discovery_result, processing_result, analysis_result = _run_analysis_stages( - args=args, - boot=boot, - cache=cache, - ) - - gating_mode = ( - args.fail_on_new - or args.fail_threshold >= 0 - or args.fail_complexity >= 0 - or args.fail_coupling >= 0 - or args.fail_cohesion >= 0 - or args.fail_cycles - or args.fail_dead_code - or args.fail_health >= 0 - or args.fail_on_new_metrics - or args.fail_on_typing_regression - or 
args.fail_on_docstring_regression - or args.fail_on_api_break - or args.min_typing_coverage >= 0 - or args.min_docstring_coverage >= 0 - ) - source_read_contract_failure = ( - bool(processing_result.source_read_failures) - and gating_mode - and not args.update_baseline - ) - baseline_state = _resolve_clone_baseline_state( - args=args, - baseline_path=baseline_path, - baseline_exists=baseline_exists, - analysis=analysis_result, - shared_baseline_payload=( - shared_baseline_payload if metrics_baseline_path == baseline_path else None - ), - ) - metrics_baseline_state = _resolve_metrics_baseline_state( - args=args, - metrics_baseline_path=metrics_baseline_path, - metrics_baseline_exists=metrics_baseline_exists, - baseline_updated_path=baseline_state.updated_path, - analysis=analysis_result, - shared_baseline_payload=( - shared_baseline_payload if metrics_baseline_path == baseline_path else None - ), - ) - - try: - report_cache_path = cache_path.resolve() - except OSError: - report_cache_path = cache_path - - cache_status, cache_schema_version = _resolve_cache_status(cache) - - report_meta = _build_report_meta( - codeclone_version=__version__, - scan_root=root_path, - baseline_path=baseline_path, - baseline=baseline_state.baseline, - baseline_loaded=baseline_state.loaded, - baseline_status=baseline_state.status.value, - cache_path=report_cache_path, - cache_used=cache_status == CacheStatus.OK, - cache_status=cache_status.value, - cache_schema_version=cache_schema_version, - files_skipped_source_io=len(processing_result.source_read_failures), - metrics_baseline_path=metrics_baseline_path, - metrics_baseline=metrics_baseline_state.baseline, - metrics_baseline_loaded=metrics_baseline_state.loaded, - metrics_baseline_status=metrics_baseline_state.status.value, - health_score=( - analysis_result.project_metrics.health.total - if analysis_result.project_metrics - else None - ), - health_grade=( - analysis_result.project_metrics.health.grade - if 
analysis_result.project_metrics - else None - ), - analysis_mode=("clones_only" if args.skip_metrics else "full"), - metrics_computed=_metrics_computed(args), - min_loc=args.min_loc, - min_stmt=args.min_stmt, - block_min_loc=args.block_min_loc, - block_min_stmt=args.block_min_stmt, - segment_min_loc=args.segment_min_loc, - segment_min_stmt=args.segment_min_stmt, - design_complexity_threshold=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - design_coupling_threshold=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - design_cohesion_threshold=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - analysis_started_at_utc=analysis_started_at_utc, - report_generated_at_utc=report_generated_at_utc, - ) - - baseline_for_diff = ( - baseline_state.baseline - if baseline_state.trusted_for_diff - else Baseline(baseline_path) - ) - new_func, new_block = baseline_for_diff.diff( - analysis_result.func_groups, - analysis_result.block_groups, - ) - new_clones_count = len(new_func) + len(new_block) - - metrics_diff: MetricsDiff | None = None - if ( - analysis_result.project_metrics is not None - and metrics_baseline_state.trusted_for_diff - ): - metrics_diff = metrics_baseline_state.baseline.diff( - analysis_result.project_metrics - ) - coverage_adoption_diff_available = bool( - metrics_baseline_state.trusted_for_diff - and getattr( - metrics_baseline_state.baseline, - "has_coverage_adoption_snapshot", - False, - ) - ) - api_surface_diff_available = bool( - metrics_baseline_state.trusted_for_diff - and getattr(metrics_baseline_state.baseline, "api_surface_snapshot", None) - is not None - ) - - _print_summary( - console=cast("_PrinterLike", console), - quiet=args.quiet, - files_found=discovery_result.files_found, - files_analyzed=processing_result.files_analyzed, - cache_hits=discovery_result.cache_hits, - files_skipped=processing_result.files_skipped, - analyzed_lines=( - processing_result.analyzed_lines - + int(getattr(discovery_result, "cached_lines", 0)) - ), - analyzed_functions=( - 
processing_result.analyzed_functions - + int(getattr(discovery_result, "cached_functions", 0)) - ), - analyzed_methods=( - processing_result.analyzed_methods - + int(getattr(discovery_result, "cached_methods", 0)) - ), - analyzed_classes=( - processing_result.analyzed_classes - + int(getattr(discovery_result, "cached_classes", 0)) - ), - func_clones_count=analysis_result.func_clones_count, - block_clones_count=analysis_result.block_clones_count, - segment_clones_count=analysis_result.segment_clones_count, - suppressed_golden_fixture_groups=len( - getattr(analysis_result, "suppressed_clone_groups", ()) - ), - suppressed_segment_groups=analysis_result.suppressed_segment_groups, - new_clones_count=new_clones_count, - ) - - if analysis_result.project_metrics is not None: - pm = analysis_result.project_metrics - metrics_payload_map = _as_mapping(analysis_result.metrics_payload) - overloaded_modules_summary = _as_mapping( - _as_mapping(metrics_payload_map.get("overloaded_modules")).get("summary") - ) - adoption_summary = _as_mapping( - _as_mapping(metrics_payload_map.get("coverage_adoption")).get("summary") - ) - api_surface_summary = _as_mapping( - _as_mapping(metrics_payload_map.get("api_surface")).get("summary") - ) - coverage_join_summary = _as_mapping( - _as_mapping(metrics_payload_map.get("coverage_join")).get("summary") - ) - overloaded_modules_summary_map = _as_mapping(overloaded_modules_summary) - coverage_join_source = str(coverage_join_summary.get("source", "")).strip() - _print_metrics( - console=cast("_PrinterLike", console), - quiet=args.quiet, - metrics=MetricsSnapshot( - complexity_avg=pm.complexity_avg, - complexity_max=pm.complexity_max, - high_risk_count=len(pm.high_risk_functions), - coupling_avg=pm.coupling_avg, - coupling_max=pm.coupling_max, - cohesion_avg=pm.cohesion_avg, - cohesion_max=pm.cohesion_max, - cycles_count=len(pm.dependency_cycles), - dead_code_count=len(pm.dead_code), - health_total=pm.health.total, - health_grade=pm.health.grade, - 
suppressed_dead_code_count=analysis_result.suppressed_dead_code_items, - overloaded_modules_candidates=_as_int( - overloaded_modules_summary_map.get("candidates") - ), - overloaded_modules_total=_as_int( - overloaded_modules_summary_map.get("total") - ), - overloaded_modules_population_status=str( - overloaded_modules_summary_map.get("population_status", "") - ), - overloaded_modules_top_score=_coerce.as_float( - overloaded_modules_summary_map.get("top_score") - ), - adoption_param_permille=( - _as_int(adoption_summary.get("param_permille")) - if adoption_summary - else None - ), - adoption_return_permille=( - _as_int(adoption_summary.get("return_permille")) - if adoption_summary - else None - ), - adoption_docstring_permille=( - _as_int(adoption_summary.get("docstring_permille")) - if adoption_summary - else None - ), - adoption_any_annotation_count=_as_int( - adoption_summary.get("typing_any_count") - ), - api_surface_enabled=bool(api_surface_summary.get("enabled")), - api_surface_modules=_as_int(api_surface_summary.get("modules")), - api_surface_public_symbols=_as_int( - api_surface_summary.get("public_symbols") - ), - api_surface_added=( - len(metrics_diff.new_api_symbols) - if metrics_diff is not None and api_surface_diff_available - else 0 - ), - api_surface_breaking=( - len(metrics_diff.new_api_breaking_changes) - if metrics_diff is not None and api_surface_diff_available - else 0 - ), - coverage_join_status=str( - coverage_join_summary.get("status", "") - ).strip(), - coverage_join_overall_permille=_as_int( - coverage_join_summary.get("overall_permille") - ), - coverage_join_coverage_hotspots=_as_int( - coverage_join_summary.get("coverage_hotspots") - ), - coverage_join_scope_gap_hotspots=_as_int( - coverage_join_summary.get("scope_gap_hotspots") - ), - coverage_join_threshold_percent=_as_int( - coverage_join_summary.get("hotspot_threshold_percent") - ), - coverage_join_source_label=( - Path(coverage_join_source).name if coverage_join_source else "" - ), - 
), - ) - - report_artifacts = report( - boot=boot, - discovery=discovery_result, - processing=processing_result, - analysis=analysis_result, - report_meta=report_meta, - new_func=new_func, - new_block=new_block, - html_builder=build_html_report, - metrics_diff=metrics_diff, - coverage_adoption_diff_available=coverage_adoption_diff_available, - api_surface_diff_available=api_surface_diff_available, - include_report_document=bool(changed_paths), - ) - changed_clone_gate = ( - _changed_clone_gate_from_report( - report_artifacts.report_document or {}, - changed_paths=changed_paths, - ) - if args.changed_only and report_artifacts.report_document is not None - else None - ) - if changed_clone_gate is not None: - _print_changed_scope( - console=cast("_PrinterLike", console), - quiet=args.quiet, - changed_scope=ChangedScopeSnapshot( - paths_count=len(changed_clone_gate.changed_paths), - findings_total=changed_clone_gate.findings_total, - findings_new=changed_clone_gate.findings_new, - findings_known=changed_clone_gate.findings_known, - ), - ) - html_report_path = _write_report_outputs( - args=args, - output_paths=output_paths, - report_artifacts=report_artifacts, - open_html_report=args.open_html_report, - ) - - _enforce_gating( - args=args, - boot=boot, - analysis=analysis_result, - processing=processing_result, - source_read_contract_failure=source_read_contract_failure, - baseline_failure_code=baseline_state.failure_code, - metrics_baseline_failure_code=metrics_baseline_state.failure_code, - new_func=set(changed_clone_gate.new_func) if changed_clone_gate else new_func, - new_block=( - set(changed_clone_gate.new_block) if changed_clone_gate else new_block - ), - metrics_diff=metrics_diff, - html_report_path=html_report_path, - clone_threshold_total=( - changed_clone_gate.total_clone_groups if changed_clone_gate else None - ), - ) - - notice_new_clones_count = ( - len(changed_clone_gate.new_func) + len(changed_clone_gate.new_block) - if changed_clone_gate is not None - 
else new_clones_count - ) - if ( - not args.update_baseline - and not args.fail_on_new - and notice_new_clones_count > 0 - ): - console.print(ui.WARN_NEW_CLONES_WITHOUT_FAIL) - - if not args.quiet: - elapsed = time.monotonic() - run_started_at - console.print() - console.print(ui.fmt_pipeline_done(elapsed)) - - -def main() -> None: - try: - _main_impl() - except SystemExit: - raise - except Exception as exc: - console.print( - ui.fmt_internal_error( - exc, - issues_url=ISSUES_URL, - debug=_is_debug_enabled(), - ) - ) - sys.exit(ExitCode.INTERNAL_ERROR) - - -if __name__ == "__main__": - main() diff --git a/codeclone/config/__init__.py b/codeclone/config/__init__.py new file mode 100644 index 0000000..8040155 --- /dev/null +++ b/codeclone/config/__init__.py @@ -0,0 +1,83 @@ +from .argparse_builder import _ArgumentParser, _HelpFormatter, build_parser +from .pyproject_loader import ( + CONFIG_KEY_SPECS, + PATH_CONFIG_KEYS, + ConfigValidationError, + _load_toml, + load_pyproject_config, + normalize_path_config_value, + validate_config_value, +) +from .resolver import ( + ResolvedConfig, + apply_pyproject_config_overrides, + apply_resolved_config, + collect_explicit_cli_dests, + resolve_config, +) +from .spec import ( + ARGUMENT_GROUP_TITLES, + DEFAULT_BASELINE_PATH, + DEFAULT_BLOCK_MIN_LOC, + DEFAULT_BLOCK_MIN_STMT, + DEFAULT_HTML_REPORT_PATH, + DEFAULT_JSON_REPORT_PATH, + DEFAULT_MARKDOWN_REPORT_PATH, + DEFAULT_MAX_BASELINE_SIZE_MB, + DEFAULT_MAX_CACHE_SIZE_MB, + DEFAULT_MIN_LOC, + DEFAULT_MIN_STMT, + DEFAULT_PROCESSES, + DEFAULT_ROOT, + DEFAULT_SARIF_REPORT_PATH, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, + DEFAULT_TEXT_REPORT_PATH, + DEFAULTS_BY_DEST, + OPTIONS, + PYPROJECT_OPTIONS, + TESTABLE_CLI_OPTIONS, + ConfigKeySpec, + OptionSpec, +) + +__all__ = [ + "ARGUMENT_GROUP_TITLES", + "CONFIG_KEY_SPECS", + "DEFAULTS_BY_DEST", + "DEFAULT_BASELINE_PATH", + "DEFAULT_BLOCK_MIN_LOC", + "DEFAULT_BLOCK_MIN_STMT", + "DEFAULT_HTML_REPORT_PATH", + 
"DEFAULT_JSON_REPORT_PATH", + "DEFAULT_MARKDOWN_REPORT_PATH", + "DEFAULT_MAX_BASELINE_SIZE_MB", + "DEFAULT_MAX_CACHE_SIZE_MB", + "DEFAULT_MIN_LOC", + "DEFAULT_MIN_STMT", + "DEFAULT_PROCESSES", + "DEFAULT_ROOT", + "DEFAULT_SARIF_REPORT_PATH", + "DEFAULT_SEGMENT_MIN_LOC", + "DEFAULT_SEGMENT_MIN_STMT", + "DEFAULT_TEXT_REPORT_PATH", + "OPTIONS", + "PATH_CONFIG_KEYS", + "PYPROJECT_OPTIONS", + "TESTABLE_CLI_OPTIONS", + "ConfigKeySpec", + "ConfigValidationError", + "OptionSpec", + "ResolvedConfig", + "_ArgumentParser", + "_HelpFormatter", + "_load_toml", + "apply_pyproject_config_overrides", + "apply_resolved_config", + "build_parser", + "collect_explicit_cli_dests", + "load_pyproject_config", + "normalize_path_config_value", + "resolve_config", + "validate_config_value", +] diff --git a/codeclone/config/argparse_builder.py b/codeclone/config/argparse_builder.py new file mode 100644 index 0000000..79f4956 --- /dev/null +++ b/codeclone/config/argparse_builder.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +import argparse +import sys +from typing import Any, NoReturn + +from .. 
import ui_messages as ui +from ..contracts import ExitCode, cli_help_epilog +from .spec import ARGUMENT_GROUP_TITLES, DEFAULTS_BY_DEST, OPTIONS, OptionSpec + + +class _ArgumentParser(argparse.ArgumentParser): + def error(self, message: str) -> NoReturn: + self.print_usage(sys.stderr) + self.exit( + int(ExitCode.CONTRACT_ERROR), + f"CONTRACT ERROR: {message}\n", + ) + + +class _HelpFormatter(argparse.RawTextHelpFormatter): + """Product-oriented help formatter extension point.""" + + +def _add_option( + group: argparse._ArgumentGroup, + *, + option: OptionSpec, + version: str, +) -> None: + if option.cli_kind == "positional": + group.add_argument( + option.dest, + nargs=option.nargs, + metavar=option.metavar, + help=option.help_text, + ) + return + + argument_kwargs: dict[str, Any] = {"help": option.help_text} + + if option.cli_kind == "value": + argument_kwargs.update( + dest=option.dest, + nargs=option.nargs, + const=option.const, + metavar=option.metavar, + ) + if option.value_type is not None: + argument_kwargs["type"] = option.value_type + elif option.cli_kind == "optional_path": + argument_kwargs.update( + dest=option.dest, + nargs="?", + const=option.const, + metavar=option.metavar or "FILE", + ) + elif option.cli_kind == "bool_optional": + argument_kwargs.update( + action=argparse.BooleanOptionalAction, + default=argparse.SUPPRESS, + ) + elif option.cli_kind in {"store_true", "store_false"}: + argument_kwargs.update( + dest=option.dest, + action=option.cli_kind, + default=argparse.SUPPRESS, + ) + elif option.cli_kind == "help": + argument_kwargs["action"] = "help" + elif option.cli_kind == "version": + argument_kwargs.update( + action="version", + version=ui.version_output(version), + ) + else: + raise RuntimeError(f"Unsupported CLI option kind: {option.cli_kind}") + + group.add_argument(*option.flags, **argument_kwargs) + + +def build_parser(version: str) -> _ArgumentParser: + parser = _ArgumentParser( + prog="codeclone", + description="Structural code 
quality analysis for Python.", + add_help=False, + formatter_class=_HelpFormatter, + epilog=cli_help_epilog(), + ) + + for group_title in ARGUMENT_GROUP_TITLES: + argument_group = parser.add_argument_group(group_title) + for option in OPTIONS: + if option.group != group_title or option.cli_kind is None: + continue + _add_option( + argument_group, + option=option, + version=version, + ) + + parser.set_defaults(**DEFAULTS_BY_DEST) + return parser + + +__all__ = ["_ArgumentParser", "_HelpFormatter", "build_parser"] diff --git a/codeclone/config/pyproject_loader.py b/codeclone/config/pyproject_loader.py new file mode 100644 index 0000000..596f32f --- /dev/null +++ b/codeclone/config/pyproject_loader.py @@ -0,0 +1,216 @@ +from __future__ import annotations + +import importlib +import sys +from pathlib import Path +from typing import TYPE_CHECKING + +from ..golden_fixtures import ( + GoldenFixturePatternError, + normalize_golden_fixture_patterns, +) +from .spec import CONFIG_KEY_SPECS, PATH_CONFIG_KEYS, ConfigKeySpec + +if TYPE_CHECKING: + from collections.abc import Callable, Mapping, Set + + +class ConfigValidationError(ValueError): + """Raised when pyproject.toml contains invalid CodeClone configuration.""" + + +def validate_config_value( + *, + key: str, + value: object, + config_key_specs: Mapping[str, ConfigKeySpec] = CONFIG_KEY_SPECS, +) -> object: + spec = config_key_specs[key] + if value is None: + if spec.allow_none: + return None + raise ConfigValidationError( + "Invalid value type for tool.codeclone." 
+ f"{key}: expected {spec.expected_name or spec.expected_type.__name__}" + ) + + expected_type = spec.expected_type + if expected_type is bool: + return _validated_config_instance( + key=key, + value=value, + expected_type=bool, + expected_name="bool", + ) + + if expected_type is int: + return _validated_config_instance( + key=key, + value=value, + expected_type=int, + expected_name="int", + reject_bool=True, + ) + + if expected_type is str: + return _validated_config_instance( + key=key, + value=value, + expected_type=str, + expected_name="str", + ) + + if expected_type is list: + return _validated_string_list(key=key, value=value) + + raise ConfigValidationError(f"Unsupported config key spec for tool.codeclone.{key}") + + +def load_pyproject_config( + root_path: Path, + *, + load_toml: Callable[[Path], object] | None = None, + config_key_specs: Mapping[str, ConfigKeySpec] = CONFIG_KEY_SPECS, + path_config_keys: Set[str] | frozenset[str] = PATH_CONFIG_KEYS, +) -> dict[str, object]: + config_path = root_path / "pyproject.toml" + if not config_path.exists(): + return {} + + load_toml_fn = _load_toml if load_toml is None else load_toml + + payload: object + try: + payload = load_toml_fn(config_path) + except OSError as exc: + raise ConfigValidationError( + f"Cannot read pyproject.toml at {config_path}: {exc}" + ) from exc + except ValueError as exc: + raise ConfigValidationError(f"Invalid TOML in {config_path}: {exc}") from exc + + if not isinstance(payload, dict): + raise ConfigValidationError( + f"Invalid pyproject payload at {config_path}: root must be object" + ) + + tool_obj = payload.get("tool") + if tool_obj is None: + return {} + if not isinstance(tool_obj, dict): + raise ConfigValidationError( + f"Invalid pyproject payload at {config_path}: 'tool' must be object" + ) + + codeclone_obj = tool_obj.get("codeclone") + if codeclone_obj is None: + return {} + if not isinstance(codeclone_obj, dict): + raise ConfigValidationError( + "Invalid pyproject payload at " + 
f"{config_path}: 'tool.codeclone' must be object" + ) + + unknown = sorted(set(codeclone_obj.keys()) - set(config_key_specs)) + if unknown: + raise ConfigValidationError( + "Unknown key(s) in tool.codeclone: " + ", ".join(unknown) + ) + + validated: dict[str, object] = {} + for key in sorted(codeclone_obj.keys()): + value = validate_config_value( + key=key, + value=codeclone_obj[key], + config_key_specs=config_key_specs, + ) + validated[key] = normalize_path_config_value( + key=key, + value=value, + root_path=root_path, + path_config_keys=path_config_keys, + ) + return validated + + +def normalize_path_config_value( + *, + key: str, + value: object, + root_path: Path, + path_config_keys: Set[str] | frozenset[str] = PATH_CONFIG_KEYS, +) -> object: + if key not in path_config_keys: + return value + if not isinstance(value, str): + return value + + path = Path(value).expanduser() + if path.is_absolute(): + return str(path) + return str(root_path / path) + + +def _validated_config_instance( + *, + key: str, + value: object, + expected_type: type[object], + expected_name: str, + reject_bool: bool = False, +) -> object: + if isinstance(value, expected_type) and ( + not reject_bool or not isinstance(value, bool) + ): + return value + raise ConfigValidationError( + f"Invalid value type for tool.codeclone.{key}: expected {expected_name}" + ) + + +def _validated_string_list(*, key: str, value: object) -> tuple[str, ...]: + if not isinstance(value, list): + raise ConfigValidationError( + f"Invalid value type for tool.codeclone.{key}: expected list[str]" + ) + if not all(isinstance(item, str) for item in value): + raise ConfigValidationError( + f"Invalid value type for tool.codeclone.{key}: expected list[str]" + ) + try: + return normalize_golden_fixture_patterns(value) + except GoldenFixturePatternError as exc: + raise ConfigValidationError(str(exc)) from exc + + +def _load_toml(path: Path) -> object: + if sys.version_info >= (3, 11): + import tomllib + + with path.open("rb") 
as config_file: + return tomllib.load(config_file) + + try: + tomli_module = importlib.import_module("tomli") + except ModuleNotFoundError as exc: + raise ConfigValidationError( + "Python 3.10 requires dependency 'tomli' to read pyproject.toml." + ) from exc + + load_fn = getattr(tomli_module, "load", None) + if not callable(load_fn): + raise ConfigValidationError("Invalid 'tomli' module: missing callable 'load'.") + + with path.open("rb") as config_file: + return load_fn(config_file) + + +__all__ = [ + "CONFIG_KEY_SPECS", + "PATH_CONFIG_KEYS", + "ConfigValidationError", + "_load_toml", + "load_pyproject_config", + "normalize_path_config_value", + "validate_config_value", +] diff --git a/codeclone/config/resolver.py b/codeclone/config/resolver.py new file mode 100644 index 0000000..03ef896 --- /dev/null +++ b/codeclone/config/resolver.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import argparse + from collections.abc import Mapping, Sequence + + +@dataclass(frozen=True, slots=True) +class ResolvedConfig: + values: dict[str, object] + explicit_cli_dests: frozenset[str] + pyproject_values: dict[str, object] + + +def collect_explicit_cli_dests( + parser: argparse.ArgumentParser, + *, + argv: Sequence[str], +) -> set[str]: + option_to_dest: dict[str, str] = {} + for action in parser._actions: + for option in action.option_strings: + option_to_dest[option] = action.dest + + explicit: set[str] = set() + for token in argv: + if token == "--": + break + if not token.startswith("-"): + continue + option = token.split("=", maxsplit=1)[0] + dest = option_to_dest.get(option) + if dest is not None: + explicit.add(dest) + return explicit + + +def resolve_config( + *, + args: argparse.Namespace, + config_values: Mapping[str, object], + explicit_cli_dests: set[str], +) -> ResolvedConfig: + resolved_values = vars(args).copy() + for key, value in config_values.items(): + if key 
in explicit_cli_dests: + continue + resolved_values[key] = value + + return ResolvedConfig( + values=resolved_values, + explicit_cli_dests=frozenset(explicit_cli_dests), + pyproject_values=dict(config_values), + ) + + +def apply_resolved_config( + *, + args: argparse.Namespace, + resolved: ResolvedConfig, +) -> None: + for key, value in resolved.values.items(): + setattr(args, key, value) + + +def apply_pyproject_config_overrides( + *, + args: argparse.Namespace, + config_values: Mapping[str, object], + explicit_cli_dests: set[str], +) -> None: + apply_resolved_config( + args=args, + resolved=resolve_config( + args=args, + config_values=config_values, + explicit_cli_dests=explicit_cli_dests, + ), + ) + + +__all__ = [ + "ResolvedConfig", + "apply_pyproject_config_overrides", + "apply_resolved_config", + "collect_explicit_cli_dests", + "resolve_config", +] diff --git a/codeclone/config/spec.py b/codeclone/config/spec.py new file mode 100644 index 0000000..095a3fc --- /dev/null +++ b/codeclone/config/spec.py @@ -0,0 +1,769 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Final, Literal, cast + +from .. import ui_messages as ui +from ..contracts import ( + DEFAULT_COHESION_THRESHOLD, + DEFAULT_COMPLEXITY_THRESHOLD, + DEFAULT_COUPLING_THRESHOLD, + DEFAULT_HEALTH_THRESHOLD, +) + +CliKind = Literal[ + "positional", + "value", + "optional_path", + "bool_optional", + "store_true", + "store_false", + "help", + "version", +] + +DEFAULT_ROOT = "." 
+DEFAULT_MIN_LOC = 10 +DEFAULT_MIN_STMT = 6 +DEFAULT_BLOCK_MIN_LOC = 20 +DEFAULT_BLOCK_MIN_STMT = 8 +DEFAULT_SEGMENT_MIN_LOC = 20 +DEFAULT_SEGMENT_MIN_STMT = 10 +DEFAULT_PROCESSES = 4 +DEFAULT_MAX_CACHE_SIZE_MB = 50 +DEFAULT_MAX_BASELINE_SIZE_MB = 5 + +DEFAULT_BASELINE_PATH = "codeclone.baseline.json" +DEFAULT_HTML_REPORT_PATH = ".cache/codeclone/report.html" +DEFAULT_JSON_REPORT_PATH = ".cache/codeclone/report.json" +DEFAULT_MARKDOWN_REPORT_PATH = ".cache/codeclone/report.md" +DEFAULT_SARIF_REPORT_PATH = ".cache/codeclone/report.sarif" +DEFAULT_TEXT_REPORT_PATH = ".cache/codeclone/report.txt" + +_UNSET: Final[object] = object() +_INFER_PYPROJECT_KEY: Final[object] = object() + + +@dataclass(frozen=True, slots=True) +class ConfigKeySpec: + expected_type: type[object] + allow_none: bool = False + expected_name: str | None = None + + +@dataclass(frozen=True, slots=True) +class OptionSpec: + dest: str + group: str | None + cli_kind: CliKind | None = None + flags: tuple[str, ...] = () + default: object = _UNSET + value_type: type[object] | None = None + const: object | None = None + nargs: str | int | None = None + metavar: str | None = None + help_text: str | None = None + pyproject_key: str | None = None + config_spec: ConfigKeySpec | None = None + path_value: bool = False + + @property + def has_default(self) -> bool: + return self.default is not _UNSET + + +def _option( + *, + dest: str, + group: str | None, + cli_kind: CliKind | None = None, + flags: tuple[str, ...] 
= (), + default: object = _UNSET, + value_type: type[object] | None = None, + const: object | None = None, + nargs: str | int | None = None, + metavar: str | None = None, + help_text: str | None = None, + pyproject_type: type[object] | None = None, + allow_none: bool = False, + expected_name: str | None = None, + pyproject_key: object = _INFER_PYPROJECT_KEY, + path_value: bool = False, +) -> OptionSpec: + config_spec = ( + ConfigKeySpec( + expected_type=pyproject_type, + allow_none=allow_none, + expected_name=expected_name, + ) + if pyproject_type is not None + else None + ) + resolved_pyproject_key: str | None + if pyproject_type is None: + resolved_pyproject_key = None + elif pyproject_key is _INFER_PYPROJECT_KEY: + resolved_pyproject_key = dest + else: + resolved_pyproject_key = cast("str | None", pyproject_key) + return OptionSpec( + dest=dest, + group=group, + cli_kind=cli_kind, + flags=flags, + default=default, + value_type=value_type, + const=const, + nargs=nargs, + metavar=metavar, + help_text=help_text, + pyproject_key=resolved_pyproject_key, + config_spec=config_spec, + path_value=path_value, + ) + + +ARGUMENT_GROUP_TITLES: Final[tuple[str, ...]] = ( + "Target", + "Analysis", + "Baselines and CI", + "Quality gates", + "Analysis stages", + "Reporting", + "Output and UI", + "General", +) + +OPTIONS: Final[tuple[OptionSpec, ...]] = ( + _option( + dest="root", + group="Target", + cli_kind="positional", + default=DEFAULT_ROOT, + nargs="?", + help_text=ui.HELP_ROOT, + ), + _option( + dest="min_loc", + group="Analysis", + cli_kind="value", + flags=("--min-loc",), + default=DEFAULT_MIN_LOC, + value_type=int, + help_text=ui.HELP_MIN_LOC, + pyproject_type=int, + ), + _option( + dest="min_stmt", + group="Analysis", + cli_kind="value", + flags=("--min-stmt",), + default=DEFAULT_MIN_STMT, + value_type=int, + help_text=ui.HELP_MIN_STMT, + pyproject_type=int, + ), + _option( + dest="block_min_loc", + group="Analysis", + default=DEFAULT_BLOCK_MIN_LOC, + 
pyproject_type=int, + ), + _option( + dest="block_min_stmt", + group="Analysis", + default=DEFAULT_BLOCK_MIN_STMT, + pyproject_type=int, + ), + _option( + dest="segment_min_loc", + group="Analysis", + default=DEFAULT_SEGMENT_MIN_LOC, + pyproject_type=int, + ), + _option( + dest="segment_min_stmt", + group="Analysis", + default=DEFAULT_SEGMENT_MIN_STMT, + pyproject_type=int, + ), + _option( + dest="golden_fixture_paths", + group="Analysis", + default=(), + pyproject_type=list, + expected_name="list[str]", + ), + _option( + dest="processes", + group="Analysis", + cli_kind="value", + flags=("--processes",), + default=DEFAULT_PROCESSES, + value_type=int, + help_text=ui.HELP_PROCESSES, + pyproject_type=int, + ), + _option( + dest="changed_only", + group="Analysis", + cli_kind="bool_optional", + flags=("--changed-only",), + default=False, + help_text=ui.HELP_CHANGED_ONLY, + ), + _option( + dest="diff_against", + group="Analysis", + cli_kind="value", + flags=("--diff-against",), + default=None, + metavar="GIT_REF", + help_text=ui.HELP_DIFF_AGAINST, + ), + _option( + dest="paths_from_git_diff", + group="Analysis", + cli_kind="value", + flags=("--paths-from-git-diff",), + default=None, + metavar="GIT_REF", + help_text=ui.HELP_PATHS_FROM_GIT_DIFF, + ), + _option( + dest="cache_path", + group="Analysis", + cli_kind="optional_path", + flags=("--cache-path",), + default=None, + metavar="FILE", + help_text=ui.HELP_CACHE_PATH, + pyproject_type=str, + allow_none=True, + path_value=True, + ), + _option( + dest="cache_path", + group="Analysis", + cli_kind="optional_path", + flags=("--cache-dir",), + metavar="FILE", + help_text=ui.HELP_CACHE_DIR_LEGACY, + pyproject_key=None, + ), + _option( + dest="max_cache_size_mb", + group="Analysis", + cli_kind="value", + flags=("--max-cache-size-mb",), + default=DEFAULT_MAX_CACHE_SIZE_MB, + value_type=int, + metavar="MB", + help_text=ui.HELP_MAX_CACHE_SIZE_MB, + pyproject_type=int, + ), + _option( + dest="baseline", + group="Baselines and CI", + 
cli_kind="optional_path", + flags=("--baseline",), + default=DEFAULT_BASELINE_PATH, + const=DEFAULT_BASELINE_PATH, + metavar="FILE", + help_text=ui.HELP_BASELINE, + pyproject_type=str, + path_value=True, + ), + _option( + dest="max_baseline_size_mb", + group="Baselines and CI", + cli_kind="value", + flags=("--max-baseline-size-mb",), + default=DEFAULT_MAX_BASELINE_SIZE_MB, + value_type=int, + metavar="MB", + help_text=ui.HELP_MAX_BASELINE_SIZE_MB, + pyproject_type=int, + ), + _option( + dest="update_baseline", + group="Baselines and CI", + cli_kind="bool_optional", + flags=("--update-baseline",), + default=False, + help_text=ui.HELP_UPDATE_BASELINE, + pyproject_type=bool, + ), + _option( + dest="metrics_baseline", + group="Baselines and CI", + cli_kind="optional_path", + flags=("--metrics-baseline",), + default=DEFAULT_BASELINE_PATH, + const=DEFAULT_BASELINE_PATH, + metavar="FILE", + help_text=ui.HELP_METRICS_BASELINE, + pyproject_type=str, + path_value=True, + ), + _option( + dest="update_metrics_baseline", + group="Baselines and CI", + cli_kind="bool_optional", + flags=("--update-metrics-baseline",), + default=False, + help_text=ui.HELP_UPDATE_METRICS_BASELINE, + pyproject_type=bool, + ), + _option( + dest="ci", + group="Baselines and CI", + cli_kind="bool_optional", + flags=("--ci",), + default=False, + help_text=ui.HELP_CI, + pyproject_type=bool, + ), + _option( + dest="api_surface", + group="Baselines and CI", + cli_kind="bool_optional", + flags=("--api-surface",), + default=False, + help_text=ui.HELP_API_SURFACE, + pyproject_type=bool, + ), + _option( + dest="coverage_xml", + group="Baselines and CI", + cli_kind="value", + flags=("--coverage",), + default=None, + metavar="FILE", + help_text=ui.HELP_COVERAGE, + pyproject_type=str, + allow_none=True, + path_value=True, + ), + _option( + dest="fail_on_new", + group="Quality gates", + cli_kind="bool_optional", + flags=("--fail-on-new",), + default=False, + help_text=ui.HELP_FAIL_ON_NEW, + pyproject_type=bool, + 
), + _option( + dest="fail_on_new_metrics", + group="Quality gates", + cli_kind="bool_optional", + flags=("--fail-on-new-metrics",), + default=False, + help_text=ui.HELP_FAIL_ON_NEW_METRICS, + pyproject_type=bool, + ), + _option( + dest="fail_threshold", + group="Quality gates", + cli_kind="value", + flags=("--fail-threshold",), + default=-1, + value_type=int, + metavar="MAX_CLONES", + help_text=ui.HELP_FAIL_THRESHOLD, + pyproject_type=int, + ), + _option( + dest="fail_complexity", + group="Quality gates", + cli_kind="value", + flags=("--fail-complexity",), + default=-1, + value_type=int, + nargs="?", + const=DEFAULT_COMPLEXITY_THRESHOLD, + metavar="CC_MAX", + help_text=ui.HELP_FAIL_COMPLEXITY, + pyproject_type=int, + ), + _option( + dest="fail_coupling", + group="Quality gates", + cli_kind="value", + flags=("--fail-coupling",), + default=-1, + value_type=int, + nargs="?", + const=DEFAULT_COUPLING_THRESHOLD, + metavar="CBO_MAX", + help_text=ui.HELP_FAIL_COUPLING, + pyproject_type=int, + ), + _option( + dest="fail_cohesion", + group="Quality gates", + cli_kind="value", + flags=("--fail-cohesion",), + default=-1, + value_type=int, + nargs="?", + const=DEFAULT_COHESION_THRESHOLD, + metavar="LCOM4_MAX", + help_text=ui.HELP_FAIL_COHESION, + pyproject_type=int, + ), + _option( + dest="fail_cycles", + group="Quality gates", + cli_kind="bool_optional", + flags=("--fail-cycles",), + default=False, + help_text=ui.HELP_FAIL_CYCLES, + pyproject_type=bool, + ), + _option( + dest="fail_dead_code", + group="Quality gates", + cli_kind="bool_optional", + flags=("--fail-dead-code",), + default=False, + help_text=ui.HELP_FAIL_DEAD_CODE, + pyproject_type=bool, + ), + _option( + dest="fail_health", + group="Quality gates", + cli_kind="value", + flags=("--fail-health",), + default=-1, + value_type=int, + nargs="?", + const=DEFAULT_HEALTH_THRESHOLD, + metavar="SCORE_MIN", + help_text=ui.HELP_FAIL_HEALTH, + pyproject_type=int, + ), + _option( + dest="fail_on_typing_regression", + 
group="Quality gates", + cli_kind="bool_optional", + flags=("--fail-on-typing-regression",), + default=False, + help_text=ui.HELP_FAIL_ON_TYPING_REGRESSION, + pyproject_type=bool, + ), + _option( + dest="fail_on_docstring_regression", + group="Quality gates", + cli_kind="bool_optional", + flags=("--fail-on-docstring-regression",), + default=False, + help_text=ui.HELP_FAIL_ON_DOCSTRING_REGRESSION, + pyproject_type=bool, + ), + _option( + dest="fail_on_api_break", + group="Quality gates", + cli_kind="bool_optional", + flags=("--fail-on-api-break",), + default=False, + help_text=ui.HELP_FAIL_ON_API_BREAK, + pyproject_type=bool, + ), + _option( + dest="fail_on_untested_hotspots", + group="Quality gates", + cli_kind="bool_optional", + flags=("--fail-on-untested-hotspots",), + default=False, + help_text=ui.HELP_FAIL_ON_UNTESTED_HOTSPOTS, + pyproject_type=bool, + ), + _option( + dest="min_typing_coverage", + group="Quality gates", + cli_kind="value", + flags=("--min-typing-coverage",), + default=-1, + value_type=int, + metavar="PERCENT", + help_text=ui.HELP_MIN_TYPING_COVERAGE, + pyproject_type=int, + ), + _option( + dest="min_docstring_coverage", + group="Quality gates", + cli_kind="value", + flags=("--min-docstring-coverage",), + default=-1, + value_type=int, + metavar="PERCENT", + help_text=ui.HELP_MIN_DOCSTRING_COVERAGE, + pyproject_type=int, + ), + _option( + dest="coverage_min", + group="Quality gates", + cli_kind="value", + flags=("--coverage-min",), + default=50, + value_type=int, + metavar="PERCENT", + help_text=ui.HELP_COVERAGE_MIN, + pyproject_type=int, + ), + _option( + dest="skip_metrics", + group="Analysis stages", + cli_kind="bool_optional", + flags=("--skip-metrics",), + default=False, + help_text=ui.HELP_SKIP_METRICS, + pyproject_type=bool, + ), + _option( + dest="skip_dead_code", + group="Analysis stages", + cli_kind="bool_optional", + flags=("--skip-dead-code",), + default=False, + help_text=ui.HELP_SKIP_DEAD_CODE, + pyproject_type=bool, + ), + _option( 
+ dest="skip_dependencies", + group="Analysis stages", + cli_kind="bool_optional", + flags=("--skip-dependencies",), + default=False, + help_text=ui.HELP_SKIP_DEPENDENCIES, + pyproject_type=bool, + ), + _option( + dest="html_out", + group="Reporting", + cli_kind="optional_path", + flags=("--html",), + default=None, + const=DEFAULT_HTML_REPORT_PATH, + metavar="FILE", + help_text=ui.HELP_HTML, + pyproject_type=str, + allow_none=True, + path_value=True, + ), + _option( + dest="json_out", + group="Reporting", + cli_kind="optional_path", + flags=("--json",), + default=None, + const=DEFAULT_JSON_REPORT_PATH, + metavar="FILE", + help_text=ui.HELP_JSON, + pyproject_type=str, + allow_none=True, + path_value=True, + ), + _option( + dest="md_out", + group="Reporting", + cli_kind="optional_path", + flags=("--md",), + default=None, + const=DEFAULT_MARKDOWN_REPORT_PATH, + metavar="FILE", + help_text=ui.HELP_MD, + pyproject_type=str, + allow_none=True, + path_value=True, + ), + _option( + dest="sarif_out", + group="Reporting", + cli_kind="optional_path", + flags=("--sarif",), + default=None, + const=DEFAULT_SARIF_REPORT_PATH, + metavar="FILE", + help_text=ui.HELP_SARIF, + pyproject_type=str, + allow_none=True, + path_value=True, + ), + _option( + dest="text_out", + group="Reporting", + cli_kind="optional_path", + flags=("--text",), + default=None, + const=DEFAULT_TEXT_REPORT_PATH, + metavar="FILE", + help_text=ui.HELP_TEXT, + pyproject_type=str, + allow_none=True, + path_value=True, + ), + _option( + dest="timestamped_report_paths", + group="Reporting", + cli_kind="bool_optional", + flags=("--timestamped-report-paths",), + default=False, + help_text=ui.HELP_TIMESTAMPED_REPORT_PATHS, + ), + _option( + dest="open_html_report", + group="Output and UI", + cli_kind="bool_optional", + flags=("--open-html-report",), + default=False, + help_text=ui.HELP_OPEN_HTML_REPORT, + ), + _option( + dest="no_progress", + group="Output and UI", + cli_kind="store_true", + flags=("--no-progress",), + 
default=False, + help_text=ui.HELP_NO_PROGRESS, + pyproject_type=bool, + ), + _option( + dest="no_progress", + group="Output and UI", + cli_kind="store_false", + flags=("--progress",), + help_text=ui.HELP_PROGRESS, + pyproject_key=None, + ), + _option( + dest="no_color", + group="Output and UI", + cli_kind="store_true", + flags=("--no-color",), + default=False, + help_text=ui.HELP_NO_COLOR, + pyproject_type=bool, + ), + _option( + dest="no_color", + group="Output and UI", + cli_kind="store_false", + flags=("--color",), + help_text=ui.HELP_COLOR, + pyproject_key=None, + ), + _option( + dest="quiet", + group="Output and UI", + cli_kind="bool_optional", + flags=("--quiet",), + default=False, + help_text=ui.HELP_QUIET, + pyproject_type=bool, + ), + _option( + dest="verbose", + group="Output and UI", + cli_kind="bool_optional", + flags=("--verbose",), + default=False, + help_text=ui.HELP_VERBOSE, + pyproject_type=bool, + ), + _option( + dest="debug", + group="Output and UI", + cli_kind="bool_optional", + flags=("--debug",), + default=False, + help_text=ui.HELP_DEBUG, + pyproject_type=bool, + ), + _option( + dest="help", + group="General", + cli_kind="help", + flags=("-h", "--help"), + help_text="Show this help message and exit.", + ), + _option( + dest="version", + group="General", + cli_kind="version", + flags=("--version",), + help_text=ui.HELP_VERSION, + ), +) + + +def _build_defaults_by_dest() -> dict[str, object]: + defaults: dict[str, object] = {} + for spec in OPTIONS: + if not spec.has_default or spec.dest in defaults: + continue + defaults[spec.dest] = spec.default + return defaults + + +def _build_pyproject_specs() -> dict[str, ConfigKeySpec]: + config_specs: dict[str, ConfigKeySpec] = {} + for spec in OPTIONS: + if spec.pyproject_key is None or spec.config_spec is None: + continue + if spec.pyproject_key in config_specs: + existing = config_specs[spec.pyproject_key] + if existing != spec.config_spec: + raise RuntimeError( + f"Conflicting pyproject spec for 
{spec.pyproject_key}" + ) + continue + config_specs[spec.pyproject_key] = spec.config_spec + return config_specs + + +DEFAULTS_BY_DEST: Final[dict[str, object]] = _build_defaults_by_dest() +CONFIG_KEY_SPECS: Final[dict[str, ConfigKeySpec]] = _build_pyproject_specs() +PATH_CONFIG_KEYS: Final[frozenset[str]] = frozenset( + spec.pyproject_key + for spec in OPTIONS + if spec.pyproject_key is not None and spec.path_value +) +TESTABLE_CLI_OPTIONS: Final[tuple[OptionSpec, ...]] = tuple( + spec + for spec in OPTIONS + if spec.cli_kind is not None and spec.cli_kind not in {"help", "version"} +) +PYPROJECT_OPTIONS: Final[tuple[OptionSpec, ...]] = tuple( + spec for spec in OPTIONS if spec.pyproject_key is not None and spec.config_spec +) + +__all__ = [ + "ARGUMENT_GROUP_TITLES", + "CONFIG_KEY_SPECS", + "DEFAULTS_BY_DEST", + "DEFAULT_BASELINE_PATH", + "DEFAULT_BLOCK_MIN_LOC", + "DEFAULT_BLOCK_MIN_STMT", + "DEFAULT_HTML_REPORT_PATH", + "DEFAULT_JSON_REPORT_PATH", + "DEFAULT_MARKDOWN_REPORT_PATH", + "DEFAULT_MAX_BASELINE_SIZE_MB", + "DEFAULT_MAX_CACHE_SIZE_MB", + "DEFAULT_MIN_LOC", + "DEFAULT_MIN_STMT", + "DEFAULT_PROCESSES", + "DEFAULT_ROOT", + "DEFAULT_SARIF_REPORT_PATH", + "DEFAULT_SEGMENT_MIN_LOC", + "DEFAULT_SEGMENT_MIN_STMT", + "DEFAULT_TEXT_REPORT_PATH", + "OPTIONS", + "PATH_CONFIG_KEYS", + "PYPROJECT_OPTIONS", + "TESTABLE_CLI_OPTIONS", + "ConfigKeySpec", + "OptionSpec", +] diff --git a/codeclone/contracts.py b/codeclone/contracts/__init__.py similarity index 75% rename from codeclone/contracts.py rename to codeclone/contracts/__init__.py index 70a76ee..170fde6 100644 --- a/codeclone/contracts.py +++ b/codeclone/contracts/__init__.py @@ -70,3 +70,30 @@ def cli_help_epilog() -> str: f"Docs: {DOCS_URL}", ] ) + + +__all__ = [ + "BASELINE_FINGERPRINT_VERSION", + "BASELINE_SCHEMA_VERSION", + "CACHE_VERSION", + "COHESION_RISK_MEDIUM_MAX", + "COMPLEXITY_RISK_LOW_MAX", + "COMPLEXITY_RISK_MEDIUM_MAX", + "COUPLING_RISK_LOW_MAX", + "COUPLING_RISK_MEDIUM_MAX", + 
"DEFAULT_COHESION_THRESHOLD", + "DEFAULT_COMPLEXITY_THRESHOLD", + "DEFAULT_COUPLING_THRESHOLD", + "DEFAULT_HEALTH_THRESHOLD", + "DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD", + "DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD", + "DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD", + "DOCS_URL", + "HEALTH_WEIGHTS", + "ISSUES_URL", + "METRICS_BASELINE_SCHEMA_VERSION", + "REPORT_SCHEMA_VERSION", + "REPOSITORY_URL", + "ExitCode", + "cli_help_epilog", +] diff --git a/codeclone/errors.py b/codeclone/contracts/errors.py similarity index 85% rename from codeclone/errors.py rename to codeclone/contracts/errors.py index 7b9331f..f19c34b 100644 --- a/codeclone/errors.py +++ b/codeclone/contracts/errors.py @@ -37,3 +37,14 @@ class BaselineValidationError(BaselineSchemaError): def __init__(self, message: str, *, status: str = "invalid_type") -> None: super().__init__(message) self.status = status + + +__all__ = [ + "BaselineSchemaError", + "BaselineValidationError", + "CacheError", + "CodeCloneError", + "FileProcessingError", + "ParseError", + "ValidationError", +] diff --git a/codeclone/contracts/schemas.py b/codeclone/contracts/schemas.py new file mode 100644 index 0000000..f9079ca --- /dev/null +++ b/codeclone/contracts/schemas.py @@ -0,0 +1,85 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from typing import TypedDict + + +class AnalysisProfile(TypedDict): + min_loc: int + min_stmt: int + block_min_loc: int + block_min_stmt: int + segment_min_loc: int + segment_min_stmt: int + collect_api_surface: bool + + +class AnalysisProfileMeta(TypedDict): + min_loc: int + min_stmt: int + block_min_loc: int + block_min_stmt: int + segment_min_loc: int + segment_min_stmt: int + + +class ReportMeta(TypedDict): + """ + Canonical report metadata contract shared by HTML, JSON, and TXT reports. + + Key semantics: + - python_version: runtime major.minor string for human readability (e.g. "3.13") + - python_tag: runtime compatibility tag used by baseline/cache contracts + (e.g. "cp313") + - baseline_*: values loaded from baseline metadata for audit/provenance + - cache_*: cache status/provenance for run transparency + """ + + codeclone_version: str + project_name: str + scan_root: str + python_version: str + python_tag: str + baseline_path: str + baseline_fingerprint_version: str | None + baseline_schema_version: str | None + baseline_python_tag: str | None + baseline_generator_name: str | None + baseline_generator_version: str | None + baseline_payload_sha256: str | None + baseline_payload_sha256_verified: bool + baseline_loaded: bool + baseline_status: str + cache_path: str + cache_used: bool + cache_status: str + cache_schema_version: str | None + files_skipped_source_io: int + metrics_baseline_path: str + metrics_baseline_loaded: bool + metrics_baseline_status: str + metrics_baseline_schema_version: str | None + metrics_baseline_payload_sha256: str | None + metrics_baseline_payload_sha256_verified: bool + health_score: int | None + health_grade: str | None + analysis_mode: str + metrics_computed: list[str] + analysis_profile: AnalysisProfileMeta + design_complexity_threshold: int + design_coupling_threshold: int + design_cohesion_threshold: int + 
analysis_started_at_utc: str | None + report_generated_at_utc: str + + +__all__ = [ + "AnalysisProfile", + "AnalysisProfileMeta", + "ReportMeta", +] diff --git a/codeclone/core/__init__.py b/codeclone/core/__init__.py new file mode 100644 index 0000000..f684ebc --- /dev/null +++ b/codeclone/core/__init__.py @@ -0,0 +1,61 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 + +from ._types import ( + DEFAULT_BATCH_SIZE, + DEFAULT_RUNTIME_PROCESSES, + MAX_FILE_SIZE, + PARALLEL_MIN_FILES_FLOOR, + PARALLEL_MIN_FILES_PER_WORKER, + AnalysisResult, + BootstrapResult, + DiscoveryResult, + FileProcessResult, + OutputPaths, + ProcessingResult, + ReportArtifacts, +) +from .bootstrap import _resolve_optional_runtime_path, bootstrap +from .discovery import discover +from .parallelism import ( + _parallel_min_files, + _resolve_process_count, + _should_use_parallel, + process, +) +from .pipeline import analyze, compute_project_metrics, compute_suggestions +from .reporting import GatingResult, MetricGateConfig, gate, report +from .worker import _invoke_process_file, process_file + +__all__ = [ + "DEFAULT_BATCH_SIZE", + "DEFAULT_RUNTIME_PROCESSES", + "MAX_FILE_SIZE", + "PARALLEL_MIN_FILES_FLOOR", + "PARALLEL_MIN_FILES_PER_WORKER", + "AnalysisResult", + "BootstrapResult", + "DiscoveryResult", + "FileProcessResult", + "GatingResult", + "MetricGateConfig", + "OutputPaths", + "ProcessingResult", + "ReportArtifacts", + "_invoke_process_file", + "_parallel_min_files", + "_resolve_optional_runtime_path", + "_resolve_process_count", + "_should_use_parallel", + "analyze", + "bootstrap", + "compute_project_metrics", + "compute_suggestions", + "discover", + "gate", + "process", + "process_file", + "report", +] diff --git a/codeclone/core/_types.py b/codeclone/core/_types.py new file mode 100644 index 
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

"""Shared constants, stage-result dataclasses, and pure helpers for core."""

from __future__ import annotations

from argparse import Namespace
from collections.abc import Mapping
from dataclasses import dataclass
from hashlib import sha256
from pathlib import Path
from typing import cast

import orjson

from ..analysis.normalizer import NormalizationConfig
from ..cache import FileStat, SegmentReportProjection
from ..models import (
    BlockUnit,
    ClassMetrics,
    CoverageJoinResult,
    DeadCandidate,
    FileMetrics,
    GroupItem,
    GroupItemLike,
    ModuleApiSurface,
    ModuleDep,
    ModuleDocstringCoverage,
    ModuleTypingCoverage,
    ProjectMetrics,
    SegmentUnit,
    StructuralFindingGroup,
    Suggestion,
    SuppressedCloneGroup,
    Unit,
)
from ..utils.coerce import as_int, as_str

# Tuning constants shared by the pipeline stages.
# NOTE(review): MAX_FILE_SIZE (10 MiB) is presumably the per-file analysis
# cap enforced by the worker — confirm against core/worker.py.
MAX_FILE_SIZE = 10 * 1024 * 1024
DEFAULT_BATCH_SIZE = 100
PARALLEL_MIN_FILES_PER_WORKER = 8
PARALLEL_MIN_FILES_FLOOR = 16
DEFAULT_RUNTIME_PROCESSES = 4


@dataclass(frozen=True, slots=True)
class OutputPaths:
    """Optional destinations for each report format (None = not requested)."""

    html: Path | None = None
    json: Path | None = None
    text: Path | None = None
    md: Path | None = None
    sarif: Path | None = None


@dataclass(frozen=True, slots=True)
class BootstrapResult:
    """Resolved run configuration produced by the bootstrap stage."""

    root: Path
    config: NormalizationConfig
    args: Namespace
    output_paths: OutputPaths
    cache_path: Path


@dataclass(frozen=True, slots=True)
class DiscoveryResult:
    """Outcome of the discovery stage: cache-hit payloads plus files to process."""

    files_found: int
    cache_hits: int
    files_skipped: int
    all_file_paths: tuple[str, ...]
    cached_units: tuple[GroupItem, ...]
    cached_blocks: tuple[GroupItem, ...]
    cached_segments: tuple[GroupItem, ...]
    cached_class_metrics: tuple[ClassMetrics, ...]
    cached_module_deps: tuple[ModuleDep, ...]
    cached_dead_candidates: tuple[DeadCandidate, ...]
    cached_referenced_names: frozenset[str]
    files_to_process: tuple[str, ...]
    skipped_warnings: tuple[str, ...]
    cached_referenced_qualnames: frozenset[str] = frozenset()
    cached_typing_modules: tuple[ModuleTypingCoverage, ...] = ()
    cached_docstring_modules: tuple[ModuleDocstringCoverage, ...] = ()
    cached_api_modules: tuple[ModuleApiSurface, ...] = ()
    cached_structural_findings: tuple[StructuralFindingGroup, ...] = ()
    cached_segment_report_projection: SegmentReportProjection | None = None
    cached_lines: int = 0
    cached_functions: int = 0
    cached_methods: int = 0
    cached_classes: int = 0
    # (filepath, lines, functions, methods, classes) per cache-hit file.
    cached_source_stats_by_file: tuple[tuple[str, int, int, int, int], ...] = ()


@dataclass(frozen=True, slots=True)
class FileProcessResult:
    """Per-file worker result; `success` False carries `error`/`error_kind`."""

    filepath: str
    success: bool
    error: str | None = None
    units: list[Unit] | None = None
    blocks: list[BlockUnit] | None = None
    segments: list[SegmentUnit] | None = None
    lines: int = 0
    functions: int = 0
    methods: int = 0
    classes: int = 0
    stat: FileStat | None = None
    error_kind: str | None = None
    file_metrics: FileMetrics | None = None
    structural_findings: list[StructuralFindingGroup] | None = None


@dataclass(frozen=True, slots=True)
class ProcessingResult:
    """Aggregated output of the processing stage over all freshly-parsed files."""

    units: tuple[GroupItem, ...]
    blocks: tuple[GroupItem, ...]
    segments: tuple[GroupItem, ...]
    class_metrics: tuple[ClassMetrics, ...]
    module_deps: tuple[ModuleDep, ...]
    dead_candidates: tuple[DeadCandidate, ...]
    referenced_names: frozenset[str]
    files_analyzed: int
    files_skipped: int
    analyzed_lines: int
    analyzed_functions: int
    analyzed_methods: int
    analyzed_classes: int
    failed_files: tuple[str, ...]
    source_read_failures: tuple[str, ...]
    referenced_qualnames: frozenset[str] = frozenset()
    typing_modules: tuple[ModuleTypingCoverage, ...] = ()
    docstring_modules: tuple[ModuleDocstringCoverage, ...] = ()
    api_modules: tuple[ModuleApiSurface, ...] = ()
    structural_findings: tuple[StructuralFindingGroup, ...] = ()
    source_stats_by_file: tuple[tuple[str, int, int, int, int], ...] = ()


@dataclass(frozen=True, slots=True)
class AnalysisResult:
    """Outcome of the analysis stage: clone groups, metrics, and suggestions."""

    func_groups: Mapping[str, list[GroupItem]]
    block_groups: Mapping[str, list[GroupItem]]
    block_groups_report: Mapping[str, list[GroupItem]]
    segment_groups: Mapping[str, list[GroupItem]]
    suppressed_segment_groups: int
    block_group_facts: dict[str, dict[str, str]]
    func_clones_count: int
    block_clones_count: int
    segment_clones_count: int
    files_analyzed_or_cached: int
    project_metrics: ProjectMetrics | None
    metrics_payload: dict[str, object] | None
    suggestions: tuple[Suggestion, ...]
    segment_groups_raw_digest: str
    suppressed_clone_groups: tuple[SuppressedCloneGroup, ...] = ()
    coverage_join: CoverageJoinResult | None = None
    suppressed_dead_code_items: int = 0
    structural_findings: tuple[StructuralFindingGroup, ...] = ()


@dataclass(frozen=True, slots=True)
class ReportArtifacts:
    """Rendered report bodies keyed by format (None = format not requested)."""

    html: str | None = None
    json: str | None = None
    text: str | None = None
    md: str | None = None
    sarif: str | None = None
    report_document: dict[str, object] | None = None


def _as_sorted_str_tuple(value: object) -> tuple[str, ...]:
    """Coerce an untrusted value to a sorted, deduplicated tuple of non-empty str."""
    if not isinstance(value, list):
        return ()
    return tuple(sorted({item for item in value if isinstance(item, str) and item}))


def _group_item_sort_key(item: GroupItemLike) -> tuple[str, int, int, str]:
    """Deterministic ordering: file, then span, then qualname."""
    return (
        as_str(item.get("filepath")),
        as_int(item.get("start_line")),
        as_int(item.get("end_line")),
        as_str(item.get("qualname")),
    )


def _segment_projection_item_sort_key(
    item: GroupItemLike,
) -> tuple[str, str, int, int]:
    """Segment ordering used for digesting: file, qualname, then span."""
    return (
        as_str(item.get("filepath")),
        as_str(item.get("qualname")),
        as_int(item.get("start_line")),
        as_int(item.get("end_line")),
    )


def _segment_groups_digest(segment_groups: Mapping[str, list[GroupItem]]) -> str:
    """Return a stable sha256 over canonically ordered segment groups."""
    normalized_rows: list[
        tuple[str, tuple[tuple[str, str, int, int, int, str, str], ...]]
    ] = []
    for group_key in sorted(segment_groups):
        items = sorted(segment_groups[group_key], key=_segment_projection_item_sort_key)
        normalized_items = [
            (
                as_str(item.get("filepath")),
                as_str(item.get("qualname")),
                as_int(item.get("start_line")),
                as_int(item.get("end_line")),
                as_int(item.get("size")),
                as_str(item.get("segment_hash")),
                as_str(item.get("segment_sig")),
            )
            for item in items
        ]
        normalized_rows.append((group_key, tuple(normalized_items)))
    payload = orjson.dumps(tuple(normalized_rows), option=orjson.OPT_SORT_KEYS)
    return sha256(payload).hexdigest()


def _coerce_segment_report_projection(
    value: object,
) -> SegmentReportProjection | None:
    """Validate a cached projection dict; return None on any shape mismatch."""
    if not isinstance(value, dict):
        return None
    digest = value.get("digest")
    suppressed = value.get("suppressed")
    groups = value.get("groups")
    if (
        not isinstance(digest, str)
        or not isinstance(suppressed, int)
        or not isinstance(groups, dict)
    ):
        return None
    if not all(
        isinstance(group_key, str) and isinstance(items, list)
        for group_key, items in groups.items()
    ):
        return None
    return cast("SegmentReportProjection", value)


def _module_dep_sort_key(dep: ModuleDep) -> tuple[str, str, str, int]:
    """Deterministic ordering for module dependency edges."""
    return dep.source, dep.target, dep.import_type, dep.line


def _class_metric_sort_key(metric: ClassMetrics) -> tuple[str, int, int, str]:
    """Deterministic ordering for class metrics rows."""
    return metric.filepath, metric.start_line, metric.end_line, metric.qualname


def _dead_candidate_sort_key(item: DeadCandidate) -> tuple[str, int, int, str]:
    """Deterministic ordering for dead-code candidates."""
    return item.filepath, item.start_line, item.end_line, item.qualname


def _module_names_from_units(units: tuple[GroupItemLike, ...]) -> frozenset[str]:
    """Collect module names from unit qualnames of the form "module:local"."""
    modules: set[str] = set()
    for item in units:
        qualname = as_str(item.get("qualname")) if isinstance(item, Mapping) else ""
        module_name = qualname.split(":", 1)[0] if ":" in qualname else qualname
        if module_name:
            modules.add(module_name)
    # frozenset is unordered, so the previous sorted() pass was a no-op.
    return frozenset(modules)


def _unit_to_group_item(unit: Unit) -> GroupItem:
    """Project a Unit onto the dict shape used for grouping/reporting."""
    return {
        "qualname": unit.qualname,
        "filepath": unit.filepath,
        "start_line": unit.start_line,
        "end_line": unit.end_line,
        "loc": unit.loc,
        "stmt_count": unit.stmt_count,
        "fingerprint": unit.fingerprint,
        "loc_bucket": unit.loc_bucket,
        "cyclomatic_complexity": unit.cyclomatic_complexity,
        "nesting_depth": unit.nesting_depth,
        "risk": unit.risk,
        "raw_hash": unit.raw_hash,
        "entry_guard_count": unit.entry_guard_count,
        "entry_guard_terminal_profile": unit.entry_guard_terminal_profile,
        "entry_guard_has_side_effect_before": unit.entry_guard_has_side_effect_before,
        "terminal_kind": unit.terminal_kind,
        "try_finally_profile": unit.try_finally_profile,
        "side_effect_order_profile": unit.side_effect_order_profile,
    }


def _block_to_group_item(block: BlockUnit) -> GroupItem:
    """Project a BlockUnit onto the dict shape used for grouping/reporting."""
    return {
        "block_hash": block.block_hash,
        "filepath": block.filepath,
        "qualname": block.qualname,
        "start_line": block.start_line,
        "end_line": block.end_line,
        "size": block.size,
    }


def _segment_to_group_item(segment: SegmentUnit) -> GroupItem:
    """Project a SegmentUnit onto the dict shape used for grouping/reporting."""
    return {
        "filepath": segment.filepath,
        "qualname": segment.qualname,
        "start_line": segment.start_line,
        "end_line": segment.end_line,
        "size": segment.size,
        "segment_hash": segment.segment_hash,
        "segment_sig": segment.segment_sig,
    }


def _should_collect_structural_findings(output_paths: OutputPaths) -> bool:
    """Structural findings are only worth collecting if any report is requested."""
    return bool(
        output_paths.html
        or output_paths.json
        or output_paths.md
        or output_paths.text
        or output_paths.sarif
    )

# --- file: codeclone/core/api_surface_payload.py begins in the next hunk ---
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0.
# If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

from collections.abc import Sequence

from ..models import ApiBreakingChange, ApiSurfaceSnapshot
from ..utils.coerce import as_int, as_str


def _api_surface_summary(api_surface: ApiSurfaceSnapshot | None) -> dict[str, object]:
    """Summarize an API surface snapshot for the report payload.

    "added"/"breaking"/"strict_types" are emitted as static defaults here;
    presumably overwritten by a later diff step — confirm in metrics_payload.
    """
    modules = api_surface.modules if api_surface is not None else ()
    return {
        "enabled": api_surface is not None,
        "modules": len(modules),
        "public_symbols": sum(len(module.symbols) for module in modules),
        "added": 0,
        "breaking": 0,
        "strict_types": False,
    }


def _api_surface_rows(
    api_surface: ApiSurfaceSnapshot | None,
) -> list[dict[str, object]]:
    """Flatten every public symbol into one row dict, deterministically sorted."""
    if api_surface is None:
        return []
    rows: list[dict[str, object]] = []
    for module in api_surface.modules:
        rows.extend(
            {
                "record_kind": "symbol",
                "module": module.module,
                "filepath": module.filepath,
                "qualname": symbol.qualname,
                "start_line": symbol.start_line,
                "end_line": symbol.end_line,
                "symbol_kind": symbol.kind,
                "exported_via": symbol.exported_via,
                "params_total": len(symbol.params),
                "params": [
                    {
                        "name": param.name,
                        "kind": param.kind,
                        "has_default": param.has_default,
                        # A non-empty hash means the param carried an annotation.
                        "annotated": bool(param.annotation_hash),
                    }
                    for param in symbol.params
                ],
                "returns_annotated": bool(symbol.returns_hash),
            }
            for symbol in module.symbols
        )
    # Stable order: file, span, qualname, record kind.
    return sorted(
        rows,
        key=lambda item: (
            as_str(item.get("filepath")),
            as_int(item.get("start_line")),
            as_int(item.get("end_line")),
            as_str(item.get("qualname")),
            as_str(item.get("record_kind")),
        ),
    )


def _breaking_api_surface_rows(changes: Sequence[object]) -> list[dict[str, object]]:
    """Render breaking-change records as rows; non-ApiBreakingChange items are skipped."""
    rows: list[dict[str, object]] = []
    for change in changes:
        if not isinstance(change, ApiBreakingChange):
            continue
        # Qualnames use "module:local"; keep only the module part for the row.
        module_name, _, _local_name = change.qualname.partition(":")
        rows.append(
            {
                "record_kind": "breaking_change",
                "module": module_name,
                "filepath": change.filepath,
                "qualname": change.qualname,
                "start_line": change.start_line,
                "end_line": change.end_line,
                "symbol_kind": change.symbol_kind,
                "change_kind": change.change_kind,
                "detail": change.detail,
            }
        )
    return sorted(
        rows,
        key=lambda item: (
            as_str(item.get("filepath")),
            as_int(item.get("start_line")),
            as_int(item.get("end_line")),
            as_str(item.get("qualname")),
            as_str(item.get("change_kind")),
        ),
    )

# --- file: codeclone/core/bootstrap.py (new file in this patch) ---
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

from argparse import Namespace
from pathlib import Path

from ..analysis.normalizer import NormalizationConfig
from ._types import BootstrapResult, OutputPaths


def bootstrap(
    *,
    args: Namespace,
    root: Path,
    output_paths: OutputPaths,
    cache_path: Path,
) -> BootstrapResult:
    """Assemble the immutable run configuration for the pipeline stages."""
    return BootstrapResult(
        root=root,
        config=NormalizationConfig(),
        args=args,
        output_paths=output_paths,
        cache_path=cache_path,
    )


def _resolve_optional_runtime_path(value: object, *, root: Path) -> Path | None:
    """Resolve an optional user-supplied path; relative paths anchor at `root`.

    Returns None for missing/blank input. Falls back to `.absolute()` when
    `.resolve()` raises OSError (e.g. unreachable filesystem).
    """
    text = str(value).strip() if value is not None else ""
    if not text:
        return None
    candidate = Path(text).expanduser()
    resolved = candidate if candidate.is_absolute() else root / candidate
    try:
        return resolved.resolve()
    except OSError:
        return resolved.absolute()

# --- file: codeclone/core/coverage_payload.py begins in the next hunk ---
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

"""Coverage-join and coverage-adoption payload builders for reports."""

from __future__ import annotations

from ..models import CoverageJoinResult, ProjectMetrics
from ..utils.coerce import as_int, as_str


def _permille(numerator: int, denominator: int) -> int:
    """Return numerator/denominator as rounded permille; 0 for empty denominator."""
    if denominator <= 0:
        return 0
    return round((1000.0 * float(numerator)) / float(denominator))


def _coverage_join_summary(
    coverage_join: CoverageJoinResult | None,
) -> dict[str, object]:
    """Summarize a coverage join result; empty dict when no join was attempted."""
    if coverage_join is None:
        return {}
    return {
        "status": coverage_join.status,
        "source": coverage_join.coverage_xml,
        "files": coverage_join.files,
        "units": len(coverage_join.units),
        "measured_units": coverage_join.measured_units,
        "overall_executable_lines": coverage_join.overall_executable_lines,
        "overall_covered_lines": coverage_join.overall_covered_lines,
        "overall_permille": _permille(
            coverage_join.overall_covered_lines,
            coverage_join.overall_executable_lines,
        ),
        "missing_from_report_units": sum(
            1
            for fact in coverage_join.units
            if fact.coverage_status == "missing_from_report"
        ),
        "coverage_hotspots": coverage_join.coverage_hotspots,
        "scope_gap_hotspots": coverage_join.scope_gap_hotspots,
        "hotspot_threshold_percent": coverage_join.hotspot_threshold_percent,
        "invalid_reason": coverage_join.invalid_reason,
    }


def _coverage_join_rows(
    coverage_join: CoverageJoinResult | None,
) -> list[dict[str, object]]:
    """Build per-unit coverage rows, hotspots first, deterministically sorted.

    Fix vs. original: the hotspot / scope-gap conditions were written out
    three times per row (once each for "coverage_hotspot",
    "scope_gap_hotspot", and again inside "coverage_review_item"). Each flag
    is now computed once per fact; "coverage_review_item" is simply the OR of
    the two flags. Emitted values are identical.
    """
    if coverage_join is None or coverage_join.status != "ok":
        return []
    threshold = float(coverage_join.hotspot_threshold_percent)
    rows: list[dict[str, object]] = []
    for fact in coverage_join.units:
        risky = fact.risk in {"medium", "high"}
        coverage_hotspot = (
            risky
            and fact.coverage_status == "measured"
            # coverage_permille is stored as permille; /10 yields percent.
            and (fact.coverage_permille / 10.0) < threshold
        )
        scope_gap_hotspot = risky and fact.coverage_status == "missing_from_report"
        rows.append(
            {
                "qualname": fact.qualname,
                "filepath": fact.filepath,
                "start_line": fact.start_line,
                "end_line": fact.end_line,
                "cyclomatic_complexity": fact.cyclomatic_complexity,
                "risk": fact.risk,
                "executable_lines": fact.executable_lines,
                "covered_lines": fact.covered_lines,
                "coverage_permille": fact.coverage_permille,
                "coverage_status": fact.coverage_status,
                "coverage_hotspot": coverage_hotspot,
                "scope_gap_hotspot": scope_gap_hotspot,
                "coverage_review_item": coverage_hotspot or scope_gap_hotspot,
            }
        )
    # Hotspots first, then by risk, lowest coverage, highest complexity, location.
    return sorted(
        rows,
        key=lambda item: (
            0 if bool(item.get("coverage_hotspot")) else 1,
            0 if bool(item.get("scope_gap_hotspot")) else 1,
            {"high": 0, "medium": 1, "low": 2}.get(as_str(item.get("risk")), 3),
            as_int(item.get("coverage_permille"), 0),
            -as_int(item.get("cyclomatic_complexity"), 0),
            as_str(item.get("filepath")),
            as_int(item.get("start_line")),
            as_str(item.get("qualname")),
        ),
    )


def _coverage_adoption_rows(project_metrics: ProjectMetrics) -> list[dict[str, object]]:
    """Join typing coverage with docstring coverage per (filepath, module).

    Modules present only in docstring coverage get zeroed typing columns so
    every module appears exactly once. Sorted worst-adoption-first.
    """
    docstring_by_module = {
        (item.filepath, item.module): item for item in project_metrics.docstring_modules
    }
    rows: list[dict[str, object]] = []
    seen_keys: set[tuple[str, str]] = set()
    for typing_item in project_metrics.typing_modules:
        key = (typing_item.filepath, typing_item.module)
        seen_keys.add(key)
        docstring_item = docstring_by_module.get(key)
        doc_total = docstring_item.public_symbol_total if docstring_item else 0
        doc_documented = (
            docstring_item.public_symbol_documented if docstring_item else 0
        )
        rows.append(
            {
                "module": typing_item.module,
                "filepath": typing_item.filepath,
                "callable_count": typing_item.callable_count,
                "params_total": typing_item.params_total,
                "params_annotated": typing_item.params_annotated,
                "param_permille": _permille(
                    typing_item.params_annotated,
                    typing_item.params_total,
                ),
                "returns_total": typing_item.returns_total,
                "returns_annotated": typing_item.returns_annotated,
                "return_permille": _permille(
                    typing_item.returns_annotated,
                    typing_item.returns_total,
                ),
                "any_annotation_count": typing_item.any_annotation_count,
                "public_symbol_total": doc_total,
                "public_symbol_documented": doc_documented,
                "docstring_permille": _permille(doc_documented, doc_total),
            }
        )
    for docstring_item in project_metrics.docstring_modules:
        key = (docstring_item.filepath, docstring_item.module)
        if key in seen_keys:
            continue
        rows.append(
            {
                "module": docstring_item.module,
                "filepath": docstring_item.filepath,
                "callable_count": 0,
                "params_total": 0,
                "params_annotated": 0,
                "param_permille": 0,
                "returns_total": 0,
                "returns_annotated": 0,
                "return_permille": 0,
                "any_annotation_count": 0,
                "public_symbol_total": docstring_item.public_symbol_total,
                "public_symbol_documented": docstring_item.public_symbol_documented,
                "docstring_permille": _permille(
                    docstring_item.public_symbol_documented,
                    docstring_item.public_symbol_total,
                ),
            }
        )
    return sorted(
        rows,
        key=lambda item: (
            as_int(item.get("param_permille")),
            as_int(item.get("docstring_permille")),
            as_int(item.get("return_permille")),
            as_str(item.get("module")),
        ),
    )

# --- file: codeclone/core/discovery.py begins in the next hunk ---
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

from typing import cast

from ..cache import Cache, file_stat_signature
from ..models import (
    ClassMetrics,
    DeadCandidate,
    GroupItem,
    ModuleApiSurface,
    ModuleDep,
    ModuleDocstringCoverage,
    ModuleTypingCoverage,
    StructuralFindingGroup,
)
from ..scanner import iter_py_files
from ._types import (
    BootstrapResult,
    DiscoveryResult,
    _class_metric_sort_key,
    _coerce_segment_report_projection,
    _dead_candidate_sort_key,
    _group_item_sort_key,
    _module_dep_sort_key,
    _should_collect_structural_findings,
)
from .discovery_cache import (
    decode_cached_structural_finding_group as _decode_cached_structural_finding_group,
)
from .discovery_cache import (
    load_cached_metrics_extended as _load_cached_metrics_extended,
)
from .discovery_cache import usable_cached_source_stats as _usable_cached_source_stats

# Positional accumulator bundle used while walking the tree; the order of
# members must match the unpacking in discover() exactly.
DiscoveryBuffers = tuple[
    list[GroupItem],
    list[GroupItem],
    list[GroupItem],
    list[ClassMetrics],
    list[ModuleDep],
    list[DeadCandidate],
    set[str],
    set[str],
    list[ModuleTypingCoverage],
    list[ModuleDocstringCoverage],
    list[ModuleApiSurface],
    list[str],
    list[str],
]


def _new_discovery_buffers() -> DiscoveryBuffers:
    """Return one empty accumulator per DiscoveryBuffers member (13 total)."""
    return [], [], [], [], [], [], set(), set(), [], [], [], [], []


def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult:
    """Walk the scan root, reuse valid cache entries, and list files to process.

    A cache entry is reusable only when its stat signature matches the file on
    disk AND it carries everything the current run needs (metrics unless
    --skip-metrics, structural findings when any report output is requested,
    and valid source stats). Otherwise the file is queued for processing.
    All returned collections are sorted for deterministic downstream output.
    """
    files_found = 0
    cache_hits = 0
    files_skipped = 0
    collect_structural_findings = _should_collect_structural_findings(boot.output_paths)
    cached_segment_projection = _coerce_segment_report_projection(
        getattr(cache, "segment_report_projection", None)
    )
    (
        cached_units,
        cached_blocks,
        cached_segments,
        cached_class_metrics,
        cached_module_deps,
        cached_dead_candidates,
        cached_referenced_names,
        cached_referenced_qualnames,
        cached_typing_modules,
        cached_docstring_modules,
        cached_api_modules,
        files_to_process,
        skipped_warnings,
    ) = _new_discovery_buffers()
    cached_sf: list[StructuralFindingGroup] = []
    cached_source_stats_by_file: list[tuple[str, int, int, int, int]] = []
    cached_lines = 0
    cached_functions = 0
    cached_methods = 0
    cached_classes = 0
    all_file_paths: list[str] = []

    for filepath in iter_py_files(str(boot.root)):
        files_found += 1
        all_file_paths.append(filepath)
        try:
            stat = file_stat_signature(filepath)
        except OSError as exc:
            # Unreadable file: count it, keep the reason, move on.
            files_skipped += 1
            skipped_warnings.append(f"{filepath}: {exc}")
            continue
        cached = cache.get_file_entry(filepath)
        if cached and cached.get("stat") == stat:
            cached_source_stats = _usable_cached_source_stats(
                cached,
                skip_metrics=boot.args.skip_metrics,
                collect_structural_findings=collect_structural_findings,
            )
            if cached_source_stats is None:
                # Entry is stale for this run's needs; reprocess the file.
                files_to_process.append(filepath)
                continue
            cache_hits += 1
            lines, functions, methods, classes = cached_source_stats
            cached_lines += lines
            cached_functions += functions
            cached_methods += methods
            cached_classes += classes
            cached_source_stats_by_file.append(
                (filepath, lines, functions, methods, classes)
            )
            cached_units.extend(cast("list[GroupItem]", cast(object, cached["units"])))
            cached_blocks.extend(
                cast("list[GroupItem]", cast(object, cached["blocks"]))
            )
            cached_segments.extend(
                cast("list[GroupItem]", cast(object, cached["segments"]))
            )
            if not boot.args.skip_metrics:
                (
                    class_metrics,
                    module_deps,
                    dead_candidates,
                    referenced_names,
                    referenced_qualnames,
                    typing_coverage,
                    docstring_coverage,
                    api_surface,
                ) = _load_cached_metrics_extended(cached, filepath=filepath)
                cached_class_metrics.extend(class_metrics)
                cached_module_deps.extend(module_deps)
                cached_dead_candidates.extend(dead_candidates)
                cached_referenced_names.update(referenced_names)
                cached_referenced_qualnames.update(referenced_qualnames)
                if typing_coverage is not None:
                    cached_typing_modules.append(typing_coverage)
                if docstring_coverage is not None:
                    cached_docstring_modules.append(docstring_coverage)
                if api_surface is not None:
                    cached_api_modules.append(api_surface)
            if collect_structural_findings:
                cached_sf.extend(
                    _decode_cached_structural_finding_group(group_dict, filepath)
                    for group_dict in cached.get("structural_findings") or []
                )
            continue
        files_to_process.append(filepath)

    return DiscoveryResult(
        files_found=files_found,
        cache_hits=cache_hits,
        files_skipped=files_skipped,
        all_file_paths=tuple(all_file_paths),
        cached_units=tuple(sorted(cached_units, key=_group_item_sort_key)),
        cached_blocks=tuple(sorted(cached_blocks, key=_group_item_sort_key)),
        cached_segments=tuple(sorted(cached_segments, key=_group_item_sort_key)),
        cached_class_metrics=tuple(
            sorted(cached_class_metrics, key=_class_metric_sort_key)
        ),
        cached_module_deps=tuple(sorted(cached_module_deps, key=_module_dep_sort_key)),
        cached_dead_candidates=tuple(
            sorted(cached_dead_candidates, key=_dead_candidate_sort_key)
        ),
        cached_referenced_names=frozenset(cached_referenced_names),
        cached_referenced_qualnames=frozenset(cached_referenced_qualnames),
        cached_typing_modules=tuple(
            sorted(cached_typing_modules, key=lambda item: (item.filepath, item.module))
        ),
        cached_docstring_modules=tuple(
            sorted(
                cached_docstring_modules,
                key=lambda item: (item.filepath, item.module),
            )
        ),
        cached_api_modules=tuple(
            sorted(cached_api_modules, key=lambda item: (item.filepath, item.module))
        ),
        files_to_process=tuple(files_to_process),
        skipped_warnings=tuple(sorted(skipped_warnings)),
        cached_structural_findings=tuple(cached_sf),
        cached_segment_report_projection=cached_segment_projection,
        cached_lines=cached_lines,
        cached_functions=cached_functions,
        cached_methods=cached_methods,
        cached_classes=cached_classes,
        cached_source_stats_by_file=tuple(
            sorted(cached_source_stats_by_file, key=lambda row: row[0])
        ),
    )

# --- file: codeclone/core/discovery_cache.py begins in the next hunk ---
a/codeclone/core/discovery_cache.py b/codeclone/core/discovery_cache.py new file mode 100644 index 0000000..d6d9470 --- /dev/null +++ b/codeclone/core/discovery_cache.py @@ -0,0 +1,363 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Literal, cast + +from ..cache import ( + ApiParamSpecDict, + CacheEntry, + ClassMetricsDict, + DeadCandidateDict, + ModuleDepDict, + PublicSymbolDict, + StructuralFindingGroupDict, +) +from ..models import ( + ApiParamSpec, + ClassMetrics, + DeadCandidate, + ModuleApiSurface, + ModuleDep, + ModuleDocstringCoverage, + ModuleTypingCoverage, + PublicSymbol, + StructuralFindingGroup, + StructuralFindingOccurrence, +) +from ..paths import is_test_filepath +from ._types import _as_sorted_str_tuple + + +def decode_cached_structural_finding_group( + group_dict: StructuralFindingGroupDict, + filepath: str, +) -> StructuralFindingGroup: + finding_kind = group_dict["finding_kind"] + finding_key = group_dict["finding_key"] + signature = group_dict["signature"] + items = tuple( + StructuralFindingOccurrence( + finding_kind=finding_kind, + finding_key=finding_key, + file_path=filepath, + qualname=item["qualname"], + start=item["start"], + end=item["end"], + signature=signature, + ) + for item in group_dict["items"] + ) + return StructuralFindingGroup( + finding_kind=finding_kind, + finding_key=finding_key, + signature=signature, + items=items, + ) + + +def _cache_entry_has_metrics(entry: CacheEntry) -> bool: + metric_keys = ( + "class_metrics", + "module_deps", + "dead_candidates", + "referenced_names", + "referenced_qualnames", + "import_names", + "class_names", + ) + return all(key in entry and isinstance(entry.get(key), 
list) for key in metric_keys) + + +def _cache_entry_has_structural_findings(entry: CacheEntry) -> bool: + return "structural_findings" in entry + + +def _cache_entry_source_stats(entry: CacheEntry) -> tuple[int, int, int, int] | None: + stats_obj = entry.get("source_stats") + if not isinstance(stats_obj, dict): + return None + lines = stats_obj.get("lines") + functions = stats_obj.get("functions") + methods = stats_obj.get("methods") + classes = stats_obj.get("classes") + if not ( + isinstance(lines, int) + and isinstance(functions, int) + and isinstance(methods, int) + and isinstance(classes, int) + and lines >= 0 + and functions >= 0 + and methods >= 0 + and classes >= 0 + ): + return None + return lines, functions, methods, classes + + +def usable_cached_source_stats( + entry: CacheEntry, + *, + skip_metrics: bool, + collect_structural_findings: bool, +) -> tuple[int, int, int, int] | None: + if not skip_metrics and not _cache_entry_has_metrics(entry): + return None + if collect_structural_findings and not _cache_entry_has_structural_findings(entry): + return None + return _cache_entry_source_stats(entry) + + +def _cache_dict_module_fields( + value: object, +) -> tuple[Mapping[str, object], str, str] | None: + if not isinstance(value, dict): + return None + row = cast("Mapping[str, object]", value) + module = row.get("module") + filepath = row.get("filepath") + if not isinstance(module, str) or not isinstance(filepath, str): + return None + return row, module, filepath + + +def _cache_dict_int_fields( + row: Mapping[str, object], + *keys: str, +) -> tuple[int, ...] 
| None: + values: list[int] = [] + for key in keys: + value = row.get(key) + if not isinstance(value, int): + return None + values.append(value) + return tuple(values) + + +def _typing_coverage_from_cache_dict(value: object) -> ModuleTypingCoverage | None: + row_info = _cache_dict_module_fields(value) + if row_info is None: + return None + row, module, filepath = row_info + int_fields = _cache_dict_int_fields( + row, + "callable_count", + "params_total", + "params_annotated", + "returns_total", + "returns_annotated", + "any_annotation_count", + ) + if int_fields is None: + return None + return ModuleTypingCoverage( + module=module, + filepath=filepath, + callable_count=int_fields[0], + params_total=int_fields[1], + params_annotated=int_fields[2], + returns_total=int_fields[3], + returns_annotated=int_fields[4], + any_annotation_count=int_fields[5], + ) + + +def _docstring_coverage_from_cache_dict( + value: object, +) -> ModuleDocstringCoverage | None: + row_info = _cache_dict_module_fields(value) + if row_info is None: + return None + row, module, filepath = row_info + totals = _cache_dict_int_fields( + row, + "public_symbol_total", + "public_symbol_documented", + ) + if totals is None: + return None + return ModuleDocstringCoverage( + module=module, + filepath=filepath, + public_symbol_total=totals[0], + public_symbol_documented=totals[1], + ) + + +def _api_param_spec_from_cache_dict(value: ApiParamSpecDict) -> ApiParamSpec | None: + name = value.get("name") + kind = value.get("kind") + has_default = value.get("has_default") + annotation_hash = value.get("annotation_hash", "") + if ( + not isinstance(name, str) + or not isinstance(kind, str) + or not isinstance(has_default, bool) + or not isinstance(annotation_hash, str) + ): + return None + return ApiParamSpec( + name=name, + kind=cast( + "Literal['pos_only', 'pos_or_kw', 'vararg', 'kw_only', 'kwarg']", + kind, + ), + has_default=has_default, + annotation_hash=annotation_hash, + ) + + +def 
_public_symbol_from_cache_dict(value: PublicSymbolDict) -> PublicSymbol | None: + qualname = value.get("qualname") + kind = value.get("kind") + start_line = value.get("start_line") + end_line = value.get("end_line") + exported_via = value.get("exported_via", "name") + returns_hash = value.get("returns_hash", "") + params_raw = value.get("params", []) + if ( + not isinstance(qualname, str) + or not isinstance(kind, str) + or not isinstance(start_line, int) + or not isinstance(end_line, int) + or not isinstance(exported_via, str) + or not isinstance(returns_hash, str) + or not isinstance(params_raw, list) + ): + return None + params: list[ApiParamSpec] = [] + for param in params_raw: + if not isinstance(param, dict): + return None + parsed = _api_param_spec_from_cache_dict(param) + if parsed is None: + return None + params.append(parsed) + return PublicSymbol( + qualname=qualname, + kind=cast("Literal['function', 'class', 'method', 'constant']", kind), + start_line=start_line, + end_line=end_line, + params=tuple(params), + returns_hash=returns_hash, + exported_via=cast("Literal['all', 'name']", exported_via), + ) + + +def _api_surface_from_cache_dict(value: object) -> ModuleApiSurface | None: + row_info = _cache_dict_module_fields(value) + if row_info is None: + return None + row, module, filepath = row_info + all_declared_raw = row.get("all_declared", []) + symbols_raw = row.get("symbols", []) + if ( + not isinstance(all_declared_raw, list) + or not isinstance(symbols_raw, list) + or not all(isinstance(item, str) for item in all_declared_raw) + ): + return None + symbols: list[PublicSymbol] = [] + for item in symbols_raw: + if not isinstance(item, dict): + return None + parsed = _public_symbol_from_cache_dict(cast("PublicSymbolDict", item)) + if parsed is None: + return None + symbols.append(parsed) + return ModuleApiSurface( + module=module, + filepath=filepath, + all_declared=tuple(sorted(set(all_declared_raw))) or None, + symbols=tuple(sorted(symbols, key=lambda 
def load_cached_metrics_extended(
    entry: CacheEntry,
    *,
    filepath: str,
) -> tuple[
    tuple[ClassMetrics, ...],
    tuple[ModuleDep, ...],
    tuple[DeadCandidate, ...],
    frozenset[str],
    frozenset[str],
    ModuleTypingCoverage | None,
    ModuleDocstringCoverage | None,
    ModuleApiSurface | None,
]:
    """Rehydrate the extended per-file metric artifacts from a cache entry.

    Returns, in order: class metrics, module dependencies, dead-code
    candidates, referenced names, referenced qualnames, typing coverage,
    docstring coverage, and the module API surface.  Individual rows that
    lack their identifying keys are dropped silently instead of invalidating
    the whole entry.
    """
    class_metrics_rows: list[ClassMetricsDict] = entry.get("class_metrics", [])
    class_metrics = tuple(
        ClassMetrics(
            qualname=row["qualname"],
            filepath=row["filepath"],
            start_line=row["start_line"],
            end_line=row["end_line"],
            cbo=row["cbo"],
            lcom4=row["lcom4"],
            method_count=row["method_count"],
            instance_var_count=row["instance_var_count"],
            # Cached risk strings are trusted; the casts only narrow for the
            # type checker, no runtime validation happens here.
            risk_coupling=cast(
                "Literal['low', 'medium', 'high']",
                row["risk_coupling"],
            ),
            risk_cohesion=cast(
                "Literal['low', 'medium', 'high']",
                row["risk_cohesion"],
            ),
            coupled_classes=_as_sorted_str_tuple(row.get("coupled_classes", [])),
        )
        for row in class_metrics_rows
        # Skip rows without identity — they cannot be attributed anywhere.
        if row.get("qualname") and row.get("filepath")
    )
    module_dep_rows: list[ModuleDepDict] = entry.get("module_deps", [])
    module_deps = tuple(
        ModuleDep(
            source=row["source"],
            target=row["target"],
            import_type=cast("Literal['import', 'from_import']", row["import_type"]),
            line=row["line"],
        )
        for row in module_dep_rows
        if row.get("source") and row.get("target")
    )
    dead_rows: list[DeadCandidateDict] = entry.get("dead_candidates", [])
    dead_candidates = tuple(
        DeadCandidate(
            qualname=row["qualname"],
            local_name=row["local_name"],
            filepath=row["filepath"],
            start_line=row["start_line"],
            end_line=row["end_line"],
            kind=cast("Literal['function', 'class', 'method', 'import']", row["kind"]),
            # De-duplicate and order suppression rule ids deterministically.
            suppressed_rules=tuple(sorted(set(row.get("suppressed_rules", [])))),
        )
        for row in dead_rows
        if row.get("qualname") and row.get("local_name") and row.get("filepath")
    )
    # Test files contribute no reference information — presumably so that
    # test-only usage does not mask dead production code (TODO confirm).
    referenced_names = (
        frozenset()
        if is_test_filepath(filepath)
        else frozenset(entry.get("referenced_names", []))
    )
    referenced_qualnames = (
        frozenset()
        if is_test_filepath(filepath)
        else frozenset(entry.get("referenced_qualnames", []))
    )
    return (
        class_metrics,
        module_deps,
        dead_candidates,
        referenced_names,
        referenced_qualnames,
        _typing_coverage_from_cache_dict(entry.get("typing_coverage")),
        _docstring_coverage_from_cache_dict(entry.get("docstring_coverage")),
        _api_surface_from_cache_dict(entry.get("api_surface")),
    )
def _enrich_metrics_report_payload(
    *,
    metrics_payload: Mapping[str, object],
    metrics_diff: MetricsDiff | None,
    coverage_adoption_diff_available: bool,
    api_surface_diff_available: bool,
) -> dict[str, object]:
    """Overlay baseline-diff deltas onto an already-built metrics payload.

    Operates on one-level-deep copies so the caller's payload is never
    mutated.  All deltas fall back to 0 when no baseline diff is available.
    """
    # Copy every top-level mapping so in-place edits below stay local.
    enriched = {
        key: (dict(value) if isinstance(value, Mapping) else value)
        for key, value in metrics_payload.items()
    }
    coverage_adoption = dict(
        cast("Mapping[str, object]", enriched.get("coverage_adoption", {}))
    )
    coverage_summary = dict(
        cast("Mapping[str, object]", coverage_adoption.get("summary", {}))
    )
    if coverage_summary:
        coverage_summary["baseline_diff_available"] = coverage_adoption_diff_available
        coverage_summary["param_delta"] = (
            int(metrics_diff.typing_param_permille_delta)
            if metrics_diff is not None and coverage_adoption_diff_available
            else 0
        )
        coverage_summary["return_delta"] = (
            int(metrics_diff.typing_return_permille_delta)
            if metrics_diff is not None and coverage_adoption_diff_available
            else 0
        )
        coverage_summary["docstring_delta"] = (
            int(metrics_diff.docstring_permille_delta)
            if metrics_diff is not None and coverage_adoption_diff_available
            else 0
        )
        coverage_adoption["summary"] = coverage_summary
        enriched["coverage_adoption"] = coverage_adoption

    api_surface = dict(cast("Mapping[str, object]", enriched.get("api_surface", {})))
    api_summary = dict(cast("Mapping[str, object]", api_surface.get("summary", {})))
    api_items = list(cast("Sequence[object]", api_surface.get("items", ())))
    if api_summary:
        api_summary["baseline_diff_available"] = api_surface_diff_available
        api_summary["added"] = (
            len(metrics_diff.new_api_symbols)
            if metrics_diff is not None and api_surface_diff_available
            else 0
        )
        api_summary["breaking"] = (
            len(metrics_diff.new_api_breaking_changes)
            if metrics_diff is not None and api_surface_diff_available
            else 0
        )
        api_surface["summary"] = api_summary
        # Breaking changes from the diff are appended as extra item rows.
        if (
            metrics_diff is not None
            and api_surface_diff_available
            and metrics_diff.new_api_breaking_changes
        ):
            api_items.extend(
                _breaking_api_surface_rows(metrics_diff.new_api_breaking_changes)
            )
        api_surface["items"] = api_items
    if api_surface:
        enriched["api_surface"] = api_surface
    return enriched
def build_metrics_report_payload(
    *,
    scan_root: str = "",
    project_metrics: ProjectMetrics,
    coverage_join: CoverageJoinResult | None = None,
    units: Sequence[GroupItemLike],
    class_metrics: Sequence[ClassMetrics],
    module_deps: Sequence[ModuleDep] = (),
    source_stats_by_file: Sequence[tuple[str, int, int, int, int]] = (),
    suppressed_dead_code: Sequence[DeadItem] = (),
) -> dict[str, object]:
    """Assemble the metrics section of the report as plain dicts and lists.

    The resulting payload is JSON-serializable; the ``coverage_join`` section
    is only present when coverage data was supplied.
    """
    # Worst complexity first; qualname is the deterministic tie-breaker.
    sorted_units = sorted(
        units,
        key=lambda item: (
            as_int(item.get("cyclomatic_complexity"), 0),
            as_int(item.get("nesting_depth"), 0),
            as_str(item.get("qualname")),
        ),
        reverse=True,
    )
    complexity_rows = [
        {
            "qualname": as_str(item.get("qualname")),
            "filepath": as_str(item.get("filepath")),
            "start_line": as_int(item.get("start_line"), 0),
            "end_line": as_int(item.get("end_line"), 0),
            "cyclomatic_complexity": as_int(item.get("cyclomatic_complexity"), 1),
            "nesting_depth": as_int(item.get("nesting_depth"), 0),
            "risk": as_str(item.get("risk"), RISK_LOW),
        }
        for item in sorted_units
    ]
    # One shared ordering (by CBO, then LCOM4) feeds both the coupling and
    # cohesion tables.
    classes_sorted = sorted(
        class_metrics,
        key=lambda item: (item.cbo, item.lcom4, item.qualname),
        reverse=True,
    )
    coupling_rows = [
        {
            "qualname": metric.qualname,
            "filepath": metric.filepath,
            "start_line": metric.start_line,
            "end_line": metric.end_line,
            "cbo": metric.cbo,
            "risk": metric.risk_coupling,
            "coupled_classes": list(metric.coupled_classes),
        }
        for metric in classes_sorted
    ]
    cohesion_rows = [
        {
            "qualname": metric.qualname,
            "filepath": metric.filepath,
            "start_line": metric.start_line,
            "end_line": metric.end_line,
            "lcom4": metric.lcom4,
            "risk": metric.risk_cohesion,
            "method_count": metric.method_count,
            "instance_var_count": metric.instance_var_count,
        }
        for metric in classes_sorted
    ]
    active_dead_items = tuple(project_metrics.dead_code)
    suppressed_dead_items = tuple(suppressed_dead_code)
    coverage_adoption_rows = _coverage_adoption_rows(project_metrics)
    api_surface_summary = _api_surface_summary(project_metrics.api_surface)
    api_surface_items = _api_surface_rows(project_metrics.api_surface)
    coverage_join_summary = _coverage_join_summary(coverage_join)
    coverage_join_items = _coverage_join_rows(coverage_join)

    def _serialize_dead_item(
        item: DeadItem,
        *,
        suppressed: bool = False,
    ) -> dict[str, object]:
        # Shared row shape for active and suppressed dead-code entries;
        # suppressed rows additionally record which rule silenced them.
        payload: dict[str, object] = {
            "qualname": item.qualname,
            "filepath": item.filepath,
            "start_line": item.start_line,
            "end_line": item.end_line,
            "kind": item.kind,
            "confidence": item.confidence,
        }
        if suppressed:
            payload["suppressed_by"] = [
                {
                    "rule": DEAD_CODE_RULE_ID,
                    "source": INLINE_CODECLONE_SUPPRESSION_SOURCE,
                }
            ]
        return payload

    payload = {
        CATEGORY_COMPLEXITY: {
            "functions": complexity_rows,
            "summary": {
                "total": len(complexity_rows),
                "average": round(project_metrics.complexity_avg, 2),
                "max": project_metrics.complexity_max,
                "high_risk": len(project_metrics.high_risk_functions),
            },
        },
        CATEGORY_COUPLING: {
            "classes": coupling_rows,
            "summary": {
                "total": len(coupling_rows),
                "average": round(project_metrics.coupling_avg, 2),
                "max": project_metrics.coupling_max,
                "high_risk": len(project_metrics.high_risk_classes),
            },
        },
        CATEGORY_COHESION: {
            "classes": cohesion_rows,
            "summary": {
                "total": len(cohesion_rows),
                "average": round(project_metrics.cohesion_avg, 2),
                "max": project_metrics.cohesion_max,
                "low_cohesion": len(project_metrics.low_cohesion_classes),
            },
        },
        "dependencies": {
            "modules": project_metrics.dependency_modules,
            "edges": project_metrics.dependency_edges,
            "max_depth": project_metrics.dependency_max_depth,
            "cycles": [list(cycle) for cycle in project_metrics.dependency_cycles],
            "longest_chains": [
                list(chain) for chain in project_metrics.dependency_longest_chains
            ],
            "edge_list": [
                {
                    "source": edge.source,
                    "target": edge.target,
                    "import_type": edge.import_type,
                    "line": edge.line,
                }
                for edge in project_metrics.dependency_edge_list
            ],
        },
        "dead_code": {
            "items": [_serialize_dead_item(item) for item in active_dead_items],
            "suppressed_items": [
                _serialize_dead_item(item, suppressed=True)
                for item in suppressed_dead_items
            ],
            "summary": {
                # NOTE(review): "critical" and "high_confidence" currently
                # count the same condition (CONFIDENCE_HIGH) — confirm this
                # duplication is intentional.
                "total": len(active_dead_items),
                "critical": sum(
                    1
                    for item in active_dead_items
                    if item.confidence == CONFIDENCE_HIGH
                ),
                "high_confidence": sum(
                    1
                    for item in active_dead_items
                    if item.confidence == CONFIDENCE_HIGH
                ),
                "suppressed": len(suppressed_dead_items),
            },
        },
        "health": {
            "score": project_metrics.health.total,
            "grade": project_metrics.health.grade,
            "dimensions": dict(project_metrics.health.dimensions),
        },
        "coverage_adoption": {
            "summary": {
                "modules": len(coverage_adoption_rows),
                "params_total": project_metrics.typing_param_total,
                "params_annotated": project_metrics.typing_param_annotated,
                "param_permille": _permille(
                    project_metrics.typing_param_annotated,
                    project_metrics.typing_param_total,
                ),
                "returns_total": project_metrics.typing_return_total,
                "returns_annotated": project_metrics.typing_return_annotated,
                "return_permille": _permille(
                    project_metrics.typing_return_annotated,
                    project_metrics.typing_return_total,
                ),
                "public_symbol_total": project_metrics.docstring_public_total,
                "public_symbol_documented": project_metrics.docstring_public_documented,
                "docstring_permille": _permille(
                    project_metrics.docstring_public_documented,
                    project_metrics.docstring_public_total,
                ),
                "typing_any_count": project_metrics.typing_any_count,
            },
            "items": coverage_adoption_rows,
        },
        "api_surface": {
            "summary": dict(api_surface_summary),
            "items": api_surface_items,
        },
        "overloaded_modules": build_overloaded_modules_payload(
            scan_root=scan_root,
            source_stats_by_file=source_stats_by_file,
            units=units,
            class_metrics=class_metrics,
            module_deps=module_deps,
        ),
    }
    # Coverage join is optional — only emitted when coverage XML was provided.
    if coverage_join is not None:
        payload["coverage_join"] = {
            "summary": dict(coverage_join_summary),
            "items": coverage_join_items,
        }
    return payload
def _parallel_min_files(processes: int) -> int:
    # Minimum file count at which a pool of this size is considered worthwhile.
    return max(PARALLEL_MIN_FILES_FLOOR, processes * PARALLEL_MIN_FILES_PER_WORKER)


def _resolve_process_count(processes: object) -> int:
    """Coerce a configured worker count to a positive int (default on junk)."""
    if not isinstance(processes, int):
        return DEFAULT_RUNTIME_PROCESSES
    return max(1, processes)


def _should_use_parallel(files_count: int, processes: int) -> bool:
    """Return True when the workload justifies spawning worker processes."""
    if processes <= 1:
        return False
    return files_count >= _parallel_min_files(processes)


def process(
    *,
    boot: BootstrapResult,
    discovery: DiscoveryResult,
    cache: Cache,
    on_advance: Callable[[], None] | None = None,
    on_worker_error: Callable[[str], None] | None = None,
    on_parallel_fallback: Callable[[Exception], None] | None = None,
    batch_size: int = DEFAULT_BATCH_SIZE,
) -> ProcessingResult:
    """Analyze all non-cached files and merge results with cached artifacts.

    Runs sequentially or via a ``ProcessPoolExecutor`` depending on workload
    size; falls back to sequential processing if the pool cannot be started.
    The optional callbacks report progress (``on_advance``), worker crashes
    (``on_worker_error``), and the parallel->sequential fallback.
    """
    files_to_process = discovery.files_to_process
    if not files_to_process:
        # Everything was served from cache — return cached artifacts as-is.
        return ProcessingResult(
            units=discovery.cached_units,
            blocks=discovery.cached_blocks,
            segments=discovery.cached_segments,
            class_metrics=discovery.cached_class_metrics,
            module_deps=discovery.cached_module_deps,
            dead_candidates=discovery.cached_dead_candidates,
            referenced_names=discovery.cached_referenced_names,
            referenced_qualnames=discovery.cached_referenced_qualnames,
            typing_modules=discovery.cached_typing_modules,
            docstring_modules=discovery.cached_docstring_modules,
            api_modules=discovery.cached_api_modules,
            files_analyzed=0,
            files_skipped=discovery.files_skipped,
            analyzed_lines=0,
            analyzed_functions=0,
            analyzed_methods=0,
            analyzed_classes=0,
            failed_files=(),
            source_read_failures=(),
            structural_findings=discovery.cached_structural_findings,
            source_stats_by_file=discovery.cached_source_stats_by_file,
        )

    # Accumulators are seeded with the cached portion; freshly analyzed
    # results are merged in by _accept_result below.
    all_units: list[GroupItem] = list(discovery.cached_units)
    all_blocks: list[GroupItem] = list(discovery.cached_blocks)
    all_segments: list[GroupItem] = list(discovery.cached_segments)
    all_class_metrics: list[ClassMetrics] = list(discovery.cached_class_metrics)
    all_module_deps: list[ModuleDep] = list(discovery.cached_module_deps)
    all_dead_candidates: list[DeadCandidate] = list(discovery.cached_dead_candidates)
    all_referenced_names: set[str] = set(discovery.cached_referenced_names)
    all_referenced_qualnames: set[str] = set(discovery.cached_referenced_qualnames)
    all_typing_modules: list[ModuleTypingCoverage] = list(
        discovery.cached_typing_modules
    )
    all_docstring_modules: list[ModuleDocstringCoverage] = list(
        discovery.cached_docstring_modules
    )
    all_api_modules: list[ModuleApiSurface] = list(discovery.cached_api_modules)

    collect_structural_findings = _should_collect_structural_findings(boot.output_paths)
    collect_api_surface = not boot.args.skip_metrics and bool(
        getattr(boot.args, "api_surface", False)
    )
    api_include_private_modules = bool(
        getattr(boot.args, "api_include_private_modules", False)
    )
    files_analyzed = 0
    files_skipped = discovery.files_skipped
    analyzed_lines = 0
    analyzed_functions = 0
    analyzed_methods = 0
    analyzed_classes = 0
    all_structural_findings: list[StructuralFindingGroup] = list(
        discovery.cached_structural_findings
    )
    source_stats_by_file: dict[str, tuple[int, int, int, int]] = {
        filepath: (lines, functions, methods, classes)
        for (
            filepath,
            lines,
            functions,
            methods,
            classes,
        ) in discovery.cached_source_stats_by_file
    }
    failed_files: list[str] = []
    source_read_failures: list[str] = []
    # Hoist per-file invariants once so both the sequential and parallel
    # paths pass identical arguments to the worker.
    root_str = str(boot.root)
    processes = _resolve_process_count(boot.args.processes)
    min_loc = int(boot.args.min_loc)
    min_stmt = int(boot.args.min_stmt)
    block_min_loc = int(boot.args.block_min_loc)
    block_min_stmt = int(boot.args.block_min_stmt)
    segment_min_loc = int(boot.args.segment_min_loc)
    segment_min_stmt = int(boot.args.segment_min_stmt)

    def _accept_result(result: FileProcessResult) -> None:
        # Merge one worker result into the accumulators (success path) or
        # record the failure (error path).
        nonlocal files_analyzed
        nonlocal files_skipped
        nonlocal analyzed_lines
        nonlocal analyzed_functions
        nonlocal analyzed_methods
        nonlocal analyzed_classes

        if result.success and result.stat is not None:
            source_stats_payload = SourceStatsDict(
                lines=result.lines,
                functions=result.functions,
                methods=result.methods,
                classes=result.classes,
            )
            structural_payload = (
                result.structural_findings if collect_structural_findings else None
            )
            try:
                cache.put_file_entry(
                    result.filepath,
                    result.stat,
                    result.units or [],
                    result.blocks or [],
                    result.segments or [],
                    source_stats=source_stats_payload,
                    file_metrics=result.file_metrics,
                    structural_findings=structural_payload,
                )
            except TypeError as exc:
                # Compatibility retry for a put_file_entry signature without
                # the source_stats keyword; any other TypeError is a real bug.
                if "source_stats" not in str(exc):
                    raise
                cache.put_file_entry(
                    result.filepath,
                    result.stat,
                    result.units or [],
                    result.blocks or [],
                    result.segments or [],
                    file_metrics=result.file_metrics,
                    structural_findings=structural_payload,
                )
            files_analyzed += 1
            analyzed_lines += result.lines
            analyzed_functions += result.functions
            analyzed_methods += result.methods
            analyzed_classes += result.classes
            source_stats_by_file[result.filepath] = (
                result.lines,
                result.functions,
                result.methods,
                result.classes,
            )
            if result.units:
                all_units.extend(_unit_to_group_item(unit) for unit in result.units)
            if result.blocks:
                all_blocks.extend(
                    _block_to_group_item(block) for block in result.blocks
                )
            if result.segments:
                all_segments.extend(
                    _segment_to_group_item(segment) for segment in result.segments
                )
            if result.structural_findings:
                all_structural_findings.extend(result.structural_findings)
            if not boot.args.skip_metrics and result.file_metrics is not None:
                all_class_metrics.extend(result.file_metrics.class_metrics)
                all_module_deps.extend(result.file_metrics.module_deps)
                all_dead_candidates.extend(result.file_metrics.dead_candidates)
                all_referenced_names.update(result.file_metrics.referenced_names)
                all_referenced_qualnames.update(
                    result.file_metrics.referenced_qualnames
                )
                if result.file_metrics.typing_coverage is not None:
                    all_typing_modules.append(result.file_metrics.typing_coverage)
                if result.file_metrics.docstring_coverage is not None:
                    all_docstring_modules.append(result.file_metrics.docstring_coverage)
                if result.file_metrics.api_surface is not None:
                    all_api_modules.append(result.file_metrics.api_surface)
            return

        files_skipped += 1
        failure = f"{result.filepath}: {result.error}"
        failed_files.append(failure)
        if result.error_kind == "source_read_error":
            source_read_failures.append(failure)

    def _run_sequential(files: Sequence[str]) -> None:
        # In-process fallback path; also used for small workloads.
        for filepath in files:
            _accept_result(
                _invoke_process_file(
                    filepath,
                    root_str,
                    boot.config,
                    min_loc,
                    min_stmt,
                    collect_structural_findings=collect_structural_findings,
                    collect_api_surface=collect_api_surface,
                    api_include_private_modules=api_include_private_modules,
                    block_min_loc=block_min_loc,
                    block_min_stmt=block_min_stmt,
                    segment_min_loc=segment_min_loc,
                    segment_min_stmt=segment_min_stmt,
                )
            )
            if on_advance is not None:
                on_advance()

    if _should_use_parallel(len(files_to_process), processes):
        try:
            with ProcessPoolExecutor(max_workers=processes) as executor:
                # Submit in batches to bound the number of in-flight futures.
                for idx in range(0, len(files_to_process), batch_size):
                    batch = files_to_process[idx : idx + batch_size]
                    futures = [
                        executor.submit(
                            _invoke_process_file,
                            filepath,
                            root_str,
                            boot.config,
                            min_loc,
                            min_stmt,
                            collect_structural_findings=collect_structural_findings,
                            collect_api_surface=collect_api_surface,
                            api_include_private_modules=api_include_private_modules,
                            block_min_loc=block_min_loc,
                            block_min_stmt=block_min_stmt,
                            segment_min_loc=segment_min_loc,
                            segment_min_stmt=segment_min_stmt,
                        )
                        for filepath in batch
                    ]
                    # Keyed by id() because Future objects are not reliably
                    # usable as dict keys across implementations here.
                    future_to_path = {
                        id(future): filepath
                        for future, filepath in zip(futures, batch, strict=True)
                    }
                    for future in as_completed(futures):
                        filepath = future_to_path[id(future)]
                        try:
                            _accept_result(future.result())
                        except Exception as exc:  # pragma: no cover - worker crash
                            files_skipped += 1
                            failed_files.append(f"{filepath}: {exc}")
                            if on_worker_error is not None:
                                on_worker_error(str(exc))
                        if on_advance is not None:
                            on_advance()
        except (OSError, RuntimeError, PermissionError) as exc:
            # Pool could not be started/operated (e.g. restricted
            # environments) — degrade gracefully to sequential processing.
            if on_parallel_fallback is not None:
                on_parallel_fallback(exc)
            _run_sequential(files_to_process)
    else:
        _run_sequential(files_to_process)

    # Everything is sorted before freezing — presumably to keep reports and
    # cache payloads byte-reproducible across runs (TODO confirm).
    return ProcessingResult(
        units=tuple(sorted(all_units, key=_group_item_sort_key)),
        blocks=tuple(sorted(all_blocks, key=_group_item_sort_key)),
        segments=tuple(sorted(all_segments, key=_group_item_sort_key)),
        class_metrics=tuple(sorted(all_class_metrics, key=_class_metric_sort_key)),
        module_deps=tuple(sorted(all_module_deps, key=_module_dep_sort_key)),
        dead_candidates=tuple(
            sorted(all_dead_candidates, key=_dead_candidate_sort_key)
        ),
        referenced_names=frozenset(all_referenced_names),
        referenced_qualnames=frozenset(all_referenced_qualnames),
        typing_modules=tuple(
            sorted(all_typing_modules, key=lambda item: (item.filepath, item.module))
        ),
        docstring_modules=tuple(
            sorted(all_docstring_modules, key=lambda item: (item.filepath, item.module))
        ),
        api_modules=tuple(
            sorted(all_api_modules, key=lambda item: (item.filepath, item.module))
        ),
        files_analyzed=files_analyzed,
        files_skipped=files_skipped,
        analyzed_lines=analyzed_lines,
        analyzed_functions=analyzed_functions,
        analyzed_methods=analyzed_methods,
        analyzed_classes=analyzed_classes,
        failed_files=tuple(sorted(failed_files)),
        source_read_failures=tuple(sorted(source_read_failures)),
        structural_findings=tuple(all_structural_findings),
        source_stats_by_file=tuple(
            (filepath, *stats)
            for filepath, stats in sorted(source_stats_by_file.items())
        ),
    )
def compute_project_metrics(
    *,
    units: Sequence[GroupItemLike],
    class_metrics: Sequence[ClassMetrics],
    module_deps: Sequence[ModuleDep],
    dead_candidates: Sequence[DeadCandidate],
    referenced_names: frozenset[str],
    referenced_qualnames: frozenset[str],
    typing_modules: Sequence[ModuleTypingCoverage] = (),
    docstring_modules: Sequence[ModuleDocstringCoverage] = (),
    api_modules: Sequence[ModuleApiSurface] = (),
    files_found: int,
    files_analyzed_or_cached: int,
    function_clone_groups: int,
    block_clone_groups: int,
    skip_dependencies: bool,
    skip_dead_code: bool,
) -> tuple[ProjectMetrics, DepGraph, tuple[DeadItem, ...]]:
    """Run every registered metric family and fold results into one view.

    Returns the aggregated project metrics plus two side artifacts some
    families produce: the dependency graph and the dead-code items.
    """
    context = MetricProjectContext(
        units=tuple(units),
        class_metrics=tuple(class_metrics),
        module_deps=tuple(module_deps),
        dead_candidates=tuple(dead_candidates),
        referenced_names=referenced_names,
        referenced_qualnames=referenced_qualnames,
        typing_modules=tuple(typing_modules),
        docstring_modules=tuple(docstring_modules),
        api_modules=tuple(api_modules),
        files_found=files_found,
        files_analyzed_or_cached=files_analyzed_or_cached,
        function_clone_groups=function_clone_groups,
        block_clone_groups=block_clone_groups,
        skip_dependencies=skip_dependencies,
        skip_dead_code=skip_dead_code,
    )
    project_fields = project_metrics_defaults()
    # Empty fallbacks in case no family emits these artifacts.
    dep_graph = DepGraph(
        modules=frozenset(),
        edges=(),
        cycles=(),
        max_depth=0,
        longest_chains=(),
    )
    dead_items: tuple[DeadItem, ...] = ()
    for family in METRIC_FAMILIES.values():
        # Single-project aggregation: each family aggregates over one result.
        aggregate = family.aggregate([family.compute(context)])
        project_fields.update(aggregate.project_fields)
        dep_graph = cast("DepGraph", aggregate.artifacts.get("dep_graph", dep_graph))
        dead_items = cast(
            "tuple[DeadItem, ...]",
            aggregate.artifacts.get("dead_items", dead_items),
        )
    return build_project_metrics(project_fields), dep_graph, dead_items


def compute_suggestions(
    *,
    project_metrics: ProjectMetrics,
    units: Sequence[GroupItemLike],
    class_metrics: Sequence[ClassMetrics],
    func_groups: Mapping[str, Sequence[GroupItemLike]],
    block_groups: Mapping[str, Sequence[GroupItemLike]],
    segment_groups: Mapping[str, Sequence[GroupItemLike]],
    block_group_facts: Mapping[str, Mapping[str, str]] | None = None,
    structural_findings: Sequence[StructuralFindingGroup] | None = None,
    scan_root: str = "",
) -> tuple[Suggestion, ...]:
    """Thin, keyword-preserving delegation to the suggestions generator."""
    return generate_suggestions(
        project_metrics=project_metrics,
        units=units,
        class_metrics=class_metrics,
        func_groups=func_groups,
        block_groups=block_groups,
        segment_groups=segment_groups,
        block_group_facts=block_group_facts,
        structural_findings=structural_findings,
        scan_root=scan_root,
    )
def analyze(
    *,
    boot: BootstrapResult,
    discovery: DiscoveryResult,
    processing: ProcessingResult,
) -> AnalysisResult:
    """Turn raw processing output into the final analysis result.

    Builds clone groups (function/block/segment), separates golden-fixture
    suppressions, reuses a cached segment-report projection when its digest
    still matches, and — unless metrics are skipped — computes project
    metrics, suggestions, the optional coverage join, and the metrics
    report payload.
    """
    # Normalize configured golden-fixture patterns; empty strings are dropped.
    golden_fixture_paths = tuple(
        str(pattern).strip()
        for pattern in getattr(boot.args, "golden_fixture_paths", ())
        if str(pattern).strip()
    )
    func_split = split_clone_groups_for_golden_fixtures(
        groups=build_groups(processing.units),
        kind="function",
        golden_fixture_paths=golden_fixture_paths,
        scan_root=str(boot.root),
    )
    block_split = split_clone_groups_for_golden_fixtures(
        groups=build_block_groups(processing.blocks),
        kind="block",
        golden_fixture_paths=golden_fixture_paths,
        scan_root=str(boot.root),
    )
    segment_split = split_clone_groups_for_golden_fixtures(
        groups=build_segment_groups(processing.segments),
        kind="segment",
        golden_fixture_paths=golden_fixture_paths,
        scan_root=str(boot.root),
    )

    func_groups = func_split.active_groups
    block_groups = block_split.active_groups
    segment_groups_raw = segment_split.active_groups
    segment_groups_raw_digest = _segment_groups_digest(segment_groups_raw)
    cached_projection = discovery.cached_segment_report_projection
    if (
        cached_projection is not None
        and cached_projection.get("digest") == segment_groups_raw_digest
    ):
        # Fast path: the cached projection matches the current raw segment
        # groups exactly (digest check), so reuse it instead of recomputing.
        projection_groups = cached_projection.get("groups", {})
        segment_groups = {
            group_key: [
                {
                    "segment_hash": str(item["segment_hash"]),
                    "segment_sig": str(item["segment_sig"]),
                    "filepath": str(item["filepath"]),
                    "qualname": str(item["qualname"]),
                    "start_line": int(item["start_line"]),
                    "end_line": int(item["end_line"]),
                    "size": int(item["size"]),
                }
                for item in projection_groups[group_key]
            ]
            for group_key in sorted(projection_groups)
        }
        suppressed_segment_groups = int(cached_projection.get("suppressed", 0))
    else:
        segment_groups, suppressed_segment_groups = prepare_segment_report_groups(
            segment_groups_raw
        )

    block_groups_report = prepare_block_report_groups(block_groups)
    suppressed_block_groups_report = prepare_block_report_groups(
        block_split.suppressed_groups
    )
    if segment_split.suppressed_groups:
        suppressed_segment_groups_report, _ = prepare_segment_report_groups(
            segment_split.suppressed_groups
        )
    else:
        suppressed_segment_groups_report = {}
    # Golden-fixture-suppressed groups of all three kinds, flattened.
    suppressed_clone_groups = (
        *build_suppressed_clone_groups(
            kind="function",
            groups=func_split.suppressed_groups,
            matched_patterns=func_split.matched_patterns,
        ),
        *build_suppressed_clone_groups(
            kind="block",
            groups=suppressed_block_groups_report,
            matched_patterns=block_split.matched_patterns,
        ),
        *build_suppressed_clone_groups(
            kind="segment",
            groups=suppressed_segment_groups_report,
            matched_patterns=segment_split.matched_patterns,
        ),
    )
    # Facts cover active AND suppressed block groups.
    block_group_facts = build_block_group_facts(
        {**block_groups_report, **suppressed_block_groups_report}
    )

    func_clones_count = len(func_groups)
    block_clones_count = len(block_groups)
    segment_clones_count = len(segment_groups)
    files_analyzed_or_cached = processing.files_analyzed + discovery.cache_hits

    # Metric-dependent outputs default to "absent" when --skip-metrics.
    project_metrics: ProjectMetrics | None = None
    metrics_payload: dict[str, object] | None = None
    suggestions: tuple[Suggestion, ...] = ()
    suppressed_dead_items: tuple[DeadItem, ...] = ()
    coverage_join: CoverageJoinResult | None = None
    cohort_structural_findings: tuple[StructuralFindingGroup, ...] = ()
    if _should_collect_structural_findings(boot.output_paths):
        cohort_structural_findings = build_clone_cohort_structural_findings(
            func_groups=func_groups
        )
    combined_structural_findings = (
        *processing.structural_findings,
        *cohort_structural_findings,
    )
    if not boot.args.skip_metrics:
        # Only the ProjectMetrics part is used; dep-graph and dead items are
        # already folded into it by the metric families.
        project_metrics, _, _ = compute_project_metrics(
            units=processing.units,
            class_metrics=processing.class_metrics,
            module_deps=processing.module_deps,
            dead_candidates=processing.dead_candidates,
            referenced_names=processing.referenced_names,
            referenced_qualnames=processing.referenced_qualnames,
            typing_modules=processing.typing_modules,
            docstring_modules=processing.docstring_modules,
            api_modules=processing.api_modules,
            files_found=discovery.files_found,
            files_analyzed_or_cached=files_analyzed_or_cached,
            function_clone_groups=func_clones_count,
            block_clone_groups=block_clones_count,
            skip_dependencies=boot.args.skip_dependencies,
            skip_dead_code=boot.args.skip_dead_code,
        )
        if not boot.args.skip_dead_code:
            suppressed_dead_items = find_suppressed_unused(
                definitions=tuple(processing.dead_candidates),
                referenced_names=processing.referenced_names,
                referenced_qualnames=processing.referenced_qualnames,
            )
        suggestions = compute_suggestions(
            project_metrics=project_metrics,
            units=processing.units,
            class_metrics=processing.class_metrics,
            func_groups=func_groups,
            block_groups=block_groups_report,
            segment_groups=segment_groups,
            block_group_facts=block_group_facts,
            structural_findings=combined_structural_findings,
            scan_root=str(boot.root),
        )
        coverage_xml_path = _resolve_optional_runtime_path(
            getattr(boot.args, "coverage_xml", None),
            root=boot.root,
        )
        if coverage_xml_path is not None:
            try:
                coverage_join = build_coverage_join(
                    coverage_xml=coverage_xml_path,
                    root_path=boot.root,
                    units=processing.units,
                    hotspot_threshold_percent=int(
                        getattr(boot.args, "coverage_min", 50)
                    ),
                )
            except CoverageJoinParseError as exc:
                # An unparsable coverage file is reported, not fatal.
                coverage_join = CoverageJoinResult(
                    coverage_xml=str(coverage_xml_path),
                    status="invalid",
                    hotspot_threshold_percent=int(
                        getattr(boot.args, "coverage_min", 50)
                    ),
                    invalid_reason=str(exc),
                )
        metrics_payload = build_metrics_report_payload(
            scan_root=str(boot.root),
            project_metrics=project_metrics,
            coverage_join=coverage_join,
            units=processing.units,
            class_metrics=processing.class_metrics,
            module_deps=processing.module_deps,
            source_stats_by_file=processing.source_stats_by_file,
            suppressed_dead_code=suppressed_dead_items,
        )

    return AnalysisResult(
        func_groups=func_groups,
        block_groups=block_groups,
        block_groups_report=block_groups_report,
        segment_groups=segment_groups,
        suppressed_clone_groups=tuple(suppressed_clone_groups),
        suppressed_segment_groups=suppressed_segment_groups,
        block_group_facts=block_group_facts,
        func_clones_count=func_clones_count,
        block_clones_count=block_clones_count,
        segment_clones_count=segment_clones_count,
        files_analyzed_or_cached=files_analyzed_or_cached,
        project_metrics=project_metrics,
        metrics_payload=metrics_payload,
        suggestions=suggestions,
        segment_groups_raw_digest=segment_groups_raw_digest,
        coverage_join=coverage_join,
        suppressed_dead_code_items=len(suppressed_dead_items),
        structural_findings=combined_structural_findings,
    )
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Callable, Collection, Mapping +from typing import cast + +from ..models import MetricsDiff +from ..report.document import build_report_document +from ..report.gates.evaluator import GateResult, GateState +from ..report.gates.evaluator import MetricGateConfig as _MetricGateConfig +from ..report.gates.evaluator import evaluate_gate_state as _evaluate_gate_state +from ..report.gates.evaluator import ( + gate_state_from_project_metrics as _gate_state_from_metrics, +) +from ..report.renderers.json import render_json_report_document +from ..report.renderers.text import render_text_report_document +from ._types import ( + AnalysisResult, + BootstrapResult, + DiscoveryResult, + ProcessingResult, + ReportArtifacts, +) +from .metrics_payload import _enrich_metrics_report_payload + +MetricGateConfig = _MetricGateConfig +GatingResult = GateResult + + +def _load_markdown_report_renderer() -> Callable[..., str]: + from ..report.markdown import to_markdown_report + + return to_markdown_report + + +def _load_sarif_report_renderer() -> Callable[..., str]: + from ..report.sarif import to_sarif_report + + return to_sarif_report + + +def report( + *, + boot: BootstrapResult, + discovery: DiscoveryResult, + processing: ProcessingResult, + analysis: AnalysisResult, + report_meta: Mapping[str, object], + new_func: Collection[str], + new_block: Collection[str], + html_builder: Callable[..., str] | None = None, + metrics_diff: object | None = None, + coverage_adoption_diff_available: bool = False, + api_surface_diff_available: bool = False, + include_report_document: bool = False, +) -> ReportArtifacts: + contents: dict[str, str | None] = { + "html": None, + "json": None, + "md": None, + "sarif": None, + "text": None, + } + structural_findings = ( + analysis.structural_findings if analysis.structural_findings else None + ) + report_inventory = { + 
"files": { + "total_found": discovery.files_found, + "analyzed": processing.files_analyzed, + "cached": discovery.cache_hits, + "skipped": processing.files_skipped, + "source_io_skipped": len(processing.source_read_failures), + }, + "code": { + "parsed_lines": processing.analyzed_lines + discovery.cached_lines, + "functions": processing.analyzed_functions + discovery.cached_functions, + "methods": processing.analyzed_methods + discovery.cached_methods, + "classes": processing.analyzed_classes + discovery.cached_classes, + }, + "file_list": list(discovery.all_file_paths), + } + report_document: dict[str, object] | None = None + needs_report_document = ( + include_report_document + or boot.output_paths.html is not None + or any( + path is not None + for path in ( + boot.output_paths.json, + boot.output_paths.md, + boot.output_paths.sarif, + boot.output_paths.text, + ) + ) + ) + if needs_report_document: + metrics_for_report = ( + _enrich_metrics_report_payload( + metrics_payload=analysis.metrics_payload, + metrics_diff=cast("MetricsDiff | None", metrics_diff), + coverage_adoption_diff_available=coverage_adoption_diff_available, + api_surface_diff_available=api_surface_diff_available, + ) + if analysis.metrics_payload is not None + else None + ) + report_document = build_report_document( + func_groups=analysis.func_groups, + block_groups=analysis.block_groups_report, + segment_groups=analysis.segment_groups, + suppressed_clone_groups=analysis.suppressed_clone_groups, + meta=report_meta, + inventory=report_inventory, + block_facts=analysis.block_group_facts, + new_function_group_keys=new_func, + new_block_group_keys=new_block, + new_segment_group_keys=set(analysis.segment_groups.keys()), + metrics=metrics_for_report, + suggestions=analysis.suggestions, + structural_findings=structural_findings, + ) + + if boot.output_paths.html and html_builder is not None: + metrics_for_html = ( + _enrich_metrics_report_payload( + metrics_payload=analysis.metrics_payload, + 
metrics_diff=cast("MetricsDiff | None", metrics_diff), + coverage_adoption_diff_available=coverage_adoption_diff_available, + api_surface_diff_available=api_surface_diff_available, + ) + if analysis.metrics_payload is not None + else None + ) + contents["html"] = html_builder( + func_groups=analysis.func_groups, + block_groups=analysis.block_groups_report, + segment_groups=analysis.segment_groups, + block_group_facts=analysis.block_group_facts, + new_function_group_keys=new_func, + new_block_group_keys=new_block, + report_meta=report_meta, + metrics=metrics_for_html, + suggestions=analysis.suggestions, + structural_findings=structural_findings, + report_document=report_document, + metrics_diff=metrics_diff, + title="CodeClone Report", + context_lines=3, + max_snippet_lines=220, + ) + + if any( + path is not None + for path in ( + boot.output_paths.json, + boot.output_paths.md, + boot.output_paths.sarif, + boot.output_paths.text, + ) + ): + assert report_document is not None + + if boot.output_paths.json and report_document is not None: + contents["json"] = render_json_report_document(report_document) + + def _render_projection_artifact(renderer: Callable[..., str]) -> str: + assert report_document is not None + return renderer( + report_document=report_document, + meta=report_meta, + inventory=report_inventory, + func_groups=analysis.func_groups, + block_groups=analysis.block_groups_report, + segment_groups=analysis.segment_groups, + block_facts=analysis.block_group_facts, + new_function_group_keys=new_func, + new_block_group_keys=new_block, + new_segment_group_keys=set(analysis.segment_groups.keys()), + metrics=analysis.metrics_payload, + suggestions=analysis.suggestions, + structural_findings=structural_findings, + ) + + for key, output_path, loader in ( + ("md", boot.output_paths.md, _load_markdown_report_renderer), + ("sarif", boot.output_paths.sarif, _load_sarif_report_renderer), + ): + if output_path and report_document is not None: + contents[key] = 
_render_projection_artifact(loader()) + + if boot.output_paths.text and report_document is not None: + contents["text"] = render_text_report_document(report_document) + + return ReportArtifacts( + html=contents["html"], + json=contents["json"], + md=contents["md"], + sarif=contents["sarif"], + text=contents["text"], + report_document=report_document, + ) + + +def gate( + *, + boot: BootstrapResult, + analysis: AnalysisResult, + new_func: Collection[str], + new_block: Collection[str], + metrics_diff: MetricsDiff | None, +) -> GatingResult: + config = MetricGateConfig( + fail_complexity=boot.args.fail_complexity, + fail_coupling=boot.args.fail_coupling, + fail_cohesion=boot.args.fail_cohesion, + fail_cycles=boot.args.fail_cycles, + fail_dead_code=boot.args.fail_dead_code, + fail_health=boot.args.fail_health, + fail_on_new_metrics=boot.args.fail_on_new_metrics, + fail_on_typing_regression=bool( + getattr(boot.args, "fail_on_typing_regression", False) + ), + fail_on_docstring_regression=bool( + getattr(boot.args, "fail_on_docstring_regression", False) + ), + fail_on_api_break=bool(getattr(boot.args, "fail_on_api_break", False)), + fail_on_untested_hotspots=bool( + getattr(boot.args, "fail_on_untested_hotspots", False) + ), + min_typing_coverage=int(getattr(boot.args, "min_typing_coverage", -1)), + min_docstring_coverage=int(getattr(boot.args, "min_docstring_coverage", -1)), + coverage_min=int(getattr(boot.args, "coverage_min", 50)), + fail_on_new=bool(getattr(boot.args, "fail_on_new", False)), + fail_threshold=int(getattr(boot.args, "fail_threshold", -1)), + ) + clone_new_count = len(tuple(new_func)) + len(tuple(new_block)) + clone_total = analysis.func_clones_count + analysis.block_clones_count + if analysis.project_metrics is None: + state = GateState(clone_new_count=clone_new_count, clone_total=clone_total) + else: + state = _gate_state_from_metrics( + project_metrics=analysis.project_metrics, + coverage_join=analysis.coverage_join, + metrics_diff=metrics_diff, + 
clone_new_count=clone_new_count, + clone_total=clone_total, + ) + result = _evaluate_gate_state(state=state, config=config) + return GatingResult(exit_code=result.exit_code, reasons=result.reasons) diff --git a/codeclone/core/worker.py b/codeclone/core/worker.py new file mode 100644 index 0000000..07fdd1d --- /dev/null +++ b/codeclone/core/worker.py @@ -0,0 +1,166 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import inspect +import os +from collections.abc import Callable +from pathlib import Path +from typing import cast + +from ..analysis.normalizer import NormalizationConfig +from ..analysis.units import extract_units_and_stats_from_source +from ..cache import FileStat +from ..scanner import module_name_from_path +from ._types import MAX_FILE_SIZE, FileProcessResult + + +def process_file( + filepath: str, + root: str, + cfg: NormalizationConfig, + min_loc: int, + min_stmt: int, + collect_structural_findings: bool = True, + collect_api_surface: bool = False, + api_include_private_modules: bool = False, + block_min_loc: int = 20, + block_min_stmt: int = 8, + segment_min_loc: int = 20, + segment_min_stmt: int = 10, +) -> FileProcessResult: + try: + try: + stat_result = os.stat(filepath) + if stat_result.st_size > MAX_FILE_SIZE: + return FileProcessResult( + filepath=filepath, + success=False, + error=( + f"File too large: {stat_result.st_size} bytes " + f"(max {MAX_FILE_SIZE})" + ), + error_kind="file_too_large", + ) + except OSError as exc: + return FileProcessResult( + filepath=filepath, + success=False, + error=f"Cannot stat file: {exc}", + error_kind="stat_error", + ) + stat: FileStat = { + "mtime_ns": stat_result.st_mtime_ns, + "size": stat_result.st_size, + } + try: + source = 
Path(filepath).read_text("utf-8") + except UnicodeDecodeError as exc: + return FileProcessResult( + filepath=filepath, + success=False, + error=f"Encoding error: {exc}", + error_kind="source_read_error", + ) + except OSError as exc: + return FileProcessResult( + filepath=filepath, + success=False, + error=f"Cannot read file: {exc}", + error_kind="source_read_error", + ) + module_name = module_name_from_path(root, filepath) + units, blocks, segments, source_stats, file_metrics, structural_findings = ( + extract_units_and_stats_from_source( + source=source, + filepath=filepath, + module_name=module_name, + cfg=cfg, + min_loc=min_loc, + min_stmt=min_stmt, + block_min_loc=block_min_loc, + block_min_stmt=block_min_stmt, + segment_min_loc=segment_min_loc, + segment_min_stmt=segment_min_stmt, + collect_structural_findings=collect_structural_findings, + collect_api_surface=collect_api_surface, + api_include_private_modules=api_include_private_modules, + ) + ) + return FileProcessResult( + filepath=filepath, + success=True, + units=units, + blocks=blocks, + segments=segments, + lines=source_stats.lines, + functions=source_stats.functions, + methods=source_stats.methods, + classes=source_stats.classes, + stat=stat, + file_metrics=file_metrics, + structural_findings=structural_findings, + ) + except Exception as exc: # pragma: no cover - defensive shell around workers + return FileProcessResult( + filepath=filepath, + success=False, + error=f"Unexpected error: {type(exc).__name__}: {exc}", + error_kind="unexpected_error", + ) + + +def _invoke_process_file( + filepath: str, + root: str, + cfg: NormalizationConfig, + min_loc: int, + min_stmt: int, + *, + collect_structural_findings: bool, + collect_api_surface: bool, + api_include_private_modules: bool, + block_min_loc: int, + block_min_stmt: int, + segment_min_loc: int, + segment_min_stmt: int, +) -> FileProcessResult: + optional_kwargs: dict[str, object] = { + "collect_structural_findings": collect_structural_findings, + 
"collect_api_surface": collect_api_surface, + "api_include_private_modules": api_include_private_modules, + "block_min_loc": block_min_loc, + "block_min_stmt": block_min_stmt, + "segment_min_loc": segment_min_loc, + "segment_min_stmt": segment_min_stmt, + } + try: + signature = inspect.signature(process_file) + except (TypeError, ValueError): + supported_kwargs = optional_kwargs + else: + parameters = tuple(signature.parameters.values()) + if any( + parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in parameters + ): + supported_kwargs = optional_kwargs + else: + supported_names = {parameter.name for parameter in parameters} + supported_kwargs = { + key: value + for key, value in optional_kwargs.items() + if key in supported_names + } + process_callable = cast("Callable[..., FileProcessResult]", process_file) + return process_callable( + filepath, + root, + cfg, + min_loc, + min_stmt, + **supported_kwargs, + ) diff --git a/codeclone/extractor.py b/codeclone/extractor.py deleted file mode 100644 index bacbef4..0000000 --- a/codeclone/extractor.py +++ /dev/null @@ -1,1149 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import ast -import io -import math -import os -import signal -import tokenize -from contextlib import contextmanager -from dataclasses import dataclass, field -from hashlib import sha1 as _sha1 -from typing import TYPE_CHECKING, Literal, NamedTuple - -from . 
import qualnames as _qualnames -from .blocks import extract_blocks, extract_segments -from .cfg import CFGBuilder -from .errors import ParseError -from .fingerprint import bucket_loc, sha1 -from .metrics import ( - cohesion_risk, - compute_cbo, - compute_lcom4, - coupling_risk, - cyclomatic_complexity, - risk_level, -) -from .metrics.adoption import collect_module_adoption -from .metrics.api_surface import collect_module_api_surface -from .models import ( - BlockUnit, - ClassMetrics, - DeadCandidate, - FileMetrics, - ModuleDep, - SegmentUnit, - SourceStats, - StructuralFindingGroup, - Unit, -) -from .normalize import ( - AstNormalizer, - NormalizationConfig, - normalized_ast_dump_from_list, - stmt_hashes, -) -from .paths import is_test_filepath -from .structural_findings import scan_function_structure -from .suppressions import ( - DeclarationTarget, - bind_suppressions_to_declarations, - build_suppression_index, - extract_suppression_directives, - suppression_target_key, -) - -if TYPE_CHECKING: - from collections.abc import Iterator, Mapping - - from .suppressions import SuppressionTargetKey - -__all__ = [ - "Unit", - "extract_units_and_stats_from_source", -] - -# ========================= -# Helpers -# ========================= - -PARSE_TIMEOUT_SECONDS = 5 - - -class _ParseTimeoutError(Exception): - pass - - -# Any named declaration: function, async function, or class. -_NamedDeclarationNode = _qualnames.FunctionNode | ast.ClassDef -# Unique key for a declaration's token index: (start_line, end_line, qualname). 
-_DeclarationTokenIndexKey = tuple[int, int, str] -_DECLARATION_TOKEN_STRINGS = frozenset({"def", "async", "class"}) - - -def _consumed_cpu_seconds(resource_module: object) -> float: - """Return consumed CPU seconds for the current process.""" - try: - usage = resource_module.getrusage( # type: ignore[attr-defined] - resource_module.RUSAGE_SELF # type: ignore[attr-defined] - ) - return float(usage.ru_utime) + float(usage.ru_stime) - except Exception: - return 0.0 - - -@contextmanager -def _parse_limits(timeout_s: int) -> Iterator[None]: - if os.name != "posix" or timeout_s <= 0: - yield - return - - old_handler = signal.getsignal(signal.SIGALRM) - - def _timeout_handler(_signum: int, _frame: object) -> None: - raise _ParseTimeoutError("AST parsing timeout") - - old_limits: tuple[int, int] | None = None - try: - signal.signal(signal.SIGALRM, _timeout_handler) - signal.setitimer(signal.ITIMER_REAL, timeout_s) - - try: - import resource - - old_limits = resource.getrlimit(resource.RLIMIT_CPU) - soft, hard = old_limits - consumed_cpu_s = _consumed_cpu_seconds(resource) - desired_soft = max(1, timeout_s + math.ceil(consumed_cpu_s)) - if soft == resource.RLIM_INFINITY: - candidate_soft = desired_soft - else: - # Never reduce finite soft limits and avoid immediate SIGXCPU - # when the process already consumed more CPU than timeout_s. - candidate_soft = max(soft, desired_soft) - if hard == resource.RLIM_INFINITY: - new_soft = candidate_soft - else: - new_soft = min(max(1, hard), candidate_soft) - # Never lower hard limit: raising it back may be disallowed for - # unprivileged processes and can lead to process termination later. - resource.setrlimit(resource.RLIMIT_CPU, (new_soft, hard)) - except Exception: - # If resource is unavailable or cannot be set, rely on alarm only. 
- pass - - yield - finally: - signal.setitimer(signal.ITIMER_REAL, 0) - signal.signal(signal.SIGALRM, old_handler) - if old_limits is not None: - try: - import resource - - resource.setrlimit(resource.RLIMIT_CPU, old_limits) - except Exception: - pass - - -def _parse_with_limits(source: str, timeout_s: int) -> ast.AST: - try: - with _parse_limits(timeout_s): - return ast.parse(source) - except _ParseTimeoutError as e: - raise ParseError(str(e)) from e - - -def _stmt_count(node: ast.AST) -> int: - body = getattr(node, "body", None) - return len(body) if isinstance(body, list) else 0 - - -def _source_tokens(source: str) -> tuple[tokenize.TokenInfo, ...]: - try: - return tuple(tokenize.generate_tokens(io.StringIO(source).readline)) - except tokenize.TokenError: - return () - - -def _declaration_token_name(node: ast.AST) -> str: - if isinstance(node, ast.ClassDef): - return "class" - if isinstance(node, ast.AsyncFunctionDef): - return "async" - return "def" - - -def _declaration_token_index( - *, - source_tokens: tuple[tokenize.TokenInfo, ...], - start_line: int, - start_col: int, - declaration_token: str, - source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None, -) -> int | None: - if source_token_index is not None: - return source_token_index.get((start_line, start_col, declaration_token)) - for idx, token in enumerate(source_tokens): - if token.start != (start_line, start_col): - continue - if token.type == tokenize.NAME and token.string == declaration_token: - return idx - return None - - -def _build_declaration_token_index( - source_tokens: tuple[tokenize.TokenInfo, ...], -) -> Mapping[_DeclarationTokenIndexKey, int]: - indexed: dict[_DeclarationTokenIndexKey, int] = {} - for idx, token in enumerate(source_tokens): - if token.type == tokenize.NAME and token.string in _DECLARATION_TOKEN_STRINGS: - indexed[(token.start[0], token.start[1], token.string)] = idx - return indexed - - -def _scan_declaration_colon_line( - *, - source_tokens: 
tuple[tokenize.TokenInfo, ...], - start_index: int, -) -> int | None: - nesting = 0 - for token in source_tokens[start_index + 1 :]: - if token.type == tokenize.OP: - if token.string in "([{": - nesting += 1 - continue - if token.string in ")]}": - if nesting > 0: - nesting -= 1 - continue - if token.string == ":" and nesting == 0: - return token.start[0] - if token.type == tokenize.NEWLINE and nesting == 0: - return None - return None - - -def _fallback_declaration_end_line(node: ast.AST, *, start_line: int) -> int: - body = getattr(node, "body", None) - if not isinstance(body, list) or not body: - return start_line - - first_body_line = int(getattr(body[0], "lineno", 0)) - if first_body_line <= 0 or first_body_line == start_line: - return start_line - return max(start_line, first_body_line - 1) - - -def _declaration_end_line( - node: ast.AST, - *, - source_tokens: tuple[tokenize.TokenInfo, ...], - source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None, -) -> int: - start_line = int(getattr(node, "lineno", 0)) - start_col = int(getattr(node, "col_offset", 0)) - if start_line <= 0: - return 0 - - declaration_token = _declaration_token_name(node) - start_index = _declaration_token_index( - source_tokens=source_tokens, - start_line=start_line, - start_col=start_col, - declaration_token=declaration_token, - source_token_index=source_token_index, - ) - if start_index is None: - return _fallback_declaration_end_line(node, start_line=start_line) - - colon_line = _scan_declaration_colon_line( - source_tokens=source_tokens, - start_index=start_index, - ) - if colon_line is not None: - return colon_line - return _fallback_declaration_end_line(node, start_line=start_line) - - -# ========================= -# CFG fingerprinting -# ========================= - - -def _cfg_fingerprint_and_complexity( - node: _qualnames.FunctionNode, - cfg: NormalizationConfig, - qualname: str, -) -> tuple[str, int]: - """ - Generate a structural fingerprint for a function using 
CFG analysis. - - The fingerprint is computed by: - 1. Building a Control Flow Graph (CFG) from the function - 2. Normalizing each CFG block's statements (variable names, constants, etc.) - 3. Creating a canonical representation of the CFG structure - 4. Hashing the representation with SHA-1 - - Functions with identical control flow and normalized statements will - produce the same fingerprint, even if they differ in variable names, - constants, or type annotations. - - Args: - node: Function AST node to fingerprint - cfg: Normalization configuration (what to ignore) - qualname: Qualified name for logging/debugging - - Returns: - 40-character hex SHA-1 hash of the normalized CFG - """ - builder = CFGBuilder() - graph = builder.build(qualname, node) - cfg_normalizer = AstNormalizer(cfg) - - # Use generator to avoid building large list of strings - parts: list[str] = [] - for block in sorted(graph.blocks, key=lambda b: b.id): - succ_ids = ",".join( - str(s.id) for s in sorted(block.successors, key=lambda s: s.id) - ) - block_dump = normalized_ast_dump_from_list( - block.statements, - cfg, - normalizer=cfg_normalizer, - ) - parts.append(f"BLOCK[{block.id}]:{block_dump}|SUCCESSORS:{succ_ids}") - return sha1("|".join(parts)), cyclomatic_complexity(graph) - - -def _raw_source_hash_for_range( - source_lines: list[str], - start_line: int, - end_line: int, -) -> str: - window = "".join(source_lines[start_line - 1 : end_line]).strip() - no_space = "".join(window.split()) - return _sha1(no_space.encode("utf-8")).hexdigest() - - -def _resolve_import_target( - module_name: str, - import_node: ast.ImportFrom, -) -> str: - if import_node.level <= 0: - return import_node.module or "" - - parent_parts = module_name.split(".") - keep = max(0, len(parent_parts) - import_node.level) - prefix = parent_parts[:keep] - if import_node.module: - return ".".join([*prefix, import_node.module]) - return ".".join(prefix) - - -_PROTOCOL_MODULE_NAMES = frozenset({"typing", "typing_extensions"}) - 
- -@dataclass(slots=True) -class _ModuleWalkState: - import_names: set[str] = field(default_factory=set) - deps: list[ModuleDep] = field(default_factory=list) - referenced_names: set[str] = field(default_factory=set) - imported_symbol_bindings: dict[str, set[str]] = field(default_factory=dict) - imported_module_aliases: dict[str, str] = field(default_factory=dict) - name_nodes: list[ast.Name] = field(default_factory=list) - attr_nodes: list[ast.Attribute] = field(default_factory=list) - protocol_symbol_aliases: set[str] = field(default_factory=lambda: {"Protocol"}) - protocol_module_aliases: set[str] = field( - default_factory=lambda: set(_PROTOCOL_MODULE_NAMES) - ) - - -def _append_module_dep( - *, - module_name: str, - target: str, - import_type: Literal["import", "from_import"], - line: int, - state: _ModuleWalkState, -) -> None: - state.deps.append( - ModuleDep( - source=module_name, - target=target, - import_type=import_type, - line=line, - ) - ) - - -def _collect_import_node( - *, - node: ast.Import, - module_name: str, - state: _ModuleWalkState, - collect_referenced_names: bool, -) -> None: - line = int(getattr(node, "lineno", 0)) - for alias in node.names: - alias_name = alias.asname or alias.name.split(".", 1)[0] - state.import_names.add(alias_name) - _append_module_dep( - module_name=module_name, - target=alias.name, - import_type="import", - line=line, - state=state, - ) - if collect_referenced_names: - state.imported_module_aliases[alias_name] = alias.name - if alias.name in _PROTOCOL_MODULE_NAMES: - state.protocol_module_aliases.add(alias_name) - - -def _dotted_expr_name(expr: ast.expr) -> str | None: - if isinstance(expr, ast.Name): - return expr.id - if isinstance(expr, ast.Attribute): - prefix = _dotted_expr_name(expr.value) - if prefix is None: - return None - return f"{prefix}.{expr.attr}" - return None - - -def _collect_import_from_node( - *, - node: ast.ImportFrom, - module_name: str, - state: _ModuleWalkState, - collect_referenced_names: bool, 
-) -> None: - target = _resolve_import_target(module_name, node) - if target: - state.import_names.add(target.split(".", 1)[0]) - _append_module_dep( - module_name=module_name, - target=target, - import_type="from_import", - line=int(getattr(node, "lineno", 0)), - state=state, - ) - - if node.module in _PROTOCOL_MODULE_NAMES: - for alias in node.names: - if alias.name == "Protocol": - state.protocol_symbol_aliases.add(alias.asname or alias.name) - - if not collect_referenced_names or not target: - return - - for alias in node.names: - if alias.name == "*": - continue - alias_name = alias.asname or alias.name - state.imported_symbol_bindings.setdefault(alias_name, set()).add( - f"{target}:{alias.name}" - ) - - -def _is_protocol_class( - class_node: ast.ClassDef, - *, - protocol_symbol_aliases: frozenset[str], - protocol_module_aliases: frozenset[str], -) -> bool: - for base in class_node.bases: - base_name = _dotted_expr_name(base) - if base_name is None: - continue - if base_name in protocol_symbol_aliases: - return True - if "." 
in base_name and base_name.rsplit(".", 1)[-1] == "Protocol": - module_alias = base_name.rsplit(".", 1)[0] - if module_alias in protocol_module_aliases: - return True - return False - - -def _is_non_runtime_candidate(node: _qualnames.FunctionNode) -> bool: - for decorator in node.decorator_list: - name = _dotted_expr_name(decorator) - if name is None: - continue - terminal = name.rsplit(".", 1)[-1] - if terminal in {"overload", "abstractmethod"}: - return True - return False - - -def _node_line_span(node: ast.AST) -> tuple[int, int] | None: - start = int(getattr(node, "lineno", 0)) - end = int(getattr(node, "end_lineno", 0)) - if start <= 0 or end <= 0: - return None - return start, end - - -def _eligible_unit_shape( - node: _qualnames.FunctionNode, - *, - min_loc: int, - min_stmt: int, -) -> tuple[int, int, int, int] | None: - span = _node_line_span(node) - if span is None: - return None - start, end = span - if end < start: - return None - loc = end - start + 1 - stmt_count = _stmt_count(node) - if loc < min_loc or stmt_count < min_stmt: - return None - return start, end, loc, stmt_count - - -def _class_metrics_for_node( - *, - module_name: str, - class_qualname: str, - class_node: ast.ClassDef, - filepath: str, - module_import_names: set[str], - module_class_names: set[str], -) -> ClassMetrics | None: - span = _node_line_span(class_node) - if span is None: - return None - start, end = span - cbo, coupled_classes = compute_cbo( - class_node, - module_import_names=module_import_names, - module_class_names=module_class_names, - ) - lcom4, method_count, instance_var_count = compute_lcom4(class_node) - return ClassMetrics( - qualname=f"{module_name}:{class_qualname}", - filepath=filepath, - start_line=start, - end_line=end, - cbo=cbo, - lcom4=lcom4, - method_count=method_count, - instance_var_count=instance_var_count, - risk_coupling=coupling_risk(cbo), - risk_cohesion=cohesion_risk(lcom4), - coupled_classes=coupled_classes, - ) - - -def 
_dead_candidate_kind(local_name: str) -> Literal["function", "method"]: - return "method" if "." in local_name else "function" - - -def _should_skip_dead_candidate( - local_name: str, - node: _qualnames.FunctionNode, - *, - protocol_class_qualnames: set[str], -) -> bool: - if _is_non_runtime_candidate(node): - return True - if "." not in local_name: - return False - owner_qualname = local_name.rsplit(".", 1)[0] - return owner_qualname in protocol_class_qualnames - - -def _build_dead_candidate( - *, - module_name: str, - local_name: str, - node: _NamedDeclarationNode, - filepath: str, - kind: Literal["class", "function", "method"], - suppression_index: Mapping[SuppressionTargetKey, tuple[str, ...]], - start_line: int, - end_line: int, -) -> DeadCandidate: - qualname = f"{module_name}:{local_name}" - return DeadCandidate( - qualname=qualname, - local_name=node.name, - filepath=filepath, - start_line=start_line, - end_line=end_line, - kind=kind, - suppressed_rules=suppression_index.get( - suppression_target_key( - filepath=filepath, - qualname=qualname, - start_line=start_line, - end_line=end_line, - kind=kind, - ), - (), - ), - ) - - -def _dead_candidate_for_unit( - *, - module_name: str, - local_name: str, - node: _qualnames.FunctionNode, - filepath: str, - suppression_index: Mapping[SuppressionTargetKey, tuple[str, ...]], - protocol_class_qualnames: set[str], -) -> DeadCandidate | None: - span = _node_line_span(node) - if span is None: - return None - if _should_skip_dead_candidate( - local_name, - node, - protocol_class_qualnames=protocol_class_qualnames, - ): - return None - start, end = span - return _build_dead_candidate( - module_name=module_name, - local_name=local_name, - node=node, - filepath=filepath, - kind=_dead_candidate_kind(local_name), - suppression_index=suppression_index, - start_line=start, - end_line=end, - ) - - -def _collect_load_reference_node( - *, - node: ast.AST, - state: _ModuleWalkState, -) -> None: - if isinstance(node, ast.Name) and 
isinstance(node.ctx, ast.Load): - state.referenced_names.add(node.id) - state.name_nodes.append(node) - return - if isinstance(node, ast.Attribute) and isinstance(node.ctx, ast.Load): - state.referenced_names.add(node.attr) - state.attr_nodes.append(node) - - -def _resolve_referenced_qualnames( - *, - module_name: str, - collector: _qualnames.QualnameCollector, - state: _ModuleWalkState, -) -> frozenset[str]: - top_level_class_by_name = { - class_qualname: class_qualname - for class_qualname, _class_node in collector.class_nodes - if "." not in class_qualname - } - local_method_qualnames = frozenset( - f"{module_name}:{local_name}" - for local_name, _node in collector.units - if "." in local_name - ) - - resolved: set[str] = set() - for name_node in state.name_nodes: - for qualname in state.imported_symbol_bindings.get(name_node.id, ()): - resolved.add(qualname) - - for attr_node in state.attr_nodes: - base = attr_node.value - if isinstance(base, ast.Name): - imported_module = state.imported_module_aliases.get(base.id) - if imported_module is not None: - resolved.add(f"{imported_module}:{attr_node.attr}") - else: - class_qualname = top_level_class_by_name.get(base.id) - if class_qualname is not None: - local_method_qualname = ( - f"{module_name}:{class_qualname}.{attr_node.attr}" - ) - if local_method_qualname in local_method_qualnames: - resolved.add(local_method_qualname) - - return frozenset(resolved) - - -class _ModuleWalkResult(NamedTuple): - import_names: frozenset[str] - module_deps: tuple[ModuleDep, ...] - referenced_names: frozenset[str] - referenced_qualnames: frozenset[str] - protocol_symbol_aliases: frozenset[str] - protocol_module_aliases: frozenset[str] - - -def _collect_module_walk_data( - *, - tree: ast.AST, - module_name: str, - collector: _qualnames.QualnameCollector, - collect_referenced_names: bool, -) -> _ModuleWalkResult: - """Single ast.walk that collects imports, deps, names, qualnames & protocol aliases. 
- - Reduces the hot path to one tree walk plus one local qualname resolution phase. - """ - state = _ModuleWalkState() - for node in ast.walk(tree): - if isinstance(node, ast.Import): - _collect_import_node( - node=node, - module_name=module_name, - state=state, - collect_referenced_names=collect_referenced_names, - ) - elif isinstance(node, ast.ImportFrom): - _collect_import_from_node( - node=node, - module_name=module_name, - state=state, - collect_referenced_names=collect_referenced_names, - ) - elif collect_referenced_names: - _collect_load_reference_node(node=node, state=state) - - deps_sorted = tuple( - sorted( - state.deps, - key=lambda dep: (dep.source, dep.target, dep.import_type, dep.line), - ) - ) - resolved = ( - _resolve_referenced_qualnames( - module_name=module_name, - collector=collector, - state=state, - ) - if collect_referenced_names - else frozenset() - ) - - return _ModuleWalkResult( - import_names=frozenset(state.import_names), - module_deps=deps_sorted, - referenced_names=frozenset(state.referenced_names), - referenced_qualnames=resolved, - protocol_symbol_aliases=frozenset(state.protocol_symbol_aliases), - protocol_module_aliases=frozenset(state.protocol_module_aliases), - ) - - -def _collect_dead_candidates( - *, - filepath: str, - module_name: str, - collector: _qualnames.QualnameCollector, - protocol_symbol_aliases: frozenset[str] = frozenset({"Protocol"}), - protocol_module_aliases: frozenset[str] = frozenset( - {"typing", "typing_extensions"} - ), - suppression_rules_by_target: Mapping[SuppressionTargetKey, tuple[str, ...]] - | None = None, -) -> tuple[DeadCandidate, ...]: - protocol_class_qualnames = { - class_qualname - for class_qualname, class_node in collector.class_nodes - if _is_protocol_class( - class_node, - protocol_symbol_aliases=protocol_symbol_aliases, - protocol_module_aliases=protocol_module_aliases, - ) - } - - candidates: list[DeadCandidate] = [] - suppression_index = ( - suppression_rules_by_target if 
suppression_rules_by_target is not None else {} - ) - for local_name, node in collector.units: - candidate = _dead_candidate_for_unit( - module_name=module_name, - local_name=local_name, - node=node, - filepath=filepath, - suppression_index=suppression_index, - protocol_class_qualnames=protocol_class_qualnames, - ) - if candidate is not None: - candidates.append(candidate) - - for class_qualname, class_node in collector.class_nodes: - span = _node_line_span(class_node) - if span is not None: - start, end = span - candidates.append( - _build_dead_candidate( - module_name=module_name, - local_name=class_qualname, - node=class_node, - filepath=filepath, - kind="class", - suppression_index=suppression_index, - start_line=start, - end_line=end, - ) - ) - - return tuple( - sorted( - candidates, - key=lambda item: ( - item.filepath, - item.start_line, - item.end_line, - item.qualname, - ), - ) - ) - - -def _collect_declaration_targets( - *, - filepath: str, - module_name: str, - collector: _qualnames.QualnameCollector, - source_tokens: tuple[tokenize.TokenInfo, ...] = (), - source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None, - include_inline_lines: bool = False, -) -> tuple[DeclarationTarget, ...]: - declarations: list[DeclarationTarget] = [] - declaration_specs: list[ - tuple[str, ast.AST, Literal["function", "method", "class"]] - ] = [ - ( - local_name, - node, - "method" if "." 
in local_name else "function", - ) - for local_name, node in collector.units - ] - declaration_specs.extend( - (class_qualname, class_node, "class") - for class_qualname, class_node in collector.class_nodes - ) - - for qualname_suffix, node, kind in declaration_specs: - start = int(getattr(node, "lineno", 0)) - end = int(getattr(node, "end_lineno", 0)) - if start > 0 and end > 0: - declaration_end_line = ( - _declaration_end_line( - node, - source_tokens=source_tokens, - source_token_index=source_token_index, - ) - if include_inline_lines - else None - ) - declarations.append( - DeclarationTarget( - filepath=filepath, - qualname=f"{module_name}:{qualname_suffix}", - start_line=start, - end_line=end, - kind=kind, - declaration_end_line=declaration_end_line, - ) - ) - - return tuple( - sorted( - declarations, - key=lambda item: ( - item.filepath, - item.start_line, - item.end_line, - item.qualname, - item.kind, - ), - ) - ) - - -def _build_suppression_index_for_source( - *, - source: str, - filepath: str, - module_name: str, - collector: _qualnames.QualnameCollector, -) -> Mapping[SuppressionTargetKey, tuple[str, ...]]: - suppression_directives = extract_suppression_directives(source) - if not suppression_directives: - return {} - - needs_inline_binding = any( - directive.binding == "inline" for directive in suppression_directives - ) - source_tokens: tuple[tokenize.TokenInfo, ...] 
= () - source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None - if needs_inline_binding: - source_tokens = _source_tokens(source) - if source_tokens: - source_token_index = _build_declaration_token_index(source_tokens) - - declaration_targets = _collect_declaration_targets( - filepath=filepath, - module_name=module_name, - collector=collector, - source_tokens=source_tokens, - source_token_index=source_token_index, - include_inline_lines=needs_inline_binding, - ) - suppression_bindings = bind_suppressions_to_declarations( - directives=suppression_directives, - declarations=declaration_targets, - ) - return build_suppression_index(suppression_bindings) - - -# ========================= -# Public API -# ========================= - - -def extract_units_and_stats_from_source( - source: str, - filepath: str, - module_name: str, - cfg: NormalizationConfig, - min_loc: int, - min_stmt: int, - *, - block_min_loc: int = 20, - block_min_stmt: int = 8, - segment_min_loc: int = 20, - segment_min_stmt: int = 10, - collect_structural_findings: bool = True, - collect_api_surface: bool = False, - api_include_private_modules: bool = False, -) -> tuple[ - list[Unit], - list[BlockUnit], - list[SegmentUnit], - SourceStats, - FileMetrics, - list[StructuralFindingGroup], -]: - try: - tree = _parse_with_limits(source, PARSE_TIMEOUT_SECONDS) - except SyntaxError as e: - raise ParseError(f"Failed to parse {filepath}: {e}") from e - if not isinstance(tree, ast.Module): - raise ParseError(f"Failed to parse {filepath}: expected module AST root") - - collector = _qualnames.QualnameCollector() - collector.visit(tree) - source_lines = source.splitlines() - source_line_count = len(source_lines) - - is_test_file = is_test_filepath(filepath) - - # Single-pass AST walk replaces 3 separate functions / 4 walks. 
- _walk = _collect_module_walk_data( - tree=tree, - module_name=module_name, - collector=collector, - collect_referenced_names=not is_test_file, - ) - import_names = _walk.import_names - module_deps = _walk.module_deps - referenced_names = _walk.referenced_names - referenced_qualnames = _walk.referenced_qualnames - protocol_symbol_aliases = _walk.protocol_symbol_aliases - protocol_module_aliases = _walk.protocol_module_aliases - - suppression_index = _build_suppression_index_for_source( - source=source, - filepath=filepath, - module_name=module_name, - collector=collector, - ) - class_names = frozenset(class_node.name for _, class_node in collector.class_nodes) - module_import_names = set(import_names) - module_class_names = set(class_names) - class_metrics: list[ClassMetrics] = [] - - units: list[Unit] = [] - block_units: list[BlockUnit] = [] - segment_units: list[SegmentUnit] = [] - structural_findings: list[StructuralFindingGroup] = [] - - for local_name, node in collector.units: - unit_shape = _eligible_unit_shape( - node, - min_loc=min_loc, - min_stmt=min_stmt, - ) - if unit_shape is None: - continue - start, end, loc, stmt_count = unit_shape - - qualname = f"{module_name}:{local_name}" - fingerprint, complexity = _cfg_fingerprint_and_complexity(node, cfg, qualname) - structure_facts = scan_function_structure( - node, - filepath, - qualname, - collect_findings=collect_structural_findings, - ) - depth = structure_facts.nesting_depth - risk = risk_level(complexity) - raw_hash = _raw_source_hash_for_range(source_lines, start, end) - - units.append( - Unit( - qualname=qualname, - filepath=filepath, - start_line=start, - end_line=end, - loc=loc, - stmt_count=stmt_count, - fingerprint=fingerprint, - loc_bucket=bucket_loc(loc), - cyclomatic_complexity=complexity, - nesting_depth=depth, - risk=risk, - raw_hash=raw_hash, - entry_guard_count=structure_facts.entry_guard_count, - entry_guard_terminal_profile=( - structure_facts.entry_guard_terminal_profile - ), - 
entry_guard_has_side_effect_before=( - structure_facts.entry_guard_has_side_effect_before - ), - terminal_kind=structure_facts.terminal_kind, - try_finally_profile=structure_facts.try_finally_profile, - side_effect_order_profile=structure_facts.side_effect_order_profile, - ) - ) - - needs_blocks = ( - not local_name.endswith("__init__") - and loc >= block_min_loc - and stmt_count >= block_min_stmt - ) - needs_segments = loc >= segment_min_loc and stmt_count >= segment_min_stmt - - if needs_blocks or needs_segments: - body = getattr(node, "body", None) - hashes: list[str] | None = None - if isinstance(body, list): - hashes = stmt_hashes(body, cfg) - - if needs_blocks: - block_units.extend( - extract_blocks( - node, - filepath=filepath, - qualname=qualname, - cfg=cfg, - block_size=4, - max_blocks=15, - precomputed_hashes=hashes, - ) - ) - - if needs_segments: - segment_units.extend( - extract_segments( - node, - filepath=filepath, - qualname=qualname, - cfg=cfg, - window_size=6, - max_segments=60, - precomputed_hashes=hashes, - ) - ) - - if collect_structural_findings: - structural_findings.extend(structure_facts.structural_findings) - - for class_qualname, class_node in collector.class_nodes: - class_metric = _class_metrics_for_node( - module_name=module_name, - class_qualname=class_qualname, - class_node=class_node, - filepath=filepath, - module_import_names=module_import_names, - module_class_names=module_class_names, - ) - if class_metric is not None: - class_metrics.append(class_metric) - - dead_candidates = _collect_dead_candidates( - filepath=filepath, - module_name=module_name, - collector=collector, - protocol_symbol_aliases=protocol_symbol_aliases, - protocol_module_aliases=protocol_module_aliases, - suppression_rules_by_target=suppression_index, - ) - - sorted_class_metrics = tuple( - sorted( - class_metrics, - key=lambda item: ( - item.filepath, - item.start_line, - item.end_line, - item.qualname, - ), - ) - ) - typing_coverage, docstring_coverage = 
collect_module_adoption( - tree=tree, - module_name=module_name, - filepath=filepath, - collector=collector, - imported_names=import_names, - ) - api_surface = None - if collect_api_surface: - api_surface = collect_module_api_surface( - tree=tree, - module_name=module_name, - filepath=filepath, - collector=collector, - imported_names=import_names, - include_private_modules=api_include_private_modules, - ) - - return ( - units, - block_units, - segment_units, - SourceStats( - lines=source_line_count, - functions=collector.function_count, - methods=collector.method_count, - classes=collector.class_count, - ), - FileMetrics( - class_metrics=sorted_class_metrics, - module_deps=module_deps, - dead_candidates=dead_candidates, - referenced_names=referenced_names, - import_names=import_names, - class_names=class_names, - referenced_qualnames=referenced_qualnames, - typing_coverage=typing_coverage, - docstring_coverage=docstring_coverage, - api_surface=api_surface, - ), - structural_findings, - ) diff --git a/codeclone/findings/__init__.py b/codeclone/findings/__init__.py new file mode 100644 index 0000000..ed88cfe --- /dev/null +++ b/codeclone/findings/__init__.py @@ -0,0 +1,21 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .ids import ( + clone_group_id, + dead_code_group_id, + design_group_id, + structural_group_id, +) + +__all__ = [ + "clone_group_id", + "dead_code_group_id", + "design_group_id", + "structural_group_id", +] diff --git a/codeclone/findings/clones/__init__.py b/codeclone/findings/clones/__init__.py new file mode 100644 index 0000000..e4f7372 --- /dev/null +++ b/codeclone/findings/clones/__init__.py @@ -0,0 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from .grouping import build_block_groups, build_groups, build_segment_groups + +__all__ = ["build_block_groups", "build_groups", "build_segment_groups"] diff --git a/codeclone/grouping.py b/codeclone/findings/clones/grouping.py similarity index 98% rename from codeclone/grouping.py rename to codeclone/findings/clones/grouping.py index c4590b3..7aa37dc 100644 --- a/codeclone/grouping.py +++ b/codeclone/findings/clones/grouping.py @@ -9,7 +9,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from .models import GroupItemsLike, GroupMap + from ...models import GroupItemsLike, GroupMap def _group_items_by_key( diff --git a/codeclone/findings/ids.py b/codeclone/findings/ids.py new file mode 100644 index 0000000..d09cf0d --- /dev/null +++ b/codeclone/findings/ids.py @@ -0,0 +1,31 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + + +def clone_group_id(kind: str, group_key: str) -> str: + return f"clone:{kind}:{group_key}" + + +def structural_group_id(finding_kind: str, finding_key: str) -> str: + return f"structural:{finding_kind}:{finding_key}" + + +def dead_code_group_id(subject_key: str) -> str: + return f"dead_code:{subject_key}" + + +def design_group_id(category: str, subject_key: str) -> str: + return f"design:{category}:{subject_key}" + + +__all__ = [ + "clone_group_id", + "dead_code_group_id", + "design_group_id", + "structural_group_id", +] diff --git a/codeclone/findings/structural/__init__.py b/codeclone/findings/structural/__init__.py new file mode 100644 index 0000000..b64196d --- /dev/null +++ b/codeclone/findings/structural/__init__.py @@ -0,0 +1,21 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from .detectors import ( + build_clone_cohort_structural_findings, + is_reportable_structural_signature, + normalize_structural_finding_group, + normalize_structural_findings, + scan_function_structure, +) + +__all__ = [ + "build_clone_cohort_structural_findings", + "is_reportable_structural_signature", + "normalize_structural_finding_group", + "normalize_structural_findings", + "scan_function_structure", +] diff --git a/codeclone/structural_findings.py b/codeclone/findings/structural/detectors.py similarity index 99% rename from codeclone/structural_findings.py rename to codeclone/findings/structural/detectors.py index 2d805d7..0ebe138 100644 --- a/codeclone/structural_findings.py +++ b/codeclone/findings/structural/detectors.py @@ -21,13 +21,13 @@ from hashlib import sha1 from typing import TYPE_CHECKING, overload -from ._coerce import as_int, as_str -from .domain.findings import ( +from ...domain.findings import ( STRUCTURAL_KIND_CLONE_COHORT_DRIFT, STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, STRUCTURAL_KIND_DUPLICATED_BRANCHES, ) -from .models import GroupItemLike, StructuralFindingGroup, StructuralFindingOccurrence +from ...models import GroupItemLike, StructuralFindingGroup, StructuralFindingOccurrence +from ...utils.coerce import as_int, as_str if TYPE_CHECKING: from collections.abc import Mapping, Sequence diff --git a/codeclone/fingerprint.py b/codeclone/fingerprint.py deleted file mode 100644 index 72adaee..0000000 --- a/codeclone/fingerprint.py +++ /dev/null @@ -1,24 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import hashlib - - -def sha1(s: str) -> str: - return hashlib.sha1(s.encode("utf-8")).hexdigest() - - -def bucket_loc(loc: int) -> str: - # Helps avoid grouping wildly different sizes if desired - if loc < 20: - return "0-19" - if loc < 50: - return "20-49" - if loc < 100: - return "50-99" - return "100+" diff --git a/codeclone/main.py b/codeclone/main.py new file mode 100644 index 0000000..1f97418 --- /dev/null +++ b/codeclone/main.py @@ -0,0 +1,15 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .surfaces.cli.main import main + +__all__ = ["main"] + + +if __name__ == "__main__": + main() diff --git a/codeclone/meta_markers.py b/codeclone/meta_markers/__init__.py similarity index 100% rename from codeclone/meta_markers.py rename to codeclone/meta_markers/__init__.py diff --git a/codeclone/metrics/__init__.py b/codeclone/metrics/__init__.py index 0551b7d..25efd2e 100644 --- a/codeclone/metrics/__init__.py +++ b/codeclone/metrics/__init__.py @@ -6,6 +6,7 @@ from __future__ import annotations +from ._base import MetricAggregate, MetricFamily, MetricProjectContext from .cohesion import cohesion_risk, compute_lcom4 from .complexity import cyclomatic_complexity, nesting_depth, risk_level from .coupling import compute_cbo, coupling_risk @@ -20,14 +21,20 @@ ) from .health import HealthInputs, compute_health from .overloaded_modules import build_overloaded_modules_payload +from .registry import METRIC_FAMILIES, build_project_metrics, project_metrics_defaults __all__ = [ + "METRIC_FAMILIES", "CoverageJoinParseError", "HealthInputs", + "MetricAggregate", + "MetricFamily", + 
"MetricProjectContext", "build_coverage_join", "build_dep_graph", "build_import_graph", "build_overloaded_modules_payload", + "build_project_metrics", "cohesion_risk", "compute_cbo", "compute_health", @@ -40,5 +47,6 @@ "longest_chains", "max_depth", "nesting_depth", + "project_metrics_defaults", "risk_level", ] diff --git a/codeclone/metrics/_base.py b/codeclone/metrics/_base.py new file mode 100644 index 0000000..1e34195 --- /dev/null +++ b/codeclone/metrics/_base.py @@ -0,0 +1,62 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Callable + + from ..models import ( + ClassMetrics, + DeadCandidate, + GroupItemLike, + ModuleApiSurface, + ModuleDep, + ModuleDocstringCoverage, + ModuleTypingCoverage, + ) + +MetricResult = dict[str, object] + + +@dataclass(frozen=True, slots=True) +class MetricAggregate: + project_fields: dict[str, object] + artifacts: dict[str, object] = field(default_factory=dict) + + +@dataclass(slots=True) +class MetricProjectContext: + units: tuple[GroupItemLike, ...] + class_metrics: tuple[ClassMetrics, ...] + module_deps: tuple[ModuleDep, ...] + dead_candidates: tuple[DeadCandidate, ...] + referenced_names: frozenset[str] + referenced_qualnames: frozenset[str] + typing_modules: tuple[ModuleTypingCoverage, ...] = () + docstring_modules: tuple[ModuleDocstringCoverage, ...] = () + api_modules: tuple[ModuleApiSurface, ...] 
= () + files_found: int = 0 + files_analyzed_or_cached: int = 0 + function_clone_groups: int = 0 + block_clone_groups: int = 0 + skip_dependencies: bool = False + skip_dead_code: bool = False + memo: dict[str, MetricResult] = field(default_factory=dict) + + +@dataclass(frozen=True, slots=True) +class MetricFamily: + name: str + compute: Callable[[MetricProjectContext], MetricResult] + aggregate: Callable[[list[MetricResult]], MetricAggregate] + report_section: str + baseline_key: str | None + gate_keys: tuple[str, ...] + skippable_flag: str | None diff --git a/codeclone/metrics/complexity.py b/codeclone/metrics/complexity.py index fa98f9d..97808e3 100644 --- a/codeclone/metrics/complexity.py +++ b/codeclone/metrics/complexity.py @@ -15,7 +15,7 @@ if TYPE_CHECKING: from collections.abc import Iterable - from ..cfg_model import CFG + from ..analysis.cfg_model import CFG ControlNode = ( ast.If diff --git a/codeclone/metrics/coverage_join.py b/codeclone/metrics/coverage_join.py index 08c8278..8109acd 100644 --- a/codeclone/metrics/coverage_join.py +++ b/codeclone/metrics/coverage_join.py @@ -13,8 +13,8 @@ from typing import Literal, cast from xml.etree import ElementTree -from .._coerce import as_int, as_str from ..models import CoverageJoinResult, GroupItemLike, UnitCoverageFact +from ..utils.coerce import as_int, as_str __all__ = [ "CoverageJoinParseError", diff --git a/codeclone/metrics/overloaded_modules.py b/codeclone/metrics/overloaded_modules.py index 46b414b..e151879 100644 --- a/codeclone/metrics/overloaded_modules.py +++ b/codeclone/metrics/overloaded_modules.py @@ -11,7 +11,6 @@ from collections.abc import Sequence from math import floor -from .._coerce import as_float, as_int, as_sequence, as_str from ..domain.source_scope import ( SOURCE_KIND_FIXTURES, SOURCE_KIND_OTHER, @@ -20,6 +19,7 @@ ) from ..models import ClassMetrics, GroupItemLike, ModuleDep from ..scanner import module_name_from_path +from ..utils.coerce import as_float, as_int, as_sequence, 
as_str _CANDIDATE = "candidate" _NON_CANDIDATE = "non_candidate" diff --git a/codeclone/metrics/registry.py b/codeclone/metrics/registry.py new file mode 100644 index 0000000..35f7f84 --- /dev/null +++ b/codeclone/metrics/registry.py @@ -0,0 +1,673 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Callable +from typing import cast + +from ..domain.findings import CATEGORY_COHESION, CATEGORY_COMPLEXITY, CATEGORY_COUPLING +from ..domain.quality import RISK_HIGH +from ..models import ( + ApiSurfaceSnapshot, + DeadItem, + DepGraph, + HealthScore, + ModuleDep, + ModuleDocstringCoverage, + ModuleTypingCoverage, + ProjectMetrics, +) +from ..utils.coerce import as_int as _as_int +from ..utils.coerce import as_str as _as_str +from ._base import MetricAggregate, MetricFamily, MetricProjectContext, MetricResult +from .dead_code import find_unused +from .dependencies import build_dep_graph +from .health import HealthInputs, compute_health + + +def _group_item_sort_key(item: object) -> tuple[str, int, int, str]: + if not isinstance(item, dict): + return "", 0, 0, "" + return ( + _as_str(item.get("filepath")), + _as_int(item.get("start_line")), + _as_int(item.get("end_line")), + _as_str(item.get("qualname")), + ) + + +def _class_metric_sort_key(metric: object) -> tuple[str, int, int, str]: + filepath = getattr(metric, "filepath", "") + start_line = getattr(metric, "start_line", 0) + end_line = getattr(metric, "end_line", 0) + qualname = getattr(metric, "qualname", "") + return str(filepath), int(start_line), int(end_line), str(qualname) + + +def _module_names_from_units(units: tuple[object, ...]) -> frozenset[str]: + modules: set[str] = set() + for item in units: + if not 
isinstance(item, dict): + continue + qualname = _as_str(item.get("qualname")) + module_name = qualname.split(":", 1)[0] if ":" in qualname else qualname + if module_name: + modules.add(module_name) + return frozenset(sorted(modules)) + + +def _empty_dep_graph() -> DepGraph: + return DepGraph( + modules=frozenset(), + edges=(), + cycles=(), + max_depth=0, + longest_chains=(), + ) + + +def project_metrics_defaults() -> dict[str, object]: + return { + "complexity_avg": 0.0, + "complexity_max": 0, + "high_risk_functions": (), + "coupling_avg": 0.0, + "coupling_max": 0, + "high_risk_classes": (), + "cohesion_avg": 0.0, + "cohesion_max": 0, + "low_cohesion_classes": (), + "dependency_modules": 0, + "dependency_edges": 0, + "dependency_edge_list": (), + "dependency_cycles": (), + "dependency_max_depth": 0, + "dependency_longest_chains": (), + "dead_code": (), + "health": compute_health( + HealthInputs( + files_found=0, + files_analyzed_or_cached=0, + function_clone_groups=0, + block_clone_groups=0, + complexity_avg=0.0, + complexity_max=0, + high_risk_functions=0, + coupling_avg=0.0, + coupling_max=0, + high_risk_classes=0, + cohesion_avg=0.0, + low_cohesion_classes=0, + dependency_cycles=0, + dependency_max_depth=0, + dead_code_items=0, + ) + ), + "typing_param_total": 0, + "typing_param_annotated": 0, + "typing_return_total": 0, + "typing_return_annotated": 0, + "typing_any_count": 0, + "docstring_public_total": 0, + "docstring_public_documented": 0, + "typing_modules": (), + "docstring_modules": (), + "api_surface": None, + } + + +def build_project_metrics(project_fields: dict[str, object]) -> ProjectMetrics: + return ProjectMetrics( + complexity_avg=_result_float(project_fields, "complexity_avg"), + complexity_max=_result_int(project_fields, "complexity_max"), + high_risk_functions=_result_tuple_str(project_fields, "high_risk_functions"), + coupling_avg=_result_float(project_fields, "coupling_avg"), + coupling_max=_result_int(project_fields, "coupling_max"), + 
high_risk_classes=_result_tuple_str(project_fields, "high_risk_classes"), + cohesion_avg=_result_float(project_fields, "cohesion_avg"), + cohesion_max=_result_int(project_fields, "cohesion_max"), + low_cohesion_classes=_result_tuple_str(project_fields, "low_cohesion_classes"), + dependency_modules=_result_int(project_fields, "dependency_modules"), + dependency_edges=_result_int(project_fields, "dependency_edges"), + dependency_edge_list=cast( + "tuple[ModuleDep, ...]", + project_fields.get("dependency_edge_list", ()), + ), + dependency_cycles=cast( + "tuple[tuple[str, ...], ...]", + project_fields.get("dependency_cycles", ()), + ), + dependency_max_depth=_result_int(project_fields, "dependency_max_depth"), + dependency_longest_chains=cast( + "tuple[tuple[str, ...], ...]", + project_fields.get("dependency_longest_chains", ()), + ), + dead_code=cast("tuple[DeadItem, ...]", project_fields.get("dead_code", ())), + health=cast("HealthScore", project_fields["health"]), + typing_param_total=_result_int(project_fields, "typing_param_total"), + typing_param_annotated=_result_int(project_fields, "typing_param_annotated"), + typing_return_total=_result_int(project_fields, "typing_return_total"), + typing_return_annotated=_result_int( + project_fields, + "typing_return_annotated", + ), + typing_any_count=_result_int(project_fields, "typing_any_count"), + docstring_public_total=_result_int(project_fields, "docstring_public_total"), + docstring_public_documented=_result_int( + project_fields, + "docstring_public_documented", + ), + typing_modules=cast( + "tuple[ModuleTypingCoverage, ...]", + project_fields.get("typing_modules", ()), + ), + docstring_modules=cast( + "tuple[ModuleDocstringCoverage, ...]", + project_fields.get("docstring_modules", ()), + ), + api_surface=cast( + "ApiSurfaceSnapshot | None", + project_fields.get("api_surface"), + ), + ) + + +def _result_float(result: dict[str, object], key: str) -> float: + value = result.get(key) + return float(value) if 
isinstance(value, int | float) else 0.0 + + +def _result_int(result: dict[str, object], key: str) -> int: + return _as_int(result.get(key), 0) + + +def _result_tuple_str(result: dict[str, object], key: str) -> tuple[str, ...]: + return cast("tuple[str, ...]", result.get(key, ())) + + +def _result_nested_tuple_str( + result: dict[str, object], + key: str, +) -> tuple[tuple[str, ...], ...]: + return cast("tuple[tuple[str, ...], ...]", result.get(key, ())) + + +def _result_dead_items( + result: dict[str, object], + key: str, +) -> tuple[DeadItem, ...]: + return cast("tuple[DeadItem, ...]", result.get(key, ())) + + +def _memoized_result( + context: MetricProjectContext, + *, + family_name: str, + builder: Callable[[MetricProjectContext], MetricResult], +) -> MetricResult: + cached = context.memo.get(family_name) + if cached is not None: + return cached + result = builder(context) + context.memo[family_name] = result + return result + + +def _first_result(results: list[MetricResult]) -> MetricResult: + return results[0] if results else {} + + +def _build_complexity_result(context: MetricProjectContext) -> MetricResult: + unit_rows = tuple(sorted(context.units, key=_group_item_sort_key)) + complexities = tuple( + max(1, _as_int(row.get("cyclomatic_complexity"), 1)) for row in unit_rows + ) + complexity_max = max(complexities) if complexities else 0 + complexity_avg = ( + float(sum(complexities)) / float(len(complexities)) if complexities else 0.0 + ) + high_risk_functions = tuple( + sorted( + { + _as_str(row.get("qualname")) + for row in unit_rows + if _as_str(row.get("risk")) == RISK_HIGH + } + ) + ) + return { + "complexity_avg": complexity_avg, + "complexity_max": complexity_max, + "high_risk_functions": high_risk_functions, + } + + +def _summarize_class_metric_family( + context: MetricProjectContext, + *, + value_attr: str, + risk_attr: str, +) -> tuple[float, int, tuple[str, ...]]: + classes_sorted = tuple(sorted(context.class_metrics, key=_class_metric_sort_key)) + 
values = tuple( + _as_int(getattr(metric, value_attr, 0), 0) for metric in classes_sorted + ) + value_max = max(values) if values else 0 + value_avg = float(sum(values)) / float(len(values)) if values else 0.0 + high_risk_symbols = tuple( + sorted( + { + metric.qualname + for metric in classes_sorted + if str(getattr(metric, risk_attr, "")) == RISK_HIGH + } + ) + ) + return value_avg, value_max, high_risk_symbols + + +def _compute_complexity_family(context: MetricProjectContext) -> MetricResult: + return _memoized_result( + context, + family_name=CATEGORY_COMPLEXITY, + builder=_build_complexity_result, + ) + + +def _aggregate_complexity_family(results: list[MetricResult]) -> MetricAggregate: + result = _first_result(results) + return MetricAggregate( + project_fields={ + "complexity_avg": _result_float(result, "complexity_avg"), + "complexity_max": _result_int(result, "complexity_max"), + "high_risk_functions": _result_tuple_str(result, "high_risk_functions"), + } + ) + + +def _build_coupling_result(context: MetricProjectContext) -> MetricResult: + coupling_avg, coupling_max, high_risk_classes = _summarize_class_metric_family( + context, + value_attr="cbo", + risk_attr="risk_coupling", + ) + return { + "coupling_avg": coupling_avg, + "coupling_max": coupling_max, + "high_risk_classes": high_risk_classes, + } + + +def _compute_coupling_family(context: MetricProjectContext) -> MetricResult: + return _memoized_result( + context, + family_name=CATEGORY_COUPLING, + builder=_build_coupling_result, + ) + + +def _aggregate_coupling_family(results: list[MetricResult]) -> MetricAggregate: + result = _first_result(results) + return MetricAggregate( + project_fields={ + "coupling_avg": _result_float(result, "coupling_avg"), + "coupling_max": _result_int(result, "coupling_max"), + "high_risk_classes": _result_tuple_str(result, "high_risk_classes"), + } + ) + + +def _build_cohesion_result(context: MetricProjectContext) -> MetricResult: + cohesion_avg, cohesion_max, 
low_cohesion_classes = _summarize_class_metric_family( + context, + value_attr="lcom4", + risk_attr="risk_cohesion", + ) + return { + "cohesion_avg": cohesion_avg, + "cohesion_max": cohesion_max, + "low_cohesion_classes": low_cohesion_classes, + } + + +def _compute_cohesion_family(context: MetricProjectContext) -> MetricResult: + return _memoized_result( + context, + family_name=CATEGORY_COHESION, + builder=_build_cohesion_result, + ) + + +def _aggregate_cohesion_family(results: list[MetricResult]) -> MetricAggregate: + result = _first_result(results) + return MetricAggregate( + project_fields={ + "cohesion_avg": _result_float(result, "cohesion_avg"), + "cohesion_max": _result_int(result, "cohesion_max"), + "low_cohesion_classes": _result_tuple_str(result, "low_cohesion_classes"), + } + ) + + +def _build_dependencies_result(context: MetricProjectContext) -> MetricResult: + dep_graph = _empty_dep_graph() + if not context.skip_dependencies: + dep_graph = build_dep_graph( + modules=_module_names_from_units(tuple(context.units)), + deps=context.module_deps, + ) + return { + "dependency_modules": len(dep_graph.modules), + "dependency_edges": len(dep_graph.edges), + "dependency_edge_list": dep_graph.edges, + "dependency_cycles": dep_graph.cycles, + "dependency_max_depth": dep_graph.max_depth, + "dependency_longest_chains": dep_graph.longest_chains, + "dep_graph": dep_graph, + } + + +def _compute_dependencies_family(context: MetricProjectContext) -> MetricResult: + return _memoized_result( + context, + family_name="dependencies", + builder=_build_dependencies_result, + ) + + +def _aggregate_dependencies_family(results: list[MetricResult]) -> MetricAggregate: + result = _first_result(results) + dep_graph = result.get("dep_graph") + return MetricAggregate( + project_fields={ + "dependency_modules": _result_int(result, "dependency_modules"), + "dependency_edges": _result_int(result, "dependency_edges"), + "dependency_edge_list": cast( + "tuple[ModuleDep, ...]", + 
result.get("dependency_edge_list", ()), + ), + "dependency_cycles": _result_nested_tuple_str(result, "dependency_cycles"), + "dependency_max_depth": _result_int(result, "dependency_max_depth"), + "dependency_longest_chains": _result_nested_tuple_str( + result, + "dependency_longest_chains", + ), + }, + artifacts=({"dep_graph": dep_graph} if isinstance(dep_graph, DepGraph) else {}), + ) + + +def _build_dead_code_result(context: MetricProjectContext) -> MetricResult: + dead_items: tuple[DeadItem, ...] = () + if not context.skip_dead_code: + dead_items = find_unused( + definitions=tuple(context.dead_candidates), + referenced_names=context.referenced_names, + referenced_qualnames=context.referenced_qualnames, + ) + return { + "dead_code": dead_items, + "dead_items": dead_items, + } + + +def _compute_dead_code_family(context: MetricProjectContext) -> MetricResult: + return _memoized_result( + context, + family_name="dead_code", + builder=_build_dead_code_result, + ) + + +def _aggregate_dead_code_family(results: list[MetricResult]) -> MetricAggregate: + result = _first_result(results) + dead_items = result.get("dead_items") + return MetricAggregate( + project_fields={ + "dead_code": _result_dead_items(result, "dead_code"), + }, + artifacts=({"dead_items": dead_items} if isinstance(dead_items, tuple) else {}), + ) + + +def _build_health_result(context: MetricProjectContext) -> MetricResult: + complexity = _compute_complexity_family(context) + coupling = _compute_coupling_family(context) + cohesion = _compute_cohesion_family(context) + dependencies = _compute_dependencies_family(context) + dead_code = _compute_dead_code_family(context) + health = compute_health( + HealthInputs( + files_found=context.files_found, + files_analyzed_or_cached=context.files_analyzed_or_cached, + function_clone_groups=context.function_clone_groups, + block_clone_groups=context.block_clone_groups, + complexity_avg=_result_float(complexity, "complexity_avg"), + 
complexity_max=_result_int(complexity, "complexity_max"), + high_risk_functions=len( + _result_tuple_str(complexity, "high_risk_functions") + ), + coupling_avg=_result_float(coupling, "coupling_avg"), + coupling_max=_result_int(coupling, "coupling_max"), + high_risk_classes=len(_result_tuple_str(coupling, "high_risk_classes")), + cohesion_avg=_result_float(cohesion, "cohesion_avg"), + low_cohesion_classes=len( + _result_tuple_str(cohesion, "low_cohesion_classes") + ), + dependency_cycles=len( + _result_nested_tuple_str(dependencies, "dependency_cycles") + ), + dependency_max_depth=_result_int(dependencies, "dependency_max_depth"), + dead_code_items=len(_result_dead_items(dead_code, "dead_code")), + ) + ) + return {"health": health} + + +def _compute_health_family(context: MetricProjectContext) -> MetricResult: + return _memoized_result( + context, + family_name="health", + builder=_build_health_result, + ) + + +def _aggregate_health_family(results: list[MetricResult]) -> MetricAggregate: + result = _first_result(results) + return MetricAggregate( + project_fields={"health": cast("HealthScore", result.get("health"))} + ) + + +def _build_coverage_adoption_result(context: MetricProjectContext) -> MetricResult: + typing_rows = tuple( + sorted(context.typing_modules, key=lambda item: (item.filepath, item.module)) + ) + docstring_rows = tuple( + sorted(context.docstring_modules, key=lambda item: (item.filepath, item.module)) + ) + return { + "typing_param_total": sum(item.params_total for item in typing_rows), + "typing_param_annotated": sum(item.params_annotated for item in typing_rows), + "typing_return_total": sum(item.returns_total for item in typing_rows), + "typing_return_annotated": sum(item.returns_annotated for item in typing_rows), + "typing_any_count": sum(item.any_annotation_count for item in typing_rows), + "docstring_public_total": sum( + item.public_symbol_total for item in docstring_rows + ), + "docstring_public_documented": sum( + 
item.public_symbol_documented for item in docstring_rows + ), + "typing_modules": typing_rows, + "docstring_modules": docstring_rows, + } + + +def _compute_coverage_adoption_family(context: MetricProjectContext) -> MetricResult: + return _memoized_result( + context, + family_name="coverage_adoption", + builder=_build_coverage_adoption_result, + ) + + +def _aggregate_coverage_adoption_family(results: list[MetricResult]) -> MetricAggregate: + result = _first_result(results) + return MetricAggregate( + project_fields={ + "typing_param_total": _result_int(result, "typing_param_total"), + "typing_param_annotated": _result_int(result, "typing_param_annotated"), + "typing_return_total": _result_int(result, "typing_return_total"), + "typing_return_annotated": _result_int( + result, + "typing_return_annotated", + ), + "typing_any_count": _result_int(result, "typing_any_count"), + "docstring_public_total": _result_int(result, "docstring_public_total"), + "docstring_public_documented": _result_int( + result, + "docstring_public_documented", + ), + "typing_modules": cast( + "tuple[ModuleTypingCoverage, ...]", + result.get("typing_modules", ()), + ), + "docstring_modules": cast( + "tuple[ModuleDocstringCoverage, ...]", + result.get("docstring_modules", ()), + ), + } + ) + + +def _build_api_surface_result(context: MetricProjectContext) -> MetricResult: + api_rows = tuple( + sorted(context.api_modules, key=lambda item: (item.filepath, item.module)) + ) + return { + "api_surface": ApiSurfaceSnapshot(modules=api_rows) if api_rows else None, + } + + +def _compute_api_surface_family(context: MetricProjectContext) -> MetricResult: + return _memoized_result( + context, + family_name="api_surface", + builder=_build_api_surface_result, + ) + + +def _aggregate_api_surface_family(results: list[MetricResult]) -> MetricAggregate: + result = _first_result(results) + return MetricAggregate(project_fields={"api_surface": result.get("api_surface")}) + + +def _compute_report_only_family(_context: 
MetricProjectContext) -> MetricResult: + return {} + + +def _aggregate_empty_family(_results: list[MetricResult]) -> MetricAggregate: + return MetricAggregate(project_fields={}) + + +METRIC_FAMILIES: dict[str, MetricFamily] = { + CATEGORY_COMPLEXITY: MetricFamily( + name=CATEGORY_COMPLEXITY, + compute=_compute_complexity_family, + aggregate=_aggregate_complexity_family, + report_section=CATEGORY_COMPLEXITY, + baseline_key="max_complexity", + gate_keys=("complexity_threshold", "new_high_risk_functions"), + skippable_flag="skip_metrics", + ), + CATEGORY_COUPLING: MetricFamily( + name=CATEGORY_COUPLING, + compute=_compute_coupling_family, + aggregate=_aggregate_coupling_family, + report_section=CATEGORY_COUPLING, + baseline_key="max_coupling", + gate_keys=("coupling_threshold", "new_high_coupling_classes"), + skippable_flag="skip_metrics", + ), + CATEGORY_COHESION: MetricFamily( + name=CATEGORY_COHESION, + compute=_compute_cohesion_family, + aggregate=_aggregate_cohesion_family, + report_section=CATEGORY_COHESION, + baseline_key="max_cohesion", + gate_keys=("cohesion_threshold",), + skippable_flag="skip_metrics", + ), + "dependencies": MetricFamily( + name="dependencies", + compute=_compute_dependencies_family, + aggregate=_aggregate_dependencies_family, + report_section="dependencies", + baseline_key="dependency_cycles", + gate_keys=("dependency_cycles", "new_dependency_cycles"), + skippable_flag="skip_metrics", + ), + "dead_code": MetricFamily( + name="dead_code", + compute=_compute_dead_code_family, + aggregate=_aggregate_dead_code_family, + report_section="dead_code", + baseline_key="dead_code_items", + gate_keys=("dead_code_high_confidence", "new_dead_code"), + skippable_flag="skip_metrics", + ), + "health": MetricFamily( + name="health", + compute=_compute_health_family, + aggregate=_aggregate_health_family, + report_section="health", + baseline_key="health_score", + gate_keys=("health_threshold", "health_regression"), + skippable_flag="skip_metrics", + ), + 
"coverage_adoption": MetricFamily( + name="coverage_adoption", + compute=_compute_coverage_adoption_family, + aggregate=_aggregate_coverage_adoption_family, + report_section="coverage_adoption", + baseline_key="typing_param_permille", + gate_keys=( + "typing_coverage_threshold", + "docstring_coverage_threshold", + "typing_regression", + "docstring_regression", + ), + skippable_flag="skip_metrics", + ), + "api_surface": MetricFamily( + name="api_surface", + compute=_compute_api_surface_family, + aggregate=_aggregate_api_surface_family, + report_section="api_surface", + baseline_key=None, + gate_keys=("api_breaking_changes",), + skippable_flag="skip_metrics", + ), + "overloaded_modules": MetricFamily( + name="overloaded_modules", + compute=_compute_report_only_family, + aggregate=_aggregate_empty_family, + report_section="overloaded_modules", + baseline_key=None, + gate_keys=(), + skippable_flag="skip_metrics", + ), + "coverage_join": MetricFamily( + name="coverage_join", + compute=_compute_report_only_family, + aggregate=_aggregate_empty_family, + report_section="coverage_join", + baseline_key=None, + gate_keys=("coverage_hotspots",), + skippable_flag="skip_metrics", + ), +} diff --git a/codeclone/metrics_baseline.py b/codeclone/metrics_baseline.py deleted file mode 100644 index ed4197e..0000000 --- a/codeclone/metrics_baseline.py +++ /dev/null @@ -1,1317 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import hashlib -import hmac -from datetime import datetime, timezone -from enum import Enum -from json import JSONDecodeError -from pathlib import Path -from typing import TYPE_CHECKING, Any, Final, Literal, cast - -import orjson - -from . 
import __version__ -from ._json_io import read_json_object as _read_json_object -from ._json_io import write_json_document_atomically as _write_json_document_atomically -from ._schema_validation import validate_top_level_structure -from .baseline import current_python_tag -from .cache_paths import runtime_filepath_from_wire, wire_filepath_from_runtime -from .contracts import BASELINE_SCHEMA_VERSION, METRICS_BASELINE_SCHEMA_VERSION -from .errors import BaselineValidationError -from .metrics.api_surface import compare_api_surfaces -from .models import ( - ApiBreakingChange, - ApiParamSpec, - ApiSurfaceSnapshot, - MetricsDiff, - MetricsSnapshot, - ModuleApiSurface, - ProjectMetrics, - PublicSymbol, -) - -if TYPE_CHECKING: - from collections.abc import Mapping - -METRICS_BASELINE_GENERATOR: Final = "codeclone" -MAX_METRICS_BASELINE_SIZE_BYTES: Final = 5 * 1024 * 1024 - - -class MetricsBaselineStatus(str, Enum): - OK = "ok" - MISSING = "missing" - TOO_LARGE = "too_large" - INVALID_JSON = "invalid_json" - INVALID_TYPE = "invalid_type" - MISSING_FIELDS = "missing_fields" - MISMATCH_SCHEMA_VERSION = "mismatch_schema_version" - MISMATCH_PYTHON_VERSION = "mismatch_python_version" - GENERATOR_MISMATCH = "generator_mismatch" - INTEGRITY_MISSING = "integrity_missing" - INTEGRITY_FAILED = "integrity_failed" - - -METRICS_BASELINE_UNTRUSTED_STATUSES: Final[frozenset[MetricsBaselineStatus]] = ( - frozenset( - { - MetricsBaselineStatus.MISSING, - MetricsBaselineStatus.TOO_LARGE, - MetricsBaselineStatus.INVALID_JSON, - MetricsBaselineStatus.INVALID_TYPE, - MetricsBaselineStatus.MISSING_FIELDS, - MetricsBaselineStatus.MISMATCH_SCHEMA_VERSION, - MetricsBaselineStatus.MISMATCH_PYTHON_VERSION, - MetricsBaselineStatus.GENERATOR_MISMATCH, - MetricsBaselineStatus.INTEGRITY_MISSING, - MetricsBaselineStatus.INTEGRITY_FAILED, - } - ) -) - -_TOP_LEVEL_REQUIRED_KEYS = frozenset({"meta", "metrics"}) -_TOP_LEVEL_ALLOWED_KEYS = _TOP_LEVEL_REQUIRED_KEYS | frozenset( - {"clones", "api_surface"} -) 
-_META_REQUIRED_KEYS = frozenset( - {"generator", "schema_version", "python_tag", "created_at", "payload_sha256"} -) -_METRICS_REQUIRED_KEYS = frozenset( - { - "max_complexity", - "high_risk_functions", - "max_coupling", - "high_coupling_classes", - "max_cohesion", - "low_cohesion_classes", - "dependency_cycles", - "dependency_max_depth", - "dead_code_items", - "health_score", - "health_grade", - } -) -_METRICS_OPTIONAL_KEYS = frozenset( - { - "typing_param_permille", - "typing_return_permille", - "docstring_permille", - "typing_any_count", - } -) -_METRICS_PAYLOAD_SHA256_KEY = "metrics_payload_sha256" -_API_SURFACE_PAYLOAD_SHA256_KEY = "api_surface_payload_sha256" - - -def coerce_metrics_baseline_status( - raw_status: str | MetricsBaselineStatus | None, -) -> MetricsBaselineStatus: - if isinstance(raw_status, MetricsBaselineStatus): - return raw_status - if isinstance(raw_status, str): - try: - return MetricsBaselineStatus(raw_status) - except ValueError: - return MetricsBaselineStatus.INVALID_TYPE - return MetricsBaselineStatus.INVALID_TYPE - - -def snapshot_from_project_metrics(project_metrics: ProjectMetrics) -> MetricsSnapshot: - return MetricsSnapshot( - max_complexity=int(project_metrics.complexity_max), - high_risk_functions=tuple(sorted(set(project_metrics.high_risk_functions))), - max_coupling=int(project_metrics.coupling_max), - high_coupling_classes=tuple(sorted(set(project_metrics.high_risk_classes))), - max_cohesion=int(project_metrics.cohesion_max), - low_cohesion_classes=tuple(sorted(set(project_metrics.low_cohesion_classes))), - dependency_cycles=tuple( - sorted({tuple(cycle) for cycle in project_metrics.dependency_cycles}) - ), - dependency_max_depth=int(project_metrics.dependency_max_depth), - dead_code_items=tuple( - sorted({item.qualname for item in project_metrics.dead_code}) - ), - health_score=int(project_metrics.health.total), - health_grade=project_metrics.health.grade, - typing_param_permille=_permille( - 
project_metrics.typing_param_annotated, - project_metrics.typing_param_total, - ), - typing_return_permille=_permille( - project_metrics.typing_return_annotated, - project_metrics.typing_return_total, - ), - docstring_permille=_permille( - project_metrics.docstring_public_documented, - project_metrics.docstring_public_total, - ), - typing_any_count=int(project_metrics.typing_any_count), - ) - - -def _permille(numerator: int, denominator: int) -> int: - if denominator <= 0: - return 0 - return round((1000.0 * float(numerator)) / float(denominator)) - - -def _canonical_json(payload: object) -> str: - return orjson.dumps(payload, option=orjson.OPT_SORT_KEYS).decode("utf-8") - - -def _snapshot_payload( - snapshot: MetricsSnapshot, - *, - include_adoption: bool = True, -) -> dict[str, object]: - payload: dict[str, object] = { - "max_complexity": int(snapshot.max_complexity), - "high_risk_functions": list(snapshot.high_risk_functions), - "max_coupling": int(snapshot.max_coupling), - "high_coupling_classes": list(snapshot.high_coupling_classes), - "max_cohesion": int(snapshot.max_cohesion), - "low_cohesion_classes": list(snapshot.low_cohesion_classes), - "dependency_cycles": [list(cycle) for cycle in snapshot.dependency_cycles], - "dependency_max_depth": int(snapshot.dependency_max_depth), - "dead_code_items": list(snapshot.dead_code_items), - "health_score": int(snapshot.health_score), - "health_grade": snapshot.health_grade, - } - if include_adoption: - payload.update( - { - "typing_param_permille": int(snapshot.typing_param_permille), - "typing_return_permille": int(snapshot.typing_return_permille), - "docstring_permille": int(snapshot.docstring_permille), - "typing_any_count": int(snapshot.typing_any_count), - } - ) - return payload - - -def _compute_payload_sha256( - snapshot: MetricsSnapshot, - *, - include_adoption: bool = True, -) -> str: - canonical = _canonical_json( - _snapshot_payload(snapshot, include_adoption=include_adoption) - ) - return 
hashlib.sha256(canonical.encode("utf-8")).hexdigest() - - -def _now_utc_z() -> str: - return ( - datetime.now(timezone.utc) - .replace(microsecond=0) - .isoformat() - .replace( - "+00:00", - "Z", - ) - ) - - -class MetricsBaseline: - __slots__ = ( - "api_surface_payload_sha256", - "api_surface_snapshot", - "created_at", - "generator_name", - "generator_version", - "has_coverage_adoption_snapshot", - "is_embedded_in_clone_baseline", - "path", - "payload_sha256", - "python_tag", - "schema_version", - "snapshot", - ) - - def __init__(self, path: str | Path) -> None: - self.path = Path(path) - self.generator_name: str | None = None - self.generator_version: str | None = None - self.schema_version: str | None = None - self.python_tag: str | None = None - self.created_at: str | None = None - self.payload_sha256: str | None = None - self.snapshot: MetricsSnapshot | None = None - self.has_coverage_adoption_snapshot = False - self.api_surface_payload_sha256: str | None = None - self.api_surface_snapshot: ApiSurfaceSnapshot | None = None - self.is_embedded_in_clone_baseline = False - - def load( - self, - *, - max_size_bytes: int | None = None, - preloaded_payload: dict[str, object] | None = None, - ) -> None: - try: - exists = self.path.exists() - except OSError as e: - raise BaselineValidationError( - f"Cannot stat metrics baseline file at {self.path}: {e}", - status=MetricsBaselineStatus.INVALID_TYPE, - ) from e - if not exists: - return - - size_limit = ( - MAX_METRICS_BASELINE_SIZE_BYTES - if max_size_bytes is None - else max_size_bytes - ) - try: - file_size = self.path.stat().st_size - except OSError as e: - raise BaselineValidationError( - f"Cannot stat metrics baseline file at {self.path}: {e}", - status=MetricsBaselineStatus.INVALID_TYPE, - ) from e - if file_size > size_limit: - raise BaselineValidationError( - "Metrics baseline file is too large " - f"({file_size} bytes, max {size_limit} bytes) at {self.path}.", - status=MetricsBaselineStatus.TOO_LARGE, - ) - - 
if preloaded_payload is None: - payload = _load_json_object(self.path) - else: - if not isinstance(preloaded_payload, dict): - raise BaselineValidationError( - f"Metrics baseline payload must be an object at {self.path}", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - payload = preloaded_payload - _validate_top_level_structure(payload, path=self.path) - self.is_embedded_in_clone_baseline = "clones" in payload - - meta_obj = payload.get("meta") - metrics_obj = payload.get("metrics") - if not isinstance(meta_obj, dict): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {self.path}: " - "'meta' must be object", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - if not isinstance(metrics_obj, dict): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {self.path}: " - "'metrics' must be object", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - - _validate_required_keys(meta_obj, _META_REQUIRED_KEYS, path=self.path) - _validate_required_keys(metrics_obj, _METRICS_REQUIRED_KEYS, path=self.path) - _validate_exact_keys( - metrics_obj, - _METRICS_REQUIRED_KEYS | _METRICS_OPTIONAL_KEYS, - path=self.path, - ) - - generator_name, generator_version = _parse_generator(meta_obj, path=self.path) - schema_version = _require_str(meta_obj, "schema_version", path=self.path) - python_tag = _require_str(meta_obj, "python_tag", path=self.path) - created_at = _require_str(meta_obj, "created_at", path=self.path) - payload_sha256 = _extract_metrics_payload_sha256(meta_obj, path=self.path) - api_surface_payload_sha256 = _extract_optional_payload_sha256( - meta_obj, - key=_API_SURFACE_PAYLOAD_SHA256_KEY, - ) - - self.generator_name = generator_name - self.generator_version = generator_version - self.schema_version = schema_version - self.python_tag = python_tag - self.created_at = created_at - self.payload_sha256 = payload_sha256 - self.api_surface_payload_sha256 = api_surface_payload_sha256 - self.snapshot = _parse_snapshot(metrics_obj, 
path=self.path) - self.has_coverage_adoption_snapshot = _has_coverage_adoption_snapshot( - metrics_obj, - ) - self.api_surface_snapshot = _parse_api_surface_snapshot( - payload.get("api_surface"), - path=self.path, - root=self.path.parent, - ) - - def save(self) -> None: - if self.snapshot is None: - raise BaselineValidationError( - "Metrics baseline snapshot is missing.", - status=MetricsBaselineStatus.MISSING_FIELDS, - ) - payload = _build_payload( - snapshot=self.snapshot, - schema_version=self.schema_version or METRICS_BASELINE_SCHEMA_VERSION, - python_tag=self.python_tag or current_python_tag(), - generator_name=self.generator_name or METRICS_BASELINE_GENERATOR, - generator_version=self.generator_version or __version__, - created_at=self.created_at or _now_utc_z(), - include_adoption=self.has_coverage_adoption_snapshot, - api_surface_snapshot=self.api_surface_snapshot, - api_surface_root=self.path.parent, - ) - payload_meta = cast("Mapping[str, Any]", payload["meta"]) - payload_metrics_hash = _require_str( - payload_meta, - "payload_sha256", - path=self.path, - ) - payload_api_surface_hash = _optional_require_str( - payload_meta, - _API_SURFACE_PAYLOAD_SHA256_KEY, - path=self.path, - ) - existing: dict[str, Any] | None = None - try: - if self.path.exists(): - loaded = _load_json_object(self.path) - if "clones" in loaded: - existing = loaded - except BaselineValidationError as e: - raise BaselineValidationError( - f"Cannot read existing baseline file at {self.path}: {e}", - status=MetricsBaselineStatus.INVALID_JSON, - ) from e - - if existing is not None: - existing_meta, clones_obj = _require_embedded_clone_baseline_payload( - existing, path=self.path - ) - merged_schema_version = _resolve_embedded_schema_version( - existing_meta, path=self.path - ) - merged_meta = dict(existing_meta) - merged_meta["schema_version"] = merged_schema_version - merged_meta[_METRICS_PAYLOAD_SHA256_KEY] = payload_metrics_hash - if payload_api_surface_hash is None: - 
merged_meta.pop(_API_SURFACE_PAYLOAD_SHA256_KEY, None) - else: - merged_meta[_API_SURFACE_PAYLOAD_SHA256_KEY] = payload_api_surface_hash - merged_payload: dict[str, object] = { - "meta": merged_meta, - "clones": clones_obj, - "metrics": payload["metrics"], - } - api_surface_payload = payload.get("api_surface") - if api_surface_payload is not None: - merged_payload["api_surface"] = api_surface_payload - self.path.parent.mkdir(parents=True, exist_ok=True) - _atomic_write_json(self.path, merged_payload) - self.is_embedded_in_clone_baseline = True - self.schema_version = merged_schema_version - self.python_tag = _require_str(merged_meta, "python_tag", path=self.path) - self.created_at = _require_str(merged_meta, "created_at", path=self.path) - self.payload_sha256 = _require_str( - merged_meta, _METRICS_PAYLOAD_SHA256_KEY, path=self.path - ) - self.api_surface_payload_sha256 = _optional_require_str( - merged_meta, - _API_SURFACE_PAYLOAD_SHA256_KEY, - path=self.path, - ) - self.generator_name, self.generator_version = _parse_generator( - merged_meta, path=self.path - ) - return - - self.path.parent.mkdir(parents=True, exist_ok=True) - _atomic_write_json(self.path, payload) - self.is_embedded_in_clone_baseline = False - self.schema_version = _require_str( - payload_meta, "schema_version", path=self.path - ) - self.python_tag = _require_str(payload_meta, "python_tag", path=self.path) - self.created_at = _require_str(payload_meta, "created_at", path=self.path) - self.payload_sha256 = payload_metrics_hash - self.api_surface_payload_sha256 = payload_api_surface_hash - - def verify_compatibility(self, *, runtime_python_tag: str) -> None: - if self.generator_name != METRICS_BASELINE_GENERATOR: - raise BaselineValidationError( - "Metrics baseline generator mismatch: expected 'codeclone'.", - status=MetricsBaselineStatus.GENERATOR_MISMATCH, - ) - expected_schema = ( - BASELINE_SCHEMA_VERSION - if self.is_embedded_in_clone_baseline - else METRICS_BASELINE_SCHEMA_VERSION - ) - if 
not _is_compatible_metrics_schema( - baseline_version=self.schema_version, - expected_version=expected_schema, - ): - raise BaselineValidationError( - "Metrics baseline schema version mismatch: " - f"baseline={self.schema_version}, " - f"expected={expected_schema}.", - status=MetricsBaselineStatus.MISMATCH_SCHEMA_VERSION, - ) - if self.python_tag != runtime_python_tag: - raise BaselineValidationError( - "Metrics baseline python tag mismatch: " - f"baseline={self.python_tag}, current={runtime_python_tag}.", - status=MetricsBaselineStatus.MISMATCH_PYTHON_VERSION, - ) - self.verify_integrity() - - def verify_integrity(self) -> None: - if self.snapshot is None: - raise BaselineValidationError( - "Metrics baseline snapshot is missing.", - status=MetricsBaselineStatus.MISSING_FIELDS, - ) - if not isinstance(self.payload_sha256, str): - raise BaselineValidationError( - "Metrics baseline integrity payload hash is missing.", - status=MetricsBaselineStatus.INTEGRITY_MISSING, - ) - if len(self.payload_sha256) != 64: - raise BaselineValidationError( - "Metrics baseline integrity payload hash is missing.", - status=MetricsBaselineStatus.INTEGRITY_MISSING, - ) - expected = _compute_payload_sha256( - self.snapshot, - include_adoption=self.has_coverage_adoption_snapshot, - ) - if not hmac.compare_digest(self.payload_sha256, expected): - raise BaselineValidationError( - "Metrics baseline integrity check failed: payload_sha256 mismatch.", - status=MetricsBaselineStatus.INTEGRITY_FAILED, - ) - if self.api_surface_snapshot is not None: - if ( - not isinstance(self.api_surface_payload_sha256, str) - or len(self.api_surface_payload_sha256) != 64 - ): - raise BaselineValidationError( - "Metrics baseline API surface integrity payload hash is missing.", - status=MetricsBaselineStatus.INTEGRITY_MISSING, - ) - expected_api = _compute_api_surface_payload_sha256( - self.api_surface_snapshot, - root=self.path.parent, - ) - legacy_absolute_expected_api = _compute_api_surface_payload_sha256( - 
self.api_surface_snapshot - ) - legacy_expected_api = _compute_legacy_api_surface_payload_sha256( - self.api_surface_snapshot, - root=self.path.parent, - ) - legacy_absolute_qualname_expected_api = ( - _compute_legacy_api_surface_payload_sha256(self.api_surface_snapshot) - ) - if not ( - hmac.compare_digest(self.api_surface_payload_sha256, expected_api) - or hmac.compare_digest( - self.api_surface_payload_sha256, - legacy_absolute_expected_api, - ) - or hmac.compare_digest( - self.api_surface_payload_sha256, - legacy_expected_api, - ) - or hmac.compare_digest( - self.api_surface_payload_sha256, - legacy_absolute_qualname_expected_api, - ) - ): - raise BaselineValidationError( - "Metrics baseline integrity check failed: " - "api_surface payload_sha256 mismatch.", - status=MetricsBaselineStatus.INTEGRITY_FAILED, - ) - - @staticmethod - def from_project_metrics( - *, - project_metrics: ProjectMetrics, - path: str | Path, - schema_version: str | None = None, - python_tag: str | None = None, - generator_version: str | None = None, - include_adoption: bool = True, - include_api_surface: bool = True, - ) -> MetricsBaseline: - baseline = MetricsBaseline(path) - baseline.generator_name = METRICS_BASELINE_GENERATOR - baseline.generator_version = generator_version or __version__ - baseline.schema_version = schema_version or METRICS_BASELINE_SCHEMA_VERSION - baseline.python_tag = python_tag or current_python_tag() - baseline.created_at = _now_utc_z() - baseline.snapshot = snapshot_from_project_metrics(project_metrics) - baseline.payload_sha256 = _compute_payload_sha256( - baseline.snapshot, - include_adoption=include_adoption, - ) - baseline.has_coverage_adoption_snapshot = include_adoption - baseline.api_surface_snapshot = ( - project_metrics.api_surface if include_api_surface else None - ) - baseline.api_surface_payload_sha256 = ( - _compute_api_surface_payload_sha256( - baseline.api_surface_snapshot, - root=baseline.path.parent, - ) - if baseline.api_surface_snapshot is not 
None - else None - ) - return baseline - - def diff(self, current: ProjectMetrics) -> MetricsDiff: - if self.snapshot is None: - snapshot = MetricsSnapshot( - max_complexity=0, - high_risk_functions=(), - max_coupling=0, - high_coupling_classes=(), - max_cohesion=0, - low_cohesion_classes=(), - dependency_cycles=(), - dependency_max_depth=0, - dead_code_items=(), - health_score=0, - health_grade="F", - typing_param_permille=0, - typing_return_permille=0, - docstring_permille=0, - typing_any_count=0, - ) - else: - snapshot = self.snapshot - - current_snapshot = snapshot_from_project_metrics(current) - - new_high_risk_functions = tuple( - sorted( - set(current_snapshot.high_risk_functions) - - set(snapshot.high_risk_functions) - ) - ) - new_high_coupling_classes = tuple( - sorted( - set(current_snapshot.high_coupling_classes) - - set(snapshot.high_coupling_classes) - ) - ) - new_cycles = tuple( - sorted( - set(current_snapshot.dependency_cycles) - - set(snapshot.dependency_cycles) - ) - ) - new_dead_code = tuple( - sorted( - set(current_snapshot.dead_code_items) - set(snapshot.dead_code_items) - ) - ) - added_api_symbols: tuple[str, ...] - api_breaking_changes: tuple[ApiBreakingChange, ...] 
- if self.api_surface_snapshot is None: - added_api_symbols = () - api_breaking_changes = () - else: - added_api_symbols, api_breaking_changes = compare_api_surfaces( - baseline=self.api_surface_snapshot, - current=current.api_surface, - strict_types=False, - ) - - return MetricsDiff( - new_high_risk_functions=new_high_risk_functions, - new_high_coupling_classes=new_high_coupling_classes, - new_cycles=new_cycles, - new_dead_code=new_dead_code, - health_delta=current_snapshot.health_score - snapshot.health_score, - typing_param_permille_delta=( - current_snapshot.typing_param_permille - snapshot.typing_param_permille - ), - typing_return_permille_delta=( - current_snapshot.typing_return_permille - - snapshot.typing_return_permille - ), - docstring_permille_delta=( - current_snapshot.docstring_permille - snapshot.docstring_permille - ), - new_api_symbols=added_api_symbols, - new_api_breaking_changes=api_breaking_changes, - ) - - -def _is_compatible_metrics_schema( - *, - baseline_version: str | None, - expected_version: str, -) -> bool: - if baseline_version is None: - return False - baseline_major_minor = _parse_major_minor(baseline_version) - expected_major_minor = _parse_major_minor(expected_version) - if baseline_major_minor is None or expected_major_minor is None: - return baseline_version == expected_version - baseline_major, baseline_minor = baseline_major_minor - expected_major, expected_minor = expected_major_minor - return baseline_major == expected_major and baseline_minor <= expected_minor - - -def _has_coverage_adoption_snapshot(metrics_obj: Mapping[str, object]) -> bool: - return all( - key in metrics_obj - for key in ( - "typing_param_permille", - "typing_return_permille", - "docstring_permille", - ) - ) - - -def _parse_major_minor(version: str) -> tuple[int, int] | None: - parts = version.split(".") - if len(parts) != 2 or not all(part.isdigit() for part in parts): - return None - return int(parts[0]), int(parts[1]) - - -def _atomic_write_json(path: 
Path, payload: dict[str, object]) -> None: - _write_json_document_atomically( - path, - payload, - indent=True, - trailing_newline=True, - ) - - -def _load_json_object(path: Path) -> dict[str, Any]: - try: - return _read_json_object(path) - except OSError as e: - raise BaselineValidationError( - f"Cannot read metrics baseline file at {path}: {e}", - status=MetricsBaselineStatus.INVALID_JSON, - ) from e - except JSONDecodeError as e: - raise BaselineValidationError( - f"Corrupted metrics baseline file at {path}: {e}", - status=MetricsBaselineStatus.INVALID_JSON, - ) from e - except TypeError: - raise BaselineValidationError( - f"Metrics baseline payload must be an object at {path}", - status=MetricsBaselineStatus.INVALID_TYPE, - ) from None - - -def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> None: - validate_top_level_structure( - payload, - path=path, - required_keys=_TOP_LEVEL_REQUIRED_KEYS, - allowed_keys=_TOP_LEVEL_ALLOWED_KEYS, - schema_label="metrics baseline", - missing_status=MetricsBaselineStatus.MISSING_FIELDS, - extra_status=MetricsBaselineStatus.INVALID_TYPE, - ) - - -def _validate_required_keys( - payload: Mapping[str, Any], - required: frozenset[str], - *, - path: Path, -) -> None: - missing = required - set(payload.keys()) - if missing: - raise BaselineValidationError( - "Invalid metrics baseline schema at " - f"{path}: missing required fields: {', '.join(sorted(missing))}", - status=MetricsBaselineStatus.MISSING_FIELDS, - ) - - -def _validate_exact_keys( - payload: Mapping[str, Any], - required: frozenset[str], - *, - path: Path, -) -> None: - extra = set(payload.keys()) - set(required) - if extra: - raise BaselineValidationError( - "Invalid metrics baseline schema at " - f"{path}: unexpected fields: {', '.join(sorted(extra))}", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - - -def _require_str(payload: Mapping[str, Any], key: str, *, path: Path) -> str: - value = payload.get(key) - if isinstance(value, str): - 
return value - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: {key!r} must be str", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - - -def _extract_metrics_payload_sha256( - payload: Mapping[str, Any], - *, - path: Path, -) -> str: - direct = payload.get(_METRICS_PAYLOAD_SHA256_KEY) - if isinstance(direct, str): - return direct - return _require_str(payload, "payload_sha256", path=path) - - -def _extract_optional_payload_sha256( - payload: Mapping[str, Any], - *, - key: str, -) -> str | None: - value = payload.get(key) - return value if isinstance(value, str) else None - - -def _require_int(payload: Mapping[str, Any], key: str, *, path: Path) -> int: - value = payload.get(key) - if isinstance(value, bool): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: {key!r} must be int", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - if isinstance(value, int): - return value - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: {key!r} must be int", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - - -def _optional_require_str( - payload: Mapping[str, Any], - key: str, - *, - path: Path, -) -> str | None: - value = payload.get(key) - if value is None: - return None - if isinstance(value, str): - return value - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: {key!r} must be str", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - - -def _require_str_list(payload: Mapping[str, Any], key: str, *, path: Path) -> list[str]: - value = payload.get(key) - if not isinstance(value, list): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: {key!r} must be list[str]", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - if not all(isinstance(item, str) for item in value): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: {key!r} must be list[str]", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - return 
value - - -def _parse_cycles( - payload: Mapping[str, Any], - *, - key: str, - path: Path, -) -> tuple[tuple[str, ...], ...]: - value = payload.get(key) - if not isinstance(value, list): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: {key!r} must be list", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - - cycles: list[tuple[str, ...]] = [] - for cycle in value: - if not isinstance(cycle, list): - raise BaselineValidationError( - "Invalid metrics baseline schema at " - f"{path}: {key!r} cycle item must be list[str]", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - if not all(isinstance(item, str) for item in cycle): - raise BaselineValidationError( - "Invalid metrics baseline schema at " - f"{path}: {key!r} cycle item must be list[str]", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - cycles.append(tuple(cycle)) - return tuple(sorted(set(cycles))) - - -def _parse_generator( - meta: Mapping[str, Any], - *, - path: Path, -) -> tuple[str, str | None]: - generator = meta.get("generator") - if isinstance(generator, str): - version_value = meta.get("generator_version") - if version_value is None: - version_value = meta.get("codeclone_version") - if version_value is None: - return generator, None - if not isinstance(version_value, str): - raise BaselineValidationError( - "Invalid metrics baseline schema at " - f"{path}: generator_version must be str", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - return generator, version_value - - if isinstance(generator, dict): - allowed_keys = {"name", "version"} - extra = set(generator.keys()) - allowed_keys - if extra: - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: " - f"unexpected generator keys: {', '.join(sorted(extra))}", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - name = generator.get("name") - version = generator.get("version") - if not isinstance(name, str): - raise BaselineValidationError( - "Invalid metrics baseline schema at " - f"{path}: 
generator.name must be str", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - if version is not None and not isinstance(version, str): - raise BaselineValidationError( - "Invalid metrics baseline schema at " - f"{path}: generator.version must be str", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - return name, version if isinstance(version, str) else None - - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: generator must be object or str", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - - -def _require_embedded_clone_baseline_payload( - payload: Mapping[str, Any], - *, - path: Path, -) -> tuple[dict[str, Any], dict[str, Any]]: - meta_obj = payload.get("meta") - clones_obj = payload.get("clones") - if not isinstance(meta_obj, dict): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: 'meta' must be object", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - if not isinstance(clones_obj, dict): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: 'clones' must be object", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - _require_str(meta_obj, "payload_sha256", path=path) - _require_str(meta_obj, "python_tag", path=path) - _require_str(meta_obj, "created_at", path=path) - functions = clones_obj.get("functions") - blocks = clones_obj.get("blocks") - if not isinstance(functions, list) or not all( - isinstance(item, str) for item in functions - ): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: 'clones.functions' must be list[str]", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - if not isinstance(blocks, list) or not all( - isinstance(item, str) for item in blocks - ): - raise BaselineValidationError( - f"Invalid baseline schema at {path}: 'clones.blocks' must be list[str]", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - return meta_obj, clones_obj - - -def _resolve_embedded_schema_version(meta: Mapping[str, Any], *, path: Path) -> str: - raw_version = 
_require_str(meta, "schema_version", path=path) - parts = raw_version.split(".") - if len(parts) not in {2, 3} or not all(part.isdigit() for part in parts): - raise BaselineValidationError( - "Invalid baseline schema at " - f"{path}: 'schema_version' must be semver string", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - major = int(parts[0]) - if major >= 2: - return raw_version - return BASELINE_SCHEMA_VERSION - - -def _parse_snapshot( - payload: Mapping[str, Any], - *, - path: Path, -) -> MetricsSnapshot: - grade = _require_str(payload, "health_grade", path=path) - if grade not in {"A", "B", "C", "D", "F"}: - raise BaselineValidationError( - "Invalid metrics baseline schema at " - f"{path}: 'health_grade' must be one of A/B/C/D/F", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - - return MetricsSnapshot( - max_complexity=_require_int(payload, "max_complexity", path=path), - high_risk_functions=tuple( - sorted(set(_require_str_list(payload, "high_risk_functions", path=path))) - ), - max_coupling=_require_int(payload, "max_coupling", path=path), - high_coupling_classes=tuple( - sorted(set(_require_str_list(payload, "high_coupling_classes", path=path))) - ), - max_cohesion=_require_int(payload, "max_cohesion", path=path), - low_cohesion_classes=tuple( - sorted(set(_require_str_list(payload, "low_cohesion_classes", path=path))) - ), - dependency_cycles=_parse_cycles(payload, key="dependency_cycles", path=path), - dependency_max_depth=_require_int(payload, "dependency_max_depth", path=path), - dead_code_items=tuple( - sorted(set(_require_str_list(payload, "dead_code_items", path=path))) - ), - health_score=_require_int(payload, "health_score", path=path), - health_grade=cast("Literal['A', 'B', 'C', 'D', 'F']", grade), - typing_param_permille=_optional_int( - payload, - "typing_param_permille", - path=path, - ), - typing_return_permille=_optional_int( - payload, - "typing_return_permille", - path=path, - ), - docstring_permille=_optional_int(payload, 
"docstring_permille", path=path), - typing_any_count=_optional_int(payload, "typing_any_count", path=path), - ) - - -def _optional_int(payload: Mapping[str, Any], key: str, *, path: Path) -> int: - value = payload.get(key) - if value is None: - return 0 - return _require_int(payload, key, path=path) - - -def _parse_api_surface_snapshot( - payload: object, - *, - path: Path, - root: Path | None = None, -) -> ApiSurfaceSnapshot | None: - if payload is None: - return None - if not isinstance(payload, dict): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: 'api_surface' must be object", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - raw_modules = payload.get("modules", []) - if not isinstance(raw_modules, list): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: " - "'api_surface.modules' must be list", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - modules: list[ModuleApiSurface] = [] - for raw_module in raw_modules: - if not isinstance(raw_module, dict): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: " - "api surface module must be object", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - module = _require_str(raw_module, "module", path=path) - wire_filepath = _require_str(raw_module, "filepath", path=path) - filepath = runtime_filepath_from_wire(wire_filepath, root=root) - all_declared = _require_str_list_or_none(raw_module, "all_declared", path=path) - raw_symbols = raw_module.get("symbols", []) - if not isinstance(raw_symbols, list): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: " - "api surface symbols must be list", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - symbols: list[PublicSymbol] = [] - for raw_symbol in raw_symbols: - if not isinstance(raw_symbol, dict): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: " - "api surface symbol must be object", - 
status=MetricsBaselineStatus.INVALID_TYPE, - ) - local_name = _optional_require_str(raw_symbol, "local_name", path=path) - legacy_qualname = _optional_require_str(raw_symbol, "qualname", path=path) - if local_name is None and legacy_qualname is None: - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: " - "api surface symbol requires 'local_name' or 'qualname'", - status=MetricsBaselineStatus.MISSING_FIELDS, - ) - if local_name is None: - assert legacy_qualname is not None - qualname = legacy_qualname - else: - qualname = _compose_api_surface_qualname( - module=module, - local_name=local_name, - ) - kind = _require_str(raw_symbol, "kind", path=path) - exported_via = _require_str(raw_symbol, "exported_via", path=path) - params_raw = raw_symbol.get("params", []) - if not isinstance(params_raw, list): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: " - "api surface params must be list", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - params: list[ApiParamSpec] = [] - for raw_param in params_raw: - if not isinstance(raw_param, dict): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: " - "api param must be object", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - name = _require_str(raw_param, "name", path=path) - param_kind = _require_str(raw_param, "kind", path=path) - has_default = raw_param.get("has_default") - annotation_hash = _optional_require_str( - raw_param, - "annotation_hash", - path=path, - ) - if not isinstance(has_default, bool): - raise BaselineValidationError( - f"Invalid metrics baseline schema at {path}: " - "api param 'has_default' must be bool", - status=MetricsBaselineStatus.INVALID_TYPE, - ) - params.append( - ApiParamSpec( - name=name, - kind=cast( - ( - "Literal['pos_only', 'pos_or_kw', " - "'vararg', 'kw_only', 'kwarg']" - ), - param_kind, - ), - has_default=has_default, - annotation_hash=annotation_hash or "", - ) - ) - symbols.append( - 
PublicSymbol( - qualname=qualname, - kind=cast( - "Literal['function', 'class', 'method', 'constant']", - kind, - ), - start_line=_require_int(raw_symbol, "start_line", path=path), - end_line=_require_int(raw_symbol, "end_line", path=path), - params=tuple(params), - returns_hash=_optional_require_str( - raw_symbol, - "returns_hash", - path=path, - ) - or "", - exported_via=cast("Literal['all', 'name']", exported_via), - ) - ) - modules.append( - ModuleApiSurface( - module=module, - filepath=filepath, - symbols=tuple(sorted(symbols, key=lambda item: item.qualname)), - all_declared=tuple(all_declared) if all_declared is not None else None, - ) - ) - return ApiSurfaceSnapshot( - modules=tuple(sorted(modules, key=lambda item: (item.filepath, item.module))) - ) - - -def _require_str_list_or_none( - payload: Mapping[str, Any], - key: str, - *, - path: Path, -) -> list[str] | None: - value = payload.get(key) - if value is None: - return None - return _require_str_list(payload, key, path=path) - - -def _api_surface_snapshot_payload( - snapshot: ApiSurfaceSnapshot, - *, - root: Path | None = None, - legacy_qualname: bool = False, -) -> dict[str, object]: - return { - "modules": [ - { - "module": module.module, - "filepath": wire_filepath_from_runtime(module.filepath, root=root), - "all_declared": list(module.all_declared or ()), - "symbols": [ - { - ("qualname" if legacy_qualname else "local_name"): ( - symbol.qualname - if legacy_qualname - else _local_name_from_qualname( - module=module.module, - qualname=symbol.qualname, - ) - ), - "kind": symbol.kind, - "start_line": symbol.start_line, - "end_line": symbol.end_line, - "params": [ - { - "name": param.name, - "kind": param.kind, - "has_default": param.has_default, - "annotation_hash": param.annotation_hash, - } - for param in symbol.params - ], - "returns_hash": symbol.returns_hash, - "exported_via": symbol.exported_via, - } - for symbol in sorted( - module.symbols, - key=lambda item: item.qualname, - ) - ], - } - for 
module in sorted( - snapshot.modules, - key=lambda item: (item.filepath, item.module), - ) - ] - } - - -def _compute_api_surface_payload_sha256( - snapshot: ApiSurfaceSnapshot, - *, - root: Path | None = None, -) -> str: - canonical = _canonical_json(_api_surface_snapshot_payload(snapshot, root=root)) - return hashlib.sha256(canonical.encode("utf-8")).hexdigest() - - -def _compute_legacy_api_surface_payload_sha256( - snapshot: ApiSurfaceSnapshot, - *, - root: Path | None = None, -) -> str: - canonical = _canonical_json( - _api_surface_snapshot_payload(snapshot, root=root, legacy_qualname=True) - ) - return hashlib.sha256(canonical.encode("utf-8")).hexdigest() - - -def _compose_api_surface_qualname(*, module: str, local_name: str) -> str: - return f"{module}:{local_name}" - - -def _local_name_from_qualname(*, module: str, qualname: str) -> str: - prefix = f"{module}:" - if qualname.startswith(prefix): - return qualname[len(prefix) :] - return qualname - - -def _build_payload( - *, - snapshot: MetricsSnapshot, - schema_version: str, - python_tag: str, - generator_name: str, - generator_version: str, - created_at: str, - include_adoption: bool = True, - api_surface_snapshot: ApiSurfaceSnapshot | None = None, - api_surface_root: Path | None = None, -) -> dict[str, Any]: - payload_sha256 = _compute_payload_sha256( - snapshot, - include_adoption=include_adoption, - ) - payload: dict[str, Any] = { - "meta": { - "generator": { - "name": generator_name, - "version": generator_version, - }, - "schema_version": schema_version, - "python_tag": python_tag, - "created_at": created_at, - "payload_sha256": payload_sha256, - }, - "metrics": _snapshot_payload( - snapshot, - include_adoption=include_adoption, - ), - } - if api_surface_snapshot is not None: - payload["meta"][_API_SURFACE_PAYLOAD_SHA256_KEY] = ( - _compute_api_surface_payload_sha256( - api_surface_snapshot, - root=api_surface_root, - ) - ) - payload["api_surface"] = _api_surface_snapshot_payload( - 
api_surface_snapshot, - root=api_surface_root, - ) - return payload - - -__all__ = [ - "BASELINE_SCHEMA_VERSION", - "MAX_METRICS_BASELINE_SIZE_BYTES", - "METRICS_BASELINE_GENERATOR", - "METRICS_BASELINE_SCHEMA_VERSION", - "METRICS_BASELINE_UNTRUSTED_STATUSES", - "MetricsBaseline", - "MetricsBaselineStatus", - "coerce_metrics_baseline_status", - "current_python_tag", - "snapshot_from_project_metrics", -] diff --git a/codeclone/paths.py b/codeclone/paths/__init__.py similarity index 98% rename from codeclone/paths.py rename to codeclone/paths/__init__.py index d93428f..f12522a 100644 --- a/codeclone/paths.py +++ b/codeclone/paths/__init__.py @@ -8,7 +8,7 @@ from pathlib import Path -from .domain.source_scope import ( +from ..domain.source_scope import ( SOURCE_KIND_FIXTURES, SOURCE_KIND_OTHER, SOURCE_KIND_PRODUCTION, diff --git a/codeclone/pipeline.py b/codeclone/pipeline.py deleted file mode 100644 index 50d2c58..0000000 --- a/codeclone/pipeline.py +++ /dev/null @@ -1,2773 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import inspect -import os -from collections.abc import Mapping -from concurrent.futures import ProcessPoolExecutor, as_completed -from dataclasses import dataclass -from hashlib import sha256 -from pathlib import Path -from typing import TYPE_CHECKING, Literal, cast - -import orjson - -from ._coerce import as_int, as_str -from .cache import ( - ApiParamSpecDict, - Cache, - CacheEntry, - ClassMetricsDict, - DeadCandidateDict, - FileStat, - ModuleDepDict, - PublicSymbolDict, - SegmentReportProjection, - SourceStatsDict, - StructuralFindingGroupDict, - file_stat_signature, -) -from .contracts import ExitCode -from .domain.findings import CATEGORY_COHESION, CATEGORY_COMPLEXITY, CATEGORY_COUPLING -from .domain.quality import CONFIDENCE_HIGH, RISK_HIGH, RISK_LOW -from .extractor import extract_units_and_stats_from_source -from .golden_fixtures import ( - build_suppressed_clone_groups, - split_clone_groups_for_golden_fixtures, -) -from .grouping import build_block_groups, build_groups, build_segment_groups -from .metrics import ( - CoverageJoinParseError, - HealthInputs, - build_coverage_join, - build_dep_graph, - build_overloaded_modules_payload, - compute_health, - find_suppressed_unused, - find_unused, -) -from .models import ( - ApiBreakingChange, - ApiParamSpec, - ApiSurfaceSnapshot, - BlockUnit, - ClassMetrics, - CoverageJoinResult, - DeadCandidate, - DeadItem, - DepGraph, - FileMetrics, - GroupItem, - GroupItemLike, - GroupMap, - MetricsDiff, - ModuleApiSurface, - ModuleDep, - ModuleDocstringCoverage, - ModuleTypingCoverage, - ProjectMetrics, - PublicSymbol, - SegmentUnit, - StructuralFindingGroup, - StructuralFindingOccurrence, - Suggestion, - SuppressedCloneGroup, - Unit, -) -from .normalize import NormalizationConfig -from .paths import is_test_filepath -from .report.blocks import prepare_block_report_groups -from .report.explain import 
build_block_group_facts -from .report.json_contract import build_report_document -from .report.segments import prepare_segment_report_groups -from .report.serialize import render_json_report_document, render_text_report_document -from .report.suggestions import generate_suggestions -from .scanner import iter_py_files, module_name_from_path -from .structural_findings import build_clone_cohort_structural_findings -from .suppressions import DEAD_CODE_RULE_ID, INLINE_CODECLONE_SUPPRESSION_SOURCE - -if TYPE_CHECKING: - from argparse import Namespace - from collections.abc import Callable, Collection, Mapping, Sequence - -MAX_FILE_SIZE = 10 * 1024 * 1024 -DEFAULT_BATCH_SIZE = 100 -PARALLEL_MIN_FILES_PER_WORKER = 8 -PARALLEL_MIN_FILES_FLOOR = 16 -DEFAULT_RUNTIME_PROCESSES = 4 - -_as_int = as_int -_as_str = as_str - - -@dataclass(frozen=True, slots=True) -class OutputPaths: - html: Path | None = None - json: Path | None = None - text: Path | None = None - md: Path | None = None - sarif: Path | None = None - - -@dataclass(frozen=True, slots=True) -class BootstrapResult: - root: Path - config: NormalizationConfig - args: Namespace - output_paths: OutputPaths - cache_path: Path - - -@dataclass(frozen=True, slots=True) -class DiscoveryResult: - files_found: int - cache_hits: int - files_skipped: int - all_file_paths: tuple[str, ...] - cached_units: tuple[GroupItem, ...] - cached_blocks: tuple[GroupItem, ...] - cached_segments: tuple[GroupItem, ...] - cached_class_metrics: tuple[ClassMetrics, ...] - cached_module_deps: tuple[ModuleDep, ...] - cached_dead_candidates: tuple[DeadCandidate, ...] - cached_referenced_names: frozenset[str] - files_to_process: tuple[str, ...] - skipped_warnings: tuple[str, ...] - cached_referenced_qualnames: frozenset[str] = frozenset() - cached_typing_modules: tuple[ModuleTypingCoverage, ...] = () - cached_docstring_modules: tuple[ModuleDocstringCoverage, ...] = () - cached_api_modules: tuple[ModuleApiSurface, ...] 
= () - cached_structural_findings: tuple[StructuralFindingGroup, ...] = () - cached_segment_report_projection: SegmentReportProjection | None = None - cached_lines: int = 0 - cached_functions: int = 0 - cached_methods: int = 0 - cached_classes: int = 0 - cached_source_stats_by_file: tuple[tuple[str, int, int, int, int], ...] = () - - -@dataclass(frozen=True, slots=True) -class FileProcessResult: - filepath: str - success: bool - error: str | None = None - units: list[Unit] | None = None - blocks: list[BlockUnit] | None = None - segments: list[SegmentUnit] | None = None - lines: int = 0 - functions: int = 0 - methods: int = 0 - classes: int = 0 - stat: FileStat | None = None - error_kind: str | None = None - file_metrics: FileMetrics | None = None - structural_findings: list[StructuralFindingGroup] | None = None - - -@dataclass(frozen=True, slots=True) -class ProcessingResult: - units: tuple[GroupItem, ...] - blocks: tuple[GroupItem, ...] - segments: tuple[GroupItem, ...] - class_metrics: tuple[ClassMetrics, ...] - module_deps: tuple[ModuleDep, ...] - dead_candidates: tuple[DeadCandidate, ...] - referenced_names: frozenset[str] - files_analyzed: int - files_skipped: int - analyzed_lines: int - analyzed_functions: int - analyzed_methods: int - analyzed_classes: int - failed_files: tuple[str, ...] - source_read_failures: tuple[str, ...] - referenced_qualnames: frozenset[str] = frozenset() - typing_modules: tuple[ModuleTypingCoverage, ...] = () - docstring_modules: tuple[ModuleDocstringCoverage, ...] = () - api_modules: tuple[ModuleApiSurface, ...] = () - structural_findings: tuple[StructuralFindingGroup, ...] = () - source_stats_by_file: tuple[tuple[str, int, int, int, int], ...] 
= () - - -@dataclass(frozen=True, slots=True) -class AnalysisResult: - func_groups: GroupMap - block_groups: GroupMap - block_groups_report: GroupMap - segment_groups: GroupMap - suppressed_segment_groups: int - block_group_facts: dict[str, dict[str, str]] - func_clones_count: int - block_clones_count: int - segment_clones_count: int - files_analyzed_or_cached: int - project_metrics: ProjectMetrics | None - metrics_payload: dict[str, object] | None - suggestions: tuple[Suggestion, ...] - segment_groups_raw_digest: str - suppressed_clone_groups: tuple[SuppressedCloneGroup, ...] = () - coverage_join: CoverageJoinResult | None = None - suppressed_dead_code_items: int = 0 - structural_findings: tuple[StructuralFindingGroup, ...] = () - - -@dataclass(frozen=True, slots=True) -class GatingResult: - exit_code: int - reasons: tuple[str, ...] - - -@dataclass(frozen=True, slots=True) -class ReportArtifacts: - html: str | None = None - json: str | None = None - text: str | None = None - md: str | None = None - sarif: str | None = None - report_document: dict[str, object] | None = None - - -@dataclass(frozen=True, slots=True) -class MetricGateConfig: - fail_complexity: int - fail_coupling: int - fail_cohesion: int - fail_cycles: bool - fail_dead_code: bool - fail_health: int - fail_on_new_metrics: bool - fail_on_typing_regression: bool = False - fail_on_docstring_regression: bool = False - fail_on_api_break: bool = False - fail_on_untested_hotspots: bool = False - min_typing_coverage: int = -1 - min_docstring_coverage: int = -1 - coverage_min: int = 50 - - -def _as_sorted_str_tuple(value: object) -> tuple[str, ...]: - if not isinstance(value, list): - return () - return tuple(sorted({item for item in value if isinstance(item, str) and item})) - - -def _group_item_sort_key(item: GroupItemLike) -> tuple[str, int, int, str]: - return ( - _as_str(item.get("filepath")), - _as_int(item.get("start_line")), - _as_int(item.get("end_line")), - _as_str(item.get("qualname")), - ) - - -def 
_segment_projection_item_sort_key(item: GroupItemLike) -> tuple[str, str, int, int]: - return ( - _as_str(item.get("filepath")), - _as_str(item.get("qualname")), - _as_int(item.get("start_line")), - _as_int(item.get("end_line")), - ) - - -def _segment_groups_digest(segment_groups: GroupMap) -> str: - normalized_rows: list[ - tuple[str, tuple[tuple[str, str, int, int, int, str, str], ...]] - ] = [] - for group_key in sorted(segment_groups): - items = sorted(segment_groups[group_key], key=_segment_projection_item_sort_key) - normalized_items: list[tuple[str, str, int, int, int, str, str]] = [ - ( - _as_str(item.get("filepath")), - _as_str(item.get("qualname")), - _as_int(item.get("start_line")), - _as_int(item.get("end_line")), - _as_int(item.get("size")), - _as_str(item.get("segment_hash")), - _as_str(item.get("segment_sig")), - ) - for item in items - ] - normalized_rows.append((group_key, tuple(normalized_items))) - payload = orjson.dumps(tuple(normalized_rows), option=orjson.OPT_SORT_KEYS) - return sha256(payload).hexdigest() - - -def _coerce_segment_report_projection( - value: object, -) -> SegmentReportProjection | None: - if not isinstance(value, dict): - return None - digest = value.get("digest") - suppressed = value.get("suppressed") - groups = value.get("groups") - if ( - not isinstance(digest, str) - or not isinstance(suppressed, int) - or not isinstance(groups, dict) - ): - return None - if not all( - isinstance(group_key, str) and isinstance(items, list) - for group_key, items in groups.items() - ): - return None - return cast("SegmentReportProjection", value) - - -def _module_dep_sort_key(dep: ModuleDep) -> tuple[str, str, str, int]: - return dep.source, dep.target, dep.import_type, dep.line - - -def _class_metric_sort_key(metric: ClassMetrics) -> tuple[str, int, int, str]: - return metric.filepath, metric.start_line, metric.end_line, metric.qualname - - -def _dead_candidate_sort_key(item: DeadCandidate) -> tuple[str, int, int, str]: - return 
item.filepath, item.start_line, item.end_line, item.qualname - - -def _unit_to_group_item(unit: Unit) -> GroupItem: - return { - "qualname": unit.qualname, - "filepath": unit.filepath, - "start_line": unit.start_line, - "end_line": unit.end_line, - "loc": unit.loc, - "stmt_count": unit.stmt_count, - "fingerprint": unit.fingerprint, - "loc_bucket": unit.loc_bucket, - "cyclomatic_complexity": unit.cyclomatic_complexity, - "nesting_depth": unit.nesting_depth, - "risk": unit.risk, - "raw_hash": unit.raw_hash, - "entry_guard_count": unit.entry_guard_count, - "entry_guard_terminal_profile": unit.entry_guard_terminal_profile, - "entry_guard_has_side_effect_before": unit.entry_guard_has_side_effect_before, - "terminal_kind": unit.terminal_kind, - "try_finally_profile": unit.try_finally_profile, - "side_effect_order_profile": unit.side_effect_order_profile, - } - - -def _block_to_group_item(block: BlockUnit) -> GroupItem: - return { - "block_hash": block.block_hash, - "filepath": block.filepath, - "qualname": block.qualname, - "start_line": block.start_line, - "end_line": block.end_line, - "size": block.size, - } - - -def _segment_to_group_item(segment: SegmentUnit) -> GroupItem: - return { - "segment_hash": segment.segment_hash, - "segment_sig": segment.segment_sig, - "filepath": segment.filepath, - "qualname": segment.qualname, - "start_line": segment.start_line, - "end_line": segment.end_line, - "size": segment.size, - } - - -def _parallel_min_files(processes: int) -> int: - return max(PARALLEL_MIN_FILES_FLOOR, processes * PARALLEL_MIN_FILES_PER_WORKER) - - -def _resolve_process_count(processes: object) -> int: - if processes is None: - return DEFAULT_RUNTIME_PROCESSES - return max(1, _as_int(processes, DEFAULT_RUNTIME_PROCESSES)) - - -def _should_collect_structural_findings(output_paths: OutputPaths) -> bool: - return any( - path is not None - for path in ( - output_paths.html, - output_paths.json, - output_paths.md, - output_paths.sarif, - output_paths.text, - ) - ) - 
- -def _should_use_parallel(files_count: int, processes: int) -> bool: - if processes <= 1: - return False - return files_count >= _parallel_min_files(processes) - - -def _new_discovery_buffers() -> tuple[ - list[GroupItem], - list[GroupItem], - list[GroupItem], - list[ClassMetrics], - list[ModuleDep], - list[DeadCandidate], - set[str], - set[str], - list[ModuleTypingCoverage], - list[ModuleDocstringCoverage], - list[ModuleApiSurface], - list[str], - list[str], -]: - return [], [], [], [], [], [], set(), set(), [], [], [], [], [] - - -def _decode_cached_structural_finding_group( - group_dict: StructuralFindingGroupDict, - filepath: str, -) -> StructuralFindingGroup: - """Convert a StructuralFindingGroupDict (from cache) to a StructuralFindingGroup.""" - finding_kind = group_dict["finding_kind"] - finding_key = group_dict["finding_key"] - signature = group_dict["signature"] - items = tuple( - StructuralFindingOccurrence( - finding_kind=finding_kind, - finding_key=finding_key, - file_path=filepath, - qualname=item["qualname"], - start=item["start"], - end=item["end"], - signature=signature, - ) - for item in group_dict["items"] - ) - return StructuralFindingGroup( - finding_kind=finding_kind, - finding_key=finding_key, - signature=signature, - items=items, - ) - - -def bootstrap( - *, - args: Namespace, - root: Path, - output_paths: OutputPaths, - cache_path: Path, -) -> BootstrapResult: - return BootstrapResult( - root=root, - config=NormalizationConfig(), - args=args, - output_paths=output_paths, - cache_path=cache_path, - ) - - -def _resolve_optional_runtime_path(value: object, *, root: Path) -> Path | None: - text = str(value).strip() if value is not None else "" - if not text: - return None - candidate = Path(text).expanduser() - resolved = candidate if candidate.is_absolute() else root / candidate - try: - return resolved.resolve() - except OSError: - return resolved.absolute() - - -def _cache_entry_has_metrics(entry: CacheEntry) -> bool: - metric_keys = ( - 
"class_metrics", - "module_deps", - "dead_candidates", - "referenced_names", - "referenced_qualnames", - "import_names", - "class_names", - ) - return all(key in entry and isinstance(entry.get(key), list) for key in metric_keys) - - -def _cache_entry_has_structural_findings(entry: CacheEntry) -> bool: - return "structural_findings" in entry - - -def _cache_entry_source_stats(entry: CacheEntry) -> tuple[int, int, int, int] | None: - stats_obj = entry.get("source_stats") - if not isinstance(stats_obj, dict): - return None - lines = stats_obj.get("lines") - functions = stats_obj.get("functions") - methods = stats_obj.get("methods") - classes = stats_obj.get("classes") - if not ( - isinstance(lines, int) - and isinstance(functions, int) - and isinstance(methods, int) - and isinstance(classes, int) - and lines >= 0 - and functions >= 0 - and methods >= 0 - and classes >= 0 - ): - return None - return lines, functions, methods, classes - - -def _usable_cached_source_stats( - entry: CacheEntry, - *, - skip_metrics: bool, - collect_structural_findings: bool, -) -> tuple[int, int, int, int] | None: - if not skip_metrics and not _cache_entry_has_metrics(entry): - return None - if collect_structural_findings and not _cache_entry_has_structural_findings(entry): - return None - return _cache_entry_source_stats(entry) - - -def _cache_dict_module_fields( - value: object, -) -> tuple[Mapping[str, object], str, str] | None: - if not isinstance(value, dict): - return None - row = cast("Mapping[str, object]", value) - module = row.get("module") - filepath = row.get("filepath") - if not isinstance(module, str) or not isinstance(filepath, str): - return None - return row, module, filepath - - -def _cache_dict_int_fields( - row: Mapping[str, object], - *keys: str, -) -> tuple[int, ...] 
| None: - values: list[int] = [] - for key in keys: - value = row.get(key) - if not isinstance(value, int): - return None - values.append(value) - return tuple(values) - - -def _typing_coverage_from_cache_dict( - value: object, -) -> ModuleTypingCoverage | None: - row_info = _cache_dict_module_fields(value) - if row_info is None: - return None - row, module, filepath = row_info - int_fields = _cache_dict_int_fields( - row, - "callable_count", - "params_total", - "params_annotated", - "returns_total", - "returns_annotated", - "any_annotation_count", - ) - if int_fields is None: - return None - ( - callable_count, - params_total, - params_annotated, - returns_total, - returns_annotated, - any_annotation_count, - ) = int_fields - return ModuleTypingCoverage( - module=module, - filepath=filepath, - callable_count=callable_count, - params_total=params_total, - params_annotated=params_annotated, - returns_total=returns_total, - returns_annotated=returns_annotated, - any_annotation_count=any_annotation_count, - ) - - -def _docstring_coverage_from_cache_dict( - value: object, -) -> ModuleDocstringCoverage | None: - row_info = _cache_dict_module_fields(value) - if row_info is None: - return None - row, module, filepath = row_info - totals = _cache_dict_int_fields( - row, - "public_symbol_total", - "public_symbol_documented", - ) - if totals is None: - return None - public_symbol_total, public_symbol_documented = totals - return ModuleDocstringCoverage( - module=module, - filepath=filepath, - public_symbol_total=public_symbol_total, - public_symbol_documented=public_symbol_documented, - ) - - -def _api_param_spec_from_cache_dict(value: ApiParamSpecDict) -> ApiParamSpec | None: - name = value.get("name") - kind = value.get("kind") - has_default = value.get("has_default") - annotation_hash = value.get("annotation_hash", "") - if ( - not isinstance(name, str) - or not isinstance(kind, str) - or not isinstance(has_default, bool) - or not isinstance(annotation_hash, str) - ): - 
return None - return ApiParamSpec( - name=name, - kind=cast( - "Literal['pos_only', 'pos_or_kw', 'vararg', 'kw_only', 'kwarg']", - kind, - ), - has_default=has_default, - annotation_hash=annotation_hash, - ) - - -def _public_symbol_from_cache_dict( - value: PublicSymbolDict, -) -> PublicSymbol | None: - qualname = value.get("qualname") - kind = value.get("kind") - start_line = value.get("start_line") - end_line = value.get("end_line") - exported_via = value.get("exported_via", "name") - returns_hash = value.get("returns_hash", "") - params_raw = value.get("params", []) - if ( - not isinstance(qualname, str) - or not isinstance(kind, str) - or not isinstance(start_line, int) - or not isinstance(end_line, int) - or not isinstance(exported_via, str) - or not isinstance(returns_hash, str) - or not isinstance(params_raw, list) - ): - return None - params = [] - for param in params_raw: - if not isinstance(param, dict): - return None - parsed = _api_param_spec_from_cache_dict(param) - if parsed is None: - return None - params.append(parsed) - return PublicSymbol( - qualname=qualname, - kind=cast("Literal['function', 'class', 'method', 'constant']", kind), - start_line=start_line, - end_line=end_line, - params=tuple(params), - returns_hash=returns_hash, - exported_via=cast("Literal['all', 'name']", exported_via), - ) - - -def _api_surface_from_cache_dict(value: object) -> ModuleApiSurface | None: - row_info = _cache_dict_module_fields(value) - if row_info is None: - return None - row, module, filepath = row_info - all_declared_raw = row.get("all_declared", []) - symbols_raw = row.get("symbols", []) - if ( - not isinstance(all_declared_raw, list) - or not isinstance(symbols_raw, list) - or not all(isinstance(item, str) for item in all_declared_raw) - ): - return None - symbols: list[PublicSymbol] = [] - for item in symbols_raw: - if not isinstance(item, dict): - return None - parsed = _public_symbol_from_cache_dict(cast("PublicSymbolDict", item)) - if parsed is None: - 
return None - symbols.append(parsed) - return ModuleApiSurface( - module=module, - filepath=filepath, - all_declared=tuple(sorted(set(all_declared_raw))) or None, - symbols=tuple(sorted(symbols, key=lambda item: item.qualname)), - ) - - -def _load_cached_metrics_extended( - entry: CacheEntry, - *, - filepath: str, -) -> tuple[ - tuple[ClassMetrics, ...], - tuple[ModuleDep, ...], - tuple[DeadCandidate, ...], - frozenset[str], - frozenset[str], - ModuleTypingCoverage | None, - ModuleDocstringCoverage | None, - ModuleApiSurface | None, -]: - class_metrics_rows: list[ClassMetricsDict] = entry.get("class_metrics", []) - class_metrics = tuple( - ClassMetrics( - qualname=row["qualname"], - filepath=row["filepath"], - start_line=row["start_line"], - end_line=row["end_line"], - cbo=row["cbo"], - lcom4=row["lcom4"], - method_count=row["method_count"], - instance_var_count=row["instance_var_count"], - risk_coupling=cast( - "Literal['low', 'medium', 'high']", - row["risk_coupling"], - ), - risk_cohesion=cast( - "Literal['low', 'medium', 'high']", - row["risk_cohesion"], - ), - coupled_classes=_as_sorted_str_tuple(row.get("coupled_classes", [])), - ) - for row in class_metrics_rows - if row.get("qualname") and row.get("filepath") - ) - - module_dep_rows: list[ModuleDepDict] = entry.get("module_deps", []) - module_deps = tuple( - ModuleDep( - source=row["source"], - target=row["target"], - import_type=cast("Literal['import', 'from_import']", row["import_type"]), - line=row["line"], - ) - for row in module_dep_rows - if row.get("source") and row.get("target") - ) - - dead_rows: list[DeadCandidateDict] = entry.get("dead_candidates", []) - dead_candidates = tuple( - DeadCandidate( - qualname=row["qualname"], - local_name=row["local_name"], - filepath=row["filepath"], - start_line=row["start_line"], - end_line=row["end_line"], - kind=cast( - "Literal['function', 'class', 'method', 'import']", - row["kind"], - ), - suppressed_rules=tuple(sorted(set(row.get("suppressed_rules", [])))), 
- ) - for row in dead_rows - if row.get("qualname") and row.get("local_name") and row.get("filepath") - ) - - referenced_names = ( - frozenset() - if is_test_filepath(filepath) - else frozenset(entry.get("referenced_names", [])) - ) - referenced_qualnames = ( - frozenset() - if is_test_filepath(filepath) - else frozenset(entry.get("referenced_qualnames", [])) - ) - typing_coverage = _typing_coverage_from_cache_dict(entry.get("typing_coverage")) - docstring_coverage = _docstring_coverage_from_cache_dict( - entry.get("docstring_coverage") - ) - api_surface = _api_surface_from_cache_dict(entry.get("api_surface")) - return ( - class_metrics, - module_deps, - dead_candidates, - referenced_names, - referenced_qualnames, - typing_coverage, - docstring_coverage, - api_surface, - ) - - -def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult: - files_found = 0 - cache_hits = 0 - files_skipped = 0 - collect_structural_findings = _should_collect_structural_findings(boot.output_paths) - cached_segment_projection = _coerce_segment_report_projection( - getattr(cache, "segment_report_projection", None) - ) - - ( - cached_units, - cached_blocks, - cached_segments, - cached_class_metrics, - cached_module_deps, - cached_dead_candidates, - cached_referenced_names, - cached_referenced_qualnames, - cached_typing_modules, - cached_docstring_modules, - cached_api_modules, - files_to_process, - skipped_warnings, - ) = _new_discovery_buffers() - cached_sf: list[StructuralFindingGroup] = [] - cached_source_stats_by_file: list[tuple[str, int, int, int, int]] = [] - cached_lines = 0 - cached_functions = 0 - cached_methods = 0 - cached_classes = 0 - all_file_paths: list[str] = [] - - for filepath in iter_py_files(str(boot.root)): - files_found += 1 - all_file_paths.append(filepath) - try: - stat = file_stat_signature(filepath) - except OSError as exc: - files_skipped += 1 - skipped_warnings.append(f"{filepath}: {exc}") - continue - - cached = cache.get_file_entry(filepath) - if 
cached and cached.get("stat") == stat: - cached_source_stats = _usable_cached_source_stats( - cached, - skip_metrics=boot.args.skip_metrics, - collect_structural_findings=collect_structural_findings, - ) - if cached_source_stats is None: - files_to_process.append(filepath) - continue - - cache_hits += 1 - lines, functions, methods, classes = cached_source_stats - cached_lines += lines - cached_functions += functions - cached_methods += methods - cached_classes += classes - cached_source_stats_by_file.append( - (filepath, lines, functions, methods, classes) - ) - cached_units.extend(cast("list[GroupItem]", cast(object, cached["units"]))) - cached_blocks.extend( - cast("list[GroupItem]", cast(object, cached["blocks"])) - ) - cached_segments.extend( - cast("list[GroupItem]", cast(object, cached["segments"])) - ) - - if not boot.args.skip_metrics: - ( - class_metrics, - module_deps, - dead_candidates, - referenced_names, - referenced_qualnames, - typing_coverage, - docstring_coverage, - api_surface, - ) = _load_cached_metrics_extended(cached, filepath=filepath) - cached_class_metrics.extend(class_metrics) - cached_module_deps.extend(module_deps) - cached_dead_candidates.extend(dead_candidates) - cached_referenced_names.update(referenced_names) - cached_referenced_qualnames.update(referenced_qualnames) - if typing_coverage is not None: - cached_typing_modules.append(typing_coverage) - if docstring_coverage is not None: - cached_docstring_modules.append(docstring_coverage) - if api_surface is not None: - cached_api_modules.append(api_surface) - if collect_structural_findings: - cached_sf.extend( - _decode_cached_structural_finding_group(group_dict, filepath) - for group_dict in cached.get("structural_findings") or [] - ) - continue - - files_to_process.append(filepath) - - return DiscoveryResult( - files_found=files_found, - cache_hits=cache_hits, - files_skipped=files_skipped, - all_file_paths=tuple(all_file_paths), - cached_units=tuple(sorted(cached_units, 
key=_group_item_sort_key)), - cached_blocks=tuple(sorted(cached_blocks, key=_group_item_sort_key)), - cached_segments=tuple(sorted(cached_segments, key=_group_item_sort_key)), - cached_class_metrics=tuple( - sorted(cached_class_metrics, key=_class_metric_sort_key) - ), - cached_module_deps=tuple(sorted(cached_module_deps, key=_module_dep_sort_key)), - cached_dead_candidates=tuple( - sorted(cached_dead_candidates, key=_dead_candidate_sort_key) - ), - cached_referenced_names=frozenset(cached_referenced_names), - cached_referenced_qualnames=frozenset(cached_referenced_qualnames), - cached_typing_modules=tuple( - sorted(cached_typing_modules, key=lambda item: (item.filepath, item.module)) - ), - cached_docstring_modules=tuple( - sorted( - cached_docstring_modules, - key=lambda item: (item.filepath, item.module), - ) - ), - cached_api_modules=tuple( - sorted(cached_api_modules, key=lambda item: (item.filepath, item.module)) - ), - files_to_process=tuple(files_to_process), - skipped_warnings=tuple(sorted(skipped_warnings)), - cached_structural_findings=tuple(cached_sf), - cached_segment_report_projection=cached_segment_projection, - cached_lines=cached_lines, - cached_functions=cached_functions, - cached_methods=cached_methods, - cached_classes=cached_classes, - cached_source_stats_by_file=tuple( - sorted(cached_source_stats_by_file, key=lambda row: row[0]) - ), - ) - - -def process_file( - filepath: str, - root: str, - cfg: NormalizationConfig, - min_loc: int, - min_stmt: int, - collect_structural_findings: bool = True, - collect_api_surface: bool = False, - api_include_private_modules: bool = False, - block_min_loc: int = 20, - block_min_stmt: int = 8, - segment_min_loc: int = 20, - segment_min_stmt: int = 10, -) -> FileProcessResult: - try: - try: - stat_result = os.stat(filepath) - if stat_result.st_size > MAX_FILE_SIZE: - return FileProcessResult( - filepath=filepath, - success=False, - error=( - f"File too large: {stat_result.st_size} bytes " - f"(max 
{MAX_FILE_SIZE})" - ), - error_kind="file_too_large", - ) - except OSError as exc: - return FileProcessResult( - filepath=filepath, - success=False, - error=f"Cannot stat file: {exc}", - error_kind="stat_error", - ) - - stat: FileStat = { - "mtime_ns": stat_result.st_mtime_ns, - "size": stat_result.st_size, - } - - try: - source = Path(filepath).read_text("utf-8") - except UnicodeDecodeError as exc: - return FileProcessResult( - filepath=filepath, - success=False, - error=f"Encoding error: {exc}", - error_kind="source_read_error", - ) - except OSError as exc: - return FileProcessResult( - filepath=filepath, - success=False, - error=f"Cannot read file: {exc}", - error_kind="source_read_error", - ) - - module_name = module_name_from_path(root, filepath) - units, blocks, segments, source_stats, file_metrics, sf = ( - extract_units_and_stats_from_source( - source=source, - filepath=filepath, - module_name=module_name, - cfg=cfg, - min_loc=min_loc, - min_stmt=min_stmt, - block_min_loc=block_min_loc, - block_min_stmt=block_min_stmt, - segment_min_loc=segment_min_loc, - segment_min_stmt=segment_min_stmt, - collect_structural_findings=collect_structural_findings, - collect_api_surface=collect_api_surface, - api_include_private_modules=api_include_private_modules, - ) - ) - - return FileProcessResult( - filepath=filepath, - success=True, - units=units, - blocks=blocks, - segments=segments, - lines=source_stats.lines, - functions=source_stats.functions, - methods=source_stats.methods, - classes=source_stats.classes, - stat=stat, - file_metrics=file_metrics, - structural_findings=sf, - ) - except Exception as exc: # pragma: no cover - defensive shell around workers - return FileProcessResult( - filepath=filepath, - success=False, - error=f"Unexpected error: {type(exc).__name__}: {exc}", - error_kind="unexpected_error", - ) - - -def _invoke_process_file( - filepath: str, - root: str, - cfg: NormalizationConfig, - min_loc: int, - min_stmt: int, - *, - 
collect_structural_findings: bool, - collect_api_surface: bool, - api_include_private_modules: bool, - block_min_loc: int, - block_min_stmt: int, - segment_min_loc: int, - segment_min_stmt: int, -) -> FileProcessResult: - optional_kwargs: dict[str, object] = { - "collect_structural_findings": collect_structural_findings, - "collect_api_surface": collect_api_surface, - "api_include_private_modules": api_include_private_modules, - "block_min_loc": block_min_loc, - "block_min_stmt": block_min_stmt, - "segment_min_loc": segment_min_loc, - "segment_min_stmt": segment_min_stmt, - } - try: - signature = inspect.signature(process_file) - except (TypeError, ValueError): - supported_kwargs = optional_kwargs - else: - parameters = tuple(signature.parameters.values()) - if any( - parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in parameters - ): - supported_kwargs = optional_kwargs - else: - supported_names = {parameter.name for parameter in parameters} - supported_kwargs = { - key: value - for key, value in optional_kwargs.items() - if key in supported_names - } - process_callable = cast("Callable[..., FileProcessResult]", process_file) - return process_callable( - filepath, - root, - cfg, - min_loc, - min_stmt, - **supported_kwargs, - ) - - -def process( - *, - boot: BootstrapResult, - discovery: DiscoveryResult, - cache: Cache, - on_advance: Callable[[], None] | None = None, - on_worker_error: Callable[[str], None] | None = None, - on_parallel_fallback: Callable[[Exception], None] | None = None, - batch_size: int = DEFAULT_BATCH_SIZE, -) -> ProcessingResult: - files_to_process = discovery.files_to_process - if not files_to_process: - return ProcessingResult( - units=discovery.cached_units, - blocks=discovery.cached_blocks, - segments=discovery.cached_segments, - class_metrics=discovery.cached_class_metrics, - module_deps=discovery.cached_module_deps, - dead_candidates=discovery.cached_dead_candidates, - referenced_names=discovery.cached_referenced_names, - 
referenced_qualnames=discovery.cached_referenced_qualnames, - typing_modules=discovery.cached_typing_modules, - docstring_modules=discovery.cached_docstring_modules, - api_modules=discovery.cached_api_modules, - files_analyzed=0, - files_skipped=discovery.files_skipped, - analyzed_lines=0, - analyzed_functions=0, - analyzed_methods=0, - analyzed_classes=0, - failed_files=(), - source_read_failures=(), - structural_findings=discovery.cached_structural_findings, - source_stats_by_file=discovery.cached_source_stats_by_file, - ) - - all_units: list[GroupItem] = list(discovery.cached_units) - all_blocks: list[GroupItem] = list(discovery.cached_blocks) - all_segments: list[GroupItem] = list(discovery.cached_segments) - - all_class_metrics: list[ClassMetrics] = list(discovery.cached_class_metrics) - all_module_deps: list[ModuleDep] = list(discovery.cached_module_deps) - all_dead_candidates: list[DeadCandidate] = list(discovery.cached_dead_candidates) - all_referenced_names: set[str] = set(discovery.cached_referenced_names) - all_referenced_qualnames: set[str] = set(discovery.cached_referenced_qualnames) - all_typing_modules: list[ModuleTypingCoverage] = list( - discovery.cached_typing_modules - ) - all_docstring_modules: list[ModuleDocstringCoverage] = list( - discovery.cached_docstring_modules - ) - all_api_modules: list[ModuleApiSurface] = list(discovery.cached_api_modules) - collect_structural_findings = _should_collect_structural_findings(boot.output_paths) - collect_api_surface = not boot.args.skip_metrics and bool( - getattr(boot.args, "api_surface", False) - ) - api_include_private_modules = bool( - getattr(boot.args, "api_include_private_modules", False) - ) - - files_analyzed = 0 - files_skipped = discovery.files_skipped - analyzed_lines = 0 - analyzed_functions = 0 - analyzed_methods = 0 - analyzed_classes = 0 - - all_structural_findings: list[StructuralFindingGroup] = list( - discovery.cached_structural_findings - ) - source_stats_by_file: dict[str, tuple[int, 
int, int, int]] = { - filepath: (lines, functions, methods, classes) - for filepath, lines, functions, methods, classes in ( - discovery.cached_source_stats_by_file - ) - } - failed_files: list[str] = [] - source_read_failures: list[str] = [] - root_str = str(boot.root) - # Keep process-count fallback in the core runtime so non-CLI callers such as - # the MCP service do not need to guess or mirror parallelism policy. - processes = _resolve_process_count(boot.args.processes) - min_loc = int(boot.args.min_loc) - min_stmt = int(boot.args.min_stmt) - block_min_loc = int(boot.args.block_min_loc) - block_min_stmt = int(boot.args.block_min_stmt) - segment_min_loc = int(boot.args.segment_min_loc) - segment_min_stmt = int(boot.args.segment_min_stmt) - collect_structural_findings = _should_collect_structural_findings(boot.output_paths) - - def _accept_result(result: FileProcessResult) -> None: - nonlocal files_analyzed - nonlocal files_skipped - nonlocal analyzed_lines - nonlocal analyzed_functions - nonlocal analyzed_methods - nonlocal analyzed_classes - - if result.success and result.stat is not None: - source_stats_payload = SourceStatsDict( - lines=result.lines, - functions=result.functions, - methods=result.methods, - classes=result.classes, - ) - structural_payload = ( - result.structural_findings if collect_structural_findings else None - ) - try: - cache.put_file_entry( - result.filepath, - result.stat, - result.units or [], - result.blocks or [], - result.segments or [], - source_stats=source_stats_payload, - file_metrics=result.file_metrics, - structural_findings=structural_payload, - ) - except TypeError as exc: - if "source_stats" not in str(exc): - raise - cache.put_file_entry( - result.filepath, - result.stat, - result.units or [], - result.blocks or [], - result.segments or [], - file_metrics=result.file_metrics, - structural_findings=structural_payload, - ) - files_analyzed += 1 - analyzed_lines += result.lines - analyzed_functions += result.functions - 
analyzed_methods += result.methods - analyzed_classes += result.classes - source_stats_by_file[result.filepath] = ( - result.lines, - result.functions, - result.methods, - result.classes, - ) - - if result.units: - all_units.extend(_unit_to_group_item(unit) for unit in result.units) - if result.blocks: - all_blocks.extend( - _block_to_group_item(block) for block in result.blocks - ) - if result.segments: - all_segments.extend( - _segment_to_group_item(segment) for segment in result.segments - ) - if result.structural_findings: - all_structural_findings.extend(result.structural_findings) - - if not boot.args.skip_metrics and result.file_metrics is not None: - all_class_metrics.extend(result.file_metrics.class_metrics) - all_module_deps.extend(result.file_metrics.module_deps) - all_dead_candidates.extend(result.file_metrics.dead_candidates) - all_referenced_names.update(result.file_metrics.referenced_names) - all_referenced_qualnames.update( - result.file_metrics.referenced_qualnames - ) - if result.file_metrics.typing_coverage is not None: - all_typing_modules.append(result.file_metrics.typing_coverage) - if result.file_metrics.docstring_coverage is not None: - all_docstring_modules.append(result.file_metrics.docstring_coverage) - if result.file_metrics.api_surface is not None: - all_api_modules.append(result.file_metrics.api_surface) - return - - files_skipped += 1 - failure = f"{result.filepath}: {result.error}" - failed_files.append(failure) - if result.error_kind == "source_read_error": - source_read_failures.append(failure) - - def _run_sequential(files: Sequence[str]) -> None: - for filepath in files: - _accept_result( - _invoke_process_file( - filepath, - root_str, - boot.config, - min_loc, - min_stmt, - collect_structural_findings=collect_structural_findings, - collect_api_surface=collect_api_surface, - api_include_private_modules=api_include_private_modules, - block_min_loc=block_min_loc, - block_min_stmt=block_min_stmt, - segment_min_loc=segment_min_loc, - 
segment_min_stmt=segment_min_stmt, - ) - ) - if on_advance is not None: - on_advance() - - if _should_use_parallel(len(files_to_process), processes): - try: - with ProcessPoolExecutor(max_workers=processes) as executor: - for idx in range(0, len(files_to_process), batch_size): - batch = files_to_process[idx : idx + batch_size] - futures = [ - executor.submit( - _invoke_process_file, - filepath, - root_str, - boot.config, - min_loc, - min_stmt, - collect_structural_findings=collect_structural_findings, - collect_api_surface=collect_api_surface, - api_include_private_modules=api_include_private_modules, - block_min_loc=block_min_loc, - block_min_stmt=block_min_stmt, - segment_min_loc=segment_min_loc, - segment_min_stmt=segment_min_stmt, - ) - for filepath in batch - ] - future_to_path = { - id(future): filepath - for future, filepath in zip(futures, batch, strict=True) - } - for future in as_completed(futures): - filepath = future_to_path[id(future)] - try: - _accept_result(future.result()) - except Exception as exc: # pragma: no cover - worker crash - files_skipped += 1 - failed_files.append(f"{filepath}: {exc}") - if on_worker_error is not None: - on_worker_error(str(exc)) - if on_advance is not None: - on_advance() - except (OSError, RuntimeError, PermissionError) as exc: - if on_parallel_fallback is not None: - on_parallel_fallback(exc) - _run_sequential(files_to_process) - else: - _run_sequential(files_to_process) - - return ProcessingResult( - units=tuple(sorted(all_units, key=_group_item_sort_key)), - blocks=tuple(sorted(all_blocks, key=_group_item_sort_key)), - segments=tuple(sorted(all_segments, key=_group_item_sort_key)), - class_metrics=tuple(sorted(all_class_metrics, key=_class_metric_sort_key)), - module_deps=tuple(sorted(all_module_deps, key=_module_dep_sort_key)), - dead_candidates=tuple( - sorted(all_dead_candidates, key=_dead_candidate_sort_key) - ), - referenced_names=frozenset(all_referenced_names), - 
referenced_qualnames=frozenset(all_referenced_qualnames), - typing_modules=tuple( - sorted(all_typing_modules, key=lambda item: (item.filepath, item.module)) - ), - docstring_modules=tuple( - sorted(all_docstring_modules, key=lambda item: (item.filepath, item.module)) - ), - api_modules=tuple( - sorted(all_api_modules, key=lambda item: (item.filepath, item.module)) - ), - files_analyzed=files_analyzed, - files_skipped=files_skipped, - analyzed_lines=analyzed_lines, - analyzed_functions=analyzed_functions, - analyzed_methods=analyzed_methods, - analyzed_classes=analyzed_classes, - failed_files=tuple(sorted(failed_files)), - source_read_failures=tuple(sorted(source_read_failures)), - structural_findings=tuple(all_structural_findings), - source_stats_by_file=tuple( - (filepath, *stats) - for filepath, stats in sorted(source_stats_by_file.items()) - ), - ) - - -def _module_names_from_units(units: Sequence[GroupItemLike]) -> frozenset[str]: - modules: set[str] = set() - for unit in units: - qualname = _as_str(unit.get("qualname")) - module_name = qualname.split(":", 1)[0] if ":" in qualname else qualname - if module_name: - modules.add(module_name) - return frozenset(sorted(modules)) - - -def compute_project_metrics( - *, - units: Sequence[GroupItemLike], - class_metrics: Sequence[ClassMetrics], - module_deps: Sequence[ModuleDep], - dead_candidates: Sequence[DeadCandidate], - referenced_names: frozenset[str], - referenced_qualnames: frozenset[str], - typing_modules: Sequence[ModuleTypingCoverage] = (), - docstring_modules: Sequence[ModuleDocstringCoverage] = (), - api_modules: Sequence[ModuleApiSurface] = (), - files_found: int, - files_analyzed_or_cached: int, - function_clone_groups: int, - block_clone_groups: int, - skip_dependencies: bool, - skip_dead_code: bool, -) -> tuple[ProjectMetrics, DepGraph, tuple[DeadItem, ...]]: - unit_rows = sorted(units, key=_group_item_sort_key) - complexities = tuple( - max(1, _as_int(row.get("cyclomatic_complexity"), 1)) for row in 
unit_rows - ) - complexity_max = max(complexities) if complexities else 0 - complexity_avg = ( - float(sum(complexities)) / float(len(complexities)) if complexities else 0.0 - ) - high_risk_functions = tuple( - sorted( - { - _as_str(row.get("qualname")) - for row in unit_rows - if _as_str(row.get("risk")) == RISK_HIGH - } - ) - ) - - classes_sorted = tuple(sorted(class_metrics, key=_class_metric_sort_key)) - coupling_values = tuple(metric.cbo for metric in classes_sorted) - coupling_max = max(coupling_values) if coupling_values else 0 - coupling_avg = ( - float(sum(coupling_values)) / float(len(coupling_values)) - if coupling_values - else 0.0 - ) - high_risk_classes = tuple( - sorted( - { - metric.qualname - for metric in classes_sorted - if metric.risk_coupling == RISK_HIGH - } - ) - ) - - cohesion_values = tuple(metric.lcom4 for metric in classes_sorted) - cohesion_max = max(cohesion_values) if cohesion_values else 0 - cohesion_avg = ( - float(sum(cohesion_values)) / float(len(cohesion_values)) - if cohesion_values - else 0.0 - ) - low_cohesion_classes = tuple( - sorted( - { - metric.qualname - for metric in classes_sorted - if metric.risk_cohesion == RISK_HIGH - } - ) - ) - - dep_graph = DepGraph( - modules=frozenset(), - edges=(), - cycles=(), - max_depth=0, - longest_chains=(), - ) - if not skip_dependencies: - dep_graph = build_dep_graph( - modules=_module_names_from_units(unit_rows), - deps=module_deps, - ) - - dead_items: tuple[DeadItem, ...] 
= () - if not skip_dead_code: - dead_items = find_unused( - definitions=tuple(dead_candidates), - referenced_names=referenced_names, - referenced_qualnames=referenced_qualnames, - ) - - typing_rows = tuple( - sorted(typing_modules, key=lambda item: (item.filepath, item.module)) - ) - docstring_rows = tuple( - sorted(docstring_modules, key=lambda item: (item.filepath, item.module)) - ) - api_rows = tuple(sorted(api_modules, key=lambda item: (item.filepath, item.module))) - typing_param_total = sum(item.params_total for item in typing_rows) - typing_param_annotated = sum(item.params_annotated for item in typing_rows) - typing_return_total = sum(item.returns_total for item in typing_rows) - typing_return_annotated = sum(item.returns_annotated for item in typing_rows) - typing_any_count = sum(item.any_annotation_count for item in typing_rows) - docstring_public_total = sum(item.public_symbol_total for item in docstring_rows) - docstring_public_documented = sum( - item.public_symbol_documented for item in docstring_rows - ) - - health = compute_health( - HealthInputs( - files_found=files_found, - files_analyzed_or_cached=files_analyzed_or_cached, - function_clone_groups=function_clone_groups, - block_clone_groups=block_clone_groups, - complexity_avg=complexity_avg, - complexity_max=complexity_max, - high_risk_functions=len(high_risk_functions), - coupling_avg=coupling_avg, - coupling_max=coupling_max, - high_risk_classes=len(high_risk_classes), - cohesion_avg=cohesion_avg, - low_cohesion_classes=len(low_cohesion_classes), - dependency_cycles=len(dep_graph.cycles), - dependency_max_depth=dep_graph.max_depth, - dead_code_items=len(dead_items), - ) - ) - - project_metrics = ProjectMetrics( - complexity_avg=complexity_avg, - complexity_max=complexity_max, - high_risk_functions=high_risk_functions, - coupling_avg=coupling_avg, - coupling_max=coupling_max, - high_risk_classes=high_risk_classes, - cohesion_avg=cohesion_avg, - cohesion_max=cohesion_max, - 
low_cohesion_classes=low_cohesion_classes, - dependency_modules=len(dep_graph.modules), - dependency_edges=len(dep_graph.edges), - dependency_edge_list=dep_graph.edges, - dependency_cycles=dep_graph.cycles, - dependency_max_depth=dep_graph.max_depth, - dependency_longest_chains=dep_graph.longest_chains, - dead_code=dead_items, - health=health, - typing_param_total=typing_param_total, - typing_param_annotated=typing_param_annotated, - typing_return_total=typing_return_total, - typing_return_annotated=typing_return_annotated, - typing_any_count=typing_any_count, - docstring_public_total=docstring_public_total, - docstring_public_documented=docstring_public_documented, - typing_modules=typing_rows, - docstring_modules=docstring_rows, - api_surface=ApiSurfaceSnapshot(modules=api_rows) if api_rows else None, - ) - return project_metrics, dep_graph, dead_items - - -def compute_suggestions( - *, - project_metrics: ProjectMetrics, - units: Sequence[GroupItemLike], - class_metrics: Sequence[ClassMetrics], - func_groups: Mapping[str, Sequence[GroupItemLike]], - block_groups: Mapping[str, Sequence[GroupItemLike]], - segment_groups: Mapping[str, Sequence[GroupItemLike]], - block_group_facts: Mapping[str, Mapping[str, str]] | None = None, - structural_findings: Sequence[StructuralFindingGroup] | None = None, - scan_root: str = "", -) -> tuple[Suggestion, ...]: - return generate_suggestions( - project_metrics=project_metrics, - units=units, - class_metrics=class_metrics, - func_groups=func_groups, - block_groups=block_groups, - segment_groups=segment_groups, - block_group_facts=block_group_facts, - structural_findings=structural_findings, - scan_root=scan_root, - ) - - -def _permille(numerator: int, denominator: int) -> int: - if denominator <= 0: - return 0 - return round((1000.0 * float(numerator)) / float(denominator)) - - -def _coverage_join_summary( - coverage_join: CoverageJoinResult | None, -) -> dict[str, object]: - if coverage_join is None: - return {} - return { - 
"status": coverage_join.status, - "source": coverage_join.coverage_xml, - "files": coverage_join.files, - "units": len(coverage_join.units), - "measured_units": coverage_join.measured_units, - "overall_executable_lines": coverage_join.overall_executable_lines, - "overall_covered_lines": coverage_join.overall_covered_lines, - "overall_permille": _permille( - coverage_join.overall_covered_lines, - coverage_join.overall_executable_lines, - ), - "missing_from_report_units": sum( - 1 - for fact in coverage_join.units - if fact.coverage_status == "missing_from_report" - ), - "coverage_hotspots": coverage_join.coverage_hotspots, - "scope_gap_hotspots": coverage_join.scope_gap_hotspots, - "hotspot_threshold_percent": coverage_join.hotspot_threshold_percent, - "invalid_reason": coverage_join.invalid_reason, - } - - -def _coverage_join_rows( - coverage_join: CoverageJoinResult | None, -) -> list[dict[str, object]]: - if coverage_join is None or coverage_join.status != "ok": - return [] - return sorted( - ( - { - "qualname": fact.qualname, - "filepath": fact.filepath, - "start_line": fact.start_line, - "end_line": fact.end_line, - "cyclomatic_complexity": fact.cyclomatic_complexity, - "risk": fact.risk, - "executable_lines": fact.executable_lines, - "covered_lines": fact.covered_lines, - "coverage_permille": fact.coverage_permille, - "coverage_status": fact.coverage_status, - "coverage_hotspot": ( - fact.risk in {"medium", "high"} - and fact.coverage_status == "measured" - and (fact.coverage_permille / 10.0) - < float(coverage_join.hotspot_threshold_percent) - ), - "scope_gap_hotspot": ( - fact.risk in {"medium", "high"} - and fact.coverage_status == "missing_from_report" - ), - "coverage_review_item": ( - ( - fact.risk in {"medium", "high"} - and fact.coverage_status == "measured" - and (fact.coverage_permille / 10.0) - < float(coverage_join.hotspot_threshold_percent) - ) - or ( - fact.risk in {"medium", "high"} - and fact.coverage_status == "missing_from_report" - ) - ), - 
} - for fact in coverage_join.units - ), - key=lambda item: ( - 0 if bool(item.get("coverage_hotspot")) else 1, - 0 if bool(item.get("scope_gap_hotspot")) else 1, - {"high": 0, "medium": 1, "low": 2}.get(_as_str(item.get("risk")), 3), - _as_int(item.get("coverage_permille"), 0), - -_as_int(item.get("cyclomatic_complexity"), 0), - _as_str(item.get("filepath")), - _as_int(item.get("start_line")), - _as_str(item.get("qualname")), - ), - ) - - -def _coverage_adoption_rows( - project_metrics: ProjectMetrics, -) -> list[dict[str, object]]: - docstring_by_module = { - (item.filepath, item.module): item for item in project_metrics.docstring_modules - } - rows: list[dict[str, object]] = [] - seen_keys: set[tuple[str, str]] = set() - for typing_item in project_metrics.typing_modules: - key = (typing_item.filepath, typing_item.module) - seen_keys.add(key) - docstring_item = docstring_by_module.get(key) - doc_total = docstring_item.public_symbol_total if docstring_item else 0 - doc_documented = ( - docstring_item.public_symbol_documented if docstring_item else 0 - ) - rows.append( - { - "module": typing_item.module, - "filepath": typing_item.filepath, - "callable_count": typing_item.callable_count, - "params_total": typing_item.params_total, - "params_annotated": typing_item.params_annotated, - "param_permille": _permille( - typing_item.params_annotated, - typing_item.params_total, - ), - "returns_total": typing_item.returns_total, - "returns_annotated": typing_item.returns_annotated, - "return_permille": _permille( - typing_item.returns_annotated, - typing_item.returns_total, - ), - "any_annotation_count": typing_item.any_annotation_count, - "public_symbol_total": doc_total, - "public_symbol_documented": doc_documented, - "docstring_permille": _permille(doc_documented, doc_total), - } - ) - for docstring_item in project_metrics.docstring_modules: - key = (docstring_item.filepath, docstring_item.module) - if key in seen_keys: - continue - rows.append( - { - "module": 
docstring_item.module, - "filepath": docstring_item.filepath, - "callable_count": 0, - "params_total": 0, - "params_annotated": 0, - "param_permille": 0, - "returns_total": 0, - "returns_annotated": 0, - "return_permille": 0, - "any_annotation_count": 0, - "public_symbol_total": docstring_item.public_symbol_total, - "public_symbol_documented": docstring_item.public_symbol_documented, - "docstring_permille": _permille( - docstring_item.public_symbol_documented, - docstring_item.public_symbol_total, - ), - } - ) - return sorted( - rows, - key=lambda item: ( - _as_int(item.get("param_permille")), - _as_int(item.get("docstring_permille")), - _as_int(item.get("return_permille")), - _as_str(item.get("module")), - ), - ) - - -def _api_surface_summary( - api_surface: ApiSurfaceSnapshot | None, -) -> dict[str, object]: - modules = api_surface.modules if api_surface is not None else () - return { - "enabled": api_surface is not None, - "modules": len(modules), - "public_symbols": sum(len(module.symbols) for module in modules), - "added": 0, - "breaking": 0, - "strict_types": False, - } - - -def _api_surface_rows( - api_surface: ApiSurfaceSnapshot | None, -) -> list[dict[str, object]]: - if api_surface is None: - return [] - rows: list[dict[str, object]] = [] - for module in api_surface.modules: - rows.extend( - { - "record_kind": "symbol", - "module": module.module, - "filepath": module.filepath, - "qualname": symbol.qualname, - "start_line": symbol.start_line, - "end_line": symbol.end_line, - "symbol_kind": symbol.kind, - "exported_via": symbol.exported_via, - "params_total": len(symbol.params), - "params": [ - { - "name": param.name, - "kind": param.kind, - "has_default": param.has_default, - "annotated": bool(param.annotation_hash), - } - for param in symbol.params - ], - "returns_annotated": bool(symbol.returns_hash), - } - for symbol in module.symbols - ) - return sorted( - rows, - key=lambda item: ( - _as_str(item.get("filepath")), - _as_int(item.get("start_line")), - 
_as_int(item.get("end_line")), - _as_str(item.get("qualname")), - _as_str(item.get("record_kind")), - ), - ) - - -def _breaking_api_surface_rows( - changes: Sequence[object], -) -> list[dict[str, object]]: - rows: list[dict[str, object]] = [] - for change in changes: - if not isinstance(change, ApiBreakingChange): - continue - module_name, _, _local_name = change.qualname.partition(":") - rows.append( - { - "record_kind": "breaking_change", - "module": module_name, - "filepath": change.filepath, - "qualname": change.qualname, - "start_line": change.start_line, - "end_line": change.end_line, - "symbol_kind": change.symbol_kind, - "change_kind": change.change_kind, - "detail": change.detail, - } - ) - return sorted( - rows, - key=lambda item: ( - _as_str(item.get("filepath")), - _as_int(item.get("start_line")), - _as_int(item.get("end_line")), - _as_str(item.get("qualname")), - _as_str(item.get("change_kind")), - ), - ) - - -def _enrich_metrics_report_payload( - *, - metrics_payload: Mapping[str, object], - metrics_diff: MetricsDiff | None, - coverage_adoption_diff_available: bool, - api_surface_diff_available: bool, -) -> dict[str, object]: - enriched = { - key: (dict(value) if isinstance(value, Mapping) else value) - for key, value in metrics_payload.items() - } - coverage_adoption = dict( - cast("Mapping[str, object]", enriched.get("coverage_adoption", {})) - ) - coverage_summary = dict( - cast("Mapping[str, object]", coverage_adoption.get("summary", {})) - ) - if coverage_summary: - coverage_summary["baseline_diff_available"] = coverage_adoption_diff_available - coverage_summary["param_delta"] = ( - int(metrics_diff.typing_param_permille_delta) - if metrics_diff is not None and coverage_adoption_diff_available - else 0 - ) - coverage_summary["return_delta"] = ( - int(metrics_diff.typing_return_permille_delta) - if metrics_diff is not None and coverage_adoption_diff_available - else 0 - ) - coverage_summary["docstring_delta"] = ( - 
int(metrics_diff.docstring_permille_delta) - if metrics_diff is not None and coverage_adoption_diff_available - else 0 - ) - coverage_adoption["summary"] = coverage_summary - enriched["coverage_adoption"] = coverage_adoption - - api_surface = dict(cast("Mapping[str, object]", enriched.get("api_surface", {}))) - api_summary = dict(cast("Mapping[str, object]", api_surface.get("summary", {}))) - api_items = list(cast("Sequence[object]", api_surface.get("items", ()))) - if api_summary: - api_summary["baseline_diff_available"] = api_surface_diff_available - api_summary["added"] = ( - len(metrics_diff.new_api_symbols) - if metrics_diff is not None and api_surface_diff_available - else 0 - ) - api_summary["breaking"] = ( - len(metrics_diff.new_api_breaking_changes) - if metrics_diff is not None and api_surface_diff_available - else 0 - ) - api_surface["summary"] = api_summary - if ( - metrics_diff is not None - and api_surface_diff_available - and metrics_diff.new_api_breaking_changes - ): - api_items.extend( - _breaking_api_surface_rows(metrics_diff.new_api_breaking_changes) - ) - api_surface["items"] = api_items - if api_surface: - enriched["api_surface"] = api_surface - return enriched - - -def build_metrics_report_payload( - *, - scan_root: str = "", - project_metrics: ProjectMetrics, - coverage_join: CoverageJoinResult | None = None, - units: Sequence[GroupItemLike], - class_metrics: Sequence[ClassMetrics], - module_deps: Sequence[ModuleDep] = (), - source_stats_by_file: Sequence[tuple[str, int, int, int, int]] = (), - suppressed_dead_code: Sequence[DeadItem] = (), -) -> dict[str, object]: - sorted_units = sorted( - units, - key=lambda item: ( - _as_int(item.get("cyclomatic_complexity")), - _as_int(item.get("nesting_depth")), - _as_str(item.get("qualname")), - ), - reverse=True, - ) - complexity_rows = [ - { - "qualname": _as_str(item.get("qualname")), - "filepath": _as_str(item.get("filepath")), - "start_line": _as_int(item.get("start_line")), - "end_line": 
_as_int(item.get("end_line")), - "cyclomatic_complexity": _as_int(item.get("cyclomatic_complexity"), 1), - "nesting_depth": _as_int(item.get("nesting_depth")), - "risk": _as_str(item.get("risk"), RISK_LOW), - } - for item in sorted_units - ] - classes_sorted = sorted( - class_metrics, - key=lambda item: (item.cbo, item.lcom4, item.qualname), - reverse=True, - ) - coupling_rows = [ - { - "qualname": metric.qualname, - "filepath": metric.filepath, - "start_line": metric.start_line, - "end_line": metric.end_line, - "cbo": metric.cbo, - "risk": metric.risk_coupling, - "coupled_classes": list(metric.coupled_classes), - } - for metric in classes_sorted - ] - cohesion_rows = [ - { - "qualname": metric.qualname, - "filepath": metric.filepath, - "start_line": metric.start_line, - "end_line": metric.end_line, - "lcom4": metric.lcom4, - "risk": metric.risk_cohesion, - "method_count": metric.method_count, - "instance_var_count": metric.instance_var_count, - } - for metric in classes_sorted - ] - active_dead_items = tuple(project_metrics.dead_code) - suppressed_dead_items = tuple(suppressed_dead_code) - coverage_adoption_rows = _coverage_adoption_rows(project_metrics) - api_surface_summary = _api_surface_summary(project_metrics.api_surface) - api_surface_items = _api_surface_rows(project_metrics.api_surface) - coverage_join_summary = _coverage_join_summary(coverage_join) - coverage_join_items = _coverage_join_rows(coverage_join) - - def _serialize_dead_item( - item: DeadItem, - *, - suppressed: bool = False, - ) -> dict[str, object]: - payload: dict[str, object] = { - "qualname": item.qualname, - "filepath": item.filepath, - "start_line": item.start_line, - "end_line": item.end_line, - "kind": item.kind, - "confidence": item.confidence, - } - if suppressed: - payload["suppressed_by"] = [ - { - "rule": DEAD_CODE_RULE_ID, - "source": INLINE_CODECLONE_SUPPRESSION_SOURCE, - } - ] - return payload - - payload = { - CATEGORY_COMPLEXITY: { - "functions": complexity_rows, - "summary": 
{ - "total": len(complexity_rows), - "average": round(project_metrics.complexity_avg, 2), - "max": project_metrics.complexity_max, - "high_risk": len(project_metrics.high_risk_functions), - }, - }, - CATEGORY_COUPLING: { - "classes": coupling_rows, - "summary": { - "total": len(coupling_rows), - "average": round(project_metrics.coupling_avg, 2), - "max": project_metrics.coupling_max, - "high_risk": len(project_metrics.high_risk_classes), - }, - }, - CATEGORY_COHESION: { - "classes": cohesion_rows, - "summary": { - "total": len(cohesion_rows), - "average": round(project_metrics.cohesion_avg, 2), - "max": project_metrics.cohesion_max, - "low_cohesion": len(project_metrics.low_cohesion_classes), - }, - }, - "dependencies": { - "modules": project_metrics.dependency_modules, - "edges": project_metrics.dependency_edges, - "max_depth": project_metrics.dependency_max_depth, - "cycles": [list(cycle) for cycle in project_metrics.dependency_cycles], - "longest_chains": [ - list(chain) for chain in project_metrics.dependency_longest_chains - ], - "edge_list": [ - { - "source": edge.source, - "target": edge.target, - "import_type": edge.import_type, - "line": edge.line, - } - for edge in project_metrics.dependency_edge_list - ], - }, - "dead_code": { - "items": [_serialize_dead_item(item) for item in active_dead_items], - "suppressed_items": [ - _serialize_dead_item(item, suppressed=True) - for item in suppressed_dead_items - ], - "summary": { - "total": len(active_dead_items), - "critical": sum( - 1 - for item in active_dead_items - if item.confidence == CONFIDENCE_HIGH - ), - "high_confidence": sum( - 1 - for item in active_dead_items - if item.confidence == CONFIDENCE_HIGH - ), - "suppressed": len(suppressed_dead_items), - }, - }, - "health": { - "score": project_metrics.health.total, - "grade": project_metrics.health.grade, - "dimensions": dict(project_metrics.health.dimensions), - }, - "coverage_adoption": { - "summary": { - "modules": len(coverage_adoption_rows), - 
"params_total": project_metrics.typing_param_total, - "params_annotated": project_metrics.typing_param_annotated, - "param_permille": _permille( - project_metrics.typing_param_annotated, - project_metrics.typing_param_total, - ), - "returns_total": project_metrics.typing_return_total, - "returns_annotated": project_metrics.typing_return_annotated, - "return_permille": _permille( - project_metrics.typing_return_annotated, - project_metrics.typing_return_total, - ), - "public_symbol_total": project_metrics.docstring_public_total, - "public_symbol_documented": project_metrics.docstring_public_documented, - "docstring_permille": _permille( - project_metrics.docstring_public_documented, - project_metrics.docstring_public_total, - ), - "typing_any_count": project_metrics.typing_any_count, - }, - "items": coverage_adoption_rows, - }, - "api_surface": { - "summary": dict(api_surface_summary), - "items": api_surface_items, - }, - "overloaded_modules": build_overloaded_modules_payload( - scan_root=scan_root, - source_stats_by_file=source_stats_by_file, - units=units, - class_metrics=class_metrics, - module_deps=module_deps, - ), - } - if coverage_join is not None: - payload["coverage_join"] = { - "summary": dict(coverage_join_summary), - "items": coverage_join_items, - } - return payload - - -def analyze( - *, - boot: BootstrapResult, - discovery: DiscoveryResult, - processing: ProcessingResult, -) -> AnalysisResult: - golden_fixture_paths = tuple( - str(pattern).strip() - for pattern in getattr(boot.args, "golden_fixture_paths", ()) - if str(pattern).strip() - ) - - func_split = split_clone_groups_for_golden_fixtures( - groups=build_groups(processing.units), - kind="function", - golden_fixture_paths=golden_fixture_paths, - scan_root=str(boot.root), - ) - block_split = split_clone_groups_for_golden_fixtures( - groups=build_block_groups(processing.blocks), - kind="block", - golden_fixture_paths=golden_fixture_paths, - scan_root=str(boot.root), - ) - segment_split = 
split_clone_groups_for_golden_fixtures( - groups=build_segment_groups(processing.segments), - kind="segment", - golden_fixture_paths=golden_fixture_paths, - scan_root=str(boot.root), - ) - - func_groups = func_split.active_groups - block_groups = block_split.active_groups - segment_groups_raw = segment_split.active_groups - segment_groups_raw_digest = _segment_groups_digest(segment_groups_raw) - cached_projection = discovery.cached_segment_report_projection - if ( - cached_projection is not None - and cached_projection.get("digest") == segment_groups_raw_digest - ): - projection_groups = cached_projection.get("groups", {}) - segment_groups = { - group_key: [ - { - "segment_hash": str(item["segment_hash"]), - "segment_sig": str(item["segment_sig"]), - "filepath": str(item["filepath"]), - "qualname": str(item["qualname"]), - "start_line": int(item["start_line"]), - "end_line": int(item["end_line"]), - "size": int(item["size"]), - } - for item in projection_groups[group_key] - ] - for group_key in sorted(projection_groups) - } - suppressed_segment_groups = int(cached_projection.get("suppressed", 0)) - else: - segment_groups, suppressed_segment_groups = prepare_segment_report_groups( - segment_groups_raw - ) - - block_groups_report = prepare_block_report_groups(block_groups) - suppressed_block_groups_report = prepare_block_report_groups( - block_split.suppressed_groups - ) - if segment_split.suppressed_groups: - suppressed_segment_groups_report, _ = prepare_segment_report_groups( - segment_split.suppressed_groups - ) - else: - suppressed_segment_groups_report = {} - suppressed_clone_groups = ( - *build_suppressed_clone_groups( - kind="function", - groups=func_split.suppressed_groups, - matched_patterns=func_split.matched_patterns, - ), - *build_suppressed_clone_groups( - kind="block", - groups=suppressed_block_groups_report, - matched_patterns=block_split.matched_patterns, - ), - *build_suppressed_clone_groups( - kind="segment", - 
groups=suppressed_segment_groups_report, - matched_patterns=segment_split.matched_patterns, - ), - ) - block_group_facts = build_block_group_facts( - { - **block_groups_report, - **suppressed_block_groups_report, - } - ) - - func_clones_count = len(func_groups) - block_clones_count = len(block_groups) - segment_clones_count = len(segment_groups) - files_analyzed_or_cached = processing.files_analyzed + discovery.cache_hits - - project_metrics: ProjectMetrics | None = None - metrics_payload: dict[str, object] | None = None - suggestions: tuple[Suggestion, ...] = () - suppressed_dead_items: tuple[DeadItem, ...] = () - coverage_join: CoverageJoinResult | None = None - cohort_structural_findings: tuple[StructuralFindingGroup, ...] = () - if _should_collect_structural_findings(boot.output_paths): - cohort_structural_findings = build_clone_cohort_structural_findings( - func_groups=func_groups, - ) - combined_structural_findings = ( - *processing.structural_findings, - *cohort_structural_findings, - ) - - if not boot.args.skip_metrics: - project_metrics, _, _ = compute_project_metrics( - units=processing.units, - class_metrics=processing.class_metrics, - module_deps=processing.module_deps, - dead_candidates=processing.dead_candidates, - referenced_names=processing.referenced_names, - referenced_qualnames=processing.referenced_qualnames, - typing_modules=processing.typing_modules, - docstring_modules=processing.docstring_modules, - api_modules=processing.api_modules, - files_found=discovery.files_found, - files_analyzed_or_cached=files_analyzed_or_cached, - function_clone_groups=func_clones_count, - block_clone_groups=block_clones_count, - skip_dependencies=boot.args.skip_dependencies, - skip_dead_code=boot.args.skip_dead_code, - ) - if not boot.args.skip_dead_code: - suppressed_dead_items = find_suppressed_unused( - definitions=tuple(processing.dead_candidates), - referenced_names=processing.referenced_names, - referenced_qualnames=processing.referenced_qualnames, - ) - 
suggestions = compute_suggestions( - project_metrics=project_metrics, - units=processing.units, - class_metrics=processing.class_metrics, - func_groups=func_groups, - block_groups=block_groups_report, - segment_groups=segment_groups, - block_group_facts=block_group_facts, - structural_findings=combined_structural_findings, - scan_root=str(boot.root), - ) - coverage_xml_path = _resolve_optional_runtime_path( - getattr(boot.args, "coverage_xml", None), - root=boot.root, - ) - if coverage_xml_path is not None: - try: - coverage_join = build_coverage_join( - coverage_xml=coverage_xml_path, - root_path=boot.root, - units=processing.units, - hotspot_threshold_percent=int( - getattr(boot.args, "coverage_min", 50) - ), - ) - except CoverageJoinParseError as exc: - coverage_join = CoverageJoinResult( - coverage_xml=str(coverage_xml_path), - status="invalid", - hotspot_threshold_percent=int( - getattr(boot.args, "coverage_min", 50) - ), - invalid_reason=str(exc), - ) - metrics_payload = build_metrics_report_payload( - scan_root=str(boot.root), - project_metrics=project_metrics, - coverage_join=coverage_join, - units=processing.units, - class_metrics=processing.class_metrics, - module_deps=processing.module_deps, - source_stats_by_file=processing.source_stats_by_file, - suppressed_dead_code=suppressed_dead_items, - ) - - return AnalysisResult( - func_groups=func_groups, - block_groups=block_groups, - block_groups_report=block_groups_report, - segment_groups=segment_groups, - suppressed_clone_groups=tuple(suppressed_clone_groups), - suppressed_segment_groups=suppressed_segment_groups, - block_group_facts=block_group_facts, - func_clones_count=func_clones_count, - block_clones_count=block_clones_count, - segment_clones_count=segment_clones_count, - files_analyzed_or_cached=files_analyzed_or_cached, - project_metrics=project_metrics, - metrics_payload=metrics_payload, - suggestions=suggestions, - segment_groups_raw_digest=segment_groups_raw_digest, - coverage_join=coverage_join, 
- suppressed_dead_code_items=len(suppressed_dead_items), - structural_findings=combined_structural_findings, - ) - - -def _load_markdown_report_renderer() -> Callable[..., str]: - from .report.markdown import to_markdown_report - - return to_markdown_report - - -def _load_sarif_report_renderer() -> Callable[..., str]: - from .report.sarif import to_sarif_report - - return to_sarif_report - - -def report( - *, - boot: BootstrapResult, - discovery: DiscoveryResult, - processing: ProcessingResult, - analysis: AnalysisResult, - report_meta: Mapping[str, object], - new_func: Collection[str], - new_block: Collection[str], - html_builder: Callable[..., str] | None = None, - metrics_diff: object | None = None, - coverage_adoption_diff_available: bool = False, - api_surface_diff_available: bool = False, - include_report_document: bool = False, -) -> ReportArtifacts: - contents: dict[str, str | None] = { - "html": None, - "json": None, - "md": None, - "sarif": None, - "text": None, - } - - sf = analysis.structural_findings if analysis.structural_findings else None - report_inventory = { - "files": { - "total_found": discovery.files_found, - "analyzed": processing.files_analyzed, - "cached": discovery.cache_hits, - "skipped": processing.files_skipped, - "source_io_skipped": len(processing.source_read_failures), - }, - "code": { - "parsed_lines": processing.analyzed_lines + discovery.cached_lines, - "functions": processing.analyzed_functions + discovery.cached_functions, - "methods": processing.analyzed_methods + discovery.cached_methods, - "classes": processing.analyzed_classes + discovery.cached_classes, - }, - "file_list": list(discovery.all_file_paths), - } - report_document: dict[str, object] | None = None - needs_report_document = ( - include_report_document - or boot.output_paths.html is not None - or any( - path is not None - for path in ( - boot.output_paths.json, - boot.output_paths.md, - boot.output_paths.sarif, - boot.output_paths.text, - ) - ) - ) - - if 
needs_report_document: - metrics_for_report = ( - _enrich_metrics_report_payload( - metrics_payload=analysis.metrics_payload, - metrics_diff=cast("MetricsDiff | None", metrics_diff), - coverage_adoption_diff_available=coverage_adoption_diff_available, - api_surface_diff_available=api_surface_diff_available, - ) - if analysis.metrics_payload is not None - else None - ) - report_document = build_report_document( - func_groups=analysis.func_groups, - block_groups=analysis.block_groups_report, - segment_groups=analysis.segment_groups, - suppressed_clone_groups=analysis.suppressed_clone_groups, - meta=report_meta, - inventory=report_inventory, - block_facts=analysis.block_group_facts, - new_function_group_keys=new_func, - new_block_group_keys=new_block, - new_segment_group_keys=set(analysis.segment_groups.keys()), - metrics=metrics_for_report, - suggestions=analysis.suggestions, - structural_findings=sf, - ) - - if boot.output_paths.html and html_builder is not None: - metrics_for_html = ( - _enrich_metrics_report_payload( - metrics_payload=analysis.metrics_payload, - metrics_diff=cast("MetricsDiff | None", metrics_diff), - coverage_adoption_diff_available=coverage_adoption_diff_available, - api_surface_diff_available=api_surface_diff_available, - ) - if analysis.metrics_payload is not None - else None - ) - contents["html"] = html_builder( - func_groups=analysis.func_groups, - block_groups=analysis.block_groups_report, - segment_groups=analysis.segment_groups, - block_group_facts=analysis.block_group_facts, - new_function_group_keys=new_func, - new_block_group_keys=new_block, - report_meta=report_meta, - metrics=metrics_for_html, - suggestions=analysis.suggestions, - structural_findings=sf, - report_document=report_document, - metrics_diff=metrics_diff, - title="CodeClone Report", - context_lines=3, - max_snippet_lines=220, - ) - - if any( - path is not None - for path in ( - boot.output_paths.json, - boot.output_paths.md, - boot.output_paths.sarif, - 
boot.output_paths.text, - ) - ): - assert report_document is not None - - if boot.output_paths.json and report_document is not None: - contents["json"] = render_json_report_document(report_document) - - def _render_projection_artifact( - renderer: Callable[..., str], - ) -> str: - assert report_document is not None - return renderer( - report_document=report_document, - meta=report_meta, - inventory=report_inventory, - func_groups=analysis.func_groups, - block_groups=analysis.block_groups_report, - segment_groups=analysis.segment_groups, - block_facts=analysis.block_group_facts, - new_function_group_keys=new_func, - new_block_group_keys=new_block, - new_segment_group_keys=set(analysis.segment_groups.keys()), - metrics=analysis.metrics_payload, - suggestions=analysis.suggestions, - structural_findings=sf, - ) - - for key, output_path, loader in ( - ("md", boot.output_paths.md, _load_markdown_report_renderer), - ("sarif", boot.output_paths.sarif, _load_sarif_report_renderer), - ): - if output_path and report_document is not None: - contents[key] = _render_projection_artifact(loader()) - - if boot.output_paths.text and report_document is not None: - contents["text"] = render_text_report_document(report_document) - - return ReportArtifacts( - html=contents["html"], - json=contents["json"], - md=contents["md"], - sarif=contents["sarif"], - text=contents["text"], - report_document=report_document, - ) - - -def metric_gate_reasons( - *, - project_metrics: ProjectMetrics, - coverage_join: CoverageJoinResult | None, - metrics_diff: MetricsDiff | None, - config: MetricGateConfig, -) -> tuple[str, ...]: - reasons: list[str] = [] - _append_threshold_metric_reasons( - reasons=reasons, - project_metrics=project_metrics, - config=config, - ) - _append_new_metric_diff_reasons( - reasons=reasons, - metrics_diff=metrics_diff, - config=config, - ) - _append_adoption_metric_reasons( - reasons=reasons, - metrics_diff=metrics_diff, - project_metrics=project_metrics, - config=config, - ) 
- _append_coverage_join_reasons( - reasons=reasons, - coverage_join=coverage_join, - config=config, - ) - return tuple(reasons) - - -def _append_threshold_metric_reasons( - *, - reasons: list[str], - project_metrics: ProjectMetrics, - config: MetricGateConfig, -) -> None: - threshold_rows = ( - ( - config.fail_complexity >= 0 - and project_metrics.complexity_max > config.fail_complexity, - "Complexity threshold exceeded: " - f"max CC={project_metrics.complexity_max}, " - f"threshold={config.fail_complexity}.", - ), - ( - config.fail_coupling >= 0 - and project_metrics.coupling_max > config.fail_coupling, - "Coupling threshold exceeded: " - f"max CBO={project_metrics.coupling_max}, " - f"threshold={config.fail_coupling}.", - ), - ( - config.fail_cohesion >= 0 - and project_metrics.cohesion_max > config.fail_cohesion, - "Cohesion threshold exceeded: " - f"max LCOM4={project_metrics.cohesion_max}, " - f"threshold={config.fail_cohesion}.", - ), - ( - config.fail_health >= 0 - and project_metrics.health.total < config.fail_health, - "Health score below threshold: " - f"score={project_metrics.health.total}, threshold={config.fail_health}.", - ), - ) - reasons.extend(message for triggered, message in threshold_rows if triggered) - if config.fail_cycles and project_metrics.dependency_cycles: - reasons.append( - "Dependency cycles detected: " - f"{len(project_metrics.dependency_cycles)} cycle(s)." - ) - high_conf_dead = _high_confidence_dead_code_count(project_metrics.dead_code) - if config.fail_dead_code and high_conf_dead > 0: - reasons.append( - f"Dead code detected (high confidence): {high_conf_dead} item(s)." 
- ) - - -def _append_new_metric_diff_reasons( - *, - reasons: list[str], - metrics_diff: MetricsDiff | None, - config: MetricGateConfig, -) -> None: - if not config.fail_on_new_metrics or metrics_diff is None: - return - if metrics_diff.new_high_risk_functions: - reasons.append( - "New high-risk functions vs metrics baseline: " - f"{len(metrics_diff.new_high_risk_functions)}." - ) - if metrics_diff.new_high_coupling_classes: - reasons.append( - "New high-coupling classes vs metrics baseline: " - f"{len(metrics_diff.new_high_coupling_classes)}." - ) - if metrics_diff.new_cycles: - reasons.append( - "New dependency cycles vs metrics baseline: " - f"{len(metrics_diff.new_cycles)}." - ) - if metrics_diff.new_dead_code: - reasons.append( - "New dead code items vs metrics baseline: " - f"{len(metrics_diff.new_dead_code)}." - ) - if metrics_diff.health_delta < 0: - reasons.append( - "Health score regressed vs metrics baseline: " - f"delta={metrics_diff.health_delta}." - ) - - -def _append_metric_gate_reason( - *, - reasons: list[str], - enabled: bool, - triggered: bool, - message: str, -) -> None: - if enabled and triggered: - reasons.append(message) - - -def _append_adoption_metric_reasons( - *, - reasons: list[str], - metrics_diff: MetricsDiff | None, - project_metrics: ProjectMetrics, - config: MetricGateConfig, -) -> None: - typing_percent = ( - _permille( - project_metrics.typing_param_annotated, - project_metrics.typing_param_total, - ) - / 10.0 - ) - docstring_percent = ( - _permille( - project_metrics.docstring_public_documented, - project_metrics.docstring_public_total, - ) - / 10.0 - ) - if config.min_typing_coverage >= 0 and typing_percent < float( - config.min_typing_coverage - ): - reasons.append( - "Typing coverage below threshold: " - f"coverage={typing_percent:.1f}%, threshold={config.min_typing_coverage}%." 
- ) - if config.min_docstring_coverage >= 0 and docstring_percent < float( - config.min_docstring_coverage - ): - reasons.append( - "Docstring coverage below threshold: " - "coverage=" - f"{docstring_percent:.1f}%, " - f"threshold={config.min_docstring_coverage}%." - ) - if metrics_diff is None: - return - if config.fail_on_typing_regression: - typing_delta = int(getattr(metrics_diff, "typing_param_permille_delta", 0)) - return_delta = int(getattr(metrics_diff, "typing_return_permille_delta", 0)) - if typing_delta < 0 or return_delta < 0: - reasons.append( - "Typing coverage regressed vs metrics baseline: " - f"params_delta={typing_delta}, returns_delta={return_delta}." - ) - docstring_delta = int(getattr(metrics_diff, "docstring_permille_delta", 0)) - _append_metric_gate_reason( - reasons=reasons, - enabled=config.fail_on_docstring_regression, - triggered=docstring_delta < 0, - message=( - "Docstring coverage regressed vs metrics baseline: " - f"delta={docstring_delta}." - ), - ) - api_breaking = tuple( - cast( - "Sequence[object]", - getattr(metrics_diff, "new_api_breaking_changes", ()), - ) - ) - _append_metric_gate_reason( - reasons=reasons, - enabled=config.fail_on_api_break, - triggered=bool(api_breaking), - message=( - f"Public API breaking changes vs metrics baseline: {len(api_breaking)}." - ), - ) - - -def _append_coverage_join_reasons( - *, - reasons: list[str], - coverage_join: CoverageJoinResult | None, - config: MetricGateConfig, -) -> None: - if not config.fail_on_untested_hotspots or coverage_join is None: - return - if coverage_join.status != "ok": - return - if coverage_join.coverage_hotspots > 0: - reasons.append( - "Coverage hotspots detected: " - f"hotspots={coverage_join.coverage_hotspots}, " - f"threshold={config.coverage_min}%." 
- ) - - -def _high_confidence_dead_code_count(items: Sequence[DeadItem]) -> int: - return sum(1 for item in items if item.confidence == "high") - - -def gate( - *, - boot: BootstrapResult, - analysis: AnalysisResult, - new_func: Collection[str], - new_block: Collection[str], - metrics_diff: MetricsDiff | None, -) -> GatingResult: - reasons: list[str] = [] - - if analysis.project_metrics is not None: - metric_reasons = metric_gate_reasons( - project_metrics=analysis.project_metrics, - coverage_join=analysis.coverage_join, - metrics_diff=metrics_diff, - config=MetricGateConfig( - fail_complexity=boot.args.fail_complexity, - fail_coupling=boot.args.fail_coupling, - fail_cohesion=boot.args.fail_cohesion, - fail_cycles=boot.args.fail_cycles, - fail_dead_code=boot.args.fail_dead_code, - fail_health=boot.args.fail_health, - fail_on_new_metrics=boot.args.fail_on_new_metrics, - fail_on_typing_regression=bool( - getattr(boot.args, "fail_on_typing_regression", False) - ), - fail_on_docstring_regression=bool( - getattr(boot.args, "fail_on_docstring_regression", False) - ), - fail_on_api_break=bool(getattr(boot.args, "fail_on_api_break", False)), - fail_on_untested_hotspots=bool( - getattr(boot.args, "fail_on_untested_hotspots", False) - ), - min_typing_coverage=int(getattr(boot.args, "min_typing_coverage", -1)), - min_docstring_coverage=int( - getattr(boot.args, "min_docstring_coverage", -1) - ), - coverage_min=int(getattr(boot.args, "coverage_min", 50)), - ), - ) - reasons.extend(f"metric:{reason}" for reason in metric_reasons) - - if boot.args.fail_on_new and (new_func or new_block): - reasons.append("clone:new") - - total_clone_groups = analysis.func_clones_count + analysis.block_clones_count - if 0 <= boot.args.fail_threshold < total_clone_groups: - reasons.append( - f"clone:threshold:{total_clone_groups}:{boot.args.fail_threshold}" - ) - - if reasons: - return GatingResult( - exit_code=int(ExitCode.GATING_FAILURE), - reasons=tuple(reasons), - ) - - return 
GatingResult(exit_code=int(ExitCode.SUCCESS), reasons=()) diff --git a/codeclone/qualnames.py b/codeclone/qualnames/__init__.py similarity index 100% rename from codeclone/qualnames.py rename to codeclone/qualnames/__init__.py diff --git a/codeclone/report/__init__.py b/codeclone/report/__init__.py index e5869f7..4b3685e 100644 --- a/codeclone/report/__init__.py +++ b/codeclone/report/__init__.py @@ -6,8 +6,13 @@ from __future__ import annotations -from ..grouping import build_block_groups, build_groups, build_segment_groups +from ..findings.clones.grouping import ( + build_block_groups, + build_groups, + build_segment_groups, +) from .blocks import prepare_block_report_groups +from .document import build_report_document from .explain import build_block_group_facts from .markdown import render_markdown_report_document, to_markdown_report from .sarif import render_sarif_report_document, to_sarif_report @@ -29,6 +34,7 @@ "build_block_group_facts", "build_block_groups", "build_groups", + "build_report_document", "build_segment_groups", "classify_clone_type", "generate_suggestions", diff --git a/codeclone/report/derived.py b/codeclone/report/derived.py index 6873a08..362e491 100644 --- a/codeclone/report/derived.py +++ b/codeclone/report/derived.py @@ -9,7 +9,6 @@ from collections import Counter from typing import TYPE_CHECKING, cast -from .._coerce import as_int as _as_int from ..domain.source_scope import ( IMPACT_SCOPE_MIXED, IMPACT_SCOPE_NON_RUNTIME, @@ -31,6 +30,7 @@ from ..paths import ( relative_repo_path as _relative_repo_path, ) +from ..utils.coerce import as_int as _as_int if TYPE_CHECKING: from collections.abc import Iterable, Mapping, Sequence diff --git a/codeclone/report/document/__init__.py b/codeclone/report/document/__init__.py new file mode 100644 index 0000000..f56610a --- /dev/null +++ b/codeclone/report/document/__init__.py @@ -0,0 +1,64 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from ...findings.ids import ( + clone_group_id, + dead_code_group_id, + design_group_id, + structural_group_id, +) +from ._common import ( + _collect_paths_from_metrics, + _collect_report_file_list, + _contract_path, + _count_file_lines, + _count_file_lines_for_path, + _is_absolute_path, + _normalize_block_machine_facts, + _normalize_nested_string_rows, + _parse_ratio_percent, + _source_scope_from_filepaths, + _source_scope_from_locations, +) +from ._design_groups import _build_design_groups +from ._findings_groups import ( + _clone_group_assessment, + _csv_values, + _structural_group_assessment, +) +from .builder import build_report_document +from .derived import _combined_impact_scope, _suggestion_finding_id +from .findings import _findings_summary +from .inventory import _derive_inventory_code_counts + +__all__ = [ + "_build_design_groups", + "_clone_group_assessment", + "_collect_paths_from_metrics", + "_collect_report_file_list", + "_combined_impact_scope", + "_contract_path", + "_count_file_lines", + "_count_file_lines_for_path", + "_csv_values", + "_derive_inventory_code_counts", + "_findings_summary", + "_is_absolute_path", + "_normalize_block_machine_facts", + "_normalize_nested_string_rows", + "_parse_ratio_percent", + "_source_scope_from_filepaths", + "_source_scope_from_locations", + "_structural_group_assessment", + "_suggestion_finding_id", + "build_report_document", + "clone_group_id", + "dead_code_group_id", + "design_group_id", + "structural_group_id", +] diff --git a/codeclone/report/document/_common.py b/codeclone/report/document/_common.py new file mode 100644 index 0000000..c82f40f --- /dev/null +++ b/codeclone/report/document/_common.py @@ -0,0 +1,407 @@ +# This Source Code Form is subject to the terms of the Mozilla 
Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections import Counter +from collections.abc import Collection, Iterable, Mapping, Sequence +from typing import TYPE_CHECKING + +from ...contracts import ( + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, +) +from ...domain.findings import ( + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CLONE_NOVELTY_KNOWN, + CLONE_NOVELTY_NEW, + FAMILY_DEAD_CODE, +) +from ...domain.quality import ( + EFFORT_WEIGHT, + SEVERITY_RANK, +) +from ...findings.structural.detectors import normalize_structural_findings +from ...utils.coerce import as_int as _as_int +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence +from ..derived import ( + normalized_source_kind as _normalized_source_kind, +) +from ..derived import ( + relative_report_path, + report_location_from_group_item, +) +from ..derived import ( + source_scope_from_counts as _report_source_scope_from_counts, +) +from ..derived import ( + source_scope_from_locations as _report_source_scope_from_locations, +) + +if TYPE_CHECKING: + from ...models import ( + GroupMapLike, + SourceKind, + StructuralFindingGroup, + SuppressedCloneGroup, + ) + +_OVERLOADED_MODULES_FAMILY = "overloaded_modules" +_COVERAGE_ADOPTION_FAMILY = "coverage_adoption" +_API_SURFACE_FAMILY = "api_surface" +_COVERAGE_JOIN_FAMILY = "coverage_join" + + +def _optional_str(value: object) -> str | None: + if value is None: + return None + text = str(value).strip() + return text or None + + +def _coerced_nonnegative_threshold(value: object, *, default: int) -> int: + threshold = _as_int(value, default) + return threshold if threshold >= 0 else 
default + + +def _design_findings_thresholds_payload( + raw_meta: Mapping[str, object] | None, +) -> dict[str, object]: + meta = dict(raw_meta or {}) + return { + "design_findings": { + CATEGORY_COMPLEXITY: { + "metric": "cyclomatic_complexity", + "operator": ">", + "value": _coerced_nonnegative_threshold( + meta.get("design_complexity_threshold"), + default=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + ), + }, + CATEGORY_COUPLING: { + "metric": "cbo", + "operator": ">", + "value": _coerced_nonnegative_threshold( + meta.get("design_coupling_threshold"), + default=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ), + }, + CATEGORY_COHESION: { + "metric": "lcom4", + "operator": ">=", + "value": _coerced_nonnegative_threshold( + meta.get("design_cohesion_threshold"), + default=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + ), + }, + } + } + + +def _analysis_profile_payload( + raw_meta: Mapping[str, object] | None, +) -> dict[str, int] | None: + meta = dict(raw_meta or {}) + nested = _as_mapping(meta.get("analysis_profile")) + if nested: + meta = dict(nested) + keys = ( + "min_loc", + "min_stmt", + "block_min_loc", + "block_min_stmt", + "segment_min_loc", + "segment_min_stmt", + ) + if any(key not in meta for key in keys): + return None + payload = {key: _as_int(meta.get(key), -1) for key in keys} + if any(value < 0 for value in payload.values()): + return None + return payload + + +def _normalize_path(value: str) -> str: + return value.replace("\\", "/").strip() + + +def _is_absolute_path(value: str) -> bool: + normalized = _normalize_path(value) + if not normalized: + return False + if normalized.startswith("/"): + return True + return len(normalized) > 2 and normalized[1] == ":" and normalized[2] == "/" + + +def _contract_path( + value: object, + *, + scan_root: str, +) -> tuple[str | None, str | None, str | None]: + path_text = _optional_str(value) + if path_text is None: + return None, None, None + normalized_path = _normalize_path(path_text) + relative_path = 
relative_report_path(normalized_path, scan_root=scan_root) + if relative_path and relative_path != normalized_path: + return relative_path, "in_root", normalized_path + if _is_absolute_path(normalized_path): + return normalized_path.rsplit("/", maxsplit=1)[-1], "external", normalized_path + return normalized_path, "relative", None + + +def _contract_report_location_path(location_path: str, *, scan_root: str) -> str: + contract_path, _scope, _absolute = _contract_path( + location_path, + scan_root=scan_root, + ) + return contract_path or "" + + +def _priority( + severity: str, + effort: str, +) -> float: + severity_rank = SEVERITY_RANK.get(severity, 1) + effort_rank = EFFORT_WEIGHT.get(effort, 1) + return float(severity_rank) / float(effort_rank) + + +def _clone_novelty( + *, + group_key: str, + baseline_trusted: bool, + new_keys: Collection[str] | None, +) -> str: + if not baseline_trusted: + return CLONE_NOVELTY_NEW + if new_keys is None: + return CLONE_NOVELTY_NEW + return CLONE_NOVELTY_NEW if group_key in new_keys else CLONE_NOVELTY_KNOWN + + +def _item_sort_key(item: Mapping[str, object]) -> tuple[str, int, int, str]: + return ( + str(item.get("relative_path", "")), + _as_int(item.get("start_line")), + _as_int(item.get("end_line")), + str(item.get("qualname", "")), + ) + + +def _parse_bool_text(value: object) -> bool: + text = str(value).strip().lower() + return text in {"1", "true", "yes"} + + +def _parse_ratio_percent(value: object) -> float | None: + text = str(value).strip() + if not text: + return None + if text.endswith("%"): + try: + return float(text[:-1]) / 100.0 + except ValueError: + return None + try: + numeric = float(text) + except ValueError: + return None + return numeric if numeric <= 1.0 else numeric / 100.0 + + +def _normalize_block_machine_facts( + *, + group_key: str, + group_arity: int, + block_facts: Mapping[str, str], +) -> tuple[dict[str, object], dict[str, str]]: + facts: dict[str, object] = { + "group_key": group_key, + "group_arity": 
group_arity, + } + display_facts: dict[str, str] = {} + for key in sorted(block_facts): + value = str(block_facts[key]) + match key: + case "group_arity": + facts[key] = _as_int(value) + case "block_size" | "consecutive_asserts" | "instance_peer_count": + facts[key] = _as_int(value) + case "merged_regions": + facts[key] = _parse_bool_text(value) + case "assert_ratio": + ratio = _parse_ratio_percent(value) + if ratio is not None: + facts[key] = ratio + display_facts[key] = value + case ( + "match_rule" | "pattern" | "signature_kind" | "hint" | "hint_confidence" + ): + facts[key] = value + case _: + display_facts[key] = value + return facts, display_facts + + +def _source_scope_from_filepaths( + filepaths: Iterable[str], + *, + scan_root: str, +) -> dict[str, object]: + counts: Counter[SourceKind] = Counter() + for filepath in filepaths: + location = report_location_from_group_item( + {"filepath": filepath, "start_line": 0, "end_line": 0, "qualname": ""}, + scan_root=scan_root, + ) + counts[location.source_kind] += 1 + return _source_scope_from_counts(counts) + + +def _source_scope_from_counts( + counts: Mapping[SourceKind, int], +) -> dict[str, object]: + return _report_source_scope_from_counts(counts) + + +def _source_scope_from_locations( + locations: Sequence[Mapping[str, object]], +) -> dict[str, object]: + normalized_locations = [ + {"source_kind": _normalized_source_kind(location.get("source_kind"))} + for location in locations + ] + return _report_source_scope_from_locations(normalized_locations) + + +def _collect_paths_from_metrics(metrics: Mapping[str, object]) -> set[str]: + paths: set[str] = set() + complexity = _as_mapping(metrics.get(CATEGORY_COMPLEXITY)) + for item in _as_sequence(complexity.get("functions")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + for family_name in (CATEGORY_COUPLING, CATEGORY_COHESION): + family = _as_mapping(metrics.get(family_name)) + 
for item in _as_sequence(family.get("classes")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + dead_code = _as_mapping(metrics.get(FAMILY_DEAD_CODE)) + for item in _as_sequence(dead_code.get("items")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + for item in _as_sequence(dead_code.get("suppressed_items")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + overloaded_modules = _as_mapping(metrics.get(_OVERLOADED_MODULES_FAMILY)) + for item in _as_sequence(overloaded_modules.get("items")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + coverage_adoption = _as_mapping(metrics.get(_COVERAGE_ADOPTION_FAMILY)) + for item in _as_sequence(coverage_adoption.get("items")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + api_surface = _as_mapping(metrics.get(_API_SURFACE_FAMILY)) + for item in _as_sequence(api_surface.get("items")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + coverage_join = _as_mapping(metrics.get(_COVERAGE_JOIN_FAMILY)) + for item in _as_sequence(coverage_join.get("items")): + item_map = _as_mapping(item) + filepath = _optional_str(item_map.get("filepath")) + if filepath is not None: + paths.add(filepath) + return paths + + +def _collect_report_file_list( + *, + inventory: Mapping[str, object] | None, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None = None, + metrics: Mapping[str, object] | None, + structural_findings: 
Sequence[StructuralFindingGroup] | None, +) -> list[str]: + files: set[str] = set() + inventory_map = _as_mapping(inventory) + for filepath in _as_sequence(inventory_map.get("file_list")): + file_text = _optional_str(filepath) + if file_text is not None: + files.add(file_text) + for groups in (func_groups, block_groups, segment_groups): + for items in groups.values(): + for item in items: + filepath = _optional_str(item.get("filepath")) + if filepath is not None: + files.add(filepath) + for suppressed_group in suppressed_clone_groups or (): + for item in suppressed_group.items: + filepath = _optional_str(item.get("filepath")) + if filepath is not None: + files.add(filepath) + if metrics is not None: + files.update(_collect_paths_from_metrics(metrics)) + if structural_findings: + for structural_group in normalize_structural_findings(structural_findings): + for occurrence in structural_group.items: + filepath = _optional_str(occurrence.file_path) + if filepath is not None: + files.add(filepath) + return sorted(files) + + +def _count_file_lines(filepaths: Sequence[str]) -> int: + total = 0 + for filepath in filepaths: + total += _count_file_lines_for_path(filepath) + return total + + +def _count_file_lines_for_path(filepath: str) -> int: + try: + with open(filepath, encoding="utf-8", errors="surrogateescape") as handle: + return sum(1 for _ in handle) + except OSError: + return 0 + + +def _normalize_nested_string_rows(value: object) -> list[list[str]]: + rows: list[tuple[str, ...]] = [] + for row in _as_sequence(value): + modules = tuple( + str(module) for module in _as_sequence(row) if str(module).strip() + ) + if modules: + rows.append(modules) + rows.sort(key=lambda row: (len(row), row)) + return [list(row) for row in rows] + + +__all__ = [ + "_collect_report_file_list", + "normalize_structural_findings", +] diff --git a/codeclone/report/document/_design_groups.py b/codeclone/report/document/_design_groups.py new file mode 100644 index 0000000..5ef9e59 --- 
/dev/null +++ b/codeclone/report/document/_design_groups.py @@ -0,0 +1,394 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping +from typing import TYPE_CHECKING + +from ...contracts import ( + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, +) +from ...domain.findings import ( + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_COVERAGE, + CATEGORY_DEPENDENCY, + FAMILY_DESIGN, + FINDING_KIND_COVERAGE_HOTSPOT, + FINDING_KIND_COVERAGE_SCOPE_GAP, +) +from ...domain.quality import ( + CONFIDENCE_HIGH, + EFFORT_HARD, + EFFORT_MODERATE, + RISK_LOW, + SEVERITY_CRITICAL, + SEVERITY_WARNING, +) +from ...utils.coerce import as_float as _as_float +from ...utils.coerce import as_int as _as_int +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence +from ..derived import ( + report_location_from_group_item, +) + +if TYPE_CHECKING: + pass + +from ...findings.ids import design_group_id +from ._common import ( + _COVERAGE_JOIN_FAMILY, + _coerced_nonnegative_threshold, + _contract_report_location_path, + _priority, + _source_scope_from_filepaths, +) +from ._findings_groups import _single_location_source_scope + + +def _design_singleton_group( + *, + category: str, + kind: str, + severity: str, + qualname: str, + filepath: str, + start_line: int, + end_line: int, + scan_root: str, + item_data: Mapping[str, object], + facts: Mapping[str, object], +) -> dict[str, object]: + return { + "id": design_group_id(category, qualname), + "family": FAMILY_DESIGN, + "category": category, + "kind": kind, + "severity": severity, + 
"confidence": CONFIDENCE_HIGH, + "priority": _priority(severity, EFFORT_MODERATE), + "count": 1, + "source_scope": _single_location_source_scope( + filepath, + scan_root=scan_root, + ), + "spread": {"files": 1, "functions": 1}, + "items": [ + { + "relative_path": _contract_report_location_path( + filepath, + scan_root=scan_root, + ), + "qualname": qualname, + "start_line": start_line, + "end_line": end_line, + **item_data, + } + ], + "facts": dict(facts), + } + + +def _complexity_design_group( + item_map: Mapping[str, object], + *, + threshold: int, + scan_root: str, +) -> dict[str, object] | None: + cc = _as_int(item_map.get("cyclomatic_complexity"), 1) + if cc <= threshold: + return None + qualname = str(item_map.get("qualname", "")) + filepath = str(item_map.get("relative_path", "")) + nesting_depth = _as_int(item_map.get("nesting_depth")) + severity = SEVERITY_CRITICAL if cc > 40 else SEVERITY_WARNING + return _design_singleton_group( + category=CATEGORY_COMPLEXITY, + kind="function_hotspot", + severity=severity, + qualname=qualname, + filepath=filepath, + start_line=_as_int(item_map.get("start_line")), + end_line=_as_int(item_map.get("end_line")), + scan_root=scan_root, + item_data={ + "cyclomatic_complexity": cc, + "nesting_depth": nesting_depth, + "risk": str(item_map.get("risk", RISK_LOW)), + }, + facts={ + "cyclomatic_complexity": cc, + "nesting_depth": nesting_depth, + }, + ) + + +def _coupling_design_group( + item_map: Mapping[str, object], + *, + threshold: int, + scan_root: str, +) -> dict[str, object] | None: + cbo = _as_int(item_map.get("cbo")) + if cbo <= threshold: + return None + qualname = str(item_map.get("qualname", "")) + filepath = str(item_map.get("relative_path", "")) + coupled_classes = list(_as_sequence(item_map.get("coupled_classes"))) + return _design_singleton_group( + category=CATEGORY_COUPLING, + kind="class_hotspot", + severity=SEVERITY_WARNING, + qualname=qualname, + filepath=filepath, + 
start_line=_as_int(item_map.get("start_line")), + end_line=_as_int(item_map.get("end_line")), + scan_root=scan_root, + item_data={ + "cbo": cbo, + "risk": str(item_map.get("risk", RISK_LOW)), + "coupled_classes": coupled_classes, + }, + facts={ + "cbo": cbo, + "coupled_classes": coupled_classes, + }, + ) + + +def _cohesion_design_group( + item_map: Mapping[str, object], + *, + threshold: int, + scan_root: str, +) -> dict[str, object] | None: + lcom4 = _as_int(item_map.get("lcom4")) + if lcom4 < threshold: + return None + qualname = str(item_map.get("qualname", "")) + filepath = str(item_map.get("relative_path", "")) + method_count = _as_int(item_map.get("method_count")) + instance_var_count = _as_int(item_map.get("instance_var_count")) + return _design_singleton_group( + category=CATEGORY_COHESION, + kind="class_hotspot", + severity=SEVERITY_WARNING, + qualname=qualname, + filepath=filepath, + start_line=_as_int(item_map.get("start_line")), + end_line=_as_int(item_map.get("end_line")), + scan_root=scan_root, + item_data={ + "lcom4": lcom4, + "risk": str(item_map.get("risk", RISK_LOW)), + "method_count": method_count, + "instance_var_count": instance_var_count, + }, + facts={ + "lcom4": lcom4, + "method_count": method_count, + "instance_var_count": instance_var_count, + }, + ) + + +def _dependency_design_group( + cycle: object, + *, + scan_root: str, +) -> dict[str, object] | None: + modules = [str(module) for module in _as_sequence(cycle) if str(module).strip()] + if not modules: + return None + cycle_key = " -> ".join(modules) + return { + "id": design_group_id(CATEGORY_DEPENDENCY, cycle_key), + "family": FAMILY_DESIGN, + "category": CATEGORY_DEPENDENCY, + "kind": "cycle", + "severity": SEVERITY_CRITICAL, + "confidence": CONFIDENCE_HIGH, + "priority": _priority(SEVERITY_CRITICAL, EFFORT_HARD), + "count": len(modules), + "source_scope": _source_scope_from_filepaths( + (module.replace(".", "/") + ".py" for module in modules), + scan_root=scan_root, + ), + "spread": 
{"files": len(modules), "functions": 0}, + "items": [ + { + "module": module, + "relative_path": module.replace(".", "/") + ".py", + "source_kind": report_location_from_group_item( + { + "filepath": module.replace(".", "/") + ".py", + "qualname": "", + "start_line": 0, + "end_line": 0, + } + ).source_kind, + } + for module in modules + ], + "facts": { + "cycle_length": len(modules), + }, + } + + +def _coverage_design_group( + item_map: Mapping[str, object], + *, + threshold_percent: int, + scan_root: str, +) -> dict[str, object] | None: + coverage_hotspot = bool(item_map.get("coverage_hotspot")) + scope_gap_hotspot = bool(item_map.get("scope_gap_hotspot")) + if not coverage_hotspot and not scope_gap_hotspot: + return None + qualname = str(item_map.get("qualname", "")).strip() + filepath = str(item_map.get("relative_path", "")).strip() + if not filepath: + return None + start_line = _as_int(item_map.get("start_line")) + end_line = _as_int(item_map.get("end_line")) + subject_key = qualname or f"{filepath}:{start_line}:{end_line}" + risk = str(item_map.get("risk", RISK_LOW)).strip() or RISK_LOW + coverage_status = str(item_map.get("coverage_status", "")).strip() + coverage_permille = _as_int(item_map.get("coverage_permille")) + covered_lines = _as_int(item_map.get("covered_lines")) + executable_lines = _as_int(item_map.get("executable_lines")) + complexity = _as_int(item_map.get("cyclomatic_complexity"), 1) + severity = SEVERITY_CRITICAL if risk == "high" else SEVERITY_WARNING + if scope_gap_hotspot: + kind = FINDING_KIND_COVERAGE_SCOPE_GAP + detail = "The supplied coverage.xml did not map to this function's file." + else: + kind = FINDING_KIND_COVERAGE_HOTSPOT + detail = "Joined line coverage is below the configured hotspot threshold." 
+ return { + "id": design_group_id(CATEGORY_COVERAGE, subject_key), + "family": FAMILY_DESIGN, + "category": CATEGORY_COVERAGE, + "kind": kind, + "severity": severity, + "confidence": CONFIDENCE_HIGH, + "priority": _priority(severity, EFFORT_MODERATE), + "count": 1, + "source_scope": _single_location_source_scope( + filepath, + scan_root=scan_root, + ), + "spread": {"files": 1, "functions": 1}, + "items": [ + { + "relative_path": filepath, + "qualname": qualname, + "start_line": start_line, + "end_line": end_line, + "risk": risk, + "cyclomatic_complexity": complexity, + "coverage_permille": coverage_permille, + "coverage_status": coverage_status, + "covered_lines": covered_lines, + "executable_lines": executable_lines, + "coverage_hotspot": coverage_hotspot, + "scope_gap_hotspot": scope_gap_hotspot, + } + ], + "facts": { + "coverage_permille": coverage_permille, + "hotspot_threshold_percent": threshold_percent, + "coverage_status": coverage_status, + "covered_lines": covered_lines, + "executable_lines": executable_lines, + "cyclomatic_complexity": complexity, + "coverage_hotspot": coverage_hotspot, + "scope_gap_hotspot": scope_gap_hotspot, + "detail": detail, + }, + } + + +def _build_design_groups( + metrics_payload: Mapping[str, object], + *, + design_thresholds: Mapping[str, object] | None = None, + scan_root: str, +) -> list[dict[str, object]]: + families = _as_mapping(metrics_payload.get("families")) + thresholds = _as_mapping(design_thresholds) + complexity_threshold = _coerced_nonnegative_threshold( + _as_mapping(thresholds.get(CATEGORY_COMPLEXITY)).get("value"), + default=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + ) + coupling_threshold = _coerced_nonnegative_threshold( + _as_mapping(thresholds.get(CATEGORY_COUPLING)).get("value"), + default=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ) + cohesion_threshold = _coerced_nonnegative_threshold( + _as_mapping(thresholds.get(CATEGORY_COHESION)).get("value"), + default=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + 
) + coverage_join = _as_mapping(families.get(_COVERAGE_JOIN_FAMILY)) + coverage_threshold = _as_int( + _as_mapping(coverage_join.get("summary")).get("hotspot_threshold_percent"), + 50, + ) + groups: list[dict[str, object]] = [] + + complexity = _as_mapping(families.get(CATEGORY_COMPLEXITY)) + for item in _as_sequence(complexity.get("items")): + group = _complexity_design_group( + _as_mapping(item), + threshold=complexity_threshold, + scan_root=scan_root, + ) + if group is not None: + groups.append(group) + + coupling = _as_mapping(families.get(CATEGORY_COUPLING)) + for item in _as_sequence(coupling.get("items")): + group = _coupling_design_group( + _as_mapping(item), + threshold=coupling_threshold, + scan_root=scan_root, + ) + if group is not None: + groups.append(group) + + cohesion = _as_mapping(families.get(CATEGORY_COHESION)) + for item in _as_sequence(cohesion.get("items")): + group = _cohesion_design_group( + _as_mapping(item), + threshold=cohesion_threshold, + scan_root=scan_root, + ) + if group is not None: + groups.append(group) + + dependencies = _as_mapping(families.get("dependencies")) + for cycle in _as_sequence(dependencies.get("cycles")): + group = _dependency_design_group(cycle, scan_root=scan_root) + if group is not None: + groups.append(group) + + for item in _as_sequence(coverage_join.get("items")): + group = _coverage_design_group( + _as_mapping(item), + threshold_percent=coverage_threshold, + scan_root=scan_root, + ) + if group is not None: + groups.append(group) + + groups.sort(key=lambda group: (-_as_float(group["priority"]), str(group["id"]))) + return groups diff --git a/codeclone/report/document/_findings_groups.py b/codeclone/report/document/_findings_groups.py new file mode 100644 index 0000000..8653708 --- /dev/null +++ b/codeclone/report/document/_findings_groups.py @@ -0,0 +1,606 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Collection, Mapping, Sequence +from typing import TYPE_CHECKING, Literal + +from ...domain.findings import ( + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + CLONE_KIND_SEGMENT, + FAMILY_CLONE, + FAMILY_DEAD_CODE, + FAMILY_STRUCTURAL, +) +from ...domain.quality import ( + CONFIDENCE_HIGH, + CONFIDENCE_MEDIUM, + EFFORT_EASY, + RISK_LOW, + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_WARNING, +) +from ...findings.structural.detectors import normalize_structural_findings +from ...utils.coerce import as_float as _as_float +from ...utils.coerce import as_int as _as_int +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence +from ..derived import ( + group_spread, + report_location_from_group_item, + report_location_from_structural_occurrence, +) +from ..suggestions import classify_clone_type + +if TYPE_CHECKING: + from ...models import ( + GroupItemLike, + GroupMapLike, + StructuralFindingGroup, + SuppressedCloneGroup, + ) + +from ...findings.ids import clone_group_id, dead_code_group_id, structural_group_id +from ._common import ( + _clone_novelty, + _contract_report_location_path, + _item_sort_key, + _normalize_block_machine_facts, + _priority, + _source_scope_from_locations, +) + + +def _clone_group_assessment( + *, + count: int, + clone_type: str, +) -> tuple[str, float]: + match (count >= 4, clone_type in {"Type-1", "Type-2"}): + case (True, _): + severity = SEVERITY_CRITICAL + case (False, True): + severity = SEVERITY_WARNING + case _: + severity = SEVERITY_INFO + effort = "easy" if clone_type in {"Type-1", "Type-2"} else "moderate" + return severity, _priority(severity, effort) + + +def _build_clone_group_facts( + *, + group_key: str, + kind: 
Literal["function", "block", "segment"], + items: Sequence[GroupItemLike], + block_facts: Mapping[str, Mapping[str, str]], +) -> tuple[dict[str, object], dict[str, str]]: + base: dict[str, object] = { + "group_key": group_key, + "group_arity": len(items), + } + display_facts: dict[str, str] = {} + match kind: + case "function": + loc_buckets = sorted( + { + str(item.get("loc_bucket", "")) + for item in items + if str(item.get("loc_bucket", "")).strip() + } + ) + base["loc_buckets"] = loc_buckets + case "block" if group_key in block_facts: + typed_facts, block_display_facts = _normalize_block_machine_facts( + group_key=group_key, + group_arity=len(items), + block_facts=block_facts[group_key], + ) + base.update(typed_facts) + display_facts.update(block_display_facts) + case _: + pass + return base, display_facts + + +def _clone_item_payload( + item: GroupItemLike, + *, + kind: Literal["function", "block", "segment"], + scan_root: str, +) -> dict[str, object]: + payload: dict[str, object] = { + "relative_path": _contract_report_location_path( + str(item.get("filepath", "")), + scan_root=scan_root, + ), + "qualname": str(item.get("qualname", "")), + "start_line": _as_int(item.get("start_line", 0)), + "end_line": _as_int(item.get("end_line", 0)), + } + match kind: + case "function": + payload.update( + { + "loc": _as_int(item.get("loc", 0)), + "stmt_count": _as_int(item.get("stmt_count", 0)), + "fingerprint": str(item.get("fingerprint", "")), + "loc_bucket": str(item.get("loc_bucket", "")), + "cyclomatic_complexity": _as_int( + item.get("cyclomatic_complexity", 1) + ), + "nesting_depth": _as_int(item.get("nesting_depth", 0)), + "risk": str(item.get("risk", RISK_LOW)), + "raw_hash": str(item.get("raw_hash", "")), + } + ) + case "block": + payload["size"] = _as_int(item.get("size", 0)) + case _: + payload.update( + { + "size": _as_int(item.get("size", 0)), + "segment_hash": str(item.get("segment_hash", "")), + "segment_sig": str(item.get("segment_sig", "")), + } + ) + 
return payload + + +def _build_clone_groups( + *, + groups: GroupMapLike, + kind: Literal["function", "block", "segment"], + baseline_trusted: bool, + new_keys: Collection[str] | None, + block_facts: Mapping[str, Mapping[str, str]], + scan_root: str, +) -> list[dict[str, object]]: + encoded_groups: list[dict[str, object]] = [] + new_key_set = set(new_keys) if new_keys is not None else None + for group_key in sorted(groups): + items = groups[group_key] + clone_type = classify_clone_type(items=items, kind=kind) + severity, priority = _clone_group_assessment( + count=len(items), + clone_type=clone_type, + ) + novelty = _clone_novelty( + group_key=group_key, + baseline_trusted=baseline_trusted, + new_keys=new_key_set, + ) + locations = tuple( + report_location_from_group_item(item, scan_root=scan_root) for item in items + ) + source_scope = _source_scope_from_locations( + [ + { + "source_kind": location.source_kind, + } + for location in locations + ] + ) + spread_files, spread_functions = group_spread(locations) + rows = sorted( + [ + _clone_item_payload( + item, + kind=kind, + scan_root=scan_root, + ) + for item in items + ], + key=_item_sort_key, + ) + facts, display_facts = _build_clone_group_facts( + group_key=group_key, + kind=kind, + items=items, + block_facts=block_facts, + ) + encoded_groups.append( + { + "id": clone_group_id(kind, group_key), + "family": FAMILY_CLONE, + "category": kind, + "kind": "clone_group", + "severity": severity, + "confidence": CONFIDENCE_HIGH, + "priority": priority, + "clone_kind": kind, + "clone_type": clone_type, + "novelty": novelty, + "count": len(items), + "source_scope": source_scope, + "spread": { + "files": spread_files, + "functions": spread_functions, + }, + "items": rows, + "facts": facts, + **({"display_facts": display_facts} if display_facts else {}), + } + ) + encoded_groups.sort( + key=lambda group: (-_as_int(group.get("count")), str(group["id"])) + ) + return encoded_groups + + +def _build_suppressed_clone_groups( + 
*, + groups: Sequence[SuppressedCloneGroup] | None, + block_facts: Mapping[str, Mapping[str, str]], + scan_root: str, +) -> dict[str, list[dict[str, object]]]: + buckets: dict[str, list[dict[str, object]]] = { + CLONE_KIND_FUNCTION: [], + CLONE_KIND_BLOCK: [], + CLONE_KIND_SEGMENT: [], + } + for group in groups or (): + items = group.items + clone_type = classify_clone_type(items=items, kind=group.kind) + severity, priority = _clone_group_assessment( + count=len(items), + clone_type=clone_type, + ) + locations = tuple( + report_location_from_group_item(item, scan_root=scan_root) for item in items + ) + source_scope = _source_scope_from_locations( + [ + { + "source_kind": location.source_kind, + } + for location in locations + ] + ) + spread_files, spread_functions = group_spread(locations) + rows = sorted( + [ + _clone_item_payload( + item, + kind=group.kind, + scan_root=scan_root, + ) + for item in items + ], + key=_item_sort_key, + ) + facts, display_facts = _build_clone_group_facts( + group_key=group.group_key, + kind=group.kind, + items=items, + block_facts=block_facts, + ) + encoded: dict[str, object] = { + "id": clone_group_id(group.kind, group.group_key), + "family": FAMILY_CLONE, + "category": group.kind, + "kind": "clone_group", + "severity": severity, + "confidence": CONFIDENCE_HIGH, + "priority": priority, + "clone_kind": group.kind, + "clone_type": clone_type, + "count": len(items), + "source_scope": source_scope, + "spread": { + "files": spread_files, + "functions": spread_functions, + }, + "items": rows, + "facts": facts, + "suppression_rule": group.suppression_rule, + "suppression_source": group.suppression_source, + "matched_patterns": list(group.matched_patterns), + } + if display_facts: + encoded["display_facts"] = display_facts + buckets[group.kind].append(encoded) + for bucket in buckets.values(): + bucket.sort(key=lambda group: (-_as_int(group.get("count")), str(group["id"]))) + return buckets + + +def _structural_group_assessment( + *, + 
finding_kind: str, + count: int, + spread_functions: int, +) -> tuple[str, float]: + match finding_kind: + case "clone_guard_exit_divergence" | "clone_cohort_drift": + severity = SEVERITY_WARNING + if count >= 3 or spread_functions > 1: + severity = SEVERITY_CRITICAL + return severity, _priority(severity, "moderate") + case _: + severity = ( + SEVERITY_WARNING + if count >= 4 or spread_functions > 1 + else SEVERITY_INFO + ) + return severity, _priority(severity, "moderate") + + +def _csv_values(value: object) -> list[str]: + raw = str(value).strip() + if not raw: + return [] + return sorted({part.strip() for part in raw.split(",") if part.strip()}) + + +def _build_structural_signature( + finding_kind: str, + signature: Mapping[str, str], +) -> dict[str, object]: + debug = {str(key): str(signature[key]) for key in sorted(signature)} + match finding_kind: + case "clone_guard_exit_divergence": + return { + "version": "1", + "stable": { + "family": "clone_guard_exit_divergence", + "cohort_id": str(signature.get("cohort_id", "")), + "majority_guard_count": _as_int( + signature.get("majority_guard_count") + ), + "majority_guard_terminal_profile": str( + signature.get("majority_guard_terminal_profile", "none") + ), + "majority_terminal_kind": str( + signature.get("majority_terminal_kind", "fallthrough") + ), + "majority_side_effect_before_guard": ( + str(signature.get("majority_side_effect_before_guard", "0")) + == "1" + ), + }, + "debug": debug, + } + case "clone_cohort_drift": + return { + "version": "1", + "stable": { + "family": "clone_cohort_drift", + "cohort_id": str(signature.get("cohort_id", "")), + "drift_fields": _csv_values(signature.get("drift_fields")), + "majority_profile": { + "terminal_kind": str( + signature.get("majority_terminal_kind", "") + ), + "guard_exit_profile": str( + signature.get("majority_guard_exit_profile", "") + ), + "try_finally_profile": str( + signature.get("majority_try_finally_profile", "") + ), + "side_effect_order_profile": str( + 
signature.get("majority_side_effect_order_profile", "") + ), + }, + }, + "debug": debug, + } + case _: + return { + "version": "1", + "stable": { + "family": "duplicated_branches", + "stmt_shape": str(signature.get("stmt_seq", "")), + "terminal_kind": str(signature.get("terminal", "")), + "control_flow": { + "has_loop": str(signature.get("has_loop", "0")) == "1", + "has_try": str(signature.get("has_try", "0")) == "1", + "nested_if": str(signature.get("nested_if", "0")) == "1", + }, + }, + "debug": debug, + } + + +def _build_structural_facts( + finding_kind: str, + signature: Mapping[str, str], + *, + count: int, +) -> dict[str, object]: + match finding_kind: + case "clone_guard_exit_divergence": + return { + "cohort_id": str(signature.get("cohort_id", "")), + "cohort_arity": _as_int(signature.get("cohort_arity")), + "divergent_members": _as_int(signature.get("divergent_members"), count), + "majority_entry_guard_count": _as_int( + signature.get("majority_guard_count"), + ), + "majority_guard_terminal_profile": str( + signature.get("majority_guard_terminal_profile", "none") + ), + "majority_terminal_kind": str( + signature.get("majority_terminal_kind", "fallthrough") + ), + "majority_side_effect_before_guard": ( + str(signature.get("majority_side_effect_before_guard", "0")) == "1" + ), + "guard_count_values": _csv_values(signature.get("guard_count_values")), + "guard_terminal_values": _csv_values( + signature.get("guard_terminal_values"), + ), + "terminal_values": _csv_values(signature.get("terminal_values")), + "side_effect_before_guard_values": _csv_values( + signature.get("side_effect_before_guard_values"), + ), + } + case "clone_cohort_drift": + return { + "cohort_id": str(signature.get("cohort_id", "")), + "cohort_arity": _as_int(signature.get("cohort_arity")), + "divergent_members": _as_int(signature.get("divergent_members"), count), + "drift_fields": _csv_values(signature.get("drift_fields")), + "stable_majority_profile": { + "terminal_kind": 
str(signature.get("majority_terminal_kind", "")), + "guard_exit_profile": str( + signature.get("majority_guard_exit_profile", "") + ), + "try_finally_profile": str( + signature.get("majority_try_finally_profile", "") + ), + "side_effect_order_profile": str( + signature.get("majority_side_effect_order_profile", "") + ), + }, + } + case _: + return { + "occurrence_count": count, + "non_overlapping": True, + "call_bucket": _as_int(signature.get("calls", "0")), + "raise_bucket": _as_int(signature.get("raises", "0")), + } + + +def _build_structural_groups( + groups: Sequence[StructuralFindingGroup] | None, + *, + scan_root: str, +) -> list[dict[str, object]]: + normalized_groups = normalize_structural_findings(groups or ()) + out: list[dict[str, object]] = [] + for group in normalized_groups: + locations = tuple( + report_location_from_structural_occurrence(item, scan_root=scan_root) + for item in group.items + ) + source_scope = _source_scope_from_locations( + [{"source_kind": location.source_kind} for location in locations] + ) + spread_files, spread_functions = group_spread(locations) + severity, priority = _structural_group_assessment( + finding_kind=group.finding_kind, + count=len(group.items), + spread_functions=spread_functions, + ) + out.append( + { + "id": structural_group_id(group.finding_kind, group.finding_key), + "family": FAMILY_STRUCTURAL, + "category": group.finding_kind, + "kind": group.finding_kind, + "severity": severity, + "confidence": ( + CONFIDENCE_HIGH + if group.finding_kind + in {"clone_guard_exit_divergence", "clone_cohort_drift"} + else CONFIDENCE_MEDIUM + ), + "priority": priority, + "count": len(group.items), + "source_scope": source_scope, + "spread": { + "files": spread_files, + "functions": spread_functions, + }, + "signature": _build_structural_signature( + group.finding_kind, + group.signature, + ), + "items": sorted( + [ + { + "relative_path": _contract_report_location_path( + item.file_path, + scan_root=scan_root, + ), + "qualname": 
item.qualname, + "start_line": item.start, + "end_line": item.end, + } + for item in group.items + ], + key=_item_sort_key, + ), + "facts": _build_structural_facts( + group.finding_kind, + group.signature, + count=len(group.items), + ), + } + ) + out.sort(key=lambda group: (-_as_int(group.get("count")), str(group["id"]))) + return out + + +def _single_location_source_scope( + filepath: str, + *, + scan_root: str, +) -> dict[str, object]: + location = report_location_from_group_item( + { + "filepath": filepath, + "qualname": "", + "start_line": 0, + "end_line": 0, + }, + scan_root=scan_root, + ) + return _source_scope_from_locations([{"source_kind": location.source_kind}]) + + +def _build_dead_code_groups( + metrics_payload: Mapping[str, object], + *, + scan_root: str, +) -> list[dict[str, object]]: + families = _as_mapping(metrics_payload.get("families")) + dead_code = _as_mapping(families.get(FAMILY_DEAD_CODE)) + groups: list[dict[str, object]] = [] + for item in _as_sequence(dead_code.get("items")): + item_map = _as_mapping(item) + qualname = str(item_map.get("qualname", "")) + filepath = str(item_map.get("relative_path", "")) + confidence = str(item_map.get("confidence", CONFIDENCE_MEDIUM)) + severity = SEVERITY_WARNING if confidence == CONFIDENCE_HIGH else SEVERITY_INFO + groups.append( + { + "id": dead_code_group_id(qualname), + "family": FAMILY_DEAD_CODE, + "category": str(item_map.get("kind", "unknown")), + "kind": "unused_symbol", + "severity": severity, + "confidence": confidence, + "priority": _priority(severity, EFFORT_EASY), + "count": 1, + "source_scope": _single_location_source_scope( + filepath, + scan_root=scan_root, + ), + "spread": {"files": 1, "functions": 1 if qualname else 0}, + "items": [ + { + "relative_path": _contract_report_location_path( + filepath, + scan_root=scan_root, + ), + "qualname": qualname, + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + } + ], + "facts": { + "kind": 
str(item_map.get("kind", "unknown")), + "confidence": confidence, + }, + } + ) + groups.sort(key=lambda group: (-_as_float(group["priority"]), str(group["id"]))) + return groups diff --git a/codeclone/report/document/builder.py b/codeclone/report/document/builder.py new file mode 100644 index 0000000..9d22dfa --- /dev/null +++ b/codeclone/report/document/builder.py @@ -0,0 +1,114 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Collection, Mapping, Sequence +from typing import TYPE_CHECKING + +from ...contracts import ( + REPORT_SCHEMA_VERSION, +) +from ...utils.coerce import as_mapping as _as_mapping + +if TYPE_CHECKING: + from ...models import ( + GroupMapLike, + StructuralFindingGroup, + Suggestion, + SuppressedCloneGroup, + ) + +from ._common import _collect_report_file_list +from .derived import _build_derived_overview, _build_derived_suggestions +from .findings import _build_findings_payload +from .integrity import _build_integrity_payload +from .inventory import ( + _baseline_is_trusted, + _build_inventory_payload, + _build_meta_payload, +) +from .metrics import _build_metrics_payload + + +def build_report_document( + *, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + meta: Mapping[str, object] | None = None, + inventory: Mapping[str, object] | None = None, + block_facts: Mapping[str, Mapping[str, str]] | None = None, + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + new_segment_group_keys: Collection[str] | None = None, + suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: 
Sequence[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, +) -> dict[str, object]: + report_schema_version = REPORT_SCHEMA_VERSION + scan_root = str(_as_mapping(meta).get("scan_root", "")) + meta_payload = _build_meta_payload(meta, scan_root=scan_root) + design_thresholds = _as_mapping( + _as_mapping(meta_payload.get("analysis_thresholds")).get("design_findings") + ) + metrics_payload = _build_metrics_payload(metrics, scan_root=scan_root) + file_list = _collect_report_file_list( + inventory=inventory, + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + suppressed_clone_groups=suppressed_clone_groups, + metrics=metrics, + structural_findings=structural_findings, + ) + inventory_payload = _build_inventory_payload( + inventory=inventory, + file_list=file_list, + metrics_payload=metrics_payload, + scan_root=scan_root, + ) + findings_payload = _build_findings_payload( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + block_facts=block_facts or {}, + structural_findings=structural_findings, + metrics_payload=metrics_payload, + baseline_trusted=_baseline_is_trusted(meta_payload), + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + new_segment_group_keys=new_segment_group_keys, + suppressed_clone_groups=suppressed_clone_groups, + design_thresholds=design_thresholds, + scan_root=scan_root, + ) + overview_payload, hotlists_payload = _build_derived_overview( + findings=findings_payload, + metrics_payload=metrics_payload, + ) + derived_payload = { + "suggestions": _build_derived_suggestions(suggestions), + "overview": overview_payload, + "hotlists": hotlists_payload, + } + integrity_payload = _build_integrity_payload( + report_schema_version=report_schema_version, + meta=meta_payload, + inventory=inventory_payload, + findings=findings_payload, + metrics=metrics_payload, + ) + return { + 
"report_schema_version": report_schema_version, + "meta": meta_payload, + "inventory": inventory_payload, + "findings": findings_payload, + "metrics": metrics_payload, + "derived": derived_payload, + "integrity": integrity_payload, + } diff --git a/codeclone/report/document/derived.py b/codeclone/report/document/derived.py new file mode 100644 index 0000000..add2042 --- /dev/null +++ b/codeclone/report/document/derived.py @@ -0,0 +1,425 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections import Counter +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING + +from ...domain.findings import ( + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + CLONE_KIND_SEGMENT, + FAMILY_CLONE, + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_DESIGN, + FAMILY_STRUCTURAL, +) +from ...domain.quality import ( + SEVERITY_INFO, + SEVERITY_ORDER, +) +from ...domain.source_scope import ( + IMPACT_SCOPE_MIXED, + IMPACT_SCOPE_NON_RUNTIME, + IMPACT_SCOPE_RUNTIME, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_OTHER, + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, +) +from ...findings.ids import ( + clone_group_id, + dead_code_group_id, + design_group_id, + structural_group_id, +) +from ...utils.coerce import as_float as _as_float +from ...utils.coerce import as_int as _as_int +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence +from ..overview import build_directory_hotspots +from ._common import _contract_report_location_path, _is_absolute_path + +if TYPE_CHECKING: + from ...models import ( + Suggestion, + ) + + +def 
_sort_flat_finding_ids( + groups: Sequence[Mapping[str, object]], +) -> list[str]: + ordered = sorted( + groups, + key=lambda group: ( + -_as_float(group.get("priority")), + SEVERITY_ORDER.get(str(group.get("severity", SEVERITY_INFO)), 9), + -_as_int(_as_mapping(group.get("spread")).get("files")), + -_as_int(_as_mapping(group.get("spread")).get("functions")), + -_as_int(group.get("count")), + str(group.get("id", "")), + ), + ) + return [str(group["id"]) for group in ordered] + + +def _sort_highest_spread_ids( + groups: Sequence[Mapping[str, object]], +) -> list[str]: + ordered = sorted( + groups, + key=lambda group: ( + -_as_int(_as_mapping(group.get("spread")).get("files")), + -_as_int(_as_mapping(group.get("spread")).get("functions")), + -_as_int(group.get("count")), + -_as_float(group.get("priority")), + str(group.get("id", "")), + ), + ) + return [str(group["id"]) for group in ordered] + + +def _health_snapshot(metrics_payload: Mapping[str, object]) -> dict[str, object]: + health = _as_mapping(_as_mapping(metrics_payload.get("families")).get("health")) + summary = _as_mapping(health.get("summary")) + dimensions = { + str(key): _as_int(value) + for key, value in _as_mapping(summary.get("dimensions")).items() + } + strongest = None + weakest = None + if dimensions: + strongest = min( + sorted(dimensions), + key=lambda key: (-dimensions[key], key), + ) + weakest = min( + sorted(dimensions), + key=lambda key: (dimensions[key], key), + ) + return { + "score": _as_int(summary.get("score")), + "grade": str(summary.get("grade", "")), + "strongest_dimension": strongest, + "weakest_dimension": weakest, + } + + +def _combined_impact_scope(groups: Sequence[Mapping[str, object]]) -> str: + impact_scopes = { + str( + _as_mapping(group.get("source_scope")).get( + "impact_scope", + IMPACT_SCOPE_NON_RUNTIME, + ) + ) + for group in groups + } + if not impact_scopes: + return IMPACT_SCOPE_NON_RUNTIME + if len(impact_scopes) == 1: + return next(iter(impact_scopes)) + return 
IMPACT_SCOPE_MIXED + + +def _top_risks( + *, + dead_code_groups: Sequence[Mapping[str, object]], + design_groups: Sequence[Mapping[str, object]], + structural_groups: Sequence[Mapping[str, object]], + clone_groups: Sequence[Mapping[str, object]], +) -> list[dict[str, object]]: + risks: list[dict[str, object]] = [] + + if dead_code_groups: + label = ( + "1 dead code item" + if len(dead_code_groups) == 1 + else f"{len(dead_code_groups)} dead code items" + ) + risks.append( + { + "kind": "family_summary", + "family": FAMILY_DEAD_CODE, + "count": len(dead_code_groups), + "scope": IMPACT_SCOPE_MIXED + if len( + { + _as_mapping(group.get("source_scope")).get("impact_scope") + for group in dead_code_groups + } + ) + > 1 + else str( + _as_mapping(dead_code_groups[0].get("source_scope")).get( + "impact_scope", + IMPACT_SCOPE_NON_RUNTIME, + ) + ), + "label": label, + } + ) + + low_cohesion = [ + group + for group in design_groups + if str(group.get("category", "")) == CATEGORY_COHESION + ] + if low_cohesion: + label = ( + "1 low cohesion class" + if len(low_cohesion) == 1 + else f"{len(low_cohesion)} low cohesion classes" + ) + risks.append( + { + "kind": "family_summary", + "family": FAMILY_DESIGN, + "category": CATEGORY_COHESION, + "count": len(low_cohesion), + "scope": _combined_impact_scope(low_cohesion), + "label": label, + } + ) + + production_structural = [ + group + for group in structural_groups + if str(_as_mapping(group.get("source_scope")).get("impact_scope")) + in {IMPACT_SCOPE_RUNTIME, IMPACT_SCOPE_MIXED} + ] + if production_structural: + label = ( + "1 structural finding in production code" + if len(production_structural) == 1 + else ( + f"{len(production_structural)} structural findings in production code" + ) + ) + risks.append( + { + "kind": "family_summary", + "family": FAMILY_STRUCTURAL, + "count": len(production_structural), + "scope": SOURCE_KIND_PRODUCTION, + "label": label, + } + ) + + fixture_test_clones = [ + group + for group in clone_groups + if 
_as_mapping(group.get("source_scope")).get("impact_scope") + == IMPACT_SCOPE_NON_RUNTIME + and _as_mapping(group.get("source_scope")).get("dominant_kind") + in {SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES} + ] + if fixture_test_clones: + label = ( + "1 clone group in fixtures/tests" + if len(fixture_test_clones) == 1 + else f"{len(fixture_test_clones)} clone groups in fixtures/tests" + ) + risks.append( + { + "kind": "family_summary", + "family": FAMILY_CLONE, + "count": len(fixture_test_clones), + "scope": IMPACT_SCOPE_NON_RUNTIME, + "label": label, + } + ) + + return risks[:6] + + +def _build_derived_overview( + *, + findings: Mapping[str, object], + metrics_payload: Mapping[str, object], +) -> tuple[dict[str, object], dict[str, object]]: + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get(FAMILY_CLONES)) + clone_groups = [ + *_as_sequence(clones.get("functions")), + *_as_sequence(clones.get("blocks")), + *_as_sequence(clones.get("segments")), + ] + structural_groups = _as_sequence( + _as_mapping(groups.get(FAMILY_STRUCTURAL)).get("groups") + ) + dead_code_groups = _as_sequence( + _as_mapping(groups.get(FAMILY_DEAD_CODE)).get("groups") + ) + design_groups = _as_sequence(_as_mapping(groups.get("design")).get("groups")) + flat_groups = [ + *clone_groups, + *structural_groups, + *dead_code_groups, + *design_groups, + ] + dominant_kind_counts: Counter[str] = Counter( + str( + _as_mapping(_as_mapping(group).get("source_scope")).get( + "dominant_kind", + SOURCE_KIND_OTHER, + ) + ) + for group in flat_groups + ) + summary = _as_mapping(findings.get("summary")) + overview: dict[str, object] = { + "families": dict(_as_mapping(summary.get("families"))), + "top_risks": _top_risks( + dead_code_groups=[_as_mapping(group) for group in dead_code_groups], + design_groups=[_as_mapping(group) for group in design_groups], + structural_groups=[_as_mapping(group) for group in structural_groups], + clone_groups=[_as_mapping(group) for group in clone_groups], + 
), + "source_scope_breakdown": { + key: dominant_kind_counts[key] + for key in ( + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_OTHER, + ) + if dominant_kind_counts[key] > 0 + }, + "health_snapshot": _health_snapshot(metrics_payload), + "directory_hotspots": build_directory_hotspots(findings=findings), + } + hotlists: dict[str, object] = { + "most_actionable_ids": _sort_flat_finding_ids( + [ + group + for group in map(_as_mapping, flat_groups) + if str(group.get("severity")) != SEVERITY_INFO + ] + )[:5], + "highest_spread_ids": _sort_highest_spread_ids( + list(map(_as_mapping, flat_groups)) + )[:5], + "production_hotspot_ids": _sort_flat_finding_ids( + [ + group + for group in map(_as_mapping, flat_groups) + if str(_as_mapping(group.get("source_scope")).get("impact_scope")) + in {IMPACT_SCOPE_RUNTIME, IMPACT_SCOPE_MIXED} + ] + )[:5], + "test_fixture_hotspot_ids": _sort_flat_finding_ids( + [ + group + for group in map(_as_mapping, flat_groups) + if str(_as_mapping(group.get("source_scope")).get("impact_scope")) + == IMPACT_SCOPE_NON_RUNTIME + and str(_as_mapping(group.get("source_scope")).get("dominant_kind")) + in {SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES} + ] + )[:5], + } + return overview, hotlists + + +def _representative_location_rows( + suggestion: Suggestion, +) -> list[dict[str, object]]: + rows = [ + { + "relative_path": ( + location.relative_path + if ( + location.relative_path + and not _is_absolute_path(location.relative_path) + ) + else _contract_report_location_path( + location.filepath, + scan_root="", + ) + ), + "start_line": location.start_line, + "end_line": location.end_line, + "qualname": location.qualname, + "source_kind": location.source_kind, + } + for location in suggestion.representative_locations + ] + rows.sort( + key=lambda row: ( + str(row["relative_path"]), + _as_int(row["start_line"]), + _as_int(row["end_line"]), + str(row["qualname"]), + ) + ) + return rows[:3] + + +def 
_suggestion_finding_id(suggestion: Suggestion) -> str: + if suggestion.finding_family == FAMILY_CLONES: + if suggestion.fact_kind.startswith("Function"): + return clone_group_id(CLONE_KIND_FUNCTION, suggestion.subject_key) + if suggestion.fact_kind.startswith("Block"): + return clone_group_id(CLONE_KIND_BLOCK, suggestion.subject_key) + return clone_group_id(CLONE_KIND_SEGMENT, suggestion.subject_key) + if suggestion.finding_family == FAMILY_STRUCTURAL: + return structural_group_id( + suggestion.finding_kind or "duplicated_branches", + suggestion.subject_key, + ) + if suggestion.category == CATEGORY_DEAD_CODE: + return dead_code_group_id(suggestion.subject_key) + if suggestion.category in { + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_COHESION, + CATEGORY_DEPENDENCY, + }: + return design_group_id(suggestion.category, suggestion.subject_key) + return design_group_id( + suggestion.category, + suggestion.subject_key or suggestion.title, + ) + + +def _build_derived_suggestions( + suggestions: Sequence[Suggestion] | None, +) -> list[dict[str, object]]: + suggestion_rows = list(suggestions or ()) + suggestion_rows.sort( + key=lambda suggestion: ( + -suggestion.priority, + SEVERITY_ORDER.get(suggestion.severity, 9), + suggestion.title, + _suggestion_finding_id(suggestion), + ) + ) + return [ + { + "id": f"suggestion:{_suggestion_finding_id(suggestion)}", + "finding_id": _suggestion_finding_id(suggestion), + "title": suggestion.title, + "summary": suggestion.fact_summary, + "location_label": suggestion.location_label or suggestion.location, + "representative_locations": _representative_location_rows(suggestion), + "action": { + "effort": suggestion.effort, + "steps": list(suggestion.steps), + }, + } + for suggestion in suggestion_rows + ] diff --git a/codeclone/report/document/findings.py b/codeclone/report/document/findings.py new file mode 100644 index 0000000..43aaedd --- /dev/null +++ b/codeclone/report/document/findings.py @@ -0,0 +1,245 @@ +# This Source 
Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Collection, Mapping, Sequence +from typing import TYPE_CHECKING + +from ...domain.findings import ( + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + CLONE_KIND_SEGMENT, + CLONE_NOVELTY_KNOWN, + CLONE_NOVELTY_NEW, + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_STRUCTURAL, +) +from ...domain.quality import ( + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_WARNING, +) +from ...domain.source_scope import ( + IMPACT_SCOPE_MIXED, + IMPACT_SCOPE_NON_RUNTIME, + IMPACT_SCOPE_RUNTIME, +) +from ...utils.coerce import as_int as _as_int +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence + +if TYPE_CHECKING: + from ...models import ( + GroupMapLike, + StructuralFindingGroup, + SuppressedCloneGroup, + ) + +from ._design_groups import _build_design_groups +from ._findings_groups import ( + _build_clone_groups, + _build_dead_code_groups, + _build_structural_groups, + _build_suppressed_clone_groups, +) + + +def _findings_summary( + *, + clone_functions: Sequence[Mapping[str, object]], + clone_blocks: Sequence[Mapping[str, object]], + clone_segments: Sequence[Mapping[str, object]], + structural_groups: Sequence[Mapping[str, object]], + dead_code_groups: Sequence[Mapping[str, object]], + design_groups: Sequence[Mapping[str, object]], + suppressed_clone_groups: Mapping[str, Sequence[Mapping[str, object]]] | None = None, + dead_code_suppressed: int = 0, +) -> dict[str, object]: + flat_groups = [ + *clone_functions, + *clone_blocks, + *clone_segments, + *structural_groups, + *dead_code_groups, + *design_groups, + ] + severity_counts = dict.fromkeys( + (SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO), + 
0, + ) + source_scope_counts = dict.fromkeys( + (IMPACT_SCOPE_RUNTIME, IMPACT_SCOPE_NON_RUNTIME, IMPACT_SCOPE_MIXED), + 0, + ) + for group in flat_groups: + severity = str(group.get("severity", SEVERITY_INFO)) + if severity in severity_counts: + severity_counts[severity] += 1 + impact_scope = str( + _as_mapping(group.get("source_scope")).get( + "impact_scope", + IMPACT_SCOPE_NON_RUNTIME, + ) + ) + if impact_scope in source_scope_counts: + source_scope_counts[impact_scope] += 1 + clone_groups = [*clone_functions, *clone_blocks, *clone_segments] + clone_suppressed_map = _as_mapping(suppressed_clone_groups) + suppressed_functions = len(_as_sequence(clone_suppressed_map.get("function"))) + suppressed_blocks = len(_as_sequence(clone_suppressed_map.get("block"))) + suppressed_segments = len(_as_sequence(clone_suppressed_map.get("segment"))) + suppressed_clone_total = ( + suppressed_functions + suppressed_blocks + suppressed_segments + ) + clones_summary: dict[str, object] = { + "functions": len(clone_functions), + "blocks": len(clone_blocks), + "segments": len(clone_segments), + CLONE_NOVELTY_NEW: sum( + 1 + for group in clone_groups + if str(group.get("novelty", "")) == CLONE_NOVELTY_NEW + ), + CLONE_NOVELTY_KNOWN: sum( + 1 + for group in clone_groups + if str(group.get("novelty", "")) == CLONE_NOVELTY_KNOWN + ), + } + if suppressed_clone_total > 0: + clones_summary.update( + { + "suppressed": suppressed_clone_total, + "suppressed_functions": suppressed_functions, + "suppressed_blocks": suppressed_blocks, + "suppressed_segments": suppressed_segments, + } + ) + suppressed_summary = { + FAMILY_DEAD_CODE: max(0, dead_code_suppressed), + } + if suppressed_clone_total > 0: + suppressed_summary[FAMILY_CLONES] = suppressed_clone_total + return { + "total": len(flat_groups), + "families": { + FAMILY_CLONES: len(clone_groups), + FAMILY_STRUCTURAL: len(structural_groups), + FAMILY_DEAD_CODE: len(dead_code_groups), + "design": len(design_groups), + }, + "severity": 
severity_counts, + "impact_scope": source_scope_counts, + "clones": clones_summary, + "suppressed": suppressed_summary, + } + + +def _build_findings_payload( + *, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + block_facts: Mapping[str, Mapping[str, str]], + structural_findings: Sequence[StructuralFindingGroup] | None, + metrics_payload: Mapping[str, object], + baseline_trusted: bool, + new_function_group_keys: Collection[str] | None, + new_block_group_keys: Collection[str] | None, + new_segment_group_keys: Collection[str] | None, + suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None, + design_thresholds: Mapping[str, object] | None, + scan_root: str, +) -> dict[str, object]: + clone_functions = _build_clone_groups( + groups=func_groups, + kind=CLONE_KIND_FUNCTION, + baseline_trusted=baseline_trusted, + new_keys=new_function_group_keys, + block_facts=block_facts, + scan_root=scan_root, + ) + clone_blocks = _build_clone_groups( + groups=block_groups, + kind=CLONE_KIND_BLOCK, + baseline_trusted=baseline_trusted, + new_keys=new_block_group_keys, + block_facts=block_facts, + scan_root=scan_root, + ) + clone_segments = _build_clone_groups( + groups=segment_groups, + kind=CLONE_KIND_SEGMENT, + baseline_trusted=baseline_trusted, + new_keys=new_segment_group_keys, + block_facts={}, + scan_root=scan_root, + ) + structural_groups = _build_structural_groups( + structural_findings, + scan_root=scan_root, + ) + dead_code_groups = _build_dead_code_groups( + metrics_payload, + scan_root=scan_root, + ) + dead_code_family = _as_mapping( + _as_mapping(metrics_payload.get("families")).get(FAMILY_DEAD_CODE) + ) + dead_code_summary = _as_mapping(dead_code_family.get("summary")) + dead_code_suppressed = _as_int( + dead_code_summary.get( + "suppressed", + len(_as_sequence(dead_code_family.get("suppressed_items"))), + ) + ) + design_groups = _build_design_groups( + metrics_payload, + design_thresholds=design_thresholds, + 
scan_root=scan_root, + ) + suppressed_clone_payload = _build_suppressed_clone_groups( + groups=suppressed_clone_groups, + block_facts=block_facts, + scan_root=scan_root, + ) + clone_groups_payload: dict[str, object] = { + "functions": clone_functions, + "blocks": clone_blocks, + "segments": clone_segments, + } + if any(suppressed_clone_payload.values()): + clone_groups_payload["suppressed"] = { + "functions": suppressed_clone_payload[CLONE_KIND_FUNCTION], + "blocks": suppressed_clone_payload[CLONE_KIND_BLOCK], + "segments": suppressed_clone_payload[CLONE_KIND_SEGMENT], + } + return { + "summary": _findings_summary( + clone_functions=clone_functions, + clone_blocks=clone_blocks, + clone_segments=clone_segments, + structural_groups=structural_groups, + dead_code_groups=dead_code_groups, + design_groups=design_groups, + suppressed_clone_groups=suppressed_clone_payload, + dead_code_suppressed=dead_code_suppressed, + ), + "groups": { + FAMILY_CLONES: clone_groups_payload, + FAMILY_STRUCTURAL: { + "groups": structural_groups, + }, + FAMILY_DEAD_CODE: { + "groups": dead_code_groups, + }, + "design": { + "groups": design_groups, + }, + }, + } diff --git a/codeclone/report/document/integrity.py b/codeclone/report/document/integrity.py new file mode 100644 index 0000000..8863370 --- /dev/null +++ b/codeclone/report/document/integrity.py @@ -0,0 +1,91 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from hashlib import sha256 +from typing import TYPE_CHECKING + +import orjson + +if TYPE_CHECKING: + pass + + +def _canonical_integrity_payload( + *, + report_schema_version: str, + meta: Mapping[str, object], + inventory: Mapping[str, object], + findings: Mapping[str, object], + metrics: Mapping[str, object], +) -> dict[str, object]: + canonical_meta = { + str(key): value for key, value in meta.items() if str(key) != "runtime" + } + + def _strip_noncanonical(value: object) -> object: + if isinstance(value, Mapping): + return { + str(key): _strip_noncanonical(item) + for key, item in value.items() + if str(key) != "display_facts" + } + if isinstance(value, Sequence) and not isinstance( + value, + (str, bytes, bytearray), + ): + return [_strip_noncanonical(item) for item in value] + return value + + return { + "report_schema_version": report_schema_version, + "meta": canonical_meta, + "inventory": inventory, + "findings": _strip_noncanonical(findings), + "metrics": metrics, + } + + +def _build_integrity_payload( + *, + report_schema_version: str, + meta: Mapping[str, object], + inventory: Mapping[str, object], + findings: Mapping[str, object], + metrics: Mapping[str, object], +) -> dict[str, object]: + canonical_payload = _canonical_integrity_payload( + report_schema_version=report_schema_version, + meta=meta, + inventory=inventory, + findings=findings, + metrics=metrics, + ) + canonical_json = orjson.dumps( + canonical_payload, + option=orjson.OPT_SORT_KEYS, + ) + payload_sha = sha256(canonical_json).hexdigest() + return { + "canonicalization": { + "version": "1", + "scope": "canonical_only", + "sections": [ + "report_schema_version", + "meta", + "inventory", + "findings", + "metrics", + ], + }, + "digest": { + "verified": True, + "algorithm": "sha256", + "value": payload_sha, + }, + } diff --git 
a/codeclone/report/document/inventory.py b/codeclone/report/document/inventory.py new file mode 100644 index 0000000..a51fce3 --- /dev/null +++ b/codeclone/report/document/inventory.py @@ -0,0 +1,223 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING + +from ...domain.findings import ( + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, +) +from ...utils.coerce import as_int as _as_int +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence + +if TYPE_CHECKING: + pass + +from ._common import ( + _analysis_profile_payload, + _contract_path, + _count_file_lines, + _design_findings_thresholds_payload, + _optional_str, +) + + +def _derive_inventory_code_counts( + *, + metrics_payload: Mapping[str, object], + inventory_code: Mapping[str, object], + file_list: Sequence[str], + cached_files: int, +) -> dict[str, object]: + complexity = _as_mapping( + _as_mapping(metrics_payload.get("families")).get(CATEGORY_COMPLEXITY) + ) + cohesion = _as_mapping( + _as_mapping(metrics_payload.get("families")).get(CATEGORY_COHESION) + ) + complexity_items = _as_sequence(complexity.get("items")) + cohesion_items = _as_sequence(cohesion.get("items")) + + exact_entities = bool(complexity_items or cohesion_items) + method_count = sum( + _as_int(_as_mapping(item).get("method_count")) for item in cohesion_items + ) + class_count = len(cohesion_items) + function_total = max(len(complexity_items) - method_count, 0) + + if not exact_entities: + function_total = _as_int(inventory_code.get("functions")) + method_count = _as_int(inventory_code.get("methods")) + class_count = 
_as_int(inventory_code.get("classes")) + + parsed_lines_raw = inventory_code.get("parsed_lines") + if isinstance(parsed_lines_raw, int) and parsed_lines_raw >= 0: + parsed_lines = parsed_lines_raw + elif cached_files > 0 and file_list: + parsed_lines = _count_file_lines(file_list) + else: + parsed_lines = _as_int(parsed_lines_raw) + + if exact_entities and ((cached_files > 0 and file_list) or parsed_lines > 0): + scope = "analysis_root" + elif cached_files > 0 and file_list: + scope = "mixed" + else: + scope = "current_run" + + return { + "scope": scope, + "parsed_lines": parsed_lines, + "functions": function_total, + "methods": method_count, + "classes": class_count, + } + + +def _build_inventory_payload( + *, + inventory: Mapping[str, object] | None, + file_list: Sequence[str], + metrics_payload: Mapping[str, object], + scan_root: str, +) -> dict[str, object]: + inventory_map = _as_mapping(inventory) + files_map = _as_mapping(inventory_map.get("files")) + code_map = _as_mapping(inventory_map.get("code")) + cached_files = _as_int(files_map.get("cached")) + file_registry = [ + path + for path in ( + _contract_path(filepath, scan_root=scan_root)[0] for filepath in file_list + ) + if path is not None + ] + return { + "files": { + "total_found": _as_int(files_map.get("total_found"), len(file_list)), + "analyzed": _as_int(files_map.get("analyzed")), + "cached": cached_files, + "skipped": _as_int(files_map.get("skipped")), + "source_io_skipped": _as_int(files_map.get("source_io_skipped")), + }, + "code": _derive_inventory_code_counts( + metrics_payload=metrics_payload, + inventory_code=code_map, + file_list=file_list, + cached_files=cached_files, + ), + "file_registry": { + "encoding": "relative_path", + "items": file_registry, + }, + } + + +def _baseline_is_trusted(meta: Mapping[str, object]) -> bool: + baseline = _as_mapping(meta.get("baseline")) + return ( + baseline.get("loaded") is True + and str(baseline.get("status", "")).strip().lower() == "ok" + ) + + +def 
_build_meta_payload( + raw_meta: Mapping[str, object] | None, + *, + scan_root: str, +) -> dict[str, object]: + meta = dict(raw_meta or {}) + metrics_computed = sorted( + { + str(item) + for item in _as_sequence(meta.get("metrics_computed")) + if str(item).strip() + } + ) + baseline_path, baseline_path_scope, baseline_abs = _contract_path( + meta.get("baseline_path"), + scan_root=scan_root, + ) + cache_path, cache_path_scope, cache_abs = _contract_path( + meta.get("cache_path"), + scan_root=scan_root, + ) + metrics_baseline_path, metrics_baseline_path_scope, metrics_baseline_abs = ( + _contract_path( + meta.get("metrics_baseline_path"), + scan_root=scan_root, + ) + ) + payload: dict[str, object] = { + "codeclone_version": str(meta.get("codeclone_version", "")), + "project_name": str(meta.get("project_name", "")), + "scan_root": ".", + "python_version": str(meta.get("python_version", "")), + "python_tag": str(meta.get("python_tag", "")), + "analysis_mode": str(meta.get("analysis_mode", "full") or "full"), + "report_mode": str(meta.get("report_mode", "full") or "full"), + "computed_metric_families": metrics_computed, + "analysis_thresholds": _design_findings_thresholds_payload(meta), + "baseline": { + "path": baseline_path, + "path_scope": baseline_path_scope, + "loaded": bool(meta.get("baseline_loaded")), + "status": _optional_str(meta.get("baseline_status")), + "fingerprint_version": _optional_str( + meta.get("baseline_fingerprint_version") + ), + "schema_version": _optional_str(meta.get("baseline_schema_version")), + "python_tag": _optional_str(meta.get("baseline_python_tag")), + "generator_name": _optional_str(meta.get("baseline_generator_name")), + "generator_version": _optional_str(meta.get("baseline_generator_version")), + "payload_sha256": _optional_str(meta.get("baseline_payload_sha256")), + "payload_sha256_verified": bool( + meta.get("baseline_payload_sha256_verified") + ), + }, + "cache": { + "path": cache_path, + "path_scope": cache_path_scope, + "used": 
bool(meta.get("cache_used")), + "status": _optional_str(meta.get("cache_status")), + "schema_version": _optional_str(meta.get("cache_schema_version")), + }, + "metrics_baseline": { + "path": metrics_baseline_path, + "path_scope": metrics_baseline_path_scope, + "loaded": bool(meta.get("metrics_baseline_loaded")), + "status": _optional_str(meta.get("metrics_baseline_status")), + "schema_version": _optional_str( + meta.get("metrics_baseline_schema_version") + ), + "payload_sha256": _optional_str( + meta.get("metrics_baseline_payload_sha256") + ), + "payload_sha256_verified": bool( + meta.get("metrics_baseline_payload_sha256_verified") + ), + }, + "runtime": { + "analysis_started_at_utc": _optional_str( + meta.get("analysis_started_at_utc") + ), + "report_generated_at_utc": _optional_str( + meta.get("report_generated_at_utc") + ), + "scan_root_absolute": _optional_str(meta.get("scan_root")), + "baseline_path_absolute": baseline_abs, + "cache_path_absolute": cache_abs, + "metrics_baseline_path_absolute": metrics_baseline_abs, + }, + } + analysis_profile = _analysis_profile_payload(meta) + if analysis_profile is not None: + payload["analysis_profile"] = analysis_profile + return payload diff --git a/codeclone/report/document/metrics.py b/codeclone/report/document/metrics.py new file mode 100644 index 0000000..3ca942b --- /dev/null +++ b/codeclone/report/document/metrics.py @@ -0,0 +1,701 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping +from typing import TYPE_CHECKING + +from ...domain.findings import ( + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + FAMILY_DEAD_CODE, +) +from ...domain.quality import ( + CONFIDENCE_HIGH, + CONFIDENCE_MEDIUM, + RISK_LOW, +) +from ...domain.source_scope import ( + SOURCE_KIND_OTHER, +) +from ...metrics import METRIC_FAMILIES +from ...suppressions import INLINE_CODECLONE_SUPPRESSION_SOURCE +from ...utils.coerce import as_float as _as_float +from ...utils.coerce import as_int as _as_int +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence + +if TYPE_CHECKING: + pass + +from ._common import ( + _contract_path, + _normalize_nested_string_rows, + _optional_str, +) + +_OVERLOADED_MODULES_FAMILY = "overloaded_modules" + +_COVERAGE_ADOPTION_FAMILY = "coverage_adoption" + +_API_SURFACE_FAMILY = "api_surface" + +_COVERAGE_JOIN_FAMILY = "coverage_join" + + +def _normalize_metrics_families( + metrics: Mapping[str, object] | None, + *, + scan_root: str, +) -> dict[str, object]: + metrics_map = _as_mapping(metrics) + complexity = _as_mapping(metrics_map.get(CATEGORY_COMPLEXITY)) + complexity_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "cyclomatic_complexity": _as_int( + item_map.get("cyclomatic_complexity"), + 1, + ), + "nesting_depth": _as_int(item_map.get("nesting_depth")), + "risk": str(item_map.get("risk", RISK_LOW)), + } + for item in _as_sequence(complexity.get("functions")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + 
item["qualname"], + ), + ) + + coupling = _as_mapping(metrics_map.get(CATEGORY_COUPLING)) + coupling_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "cbo": _as_int(item_map.get("cbo")), + "risk": str(item_map.get("risk", RISK_LOW)), + "coupled_classes": sorted( + { + str(name) + for name in _as_sequence(item_map.get("coupled_classes")) + if str(name).strip() + } + ), + } + for item in _as_sequence(coupling.get("classes")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + ), + ) + + cohesion = _as_mapping(metrics_map.get(CATEGORY_COHESION)) + cohesion_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "lcom4": _as_int(item_map.get("lcom4")), + "risk": str(item_map.get("risk", RISK_LOW)), + "method_count": _as_int(item_map.get("method_count")), + "instance_var_count": _as_int(item_map.get("instance_var_count")), + } + for item in _as_sequence(cohesion.get("classes")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + ), + ) + + dependencies = _as_mapping(metrics_map.get("dependencies")) + dependency_edges = sorted( + ( + { + "source": str(item_map.get("source", "")), + "target": str(item_map.get("target", "")), + "import_type": str(item_map.get("import_type", "")), + "line": _as_int(item_map.get("line")), + } + for item in _as_sequence(dependencies.get("edge_list")) + for item_map in (_as_mapping(item),) + 
), + key=lambda item: ( + item["source"], + item["target"], + item["import_type"], + item["line"], + ), + ) + dependency_cycles = _normalize_nested_string_rows(dependencies.get("cycles")) + longest_chains = _normalize_nested_string_rows(dependencies.get("longest_chains")) + + dead_code = _as_mapping(metrics_map.get(FAMILY_DEAD_CODE)) + + def _normalize_suppressed_by( + raw_bindings: object, + ) -> list[dict[str, str]]: + normalized_bindings = sorted( + { + ( + str(binding_map.get("rule", "")).strip(), + str(binding_map.get("source", "")).strip(), + ) + for binding in _as_sequence(raw_bindings) + for binding_map in (_as_mapping(binding),) + if str(binding_map.get("rule", "")).strip() + }, + key=lambda item: (item[0], item[1]), + ) + if not normalized_bindings: + return [] + return [ + { + "rule": rule, + "source": source or INLINE_CODECLONE_SUPPRESSION_SOURCE, + } + for rule, source in normalized_bindings + ] + + dead_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "kind": str(item_map.get("kind", "")), + "confidence": str(item_map.get("confidence", CONFIDENCE_MEDIUM)), + } + for item in _as_sequence(dead_code.get("items")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + item["kind"], + ), + ) + dead_suppressed_items = sorted( + ( + { + "qualname": str(item_map.get("qualname", "")), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "kind": str(item_map.get("kind", "")), + "confidence": str(item_map.get("confidence", CONFIDENCE_MEDIUM)), + "suppressed_by": 
_normalize_suppressed_by( + item_map.get("suppressed_by") + ), + } + for item in _as_sequence(dead_code.get("suppressed_items")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + item["kind"], + item["confidence"], + tuple( + ( + str(_as_mapping(binding).get("rule", "")), + str(_as_mapping(binding).get("source", "")), + ) + for binding in _as_sequence(item.get("suppressed_by")) + ), + ), + ) + for item in dead_suppressed_items: + suppressed_by = _as_sequence(item.get("suppressed_by")) + first_binding = _as_mapping(suppressed_by[0]) if suppressed_by else {} + item["suppression_rule"] = str(first_binding.get("rule", "")) + item["suppression_source"] = str(first_binding.get("source", "")) + + health = _as_mapping(metrics_map.get("health")) + health_dimensions = { + str(key): _as_int(value) + for key, value in sorted(_as_mapping(health.get("dimensions")).items()) + } + overloaded_modules = _as_mapping(metrics_map.get(_OVERLOADED_MODULES_FAMILY)) + overloaded_modules_detection = _as_mapping(overloaded_modules.get("detection")) + overloaded_module_items = sorted( + ( + { + "module": str(item_map.get("module", "")).strip(), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "source_kind": str(item_map.get("source_kind", SOURCE_KIND_OTHER)), + "loc": _as_int(item_map.get("loc")), + "functions": _as_int(item_map.get("functions")), + "methods": _as_int(item_map.get("methods")), + "classes": _as_int(item_map.get("classes")), + "callable_count": _as_int(item_map.get("callable_count")), + "complexity_total": _as_int(item_map.get("complexity_total")), + "complexity_max": _as_int(item_map.get("complexity_max")), + "fan_in": _as_int(item_map.get("fan_in")), + "fan_out": _as_int(item_map.get("fan_out")), + "total_deps": _as_int(item_map.get("total_deps")), + "import_edges": _as_int(item_map.get("import_edges")), + 
"reimport_edges": _as_int(item_map.get("reimport_edges")), + "reimport_ratio": round( + _as_float(item_map.get("reimport_ratio")), + 4, + ), + "instability": round(_as_float(item_map.get("instability")), 4), + "hub_balance": round(_as_float(item_map.get("hub_balance")), 4), + "size_score": round(_as_float(item_map.get("size_score")), 4), + "dependency_score": round( + _as_float(item_map.get("dependency_score")), + 4, + ), + "shape_score": round(_as_float(item_map.get("shape_score")), 4), + "score": round(_as_float(item_map.get("score")), 4), + "candidate_status": str( + item_map.get("candidate_status", "non_candidate") + ), + "candidate_reasons": [ + str(reason) + for reason in _as_sequence(item_map.get("candidate_reasons")) + if str(reason).strip() + ], + } + for item in _as_sequence(overloaded_modules.get("items")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + {"candidate": 0, "ranked_only": 1, "non_candidate": 2}.get( + str(item["candidate_status"]), + 3, + ), + -_as_float(item["score"]), + -_as_float(item["size_score"]), + -_as_float(item["dependency_score"]), + item["relative_path"], + item["module"], + ), + ) + + complexity_summary = _as_mapping(complexity.get("summary")) + coupling_summary = _as_mapping(coupling.get("summary")) + cohesion_summary = _as_mapping(cohesion.get("summary")) + dead_code_summary = _as_mapping(dead_code.get("summary")) + overloaded_modules_summary = _as_mapping(overloaded_modules.get("summary")) + coverage_adoption = _as_mapping(metrics_map.get(_COVERAGE_ADOPTION_FAMILY)) + coverage_adoption_summary = _as_mapping(coverage_adoption.get("summary")) + coverage_adoption_items = sorted( + ( + { + "module": str(item_map.get("module", "")).strip(), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "callable_count": _as_int(item_map.get("callable_count")), + "params_total": _as_int(item_map.get("params_total")), + "params_annotated": 
_as_int(item_map.get("params_annotated")), + "param_permille": _as_int(item_map.get("param_permille")), + "returns_total": _as_int(item_map.get("returns_total")), + "returns_annotated": _as_int(item_map.get("returns_annotated")), + "return_permille": _as_int(item_map.get("return_permille")), + "any_annotation_count": _as_int(item_map.get("any_annotation_count")), + "public_symbol_total": _as_int(item_map.get("public_symbol_total")), + "public_symbol_documented": _as_int( + item_map.get("public_symbol_documented") + ), + "docstring_permille": _as_int(item_map.get("docstring_permille")), + } + for item in _as_sequence(coverage_adoption.get("items")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["module"], + ), + ) + api_surface = _as_mapping(metrics_map.get(_API_SURFACE_FAMILY)) + api_surface_summary = _as_mapping(api_surface.get("summary")) + api_surface_items = sorted( + ( + { + "record_kind": str(item_map.get("record_kind", "symbol")), + "module": str(item_map.get("module", "")).strip(), + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "qualname": str(item_map.get("qualname", "")), + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "symbol_kind": str(item_map.get("symbol_kind", "")), + "exported_via": _optional_str(item_map.get("exported_via")), + "params_total": _as_int(item_map.get("params_total")), + "params": [ + { + "name": str(param_map.get("name", "")), + "kind": str(param_map.get("kind", "")), + "has_default": bool(param_map.get("has_default")), + "annotated": bool(param_map.get("annotated")), + } + for param in _as_sequence(item_map.get("params")) + for param_map in (_as_mapping(param),) + ], + "returns_annotated": bool(item_map.get("returns_annotated")), + "change_kind": _optional_str(item_map.get("change_kind")), + "detail": _optional_str(item_map.get("detail")), + } + for item in 
_as_sequence(api_surface.get("items")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + item["relative_path"], + item["start_line"], + item["end_line"], + item["qualname"], + item["record_kind"], + ), + ) + coverage_join = _as_mapping(metrics_map.get(_COVERAGE_JOIN_FAMILY)) + coverage_join_summary = _as_mapping(coverage_join.get("summary")) + coverage_join_items = sorted( + ( + { + "relative_path": _contract_path( + item_map.get("filepath", ""), + scan_root=scan_root, + )[0] + or "", + "qualname": str(item_map.get("qualname", "")).strip(), + "start_line": _as_int(item_map.get("start_line")), + "end_line": _as_int(item_map.get("end_line")), + "cyclomatic_complexity": _as_int( + item_map.get("cyclomatic_complexity"), + 1, + ), + "risk": str(item_map.get("risk", RISK_LOW)).strip() or RISK_LOW, + "executable_lines": _as_int(item_map.get("executable_lines")), + "covered_lines": _as_int(item_map.get("covered_lines")), + "coverage_permille": _as_int(item_map.get("coverage_permille")), + "coverage_status": str(item_map.get("coverage_status", "")).strip(), + "coverage_hotspot": bool(item_map.get("coverage_hotspot")), + "scope_gap_hotspot": bool(item_map.get("scope_gap_hotspot")), + } + for item in _as_sequence(coverage_join.get("items")) + for item_map in (_as_mapping(item),) + ), + key=lambda item: ( + 0 if bool(item["coverage_hotspot"]) else 1, + 0 if bool(item["scope_gap_hotspot"]) else 1, + {"high": 0, "medium": 1, "low": 2}.get(str(item["risk"]), 3), + _as_int(item["coverage_permille"]), + -_as_int(item["cyclomatic_complexity"]), + item["relative_path"], + _as_int(item["start_line"]), + item["qualname"], + ), + ) + dead_high_confidence = sum( + 1 + for item in dead_items + if str(_as_mapping(item).get("confidence", "")).strip().lower() + == CONFIDENCE_HIGH + ) + + family_sections: dict[str, object] = { + CATEGORY_COMPLEXITY: { + "summary": { + "total": len(complexity_items), + "average": round(_as_float(complexity_summary.get("average")), 2), + "max": 
_as_int(complexity_summary.get("max")), + "high_risk": _as_int(complexity_summary.get("high_risk")), + }, + "items": complexity_items, + "items_truncated": False, + }, + CATEGORY_COUPLING: { + "summary": { + "total": len(coupling_items), + "average": round(_as_float(coupling_summary.get("average")), 2), + "max": _as_int(coupling_summary.get("max")), + "high_risk": _as_int(coupling_summary.get("high_risk")), + }, + "items": coupling_items, + "items_truncated": False, + }, + CATEGORY_COHESION: { + "summary": { + "total": len(cohesion_items), + "average": round(_as_float(cohesion_summary.get("average")), 2), + "max": _as_int(cohesion_summary.get("max")), + "low_cohesion": _as_int(cohesion_summary.get("low_cohesion")), + }, + "items": cohesion_items, + "items_truncated": False, + }, + "dependencies": { + "summary": { + "modules": _as_int(dependencies.get("modules")), + "edges": _as_int(dependencies.get("edges")), + "cycles": len(dependency_cycles), + "max_depth": _as_int(dependencies.get("max_depth")), + }, + "items": dependency_edges, + "cycles": dependency_cycles, + "longest_chains": longest_chains, + "items_truncated": False, + }, + FAMILY_DEAD_CODE: { + "summary": { + "total": len(dead_items), + "high_confidence": dead_high_confidence + or _as_int( + dead_code_summary.get( + "high_confidence", dead_code_summary.get("critical") + ) + ), + "suppressed": len(dead_suppressed_items) + or _as_int(dead_code_summary.get("suppressed")), + }, + "items": dead_items, + "suppressed_items": dead_suppressed_items, + "items_truncated": False, + }, + "health": { + "summary": { + "score": _as_int(health.get("score")), + "grade": str(health.get("grade", "")), + "dimensions": health_dimensions, + }, + "items": [], + "items_truncated": False, + }, + _COVERAGE_ADOPTION_FAMILY: { + "summary": { + "modules": len(coverage_adoption_items), + "params_total": _as_int(coverage_adoption_summary.get("params_total")), + "params_annotated": _as_int( + 
coverage_adoption_summary.get("params_annotated") + ), + "param_permille": _as_int( + coverage_adoption_summary.get("param_permille") + ), + "baseline_diff_available": bool( + coverage_adoption_summary.get("baseline_diff_available") + ), + "param_delta": _as_int(coverage_adoption_summary.get("param_delta")), + "returns_total": _as_int( + coverage_adoption_summary.get("returns_total") + ), + "returns_annotated": _as_int( + coverage_adoption_summary.get("returns_annotated") + ), + "return_permille": _as_int( + coverage_adoption_summary.get("return_permille") + ), + "return_delta": _as_int(coverage_adoption_summary.get("return_delta")), + "public_symbol_total": _as_int( + coverage_adoption_summary.get("public_symbol_total") + ), + "public_symbol_documented": _as_int( + coverage_adoption_summary.get("public_symbol_documented") + ), + "docstring_permille": _as_int( + coverage_adoption_summary.get("docstring_permille") + ), + "docstring_delta": _as_int( + coverage_adoption_summary.get("docstring_delta") + ), + "typing_any_count": _as_int( + coverage_adoption_summary.get("typing_any_count") + ), + }, + "items": coverage_adoption_items, + "items_truncated": False, + }, + _API_SURFACE_FAMILY: { + "summary": { + "enabled": bool(api_surface_summary.get("enabled")), + "baseline_diff_available": bool( + api_surface_summary.get("baseline_diff_available") + ), + "modules": _as_int(api_surface_summary.get("modules")), + "public_symbols": _as_int(api_surface_summary.get("public_symbols")), + "added": _as_int(api_surface_summary.get("added")), + "breaking": _as_int(api_surface_summary.get("breaking")), + "strict_types": bool(api_surface_summary.get("strict_types")), + }, + "items": api_surface_items, + "items_truncated": False, + }, + _OVERLOADED_MODULES_FAMILY: { + "summary": { + "total": len(overloaded_module_items), + "candidates": _as_int(overloaded_modules_summary.get("candidates")), + "population_status": str( + overloaded_modules_summary.get("population_status", "limited") + 
), + "top_score": round( + _as_float(overloaded_modules_summary.get("top_score")), + 4, + ), + "average_score": round( + _as_float(overloaded_modules_summary.get("average_score")), + 4, + ), + "candidate_score_cutoff": round( + _as_float(overloaded_modules_summary.get("candidate_score_cutoff")), + 4, + ), + }, + "detection": { + "version": str(overloaded_modules_detection.get("version", "1")), + "scope": str(overloaded_modules_detection.get("scope", "report_only")), + "strategy": str( + overloaded_modules_detection.get( + "strategy", + "project_relative_composite", + ) + ), + "minimum_population": _as_int( + overloaded_modules_detection.get("minimum_population"), + ), + "size_signals": [ + str(signal) + for signal in _as_sequence( + overloaded_modules_detection.get("size_signals") + ) + if str(signal).strip() + ], + "dependency_signals": [ + str(signal) + for signal in _as_sequence( + overloaded_modules_detection.get("dependency_signals") + ) + if str(signal).strip() + ], + "shape_signals": [ + str(signal) + for signal in _as_sequence( + overloaded_modules_detection.get("shape_signals") + ) + if str(signal).strip() + ], + }, + "items": overloaded_module_items, + "items_truncated": False, + }, + } + if coverage_join_summary or coverage_join_items or coverage_join: + family_sections[_COVERAGE_JOIN_FAMILY] = { + "summary": { + "status": str(coverage_join_summary.get("status", "")), + "source": _contract_path( + coverage_join_summary.get("source", ""), + scan_root=scan_root, + )[0], + "files": _as_int(coverage_join_summary.get("files")), + "units": _as_int(coverage_join_summary.get("units")), + "measured_units": _as_int(coverage_join_summary.get("measured_units")), + "overall_executable_lines": _as_int( + coverage_join_summary.get("overall_executable_lines") + ), + "overall_covered_lines": _as_int( + coverage_join_summary.get("overall_covered_lines") + ), + "overall_permille": _as_int( + coverage_join_summary.get("overall_permille") + ), + "missing_from_report_units": 
_as_int( + coverage_join_summary.get("missing_from_report_units") + ), + "coverage_hotspots": _as_int( + coverage_join_summary.get("coverage_hotspots") + ), + "scope_gap_hotspots": _as_int( + coverage_join_summary.get("scope_gap_hotspots") + ), + "hotspot_threshold_percent": _as_int( + coverage_join_summary.get("hotspot_threshold_percent") + ), + "invalid_reason": _optional_str( + coverage_join_summary.get("invalid_reason") + ), + }, + "items": coverage_join_items, + "items_truncated": False, + } + normalized: dict[str, object] = {} + for family in METRIC_FAMILIES.values(): + section = family.report_section + if section in family_sections: + normalized[section] = family_sections[section] + return normalized + + +def _build_metrics_payload( + metrics: Mapping[str, object] | None, + *, + scan_root: str, +) -> dict[str, object]: + families = _normalize_metrics_families(metrics, scan_root=scan_root) + return { + "summary": { + family_name: _as_mapping(_as_mapping(family_payload).get("summary")) + for family_name, family_payload in families.items() + }, + "families": families, + } diff --git a/codeclone/report/explain.py b/codeclone/report/explain.py index 73605b0..2a85a1f 100644 --- a/codeclone/report/explain.py +++ b/codeclone/report/explain.py @@ -12,7 +12,7 @@ from pathlib import Path from typing import TYPE_CHECKING -from .._coerce import as_int +from ..utils.coerce import as_int from .explain_contract import ( BLOCK_HINT_ASSERT_ONLY, BLOCK_HINT_ASSERT_ONLY_LABEL, diff --git a/codeclone/report/findings.py b/codeclone/report/findings.py index 350b836..7843967 100644 --- a/codeclone/report/findings.py +++ b/codeclone/report/findings.py @@ -6,7 +6,7 @@ """Deterministic structural-finding helpers for the report layer. -HTML rendering lives in ``codeclone._html_report._sections._structural``. +HTML rendering lives in ``codeclone.report.html.sections._structural``. 
""" from __future__ import annotations diff --git a/codeclone/report/gates/__init__.py b/codeclone/report/gates/__init__.py new file mode 100644 index 0000000..151bd5e --- /dev/null +++ b/codeclone/report/gates/__init__.py @@ -0,0 +1,39 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .evaluator import ( + GateResult, + GateState, + MetricGateConfig, + evaluate_gate_state, + evaluate_gates, + gate_state_from_project_metrics, + metric_gate_reasons, + metric_gate_reasons_for_state, + summarize_metrics_diff, +) +from .reasons import ( + parse_metric_reason_entry, + policy_context, + print_gating_failure_block, +) + +__all__ = [ + "GateResult", + "GateState", + "MetricGateConfig", + "evaluate_gate_state", + "evaluate_gates", + "gate_state_from_project_metrics", + "metric_gate_reasons", + "metric_gate_reasons_for_state", + "parse_metric_reason_entry", + "policy_context", + "print_gating_failure_block", + "summarize_metrics_diff", +] diff --git a/codeclone/report/gates/evaluator.py b/codeclone/report/gates/evaluator.py new file mode 100644 index 0000000..c632c50 --- /dev/null +++ b/codeclone/report/gates/evaluator.py @@ -0,0 +1,695 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

"""Metric gate evaluation for the report layer.

Turns project metrics (and optional baseline diffs) into deterministic
gating reasons and a final exit code.
"""

from __future__ import annotations

from collections.abc import Callable, Mapping
from dataclasses import dataclass
from typing import TYPE_CHECKING, cast

from ...contracts import ExitCode
from ...metrics import METRIC_FAMILIES
from ...utils.coerce import as_int as _as_int
from ...utils.coerce import as_mapping as _as_mapping
from ...utils.coerce import as_sequence as _as_sequence

if TYPE_CHECKING:
    # Annotation-only imports, kept behind TYPE_CHECKING to avoid runtime cost.
    from ...models import CoverageJoinResult, ProjectMetrics


@dataclass(frozen=True, slots=True)
class MetricGateConfig:
    """Thresholds and toggles for the metric gates.

    Integer thresholds treat a negative value as "gate disabled" (the
    reason builders test ``0 <= threshold < observed`` or
    ``threshold >= 0 and observed < threshold``); boolean flags enable
    the corresponding gate outright.
    """

    # Fail when max cyclomatic complexity exceeds this value (negative = off).
    fail_complexity: int
    # Fail when max class coupling (CBO) exceeds this value (negative = off).
    fail_coupling: int
    # Fail when max cohesion metric (LCOM4) exceeds this value (negative = off).
    fail_cohesion: int
    # Fail when any dependency cycle is present.
    fail_cycles: bool
    # Fail when any high-confidence dead-code item is present.
    fail_dead_code: bool
    # Fail when the health score drops below this value (negative = off).
    fail_health: int
    # Fail on any new metric findings vs the metrics baseline diff.
    fail_on_new_metrics: bool
    # Fail when typing coverage regressed vs the metrics baseline.
    fail_on_typing_regression: bool = False
    # Fail when docstring coverage regressed vs the metrics baseline.
    fail_on_docstring_regression: bool = False
    # Fail on public-API breaking changes vs the metrics baseline.
    fail_on_api_break: bool = False
    # Fail when coverage-join hotspots are detected.
    fail_on_untested_hotspots: bool = False
    # Minimum typing coverage percent required (negative = off).
    min_typing_coverage: int = -1
    # Minimum docstring coverage percent required (negative = off).
    min_docstring_coverage: int = -1
    # Coverage percent shown as the hotspot threshold in gate messages.
    coverage_min: int = 50
    # Fail when any new clone group is present.
    fail_on_new: bool = False
    # Fail when total clone-group count exceeds this value (negative = off).
    fail_threshold: int = -1


@dataclass(frozen=True, slots=True)
class GateResult:
    """Outcome of a gate evaluation: process exit code plus reason strings."""

    # ExitCode.SUCCESS when no gate fired, ExitCode.GATING_FAILURE otherwise.
    exit_code: int
    # Ordered, prefixed reasons (``metric:...`` / ``clone:...``); empty on success.
    reasons: tuple[str, ...]
@dataclass(frozen=True, slots=True)
class GateState:
    """Snapshot of every signal the metric gates evaluate.

    All fields are plain ints/strings so a state can be built either from
    in-memory project metrics or from a serialized report document.
    """

    clone_new_count: int = 0
    clone_total: int = 0
    complexity_max: int = 0
    coupling_max: int = 0
    cohesion_max: int = 0
    dependency_cycles: int = 0
    dead_high_confidence: int = 0
    health_score: int = 0
    typing_param_permille: int = 0
    docstring_permille: int = 0
    coverage_join_status: str = ""
    coverage_hotspots: int = 0
    api_breaking_changes: int = 0
    diff_new_high_risk_functions: int = 0
    diff_new_high_coupling_classes: int = 0
    diff_new_cycles: int = 0
    diff_new_dead_code: int = 0
    diff_health_delta: int = 0
    diff_typing_param_permille_delta: int = 0
    diff_typing_return_permille_delta: int = 0
    diff_docstring_permille_delta: int = 0


def _clean_strs(value: object) -> tuple[str, ...]:
    """Coerce *value* to a tuple of stringified items, dropping blanks."""
    return tuple(str(item) for item in _as_sequence(value) if str(item).strip())


def summarize_metrics_diff(metrics_diff: object | None) -> dict[str, object] | None:
    """Normalize a metrics diff into a flat, all-int summary mapping.

    Returns ``None`` when *metrics_diff* is ``None``. Mapping payloads
    (e.g. parsed JSON) are read key-by-key; any other object is read via
    attribute access, with sequence-valued fields reduced to lengths.
    """
    if metrics_diff is None:
        return None
    if isinstance(metrics_diff, Mapping):
        return _summarize_mapping_diff(metrics_diff)
    return _summarize_object_diff(metrics_diff)


def _summarize_mapping_diff(payload: Mapping[str, object]) -> dict[str, object]:
    """Summarize a mapping-shaped metrics diff."""
    return {
        "new_high_risk_functions": _as_int(
            payload.get("new_high_risk_functions"),
            0,
        ),
        "new_high_coupling_classes": _as_int(
            payload.get("new_high_coupling_classes"),
            0,
        ),
        "new_cycles": _as_int(payload.get("new_cycles"), 0),
        "new_dead_code": _as_int(payload.get("new_dead_code"), 0),
        "health_delta": _as_int(payload.get("health_delta"), 0),
        "typing_param_permille_delta": _as_int(
            payload.get("typing_param_permille_delta"),
            0,
        ),
        "typing_return_permille_delta": _as_int(
            payload.get("typing_return_permille_delta"),
            0,
        ),
        "docstring_permille_delta": _as_int(
            payload.get("docstring_permille_delta"),
            0,
        ),
        "new_api_symbols": _as_int(payload.get("new_api_symbols"), 0),
        # Fall back to the legacy key for older serialized payloads.
        "api_breaking_changes": _as_int(
            payload.get("api_breaking_changes"),
            _as_int(payload.get("new_api_breaking_changes"), 0),
        ),
    }


def _summarize_object_diff(metrics_diff: object) -> dict[str, object]:
    """Summarize an attribute-shaped metrics diff (e.g. a MetricsDiff model)."""
    # NOTE: a cycle whose parts are all blank still counts as one cycle;
    # only its parts are filtered, matching the original behavior.
    new_cycles = tuple(
        tuple(str(part) for part in _as_sequence(item) if str(part).strip())
        for item in _as_sequence(getattr(metrics_diff, "new_cycles", ()))
    )
    return {
        "new_high_risk_functions": len(
            _clean_strs(getattr(metrics_diff, "new_high_risk_functions", ()))
        ),
        "new_high_coupling_classes": len(
            _clean_strs(getattr(metrics_diff, "new_high_coupling_classes", ()))
        ),
        "new_cycles": len(new_cycles),
        "new_dead_code": len(
            _clean_strs(getattr(metrics_diff, "new_dead_code", ()))
        ),
        "health_delta": _as_int(getattr(metrics_diff, "health_delta", 0), 0),
        "typing_param_permille_delta": _as_int(
            getattr(metrics_diff, "typing_param_permille_delta", 0),
            0,
        ),
        "typing_return_permille_delta": _as_int(
            getattr(metrics_diff, "typing_return_permille_delta", 0),
            0,
        ),
        "docstring_permille_delta": _as_int(
            getattr(metrics_diff, "docstring_permille_delta", 0),
            0,
        ),
        "new_api_symbols": len(
            tuple(_as_sequence(getattr(metrics_diff, "new_api_symbols", ())))
        ),
        "api_breaking_changes": len(
            tuple(_as_sequence(getattr(metrics_diff, "new_api_breaking_changes", ())))
        ),
    }


def gate_state_from_project_metrics(
    *,
    project_metrics: ProjectMetrics,
    coverage_join: CoverageJoinResult | None,
    metrics_diff: object | None,
    clone_new_count: int = 0,
    clone_total: int = 0,
) -> GateState:
    """Build a GateState from in-memory project metrics.

    Negative clone counts are clamped to zero; coverage-join fields fall
    back to empty/zero when *coverage_join* is ``None``.
    """
    diff_summary = summarize_metrics_diff(metrics_diff) or {}
    return GateState(
        clone_new_count=max(clone_new_count, 0),
        clone_total=max(clone_total, 0),
        complexity_max=max(int(project_metrics.complexity_max), 0),
        coupling_max=max(int(project_metrics.coupling_max), 0),
        cohesion_max=max(int(project_metrics.cohesion_max), 0),
        dependency_cycles=len(tuple(project_metrics.dependency_cycles)),
        dead_high_confidence=sum(
            1
            for item in project_metrics.dead_code
            if str(getattr(item, "confidence", "")).strip().lower() == "high"
        ),
        health_score=max(int(project_metrics.health.total), 0),
        typing_param_permille=_permille(
            int(project_metrics.typing_param_annotated),
            int(project_metrics.typing_param_total),
        ),
        docstring_permille=_permille(
            int(project_metrics.docstring_public_documented),
            int(project_metrics.docstring_public_total),
        ),
        coverage_join_status=(
            str(coverage_join.status) if coverage_join is not None else ""
        ),
        coverage_hotspots=(
            int(coverage_join.coverage_hotspots) if coverage_join is not None else 0
        ),
        api_breaking_changes=_as_int(diff_summary.get("api_breaking_changes"), 0),
        diff_new_high_risk_functions=_as_int(
            diff_summary.get("new_high_risk_functions"),
            0,
        ),
        diff_new_high_coupling_classes=_as_int(
            diff_summary.get("new_high_coupling_classes"),
            0,
        ),
        diff_new_cycles=_as_int(diff_summary.get("new_cycles"), 0),
        diff_new_dead_code=_as_int(diff_summary.get("new_dead_code"), 0),
        diff_health_delta=_as_int(diff_summary.get("health_delta"), 0),
        diff_typing_param_permille_delta=_as_int(
            diff_summary.get("typing_param_permille_delta"),
            0,
        ),
        diff_typing_return_permille_delta=_as_int(
            diff_summary.get("typing_return_permille_delta"),
            0,
        ),
        diff_docstring_permille_delta=_as_int(
            diff_summary.get("docstring_permille_delta"),
            0,
        ),
    )


def metric_gate_reasons_for_state(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Run every registered gate builder against *state*, in stable order.

    Only gate keys declared by METRIC_FAMILIES are considered; ordering is
    driven by _GATE_REASON_ORDER (unknown keys sort last, alphabetically).
    """
    gate_keys = sorted(
        {
            gate_key
            for family in METRIC_FAMILIES.values()
            for gate_key in family.gate_keys
        },
        key=lambda gate_key: (_GATE_REASON_ORDER.get(gate_key, 999), gate_key),
    )
    reasons: list[str] = []
    for gate_key in gate_keys:
        builder = _GATE_REASON_BUILDERS.get(gate_key)
        if builder is None:
            continue
        reasons.extend(builder(state=state, config=config))
    return tuple(reasons)


# Deterministic presentation order for gate reasons (lower sorts first).
_GATE_REASON_ORDER = {
    "complexity_threshold": 10,
    "coupling_threshold": 20,
    "cohesion_threshold": 30,
    "health_threshold": 40,
    "dependency_cycles": 50,
    "dead_code_high_confidence": 60,
    "new_high_risk_functions": 70,
    "new_high_coupling_classes": 80,
    "new_dependency_cycles": 90,
    "new_dead_code": 100,
    "health_regression": 110,
    "typing_coverage_threshold": 120,
    "docstring_coverage_threshold": 130,
    "typing_regression": 140,
    "docstring_regression": 150,
    "api_breaking_changes": 160,
    "coverage_hotspots": 170,
}


def _reason_if(triggered: bool, message: str) -> tuple[str, ...]:
    """Return a one-element reason tuple when *triggered*, else empty."""
    return (message,) if triggered else ()


def _complexity_threshold_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: max cyclomatic complexity above the configured ceiling."""
    return _reason_if(
        0 <= config.fail_complexity < state.complexity_max,
        "Complexity threshold exceeded: "
        f"max CC={state.complexity_max}, "
        f"threshold={config.fail_complexity}.",
    )


def _coupling_threshold_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: max class coupling (CBO) above the configured ceiling."""
    return _reason_if(
        0 <= config.fail_coupling < state.coupling_max,
        "Coupling threshold exceeded: "
        f"max CBO={state.coupling_max}, "
        f"threshold={config.fail_coupling}.",
    )


def _cohesion_threshold_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: max LCOM4 above the configured ceiling."""
    return _reason_if(
        0 <= config.fail_cohesion < state.cohesion_max,
        "Cohesion threshold exceeded: "
        f"max LCOM4={state.cohesion_max}, "
        f"threshold={config.fail_cohesion}.",
    )


def _health_threshold_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: health score below the configured floor."""
    return _reason_if(
        config.fail_health >= 0 and state.health_score < config.fail_health,
        "Health score below threshold: "
        f"score={state.health_score}, threshold={config.fail_health}.",
    )


def _dependency_cycles_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: any dependency cycle while fail_cycles is enabled."""
    return _reason_if(
        config.fail_cycles and state.dependency_cycles > 0,
        f"Dependency cycles detected: {state.dependency_cycles} cycle(s).",
    )


def _dead_code_high_confidence_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: any high-confidence dead-code item while fail_dead_code is enabled."""
    return _reason_if(
        config.fail_dead_code and state.dead_high_confidence > 0,
        f"Dead code detected (high confidence): {state.dead_high_confidence} item(s).",
    )


def _new_high_risk_functions_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: new high-risk functions relative to the metrics baseline."""
    return _reason_if(
        config.fail_on_new_metrics and state.diff_new_high_risk_functions > 0,
        "New high-risk functions vs metrics baseline: "
        f"{state.diff_new_high_risk_functions}.",
    )


def _new_high_coupling_classes_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: new high-coupling classes relative to the metrics baseline."""
    return _reason_if(
        config.fail_on_new_metrics and state.diff_new_high_coupling_classes > 0,
        "New high-coupling classes vs metrics baseline: "
        f"{state.diff_new_high_coupling_classes}.",
    )


def _new_dependency_cycles_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: new dependency cycles relative to the metrics baseline."""
    return _reason_if(
        config.fail_on_new_metrics and state.diff_new_cycles > 0,
        f"New dependency cycles vs metrics baseline: {state.diff_new_cycles}.",
    )


def _new_dead_code_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: new dead-code items relative to the metrics baseline."""
    return _reason_if(
        config.fail_on_new_metrics and state.diff_new_dead_code > 0,
        f"New dead code items vs metrics baseline: {state.diff_new_dead_code}.",
    )


def _health_regression_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: health score went down relative to the metrics baseline."""
    return _reason_if(
        config.fail_on_new_metrics and state.diff_health_delta < 0,
        f"Health score regressed vs metrics baseline: delta={state.diff_health_delta}.",
    )

def _typing_coverage_threshold_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: typing coverage percent below the configured minimum."""
    typing_percent = state.typing_param_permille / 10.0
    below_minimum = (
        config.min_typing_coverage >= 0
        and typing_percent < float(config.min_typing_coverage)
    )
    return _reason_if(
        below_minimum,
        "Typing coverage below threshold: "
        f"coverage={typing_percent:.1f}%, threshold={config.min_typing_coverage}%.",
    )


def _docstring_coverage_threshold_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: docstring coverage percent below the configured minimum."""
    docstring_percent = state.docstring_permille / 10.0
    below_minimum = (
        config.min_docstring_coverage >= 0
        and docstring_percent < float(config.min_docstring_coverage)
    )
    return _reason_if(
        below_minimum,
        "Docstring coverage below threshold: "
        f"coverage={docstring_percent:.1f}%, "
        f"threshold={config.min_docstring_coverage}%.",
    )


def _typing_regression_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: typing coverage (params or returns) regressed vs baseline."""
    regressed = (
        state.diff_typing_param_permille_delta < 0
        or state.diff_typing_return_permille_delta < 0
    )
    return _reason_if(
        config.fail_on_typing_regression and regressed,
        "Typing coverage regressed vs metrics baseline: "
        f"params_delta={state.diff_typing_param_permille_delta}, "
        f"returns_delta={state.diff_typing_return_permille_delta}.",
    )


def _docstring_regression_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: docstring coverage regressed vs baseline."""
    regressed = state.diff_docstring_permille_delta < 0
    return _reason_if(
        config.fail_on_docstring_regression and regressed,
        "Docstring coverage regressed vs metrics baseline: "
        f"delta={state.diff_docstring_permille_delta}.",
    )


def _api_breaking_changes_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: public-API breaking changes vs baseline."""
    return _reason_if(
        config.fail_on_api_break and state.api_breaking_changes > 0,
        "Public API breaking changes vs metrics baseline: "
        f"{state.api_breaking_changes}.",
    )


def _coverage_hotspots_reason(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> tuple[str, ...]:
    """Gate: coverage hotspots present (only when the join succeeded)."""
    hotspots_found = (
        config.fail_on_untested_hotspots
        and state.coverage_join_status == "ok"
        and state.coverage_hotspots > 0
    )
    return _reason_if(
        hotspots_found,
        "Coverage hotspots detected: "
        f"hotspots={state.coverage_hotspots}, "
        f"threshold={config.coverage_min}%.",
    )


# Gate key -> reason builder; keys not present here are silently skipped.
_GATE_REASON_BUILDERS: dict[str, Callable[..., tuple[str, ...]]] = {
    "complexity_threshold": _complexity_threshold_reason,
    "coupling_threshold": _coupling_threshold_reason,
    "cohesion_threshold": _cohesion_threshold_reason,
    "health_threshold": _health_threshold_reason,
    "dependency_cycles": _dependency_cycles_reason,
    "dead_code_high_confidence": _dead_code_high_confidence_reason,
    "new_high_risk_functions": _new_high_risk_functions_reason,
    "new_high_coupling_classes": _new_high_coupling_classes_reason,
    "new_dependency_cycles": _new_dependency_cycles_reason,
    "new_dead_code": _new_dead_code_reason,
    "health_regression": _health_regression_reason,
    "typing_coverage_threshold": _typing_coverage_threshold_reason,
    "docstring_coverage_threshold": _docstring_coverage_threshold_reason,
    "typing_regression": _typing_regression_reason,
    "docstring_regression": _docstring_regression_reason,
    "api_breaking_changes": _api_breaking_changes_reason,
    "coverage_hotspots": _coverage_hotspots_reason,
}


def evaluate_gate_state(
    *,
    state: GateState,
    config: MetricGateConfig,
) -> GateResult:
    """Fold metric-gate and clone-gate reasons into a single GateResult."""
    metric_reasons = metric_gate_reasons_for_state(state=state, config=config)
    reasons: list[str] = [f"metric:{reason}" for reason in metric_reasons]

    if config.fail_on_new and state.clone_new_count > 0:
        reasons.append("clone:new")
    if 0 <= config.fail_threshold < state.clone_total:
        reasons.append(f"clone:threshold:{state.clone_total}:{config.fail_threshold}")

    if not reasons:
        return GateResult(exit_code=int(ExitCode.SUCCESS), reasons=())
    return GateResult(
        exit_code=int(ExitCode.GATING_FAILURE),
        reasons=tuple(reasons),
    )


# codeclone: ignore[dead-code]
def metric_gate_reasons(
    *,
    report_document: Mapping[str, object],
    config: MetricGateConfig,
    metrics_diff: object | None = None,
) -> tuple[str, ...]:
    """Compute metric-gate reasons straight from a report document."""
    derived_state = _gate_state_from_report_document(
        report_document=report_document,
        metrics_diff=metrics_diff,
    )
    return metric_gate_reasons_for_state(state=derived_state, config=config)


def evaluate_gates(
    *,
    report_document: Mapping[str, object],
    config: MetricGateConfig,
    baseline_status: str | None = None,
    metrics_diff: object | None = None,
    clone_new_count: int | None = None,
    clone_total: int | None = None,
) -> GateResult:
    """Evaluate all gates against a serialized report document."""
    _ = baseline_status  # accepted for interface compatibility; not consulted
    derived_state = _gate_state_from_report_document(
        report_document=report_document,
        metrics_diff=metrics_diff,
        clone_new_count=clone_new_count,
        clone_total=clone_total,
    )
    return evaluate_gate_state(state=derived_state, config=config)


def _gate_state_from_report_document(
    *,
    report_document: Mapping[str, object],
    metrics_diff: object | None,
    clone_new_count: int | None = None,
    clone_total: int | None = None,
) -> GateState:
    """Derive a GateState from a report document and an optional diff.

    When *metrics_diff* is provided, its summary takes precedence over the
    delta fields stored under the document's coverage_adoption/api_surface
    summaries; explicit clone counts override the counts derived from the
    clone groups in the document.
    """
    findings = _as_mapping(report_document.get("findings"))
    clone_groups = _as_mapping(_as_mapping(findings.get("groups")).get("clones"))
    function_groups = _as_sequence(clone_groups.get("functions"))
    block_groups = _as_sequence(clone_groups.get("blocks"))
    derived_new_count = sum(
        1
        for group in (*function_groups, *block_groups)
        if str(_as_mapping(group).get("novelty", "")).strip() == "new"
    )

    families = _as_mapping(
        _as_mapping(report_document.get("metrics")).get("families")
    )

    def family_summary(name: str) -> Mapping[str, object]:
        # Each metric family stores its rollup under the "summary" key.
        return _as_mapping(_as_mapping(families.get(name)).get("summary"))

    complexity_summary = family_summary("complexity")
    coupling_summary = family_summary("coupling")
    cohesion_summary = family_summary("cohesion")
    dependencies_summary = family_summary("dependencies")
    dead_code_summary = family_summary("dead_code")
    health_summary = family_summary("health")
    coverage_adoption_summary = family_summary("coverage_adoption")
    api_surface_summary = family_summary("api_surface")
    coverage_join_summary = family_summary("coverage_join")

    diff_summary = summarize_metrics_diff(metrics_diff) or {}
    prefer_diff_summary = metrics_diff is not None

    if clone_new_count is None:
        clone_new_count = derived_new_count
    if clone_total is None:
        clone_total = len(function_groups) + len(block_groups)

    return GateState(
        clone_new_count=max(clone_new_count, 0),
        clone_total=max(clone_total, 0),
        complexity_max=_as_int(complexity_summary.get("max"), 0),
        coupling_max=_as_int(coupling_summary.get("max"), 0),
        cohesion_max=_as_int(cohesion_summary.get("max"), 0),
        dependency_cycles=_as_int(dependencies_summary.get("cycles"), 0),
        dead_high_confidence=_as_int(dead_code_summary.get("high_confidence"), 0),
        health_score=_as_int(health_summary.get("score"), 0),
        typing_param_permille=_as_int(
            coverage_adoption_summary.get("param_permille"), 0
        ),
        docstring_permille=_as_int(
            coverage_adoption_summary.get("docstring_permille"), 0
        ),
        coverage_join_status=str(coverage_join_summary.get("status", "")),
        coverage_hotspots=_as_int(coverage_join_summary.get("coverage_hotspots"), 0),
        api_breaking_changes=(
            _as_int(diff_summary.get("api_breaking_changes"), 0)
            if prefer_diff_summary
            else _as_int(api_surface_summary.get("breaking"), 0)
        ),
        diff_new_high_risk_functions=_as_int(
            diff_summary.get("new_high_risk_functions"), 0
        ),
        diff_new_high_coupling_classes=_as_int(
            diff_summary.get("new_high_coupling_classes"), 0
        ),
        diff_new_cycles=_as_int(diff_summary.get("new_cycles"), 0),
        diff_new_dead_code=_as_int(diff_summary.get("new_dead_code"), 0),
        diff_health_delta=_as_int(diff_summary.get("health_delta"), 0),
        diff_typing_param_permille_delta=(
            _as_int(diff_summary.get("typing_param_permille_delta"), 0)
            if prefer_diff_summary
            else _as_int(coverage_adoption_summary.get("param_delta"), 0)
        ),
        diff_typing_return_permille_delta=(
            _as_int(diff_summary.get("typing_return_permille_delta"), 0)
            if prefer_diff_summary
            else _as_int(coverage_adoption_summary.get("return_delta"), 0)
        ),
        diff_docstring_permille_delta=(
            _as_int(diff_summary.get("docstring_permille_delta"), 0)
            if prefer_diff_summary
            else _as_int(coverage_adoption_summary.get("docstring_delta"), 0)
        ),
    )


def _permille(numerator: int, denominator: int) -> int:
    """Integer per-mille ratio; 0 when the denominator is non-positive."""
    return 0 if denominator <= 0 else round(numerator * 1000 / denominator)


__all__ = [
    "GateResult",
    "GateState",
    "MetricGateConfig",
    "evaluate_gate_state",
    "evaluate_gates",
    "gate_state_from_project_metrics",
    "metric_gate_reasons",
    "metric_gate_reasons_for_state",
    "summarize_metrics_diff",
]
-""" +"""Canonical HTML report package.""" from __future__ import annotations -from ._html_report import build_html_report -from ._html_snippets import ( +from .assemble import build_html_report +from .widgets.snippets import ( _FileCache, _pygments_css, _render_code_block, diff --git a/codeclone/_html_report/_context.py b/codeclone/report/html/_context.py similarity index 97% rename from codeclone/_html_report/_context.py rename to codeclone/report/html/_context.py index efac981..c851377 100644 --- a/codeclone/_html_report/_context.py +++ b/codeclone/report/html/_context.py @@ -12,19 +12,19 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from .._coerce import as_mapping as _as_mapping -from ..contracts import REPORT_SCHEMA_VERSION -from ..report.overview import build_report_overview, materialize_report_overview +from ...contracts import REPORT_SCHEMA_VERSION +from ...utils.coerce import as_mapping as _as_mapping +from ..overview import build_report_overview, materialize_report_overview if TYPE_CHECKING: - from .._html_snippets import _FileCache - from ..models import ( + from ...models import ( GroupItemLike, GroupMapLike, MetricsDiff, StructuralFindingGroup, Suggestion, ) + from .widgets.snippets import _FileCache @dataclass(frozen=True, slots=True) @@ -166,7 +166,7 @@ def build_context( max_snippet_lines: int = 220, ) -> ReportContext: """Build a ReportContext from raw build_html_report parameters.""" - from .._html_escape import _escape_html + from .primitives.escape import _escape_html meta = dict(report_meta or {}) baseline_meta = _as_mapping(meta.get("baseline")) diff --git a/codeclone/_html_report/_assemble.py b/codeclone/report/html/assemble.py similarity index 93% rename from codeclone/_html_report/_assemble.py rename to codeclone/report/html/assemble.py index 13f4964..860d852 100644 --- a/codeclone/_html_report/_assemble.py +++ b/codeclone/report/html/assemble.py @@ -11,28 +11,29 @@ from collections.abc import Collection, Mapping, 
Sequence from typing import TYPE_CHECKING -from .. import __version__, _coerce -from .._html_css import build_css -from .._html_escape import _escape_html -from .._html_js import build_js -from .._html_snippets import _FileCache, _pygments_css -from ..contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL -from ..domain.quality import CONFIDENCE_HIGH -from ..structural_findings import normalize_structural_findings -from ..templates import FONT_CSS_URL, REPORT_TEMPLATE +from ... import __version__ +from ...contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL +from ...domain.quality import CONFIDENCE_HIGH +from ...findings.structural import normalize_structural_findings +from ...templates import FONT_CSS_URL, REPORT_TEMPLATE +from ...utils import coerce as _coerce from ._context import _meta_pick, build_context -from ._icons import BRAND_LOGO, ICONS, section_icon_html -from ._sections._clones import render_clones_panel -from ._sections._coupling import render_quality_panel -from ._sections._dead_code import render_dead_code_panel -from ._sections._dependencies import render_dependencies_panel -from ._sections._meta import build_topbar_provenance_summary, render_meta_panel -from ._sections._overview import render_overview_panel -from ._sections._structural import render_structural_panel -from ._sections._suggestions import render_suggestions_panel +from .assets.css import build_css +from .assets.js import build_js +from .primitives.escape import _escape_html +from .sections._clones import render_clones_panel +from .sections._coupling import render_quality_panel +from .sections._dead_code import render_dead_code_panel +from .sections._dependencies import render_dependencies_panel +from .sections._meta import build_topbar_provenance_summary, render_meta_panel +from .sections._overview import render_overview_panel +from .sections._structural import render_structural_panel +from .sections._suggestions import render_suggestions_panel +from .widgets.icons import BRAND_LOGO, 
ICONS, section_icon_html +from .widgets.snippets import _FileCache, _pygments_css if TYPE_CHECKING: - from ..models import GroupMapLike, MetricsDiff, StructuralFindingGroup, Suggestion + from ...models import GroupMapLike, MetricsDiff, StructuralFindingGroup, Suggestion def build_html_report( diff --git a/codeclone/_html_report/_sections/__init__.py b/codeclone/report/html/assets/__init__.py similarity index 100% rename from codeclone/_html_report/_sections/__init__.py rename to codeclone/report/html/assets/__init__.py diff --git a/codeclone/_html_css.py b/codeclone/report/html/assets/css.py similarity index 99% rename from codeclone/_html_css.py rename to codeclone/report/html/assets/css.py index 66a4609..0db810f 100644 --- a/codeclone/_html_css.py +++ b/codeclone/report/html/assets/css.py @@ -1129,7 +1129,6 @@ .prov-copy-btn svg{width:12px;height:12px} """ - # --------------------------------------------------------------------------- # Shared micro-interactions # --------------------------------------------------------------------------- @@ -1414,7 +1413,6 @@ font-variant-numeric:tabular-nums;opacity:.85} """ - # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- diff --git a/codeclone/_html_js.py b/codeclone/report/html/assets/js.py similarity index 100% rename from codeclone/_html_js.py rename to codeclone/report/html/assets/js.py diff --git a/codeclone/report/html/primitives/__init__.py b/codeclone/report/html/primitives/__init__.py new file mode 100644 index 0000000..9135843 --- /dev/null +++ b/codeclone/report/html/primitives/__init__.py @@ -0,0 +1,5 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy diff --git a/codeclone/_html_data_attrs.py b/codeclone/report/html/primitives/data_attrs.py similarity index 96% rename from codeclone/_html_data_attrs.py rename to codeclone/report/html/primitives/data_attrs.py index d4e94f3..3c942a1 100644 --- a/codeclone/_html_data_attrs.py +++ b/codeclone/report/html/primitives/data_attrs.py @@ -8,7 +8,7 @@ from __future__ import annotations -from ._html_escape import _escape_html +from .escape import _escape_html __all__ = ["_build_data_attrs"] diff --git a/codeclone/_html_escape.py b/codeclone/report/html/primitives/escape.py similarity index 100% rename from codeclone/_html_escape.py rename to codeclone/report/html/primitives/escape.py diff --git a/codeclone/_html_filters.py b/codeclone/report/html/primitives/filters.py similarity index 97% rename from codeclone/_html_filters.py rename to codeclone/report/html/primitives/filters.py index e700fad..f578b16 100644 --- a/codeclone/_html_filters.py +++ b/codeclone/report/html/primitives/filters.py @@ -10,7 +10,7 @@ from collections.abc import Sequence -from ._html_escape import _escape_html +from .escape import _escape_html __all__ = [ "CLONE_TYPE_OPTIONS", diff --git a/codeclone/report/html/sections/__init__.py b/codeclone/report/html/sections/__init__.py new file mode 100644 index 0000000..9135843 --- /dev/null +++ b/codeclone/report/html/sections/__init__.py @@ -0,0 +1,5 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy diff --git a/codeclone/_html_report/_sections/_clones.py b/codeclone/report/html/sections/_clones.py similarity index 97% rename from codeclone/_html_report/_sections/_clones.py rename to codeclone/report/html/sections/_clones.py index 65ab657..b18624f 100644 --- a/codeclone/_html_report/_sections/_clones.py +++ b/codeclone/report/html/sections/_clones.py @@ -11,29 +11,31 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Literal -from ... import _coerce -from ..._html_badges import _micro_badges, _source_kind_badge_html, _stat_card -from ..._html_data_attrs import _build_data_attrs -from ..._html_escape import _escape_html -from ..._html_filters import CLONE_TYPE_OPTIONS, SPREAD_OPTIONS, _render_select -from ..._html_snippets import _render_code_block -from ...report._source_kinds import SOURCE_KIND_FILTER_VALUES -from ...report.derived import ( +from codeclone.findings.ids import clone_group_id +from codeclone.utils import coerce as _coerce + +from ..._source_kinds import SOURCE_KIND_FILTER_VALUES +from ...derived import ( combine_source_kinds, group_spread, report_location_from_group_item, ) -from ...report.explain_contract import format_group_instance_compare_meta -from ...report.json_contract import clone_group_id -from ...report.suggestions import classify_clone_type -from .._components import Tone, insight_block -from .._glossary import glossary_tip -from .._icons import ICONS -from .._tables import render_rows_table -from .._tabs import render_split_tabs +from ...explain_contract import format_group_instance_compare_meta +from ...suggestions import classify_clone_type +from ..primitives.data_attrs import _build_data_attrs +from ..primitives.escape import _escape_html +from ..primitives.filters import CLONE_TYPE_OPTIONS, SPREAD_OPTIONS, _render_select +from ..widgets.badges import _micro_badges, _source_kind_badge_html, _stat_card +from 
..widgets.components import Tone, insight_block +from ..widgets.glossary import glossary_tip +from ..widgets.icons import ICONS +from ..widgets.snippets import _render_code_block +from ..widgets.tables import render_rows_table +from ..widgets.tabs import render_split_tabs if TYPE_CHECKING: - from ...models import GroupItemLike + from codeclone.models import GroupItemLike + from .._context import ReportContext _as_int = _coerce.as_int diff --git a/codeclone/_html_report/_sections/_coupling.py b/codeclone/report/html/sections/_coupling.py similarity index 97% rename from codeclone/_html_report/_sections/_coupling.py rename to codeclone/report/html/sections/_coupling.py index 08fdf22..6386a75 100644 --- a/codeclone/_html_report/_sections/_coupling.py +++ b/codeclone/report/html/sections/_coupling.py @@ -10,12 +10,13 @@ from typing import TYPE_CHECKING -from ... import _coerce -from ..._html_badges import _micro_badges, _render_chain_flow, _stat_card -from .._components import Tone, insight_block -from .._glossary import glossary_tip -from .._tables import render_rows_table -from .._tabs import render_split_tabs +from codeclone.utils import coerce as _coerce + +from ..widgets.badges import _micro_badges, _render_chain_flow, _stat_card +from ..widgets.components import Tone, insight_block +from ..widgets.glossary import glossary_tip +from ..widgets.tables import render_rows_table +from ..widgets.tabs import render_split_tabs from ._coverage_join import ( coverage_join_quality_count, coverage_join_quality_summary, diff --git a/codeclone/_html_report/_sections/_coverage_join.py b/codeclone/report/html/sections/_coverage_join.py similarity index 96% rename from codeclone/_html_report/_sections/_coverage_join.py rename to codeclone/report/html/sections/_coverage_join.py index 5268d50..b23d105 100644 --- a/codeclone/_html_report/_sections/_coverage_join.py +++ b/codeclone/report/html/sections/_coverage_join.py @@ -11,11 +11,12 @@ from pathlib import Path from typing import 
TYPE_CHECKING -from ... import _coerce -from ..._html_badges import _micro_badges, _stat_card, _tab_empty_info -from ..._html_escape import _escape_html -from .._glossary import glossary_tip -from .._tables import render_rows_table +from codeclone.utils import coerce as _coerce + +from ..primitives.escape import _escape_html +from ..widgets.badges import _micro_badges, _stat_card, _tab_empty_info +from ..widgets.glossary import glossary_tip +from ..widgets.tables import render_rows_table if TYPE_CHECKING: from collections.abc import Mapping diff --git a/codeclone/_html_report/_sections/_dead_code.py b/codeclone/report/html/sections/_dead_code.py similarity index 94% rename from codeclone/_html_report/_sections/_dead_code.py rename to codeclone/report/html/sections/_dead_code.py index eaa5bd2..ffdad1d 100644 --- a/codeclone/_html_report/_sections/_dead_code.py +++ b/codeclone/report/html/sections/_dead_code.py @@ -10,12 +10,13 @@ from typing import TYPE_CHECKING -from ... import _coerce -from ..._html_badges import _micro_badges, _stat_card -from .._components import Tone, insight_block -from .._glossary import glossary_tip -from .._tables import render_rows_table -from .._tabs import render_split_tabs +from codeclone.utils import coerce as _coerce + +from ..widgets.badges import _micro_badges, _stat_card +from ..widgets.components import Tone, insight_block +from ..widgets.glossary import glossary_tip +from ..widgets.tables import render_rows_table +from ..widgets.tabs import render_split_tabs if TYPE_CHECKING: from collections.abc import Mapping diff --git a/codeclone/_html_report/_sections/_dependencies.py b/codeclone/report/html/sections/_dependencies.py similarity index 98% rename from codeclone/_html_report/_sections/_dependencies.py rename to codeclone/report/html/sections/_dependencies.py index b0df4af..e667358 100644 --- a/codeclone/_html_report/_sections/_dependencies.py +++ b/codeclone/report/html/sections/_dependencies.py @@ -12,18 +12,19 @@ from 
collections.abc import Mapping, Sequence from typing import TYPE_CHECKING -from ... import _coerce -from ..._html_badges import ( +from codeclone.utils import coerce as _coerce + +from ..primitives.escape import _escape_html +from ..widgets.badges import ( _micro_badges, _render_chain_flow, _short_label, _stat_card, _tab_empty, ) -from ..._html_escape import _escape_html -from .._components import Tone, insight_block -from .._glossary import glossary_tip -from .._tables import render_rows_table +from ..widgets.components import Tone, insight_block +from ..widgets.glossary import glossary_tip +from ..widgets.tables import render_rows_table if TYPE_CHECKING: from .._context import ReportContext diff --git a/codeclone/_html_report/_sections/_meta.py b/codeclone/report/html/sections/_meta.py similarity index 98% rename from codeclone/_html_report/_sections/_meta.py rename to codeclone/report/html/sections/_meta.py index 2704446..e66cb4e 100644 --- a/codeclone/_html_report/_sections/_meta.py +++ b/codeclone/report/html/sections/_meta.py @@ -10,11 +10,13 @@ from typing import TYPE_CHECKING -from ... 
import __version__, _coerce -from ..._html_data_attrs import _build_data_attrs -from ..._html_escape import _escape_html, _meta_display +from codeclone import __version__ +from codeclone.utils import coerce as _coerce + from .._context import _meta_pick -from .._glossary import glossary_tip +from ..primitives.data_attrs import _build_data_attrs +from ..primitives.escape import _escape_html, _meta_display +from ..widgets.glossary import glossary_tip if TYPE_CHECKING: from .._context import ReportContext diff --git a/codeclone/_html_report/_sections/_overview.py b/codeclone/report/html/sections/_overview.py similarity index 99% rename from codeclone/_html_report/_sections/_overview.py rename to codeclone/report/html/sections/_overview.py index 7afdf6c..c9ac47b 100644 --- a/codeclone/_html_report/_sections/_overview.py +++ b/codeclone/report/html/sections/_overview.py @@ -12,22 +12,23 @@ from collections.abc import Mapping from typing import TYPE_CHECKING -from ... import _coerce -from ..._html_badges import ( +from codeclone.utils import coerce as _coerce + +from ..primitives.escape import _escape_html +from ..widgets.badges import ( _inline_empty, _micro_badges, _source_kind_badge_html, _stat_card, ) -from ..._html_escape import _escape_html -from .._components import ( +from ..widgets.components import ( Tone, insight_block, overview_cluster_header, overview_source_breakdown_html, overview_summary_item_html, ) -from .._glossary import glossary_tip +from ..widgets.glossary import glossary_tip if TYPE_CHECKING: from .._context import ReportContext diff --git a/codeclone/_html_report/_sections/_structural.py b/codeclone/report/html/sections/_structural.py similarity index 96% rename from codeclone/_html_report/_sections/_structural.py rename to codeclone/report/html/sections/_structural.py index d86428d..9f194b1 100644 --- a/codeclone/_html_report/_sections/_structural.py +++ b/codeclone/report/html/sections/_structural.py @@ -10,35 +10,37 @@ from typing import 
TYPE_CHECKING -from ..._html_badges import _source_kind_badge_html, _tab_empty -from ..._html_escape import _escape_html -from ..._html_snippets import _FileCache, _render_code_block -from ...domain.findings import ( +from codeclone.domain.findings import ( STRUCTURAL_KIND_CLONE_COHORT_DRIFT, STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, STRUCTURAL_KIND_DUPLICATED_BRANCHES, ) -from ...domain.quality import RISK_HIGH, RISK_LOW -from ...report._source_kinds import SOURCE_KIND_FILTER_VALUES, source_kind_label -from ...report.derived import ( +from codeclone.domain.quality import RISK_HIGH, RISK_LOW +from codeclone.findings.ids import structural_group_id +from codeclone.findings.structural import normalize_structural_findings + +from ..._source_kinds import SOURCE_KIND_FILTER_VALUES, source_kind_label +from ...derived import ( combine_source_kinds, group_spread, relative_report_path, report_location_from_structural_occurrence, ) -from ...report.findings import _dedupe_items, _finding_scope_text, _spread -from ...report.json_contract import structural_group_id -from ...report.suggestions import ( +from ...findings import _dedupe_items, _finding_scope_text, _spread +from ...suggestions import ( structural_action_steps, structural_has_separate_suggestion, ) -from ...structural_findings import normalize_structural_findings -from .._tabs import render_split_tabs +from ..primitives.escape import _escape_html +from ..widgets.badges import _source_kind_badge_html, _tab_empty +from ..widgets.snippets import _FileCache, _render_code_block +from ..widgets.tabs import render_split_tabs if TYPE_CHECKING: from collections.abc import Sequence - from ...models import StructuralFindingGroup, StructuralFindingOccurrence + from codeclone.models import StructuralFindingGroup, StructuralFindingOccurrence + from .._context import ReportContext __all__ = [ @@ -46,7 +48,6 @@ "render_structural_panel", ] - _KIND_LABEL: dict[str, str] = { STRUCTURAL_KIND_DUPLICATED_BRANCHES: "Duplicated 
branches", STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: "Clone guard/exit divergence", diff --git a/codeclone/_html_report/_sections/_suggestions.py b/codeclone/report/html/sections/_suggestions.py similarity index 95% rename from codeclone/_html_report/_sections/_suggestions.py rename to codeclone/report/html/sections/_suggestions.py index b0d8d04..f5ad11b 100644 --- a/codeclone/_html_report/_sections/_suggestions.py +++ b/codeclone/report/html/sections/_suggestions.py @@ -11,12 +11,7 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING -from ... import _coerce -from ..._html_badges import _micro_badges, _stat_card, _tab_empty -from ..._html_data_attrs import _build_data_attrs -from ..._html_escape import _escape_html -from ..._html_filters import SPREAD_OPTIONS, _render_select -from ...domain.findings import ( +from codeclone.domain.findings import ( CATEGORY_CLONE, CATEGORY_COHESION, CATEGORY_COMPLEXITY, @@ -28,13 +23,20 @@ FAMILY_METRICS, FAMILY_STRUCTURAL, ) -from ...domain.quality import SEVERITY_CRITICAL, SEVERITY_INFO, SEVERITY_WARNING -from ...report._source_kinds import SOURCE_KIND_FILTER_VALUES, source_kind_label -from .._components import insight_block -from .._glossary import glossary_tip +from codeclone.domain.quality import SEVERITY_CRITICAL, SEVERITY_INFO, SEVERITY_WARNING +from codeclone.utils import coerce as _coerce + +from ..._source_kinds import SOURCE_KIND_FILTER_VALUES, source_kind_label +from ..primitives.data_attrs import _build_data_attrs +from ..primitives.escape import _escape_html +from ..primitives.filters import SPREAD_OPTIONS, _render_select +from ..widgets.badges import _micro_badges, _stat_card, _tab_empty +from ..widgets.components import insight_block +from ..widgets.glossary import glossary_tip if TYPE_CHECKING: - from ...models import Suggestion + from codeclone.models import Suggestion + from .._context import ReportContext _as_int = _coerce.as_int diff --git 
a/codeclone/report/html/widgets/__init__.py b/codeclone/report/html/widgets/__init__.py new file mode 100644 index 0000000..9135843 --- /dev/null +++ b/codeclone/report/html/widgets/__init__.py @@ -0,0 +1,5 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy diff --git a/codeclone/_html_badges.py b/codeclone/report/html/widgets/badges.py similarity index 98% rename from codeclone/_html_badges.py rename to codeclone/report/html/widgets/badges.py index 716d1ad..ba77e0a 100644 --- a/codeclone/_html_badges.py +++ b/codeclone/report/html/widgets/badges.py @@ -18,8 +18,7 @@ from collections.abc import Callable, Sequence -from ._html_escape import _escape_html -from .domain.quality import ( +from codeclone.domain.quality import ( EFFORT_EASY, EFFORT_HARD, EFFORT_MODERATE, @@ -30,7 +29,9 @@ SEVERITY_INFO, SEVERITY_WARNING, ) -from .report._source_kinds import normalize_source_kind, source_kind_label + +from ..._source_kinds import normalize_source_kind, source_kind_label +from ..primitives.escape import _escape_html __all__ = [ "CHECK_CIRCLE_SVG", diff --git a/codeclone/_html_report/_components.py b/codeclone/report/html/widgets/components.py similarity index 95% rename from codeclone/_html_report/_components.py rename to codeclone/report/html/widgets/components.py index 7a9fcae..e970332 100644 --- a/codeclone/_html_report/_components.py +++ b/codeclone/report/html/widgets/components.py @@ -11,10 +11,11 @@ from collections.abc import Mapping from typing import Literal -from .._coerce import as_int as _as_int -from .._html_badges import _inline_empty, _source_kind_badge_html -from .._html_escape import _escape_html -from ._icons import section_icon_html +from codeclone.utils.coerce import as_int as _as_int + +from ..primitives.escape import 
_escape_html +from .badges import _inline_empty, _source_kind_badge_html +from .icons import section_icon_html Tone = Literal["ok", "warn", "risk", "info"] diff --git a/codeclone/_html_report/_glossary.py b/codeclone/report/html/widgets/glossary.py similarity index 99% rename from codeclone/_html_report/_glossary.py rename to codeclone/report/html/widgets/glossary.py index e48d4f0..5b30e32 100644 --- a/codeclone/_html_report/_glossary.py +++ b/codeclone/report/html/widgets/glossary.py @@ -8,7 +8,7 @@ from __future__ import annotations -from .._html_escape import _escape_html +from ..primitives.escape import _escape_html GLOSSARY: dict[str, str] = { # Complexity diff --git a/codeclone/_html_report/_icons.py b/codeclone/report/html/widgets/icons.py similarity index 100% rename from codeclone/_html_report/_icons.py rename to codeclone/report/html/widgets/icons.py diff --git a/codeclone/_html_snippets.py b/codeclone/report/html/widgets/snippets.py similarity index 99% rename from codeclone/_html_snippets.py rename to codeclone/report/html/widgets/snippets.py index dac7eec..49afecc 100644 --- a/codeclone/_html_snippets.py +++ b/codeclone/report/html/widgets/snippets.py @@ -12,7 +12,7 @@ from functools import lru_cache from typing import TYPE_CHECKING, NamedTuple, cast -from .errors import FileProcessingError +from ....contracts.errors import FileProcessingError if TYPE_CHECKING: from types import ModuleType diff --git a/codeclone/_html_report/_tables.py b/codeclone/report/html/widgets/tables.py similarity index 95% rename from codeclone/_html_report/_tables.py rename to codeclone/report/html/widgets/tables.py index 7f633f2..59cbf0e 100644 --- a/codeclone/_html_report/_tables.py +++ b/codeclone/report/html/widgets/tables.py @@ -11,12 +11,12 @@ from collections.abc import Collection, Sequence from typing import TYPE_CHECKING -from .._html_badges import _quality_badge_html, _tab_empty -from .._html_escape import _escape_html -from ._glossary import glossary_tip +from 
..primitives.escape import _escape_html +from .badges import _quality_badge_html, _tab_empty +from .glossary import glossary_tip if TYPE_CHECKING: - from ._context import ReportContext + from .._context import ReportContext _RISK_HEADERS = {"risk", "confidence", "severity", "effort"} _PATH_HEADERS = {"file", "location"} diff --git a/codeclone/_html_report/_tabs.py b/codeclone/report/html/widgets/tabs.py similarity index 97% rename from codeclone/_html_report/_tabs.py rename to codeclone/report/html/widgets/tabs.py index 8ce1e43..5b708e3 100644 --- a/codeclone/_html_report/_tabs.py +++ b/codeclone/report/html/widgets/tabs.py @@ -10,7 +10,7 @@ from collections.abc import Sequence -from .._html_escape import _escape_html +from ..primitives.escape import _escape_html def render_split_tabs( diff --git a/codeclone/report/json_contract.py b/codeclone/report/json_contract.py index decfeb6..0529ae7 100644 --- a/codeclone/report/json_contract.py +++ b/codeclone/report/json_contract.py @@ -6,402 +6,40 @@ from __future__ import annotations -from collections import Counter -from collections.abc import Collection, Iterable, Mapping, Sequence -from hashlib import sha256 -from typing import TYPE_CHECKING, Literal - -import orjson - -from .._coerce import as_float as _as_float -from .._coerce import as_int as _as_int -from .._coerce import as_mapping as _as_mapping -from .._coerce import as_sequence as _as_sequence -from ..contracts import ( - DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - REPORT_SCHEMA_VERSION, -) -from ..domain.findings import ( - CATEGORY_COHESION, - CATEGORY_COMPLEXITY, - CATEGORY_COUPLING, - CATEGORY_COVERAGE, - CATEGORY_DEAD_CODE, - CATEGORY_DEPENDENCY, - CLONE_KIND_BLOCK, - CLONE_KIND_FUNCTION, - CLONE_KIND_SEGMENT, - CLONE_NOVELTY_KNOWN, - CLONE_NOVELTY_NEW, - FAMILY_CLONE, - FAMILY_CLONES, - FAMILY_DEAD_CODE, - FAMILY_DESIGN, - FAMILY_STRUCTURAL, - 
FINDING_KIND_COVERAGE_HOTSPOT, - FINDING_KIND_COVERAGE_SCOPE_GAP, -) -from ..domain.quality import ( - CONFIDENCE_HIGH, - CONFIDENCE_MEDIUM, - EFFORT_EASY, - EFFORT_HARD, - EFFORT_MODERATE, - EFFORT_WEIGHT, - RISK_LOW, - SEVERITY_CRITICAL, - SEVERITY_INFO, - SEVERITY_ORDER, - SEVERITY_RANK, - SEVERITY_WARNING, -) -from ..domain.source_scope import ( - IMPACT_SCOPE_MIXED, - IMPACT_SCOPE_NON_RUNTIME, - IMPACT_SCOPE_RUNTIME, - SOURCE_KIND_FIXTURES, - SOURCE_KIND_MIXED, - SOURCE_KIND_OTHER, - SOURCE_KIND_PRODUCTION, - SOURCE_KIND_TESTS, -) -from ..structural_findings import normalize_structural_findings -from ..suppressions import INLINE_CODECLONE_SUPPRESSION_SOURCE -from .derived import ( - group_spread, - relative_report_path, - report_location_from_group_item, - report_location_from_structural_occurrence, -) -from .derived import ( - normalized_source_kind as _normalized_source_kind, -) -from .derived import ( - source_scope_from_counts as _report_source_scope_from_counts, -) -from .derived import ( - source_scope_from_locations as _report_source_scope_from_locations, +from typing import TYPE_CHECKING + +from ..findings.structural.detectors import normalize_structural_findings +from .document import ( + _build_design_groups, + _clone_group_assessment, + _collect_paths_from_metrics, + _combined_impact_scope, + _contract_path, + _count_file_lines, + _count_file_lines_for_path, + _csv_values, + _derive_inventory_code_counts, + _findings_summary, + _is_absolute_path, + _normalize_block_machine_facts, + _normalize_nested_string_rows, + _parse_ratio_percent, + _source_scope_from_filepaths, + _source_scope_from_locations, + _structural_group_assessment, + _suggestion_finding_id, + build_report_document, + clone_group_id, + dead_code_group_id, + design_group_id, + structural_group_id, ) -from .overview import build_directory_hotspots -from .suggestions import classify_clone_type +from .document import _common as _document_common if TYPE_CHECKING: - from ..models import ( - 
GroupItemLike, - GroupMapLike, - SourceKind, - StructuralFindingGroup, - Suggestion, - SuppressedCloneGroup, - ) - -__all__ = [ - "build_report_document", - "clone_group_id", - "dead_code_group_id", - "design_group_id", - "structural_group_id", -] - -_OVERLOADED_MODULES_FAMILY = "overloaded_modules" -_COVERAGE_ADOPTION_FAMILY = "coverage_adoption" -_API_SURFACE_FAMILY = "api_surface" -_COVERAGE_JOIN_FAMILY = "coverage_join" - - -def _optional_str(value: object) -> str | None: - if value is None: - return None - text = str(value).strip() - return text or None - - -def _coerced_nonnegative_threshold(value: object, *, default: int) -> int: - threshold = _as_int(value, default) - return threshold if threshold >= 0 else default - - -def _design_findings_thresholds_payload( - raw_meta: Mapping[str, object] | None, -) -> dict[str, object]: - meta = dict(raw_meta or {}) - return { - "design_findings": { - CATEGORY_COMPLEXITY: { - "metric": "cyclomatic_complexity", - "operator": ">", - "value": _coerced_nonnegative_threshold( - meta.get("design_complexity_threshold"), - default=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - ), - }, - CATEGORY_COUPLING: { - "metric": "cbo", - "operator": ">", - "value": _coerced_nonnegative_threshold( - meta.get("design_coupling_threshold"), - default=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - ), - }, - CATEGORY_COHESION: { - "metric": "lcom4", - "operator": ">=", - "value": _coerced_nonnegative_threshold( - meta.get("design_cohesion_threshold"), - default=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - ), - }, - } - } - - -def _analysis_profile_payload( - raw_meta: Mapping[str, object] | None, -) -> dict[str, int] | None: - meta = dict(raw_meta or {}) - nested = _as_mapping(meta.get("analysis_profile")) - if nested: - meta = dict(nested) - keys = ( - "min_loc", - "min_stmt", - "block_min_loc", - "block_min_stmt", - "segment_min_loc", - "segment_min_stmt", - ) - if any(key not in meta for key in keys): - return None - payload = {key: 
_as_int(meta.get(key), -1) for key in keys} - if any(value < 0 for value in payload.values()): - return None - return payload - - -def _normalize_path(value: str) -> str: - return value.replace("\\", "/").strip() - - -def _is_absolute_path(value: str) -> bool: - normalized = _normalize_path(value) - if not normalized: - return False - if normalized.startswith("/"): - return True - return len(normalized) > 2 and normalized[1] == ":" and normalized[2] == "/" - - -def _contract_path( - value: object, - *, - scan_root: str, -) -> tuple[str | None, str | None, str | None]: - path_text = _optional_str(value) - if path_text is None: - return None, None, None - normalized_path = _normalize_path(path_text) - relative_path = relative_report_path(normalized_path, scan_root=scan_root) - if relative_path and relative_path != normalized_path: - return relative_path, "in_root", normalized_path - if _is_absolute_path(normalized_path): - return normalized_path.rsplit("/", maxsplit=1)[-1], "external", normalized_path - return normalized_path, "relative", None - - -def _contract_report_location_path(location_path: str, *, scan_root: str) -> str: - contract_path, _scope, _absolute = _contract_path( - location_path, - scan_root=scan_root, - ) - return contract_path or "" - - -def _priority( - severity: str, - effort: str, -) -> float: - severity_rank = SEVERITY_RANK.get(severity, 1) - effort_rank = EFFORT_WEIGHT.get(effort, 1) - return float(severity_rank) / float(effort_rank) - - -def clone_group_id(kind: str, group_key: str) -> str: - return f"clone:{kind}:{group_key}" - - -def structural_group_id(finding_kind: str, finding_key: str) -> str: - return f"structural:{finding_kind}:{finding_key}" - - -def dead_code_group_id(subject_key: str) -> str: - return f"dead_code:{subject_key}" - - -def design_group_id(category: str, subject_key: str) -> str: - return f"design:{category}:{subject_key}" - - -def _clone_novelty( - *, - group_key: str, - baseline_trusted: bool, - new_keys: 
Collection[str] | None, -) -> str: - if not baseline_trusted: - return CLONE_NOVELTY_NEW - if new_keys is None: - return CLONE_NOVELTY_NEW - return CLONE_NOVELTY_NEW if group_key in new_keys else CLONE_NOVELTY_KNOWN - - -def _item_sort_key(item: Mapping[str, object]) -> tuple[str, int, int, str]: - return ( - str(item.get("relative_path", "")), - _as_int(item.get("start_line")), - _as_int(item.get("end_line")), - str(item.get("qualname", "")), - ) - - -def _parse_bool_text(value: object) -> bool: - text = str(value).strip().lower() - return text in {"1", "true", "yes"} - - -def _parse_ratio_percent(value: object) -> float | None: - text = str(value).strip() - if not text: - return None - if text.endswith("%"): - try: - return float(text[:-1]) / 100.0 - except ValueError: - return None - try: - numeric = float(text) - except ValueError: - return None - return numeric if numeric <= 1.0 else numeric / 100.0 - - -def _normalize_block_machine_facts( - *, - group_key: str, - group_arity: int, - block_facts: Mapping[str, str], -) -> tuple[dict[str, object], dict[str, str]]: - facts: dict[str, object] = { - "group_key": group_key, - "group_arity": group_arity, - } - display_facts: dict[str, str] = {} - for key in sorted(block_facts): - value = str(block_facts[key]) - match key: - case "group_arity": - facts[key] = _as_int(value) - case "block_size" | "consecutive_asserts" | "instance_peer_count": - facts[key] = _as_int(value) - case "merged_regions": - facts[key] = _parse_bool_text(value) - case "assert_ratio": - ratio = _parse_ratio_percent(value) - if ratio is not None: - facts[key] = ratio - display_facts[key] = value - case ( - "match_rule" | "pattern" | "signature_kind" | "hint" | "hint_confidence" - ): - facts[key] = value - case _: - display_facts[key] = value - return facts, display_facts - - -def _source_scope_from_filepaths( - filepaths: Iterable[str], - *, - scan_root: str, -) -> dict[str, object]: - counts: Counter[SourceKind] = Counter() - for filepath in 
filepaths: - location = report_location_from_group_item( - {"filepath": filepath, "start_line": 0, "end_line": 0, "qualname": ""}, - scan_root=scan_root, - ) - counts[location.source_kind] += 1 - return _source_scope_from_counts(counts) - - -def _source_scope_from_counts( - counts: Mapping[SourceKind, int], -) -> dict[str, object]: - return _report_source_scope_from_counts(counts) - - -def _source_scope_from_locations( - locations: Sequence[Mapping[str, object]], -) -> dict[str, object]: - normalized_locations = [ - {"source_kind": _normalized_source_kind(location.get("source_kind"))} - for location in locations - ] - return _report_source_scope_from_locations(normalized_locations) - + from collections.abc import Mapping, Sequence -def _collect_paths_from_metrics(metrics: Mapping[str, object]) -> set[str]: - paths: set[str] = set() - complexity = _as_mapping(metrics.get(CATEGORY_COMPLEXITY)) - for item in _as_sequence(complexity.get("functions")): - item_map = _as_mapping(item) - filepath = _optional_str(item_map.get("filepath")) - if filepath is not None: - paths.add(filepath) - for family_name in (CATEGORY_COUPLING, CATEGORY_COHESION): - family = _as_mapping(metrics.get(family_name)) - for item in _as_sequence(family.get("classes")): - item_map = _as_mapping(item) - filepath = _optional_str(item_map.get("filepath")) - if filepath is not None: - paths.add(filepath) - dead_code = _as_mapping(metrics.get(FAMILY_DEAD_CODE)) - for item in _as_sequence(dead_code.get("items")): - item_map = _as_mapping(item) - filepath = _optional_str(item_map.get("filepath")) - if filepath is not None: - paths.add(filepath) - for item in _as_sequence(dead_code.get("suppressed_items")): - item_map = _as_mapping(item) - filepath = _optional_str(item_map.get("filepath")) - if filepath is not None: - paths.add(filepath) - overloaded_modules = _as_mapping(metrics.get(_OVERLOADED_MODULES_FAMILY)) - for item in _as_sequence(overloaded_modules.get("items")): - item_map = _as_mapping(item) - 
filepath = _optional_str(item_map.get("filepath")) - if filepath is not None: - paths.add(filepath) - coverage_adoption = _as_mapping(metrics.get(_COVERAGE_ADOPTION_FAMILY)) - for item in _as_sequence(coverage_adoption.get("items")): - item_map = _as_mapping(item) - filepath = _optional_str(item_map.get("filepath")) - if filepath is not None: - paths.add(filepath) - api_surface = _as_mapping(metrics.get(_API_SURFACE_FAMILY)) - for item in _as_sequence(api_surface.get("items")): - item_map = _as_mapping(item) - filepath = _optional_str(item_map.get("filepath")) - if filepath is not None: - paths.add(filepath) - coverage_join = _as_mapping(metrics.get(_COVERAGE_JOIN_FAMILY)) - for item in _as_sequence(coverage_join.get("items")): - item_map = _as_mapping(item) - filepath = _optional_str(item_map.get("filepath")) - if filepath is not None: - paths.add(filepath) - return paths + from ..models import GroupMapLike, StructuralFindingGroup, SuppressedCloneGroup def _collect_report_file_list( @@ -414,2505 +52,46 @@ def _collect_report_file_list( metrics: Mapping[str, object] | None, structural_findings: Sequence[StructuralFindingGroup] | None, ) -> list[str]: - files: set[str] = set() - inventory_map = _as_mapping(inventory) - for filepath in _as_sequence(inventory_map.get("file_list")): - file_text = _optional_str(filepath) - if file_text is not None: - files.add(file_text) - for groups in (func_groups, block_groups, segment_groups): - for items in groups.values(): - for item in items: - filepath = _optional_str(item.get("filepath")) - if filepath is not None: - files.add(filepath) - for suppressed_group in suppressed_clone_groups or (): - for item in suppressed_group.items: - filepath = _optional_str(item.get("filepath")) - if filepath is not None: - files.add(filepath) - if metrics is not None: - files.update(_collect_paths_from_metrics(metrics)) - if structural_findings: - for structural_group in normalize_structural_findings(structural_findings): - for occurrence in 
structural_group.items: - filepath = _optional_str(occurrence.file_path) - if filepath is not None: - files.add(filepath) - return sorted(files) - - -def _count_file_lines(filepaths: Sequence[str]) -> int: - total = 0 - for filepath in filepaths: - total += _count_file_lines_for_path(filepath) - return total - - -def _count_file_lines_for_path(filepath: str) -> int: + original = _document_common.normalize_structural_findings + _document_common.normalize_structural_findings = normalize_structural_findings try: - with open(filepath, encoding="utf-8", errors="surrogateescape") as handle: - return sum(1 for _ in handle) - except OSError: - return 0 - - -def _normalize_nested_string_rows(value: object) -> list[list[str]]: - rows: list[tuple[str, ...]] = [] - for row in _as_sequence(value): - modules = tuple( - str(module) for module in _as_sequence(row) if str(module).strip() + return _document_common._collect_report_file_list( + inventory=inventory, + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + suppressed_clone_groups=suppressed_clone_groups, + metrics=metrics, + structural_findings=structural_findings, ) - if modules: - rows.append(modules) - rows.sort(key=lambda row: (len(row), row)) - return [list(row) for row in rows] - - -def _normalize_metrics_families( - metrics: Mapping[str, object] | None, - *, - scan_root: str, -) -> dict[str, object]: - metrics_map = _as_mapping(metrics) - complexity = _as_mapping(metrics_map.get(CATEGORY_COMPLEXITY)) - complexity_items = sorted( - ( - { - "qualname": str(item_map.get("qualname", "")), - "relative_path": _contract_path( - item_map.get("filepath", ""), - scan_root=scan_root, - )[0] - or "", - "start_line": _as_int(item_map.get("start_line")), - "end_line": _as_int(item_map.get("end_line")), - "cyclomatic_complexity": _as_int( - item_map.get("cyclomatic_complexity"), - 1, - ), - "nesting_depth": _as_int(item_map.get("nesting_depth")), - "risk": str(item_map.get("risk", RISK_LOW)), - 
} - for item in _as_sequence(complexity.get("functions")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - item["relative_path"], - item["start_line"], - item["end_line"], - item["qualname"], - ), - ) - - coupling = _as_mapping(metrics_map.get(CATEGORY_COUPLING)) - coupling_items = sorted( - ( - { - "qualname": str(item_map.get("qualname", "")), - "relative_path": _contract_path( - item_map.get("filepath", ""), - scan_root=scan_root, - )[0] - or "", - "start_line": _as_int(item_map.get("start_line")), - "end_line": _as_int(item_map.get("end_line")), - "cbo": _as_int(item_map.get("cbo")), - "risk": str(item_map.get("risk", RISK_LOW)), - "coupled_classes": sorted( - { - str(name) - for name in _as_sequence(item_map.get("coupled_classes")) - if str(name).strip() - } - ), - } - for item in _as_sequence(coupling.get("classes")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - item["relative_path"], - item["start_line"], - item["end_line"], - item["qualname"], - ), - ) - - cohesion = _as_mapping(metrics_map.get(CATEGORY_COHESION)) - cohesion_items = sorted( - ( - { - "qualname": str(item_map.get("qualname", "")), - "relative_path": _contract_path( - item_map.get("filepath", ""), - scan_root=scan_root, - )[0] - or "", - "start_line": _as_int(item_map.get("start_line")), - "end_line": _as_int(item_map.get("end_line")), - "lcom4": _as_int(item_map.get("lcom4")), - "risk": str(item_map.get("risk", RISK_LOW)), - "method_count": _as_int(item_map.get("method_count")), - "instance_var_count": _as_int(item_map.get("instance_var_count")), - } - for item in _as_sequence(cohesion.get("classes")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - item["relative_path"], - item["start_line"], - item["end_line"], - item["qualname"], - ), - ) - - dependencies = _as_mapping(metrics_map.get("dependencies")) - dependency_edges = sorted( - ( - { - "source": str(item_map.get("source", "")), - "target": str(item_map.get("target", "")), - 
"import_type": str(item_map.get("import_type", "")), - "line": _as_int(item_map.get("line")), - } - for item in _as_sequence(dependencies.get("edge_list")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - item["source"], - item["target"], - item["import_type"], - item["line"], - ), - ) - dependency_cycles = _normalize_nested_string_rows(dependencies.get("cycles")) - longest_chains = _normalize_nested_string_rows(dependencies.get("longest_chains")) - - dead_code = _as_mapping(metrics_map.get(FAMILY_DEAD_CODE)) - - def _normalize_suppressed_by( - raw_bindings: object, - ) -> list[dict[str, str]]: - normalized_bindings = sorted( - { - ( - str(binding_map.get("rule", "")).strip(), - str(binding_map.get("source", "")).strip(), - ) - for binding in _as_sequence(raw_bindings) - for binding_map in (_as_mapping(binding),) - if str(binding_map.get("rule", "")).strip() - }, - key=lambda item: (item[0], item[1]), - ) - if not normalized_bindings: - return [] - return [ - { - "rule": rule, - "source": source or INLINE_CODECLONE_SUPPRESSION_SOURCE, - } - for rule, source in normalized_bindings - ] - - dead_items = sorted( - ( - { - "qualname": str(item_map.get("qualname", "")), - "relative_path": _contract_path( - item_map.get("filepath", ""), - scan_root=scan_root, - )[0] - or "", - "start_line": _as_int(item_map.get("start_line")), - "end_line": _as_int(item_map.get("end_line")), - "kind": str(item_map.get("kind", "")), - "confidence": str(item_map.get("confidence", CONFIDENCE_MEDIUM)), - } - for item in _as_sequence(dead_code.get("items")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - item["relative_path"], - item["start_line"], - item["end_line"], - item["qualname"], - item["kind"], - ), - ) - dead_suppressed_items = sorted( - ( - { - "qualname": str(item_map.get("qualname", "")), - "relative_path": _contract_path( - item_map.get("filepath", ""), - scan_root=scan_root, - )[0] - or "", - "start_line": _as_int(item_map.get("start_line")), 
- "end_line": _as_int(item_map.get("end_line")), - "kind": str(item_map.get("kind", "")), - "confidence": str(item_map.get("confidence", CONFIDENCE_MEDIUM)), - "suppressed_by": _normalize_suppressed_by( - item_map.get("suppressed_by") - ), - } - for item in _as_sequence(dead_code.get("suppressed_items")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - item["relative_path"], - item["start_line"], - item["end_line"], - item["qualname"], - item["kind"], - item["confidence"], - tuple( - ( - str(_as_mapping(binding).get("rule", "")), - str(_as_mapping(binding).get("source", "")), - ) - for binding in _as_sequence(item.get("suppressed_by")) - ), - ), - ) - for item in dead_suppressed_items: - suppressed_by = _as_sequence(item.get("suppressed_by")) - first_binding = _as_mapping(suppressed_by[0]) if suppressed_by else {} - item["suppression_rule"] = str(first_binding.get("rule", "")) - item["suppression_source"] = str(first_binding.get("source", "")) + finally: + _document_common.normalize_structural_findings = original - health = _as_mapping(metrics_map.get("health")) - health_dimensions = { - str(key): _as_int(value) - for key, value in sorted(_as_mapping(health.get("dimensions")).items()) - } - overloaded_modules = _as_mapping(metrics_map.get(_OVERLOADED_MODULES_FAMILY)) - overloaded_modules_detection = _as_mapping(overloaded_modules.get("detection")) - overloaded_module_items = sorted( - ( - { - "module": str(item_map.get("module", "")).strip(), - "relative_path": _contract_path( - item_map.get("filepath", ""), - scan_root=scan_root, - )[0] - or "", - "source_kind": str(item_map.get("source_kind", SOURCE_KIND_OTHER)), - "loc": _as_int(item_map.get("loc")), - "functions": _as_int(item_map.get("functions")), - "methods": _as_int(item_map.get("methods")), - "classes": _as_int(item_map.get("classes")), - "callable_count": _as_int(item_map.get("callable_count")), - "complexity_total": _as_int(item_map.get("complexity_total")), - "complexity_max": 
_as_int(item_map.get("complexity_max")), - "fan_in": _as_int(item_map.get("fan_in")), - "fan_out": _as_int(item_map.get("fan_out")), - "total_deps": _as_int(item_map.get("total_deps")), - "import_edges": _as_int(item_map.get("import_edges")), - "reimport_edges": _as_int(item_map.get("reimport_edges")), - "reimport_ratio": round( - _as_float(item_map.get("reimport_ratio")), - 4, - ), - "instability": round(_as_float(item_map.get("instability")), 4), - "hub_balance": round(_as_float(item_map.get("hub_balance")), 4), - "size_score": round(_as_float(item_map.get("size_score")), 4), - "dependency_score": round( - _as_float(item_map.get("dependency_score")), - 4, - ), - "shape_score": round(_as_float(item_map.get("shape_score")), 4), - "score": round(_as_float(item_map.get("score")), 4), - "candidate_status": str( - item_map.get("candidate_status", "non_candidate") - ), - "candidate_reasons": [ - str(reason) - for reason in _as_sequence(item_map.get("candidate_reasons")) - if str(reason).strip() - ], - } - for item in _as_sequence(overloaded_modules.get("items")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - {"candidate": 0, "ranked_only": 1, "non_candidate": 2}.get( - str(item["candidate_status"]), - 3, - ), - -_as_float(item["score"]), - -_as_float(item["size_score"]), - -_as_float(item["dependency_score"]), - item["relative_path"], - item["module"], - ), - ) - complexity_summary = _as_mapping(complexity.get("summary")) - coupling_summary = _as_mapping(coupling.get("summary")) - cohesion_summary = _as_mapping(cohesion.get("summary")) - dead_code_summary = _as_mapping(dead_code.get("summary")) - overloaded_modules_summary = _as_mapping(overloaded_modules.get("summary")) - coverage_adoption = _as_mapping(metrics_map.get(_COVERAGE_ADOPTION_FAMILY)) - coverage_adoption_summary = _as_mapping(coverage_adoption.get("summary")) - coverage_adoption_items = sorted( - ( - { - "module": str(item_map.get("module", "")).strip(), - "relative_path": 
_contract_path( - item_map.get("filepath", ""), - scan_root=scan_root, - )[0] - or "", - "callable_count": _as_int(item_map.get("callable_count")), - "params_total": _as_int(item_map.get("params_total")), - "params_annotated": _as_int(item_map.get("params_annotated")), - "param_permille": _as_int(item_map.get("param_permille")), - "returns_total": _as_int(item_map.get("returns_total")), - "returns_annotated": _as_int(item_map.get("returns_annotated")), - "return_permille": _as_int(item_map.get("return_permille")), - "any_annotation_count": _as_int(item_map.get("any_annotation_count")), - "public_symbol_total": _as_int(item_map.get("public_symbol_total")), - "public_symbol_documented": _as_int( - item_map.get("public_symbol_documented") - ), - "docstring_permille": _as_int(item_map.get("docstring_permille")), - } - for item in _as_sequence(coverage_adoption.get("items")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - item["relative_path"], - item["module"], - ), - ) - api_surface = _as_mapping(metrics_map.get(_API_SURFACE_FAMILY)) - api_surface_summary = _as_mapping(api_surface.get("summary")) - api_surface_items = sorted( - ( - { - "record_kind": str(item_map.get("record_kind", "symbol")), - "module": str(item_map.get("module", "")).strip(), - "relative_path": _contract_path( - item_map.get("filepath", ""), - scan_root=scan_root, - )[0] - or "", - "qualname": str(item_map.get("qualname", "")), - "start_line": _as_int(item_map.get("start_line")), - "end_line": _as_int(item_map.get("end_line")), - "symbol_kind": str(item_map.get("symbol_kind", "")), - "exported_via": _optional_str(item_map.get("exported_via")), - "params_total": _as_int(item_map.get("params_total")), - "params": [ - { - "name": str(param_map.get("name", "")), - "kind": str(param_map.get("kind", "")), - "has_default": bool(param_map.get("has_default")), - "annotated": bool(param_map.get("annotated")), - } - for param in _as_sequence(item_map.get("params")) - for param_map in 
(_as_mapping(param),) - ], - "returns_annotated": bool(item_map.get("returns_annotated")), - "change_kind": _optional_str(item_map.get("change_kind")), - "detail": _optional_str(item_map.get("detail")), - } - for item in _as_sequence(api_surface.get("items")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - item["relative_path"], - item["start_line"], - item["end_line"], - item["qualname"], - item["record_kind"], - ), - ) - coverage_join = _as_mapping(metrics_map.get(_COVERAGE_JOIN_FAMILY)) - coverage_join_summary = _as_mapping(coverage_join.get("summary")) - coverage_join_items = sorted( - ( - { - "relative_path": _contract_path( - item_map.get("filepath", ""), - scan_root=scan_root, - )[0] - or "", - "qualname": str(item_map.get("qualname", "")).strip(), - "start_line": _as_int(item_map.get("start_line")), - "end_line": _as_int(item_map.get("end_line")), - "cyclomatic_complexity": _as_int( - item_map.get("cyclomatic_complexity"), - 1, - ), - "risk": str(item_map.get("risk", RISK_LOW)).strip() or RISK_LOW, - "executable_lines": _as_int(item_map.get("executable_lines")), - "covered_lines": _as_int(item_map.get("covered_lines")), - "coverage_permille": _as_int(item_map.get("coverage_permille")), - "coverage_status": str(item_map.get("coverage_status", "")).strip(), - "coverage_hotspot": bool(item_map.get("coverage_hotspot")), - "scope_gap_hotspot": bool(item_map.get("scope_gap_hotspot")), - } - for item in _as_sequence(coverage_join.get("items")) - for item_map in (_as_mapping(item),) - ), - key=lambda item: ( - 0 if bool(item["coverage_hotspot"]) else 1, - 0 if bool(item["scope_gap_hotspot"]) else 1, - {"high": 0, "medium": 1, "low": 2}.get(str(item["risk"]), 3), - _as_int(item["coverage_permille"]), - -_as_int(item["cyclomatic_complexity"]), - item["relative_path"], - _as_int(item["start_line"]), - item["qualname"], - ), - ) - dead_high_confidence = sum( - 1 - for item in dead_items - if str(_as_mapping(item).get("confidence", 
"")).strip().lower() - == CONFIDENCE_HIGH - ) - - normalized: dict[str, object] = { - CATEGORY_COMPLEXITY: { - "summary": { - "total": len(complexity_items), - "average": round(_as_float(complexity_summary.get("average")), 2), - "max": _as_int(complexity_summary.get("max")), - "high_risk": _as_int(complexity_summary.get("high_risk")), - }, - "items": complexity_items, - "items_truncated": False, - }, - CATEGORY_COUPLING: { - "summary": { - "total": len(coupling_items), - "average": round(_as_float(coupling_summary.get("average")), 2), - "max": _as_int(coupling_summary.get("max")), - "high_risk": _as_int(coupling_summary.get("high_risk")), - }, - "items": coupling_items, - "items_truncated": False, - }, - CATEGORY_COHESION: { - "summary": { - "total": len(cohesion_items), - "average": round(_as_float(cohesion_summary.get("average")), 2), - "max": _as_int(cohesion_summary.get("max")), - "low_cohesion": _as_int(cohesion_summary.get("low_cohesion")), - }, - "items": cohesion_items, - "items_truncated": False, - }, - "dependencies": { - "summary": { - "modules": _as_int(dependencies.get("modules")), - "edges": _as_int(dependencies.get("edges")), - "cycles": len(dependency_cycles), - "max_depth": _as_int(dependencies.get("max_depth")), - }, - "items": dependency_edges, - "cycles": dependency_cycles, - "longest_chains": longest_chains, - "items_truncated": False, - }, - FAMILY_DEAD_CODE: { - "summary": { - "total": len(dead_items), - "high_confidence": dead_high_confidence - or _as_int( - dead_code_summary.get( - "high_confidence", dead_code_summary.get("critical") - ) - ), - "suppressed": len(dead_suppressed_items) - or _as_int(dead_code_summary.get("suppressed")), - }, - "items": dead_items, - "suppressed_items": dead_suppressed_items, - "items_truncated": False, - }, - "health": { - "summary": { - "score": _as_int(health.get("score")), - "grade": str(health.get("grade", "")), - "dimensions": health_dimensions, - }, - "items": [], - "items_truncated": False, - }, - 
_COVERAGE_ADOPTION_FAMILY: { - "summary": { - "modules": len(coverage_adoption_items), - "params_total": _as_int(coverage_adoption_summary.get("params_total")), - "params_annotated": _as_int( - coverage_adoption_summary.get("params_annotated") - ), - "param_permille": _as_int( - coverage_adoption_summary.get("param_permille") - ), - "baseline_diff_available": bool( - coverage_adoption_summary.get("baseline_diff_available") - ), - "param_delta": _as_int(coverage_adoption_summary.get("param_delta")), - "returns_total": _as_int( - coverage_adoption_summary.get("returns_total") - ), - "returns_annotated": _as_int( - coverage_adoption_summary.get("returns_annotated") - ), - "return_permille": _as_int( - coverage_adoption_summary.get("return_permille") - ), - "return_delta": _as_int(coverage_adoption_summary.get("return_delta")), - "public_symbol_total": _as_int( - coverage_adoption_summary.get("public_symbol_total") - ), - "public_symbol_documented": _as_int( - coverage_adoption_summary.get("public_symbol_documented") - ), - "docstring_permille": _as_int( - coverage_adoption_summary.get("docstring_permille") - ), - "docstring_delta": _as_int( - coverage_adoption_summary.get("docstring_delta") - ), - "typing_any_count": _as_int( - coverage_adoption_summary.get("typing_any_count") - ), - }, - "items": coverage_adoption_items, - "items_truncated": False, - }, - _API_SURFACE_FAMILY: { - "summary": { - "enabled": bool(api_surface_summary.get("enabled")), - "baseline_diff_available": bool( - api_surface_summary.get("baseline_diff_available") - ), - "modules": _as_int(api_surface_summary.get("modules")), - "public_symbols": _as_int(api_surface_summary.get("public_symbols")), - "added": _as_int(api_surface_summary.get("added")), - "breaking": _as_int(api_surface_summary.get("breaking")), - "strict_types": bool(api_surface_summary.get("strict_types")), - }, - "items": api_surface_items, - "items_truncated": False, - }, - _OVERLOADED_MODULES_FAMILY: { - "summary": { - "total": 
len(overloaded_module_items), - "candidates": _as_int(overloaded_modules_summary.get("candidates")), - "population_status": str( - overloaded_modules_summary.get("population_status", "limited") - ), - "top_score": round( - _as_float(overloaded_modules_summary.get("top_score")), - 4, - ), - "average_score": round( - _as_float(overloaded_modules_summary.get("average_score")), - 4, - ), - "candidate_score_cutoff": round( - _as_float(overloaded_modules_summary.get("candidate_score_cutoff")), - 4, - ), - }, - "detection": { - "version": str(overloaded_modules_detection.get("version", "1")), - "scope": str(overloaded_modules_detection.get("scope", "report_only")), - "strategy": str( - overloaded_modules_detection.get( - "strategy", - "project_relative_composite", - ) - ), - "minimum_population": _as_int( - overloaded_modules_detection.get("minimum_population"), - ), - "size_signals": [ - str(signal) - for signal in _as_sequence( - overloaded_modules_detection.get("size_signals") - ) - if str(signal).strip() - ], - "dependency_signals": [ - str(signal) - for signal in _as_sequence( - overloaded_modules_detection.get("dependency_signals") - ) - if str(signal).strip() - ], - "shape_signals": [ - str(signal) - for signal in _as_sequence( - overloaded_modules_detection.get("shape_signals") - ) - if str(signal).strip() - ], - }, - "items": overloaded_module_items, - "items_truncated": False, - }, - } - if coverage_join_summary or coverage_join_items or coverage_join: - normalized[_COVERAGE_JOIN_FAMILY] = { - "summary": { - "status": str(coverage_join_summary.get("status", "")), - "source": _contract_path( - coverage_join_summary.get("source", ""), - scan_root=scan_root, - )[0], - "files": _as_int(coverage_join_summary.get("files")), - "units": _as_int(coverage_join_summary.get("units")), - "measured_units": _as_int(coverage_join_summary.get("measured_units")), - "overall_executable_lines": _as_int( - coverage_join_summary.get("overall_executable_lines") - ), - 
"overall_covered_lines": _as_int( - coverage_join_summary.get("overall_covered_lines") - ), - "overall_permille": _as_int( - coverage_join_summary.get("overall_permille") - ), - "missing_from_report_units": _as_int( - coverage_join_summary.get("missing_from_report_units") - ), - "coverage_hotspots": _as_int( - coverage_join_summary.get("coverage_hotspots") - ), - "scope_gap_hotspots": _as_int( - coverage_join_summary.get("scope_gap_hotspots") - ), - "hotspot_threshold_percent": _as_int( - coverage_join_summary.get("hotspot_threshold_percent") - ), - "invalid_reason": _optional_str( - coverage_join_summary.get("invalid_reason") - ), - }, - "items": coverage_join_items, - "items_truncated": False, - } - return normalized - - -def _build_metrics_payload( - metrics: Mapping[str, object] | None, - *, - scan_root: str, -) -> dict[str, object]: - families = _normalize_metrics_families(metrics, scan_root=scan_root) - return { - "summary": { - family_name: _as_mapping(_as_mapping(family_payload).get("summary")) - for family_name, family_payload in families.items() - }, - "families": families, - } - - -def _derive_inventory_code_counts( - *, - metrics_payload: Mapping[str, object], - inventory_code: Mapping[str, object], - file_list: Sequence[str], - cached_files: int, -) -> dict[str, object]: - complexity = _as_mapping( - _as_mapping(metrics_payload.get("families")).get(CATEGORY_COMPLEXITY) - ) - cohesion = _as_mapping( - _as_mapping(metrics_payload.get("families")).get(CATEGORY_COHESION) - ) - complexity_items = _as_sequence(complexity.get("items")) - cohesion_items = _as_sequence(cohesion.get("items")) - - exact_entities = bool(complexity_items or cohesion_items) - method_count = sum( - _as_int(_as_mapping(item).get("method_count")) for item in cohesion_items - ) - class_count = len(cohesion_items) - function_total = max(len(complexity_items) - method_count, 0) - - if not exact_entities: - function_total = _as_int(inventory_code.get("functions")) - method_count = 
_as_int(inventory_code.get("methods")) - class_count = _as_int(inventory_code.get("classes")) - - parsed_lines_raw = inventory_code.get("parsed_lines") - if isinstance(parsed_lines_raw, int) and parsed_lines_raw >= 0: - parsed_lines = parsed_lines_raw - elif cached_files > 0 and file_list: - parsed_lines = _count_file_lines(file_list) - else: - parsed_lines = _as_int(parsed_lines_raw) - - if exact_entities and ((cached_files > 0 and file_list) or parsed_lines > 0): - scope = "analysis_root" - elif cached_files > 0 and file_list: - scope = "mixed" - else: - scope = "current_run" - - return { - "scope": scope, - "parsed_lines": parsed_lines, - "functions": function_total, - "methods": method_count, - "classes": class_count, - } - - -def _build_inventory_payload( - *, - inventory: Mapping[str, object] | None, - file_list: Sequence[str], - metrics_payload: Mapping[str, object], - scan_root: str, -) -> dict[str, object]: - inventory_map = _as_mapping(inventory) - files_map = _as_mapping(inventory_map.get("files")) - code_map = _as_mapping(inventory_map.get("code")) - cached_files = _as_int(files_map.get("cached")) - file_registry = [ - path - for path in ( - _contract_path(filepath, scan_root=scan_root)[0] for filepath in file_list - ) - if path is not None - ] - return { - "files": { - "total_found": _as_int(files_map.get("total_found"), len(file_list)), - "analyzed": _as_int(files_map.get("analyzed")), - "cached": cached_files, - "skipped": _as_int(files_map.get("skipped")), - "source_io_skipped": _as_int(files_map.get("source_io_skipped")), - }, - "code": _derive_inventory_code_counts( - metrics_payload=metrics_payload, - inventory_code=code_map, - file_list=file_list, - cached_files=cached_files, - ), - "file_registry": { - "encoding": "relative_path", - "items": file_registry, - }, - } - - -def _baseline_is_trusted(meta: Mapping[str, object]) -> bool: - baseline = _as_mapping(meta.get("baseline")) - return ( - baseline.get("loaded") is True - and 
str(baseline.get("status", "")).strip().lower() == "ok" - ) - - -def _build_meta_payload( - raw_meta: Mapping[str, object] | None, - *, - scan_root: str, -) -> dict[str, object]: - meta = dict(raw_meta or {}) - metrics_computed = sorted( - { - str(item) - for item in _as_sequence(meta.get("metrics_computed")) - if str(item).strip() - } - ) - baseline_path, baseline_path_scope, baseline_abs = _contract_path( - meta.get("baseline_path"), - scan_root=scan_root, - ) - cache_path, cache_path_scope, cache_abs = _contract_path( - meta.get("cache_path"), - scan_root=scan_root, - ) - metrics_baseline_path, metrics_baseline_path_scope, metrics_baseline_abs = ( - _contract_path( - meta.get("metrics_baseline_path"), - scan_root=scan_root, - ) - ) - payload: dict[str, object] = { - "codeclone_version": str(meta.get("codeclone_version", "")), - "project_name": str(meta.get("project_name", "")), - "scan_root": ".", - "python_version": str(meta.get("python_version", "")), - "python_tag": str(meta.get("python_tag", "")), - "analysis_mode": str(meta.get("analysis_mode", "full") or "full"), - "report_mode": str(meta.get("report_mode", "full") or "full"), - "computed_metric_families": metrics_computed, - "analysis_thresholds": _design_findings_thresholds_payload(meta), - "baseline": { - "path": baseline_path, - "path_scope": baseline_path_scope, - "loaded": bool(meta.get("baseline_loaded")), - "status": _optional_str(meta.get("baseline_status")), - "fingerprint_version": _optional_str( - meta.get("baseline_fingerprint_version") - ), - "schema_version": _optional_str(meta.get("baseline_schema_version")), - "python_tag": _optional_str(meta.get("baseline_python_tag")), - "generator_name": _optional_str(meta.get("baseline_generator_name")), - "generator_version": _optional_str(meta.get("baseline_generator_version")), - "payload_sha256": _optional_str(meta.get("baseline_payload_sha256")), - "payload_sha256_verified": bool( - meta.get("baseline_payload_sha256_verified") - ), - }, - "cache": 
{ - "path": cache_path, - "path_scope": cache_path_scope, - "used": bool(meta.get("cache_used")), - "status": _optional_str(meta.get("cache_status")), - "schema_version": _optional_str(meta.get("cache_schema_version")), - }, - "metrics_baseline": { - "path": metrics_baseline_path, - "path_scope": metrics_baseline_path_scope, - "loaded": bool(meta.get("metrics_baseline_loaded")), - "status": _optional_str(meta.get("metrics_baseline_status")), - "schema_version": _optional_str( - meta.get("metrics_baseline_schema_version") - ), - "payload_sha256": _optional_str( - meta.get("metrics_baseline_payload_sha256") - ), - "payload_sha256_verified": bool( - meta.get("metrics_baseline_payload_sha256_verified") - ), - }, - "runtime": { - "analysis_started_at_utc": _optional_str( - meta.get("analysis_started_at_utc") - ), - "report_generated_at_utc": _optional_str( - meta.get("report_generated_at_utc") - ), - "scan_root_absolute": _optional_str(meta.get("scan_root")), - "baseline_path_absolute": baseline_abs, - "cache_path_absolute": cache_abs, - "metrics_baseline_path_absolute": metrics_baseline_abs, - }, - } - analysis_profile = _analysis_profile_payload(meta) - if analysis_profile is not None: - payload["analysis_profile"] = analysis_profile - return payload - - -def _clone_group_assessment( - *, - count: int, - clone_type: str, -) -> tuple[str, float]: - match (count >= 4, clone_type in {"Type-1", "Type-2"}): - case (True, _): - severity = SEVERITY_CRITICAL - case (False, True): - severity = SEVERITY_WARNING - case _: - severity = SEVERITY_INFO - effort = "easy" if clone_type in {"Type-1", "Type-2"} else "moderate" - return severity, _priority(severity, effort) - - -def _build_clone_group_facts( - *, - group_key: str, - kind: Literal["function", "block", "segment"], - items: Sequence[GroupItemLike], - block_facts: Mapping[str, Mapping[str, str]], -) -> tuple[dict[str, object], dict[str, str]]: - base: dict[str, object] = { - "group_key": group_key, - "group_arity": 
len(items), - } - display_facts: dict[str, str] = {} - match kind: - case "function": - loc_buckets = sorted( - { - str(item.get("loc_bucket", "")) - for item in items - if str(item.get("loc_bucket", "")).strip() - } - ) - base["loc_buckets"] = loc_buckets - case "block" if group_key in block_facts: - typed_facts, block_display_facts = _normalize_block_machine_facts( - group_key=group_key, - group_arity=len(items), - block_facts=block_facts[group_key], - ) - base.update(typed_facts) - display_facts.update(block_display_facts) - case _: - pass - return base, display_facts - - -def _clone_item_payload( - item: GroupItemLike, - *, - kind: Literal["function", "block", "segment"], - scan_root: str, -) -> dict[str, object]: - payload: dict[str, object] = { - "relative_path": _contract_report_location_path( - str(item.get("filepath", "")), - scan_root=scan_root, - ), - "qualname": str(item.get("qualname", "")), - "start_line": _as_int(item.get("start_line", 0)), - "end_line": _as_int(item.get("end_line", 0)), - } - match kind: - case "function": - payload.update( - { - "loc": _as_int(item.get("loc", 0)), - "stmt_count": _as_int(item.get("stmt_count", 0)), - "fingerprint": str(item.get("fingerprint", "")), - "loc_bucket": str(item.get("loc_bucket", "")), - "cyclomatic_complexity": _as_int( - item.get("cyclomatic_complexity", 1) - ), - "nesting_depth": _as_int(item.get("nesting_depth", 0)), - "risk": str(item.get("risk", RISK_LOW)), - "raw_hash": str(item.get("raw_hash", "")), - } - ) - case "block": - payload["size"] = _as_int(item.get("size", 0)) - case _: - payload.update( - { - "size": _as_int(item.get("size", 0)), - "segment_hash": str(item.get("segment_hash", "")), - "segment_sig": str(item.get("segment_sig", "")), - } - ) - return payload - - -def _build_clone_groups( - *, - groups: GroupMapLike, - kind: Literal["function", "block", "segment"], - baseline_trusted: bool, - new_keys: Collection[str] | None, - block_facts: Mapping[str, Mapping[str, str]], - scan_root: 
str, -) -> list[dict[str, object]]: - encoded_groups: list[dict[str, object]] = [] - new_key_set = set(new_keys) if new_keys is not None else None - for group_key in sorted(groups): - items = groups[group_key] - clone_type = classify_clone_type(items=items, kind=kind) - severity, priority = _clone_group_assessment( - count=len(items), - clone_type=clone_type, - ) - novelty = _clone_novelty( - group_key=group_key, - baseline_trusted=baseline_trusted, - new_keys=new_key_set, - ) - locations = tuple( - report_location_from_group_item(item, scan_root=scan_root) for item in items - ) - source_scope = _source_scope_from_locations( - [ - { - "source_kind": location.source_kind, - } - for location in locations - ] - ) - spread_files, spread_functions = group_spread(locations) - rows = sorted( - [ - _clone_item_payload( - item, - kind=kind, - scan_root=scan_root, - ) - for item in items - ], - key=_item_sort_key, - ) - facts, display_facts = _build_clone_group_facts( - group_key=group_key, - kind=kind, - items=items, - block_facts=block_facts, - ) - encoded_groups.append( - { - "id": clone_group_id(kind, group_key), - "family": FAMILY_CLONE, - "category": kind, - "kind": "clone_group", - "severity": severity, - "confidence": CONFIDENCE_HIGH, - "priority": priority, - "clone_kind": kind, - "clone_type": clone_type, - "novelty": novelty, - "count": len(items), - "source_scope": source_scope, - "spread": { - "files": spread_files, - "functions": spread_functions, - }, - "items": rows, - "facts": facts, - **({"display_facts": display_facts} if display_facts else {}), - } - ) - encoded_groups.sort( - key=lambda group: (-_as_int(group.get("count")), str(group["id"])) - ) - return encoded_groups - - -def _build_suppressed_clone_groups( - *, - groups: Sequence[SuppressedCloneGroup] | None, - block_facts: Mapping[str, Mapping[str, str]], - scan_root: str, -) -> dict[str, list[dict[str, object]]]: - buckets: dict[str, list[dict[str, object]]] = { - CLONE_KIND_FUNCTION: [], - 
CLONE_KIND_BLOCK: [], - CLONE_KIND_SEGMENT: [], - } - for group in groups or (): - items = group.items - clone_type = classify_clone_type(items=items, kind=group.kind) - severity, priority = _clone_group_assessment( - count=len(items), - clone_type=clone_type, - ) - locations = tuple( - report_location_from_group_item(item, scan_root=scan_root) for item in items - ) - source_scope = _source_scope_from_locations( - [ - { - "source_kind": location.source_kind, - } - for location in locations - ] - ) - spread_files, spread_functions = group_spread(locations) - rows = sorted( - [ - _clone_item_payload( - item, - kind=group.kind, - scan_root=scan_root, - ) - for item in items - ], - key=_item_sort_key, - ) - facts, display_facts = _build_clone_group_facts( - group_key=group.group_key, - kind=group.kind, - items=items, - block_facts=block_facts, - ) - encoded: dict[str, object] = { - "id": clone_group_id(group.kind, group.group_key), - "family": FAMILY_CLONE, - "category": group.kind, - "kind": "clone_group", - "severity": severity, - "confidence": CONFIDENCE_HIGH, - "priority": priority, - "clone_kind": group.kind, - "clone_type": clone_type, - "count": len(items), - "source_scope": source_scope, - "spread": { - "files": spread_files, - "functions": spread_functions, - }, - "items": rows, - "facts": facts, - "suppression_rule": group.suppression_rule, - "suppression_source": group.suppression_source, - "matched_patterns": list(group.matched_patterns), - } - if display_facts: - encoded["display_facts"] = display_facts - buckets[group.kind].append(encoded) - for bucket in buckets.values(): - bucket.sort(key=lambda group: (-_as_int(group.get("count")), str(group["id"]))) - return buckets - - -def _structural_group_assessment( - *, - finding_kind: str, - count: int, - spread_functions: int, -) -> tuple[str, float]: - match finding_kind: - case "clone_guard_exit_divergence" | "clone_cohort_drift": - severity = SEVERITY_WARNING - if count >= 3 or spread_functions > 1: - 
severity = SEVERITY_CRITICAL - return severity, _priority(severity, "moderate") - case _: - severity = ( - SEVERITY_WARNING - if count >= 4 or spread_functions > 1 - else SEVERITY_INFO - ) - return severity, _priority(severity, "moderate") - - -def _csv_values(value: object) -> list[str]: - raw = str(value).strip() - if not raw: - return [] - return sorted({part.strip() for part in raw.split(",") if part.strip()}) - - -def _build_structural_signature( - finding_kind: str, - signature: Mapping[str, str], -) -> dict[str, object]: - debug = {str(key): str(signature[key]) for key in sorted(signature)} - match finding_kind: - case "clone_guard_exit_divergence": - return { - "version": "1", - "stable": { - "family": "clone_guard_exit_divergence", - "cohort_id": str(signature.get("cohort_id", "")), - "majority_guard_count": _as_int( - signature.get("majority_guard_count") - ), - "majority_guard_terminal_profile": str( - signature.get("majority_guard_terminal_profile", "none") - ), - "majority_terminal_kind": str( - signature.get("majority_terminal_kind", "fallthrough") - ), - "majority_side_effect_before_guard": ( - str(signature.get("majority_side_effect_before_guard", "0")) - == "1" - ), - }, - "debug": debug, - } - case "clone_cohort_drift": - return { - "version": "1", - "stable": { - "family": "clone_cohort_drift", - "cohort_id": str(signature.get("cohort_id", "")), - "drift_fields": _csv_values(signature.get("drift_fields")), - "majority_profile": { - "terminal_kind": str( - signature.get("majority_terminal_kind", "") - ), - "guard_exit_profile": str( - signature.get("majority_guard_exit_profile", "") - ), - "try_finally_profile": str( - signature.get("majority_try_finally_profile", "") - ), - "side_effect_order_profile": str( - signature.get("majority_side_effect_order_profile", "") - ), - }, - }, - "debug": debug, - } - case _: - return { - "version": "1", - "stable": { - "family": "duplicated_branches", - "stmt_shape": str(signature.get("stmt_seq", "")), - 
"terminal_kind": str(signature.get("terminal", "")), - "control_flow": { - "has_loop": str(signature.get("has_loop", "0")) == "1", - "has_try": str(signature.get("has_try", "0")) == "1", - "nested_if": str(signature.get("nested_if", "0")) == "1", - }, - }, - "debug": debug, - } - - -def _build_structural_facts( - finding_kind: str, - signature: Mapping[str, str], - *, - count: int, -) -> dict[str, object]: - match finding_kind: - case "clone_guard_exit_divergence": - return { - "cohort_id": str(signature.get("cohort_id", "")), - "cohort_arity": _as_int(signature.get("cohort_arity")), - "divergent_members": _as_int(signature.get("divergent_members"), count), - "majority_entry_guard_count": _as_int( - signature.get("majority_guard_count"), - ), - "majority_guard_terminal_profile": str( - signature.get("majority_guard_terminal_profile", "none") - ), - "majority_terminal_kind": str( - signature.get("majority_terminal_kind", "fallthrough") - ), - "majority_side_effect_before_guard": ( - str(signature.get("majority_side_effect_before_guard", "0")) == "1" - ), - "guard_count_values": _csv_values(signature.get("guard_count_values")), - "guard_terminal_values": _csv_values( - signature.get("guard_terminal_values"), - ), - "terminal_values": _csv_values(signature.get("terminal_values")), - "side_effect_before_guard_values": _csv_values( - signature.get("side_effect_before_guard_values"), - ), - } - case "clone_cohort_drift": - return { - "cohort_id": str(signature.get("cohort_id", "")), - "cohort_arity": _as_int(signature.get("cohort_arity")), - "divergent_members": _as_int(signature.get("divergent_members"), count), - "drift_fields": _csv_values(signature.get("drift_fields")), - "stable_majority_profile": { - "terminal_kind": str(signature.get("majority_terminal_kind", "")), - "guard_exit_profile": str( - signature.get("majority_guard_exit_profile", "") - ), - "try_finally_profile": str( - signature.get("majority_try_finally_profile", "") - ), - "side_effect_order_profile": 
str( - signature.get("majority_side_effect_order_profile", "") - ), - }, - } - case _: - return { - "occurrence_count": count, - "non_overlapping": True, - "call_bucket": _as_int(signature.get("calls", "0")), - "raise_bucket": _as_int(signature.get("raises", "0")), - } - - -def _build_structural_groups( - groups: Sequence[StructuralFindingGroup] | None, - *, - scan_root: str, -) -> list[dict[str, object]]: - normalized_groups = normalize_structural_findings(groups or ()) - out: list[dict[str, object]] = [] - for group in normalized_groups: - locations = tuple( - report_location_from_structural_occurrence(item, scan_root=scan_root) - for item in group.items - ) - source_scope = _source_scope_from_locations( - [{"source_kind": location.source_kind} for location in locations] - ) - spread_files, spread_functions = group_spread(locations) - severity, priority = _structural_group_assessment( - finding_kind=group.finding_kind, - count=len(group.items), - spread_functions=spread_functions, - ) - out.append( - { - "id": structural_group_id(group.finding_kind, group.finding_key), - "family": FAMILY_STRUCTURAL, - "category": group.finding_kind, - "kind": group.finding_kind, - "severity": severity, - "confidence": ( - CONFIDENCE_HIGH - if group.finding_kind - in {"clone_guard_exit_divergence", "clone_cohort_drift"} - else CONFIDENCE_MEDIUM - ), - "priority": priority, - "count": len(group.items), - "source_scope": source_scope, - "spread": { - "files": spread_files, - "functions": spread_functions, - }, - "signature": _build_structural_signature( - group.finding_kind, - group.signature, - ), - "items": sorted( - [ - { - "relative_path": _contract_report_location_path( - item.file_path, - scan_root=scan_root, - ), - "qualname": item.qualname, - "start_line": item.start, - "end_line": item.end, - } - for item in group.items - ], - key=_item_sort_key, - ), - "facts": _build_structural_facts( - group.finding_kind, - group.signature, - count=len(group.items), - ), - } - ) - 
out.sort(key=lambda group: (-_as_int(group.get("count")), str(group["id"]))) - return out - - -def _single_location_source_scope( - filepath: str, - *, - scan_root: str, -) -> dict[str, object]: - location = report_location_from_group_item( - { - "filepath": filepath, - "qualname": "", - "start_line": 0, - "end_line": 0, - }, - scan_root=scan_root, - ) - return _source_scope_from_locations([{"source_kind": location.source_kind}]) - - -def _build_dead_code_groups( - metrics_payload: Mapping[str, object], - *, - scan_root: str, -) -> list[dict[str, object]]: - families = _as_mapping(metrics_payload.get("families")) - dead_code = _as_mapping(families.get(FAMILY_DEAD_CODE)) - groups: list[dict[str, object]] = [] - for item in _as_sequence(dead_code.get("items")): - item_map = _as_mapping(item) - qualname = str(item_map.get("qualname", "")) - filepath = str(item_map.get("relative_path", "")) - confidence = str(item_map.get("confidence", CONFIDENCE_MEDIUM)) - severity = SEVERITY_WARNING if confidence == CONFIDENCE_HIGH else SEVERITY_INFO - groups.append( - { - "id": dead_code_group_id(qualname), - "family": FAMILY_DEAD_CODE, - "category": str(item_map.get("kind", "unknown")), - "kind": "unused_symbol", - "severity": severity, - "confidence": confidence, - "priority": _priority(severity, EFFORT_EASY), - "count": 1, - "source_scope": _single_location_source_scope( - filepath, - scan_root=scan_root, - ), - "spread": {"files": 1, "functions": 1 if qualname else 0}, - "items": [ - { - "relative_path": _contract_report_location_path( - filepath, - scan_root=scan_root, - ), - "qualname": qualname, - "start_line": _as_int(item_map.get("start_line")), - "end_line": _as_int(item_map.get("end_line")), - } - ], - "facts": { - "kind": str(item_map.get("kind", "unknown")), - "confidence": confidence, - }, - } - ) - groups.sort(key=lambda group: (-_as_float(group["priority"]), str(group["id"]))) - return groups - - -def _design_singleton_group( - *, - category: str, - kind: str, - 
severity: str, - qualname: str, - filepath: str, - start_line: int, - end_line: int, - scan_root: str, - item_data: Mapping[str, object], - facts: Mapping[str, object], -) -> dict[str, object]: - return { - "id": design_group_id(category, qualname), - "family": FAMILY_DESIGN, - "category": category, - "kind": kind, - "severity": severity, - "confidence": CONFIDENCE_HIGH, - "priority": _priority(severity, EFFORT_MODERATE), - "count": 1, - "source_scope": _single_location_source_scope( - filepath, - scan_root=scan_root, - ), - "spread": {"files": 1, "functions": 1}, - "items": [ - { - "relative_path": _contract_report_location_path( - filepath, - scan_root=scan_root, - ), - "qualname": qualname, - "start_line": start_line, - "end_line": end_line, - **item_data, - } - ], - "facts": dict(facts), - } - - -def _complexity_design_group( - item_map: Mapping[str, object], - *, - threshold: int, - scan_root: str, -) -> dict[str, object] | None: - cc = _as_int(item_map.get("cyclomatic_complexity"), 1) - if cc <= threshold: - return None - qualname = str(item_map.get("qualname", "")) - filepath = str(item_map.get("relative_path", "")) - nesting_depth = _as_int(item_map.get("nesting_depth")) - severity = SEVERITY_CRITICAL if cc > 40 else SEVERITY_WARNING - return _design_singleton_group( - category=CATEGORY_COMPLEXITY, - kind="function_hotspot", - severity=severity, - qualname=qualname, - filepath=filepath, - start_line=_as_int(item_map.get("start_line")), - end_line=_as_int(item_map.get("end_line")), - scan_root=scan_root, - item_data={ - "cyclomatic_complexity": cc, - "nesting_depth": nesting_depth, - "risk": str(item_map.get("risk", RISK_LOW)), - }, - facts={ - "cyclomatic_complexity": cc, - "nesting_depth": nesting_depth, - }, - ) - - -def _coupling_design_group( - item_map: Mapping[str, object], - *, - threshold: int, - scan_root: str, -) -> dict[str, object] | None: - cbo = _as_int(item_map.get("cbo")) - if cbo <= threshold: - return None - qualname = 
str(item_map.get("qualname", "")) - filepath = str(item_map.get("relative_path", "")) - coupled_classes = list(_as_sequence(item_map.get("coupled_classes"))) - return _design_singleton_group( - category=CATEGORY_COUPLING, - kind="class_hotspot", - severity=SEVERITY_WARNING, - qualname=qualname, - filepath=filepath, - start_line=_as_int(item_map.get("start_line")), - end_line=_as_int(item_map.get("end_line")), - scan_root=scan_root, - item_data={ - "cbo": cbo, - "risk": str(item_map.get("risk", RISK_LOW)), - "coupled_classes": coupled_classes, - }, - facts={ - "cbo": cbo, - "coupled_classes": coupled_classes, - }, - ) - - -def _cohesion_design_group( - item_map: Mapping[str, object], - *, - threshold: int, - scan_root: str, -) -> dict[str, object] | None: - lcom4 = _as_int(item_map.get("lcom4")) - if lcom4 < threshold: - return None - qualname = str(item_map.get("qualname", "")) - filepath = str(item_map.get("relative_path", "")) - method_count = _as_int(item_map.get("method_count")) - instance_var_count = _as_int(item_map.get("instance_var_count")) - return _design_singleton_group( - category=CATEGORY_COHESION, - kind="class_hotspot", - severity=SEVERITY_WARNING, - qualname=qualname, - filepath=filepath, - start_line=_as_int(item_map.get("start_line")), - end_line=_as_int(item_map.get("end_line")), - scan_root=scan_root, - item_data={ - "lcom4": lcom4, - "risk": str(item_map.get("risk", RISK_LOW)), - "method_count": method_count, - "instance_var_count": instance_var_count, - }, - facts={ - "lcom4": lcom4, - "method_count": method_count, - "instance_var_count": instance_var_count, - }, - ) - - -def _dependency_design_group( - cycle: object, - *, - scan_root: str, -) -> dict[str, object] | None: - modules = [str(module) for module in _as_sequence(cycle) if str(module).strip()] - if not modules: - return None - cycle_key = " -> ".join(modules) - return { - "id": design_group_id(CATEGORY_DEPENDENCY, cycle_key), - "family": FAMILY_DESIGN, - "category": 
CATEGORY_DEPENDENCY, - "kind": "cycle", - "severity": SEVERITY_CRITICAL, - "confidence": CONFIDENCE_HIGH, - "priority": _priority(SEVERITY_CRITICAL, EFFORT_HARD), - "count": len(modules), - "source_scope": _source_scope_from_filepaths( - (module.replace(".", "/") + ".py" for module in modules), - scan_root=scan_root, - ), - "spread": {"files": len(modules), "functions": 0}, - "items": [ - { - "module": module, - "relative_path": module.replace(".", "/") + ".py", - "source_kind": report_location_from_group_item( - { - "filepath": module.replace(".", "/") + ".py", - "qualname": "", - "start_line": 0, - "end_line": 0, - } - ).source_kind, - } - for module in modules - ], - "facts": { - "cycle_length": len(modules), - }, - } - - -def _coverage_design_group( - item_map: Mapping[str, object], - *, - threshold_percent: int, - scan_root: str, -) -> dict[str, object] | None: - coverage_hotspot = bool(item_map.get("coverage_hotspot")) - scope_gap_hotspot = bool(item_map.get("scope_gap_hotspot")) - if not coverage_hotspot and not scope_gap_hotspot: - return None - qualname = str(item_map.get("qualname", "")).strip() - filepath = str(item_map.get("relative_path", "")).strip() - if not filepath: - return None - start_line = _as_int(item_map.get("start_line")) - end_line = _as_int(item_map.get("end_line")) - subject_key = qualname or f"{filepath}:{start_line}:{end_line}" - risk = str(item_map.get("risk", RISK_LOW)).strip() or RISK_LOW - coverage_status = str(item_map.get("coverage_status", "")).strip() - coverage_permille = _as_int(item_map.get("coverage_permille")) - covered_lines = _as_int(item_map.get("covered_lines")) - executable_lines = _as_int(item_map.get("executable_lines")) - complexity = _as_int(item_map.get("cyclomatic_complexity"), 1) - severity = SEVERITY_CRITICAL if risk == "high" else SEVERITY_WARNING - if scope_gap_hotspot: - kind = FINDING_KIND_COVERAGE_SCOPE_GAP - detail = "The supplied coverage.xml did not map to this function's file." 
- else: - kind = FINDING_KIND_COVERAGE_HOTSPOT - detail = "Joined line coverage is below the configured hotspot threshold." - return { - "id": design_group_id(CATEGORY_COVERAGE, subject_key), - "family": FAMILY_DESIGN, - "category": CATEGORY_COVERAGE, - "kind": kind, - "severity": severity, - "confidence": CONFIDENCE_HIGH, - "priority": _priority(severity, EFFORT_MODERATE), - "count": 1, - "source_scope": _single_location_source_scope( - filepath, - scan_root=scan_root, - ), - "spread": {"files": 1, "functions": 1}, - "items": [ - { - "relative_path": filepath, - "qualname": qualname, - "start_line": start_line, - "end_line": end_line, - "risk": risk, - "cyclomatic_complexity": complexity, - "coverage_permille": coverage_permille, - "coverage_status": coverage_status, - "covered_lines": covered_lines, - "executable_lines": executable_lines, - "coverage_hotspot": coverage_hotspot, - "scope_gap_hotspot": scope_gap_hotspot, - } - ], - "facts": { - "coverage_permille": coverage_permille, - "hotspot_threshold_percent": threshold_percent, - "coverage_status": coverage_status, - "covered_lines": covered_lines, - "executable_lines": executable_lines, - "cyclomatic_complexity": complexity, - "coverage_hotspot": coverage_hotspot, - "scope_gap_hotspot": scope_gap_hotspot, - "detail": detail, - }, - } - - -def _build_design_groups( - metrics_payload: Mapping[str, object], - *, - design_thresholds: Mapping[str, object] | None = None, - scan_root: str, -) -> list[dict[str, object]]: - families = _as_mapping(metrics_payload.get("families")) - thresholds = _as_mapping(design_thresholds) - complexity_threshold = _coerced_nonnegative_threshold( - _as_mapping(thresholds.get(CATEGORY_COMPLEXITY)).get("value"), - default=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - ) - coupling_threshold = _coerced_nonnegative_threshold( - _as_mapping(thresholds.get(CATEGORY_COUPLING)).get("value"), - default=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - ) - cohesion_threshold = 
_coerced_nonnegative_threshold( - _as_mapping(thresholds.get(CATEGORY_COHESION)).get("value"), - default=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - ) - coverage_join = _as_mapping(families.get(_COVERAGE_JOIN_FAMILY)) - coverage_threshold = _as_int( - _as_mapping(coverage_join.get("summary")).get("hotspot_threshold_percent"), - 50, - ) - groups: list[dict[str, object]] = [] - - complexity = _as_mapping(families.get(CATEGORY_COMPLEXITY)) - for item in _as_sequence(complexity.get("items")): - group = _complexity_design_group( - _as_mapping(item), - threshold=complexity_threshold, - scan_root=scan_root, - ) - if group is not None: - groups.append(group) - - coupling = _as_mapping(families.get(CATEGORY_COUPLING)) - for item in _as_sequence(coupling.get("items")): - group = _coupling_design_group( - _as_mapping(item), - threshold=coupling_threshold, - scan_root=scan_root, - ) - if group is not None: - groups.append(group) - - cohesion = _as_mapping(families.get(CATEGORY_COHESION)) - for item in _as_sequence(cohesion.get("items")): - group = _cohesion_design_group( - _as_mapping(item), - threshold=cohesion_threshold, - scan_root=scan_root, - ) - if group is not None: - groups.append(group) - - dependencies = _as_mapping(families.get("dependencies")) - for cycle in _as_sequence(dependencies.get("cycles")): - group = _dependency_design_group(cycle, scan_root=scan_root) - if group is not None: - groups.append(group) - - for item in _as_sequence(coverage_join.get("items")): - group = _coverage_design_group( - _as_mapping(item), - threshold_percent=coverage_threshold, - scan_root=scan_root, - ) - if group is not None: - groups.append(group) - - groups.sort(key=lambda group: (-_as_float(group["priority"]), str(group["id"]))) - return groups - - -def _findings_summary( - *, - clone_functions: Sequence[Mapping[str, object]], - clone_blocks: Sequence[Mapping[str, object]], - clone_segments: Sequence[Mapping[str, object]], - structural_groups: Sequence[Mapping[str, object]], - 
dead_code_groups: Sequence[Mapping[str, object]], - design_groups: Sequence[Mapping[str, object]], - suppressed_clone_groups: Mapping[str, Sequence[Mapping[str, object]]] | None = None, - dead_code_suppressed: int = 0, -) -> dict[str, object]: - flat_groups = [ - *clone_functions, - *clone_blocks, - *clone_segments, - *structural_groups, - *dead_code_groups, - *design_groups, - ] - severity_counts = dict.fromkeys( - (SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO), - 0, - ) - source_scope_counts = dict.fromkeys( - (IMPACT_SCOPE_RUNTIME, IMPACT_SCOPE_NON_RUNTIME, IMPACT_SCOPE_MIXED), - 0, - ) - for group in flat_groups: - severity = str(group.get("severity", SEVERITY_INFO)) - if severity in severity_counts: - severity_counts[severity] += 1 - impact_scope = str( - _as_mapping(group.get("source_scope")).get( - "impact_scope", - IMPACT_SCOPE_NON_RUNTIME, - ) - ) - if impact_scope in source_scope_counts: - source_scope_counts[impact_scope] += 1 - clone_groups = [*clone_functions, *clone_blocks, *clone_segments] - clone_suppressed_map = _as_mapping(suppressed_clone_groups) - suppressed_functions = len(_as_sequence(clone_suppressed_map.get("function"))) - suppressed_blocks = len(_as_sequence(clone_suppressed_map.get("block"))) - suppressed_segments = len(_as_sequence(clone_suppressed_map.get("segment"))) - suppressed_clone_total = ( - suppressed_functions + suppressed_blocks + suppressed_segments - ) - clones_summary: dict[str, object] = { - "functions": len(clone_functions), - "blocks": len(clone_blocks), - "segments": len(clone_segments), - CLONE_NOVELTY_NEW: sum( - 1 - for group in clone_groups - if str(group.get("novelty", "")) == CLONE_NOVELTY_NEW - ), - CLONE_NOVELTY_KNOWN: sum( - 1 - for group in clone_groups - if str(group.get("novelty", "")) == CLONE_NOVELTY_KNOWN - ), - } - if suppressed_clone_total > 0: - clones_summary.update( - { - "suppressed": suppressed_clone_total, - "suppressed_functions": suppressed_functions, - "suppressed_blocks": 
suppressed_blocks, - "suppressed_segments": suppressed_segments, - } - ) - suppressed_summary = { - FAMILY_DEAD_CODE: max(0, dead_code_suppressed), - } - if suppressed_clone_total > 0: - suppressed_summary[FAMILY_CLONES] = suppressed_clone_total - return { - "total": len(flat_groups), - "families": { - FAMILY_CLONES: len(clone_groups), - FAMILY_STRUCTURAL: len(structural_groups), - FAMILY_DEAD_CODE: len(dead_code_groups), - "design": len(design_groups), - }, - "severity": severity_counts, - "impact_scope": source_scope_counts, - "clones": clones_summary, - "suppressed": suppressed_summary, - } - - -def _sort_flat_finding_ids( - groups: Sequence[Mapping[str, object]], -) -> list[str]: - ordered = sorted( - groups, - key=lambda group: ( - -_as_float(group.get("priority")), - SEVERITY_ORDER.get(str(group.get("severity", SEVERITY_INFO)), 9), - -_as_int(_as_mapping(group.get("spread")).get("files")), - -_as_int(_as_mapping(group.get("spread")).get("functions")), - -_as_int(group.get("count")), - str(group.get("id", "")), - ), - ) - return [str(group["id"]) for group in ordered] - - -def _sort_highest_spread_ids( - groups: Sequence[Mapping[str, object]], -) -> list[str]: - ordered = sorted( - groups, - key=lambda group: ( - -_as_int(_as_mapping(group.get("spread")).get("files")), - -_as_int(_as_mapping(group.get("spread")).get("functions")), - -_as_int(group.get("count")), - -_as_float(group.get("priority")), - str(group.get("id", "")), - ), - ) - return [str(group["id"]) for group in ordered] - - -def _health_snapshot(metrics_payload: Mapping[str, object]) -> dict[str, object]: - health = _as_mapping(_as_mapping(metrics_payload.get("families")).get("health")) - summary = _as_mapping(health.get("summary")) - dimensions = { - str(key): _as_int(value) - for key, value in _as_mapping(summary.get("dimensions")).items() - } - strongest = None - weakest = None - if dimensions: - strongest = min( - sorted(dimensions), - key=lambda key: (-dimensions[key], key), - ) - weakest = 
min( - sorted(dimensions), - key=lambda key: (dimensions[key], key), - ) - return { - "score": _as_int(summary.get("score")), - "grade": str(summary.get("grade", "")), - "strongest_dimension": strongest, - "weakest_dimension": weakest, - } - - -def _combined_impact_scope(groups: Sequence[Mapping[str, object]]) -> str: - impact_scopes = { - str( - _as_mapping(group.get("source_scope")).get( - "impact_scope", - IMPACT_SCOPE_NON_RUNTIME, - ) - ) - for group in groups - } - if not impact_scopes: - return IMPACT_SCOPE_NON_RUNTIME - if len(impact_scopes) == 1: - return next(iter(impact_scopes)) - return IMPACT_SCOPE_MIXED - - -def _top_risks( - *, - dead_code_groups: Sequence[Mapping[str, object]], - design_groups: Sequence[Mapping[str, object]], - structural_groups: Sequence[Mapping[str, object]], - clone_groups: Sequence[Mapping[str, object]], -) -> list[dict[str, object]]: - risks: list[dict[str, object]] = [] - - if dead_code_groups: - label = ( - "1 dead code item" - if len(dead_code_groups) == 1 - else f"{len(dead_code_groups)} dead code items" - ) - risks.append( - { - "kind": "family_summary", - "family": FAMILY_DEAD_CODE, - "count": len(dead_code_groups), - "scope": IMPACT_SCOPE_MIXED - if len( - { - _as_mapping(group.get("source_scope")).get("impact_scope") - for group in dead_code_groups - } - ) - > 1 - else str( - _as_mapping(dead_code_groups[0].get("source_scope")).get( - "impact_scope", - IMPACT_SCOPE_NON_RUNTIME, - ) - ), - "label": label, - } - ) - - low_cohesion = [ - group - for group in design_groups - if str(group.get("category", "")) == CATEGORY_COHESION - ] - if low_cohesion: - label = ( - "1 low cohesion class" - if len(low_cohesion) == 1 - else f"{len(low_cohesion)} low cohesion classes" - ) - risks.append( - { - "kind": "family_summary", - "family": FAMILY_DESIGN, - "category": CATEGORY_COHESION, - "count": len(low_cohesion), - "scope": _combined_impact_scope(low_cohesion), - "label": label, - } - ) - - production_structural = [ - group - for 
group in structural_groups - if str(_as_mapping(group.get("source_scope")).get("impact_scope")) - in {IMPACT_SCOPE_RUNTIME, IMPACT_SCOPE_MIXED} - ] - if production_structural: - label = ( - "1 structural finding in production code" - if len(production_structural) == 1 - else ( - f"{len(production_structural)} structural findings in production code" - ) - ) - risks.append( - { - "kind": "family_summary", - "family": FAMILY_STRUCTURAL, - "count": len(production_structural), - "scope": SOURCE_KIND_PRODUCTION, - "label": label, - } - ) - - fixture_test_clones = [ - group - for group in clone_groups - if _as_mapping(group.get("source_scope")).get("impact_scope") - == IMPACT_SCOPE_NON_RUNTIME - and _as_mapping(group.get("source_scope")).get("dominant_kind") - in {SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES} - ] - if fixture_test_clones: - label = ( - "1 clone group in fixtures/tests" - if len(fixture_test_clones) == 1 - else f"{len(fixture_test_clones)} clone groups in fixtures/tests" - ) - risks.append( - { - "kind": "family_summary", - "family": FAMILY_CLONE, - "count": len(fixture_test_clones), - "scope": IMPACT_SCOPE_NON_RUNTIME, - "label": label, - } - ) - - return risks[:6] - - -def _build_derived_overview( - *, - findings: Mapping[str, object], - metrics_payload: Mapping[str, object], -) -> tuple[dict[str, object], dict[str, object]]: - groups = _as_mapping(findings.get("groups")) - clones = _as_mapping(groups.get(FAMILY_CLONES)) - clone_groups = [ - *_as_sequence(clones.get("functions")), - *_as_sequence(clones.get("blocks")), - *_as_sequence(clones.get("segments")), - ] - structural_groups = _as_sequence( - _as_mapping(groups.get(FAMILY_STRUCTURAL)).get("groups") - ) - dead_code_groups = _as_sequence( - _as_mapping(groups.get(FAMILY_DEAD_CODE)).get("groups") - ) - design_groups = _as_sequence(_as_mapping(groups.get("design")).get("groups")) - flat_groups = [ - *clone_groups, - *structural_groups, - *dead_code_groups, - *design_groups, - ] - dominant_kind_counts: 
Counter[str] = Counter( - str( - _as_mapping(_as_mapping(group).get("source_scope")).get( - "dominant_kind", - SOURCE_KIND_OTHER, - ) - ) - for group in flat_groups - ) - summary = _as_mapping(findings.get("summary")) - overview: dict[str, object] = { - "families": dict(_as_mapping(summary.get("families"))), - "top_risks": _top_risks( - dead_code_groups=[_as_mapping(group) for group in dead_code_groups], - design_groups=[_as_mapping(group) for group in design_groups], - structural_groups=[_as_mapping(group) for group in structural_groups], - clone_groups=[_as_mapping(group) for group in clone_groups], - ), - "source_scope_breakdown": { - key: dominant_kind_counts[key] - for key in ( - SOURCE_KIND_PRODUCTION, - SOURCE_KIND_TESTS, - SOURCE_KIND_FIXTURES, - SOURCE_KIND_MIXED, - SOURCE_KIND_OTHER, - ) - if dominant_kind_counts[key] > 0 - }, - "health_snapshot": _health_snapshot(metrics_payload), - "directory_hotspots": build_directory_hotspots(findings=findings), - } - hotlists: dict[str, object] = { - "most_actionable_ids": _sort_flat_finding_ids( - [ - group - for group in map(_as_mapping, flat_groups) - if str(group.get("severity")) != SEVERITY_INFO - ] - )[:5], - "highest_spread_ids": _sort_highest_spread_ids( - list(map(_as_mapping, flat_groups)) - )[:5], - "production_hotspot_ids": _sort_flat_finding_ids( - [ - group - for group in map(_as_mapping, flat_groups) - if str(_as_mapping(group.get("source_scope")).get("impact_scope")) - in {IMPACT_SCOPE_RUNTIME, IMPACT_SCOPE_MIXED} - ] - )[:5], - "test_fixture_hotspot_ids": _sort_flat_finding_ids( - [ - group - for group in map(_as_mapping, flat_groups) - if str(_as_mapping(group.get("source_scope")).get("impact_scope")) - == IMPACT_SCOPE_NON_RUNTIME - and str(_as_mapping(group.get("source_scope")).get("dominant_kind")) - in {SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES} - ] - )[:5], - } - return overview, hotlists - - -def _representative_location_rows( - suggestion: Suggestion, -) -> list[dict[str, object]]: - rows = [ - 
{ - "relative_path": ( - location.relative_path - if ( - location.relative_path - and not _is_absolute_path(location.relative_path) - ) - else _contract_report_location_path( - location.filepath, - scan_root="", - ) - ), - "start_line": location.start_line, - "end_line": location.end_line, - "qualname": location.qualname, - "source_kind": location.source_kind, - } - for location in suggestion.representative_locations - ] - rows.sort( - key=lambda row: ( - str(row["relative_path"]), - _as_int(row["start_line"]), - _as_int(row["end_line"]), - str(row["qualname"]), - ) - ) - return rows[:3] - - -def _suggestion_finding_id(suggestion: Suggestion) -> str: - if suggestion.finding_family == FAMILY_CLONES: - if suggestion.fact_kind.startswith("Function"): - return clone_group_id(CLONE_KIND_FUNCTION, suggestion.subject_key) - if suggestion.fact_kind.startswith("Block"): - return clone_group_id(CLONE_KIND_BLOCK, suggestion.subject_key) - return clone_group_id(CLONE_KIND_SEGMENT, suggestion.subject_key) - if suggestion.finding_family == FAMILY_STRUCTURAL: - return structural_group_id( - suggestion.finding_kind or "duplicated_branches", - suggestion.subject_key, - ) - if suggestion.category == CATEGORY_DEAD_CODE: - return dead_code_group_id(suggestion.subject_key) - if suggestion.category in { - CATEGORY_COMPLEXITY, - CATEGORY_COUPLING, - CATEGORY_COHESION, - CATEGORY_DEPENDENCY, - }: - return design_group_id(suggestion.category, suggestion.subject_key) - return design_group_id( - suggestion.category, - suggestion.subject_key or suggestion.title, - ) - - -def _build_derived_suggestions( - suggestions: Sequence[Suggestion] | None, -) -> list[dict[str, object]]: - suggestion_rows = list(suggestions or ()) - suggestion_rows.sort( - key=lambda suggestion: ( - -suggestion.priority, - SEVERITY_ORDER.get(suggestion.severity, 9), - suggestion.title, - _suggestion_finding_id(suggestion), - ) - ) - return [ - { - "id": f"suggestion:{_suggestion_finding_id(suggestion)}", - "finding_id": 
_suggestion_finding_id(suggestion), - "title": suggestion.title, - "summary": suggestion.fact_summary, - "location_label": suggestion.location_label or suggestion.location, - "representative_locations": _representative_location_rows(suggestion), - "action": { - "effort": suggestion.effort, - "steps": list(suggestion.steps), - }, - } - for suggestion in suggestion_rows - ] - - -def _build_findings_payload( - *, - func_groups: GroupMapLike, - block_groups: GroupMapLike, - segment_groups: GroupMapLike, - block_facts: Mapping[str, Mapping[str, str]], - structural_findings: Sequence[StructuralFindingGroup] | None, - metrics_payload: Mapping[str, object], - baseline_trusted: bool, - new_function_group_keys: Collection[str] | None, - new_block_group_keys: Collection[str] | None, - new_segment_group_keys: Collection[str] | None, - suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None, - design_thresholds: Mapping[str, object] | None, - scan_root: str, -) -> dict[str, object]: - clone_functions = _build_clone_groups( - groups=func_groups, - kind=CLONE_KIND_FUNCTION, - baseline_trusted=baseline_trusted, - new_keys=new_function_group_keys, - block_facts=block_facts, - scan_root=scan_root, - ) - clone_blocks = _build_clone_groups( - groups=block_groups, - kind=CLONE_KIND_BLOCK, - baseline_trusted=baseline_trusted, - new_keys=new_block_group_keys, - block_facts=block_facts, - scan_root=scan_root, - ) - clone_segments = _build_clone_groups( - groups=segment_groups, - kind=CLONE_KIND_SEGMENT, - baseline_trusted=baseline_trusted, - new_keys=new_segment_group_keys, - block_facts={}, - scan_root=scan_root, - ) - structural_groups = _build_structural_groups( - structural_findings, - scan_root=scan_root, - ) - dead_code_groups = _build_dead_code_groups( - metrics_payload, - scan_root=scan_root, - ) - dead_code_family = _as_mapping( - _as_mapping(metrics_payload.get("families")).get(FAMILY_DEAD_CODE) - ) - dead_code_summary = _as_mapping(dead_code_family.get("summary")) - 
dead_code_suppressed = _as_int( - dead_code_summary.get( - "suppressed", - len(_as_sequence(dead_code_family.get("suppressed_items"))), - ) - ) - design_groups = _build_design_groups( - metrics_payload, - design_thresholds=design_thresholds, - scan_root=scan_root, - ) - suppressed_clone_payload = _build_suppressed_clone_groups( - groups=suppressed_clone_groups, - block_facts=block_facts, - scan_root=scan_root, - ) - clone_groups_payload: dict[str, object] = { - "functions": clone_functions, - "blocks": clone_blocks, - "segments": clone_segments, - } - if any(suppressed_clone_payload.values()): - clone_groups_payload["suppressed"] = { - "functions": suppressed_clone_payload[CLONE_KIND_FUNCTION], - "blocks": suppressed_clone_payload[CLONE_KIND_BLOCK], - "segments": suppressed_clone_payload[CLONE_KIND_SEGMENT], - } - return { - "summary": _findings_summary( - clone_functions=clone_functions, - clone_blocks=clone_blocks, - clone_segments=clone_segments, - structural_groups=structural_groups, - dead_code_groups=dead_code_groups, - design_groups=design_groups, - suppressed_clone_groups=suppressed_clone_payload, - dead_code_suppressed=dead_code_suppressed, - ), - "groups": { - FAMILY_CLONES: clone_groups_payload, - FAMILY_STRUCTURAL: { - "groups": structural_groups, - }, - FAMILY_DEAD_CODE: { - "groups": dead_code_groups, - }, - "design": { - "groups": design_groups, - }, - }, - } - - -def _canonical_integrity_payload( - *, - report_schema_version: str, - meta: Mapping[str, object], - inventory: Mapping[str, object], - findings: Mapping[str, object], - metrics: Mapping[str, object], -) -> dict[str, object]: - canonical_meta = { - str(key): value for key, value in meta.items() if str(key) != "runtime" - } - - def _strip_noncanonical(value: object) -> object: - if isinstance(value, Mapping): - return { - str(key): _strip_noncanonical(item) - for key, item in value.items() - if str(key) != "display_facts" - } - if isinstance(value, Sequence) and not isinstance( - value, - 
(str, bytes, bytearray), - ): - return [_strip_noncanonical(item) for item in value] - return value - - return { - "report_schema_version": report_schema_version, - "meta": canonical_meta, - "inventory": inventory, - "findings": _strip_noncanonical(findings), - "metrics": metrics, - } - - -def _build_integrity_payload( - *, - report_schema_version: str, - meta: Mapping[str, object], - inventory: Mapping[str, object], - findings: Mapping[str, object], - metrics: Mapping[str, object], -) -> dict[str, object]: - canonical_payload = _canonical_integrity_payload( - report_schema_version=report_schema_version, - meta=meta, - inventory=inventory, - findings=findings, - metrics=metrics, - ) - canonical_json = orjson.dumps( - canonical_payload, - option=orjson.OPT_SORT_KEYS, - ) - payload_sha = sha256(canonical_json).hexdigest() - return { - "canonicalization": { - "version": "1", - "scope": "canonical_only", - "sections": [ - "report_schema_version", - "meta", - "inventory", - "findings", - "metrics", - ], - }, - "digest": { - "verified": True, - "algorithm": "sha256", - "value": payload_sha, - }, - } - - -def build_report_document( - *, - func_groups: GroupMapLike, - block_groups: GroupMapLike, - segment_groups: GroupMapLike, - meta: Mapping[str, object] | None = None, - inventory: Mapping[str, object] | None = None, - block_facts: Mapping[str, Mapping[str, str]] | None = None, - new_function_group_keys: Collection[str] | None = None, - new_block_group_keys: Collection[str] | None = None, - new_segment_group_keys: Collection[str] | None = None, - suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None = None, - metrics: Mapping[str, object] | None = None, - suggestions: Sequence[Suggestion] | None = None, - structural_findings: Sequence[StructuralFindingGroup] | None = None, -) -> dict[str, object]: - report_schema_version = REPORT_SCHEMA_VERSION - scan_root = str(_as_mapping(meta).get("scan_root", "")) - meta_payload = _build_meta_payload(meta, 
scan_root=scan_root) - design_thresholds = _as_mapping( - _as_mapping(meta_payload.get("analysis_thresholds")).get("design_findings") - ) - metrics_payload = _build_metrics_payload(metrics, scan_root=scan_root) - file_list = _collect_report_file_list( - inventory=inventory, - func_groups=func_groups, - block_groups=block_groups, - segment_groups=segment_groups, - suppressed_clone_groups=suppressed_clone_groups, - metrics=metrics, - structural_findings=structural_findings, - ) - inventory_payload = _build_inventory_payload( - inventory=inventory, - file_list=file_list, - metrics_payload=metrics_payload, - scan_root=scan_root, - ) - findings_payload = _build_findings_payload( - func_groups=func_groups, - block_groups=block_groups, - segment_groups=segment_groups, - block_facts=block_facts or {}, - structural_findings=structural_findings, - metrics_payload=metrics_payload, - baseline_trusted=_baseline_is_trusted(meta_payload), - new_function_group_keys=new_function_group_keys, - new_block_group_keys=new_block_group_keys, - new_segment_group_keys=new_segment_group_keys, - suppressed_clone_groups=suppressed_clone_groups, - design_thresholds=design_thresholds, - scan_root=scan_root, - ) - overview_payload, hotlists_payload = _build_derived_overview( - findings=findings_payload, - metrics_payload=metrics_payload, - ) - derived_payload = { - "suggestions": _build_derived_suggestions(suggestions), - "overview": overview_payload, - "hotlists": hotlists_payload, - } - integrity_payload = _build_integrity_payload( - report_schema_version=report_schema_version, - meta=meta_payload, - inventory=inventory_payload, - findings=findings_payload, - metrics=metrics_payload, - ) - return { - "report_schema_version": report_schema_version, - "meta": meta_payload, - "inventory": inventory_payload, - "findings": findings_payload, - "metrics": metrics_payload, - "derived": derived_payload, - "integrity": integrity_payload, - } +__all__ = [ + "_build_design_groups", + 
"_clone_group_assessment", + "_collect_paths_from_metrics", + "_collect_report_file_list", + "_combined_impact_scope", + "_contract_path", + "_count_file_lines", + "_count_file_lines_for_path", + "_csv_values", + "_derive_inventory_code_counts", + "_findings_summary", + "_is_absolute_path", + "_normalize_block_machine_facts", + "_normalize_nested_string_rows", + "_parse_ratio_percent", + "_source_scope_from_filepaths", + "_source_scope_from_locations", + "_structural_group_assessment", + "_suggestion_finding_id", + "build_report_document", + "clone_group_id", + "dead_code_group_id", + "design_group_id", + "normalize_structural_findings", + "structural_group_id", +] diff --git a/codeclone/report/markdown.py b/codeclone/report/markdown.py index 6ad5c2e..6395c70 100644 --- a/codeclone/report/markdown.py +++ b/codeclone/report/markdown.py @@ -9,620 +9,20 @@ from collections.abc import Collection, Mapping, Sequence from typing import TYPE_CHECKING -from .._coerce import as_float, as_int, as_mapping, as_sequence -from ..domain.findings import FAMILY_CLONE, FAMILY_DEAD_CODE, FAMILY_STRUCTURAL -from ._formatting import format_spread_text -from .json_contract import build_report_document +from .document import build_report_document +from .renderers.markdown import ( + MARKDOWN_SCHEMA_VERSION, + _append_findings_section, + _append_metric_items, + _as_float, + _location_text, + render_markdown_report_document, +) if TYPE_CHECKING: from ..models import StructuralFindingGroup, Suggestion, SuppressedCloneGroup from .types import GroupMapLike -MARKDOWN_SCHEMA_VERSION = "1.0" -_MAX_FINDING_LOCATIONS = 5 -_MAX_METRIC_ITEMS = 10 - -_as_int = as_int -_as_float = as_float -_as_mapping = as_mapping -_as_sequence = as_sequence - -_ANCHORS: tuple[tuple[str, str, int], ...] 
= ( - ("overview", "Overview", 2), - ("inventory", "Inventory", 2), - ("findings-summary", "Findings Summary", 2), - ("top-risks", "Top Risks", 2), - ("suggestions", "Suggestions", 2), - ("findings", "Findings", 2), - ("clone-findings", "Clone Findings", 3), - ("structural-findings", "Structural Findings", 3), - ("dead-code-findings", "Dead Code Findings", 3), - ("design-findings", "Design Findings", 3), - ("metrics", "Metrics", 2), - ("health", "Health", 3), - ("complexity", "Complexity", 3), - ("coupling", "Coupling", 3), - ("cohesion", "Cohesion", 3), - ("coverage-join", "Coverage Join", 3), - ("overloaded-modules", "Overloaded Modules", 3), - ("dependencies", "Dependencies", 3), - ("dead-code-metrics", "Dead Code", 3), - ("dead-code-suppressed", "Suppressed Dead Code", 3), - ("integrity", "Integrity", 2), -) -_ANCHOR_MAP: dict[str, tuple[str, str, int]] = { - anchor[0]: anchor for anchor in _ANCHORS -} - - -def _text(value: object) -> str: - if value is None: - return "(none)" - if isinstance(value, float): - return f"{value:.2f}".rstrip("0").rstrip(".") or "0" - if isinstance(value, bool): - return "true" if value else "false" - text = str(value).strip() - return text or "(none)" - - -def _source_scope_text(scope: Mapping[str, object]) -> str: - dominant = _text(scope.get("dominant_kind")) - impact = _text(scope.get("impact_scope")) - return f"{dominant} / {impact}" - - -def _spread_text(spread: Mapping[str, object]) -> str: - return format_spread_text( - _as_int(spread.get("files")), - _as_int(spread.get("functions")), - ) - - -def _location_text(item: Mapping[str, object]) -> str: - relative_path = _text(item.get("relative_path")) - start_line = _as_int(item.get("start_line")) - end_line = _as_int(item.get("end_line")) - qualname = str(item.get("qualname", "")).strip() - line_part = "" - if start_line > 0: - line_part = f":{start_line}" - if end_line > 0 and end_line != start_line: - line_part += f"-{end_line}" - if qualname: - return 
f"`{relative_path}{line_part}` :: `{qualname}`" - return f"`{relative_path}{line_part}`" - - -def _append_anchor(lines: list[str], anchor_id: str, title: str, level: int) -> None: - lines.append(f'') - lines.append(f"{'#' * level} {title}") - lines.append("") - - -def _anchor(anchor_id: str) -> tuple[str, str, int]: - return _ANCHOR_MAP[anchor_id] - - -def _append_kv_bullets( - lines: list[str], - rows: Sequence[tuple[str, object]], -) -> None: - for label, value in rows: - lines.append(f"- {label}: {_text(value)}") - lines.append("") - - -def _finding_heading(group: Mapping[str, object]) -> str: - family = str(group.get("family", "")).strip() - category = str(group.get("category", "")).strip() - clone_type = str(group.get("clone_type", "")).strip() - if family == FAMILY_CLONE: - suffix = f" ({clone_type})" if clone_type else "" - return f"{category.title()} clone group{suffix}" - if family == FAMILY_STRUCTURAL: - return f"Structural finding: {category}" - if family == FAMILY_DEAD_CODE: - return f"Dead code: {category}" - return f"Design finding: {category}" - - -def _append_facts_block( - lines: list[str], - *, - title: str, - facts: Mapping[str, object], -) -> None: - if not facts: - return - lines.append(f"- {title}:") - lines.extend(f" - `{key}`: {_text(facts[key])}" for key in sorted(facts)) - - -def _append_findings_section( - lines: list[str], - *, - groups: Sequence[object], -) -> None: - finding_rows = [_as_mapping(group) for group in groups] - if not finding_rows: - lines.append("_None._") - lines.append("") - return - for group in finding_rows: - lines.append(f"#### {_finding_heading(group)}") - lines.append("") - _append_kv_bullets( - lines, - ( - ("Finding ID", f"`{_text(group.get('id'))}`"), - ("Family", group.get("family")), - ("Category", group.get("category")), - ("Kind", group.get("kind")), - ("Severity", group.get("severity")), - ("Confidence", group.get("confidence")), - ("Priority", _as_float(group.get("priority"))), - ("Scope", 
_source_scope_text(_as_mapping(group.get("source_scope")))), - ("Spread", _spread_text(_as_mapping(group.get("spread")))), - ("Occurrences", group.get("count")), - ), - ) - facts = _as_mapping(group.get("facts")) - display_facts = _as_mapping(group.get("display_facts")) - if facts or display_facts: - _append_facts_block(lines, title="Facts", facts=facts) - _append_facts_block(lines, title="Presentation facts", facts=display_facts) - lines.append("") - items = list(map(_as_mapping, _as_sequence(group.get("items")))) - lines.append("- Locations:") - visible_items = items[:_MAX_FINDING_LOCATIONS] - lines.extend(f" - {_location_text(item)}" for item in visible_items) - if len(items) > len(visible_items): - lines.append( - f" - ... and {len(items) - len(visible_items)} more occurrence(s)" - ) - lines.append("") - - -def _append_suppressed_clone_findings( - lines: list[str], - *, - groups: Sequence[object], -) -> None: - finding_rows = [_as_mapping(group) for group in groups] - if not finding_rows: - lines.append("_None._") - lines.append("") - return - for group in finding_rows: - lines.append("#### Suppressed clone group") - lines.append("") - _append_kv_bullets( - lines, - ( - ("Finding ID", f"`{_text(group.get('id'))}`"), - ("Category", group.get("category")), - ("Clone Type", group.get("clone_type")), - ("Severity", group.get("severity")), - ("Scope", _source_scope_text(_as_mapping(group.get("source_scope")))), - ("Spread", _spread_text(_as_mapping(group.get("spread")))), - ("Occurrences", group.get("count")), - ("Suppression Rule", group.get("suppression_rule")), - ("Suppression Source", group.get("suppression_source")), - ( - "Matched Patterns", - ", ".join( - str(item).strip() - for item in _as_sequence(group.get("matched_patterns")) - if str(item).strip() - ) - or "(none)", - ), - ), - ) - facts = _as_mapping(group.get("facts")) - display_facts = _as_mapping(group.get("display_facts")) - if facts or display_facts: - _append_facts_block(lines, title="Facts", 
facts=facts) - _append_facts_block(lines, title="Presentation facts", facts=display_facts) - lines.append("") - items = list(map(_as_mapping, _as_sequence(group.get("items")))) - lines.append("- Locations:") - visible_items = items[:_MAX_FINDING_LOCATIONS] - lines.extend(f" - {_location_text(item)}" for item in visible_items) - if len(items) > len(visible_items): - lines.append( - f" - ... and {len(items) - len(visible_items)} more occurrence(s)" - ) - lines.append("") - - -def _append_metric_items( - lines: list[str], - *, - items: Sequence[object], - key_order: Sequence[str], -) -> None: - metric_rows = [_as_mapping(item) for item in items[:_MAX_METRIC_ITEMS]] - if not metric_rows: - lines.append("_No detailed items._") - lines.append("") - return - for item in metric_rows: - parts = [f"{key}={_text(item[key])}" for key in key_order if key in item] - if "relative_path" in item: - parts.append(_location_text(item)) - lines.append(f"- {'; '.join(parts)}") - if len(items) > len(metric_rows): - lines.append(f"- ... 
and {len(items) - len(metric_rows)} more item(s)") - lines.append("") - - -def render_markdown_report_document(payload: Mapping[str, object]) -> str: - meta = _as_mapping(payload.get("meta")) - inventory = _as_mapping(payload.get("inventory")) - findings = _as_mapping(payload.get("findings")) - metrics = _as_mapping(payload.get("metrics")) - derived = _as_mapping(payload.get("derived")) - integrity = _as_mapping(payload.get("integrity")) - runtime = _as_mapping(meta.get("runtime")) - findings_summary = _as_mapping(findings.get("summary")) - findings_groups = _as_mapping(findings.get("groups")) - clone_groups = _as_mapping(findings_groups.get("clones")) - suppressed_clone_groups = _as_mapping(clone_groups.get("suppressed")) - overview = _as_mapping(derived.get("overview")) - hotlists = _as_mapping(derived.get("hotlists")) - suggestions = _as_sequence(derived.get("suggestions")) - metrics_families = _as_mapping(metrics.get("families")) - health_snapshot = _as_mapping(overview.get("health_snapshot")) - inventory_files = _as_mapping(inventory.get("files")) - inventory_code = _as_mapping(inventory.get("code")) - digest = _as_mapping(integrity.get("digest")) - canonicalization = _as_mapping(integrity.get("canonicalization")) - family_summary = _as_mapping(findings_summary.get("families")) - severity_summary = _as_mapping(findings_summary.get("severity")) - impact_summary = _as_mapping(findings_summary.get("impact_scope")) - source_breakdown = _as_mapping(overview.get("source_scope_breakdown")) - - lines = [ - "# CodeClone Report", - "", - f"- Markdown schema: {MARKDOWN_SCHEMA_VERSION}", - f"- Source report schema: {_text(payload.get('report_schema_version'))}", - f"- Project: {_text(meta.get('project_name'))}", - f"- Analysis mode: {_text(meta.get('analysis_mode'))}", - f"- Report mode: {_text(meta.get('report_mode'))}", - f"- Generated by: codeclone {_text(meta.get('codeclone_version'))}", - f"- Python: {_text(meta.get('python_tag'))}", - f"- Report generated (UTC): 
{_text(runtime.get('report_generated_at_utc'))}", - "", - ] - - _append_anchor(lines, *_anchor("overview")) - _append_kv_bullets( - lines, - ( - ("Project", meta.get("project_name")), - ( - "Health", - ( - f"{_text(health_snapshot.get('score'))} " - f"({_text(health_snapshot.get('grade'))})" - ), - ), - ("Total findings", findings_summary.get("total")), - ( - "Families", - ", ".join( - f"{name}={_text(family_summary.get(name))}" - for name in ("clones", "structural", "dead_code", "design") - ), - ), - ("Strongest dimension", health_snapshot.get("strongest_dimension")), - ("Weakest dimension", health_snapshot.get("weakest_dimension")), - ), - ) - - _append_anchor(lines, *_anchor("inventory")) - _append_kv_bullets( - lines, - ( - ( - "Files", - ", ".join( - f"{name}={_text(inventory_files.get(name))}" - for name in ( - "total_found", - "analyzed", - "cached", - "skipped", - "source_io_skipped", - ) - ), - ), - ( - "Code", - ", ".join( - f"{name}={_text(inventory_code.get(name))}" - for name in ( - "parsed_lines", - "functions", - "methods", - "classes", - ) - ), - ), - ), - ) - - _append_anchor(lines, *_anchor("findings-summary")) - _append_kv_bullets( - lines, - ( - ("Total", findings_summary.get("total")), - ( - "By family", - ", ".join( - f"{name}={_text(family_summary.get(name))}" - for name in ("clones", "structural", "dead_code", "design") - ), - ), - ( - "By severity", - ", ".join( - f"{name}={_text(severity_summary.get(name))}" - for name in ("critical", "warning", "info") - ), - ), - ( - "By impact scope", - ", ".join( - f"{name}={_text(impact_summary.get(name))}" - for name in ("runtime", "non_runtime", "mixed") - ), - ), - ( - "Source scope breakdown", - ", ".join( - f"{name}={_text(source_breakdown.get(name))}" - for name in ("production", "tests", "fixtures", "other") - if name in source_breakdown - ) - or "(none)", - ), - ), - ) - - _append_anchor(lines, *_anchor("top-risks")) - top_risks = [_as_mapping(item) for item in 
_as_sequence(overview.get("top_risks"))] - if top_risks: - for idx, risk in enumerate(top_risks[:10], start=1): - lines.append( - f"{idx}. {_text(risk.get('label'))} " - f"(family={_text(risk.get('family'))}, " - f"scope={_text(risk.get('scope'))}, " - f"count={_text(risk.get('count'))})" - ) - else: - lines.append("_None._") - lines.append("") - - if suggestions: - _append_anchor(lines, *_anchor("suggestions")) - for suggestion in map(_as_mapping, suggestions): - action = _as_mapping(suggestion.get("action")) - lines.append(f"### {_text(suggestion.get('title'))}") - lines.append("") - _append_kv_bullets( - lines, - ( - ("Finding", f"`{_text(suggestion.get('finding_id'))}`"), - ("Summary", suggestion.get("summary")), - ("Location", suggestion.get("location_label")), - ("Effort", action.get("effort")), - ), - ) - representative = [ - _as_mapping(item) - for item in _as_sequence(suggestion.get("representative_locations")) - ] - if representative: - lines.append(f"- Example: {_location_text(representative[0])}") - steps = [str(step).strip() for step in _as_sequence(action.get("steps"))] - if steps: - lines.append("- Steps:") - for idx, step in enumerate(steps, start=1): - lines.append(f" {idx}. 
{step}") - lines.append("") - - _append_anchor(lines, *_anchor("findings")) - _append_anchor(lines, *_anchor("clone-findings")) - _append_findings_section( - lines, - groups=[ - *_as_sequence(clone_groups.get("functions")), - *_as_sequence(clone_groups.get("blocks")), - *_as_sequence(clone_groups.get("segments")), - ], - ) - if suppressed_clone_groups: - lines.append("#### Suppressed Golden Fixture Clone Groups") - lines.append("") - _append_suppressed_clone_findings( - lines, - groups=[ - *_as_sequence(suppressed_clone_groups.get("functions")), - *_as_sequence(suppressed_clone_groups.get("blocks")), - *_as_sequence(suppressed_clone_groups.get("segments")), - ], - ) - - _append_anchor(lines, *_anchor("structural-findings")) - _append_findings_section( - lines, - groups=_as_sequence( - _as_mapping(findings_groups.get("structural")).get("groups") - ), - ) - - _append_anchor(lines, *_anchor("dead-code-findings")) - _append_findings_section( - lines, - groups=_as_sequence( - _as_mapping(findings_groups.get("dead_code")).get("groups") - ), - ) - - _append_anchor(lines, *_anchor("design-findings")) - _append_findings_section( - lines, - groups=_as_sequence(_as_mapping(findings_groups.get("design")).get("groups")), - ) - - _append_anchor(lines, *_anchor("metrics")) - for anchor_id, title, summary_keys, item_keys in ( - ("health", "Health", ("score", "grade"), ()), - ( - "complexity", - "Complexity", - ("total", "average", "max", "high_risk"), - ("cyclomatic_complexity", "nesting_depth", "risk"), - ), - ( - "coupling", - "Coupling", - ("total", "average", "max", "high_risk"), - ("cbo", "risk"), - ), - ( - "cohesion", - "Cohesion", - ("total", "average", "max", "low_cohesion"), - ("lcom4", "method_count", "instance_var_count", "risk"), - ), - ( - "coverage-join", - "Coverage Join", - ( - "status", - "source", - "units", - "measured_units", - "overall_permille", - "coverage_hotspots", - "scope_gap_hotspots", - "hotspot_threshold_percent", - ), - ( - "coverage_status", - 
"risk", - "coverage_permille", - "cyclomatic_complexity", - "covered_lines", - "executable_lines", - "coverage_hotspot", - "scope_gap_hotspot", - ), - ), - ( - "overloaded-modules", - "Overloaded Modules", - ( - "total", - "candidates", - "population_status", - "top_score", - "average_score", - ), - ( - "source_kind", - "score", - "candidate_status", - "loc", - "fan_in", - "fan_out", - "complexity_total", - ), - ), - ( - "dependencies", - "Dependencies", - ("modules", "edges", "cycles", "max_depth"), - ("source", "target", "import_type", "line"), - ), - ( - "dead-code-metrics", - "Dead Code", - ("total", "high_confidence", "suppressed"), - ("kind", "confidence"), - ), - ): - family_key = ( - "dead_code" - if anchor_id == "dead-code-metrics" - else ( - "overloaded_modules" if anchor_id == "overloaded-modules" else anchor_id - ) - ) - if family_key == "coverage-join": - family_key = "coverage_join" - family_payload = _as_mapping(metrics_families.get(family_key)) - if not family_payload and family_key == "overloaded_modules": - family_payload = _as_mapping(metrics_families.get("god_modules")) - if not family_payload and family_key == "coverage_join": - continue - family_summary_map = _as_mapping(family_payload.get("summary")) - _append_anchor(lines, anchor_id, title, 3) - _append_kv_bullets( - lines, - tuple((key, family_summary_map.get(key)) for key in summary_keys), - ) - _append_metric_items( - lines, - items=_as_sequence(family_payload.get("items")), - key_order=item_keys, - ) - - dead_code_family_payload = _as_mapping(metrics_families.get("dead_code")) - _append_anchor(lines, *_anchor("dead-code-suppressed")) - _append_metric_items( - lines, - items=_as_sequence(dead_code_family_payload.get("suppressed_items")), - key_order=("kind", "confidence", "suppression_rule", "suppression_source"), - ) - - _append_anchor(lines, *_anchor("integrity")) - _append_kv_bullets( - lines, - ( - ("Canonicalization version", canonicalization.get("version")), - ("Canonicalization 
scope", canonicalization.get("scope")), - ( - "Canonical sections", - ", ".join( - str(item) for item in _as_sequence(canonicalization.get("sections")) - ), - ), - ("Digest algorithm", digest.get("algorithm")), - ("Digest verified", digest.get("verified")), - ("Digest value", digest.get("value")), - ( - "Hotlists", - ", ".join( - f"{name}={len(_as_sequence(hotlists.get(name)))}" - for name in ( - "most_actionable_ids", - "highest_spread_ids", - "production_hotspot_ids", - "test_fixture_hotspot_ids", - ) - ), - ), - ), - ) - - return "\n".join(lines).rstrip() + "\n" - def to_markdown_report( *, @@ -657,3 +57,14 @@ def to_markdown_report( structural_findings=tuple(structural_findings or ()), ) return render_markdown_report_document(payload) + + +__all__ = [ + "MARKDOWN_SCHEMA_VERSION", + "_append_findings_section", + "_append_metric_items", + "_as_float", + "_location_text", + "render_markdown_report_document", + "to_markdown_report", +] diff --git a/codeclone/report/overview.py b/codeclone/report/overview.py index c8c4a3a..9e71fc3 100644 --- a/codeclone/report/overview.py +++ b/codeclone/report/overview.py @@ -11,9 +11,6 @@ from pathlib import PurePosixPath from typing import TYPE_CHECKING, cast -from .._coerce import as_int as _as_int -from .._coerce import as_mapping as _as_mapping -from .._coerce import as_sequence as _as_sequence from ..domain.findings import ( CATEGORY_COHESION, CATEGORY_COMPLEXITY, @@ -44,6 +41,9 @@ BLOCK_HINT_ASSERT_ONLY, BLOCK_PATTERN_REPEATED_STMT_HASH, ) +from ..utils.coerce import as_int as _as_int +from ..utils.coerce import as_mapping as _as_mapping +from ..utils.coerce import as_sequence as _as_sequence from .derived import ( classify_source_kind, format_spread_location_label, diff --git a/codeclone/report/renderers/__init__.py b/codeclone/report/renderers/__init__.py new file mode 100644 index 0000000..b744533 --- /dev/null +++ b/codeclone/report/renderers/__init__.py @@ -0,0 +1,19 @@ +# This Source Code Form is subject to the terms 
of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from .json import render_json_report_document +from .markdown import render_markdown_report_document +from .sarif import render_sarif_report_document +from .text import render_text_report_document + +__all__ = [ + "render_json_report_document", + "render_markdown_report_document", + "render_sarif_report_document", + "render_text_report_document", +] diff --git a/codeclone/report/renderers/json.py b/codeclone/report/renderers/json.py new file mode 100644 index 0000000..6052bcd --- /dev/null +++ b/codeclone/report/renderers/json.py @@ -0,0 +1,18 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping + +import orjson + + +def render_json_report_document(payload: Mapping[str, object]) -> str: + return orjson.dumps(payload, option=orjson.OPT_INDENT_2).decode("utf-8") + + +__all__ = ["render_json_report_document"] diff --git a/codeclone/report/renderers/markdown.py b/codeclone/report/renderers/markdown.py new file mode 100644 index 0000000..8788cf4 --- /dev/null +++ b/codeclone/report/renderers/markdown.py @@ -0,0 +1,628 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING + +from ...domain.findings import FAMILY_CLONE, FAMILY_DEAD_CODE, FAMILY_STRUCTURAL +from ...utils.coerce import as_float, as_int, as_mapping, as_sequence +from .._formatting import format_spread_text + +if TYPE_CHECKING: + pass + +MARKDOWN_SCHEMA_VERSION = "1.0" +_MAX_FINDING_LOCATIONS = 5 +_MAX_METRIC_ITEMS = 10 + +_as_int = as_int +_as_float = as_float +_as_mapping = as_mapping +_as_sequence = as_sequence + +_ANCHORS: tuple[tuple[str, str, int], ...] = ( + ("overview", "Overview", 2), + ("inventory", "Inventory", 2), + ("findings-summary", "Findings Summary", 2), + ("top-risks", "Top Risks", 2), + ("suggestions", "Suggestions", 2), + ("findings", "Findings", 2), + ("clone-findings", "Clone Findings", 3), + ("structural-findings", "Structural Findings", 3), + ("dead-code-findings", "Dead Code Findings", 3), + ("design-findings", "Design Findings", 3), + ("metrics", "Metrics", 2), + ("health", "Health", 3), + ("complexity", "Complexity", 3), + ("coupling", "Coupling", 3), + ("cohesion", "Cohesion", 3), + ("coverage-join", "Coverage Join", 3), + ("overloaded-modules", "Overloaded Modules", 3), + ("dependencies", "Dependencies", 3), + ("dead-code-metrics", "Dead Code", 3), + ("dead-code-suppressed", "Suppressed Dead Code", 3), + ("integrity", "Integrity", 2), +) +_ANCHOR_MAP: dict[str, tuple[str, str, int]] = { + anchor[0]: anchor for anchor in _ANCHORS +} + + +def _text(value: object) -> str: + if value is None: + return "(none)" + if isinstance(value, float): + return f"{value:.2f}".rstrip("0").rstrip(".") or "0" + if isinstance(value, bool): + return "true" if value else "false" + text = str(value).strip() + return text or "(none)" + + +def _source_scope_text(scope: Mapping[str, object]) -> str: + dominant = _text(scope.get("dominant_kind")) + impact = 
_text(scope.get("impact_scope")) + return f"{dominant} / {impact}" + + +def _spread_text(spread: Mapping[str, object]) -> str: + return format_spread_text( + _as_int(spread.get("files")), + _as_int(spread.get("functions")), + ) + + +def _location_text(item: Mapping[str, object]) -> str: + relative_path = _text(item.get("relative_path")) + start_line = _as_int(item.get("start_line")) + end_line = _as_int(item.get("end_line")) + qualname = str(item.get("qualname", "")).strip() + line_part = "" + if start_line > 0: + line_part = f":{start_line}" + if end_line > 0 and end_line != start_line: + line_part += f"-{end_line}" + if qualname: + return f"`{relative_path}{line_part}` :: `{qualname}`" + return f"`{relative_path}{line_part}`" + + +def _append_anchor(lines: list[str], anchor_id: str, title: str, level: int) -> None: + lines.append(f'') + lines.append(f"{'#' * level} {title}") + lines.append("") + + +def _anchor(anchor_id: str) -> tuple[str, str, int]: + return _ANCHOR_MAP[anchor_id] + + +def _append_kv_bullets( + lines: list[str], + rows: Sequence[tuple[str, object]], +) -> None: + for label, value in rows: + lines.append(f"- {label}: {_text(value)}") + lines.append("") + + +def _finding_heading(group: Mapping[str, object]) -> str: + family = str(group.get("family", "")).strip() + category = str(group.get("category", "")).strip() + clone_type = str(group.get("clone_type", "")).strip() + if family == FAMILY_CLONE: + suffix = f" ({clone_type})" if clone_type else "" + return f"{category.title()} clone group{suffix}" + if family == FAMILY_STRUCTURAL: + return f"Structural finding: {category}" + if family == FAMILY_DEAD_CODE: + return f"Dead code: {category}" + return f"Design finding: {category}" + + +def _append_facts_block( + lines: list[str], + *, + title: str, + facts: Mapping[str, object], +) -> None: + if not facts: + return + lines.append(f"- {title}:") + lines.extend(f" - `{key}`: {_text(facts[key])}" for key in sorted(facts)) + + +def 
_append_findings_section( + lines: list[str], + *, + groups: Sequence[object], +) -> None: + finding_rows = [_as_mapping(group) for group in groups] + if not finding_rows: + lines.append("_None._") + lines.append("") + return + for group in finding_rows: + lines.append(f"#### {_finding_heading(group)}") + lines.append("") + _append_kv_bullets( + lines, + ( + ("Finding ID", f"`{_text(group.get('id'))}`"), + ("Family", group.get("family")), + ("Category", group.get("category")), + ("Kind", group.get("kind")), + ("Severity", group.get("severity")), + ("Confidence", group.get("confidence")), + ("Priority", _as_float(group.get("priority"))), + ("Scope", _source_scope_text(_as_mapping(group.get("source_scope")))), + ("Spread", _spread_text(_as_mapping(group.get("spread")))), + ("Occurrences", group.get("count")), + ), + ) + facts = _as_mapping(group.get("facts")) + display_facts = _as_mapping(group.get("display_facts")) + if facts or display_facts: + _append_facts_block(lines, title="Facts", facts=facts) + _append_facts_block(lines, title="Presentation facts", facts=display_facts) + lines.append("") + items = list(map(_as_mapping, _as_sequence(group.get("items")))) + lines.append("- Locations:") + visible_items = items[:_MAX_FINDING_LOCATIONS] + lines.extend(f" - {_location_text(item)}" for item in visible_items) + if len(items) > len(visible_items): + lines.append( + f" - ... 
and {len(items) - len(visible_items)} more occurrence(s)" + ) + lines.append("") + + +def _append_suppressed_clone_findings( + lines: list[str], + *, + groups: Sequence[object], +) -> None: + finding_rows = [_as_mapping(group) for group in groups] + if not finding_rows: + lines.append("_None._") + lines.append("") + return + for group in finding_rows: + lines.append("#### Suppressed clone group") + lines.append("") + _append_kv_bullets( + lines, + ( + ("Finding ID", f"`{_text(group.get('id'))}`"), + ("Category", group.get("category")), + ("Clone Type", group.get("clone_type")), + ("Severity", group.get("severity")), + ("Scope", _source_scope_text(_as_mapping(group.get("source_scope")))), + ("Spread", _spread_text(_as_mapping(group.get("spread")))), + ("Occurrences", group.get("count")), + ("Suppression Rule", group.get("suppression_rule")), + ("Suppression Source", group.get("suppression_source")), + ( + "Matched Patterns", + ", ".join( + str(item).strip() + for item in _as_sequence(group.get("matched_patterns")) + if str(item).strip() + ) + or "(none)", + ), + ), + ) + facts = _as_mapping(group.get("facts")) + display_facts = _as_mapping(group.get("display_facts")) + if facts or display_facts: + _append_facts_block(lines, title="Facts", facts=facts) + _append_facts_block(lines, title="Presentation facts", facts=display_facts) + lines.append("") + items = list(map(_as_mapping, _as_sequence(group.get("items")))) + lines.append("- Locations:") + visible_items = items[:_MAX_FINDING_LOCATIONS] + lines.extend(f" - {_location_text(item)}" for item in visible_items) + if len(items) > len(visible_items): + lines.append( + f" - ... 
and {len(items) - len(visible_items)} more occurrence(s)" + ) + lines.append("") + + +def _append_metric_items( + lines: list[str], + *, + items: Sequence[object], + key_order: Sequence[str], +) -> None: + metric_rows = [_as_mapping(item) for item in items[:_MAX_METRIC_ITEMS]] + if not metric_rows: + lines.append("_No detailed items._") + lines.append("") + return + for item in metric_rows: + parts = [f"{key}={_text(item[key])}" for key in key_order if key in item] + if "relative_path" in item: + parts.append(_location_text(item)) + lines.append(f"- {'; '.join(parts)}") + if len(items) > len(metric_rows): + lines.append(f"- ... and {len(items) - len(metric_rows)} more item(s)") + lines.append("") + + +def render_markdown_report_document(payload: Mapping[str, object]) -> str: + meta = _as_mapping(payload.get("meta")) + inventory = _as_mapping(payload.get("inventory")) + findings = _as_mapping(payload.get("findings")) + metrics = _as_mapping(payload.get("metrics")) + derived = _as_mapping(payload.get("derived")) + integrity = _as_mapping(payload.get("integrity")) + runtime = _as_mapping(meta.get("runtime")) + findings_summary = _as_mapping(findings.get("summary")) + findings_groups = _as_mapping(findings.get("groups")) + clone_groups = _as_mapping(findings_groups.get("clones")) + suppressed_clone_groups = _as_mapping(clone_groups.get("suppressed")) + overview = _as_mapping(derived.get("overview")) + hotlists = _as_mapping(derived.get("hotlists")) + suggestions = _as_sequence(derived.get("suggestions")) + metrics_families = _as_mapping(metrics.get("families")) + health_snapshot = _as_mapping(overview.get("health_snapshot")) + inventory_files = _as_mapping(inventory.get("files")) + inventory_code = _as_mapping(inventory.get("code")) + digest = _as_mapping(integrity.get("digest")) + canonicalization = _as_mapping(integrity.get("canonicalization")) + family_summary = _as_mapping(findings_summary.get("families")) + severity_summary = 
_as_mapping(findings_summary.get("severity")) + impact_summary = _as_mapping(findings_summary.get("impact_scope")) + source_breakdown = _as_mapping(overview.get("source_scope_breakdown")) + + lines = [ + "# CodeClone Report", + "", + f"- Markdown schema: {MARKDOWN_SCHEMA_VERSION}", + f"- Source report schema: {_text(payload.get('report_schema_version'))}", + f"- Project: {_text(meta.get('project_name'))}", + f"- Analysis mode: {_text(meta.get('analysis_mode'))}", + f"- Report mode: {_text(meta.get('report_mode'))}", + f"- Generated by: codeclone {_text(meta.get('codeclone_version'))}", + f"- Python: {_text(meta.get('python_tag'))}", + f"- Report generated (UTC): {_text(runtime.get('report_generated_at_utc'))}", + "", + ] + + _append_anchor(lines, *_anchor("overview")) + _append_kv_bullets( + lines, + ( + ("Project", meta.get("project_name")), + ( + "Health", + ( + f"{_text(health_snapshot.get('score'))} " + f"({_text(health_snapshot.get('grade'))})" + ), + ), + ("Total findings", findings_summary.get("total")), + ( + "Families", + ", ".join( + f"{name}={_text(family_summary.get(name))}" + for name in ("clones", "structural", "dead_code", "design") + ), + ), + ("Strongest dimension", health_snapshot.get("strongest_dimension")), + ("Weakest dimension", health_snapshot.get("weakest_dimension")), + ), + ) + + _append_anchor(lines, *_anchor("inventory")) + _append_kv_bullets( + lines, + ( + ( + "Files", + ", ".join( + f"{name}={_text(inventory_files.get(name))}" + for name in ( + "total_found", + "analyzed", + "cached", + "skipped", + "source_io_skipped", + ) + ), + ), + ( + "Code", + ", ".join( + f"{name}={_text(inventory_code.get(name))}" + for name in ( + "parsed_lines", + "functions", + "methods", + "classes", + ) + ), + ), + ), + ) + + _append_anchor(lines, *_anchor("findings-summary")) + _append_kv_bullets( + lines, + ( + ("Total", findings_summary.get("total")), + ( + "By family", + ", ".join( + f"{name}={_text(family_summary.get(name))}" + for name in ("clones", 
"structural", "dead_code", "design") + ), + ), + ( + "By severity", + ", ".join( + f"{name}={_text(severity_summary.get(name))}" + for name in ("critical", "warning", "info") + ), + ), + ( + "By impact scope", + ", ".join( + f"{name}={_text(impact_summary.get(name))}" + for name in ("runtime", "non_runtime", "mixed") + ), + ), + ( + "Source scope breakdown", + ", ".join( + f"{name}={_text(source_breakdown.get(name))}" + for name in ("production", "tests", "fixtures", "other") + if name in source_breakdown + ) + or "(none)", + ), + ), + ) + + _append_anchor(lines, *_anchor("top-risks")) + top_risks = [_as_mapping(item) for item in _as_sequence(overview.get("top_risks"))] + if top_risks: + for idx, risk in enumerate(top_risks[:10], start=1): + lines.append( + f"{idx}. {_text(risk.get('label'))} " + f"(family={_text(risk.get('family'))}, " + f"scope={_text(risk.get('scope'))}, " + f"count={_text(risk.get('count'))})" + ) + else: + lines.append("_None._") + lines.append("") + + if suggestions: + _append_anchor(lines, *_anchor("suggestions")) + for suggestion in map(_as_mapping, suggestions): + action = _as_mapping(suggestion.get("action")) + lines.append(f"### {_text(suggestion.get('title'))}") + lines.append("") + _append_kv_bullets( + lines, + ( + ("Finding", f"`{_text(suggestion.get('finding_id'))}`"), + ("Summary", suggestion.get("summary")), + ("Location", suggestion.get("location_label")), + ("Effort", action.get("effort")), + ), + ) + representative = [ + _as_mapping(item) + for item in _as_sequence(suggestion.get("representative_locations")) + ] + if representative: + lines.append(f"- Example: {_location_text(representative[0])}") + steps = [str(step).strip() for step in _as_sequence(action.get("steps"))] + if steps: + lines.append("- Steps:") + for idx, step in enumerate(steps, start=1): + lines.append(f" {idx}. 
{step}") + lines.append("") + + _append_anchor(lines, *_anchor("findings")) + _append_anchor(lines, *_anchor("clone-findings")) + _append_findings_section( + lines, + groups=[ + *_as_sequence(clone_groups.get("functions")), + *_as_sequence(clone_groups.get("blocks")), + *_as_sequence(clone_groups.get("segments")), + ], + ) + if suppressed_clone_groups: + lines.append("#### Suppressed Golden Fixture Clone Groups") + lines.append("") + _append_suppressed_clone_findings( + lines, + groups=[ + *_as_sequence(suppressed_clone_groups.get("functions")), + *_as_sequence(suppressed_clone_groups.get("blocks")), + *_as_sequence(suppressed_clone_groups.get("segments")), + ], + ) + + _append_anchor(lines, *_anchor("structural-findings")) + _append_findings_section( + lines, + groups=_as_sequence( + _as_mapping(findings_groups.get("structural")).get("groups") + ), + ) + + _append_anchor(lines, *_anchor("dead-code-findings")) + _append_findings_section( + lines, + groups=_as_sequence( + _as_mapping(findings_groups.get("dead_code")).get("groups") + ), + ) + + _append_anchor(lines, *_anchor("design-findings")) + _append_findings_section( + lines, + groups=_as_sequence(_as_mapping(findings_groups.get("design")).get("groups")), + ) + + _append_anchor(lines, *_anchor("metrics")) + for anchor_id, title, summary_keys, item_keys in ( + ("health", "Health", ("score", "grade"), ()), + ( + "complexity", + "Complexity", + ("total", "average", "max", "high_risk"), + ("cyclomatic_complexity", "nesting_depth", "risk"), + ), + ( + "coupling", + "Coupling", + ("total", "average", "max", "high_risk"), + ("cbo", "risk"), + ), + ( + "cohesion", + "Cohesion", + ("total", "average", "max", "low_cohesion"), + ("lcom4", "method_count", "instance_var_count", "risk"), + ), + ( + "coverage-join", + "Coverage Join", + ( + "status", + "source", + "units", + "measured_units", + "overall_permille", + "coverage_hotspots", + "scope_gap_hotspots", + "hotspot_threshold_percent", + ), + ( + "coverage_status", + 
"risk", + "coverage_permille", + "cyclomatic_complexity", + "covered_lines", + "executable_lines", + "coverage_hotspot", + "scope_gap_hotspot", + ), + ), + ( + "overloaded-modules", + "Overloaded Modules", + ( + "total", + "candidates", + "population_status", + "top_score", + "average_score", + ), + ( + "source_kind", + "score", + "candidate_status", + "loc", + "fan_in", + "fan_out", + "complexity_total", + ), + ), + ( + "dependencies", + "Dependencies", + ("modules", "edges", "cycles", "max_depth"), + ("source", "target", "import_type", "line"), + ), + ( + "dead-code-metrics", + "Dead Code", + ("total", "high_confidence", "suppressed"), + ("kind", "confidence"), + ), + ): + family_key = ( + "dead_code" + if anchor_id == "dead-code-metrics" + else ( + "overloaded_modules" if anchor_id == "overloaded-modules" else anchor_id + ) + ) + if family_key == "coverage-join": + family_key = "coverage_join" + family_payload = _as_mapping(metrics_families.get(family_key)) + if not family_payload and family_key == "overloaded_modules": + family_payload = _as_mapping(metrics_families.get("god_modules")) + if not family_payload and family_key == "coverage_join": + continue + family_summary_map = _as_mapping(family_payload.get("summary")) + _append_anchor(lines, anchor_id, title, 3) + _append_kv_bullets( + lines, + tuple((key, family_summary_map.get(key)) for key in summary_keys), + ) + _append_metric_items( + lines, + items=_as_sequence(family_payload.get("items")), + key_order=item_keys, + ) + + dead_code_family_payload = _as_mapping(metrics_families.get("dead_code")) + _append_anchor(lines, *_anchor("dead-code-suppressed")) + _append_metric_items( + lines, + items=_as_sequence(dead_code_family_payload.get("suppressed_items")), + key_order=("kind", "confidence", "suppression_rule", "suppression_source"), + ) + + _append_anchor(lines, *_anchor("integrity")) + _append_kv_bullets( + lines, + ( + ("Canonicalization version", canonicalization.get("version")), + ("Canonicalization 
scope", canonicalization.get("scope")), + ( + "Canonical sections", + ", ".join( + str(item) for item in _as_sequence(canonicalization.get("sections")) + ), + ), + ("Digest algorithm", digest.get("algorithm")), + ("Digest verified", digest.get("verified")), + ("Digest value", digest.get("value")), + ( + "Hotlists", + ", ".join( + f"{name}={len(_as_sequence(hotlists.get(name)))}" + for name in ( + "most_actionable_ids", + "highest_spread_ids", + "production_hotspot_ids", + "test_fixture_hotspot_ids", + ) + ), + ), + ), + ) + + return "\n".join(lines).rstrip() + "\n" + + +__all__ = [ + "MARKDOWN_SCHEMA_VERSION", + "render_markdown_report_document", +] diff --git a/codeclone/report/renderers/sarif.py b/codeclone/report/renderers/sarif.py new file mode 100644 index 0000000..8a760f0 --- /dev/null +++ b/codeclone/report/renderers/sarif.py @@ -0,0 +1,974 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, cast + +import orjson + +from ...contracts import DOCS_URL, REPOSITORY_URL +from ...domain.findings import ( + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_COVERAGE, + CATEGORY_DEPENDENCY, + CLONE_KIND_BLOCK, + CLONE_KIND_FUNCTION, + FAMILY_CLONE, + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_DESIGN, + FAMILY_STRUCTURAL, + FINDING_KIND_CLASS_HOTSPOT, + FINDING_KIND_CLONE_GROUP, + FINDING_KIND_COVERAGE_HOTSPOT, + FINDING_KIND_COVERAGE_SCOPE_GAP, + FINDING_KIND_CYCLE, + FINDING_KIND_FUNCTION_HOTSPOT, + FINDING_KIND_UNUSED_SYMBOL, + STRUCTURAL_KIND_CLONE_COHORT_DRIFT, + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + STRUCTURAL_KIND_DUPLICATED_BRANCHES, + SYMBOL_KIND_CLASS, + SYMBOL_KIND_FUNCTION, + SYMBOL_KIND_METHOD, +) +from ...domain.quality import ( + CONFIDENCE_HIGH, + CONFIDENCE_MEDIUM, + SEVERITY_CRITICAL, + SEVERITY_WARNING, +) +from ...utils.coerce import as_float as _as_float +from ...utils.coerce import as_int as _as_int +from ...utils.coerce import as_mapping as _as_mapping +from ...utils.coerce import as_sequence as _as_sequence + +if TYPE_CHECKING: + pass + +SARIF_VERSION = "2.1.0" +SARIF_PROFILE_VERSION = "1.0" +SARIF_SCHEMA_URL = "https://json.schemastore.org/sarif-2.1.0.json" +SARIF_SRCROOT_BASE_ID = "%SRCROOT%" + + +@dataclass(frozen=True, slots=True) +class _RuleSpec: + rule_id: str + short_description: str + full_description: str + default_level: str + category: str + kind: str + precision: str + + +def _text(value: object) -> str: + if value is None: + return "" + return str(value).strip() + + +def _severity_to_level(severity: str) -> str: + if severity == SEVERITY_CRITICAL: + return "error" + if severity == SEVERITY_WARNING: + return "warning" + 
return "note" + + +def _rule_name(spec: _RuleSpec) -> str: + return f"codeclone.{spec.rule_id}" + + +def _rule_remediation(spec: _RuleSpec) -> str: + rule_id = spec.rule_id + if rule_id.startswith("CCLONE"): + return ( + "Review the representative occurrence and related occurrences, " + "then extract shared behavior or keep accepted debt in the baseline." + ) + if rule_id == "CSTRUCT001": + return ( + "Collapse repeated branch shapes into a shared helper, validator, " + "or control-flow abstraction where the behavior is intentionally shared." + ) + if rule_id == "CSTRUCT002": + return ( + "Review the clone cohort and reconcile guard or early-exit behavior " + "if those members are expected to stay aligned." + ) + if rule_id == "CSTRUCT003": + return ( + "Review the clone cohort and reconcile terminal, guard, or try/finally " + "profiles if the drift is not intentional." + ) + if rule_id.startswith("CDEAD"): + return ( + "Remove the unused symbol or keep it explicitly documented/suppressed " + "when runtime dynamics call it intentionally." + ) + if rule_id == "CDESIGN001": + return ( + "Split the class or regroup behavior so responsibilities become cohesive." + ) + if rule_id == "CDESIGN002": + return "Split the function or simplify control flow to reduce complexity." + if rule_id == "CDESIGN003": + return "Reduce dependencies or split responsibilities to lower coupling." + return ( + "Break the cycle or invert dependencies so modules no longer depend " + "on each other circularly." + ) + + +def _rule_help(spec: _RuleSpec) -> dict[str, str]: + remediation = _rule_remediation(spec) + return { + "text": f"{spec.full_description} {remediation}", + "markdown": ( + f"{spec.full_description}\n\n" + f"{remediation}\n\n" + f"See [CodeClone docs]({DOCS_URL})." 
+ ), + } + + +def _scan_root_uri(payload: Mapping[str, object]) -> str: + meta = _as_mapping(payload.get("meta")) + runtime = _as_mapping(meta.get("runtime")) + scan_root_absolute = _text(runtime.get("scan_root_absolute")) + if not scan_root_absolute: + return "" + scan_root_path = Path(scan_root_absolute) + if not scan_root_path.is_absolute(): + return "" + try: + uri = scan_root_path.as_uri() + except ValueError: + return "" + return uri if uri.endswith("/") else f"{uri}/" + + +def _flatten_findings(payload: Mapping[str, object]) -> list[Mapping[str, object]]: + findings = _as_mapping(payload.get("findings")) + groups = _as_mapping(findings.get("groups")) + clones = _as_mapping(groups.get(FAMILY_CLONES)) + structural = _as_mapping(groups.get(FAMILY_STRUCTURAL)) + dead_code = _as_mapping(groups.get(FAMILY_DEAD_CODE)) + design = _as_mapping(groups.get(FAMILY_DESIGN)) + return [ + *map(_as_mapping, _as_sequence(clones.get("functions"))), + *map(_as_mapping, _as_sequence(clones.get("blocks"))), + *map(_as_mapping, _as_sequence(clones.get("segments"))), + *map(_as_mapping, _as_sequence(structural.get("groups"))), + *map(_as_mapping, _as_sequence(dead_code.get("groups"))), + *map(_as_mapping, _as_sequence(design.get("groups"))), + ] + + +def _artifact_catalog( + findings: Sequence[Mapping[str, object]], + *, + use_uri_base_id: bool, +) -> tuple[list[dict[str, object]], dict[str, int]]: + artifact_paths = sorted( + { + relative_path + for group in findings + for item in map(_as_mapping, _as_sequence(group.get("items"))) + for relative_path in (_text(item.get("relative_path")),) + if relative_path + } + ) + artifact_index_map = {path: index for index, path in enumerate(artifact_paths)} + artifacts = [ + { + "location": { + "uri": path, + **({"uriBaseId": SARIF_SRCROOT_BASE_ID} if use_uri_base_id else {}), + } + } + for path in artifact_paths + ] + return cast(list[dict[str, object]], artifacts), artifact_index_map + + +def _clone_rule_spec(category: str) -> _RuleSpec: + 
if category == CLONE_KIND_FUNCTION: + return _RuleSpec( + "CCLONE001", + "Function clone group", + "Multiple functions share the same normalized function body.", + SEVERITY_WARNING, + FAMILY_CLONE, + FINDING_KIND_CLONE_GROUP, + CONFIDENCE_HIGH, + ) + if category == CLONE_KIND_BLOCK: + return _RuleSpec( + "CCLONE002", + "Block clone group", + "Repeated normalized statement blocks were detected across occurrences.", + SEVERITY_WARNING, + FAMILY_CLONE, + FINDING_KIND_CLONE_GROUP, + CONFIDENCE_HIGH, + ) + return _RuleSpec( + "CCLONE003", + "Segment clone group", + "Repeated normalized statement segments were detected across occurrences.", + "note", + FAMILY_CLONE, + FINDING_KIND_CLONE_GROUP, + CONFIDENCE_MEDIUM, + ) + + +def _structural_rule_spec(kind: str) -> _RuleSpec: + if kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + return _RuleSpec( + "CSTRUCT002", + "Clone guard/exit divergence", + ( + "Members of the same function-clone cohort diverged in " + "entry guards or early-exit behavior." + ), + SEVERITY_WARNING, + FAMILY_STRUCTURAL, + STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, + CONFIDENCE_HIGH, + ) + if kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + return _RuleSpec( + "CSTRUCT003", + "Clone cohort drift", + ( + "Members of the same function-clone cohort drifted from " + "the majority terminal/guard/try profile." 
+ ), + SEVERITY_WARNING, + FAMILY_STRUCTURAL, + STRUCTURAL_KIND_CLONE_COHORT_DRIFT, + CONFIDENCE_HIGH, + ) + return _RuleSpec( + "CSTRUCT001", + "Duplicated branches", + "Repeated branch families with matching structural signatures were detected.", + SEVERITY_WARNING, + FAMILY_STRUCTURAL, + kind or STRUCTURAL_KIND_DUPLICATED_BRANCHES, + CONFIDENCE_MEDIUM, + ) + + +def _dead_code_rule_spec(category: str) -> _RuleSpec: + if category == SYMBOL_KIND_FUNCTION: + return _RuleSpec( + "CDEAD001", + "Unused function", + "Function appears to be unused with high confidence.", + SEVERITY_WARNING, + FAMILY_DEAD_CODE, + FINDING_KIND_UNUSED_SYMBOL, + CONFIDENCE_HIGH, + ) + if category == SYMBOL_KIND_CLASS: + return _RuleSpec( + "CDEAD002", + "Unused class", + "Class appears to be unused with high confidence.", + SEVERITY_WARNING, + FAMILY_DEAD_CODE, + FINDING_KIND_UNUSED_SYMBOL, + CONFIDENCE_HIGH, + ) + if category == SYMBOL_KIND_METHOD: + return _RuleSpec( + "CDEAD003", + "Unused method", + "Method appears to be unused with high confidence.", + SEVERITY_WARNING, + FAMILY_DEAD_CODE, + FINDING_KIND_UNUSED_SYMBOL, + CONFIDENCE_HIGH, + ) + return _RuleSpec( + "CDEAD004", + "Unused symbol", + "Symbol appears to be unused with reported confidence.", + SEVERITY_WARNING, + FAMILY_DEAD_CODE, + FINDING_KIND_UNUSED_SYMBOL, + CONFIDENCE_MEDIUM, + ) + + +def _design_rule_spec(category: str, kind: str) -> _RuleSpec: + if category == CATEGORY_COHESION: + return _RuleSpec( + "CDESIGN001", + "Low cohesion class", + "Class cohesion is low according to LCOM4 hotspot thresholds.", + SEVERITY_WARNING, + FAMILY_DESIGN, + kind or FINDING_KIND_CLASS_HOTSPOT, + CONFIDENCE_HIGH, + ) + if category == CATEGORY_COMPLEXITY: + return _RuleSpec( + "CDESIGN002", + "Complexity hotspot", + "Function exceeds the project complexity hotspot threshold.", + SEVERITY_WARNING, + FAMILY_DESIGN, + kind or FINDING_KIND_FUNCTION_HOTSPOT, + CONFIDENCE_HIGH, + ) + if category == CATEGORY_COUPLING: + return _RuleSpec( + 
"CDESIGN003", + "Coupling hotspot", + "Class exceeds the project coupling hotspot threshold.", + SEVERITY_WARNING, + FAMILY_DESIGN, + kind or FINDING_KIND_CLASS_HOTSPOT, + CONFIDENCE_HIGH, + ) + if category == CATEGORY_COVERAGE: + if kind == FINDING_KIND_COVERAGE_SCOPE_GAP: + return _RuleSpec( + "CDESIGN006", + "Coverage scope gap", + "A medium/high-risk function is outside the supplied joined " + "coverage scope.", + SEVERITY_WARNING, + FAMILY_DESIGN, + kind, + CONFIDENCE_HIGH, + ) + return _RuleSpec( + "CDESIGN005", + "Coverage hotspot", + "A medium/high-risk function falls below the configured joined " + "coverage threshold.", + SEVERITY_WARNING, + FAMILY_DESIGN, + kind or FINDING_KIND_COVERAGE_HOTSPOT, + CONFIDENCE_HIGH, + ) + return _RuleSpec( + "CDESIGN004", + "Dependency cycle", + "A dependency cycle was detected between project modules.", + "error", + FAMILY_DESIGN, + kind or FINDING_KIND_CYCLE, + CONFIDENCE_HIGH, + ) + + +def _rule_spec(group: Mapping[str, object]) -> _RuleSpec: + family = _text(group.get("family")) + category = _text(group.get("category")) + kind = _text(group.get("kind")) + if family == FAMILY_CLONE: + return _clone_rule_spec(category) + if family == FAMILY_STRUCTURAL: + return _structural_rule_spec(kind) + if family == FAMILY_DEAD_CODE: + return _dead_code_rule_spec(category) + return _design_rule_spec(category, kind) + + +def _structural_signature(group: Mapping[str, object]) -> Mapping[str, object]: + return _as_mapping(_as_mapping(group.get("signature")).get("stable")) + + +def _clone_result_message( + group: Mapping[str, object], + *, + category: str, + count: int, + spread: Mapping[str, object], +) -> str: + clone_type = _text(group.get("clone_type")) + return ( + f"{category.title()} clone group ({clone_type}), {count} occurrences " + f"across {_as_int(spread.get('files'))} files." 
+ ) + + +def _structural_result_message( + group: Mapping[str, object], + *, + count: int, + qualname: str, +) -> str: + signature = _structural_signature(group) + signature_family = _text(signature.get("family")) + if signature_family == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + cohort_id = _text(signature.get("cohort_id")) + return ( + "Clone guard/exit divergence" + f" ({count} divergent members) in cohort " + f"{cohort_id or 'unknown'}." + ) + if signature_family == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + drift_fields = _as_sequence(signature.get("drift_fields")) + drift_label = ", ".join(_text(item) for item in drift_fields) or "profile" + cohort_id = _text(signature.get("cohort_id")) + return ( + f"Clone cohort drift ({drift_label}), " + f"{count} divergent members in cohort {cohort_id or 'unknown'}." + ) + stmt_shape = _text(signature.get("stmt_shape")) + if qualname: + return ( + f"Repeated branch family ({stmt_shape}), {count} occurrences in {qualname}." + ) + return f"Repeated branch family ({stmt_shape}), {count} occurrences." + + +def _dead_code_result_message( + group: Mapping[str, object], + *, + category: str, + qualname: str, + relative_path: str, +) -> str: + confidence = _text(group.get("confidence")) or "reported" + target = qualname or relative_path + return f"Unused {category} with {confidence} confidence: {target}." + + +def _design_result_message( + *, + category: str, + facts: Mapping[str, object], + qualname: str, + items: Sequence[Mapping[str, object]], +) -> str: + metric_specs = { + CATEGORY_COHESION: ("lcom4", "Low cohesion class", "LCOM4"), + CATEGORY_COMPLEXITY: ( + "cyclomatic_complexity", + "High complexity function", + "CC", + ), + CATEGORY_COUPLING: ("cbo", "High coupling class", "CBO"), + } + spec = metric_specs.get(category) + if spec is not None: + fact_key, label, metric_label = spec + value = _as_int(facts.get(fact_key)) + return f"{label} ({metric_label}={value}): {qualname}." 
+ if category == CATEGORY_COVERAGE: + coverage_status = _text(facts.get("coverage_status")) + threshold = _as_int(facts.get("hotspot_threshold_percent")) + if coverage_status == "missing_from_report": + return f"Coverage scope gap (not in coverage.xml): {qualname}." + coverage_pct = _as_int(facts.get("coverage_permille")) / 10.0 + return f"Coverage hotspot ({coverage_pct:.1f}% < {threshold}%): {qualname}." + modules = [_text(item.get("module")) for item in items if _text(item.get("module"))] + return f"Dependency cycle ({len(modules)} modules): {' -> '.join(modules)}." + + +def _result_message(group: Mapping[str, object]) -> str: + family = _text(group.get("family")) + category = _text(group.get("category")) + count = _as_int(group.get("count")) + spread = _as_mapping(group.get("spread")) + items = [_as_mapping(item) for item in _as_sequence(group.get("items"))] + first_item = items[0] if items else {} + qualname = _text(first_item.get("qualname")) + if family == FAMILY_CLONE: + return _clone_result_message( + group, + category=category, + count=count, + spread=spread, + ) + if family == FAMILY_STRUCTURAL: + return _structural_result_message( + group, + count=count, + qualname=qualname, + ) + if family == FAMILY_DEAD_CODE: + return _dead_code_result_message( + group, + category=category, + qualname=qualname, + relative_path=_text(first_item.get("relative_path")), + ) + return _design_result_message( + category=category, + facts=_as_mapping(group.get("facts")), + qualname=qualname, + items=items, + ) + + +def _logical_locations(item: Mapping[str, object]) -> list[dict[str, object]]: + qualname = _text(item.get("qualname")) + if qualname: + return [{"fullyQualifiedName": qualname}] + module = _text(item.get("module")) + if module: + return [{"fullyQualifiedName": module}] + return [] + + +def _location_message( + group: Mapping[str, object], + *, + related_id: int | None = None, +) -> str: + family = _text(group.get("family")) + category = 
_text(group.get("category")) + if family in {FAMILY_CLONE, FAMILY_STRUCTURAL}: + return ( + "Representative occurrence" + if related_id is None + else f"Related occurrence #{related_id}" + ) + if family == FAMILY_DEAD_CODE: + return ( + "Unused symbol declaration" + if related_id is None + else f"Related declaration #{related_id}" + ) + if category == CATEGORY_DEPENDENCY: + return ( + "Cycle member" + if related_id is None + else f"Related cycle member #{related_id}" + ) + return ( + "Primary location" if related_id is None else f"Related location #{related_id}" + ) + + +def _location_entry( + item: Mapping[str, object], + *, + related_id: int | None = None, + artifact_index_map: Mapping[str, int] | None = None, + use_uri_base_id: bool = False, + message_text: str = "", +) -> dict[str, object]: + relative_path = _text(item.get("relative_path")) + location: dict[str, object] = {} + if relative_path: + artifact_location: dict[str, object] = { + "uri": relative_path, + } + if use_uri_base_id: + artifact_location["uriBaseId"] = SARIF_SRCROOT_BASE_ID + if artifact_index_map and relative_path in artifact_index_map: + artifact_location["index"] = artifact_index_map[relative_path] + physical_location: dict[str, object] = { + "artifactLocation": artifact_location, + } + else: + physical_location = {} + start_line = _as_int(item.get("start_line")) + end_line = _as_int(item.get("end_line")) + if physical_location and start_line > 0: + region: dict[str, object] = {"startLine": start_line} + if end_line > 0: + region["endLine"] = end_line + physical_location["region"] = region + if physical_location: + location["physicalLocation"] = physical_location + logical_locations = _logical_locations(item) + if logical_locations: + location["logicalLocations"] = logical_locations + if message_text: + location["message"] = {"text": message_text} + if related_id is not None: + location["id"] = related_id + return location + + +def _generic_properties(group: Mapping[str, object]) -> 
dict[str, object]: + source_scope = _as_mapping(group.get("source_scope")) + spread = _as_mapping(group.get("spread")) + properties: dict[str, object] = { + "findingId": _text(group.get("id")), + "family": _text(group.get("family")), + "category": _text(group.get("category")), + "kind": _text(group.get("kind")), + "confidence": _text(group.get("confidence")), + "priority": round(_as_float(group.get("priority")), 2), + "impactScope": _text(source_scope.get("impact_scope")), + "sourceKind": _text(source_scope.get("dominant_kind")), + "spreadFiles": _as_int(spread.get("files")), + "spreadFunctions": _as_int(spread.get("functions")), + "helpUri": DOCS_URL, + } + return properties + + +def _clone_result_properties( + props: dict[str, object], + group: Mapping[str, object], +) -> dict[str, object]: + props.update( + { + "novelty": _text(group.get("novelty")), + "cloneKind": _text(group.get("clone_kind")), + "cloneType": _text(group.get("clone_type")), + "groupArity": _as_int(group.get("count")), + } + ) + return props + + +def _structural_signature_properties( + signature: Mapping[str, object], +) -> dict[str, object]: + signature_family = _text(signature.get("family")) + if signature_family == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: + return { + "cohortId": _text(signature.get("cohort_id")), + "majorityGuardCount": _as_int( + signature.get("majority_guard_count"), + ), + "majorityTerminalKind": _text( + signature.get("majority_terminal_kind"), + ), + } + if signature_family == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: + return { + "cohortId": _text(signature.get("cohort_id")), + "driftFields": [ + _text(field) for field in _as_sequence(signature.get("drift_fields")) + ], + } + return { + "statementShape": _text(signature.get("stmt_shape")), + "terminalKind": _text(signature.get("terminal_kind")), + } + + +def _structural_result_properties( + props: dict[str, object], + group: Mapping[str, object], +) -> dict[str, object]: + signature = _structural_signature(group) + 
props["occurrenceCount"] = _as_int(group.get("count")) + props.update(_structural_signature_properties(signature)) + return props + + +def _design_result_properties( + props: dict[str, object], + *, + facts: Mapping[str, object], +) -> dict[str, object]: + for key in ( + "lcom4", + "method_count", + "instance_var_count", + "cbo", + "cyclomatic_complexity", + "nesting_depth", + "cycle_length", + "coverage_permille", + "covered_lines", + "executable_lines", + "hotspot_threshold_percent", + "coverage_status", + ): + if key in facts: + props[key] = facts[key] + return props + + +def _result_properties(group: Mapping[str, object]) -> dict[str, object]: + props = _generic_properties(group) + family = _text(group.get("family")) + if family == FAMILY_CLONE: + return _clone_result_properties(props, group) + if family == FAMILY_STRUCTURAL: + return _structural_result_properties(props, group) + if family == FAMILY_DESIGN: + return _design_result_properties( + props, + facts=_as_mapping(group.get("facts")), + ) + return props + + +def _partial_fingerprints( + *, + rule_id: str, + group: Mapping[str, object], + primary_item: Mapping[str, object], +) -> dict[str, str]: + finding_id = _text(group.get("id")) + path = _text(primary_item.get("relative_path")) + qualname = _text(primary_item.get("qualname")) + start_line = _as_int(primary_item.get("start_line")) + if path and start_line > 0: + fingerprint_material = "\0".join( + ( + rule_id, + finding_id, + path, + qualname, + ) + ) + return { + "primaryLocationLineHash": ( + f"{hashlib.sha256(fingerprint_material.encode('utf-8')).hexdigest()[:16]}" + f":{start_line}" + ) + } + return {} + + +def _primary_location_properties( + primary_item: Mapping[str, object], +) -> dict[str, object]: + path = _text(primary_item.get("relative_path")) + qualname = _text(primary_item.get("qualname")) + start_line = _as_int(primary_item.get("start_line")) + end_line = _as_int(primary_item.get("end_line")) + props: dict[str, object] = {} + if path: + 
props["primaryPath"] = path + if qualname: + props["primaryQualname"] = qualname + if start_line > 0: + props["primaryRegion"] = f"{start_line}-{end_line or start_line}" + return props + + +def _baseline_state(group: Mapping[str, object]) -> str: + novelty = _text(group.get("novelty")) + if novelty == "new": + return "new" + if novelty == "known": + return "unchanged" + return "" + + +def _result_entry( + *, + group: Mapping[str, object], + rule_id: str, + rule_index: int, + artifact_index_map: Mapping[str, int], + use_uri_base_id: bool, +) -> dict[str, object]: + items = [_as_mapping(item) for item in _as_sequence(group.get("items"))] + primary_item = items[0] if items else {} + primary_location = ( + _location_entry( + primary_item, + artifact_index_map=artifact_index_map, + use_uri_base_id=use_uri_base_id, + message_text=_location_message(group), + ) + if primary_item + else {} + ) + result: dict[str, object] = { + "ruleId": rule_id, + "ruleIndex": rule_index, + "kind": "fail", + "level": _severity_to_level(_text(group.get("severity"))), + "message": { + "text": _result_message(group), + }, + "locations": [primary_location] if primary_location else [], + "fingerprints": { + "codecloneFindingId": _text(group.get("id")), + }, + "partialFingerprints": _partial_fingerprints( + rule_id=rule_id, + group=group, + primary_item=primary_item, + ), + "properties": _result_properties(group), + } + if primary_item: + properties = cast(dict[str, object], result["properties"]) + properties.update(_primary_location_properties(primary_item)) + baseline_state = _baseline_state(group) + if baseline_state: + result["baselineState"] = baseline_state + related_items = items[1:] + if related_items: + related_locations = [ + _location_entry( + item, + related_id=index, + artifact_index_map=artifact_index_map, + use_uri_base_id=use_uri_base_id, + message_text=_location_message(group, related_id=index), + ) + for index, item in enumerate(related_items, start=1) + ] + 
result["relatedLocations"] = [ + location for location in related_locations if location + ] + return result + + +def render_sarif_report_document(payload: Mapping[str, object]) -> str: + meta = _as_mapping(payload.get("meta")) + runtime = _as_mapping(meta.get("runtime")) + analysis_started_at = _text(runtime.get("analysis_started_at_utc")) + generated_at = _text(runtime.get("report_generated_at_utc")) + analysis_mode = _text(meta.get("analysis_mode")) or "full" + findings = sorted( + _flatten_findings(payload), + key=lambda group: ( + _rule_spec(group).rule_id, + _text(group.get("id")), + ), + ) + scan_root_uri = _scan_root_uri(payload) + use_uri_base_id = bool(scan_root_uri) + artifacts, artifact_index_map = _artifact_catalog( + findings, + use_uri_base_id=use_uri_base_id, + ) + used_rule_specs = { + spec.rule_id: spec for spec in (_rule_spec(group) for group in findings) + } + ordered_rule_specs = [used_rule_specs[key] for key in sorted(used_rule_specs)] + rule_index_map = { + spec.rule_id: index for index, spec in enumerate(ordered_rule_specs) + } + results = [ + _result_entry( + group=group, + rule_id=rule.rule_id, + rule_index=rule_index_map[rule.rule_id], + artifact_index_map=artifact_index_map, + use_uri_base_id=use_uri_base_id, + ) + for group in findings + for rule in (_rule_spec(group),) + ] + invocation: dict[str, object] = { + "executionSuccessful": True, + **({"startTimeUtc": analysis_started_at} if analysis_started_at else {}), + **({"endTimeUtc": generated_at} if generated_at else {}), + } + if scan_root_uri: + invocation["workingDirectory"] = {"uri": scan_root_uri} + run: dict[str, object] = { + "tool": { + "driver": { + "name": "codeclone", + "version": _text(meta.get("codeclone_version")), + "informationUri": REPOSITORY_URL, + "rules": [ + { + "id": spec.rule_id, + "name": _rule_name(spec), + "shortDescription": {"text": spec.short_description}, + "fullDescription": {"text": spec.full_description}, + "help": _rule_help(spec), + 
"defaultConfiguration": {"level": spec.default_level}, + "helpUri": DOCS_URL, + "properties": { + "category": spec.category, + "kind": spec.kind, + "precision": spec.precision, + "tags": [spec.category, spec.kind, spec.precision], + }, + } + for spec in ordered_rule_specs + ], + } + }, + "automationDetails": { + "id": "/".join( + part + for part in ( + "codeclone", + analysis_mode, + generated_at + or _text( + _as_mapping( + _as_mapping(payload.get("integrity")).get("digest") + ).get("value") + )[:12], + ) + if part + ), + }, + **( + { + "originalUriBaseIds": { + SARIF_SRCROOT_BASE_ID: { + "uri": scan_root_uri, + "description": {"text": "The root of the scanned source tree."}, + } + } + } + if scan_root_uri + else {} + ), + "artifacts": artifacts, + "results": results, + "invocations": [invocation], + "properties": { + "profileVersion": SARIF_PROFILE_VERSION, + "reportSchemaVersion": _text(payload.get("report_schema_version")), + "analysisMode": analysis_mode, + "reportMode": _text(meta.get("report_mode")), + "canonicalDigestSha256": _text( + _as_mapping(_as_mapping(payload.get("integrity")).get("digest")).get( + "value" + ) + ), + **({"reportGeneratedAtUtc": generated_at} if generated_at else {}), + }, + } + return orjson.dumps( + { + "$schema": SARIF_SCHEMA_URL, + "version": SARIF_VERSION, + "runs": [run], + }, + option=orjson.OPT_INDENT_2, + ).decode("utf-8") + + +__all__ = [ + "_baseline_state", + "_location_entry", + "_location_message", + "_logical_locations", + "_partial_fingerprints", + "_primary_location_properties", + "_result_entry", + "_result_message", + "_result_properties", + "_rule_name", + "_rule_spec", + "_scan_root_uri", + "_severity_to_level", + "_text", + "render_sarif_report_document", +] diff --git a/codeclone/report/renderers/text.py b/codeclone/report/renderers/text.py new file mode 100644 index 0000000..4074652 --- /dev/null +++ b/codeclone/report/renderers/text.py @@ -0,0 +1,884 @@ +# This Source Code Form is subject to the terms of the 
Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Mapping, Sequence + +from ...domain.source_scope import IMPACT_SCOPE_NON_RUNTIME, SOURCE_KIND_OTHER +from ...utils.coerce import as_int, as_mapping, as_sequence +from .._formatting import format_spread_text + +_as_int = as_int +_as_mapping = as_mapping +_as_sequence = as_sequence + + +def format_meta_text_value(value: object) -> str: + if isinstance(value, bool): + return "true" if value else "false" + if value is None: + return "(none)" + if isinstance(value, float): + return f"{value:.2f}".rstrip("0").rstrip(".") or "0" + if isinstance(value, Sequence) and not isinstance( + value, + (str, bytes, bytearray), + ): + formatted = [format_meta_text_value(item) for item in value] + return ", ".join(formatted) if formatted else "(none)" + text = str(value).strip() + return text if text else "(none)" + + +def _format_key_values( + mapping: Mapping[str, object], + keys: Sequence[str], + *, + skip_empty: bool = False, +) -> str: + parts: list[str] = [] + for key in keys: + if key not in mapping: + continue + formatted = format_meta_text_value(mapping.get(key)) + if not skip_empty or formatted != "(none)": + parts.append(f"{key}={formatted}") + return " ".join(parts) if parts else "(none)" + + +def _spread_text(spread: Mapping[str, object]) -> str: + return format_spread_text( + _as_int(spread.get("files")), + _as_int(spread.get("functions")), + ) + + +def _scope_text(source_scope: Mapping[str, object]) -> str: + dominant = str(source_scope.get("dominant_kind", "")).strip() or SOURCE_KIND_OTHER + impact = ( + str(source_scope.get("impact_scope", "")).strip() or IMPACT_SCOPE_NON_RUNTIME + ) + return f"{dominant}/{impact}" + + +def _structural_kind_label(kind: object) -> str: + kind_text = 
str(kind).strip() + match kind_text: + case "duplicated_branches": + return "Duplicated branches" + case "clone_guard_exit_divergence": + return "Clone guard/exit divergence" + case "clone_cohort_drift": + return "Clone cohort drift" + case _: + return kind_text or "(none)" + + +def _location_line( + item: Mapping[str, object], + *, + metric_name: str | None = None, +) -> str: + metric_suffix = "" + if metric_name is not None and metric_name in item: + metric_suffix = ( + f" {metric_name}={format_meta_text_value(item.get(metric_name))}" + ) + return ( + f"- {format_meta_text_value(item.get('qualname'))} " + f"{format_meta_text_value(item.get('relative_path'))}:" + f"{format_meta_text_value(item.get('start_line'))}-" + f"{format_meta_text_value(item.get('end_line'))}" + f"{metric_suffix}" + ) + + +def _append_clone_section( + lines: list[str], + *, + title: str, + groups: Sequence[object], + novelty: str, + metric_name: str, +) -> None: + section_groups = [ + _as_mapping(group) + for group in groups + if str(_as_mapping(group).get("novelty", "")) == novelty + ] + lines.append(f"{title} ({novelty.upper()}) (groups={len(section_groups)})") + if not section_groups: + lines.append("(none)") + return + for idx, group in enumerate(section_groups, start=1): + lines.append(f"=== Clone group #{idx} ===") + lines.append( + "id=" + f"{format_meta_text_value(group.get('id'))} " + f"clone_type={format_meta_text_value(group.get('clone_type'))} " + f"severity={format_meta_text_value(group.get('severity'))} " + f"count={format_meta_text_value(group.get('count'))} " + f"spread={_spread_text(_as_mapping(group.get('spread')))} " + f"scope={_scope_text(_as_mapping(group.get('source_scope')))}" + ) + facts = _as_mapping(group.get("facts")) + if facts: + lines.append( + "facts: " + + _format_key_values( + facts, + tuple(sorted(str(key) for key in facts)), + skip_empty=True, + ) + ) + display_facts = _as_mapping(group.get("display_facts")) + if display_facts: + lines.append( + 
"display_facts: " + + _format_key_values( + display_facts, + tuple(sorted(str(key) for key in display_facts)), + skip_empty=True, + ) + ) + lines.extend( + _location_line(item, metric_name=metric_name) + for item in map(_as_mapping, _as_sequence(group.get("items"))) + ) + lines.append("") + if lines[-1] == "": + lines.pop() + + +def _append_suppressed_clone_section( + lines: list[str], + *, + title: str, + groups: Sequence[object], + metric_name: str, +) -> None: + section_groups = [_as_mapping(group) for group in groups] + lines.append(f"{title} (groups={len(section_groups)})") + if not section_groups: + lines.append("(none)") + return + for idx, group in enumerate(section_groups, start=1): + lines.append(f"=== Suppressed clone group #{idx} ===") + lines.append( + "id=" + f"{format_meta_text_value(group.get('id'))} " + f"clone_type={format_meta_text_value(group.get('clone_type'))} " + f"severity={format_meta_text_value(group.get('severity'))} " + f"count={format_meta_text_value(group.get('count'))} " + f"spread={_spread_text(_as_mapping(group.get('spread')))} " + f"scope={_scope_text(_as_mapping(group.get('source_scope')))} " + "suppressed_by=" + f"{format_meta_text_value(group.get('suppression_rule'))}" + "@" + f"{format_meta_text_value(group.get('suppression_source'))} " + "matched_patterns=" + f"{format_meta_text_value(group.get('matched_patterns'))}" + ) + facts = _as_mapping(group.get("facts")) + if facts: + lines.append( + "facts: " + + _format_key_values( + facts, + tuple(sorted(str(key) for key in facts)), + skip_empty=True, + ) + ) + lines.extend( + _location_line(item, metric_name=metric_name) + for item in map(_as_mapping, _as_sequence(group.get("items"))) + ) + lines.append("") + if lines[-1] == "": + lines.pop() + + +def _append_structural_findings(lines: list[str], groups: Sequence[object]) -> None: + structural_groups = [_as_mapping(group) for group in groups] + lines.append(f"STRUCTURAL FINDINGS (groups={len(structural_groups)})") + if not 
structural_groups: + lines.append("(none)") + return + for idx, group in enumerate(structural_groups, start=1): + lines.append(f"=== Structural finding #{idx} ===") + signature = _as_mapping(group.get("signature")) + stable = _as_mapping(signature.get("stable")) + control_flow = _as_mapping(stable.get("control_flow")) + lines.append( + "id=" + f"{format_meta_text_value(group.get('id'))} " + f"kind={format_meta_text_value(group.get('kind'))} " + f"label={_structural_kind_label(group.get('kind'))} " + f"severity={format_meta_text_value(group.get('severity'))} " + f"confidence={format_meta_text_value(group.get('confidence'))} " + f"count={format_meta_text_value(group.get('count'))} " + f"spread={_spread_text(_as_mapping(group.get('spread')))} " + f"scope={_scope_text(_as_mapping(group.get('source_scope')))}" + ) + stable_family = str(stable.get("family", "")).strip() + match stable_family: + case "clone_guard_exit_divergence": + lines.append( + "signature: " + f"cohort_id={format_meta_text_value(stable.get('cohort_id'))} " + f"majority_guard_count=" + f"{format_meta_text_value(stable.get('majority_guard_count'))} " + f"majority_terminal_kind=" + f"{format_meta_text_value(stable.get('majority_terminal_kind'))}" + ) + case "clone_cohort_drift": + majority_profile = _as_mapping(stable.get("majority_profile")) + lines.append( + "signature: " + f"cohort_id={format_meta_text_value(stable.get('cohort_id'))} " + f"drift_fields=" + f"{format_meta_text_value(stable.get('drift_fields'))} " + f"majority_terminal_kind=" + f"{format_meta_text_value(majority_profile.get('terminal_kind'))}" + ) + case _: + lines.append( + "signature: " + f"stmt_shape={format_meta_text_value(stable.get('stmt_shape'))} " + f"terminal_kind=" + f"{format_meta_text_value(stable.get('terminal_kind'))} " + f"has_loop={format_meta_text_value(control_flow.get('has_loop'))} " + f"has_try={format_meta_text_value(control_flow.get('has_try'))} " + 
f"nested_if={format_meta_text_value(control_flow.get('nested_if'))}" + ) + facts = _as_mapping(group.get("facts")) + if facts: + lines.append( + "facts: " + + _format_key_values( + facts, + tuple(sorted(str(key) for key in facts)), + skip_empty=True, + ) + ) + items = list(map(_as_mapping, _as_sequence(group.get("items")))) + visible_items = items[:3] + lines.extend(_location_line(item) for item in visible_items) + if len(items) > len(visible_items): + lines.append(f"... and {len(items) - len(visible_items)} more occurrences") + lines.append("") + if lines[-1] == "": + lines.pop() + + +def _append_single_item_findings( + lines: list[str], + *, + title: str, + groups: Sequence[object], + fact_keys: Sequence[str], +) -> None: + finding_groups = [_as_mapping(group) for group in groups] + lines.append(f"{title} (groups={len(finding_groups)})") + if not finding_groups: + lines.append("(none)") + return + for idx, group in enumerate(finding_groups, start=1): + lines.append(f"=== Finding #{idx} ===") + lines.append( + "id=" + f"{format_meta_text_value(group.get('id'))} " + f"category={format_meta_text_value(group.get('category'))} " + f"kind={format_meta_text_value(group.get('kind'))} " + f"severity={format_meta_text_value(group.get('severity'))} " + f"confidence={format_meta_text_value(group.get('confidence'))} " + f"scope={_scope_text(_as_mapping(group.get('source_scope')))}" + ) + facts = _as_mapping(group.get("facts")) + if facts: + lines.append( + f"facts: {_format_key_values(facts, fact_keys, skip_empty=True)}" + ) + lines.extend( + _location_line(item) + for item in map(_as_mapping, _as_sequence(group.get("items"))) + ) + lines.append("") + if lines[-1] == "": + lines.pop() + + +def _suppression_bindings_text(item: Mapping[str, object]) -> str: + bindings = [ + _as_mapping(binding) + for binding in _as_sequence(item.get("suppressed_by")) + if isinstance(binding, Mapping) + ] + if bindings: + parts = [] + for binding in bindings: + rule = str(binding.get("rule", 
"")).strip() or "unknown" + source = str(binding.get("source", "")).strip() or "unknown" + parts.append(f"{rule}@{source}") + return ",".join(parts) + rule = str(item.get("suppression_rule", "")).strip() + source = str(item.get("suppression_source", "")).strip() + if rule or source: + return f"{rule or 'unknown'}@{source or 'unknown'}" + return "(none)" + + +def _append_suppressed_dead_code_items( + lines: list[str], + *, + items: Sequence[object], +) -> None: + suppressed_items = [_as_mapping(item) for item in items] + lines.append(f"SUPPRESSED DEAD CODE (items={len(suppressed_items)})") + if not suppressed_items: + lines.append("(none)") + return + for idx, item in enumerate(suppressed_items, start=1): + lines.append(f"=== Suppressed dead-code item #{idx} ===") + lines.append( + "kind=" + f"{format_meta_text_value(item.get('kind'))} " + f"confidence={format_meta_text_value(item.get('confidence'))} " + f"suppressed_by={_suppression_bindings_text(item)}" + ) + lines.append(_location_line(item)) + lines.append("") + if lines[-1] == "": + lines.pop() + + +def _flatten_findings(findings: Mapping[str, object]) -> list[Mapping[str, object]]: + groups = _as_mapping(findings.get("groups")) + clone_groups = _as_mapping(groups.get("clones")) + flat_groups = [ + *map(_as_mapping, _as_sequence(clone_groups.get("functions"))), + *map(_as_mapping, _as_sequence(clone_groups.get("blocks"))), + *map(_as_mapping, _as_sequence(clone_groups.get("segments"))), + *map( + _as_mapping, + _as_sequence(_as_mapping(groups.get("structural")).get("groups")), + ), + *map( + _as_mapping, + _as_sequence(_as_mapping(groups.get("dead_code")).get("groups")), + ), + *map( + _as_mapping, + _as_sequence(_as_mapping(groups.get("design")).get("groups")), + ), + ] + return flat_groups + + +def _append_suggestions( + lines: list[str], + *, + suggestions: Sequence[object], + findings: Mapping[str, object], +) -> None: + suggestion_rows = [_as_mapping(item) for item in suggestions] + finding_index = { + 
str(group.get("id")): group for group in _flatten_findings(findings) + } + lines.append(f"SUGGESTIONS (count={len(suggestion_rows)})") + if not suggestion_rows: + lines.append("(none)") + return + for idx, suggestion in enumerate(suggestion_rows, start=1): + finding = finding_index.get(str(suggestion.get("finding_id")), {}) + lines.append( + f"{idx}. " + f"[{format_meta_text_value(finding.get('severity'))}] " + f"{format_meta_text_value(suggestion.get('title'))}" + ) + lines.append( + " " + f"finding_id={format_meta_text_value(suggestion.get('finding_id'))} " + f"effort={format_meta_text_value(_as_mapping(suggestion.get('action')).get('effort'))}" + ) + summary = str(suggestion.get("summary", "")).strip() + if summary: + lines.append(f" summary: {summary}") + lines.append( + f" location: {format_meta_text_value(suggestion.get('location_label'))}" + ) + representative = list( + map(_as_mapping, _as_sequence(suggestion.get("representative_locations"))) + ) + if representative: + lines.append(f" example: {_location_line(representative[0])[2:]}") + steps = [ + str(step).strip() + for step in _as_sequence(_as_mapping(suggestion.get("action")).get("steps")) + if str(step).strip() + ] + lines.extend(f" - {step}" for step in steps[:2]) + + +def _append_overview( + lines: list[str], + overview: Mapping[str, object], + hotlists: Mapping[str, object], +) -> None: + lines.append("DERIVED OVERVIEW") + families = _as_mapping(overview.get("families")) + lines.append( + "Families: " + + _format_key_values( + families, + ("clones", "structural", "dead_code", "design"), + ) + ) + source_breakdown = _as_mapping(overview.get("source_scope_breakdown")) + lines.append( + "Source scope breakdown: " + + _format_key_values( + source_breakdown, + ("production", "tests", "fixtures", "other"), + ) + ) + health_snapshot = _as_mapping(overview.get("health_snapshot")) + lines.append( + "Health snapshot: " + + _format_key_values( + health_snapshot, + ("score", "grade", "strongest_dimension", 
"weakest_dimension"), + ) + ) + hotlist_counts = { + "most_actionable": len(_as_sequence(hotlists.get("most_actionable_ids"))), + "highest_spread": len(_as_sequence(hotlists.get("highest_spread_ids"))), + "production_hotspots": len( + _as_sequence(hotlists.get("production_hotspot_ids")) + ), + "test_fixture_hotspots": len( + _as_sequence(hotlists.get("test_fixture_hotspot_ids")) + ), + } + lines.append( + "Hotlists: " + + _format_key_values( + hotlist_counts, + ( + "most_actionable", + "highest_spread", + "production_hotspots", + "test_fixture_hotspots", + ), + ) + ) + top_risks = list(map(_as_mapping, _as_sequence(overview.get("top_risks")))) + if not top_risks: + lines.append("Top risks: (none)") + return + lines.append("Top risks:") + lines.extend( + ( + "- " + f"{format_meta_text_value(risk.get('family'))} " + f"count={format_meta_text_value(risk.get('count'))} " + f"scope={format_meta_text_value(risk.get('scope'))} " + f"label={format_meta_text_value(risk.get('label'))}" + ) + for risk in top_risks + ) + + +def render_text_report_document(payload: Mapping[str, object]) -> str: + meta_payload = _as_mapping(payload.get("meta")) + baseline = _as_mapping(meta_payload.get("baseline")) + cache = _as_mapping(meta_payload.get("cache")) + metrics_baseline = _as_mapping(meta_payload.get("metrics_baseline")) + inventory_payload = _as_mapping(payload.get("inventory")) + inventory_files = _as_mapping(inventory_payload.get("files")) + inventory_code = _as_mapping(inventory_payload.get("code")) + file_registry = _as_mapping(inventory_payload.get("file_registry")) + findings = _as_mapping(payload.get("findings")) + findings_summary = _as_mapping(findings.get("summary")) + findings_families = _as_mapping(findings_summary.get("families")) + findings_severity = _as_mapping(findings_summary.get("severity")) + findings_impact_scope = _as_mapping(findings_summary.get("impact_scope")) + findings_clones = _as_mapping(findings_summary.get("clones")) + findings_suppressed = 
_as_mapping(findings_summary.get("suppressed")) + metrics_payload = _as_mapping(payload.get("metrics")) + metrics_summary = _as_mapping(metrics_payload.get("summary")) + metrics_families = _as_mapping(metrics_payload.get("families")) + derived = _as_mapping(payload.get("derived")) + overview = _as_mapping(derived.get("overview")) + hotlists = _as_mapping(derived.get("hotlists")) + suggestions_payload = _as_sequence(derived.get("suggestions")) + integrity = _as_mapping(payload.get("integrity")) + canonicalization = _as_mapping(integrity.get("canonicalization")) + digest = _as_mapping(integrity.get("digest")) + findings_groups = _as_mapping(findings.get("groups")) + clone_groups = _as_mapping(findings_groups.get("clones")) + suppressed_clone_groups = _as_mapping(clone_groups.get("suppressed")) + runtime_meta = _as_mapping(meta_payload.get("runtime")) + clone_summary_keys: list[str] = ["functions", "blocks", "segments", "new", "known"] + if "suppressed" in findings_clones: + clone_summary_keys.append("suppressed") + suppressed_summary_keys: list[str] = ["dead_code"] + if "clones" in findings_suppressed: + suppressed_summary_keys.append("clones") + + lines = [ + "REPORT METADATA", + "Report schema version: " + f"{format_meta_text_value(payload.get('report_schema_version'))}", + "CodeClone version: " + f"{format_meta_text_value(meta_payload.get('codeclone_version'))}", + f"Project name: {format_meta_text_value(meta_payload.get('project_name'))}", + f"Scan root: {format_meta_text_value(meta_payload.get('scan_root'))}", + f"Python version: {format_meta_text_value(meta_payload.get('python_version'))}", + f"Python tag: {format_meta_text_value(meta_payload.get('python_tag'))}", + f"Analysis mode: {format_meta_text_value(meta_payload.get('analysis_mode'))}", + f"Report mode: {format_meta_text_value(meta_payload.get('report_mode'))}", + "Report generated (UTC): " + f"{format_meta_text_value(runtime_meta.get('report_generated_at_utc'))}", + "Computed metric families: " + 
f"{format_meta_text_value(meta_payload.get('computed_metric_families'))}", + f"Baseline path: {format_meta_text_value(baseline.get('path'))}", + "Baseline fingerprint version: " + f"{format_meta_text_value(baseline.get('fingerprint_version'))}", + "Baseline schema version: " + f"{format_meta_text_value(baseline.get('schema_version'))}", + f"Baseline Python tag: {format_meta_text_value(baseline.get('python_tag'))}", + "Baseline generator name: " + f"{format_meta_text_value(baseline.get('generator_name'))}", + "Baseline generator version: " + f"{format_meta_text_value(baseline.get('generator_version'))}", + "Baseline payload sha256: " + f"{format_meta_text_value(baseline.get('payload_sha256'))}", + "Baseline payload verified: " + f"{format_meta_text_value(baseline.get('payload_sha256_verified'))}", + f"Baseline loaded: {format_meta_text_value(baseline.get('loaded'))}", + f"Baseline status: {format_meta_text_value(baseline.get('status'))}", + f"Cache path: {format_meta_text_value(cache.get('path'))}", + f"Cache schema version: {format_meta_text_value(cache.get('schema_version'))}", + f"Cache status: {format_meta_text_value(cache.get('status'))}", + f"Cache used: {format_meta_text_value(cache.get('used'))}", + "Metrics baseline path: " + f"{format_meta_text_value(metrics_baseline.get('path'))}", + "Metrics baseline loaded: " + f"{format_meta_text_value(metrics_baseline.get('loaded'))}", + "Metrics baseline status: " + f"{format_meta_text_value(metrics_baseline.get('status'))}", + "Metrics baseline schema version: " + f"{format_meta_text_value(metrics_baseline.get('schema_version'))}", + "Metrics baseline payload sha256: " + f"{format_meta_text_value(metrics_baseline.get('payload_sha256'))}", + "Metrics baseline payload verified: " + f"{format_meta_text_value(metrics_baseline.get('payload_sha256_verified'))}", + ] + + if ( + baseline.get("loaded") is not True + or str(baseline.get("status", "")).strip().lower() != "ok" + ): + lines.append("Note: baseline is untrusted; 
all groups are treated as NEW.") + + lines.extend( + [ + "", + "INVENTORY", + "Files: " + + _format_key_values( + inventory_files, + ( + "total_found", + "analyzed", + "cached", + "skipped", + "source_io_skipped", + ), + ), + "Code: " + + _format_key_values( + inventory_code, + ("scope", "parsed_lines", "functions", "methods", "classes"), + ), + "File registry: " + f"encoding={format_meta_text_value(file_registry.get('encoding'))} " + f"count={len(_as_sequence(file_registry.get('items')))}", + "", + "FINDINGS SUMMARY", + f"Total groups: {format_meta_text_value(findings_summary.get('total'))}", + "Families: " + + _format_key_values( + findings_families, + ("clones", "structural", "dead_code", "design"), + ), + "Severity: " + + _format_key_values( + findings_severity, + ("critical", "warning", "info"), + ), + "Impact scope: " + + _format_key_values( + findings_impact_scope, + ("runtime", "non_runtime", "mixed"), + ), + "Clones: " + + _format_key_values( + findings_clones, + tuple(clone_summary_keys), + ), + "Suppressed: " + + _format_key_values( + findings_suppressed, + tuple(suppressed_summary_keys), + ), + "", + "METRICS SUMMARY", + ] + ) + for family_name in ( + "complexity", + "coupling", + "cohesion", + "coverage_join", + "overloaded_modules", + "dependencies", + "dead_code", + "health", + ): + family_summary = _as_mapping(metrics_summary.get(family_name)) + if family_name == "coverage_join" and not family_summary: + continue + keys: Sequence[str] + match family_name: + case "complexity" | "coupling": + keys = ("total", "average", "max", "high_risk") + case "cohesion": + keys = ("total", "average", "max", "low_cohesion") + case "coverage_join": + keys = ( + "status", + "source", + "units", + "measured_units", + "overall_permille", + "coverage_hotspots", + "scope_gap_hotspots", + "hotspot_threshold_percent", + ) + case "dependencies": + keys = ("modules", "edges", "cycles", "max_depth") + case "overloaded_modules": + keys = ( + "total", + "candidates", + 
"population_status", + "top_score", + "average_score", + ) + case "dead_code": + keys = ("total", "high_confidence", "suppressed") + case _: + keys = ("score", "grade") + lines.append(f"{family_name}: {_format_key_values(family_summary, keys)}") + + coverage_join_family = _as_mapping(metrics_families.get("coverage_join")) + coverage_join_items = _as_sequence(coverage_join_family.get("items")) + if coverage_join_family: + lines.extend( + [ + "", + "COVERAGE JOIN (top 10)", + ] + ) + if not coverage_join_items: + lines.append("(none)") + else: + lines.extend( + "- " + + _format_key_values( + item, + ( + "relative_path", + "qualname", + "coverage_status", + "risk", + "coverage_permille", + "cyclomatic_complexity", + "coverage_hotspot", + "scope_gap_hotspot", + ), + ) + for item in map(_as_mapping, coverage_join_items[:10]) + ) + + overloaded_modules_family = _as_mapping(metrics_families.get("overloaded_modules")) + if not overloaded_modules_family: + overloaded_modules_family = _as_mapping(metrics_families.get("god_modules")) + overloaded_module_items = _as_sequence(overloaded_modules_family.get("items")) + lines.extend( + [ + "", + "OVERLOADED MODULES (top 10)", + ] + ) + if not overloaded_module_items: + lines.append("(none)") + else: + lines.extend( + "- " + + _format_key_values( + item, + ( + "module", + "relative_path", + "source_kind", + "score", + "candidate_status", + "loc", + "fan_in", + "fan_out", + "complexity_total", + ), + ) + for item in map(_as_mapping, overloaded_module_items[:10]) + ) + + lines.append("") + _append_overview(lines, overview, hotlists) + + lines.append("") + _append_suggestions(lines, suggestions=suggestions_payload, findings=findings) + + lines.append("") + _append_clone_section( + lines, + title="FUNCTION CLONES", + groups=_as_sequence(clone_groups.get("functions")), + novelty="new", + metric_name="loc", + ) + lines.append("") + _append_clone_section( + lines, + title="FUNCTION CLONES", + 
groups=_as_sequence(clone_groups.get("functions")), + novelty="known", + metric_name="loc", + ) + lines.append("") + _append_clone_section( + lines, + title="BLOCK CLONES", + groups=_as_sequence(clone_groups.get("blocks")), + novelty="new", + metric_name="size", + ) + lines.append("") + _append_clone_section( + lines, + title="BLOCK CLONES", + groups=_as_sequence(clone_groups.get("blocks")), + novelty="known", + metric_name="size", + ) + lines.append("") + _append_clone_section( + lines, + title="SEGMENT CLONES", + groups=_as_sequence(clone_groups.get("segments")), + novelty="new", + metric_name="size", + ) + lines.append("") + _append_clone_section( + lines, + title="SEGMENT CLONES", + groups=_as_sequence(clone_groups.get("segments")), + novelty="known", + metric_name="size", + ) + if suppressed_clone_groups: + lines.append("") + _append_suppressed_clone_section( + lines, + title="SUPPRESSED FUNCTION CLONES", + groups=_as_sequence(suppressed_clone_groups.get("functions")), + metric_name="loc", + ) + lines.append("") + _append_suppressed_clone_section( + lines, + title="SUPPRESSED BLOCK CLONES", + groups=_as_sequence(suppressed_clone_groups.get("blocks")), + metric_name="size", + ) + lines.append("") + _append_suppressed_clone_section( + lines, + title="SUPPRESSED SEGMENT CLONES", + groups=_as_sequence(suppressed_clone_groups.get("segments")), + metric_name="size", + ) + lines.append("") + _append_structural_findings( + lines, + _as_sequence(_as_mapping(findings_groups.get("structural")).get("groups")), + ) + lines.append("") + _append_single_item_findings( + lines, + title="DEAD CODE FINDINGS", + groups=_as_sequence( + _as_mapping(findings_groups.get("dead_code")).get("groups") + ), + fact_keys=("kind", "confidence"), + ) + lines.append("") + dead_code_family = _as_mapping(metrics_families.get("dead_code")) + _append_suppressed_dead_code_items( + lines, + items=_as_sequence(dead_code_family.get("suppressed_items")), + ) + lines.append("") + 
_append_single_item_findings( + lines, + title="DESIGN FINDINGS", + groups=_as_sequence(_as_mapping(findings_groups.get("design")).get("groups")), + fact_keys=("lcom4", "method_count", "instance_var_count", "fan_out", "risk"), + ) + lines.extend( + [ + "", + "INTEGRITY", + "Canonicalization: " + + _format_key_values( + canonicalization, + ("version", "scope", "sections"), + ), + "Digest: " + + _format_key_values( + digest, + ("algorithm", "verified", "value"), + ), + ] + ) + + return "\n".join(lines).rstrip() + "\n" + + +__all__ = [ + "_append_clone_section", + "_append_single_item_findings", + "_append_structural_findings", + "_append_suggestions", + "_append_suppressed_dead_code_items", + "_as_int", + "_structural_kind_label", + "render_text_report_document", +] diff --git a/codeclone/report/sarif.py b/codeclone/report/sarif.py index ec2177d..0e7cbde 100644 --- a/codeclone/report/sarif.py +++ b/codeclone/report/sarif.py @@ -6,956 +6,32 @@ from __future__ import annotations -import hashlib from collections.abc import Collection, Mapping, Sequence -from dataclasses import dataclass -from pathlib import Path -from typing import TYPE_CHECKING, cast - -import orjson - -from .._coerce import as_float as _as_float -from .._coerce import as_int as _as_int -from .._coerce import as_mapping as _as_mapping -from .._coerce import as_sequence as _as_sequence -from ..contracts import DOCS_URL, REPOSITORY_URL -from ..domain.findings import ( - CATEGORY_COHESION, - CATEGORY_COMPLEXITY, - CATEGORY_COUPLING, - CATEGORY_COVERAGE, - CATEGORY_DEPENDENCY, - CLONE_KIND_BLOCK, - CLONE_KIND_FUNCTION, - FAMILY_CLONE, - FAMILY_CLONES, - FAMILY_DEAD_CODE, - FAMILY_DESIGN, - FAMILY_STRUCTURAL, - FINDING_KIND_CLASS_HOTSPOT, - FINDING_KIND_CLONE_GROUP, - FINDING_KIND_COVERAGE_HOTSPOT, - FINDING_KIND_COVERAGE_SCOPE_GAP, - FINDING_KIND_CYCLE, - FINDING_KIND_FUNCTION_HOTSPOT, - FINDING_KIND_UNUSED_SYMBOL, - STRUCTURAL_KIND_CLONE_COHORT_DRIFT, - STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, - 
STRUCTURAL_KIND_DUPLICATED_BRANCHES, - SYMBOL_KIND_CLASS, - SYMBOL_KIND_FUNCTION, - SYMBOL_KIND_METHOD, +from typing import TYPE_CHECKING + +from .document import build_report_document +from .renderers.sarif import ( + _baseline_state, + _location_entry, + _location_message, + _logical_locations, + _partial_fingerprints, + _primary_location_properties, + _result_entry, + _result_message, + _result_properties, + _rule_name, + _rule_spec, + _scan_root_uri, + _severity_to_level, + _text, + render_sarif_report_document, ) -from ..domain.quality import ( - CONFIDENCE_HIGH, - CONFIDENCE_MEDIUM, - SEVERITY_CRITICAL, - SEVERITY_WARNING, -) -from .json_contract import build_report_document if TYPE_CHECKING: from ..models import StructuralFindingGroup, Suggestion from .types import GroupMapLike -SARIF_VERSION = "2.1.0" -SARIF_PROFILE_VERSION = "1.0" -SARIF_SCHEMA_URL = "https://json.schemastore.org/sarif-2.1.0.json" -SARIF_SRCROOT_BASE_ID = "%SRCROOT%" - - -@dataclass(frozen=True, slots=True) -class _RuleSpec: - rule_id: str - short_description: str - full_description: str - default_level: str - category: str - kind: str - precision: str - - -def _text(value: object) -> str: - if value is None: - return "" - return str(value).strip() - - -def _severity_to_level(severity: str) -> str: - if severity == SEVERITY_CRITICAL: - return "error" - if severity == SEVERITY_WARNING: - return "warning" - return "note" - - -def _rule_name(spec: _RuleSpec) -> str: - return f"codeclone.{spec.rule_id}" - - -def _rule_remediation(spec: _RuleSpec) -> str: - rule_id = spec.rule_id - if rule_id.startswith("CCLONE"): - return ( - "Review the representative occurrence and related occurrences, " - "then extract shared behavior or keep accepted debt in the baseline." - ) - if rule_id == "CSTRUCT001": - return ( - "Collapse repeated branch shapes into a shared helper, validator, " - "or control-flow abstraction where the behavior is intentionally shared." 
- ) - if rule_id == "CSTRUCT002": - return ( - "Review the clone cohort and reconcile guard or early-exit behavior " - "if those members are expected to stay aligned." - ) - if rule_id == "CSTRUCT003": - return ( - "Review the clone cohort and reconcile terminal, guard, or try/finally " - "profiles if the drift is not intentional." - ) - if rule_id.startswith("CDEAD"): - return ( - "Remove the unused symbol or keep it explicitly documented/suppressed " - "when runtime dynamics call it intentionally." - ) - if rule_id == "CDESIGN001": - return ( - "Split the class or regroup behavior so responsibilities become cohesive." - ) - if rule_id == "CDESIGN002": - return "Split the function or simplify control flow to reduce complexity." - if rule_id == "CDESIGN003": - return "Reduce dependencies or split responsibilities to lower coupling." - return ( - "Break the cycle or invert dependencies so modules no longer depend " - "on each other circularly." - ) - - -def _rule_help(spec: _RuleSpec) -> dict[str, str]: - remediation = _rule_remediation(spec) - return { - "text": f"{spec.full_description} {remediation}", - "markdown": ( - f"{spec.full_description}\n\n" - f"{remediation}\n\n" - f"See [CodeClone docs]({DOCS_URL})." 
- ), - } - - -def _scan_root_uri(payload: Mapping[str, object]) -> str: - meta = _as_mapping(payload.get("meta")) - runtime = _as_mapping(meta.get("runtime")) - scan_root_absolute = _text(runtime.get("scan_root_absolute")) - if not scan_root_absolute: - return "" - scan_root_path = Path(scan_root_absolute) - if not scan_root_path.is_absolute(): - return "" - try: - uri = scan_root_path.as_uri() - except ValueError: - return "" - return uri if uri.endswith("/") else f"{uri}/" - - -def _flatten_findings(payload: Mapping[str, object]) -> list[Mapping[str, object]]: - findings = _as_mapping(payload.get("findings")) - groups = _as_mapping(findings.get("groups")) - clones = _as_mapping(groups.get(FAMILY_CLONES)) - structural = _as_mapping(groups.get(FAMILY_STRUCTURAL)) - dead_code = _as_mapping(groups.get(FAMILY_DEAD_CODE)) - design = _as_mapping(groups.get(FAMILY_DESIGN)) - return [ - *map(_as_mapping, _as_sequence(clones.get("functions"))), - *map(_as_mapping, _as_sequence(clones.get("blocks"))), - *map(_as_mapping, _as_sequence(clones.get("segments"))), - *map(_as_mapping, _as_sequence(structural.get("groups"))), - *map(_as_mapping, _as_sequence(dead_code.get("groups"))), - *map(_as_mapping, _as_sequence(design.get("groups"))), - ] - - -def _artifact_catalog( - findings: Sequence[Mapping[str, object]], - *, - use_uri_base_id: bool, -) -> tuple[list[dict[str, object]], dict[str, int]]: - artifact_paths = sorted( - { - relative_path - for group in findings - for item in map(_as_mapping, _as_sequence(group.get("items"))) - for relative_path in (_text(item.get("relative_path")),) - if relative_path - } - ) - artifact_index_map = {path: index for index, path in enumerate(artifact_paths)} - artifacts = [ - { - "location": { - "uri": path, - **({"uriBaseId": SARIF_SRCROOT_BASE_ID} if use_uri_base_id else {}), - } - } - for path in artifact_paths - ] - return cast(list[dict[str, object]], artifacts), artifact_index_map - - -def _clone_rule_spec(category: str) -> _RuleSpec: - 
if category == CLONE_KIND_FUNCTION: - return _RuleSpec( - "CCLONE001", - "Function clone group", - "Multiple functions share the same normalized function body.", - SEVERITY_WARNING, - FAMILY_CLONE, - FINDING_KIND_CLONE_GROUP, - CONFIDENCE_HIGH, - ) - if category == CLONE_KIND_BLOCK: - return _RuleSpec( - "CCLONE002", - "Block clone group", - "Repeated normalized statement blocks were detected across occurrences.", - SEVERITY_WARNING, - FAMILY_CLONE, - FINDING_KIND_CLONE_GROUP, - CONFIDENCE_HIGH, - ) - return _RuleSpec( - "CCLONE003", - "Segment clone group", - "Repeated normalized statement segments were detected across occurrences.", - "note", - FAMILY_CLONE, - FINDING_KIND_CLONE_GROUP, - CONFIDENCE_MEDIUM, - ) - - -def _structural_rule_spec(kind: str) -> _RuleSpec: - if kind == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: - return _RuleSpec( - "CSTRUCT002", - "Clone guard/exit divergence", - ( - "Members of the same function-clone cohort diverged in " - "entry guards or early-exit behavior." - ), - SEVERITY_WARNING, - FAMILY_STRUCTURAL, - STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, - CONFIDENCE_HIGH, - ) - if kind == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: - return _RuleSpec( - "CSTRUCT003", - "Clone cohort drift", - ( - "Members of the same function-clone cohort drifted from " - "the majority terminal/guard/try profile." 
- ), - SEVERITY_WARNING, - FAMILY_STRUCTURAL, - STRUCTURAL_KIND_CLONE_COHORT_DRIFT, - CONFIDENCE_HIGH, - ) - return _RuleSpec( - "CSTRUCT001", - "Duplicated branches", - "Repeated branch families with matching structural signatures were detected.", - SEVERITY_WARNING, - FAMILY_STRUCTURAL, - kind or STRUCTURAL_KIND_DUPLICATED_BRANCHES, - CONFIDENCE_MEDIUM, - ) - - -def _dead_code_rule_spec(category: str) -> _RuleSpec: - if category == SYMBOL_KIND_FUNCTION: - return _RuleSpec( - "CDEAD001", - "Unused function", - "Function appears to be unused with high confidence.", - SEVERITY_WARNING, - FAMILY_DEAD_CODE, - FINDING_KIND_UNUSED_SYMBOL, - CONFIDENCE_HIGH, - ) - if category == SYMBOL_KIND_CLASS: - return _RuleSpec( - "CDEAD002", - "Unused class", - "Class appears to be unused with high confidence.", - SEVERITY_WARNING, - FAMILY_DEAD_CODE, - FINDING_KIND_UNUSED_SYMBOL, - CONFIDENCE_HIGH, - ) - if category == SYMBOL_KIND_METHOD: - return _RuleSpec( - "CDEAD003", - "Unused method", - "Method appears to be unused with high confidence.", - SEVERITY_WARNING, - FAMILY_DEAD_CODE, - FINDING_KIND_UNUSED_SYMBOL, - CONFIDENCE_HIGH, - ) - return _RuleSpec( - "CDEAD004", - "Unused symbol", - "Symbol appears to be unused with reported confidence.", - SEVERITY_WARNING, - FAMILY_DEAD_CODE, - FINDING_KIND_UNUSED_SYMBOL, - CONFIDENCE_MEDIUM, - ) - - -def _design_rule_spec(category: str, kind: str) -> _RuleSpec: - if category == CATEGORY_COHESION: - return _RuleSpec( - "CDESIGN001", - "Low cohesion class", - "Class cohesion is low according to LCOM4 hotspot thresholds.", - SEVERITY_WARNING, - FAMILY_DESIGN, - kind or FINDING_KIND_CLASS_HOTSPOT, - CONFIDENCE_HIGH, - ) - if category == CATEGORY_COMPLEXITY: - return _RuleSpec( - "CDESIGN002", - "Complexity hotspot", - "Function exceeds the project complexity hotspot threshold.", - SEVERITY_WARNING, - FAMILY_DESIGN, - kind or FINDING_KIND_FUNCTION_HOTSPOT, - CONFIDENCE_HIGH, - ) - if category == CATEGORY_COUPLING: - return _RuleSpec( - 
"CDESIGN003", - "Coupling hotspot", - "Class exceeds the project coupling hotspot threshold.", - SEVERITY_WARNING, - FAMILY_DESIGN, - kind or FINDING_KIND_CLASS_HOTSPOT, - CONFIDENCE_HIGH, - ) - if category == CATEGORY_COVERAGE: - if kind == FINDING_KIND_COVERAGE_SCOPE_GAP: - return _RuleSpec( - "CDESIGN006", - "Coverage scope gap", - "A medium/high-risk function is outside the supplied joined " - "coverage scope.", - SEVERITY_WARNING, - FAMILY_DESIGN, - kind, - CONFIDENCE_HIGH, - ) - return _RuleSpec( - "CDESIGN005", - "Coverage hotspot", - "A medium/high-risk function falls below the configured joined " - "coverage threshold.", - SEVERITY_WARNING, - FAMILY_DESIGN, - kind or FINDING_KIND_COVERAGE_HOTSPOT, - CONFIDENCE_HIGH, - ) - return _RuleSpec( - "CDESIGN004", - "Dependency cycle", - "A dependency cycle was detected between project modules.", - "error", - FAMILY_DESIGN, - kind or FINDING_KIND_CYCLE, - CONFIDENCE_HIGH, - ) - - -def _rule_spec(group: Mapping[str, object]) -> _RuleSpec: - family = _text(group.get("family")) - category = _text(group.get("category")) - kind = _text(group.get("kind")) - if family == FAMILY_CLONE: - return _clone_rule_spec(category) - if family == FAMILY_STRUCTURAL: - return _structural_rule_spec(kind) - if family == FAMILY_DEAD_CODE: - return _dead_code_rule_spec(category) - return _design_rule_spec(category, kind) - - -def _structural_signature(group: Mapping[str, object]) -> Mapping[str, object]: - return _as_mapping(_as_mapping(group.get("signature")).get("stable")) - - -def _clone_result_message( - group: Mapping[str, object], - *, - category: str, - count: int, - spread: Mapping[str, object], -) -> str: - clone_type = _text(group.get("clone_type")) - return ( - f"{category.title()} clone group ({clone_type}), {count} occurrences " - f"across {_as_int(spread.get('files'))} files." 
- ) - - -def _structural_result_message( - group: Mapping[str, object], - *, - count: int, - qualname: str, -) -> str: - signature = _structural_signature(group) - signature_family = _text(signature.get("family")) - if signature_family == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: - cohort_id = _text(signature.get("cohort_id")) - return ( - "Clone guard/exit divergence" - f" ({count} divergent members) in cohort " - f"{cohort_id or 'unknown'}." - ) - if signature_family == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: - drift_fields = _as_sequence(signature.get("drift_fields")) - drift_label = ", ".join(_text(item) for item in drift_fields) or "profile" - cohort_id = _text(signature.get("cohort_id")) - return ( - f"Clone cohort drift ({drift_label}), " - f"{count} divergent members in cohort {cohort_id or 'unknown'}." - ) - stmt_shape = _text(signature.get("stmt_shape")) - if qualname: - return ( - f"Repeated branch family ({stmt_shape}), {count} occurrences in {qualname}." - ) - return f"Repeated branch family ({stmt_shape}), {count} occurrences." - - -def _dead_code_result_message( - group: Mapping[str, object], - *, - category: str, - qualname: str, - relative_path: str, -) -> str: - confidence = _text(group.get("confidence")) or "reported" - target = qualname or relative_path - return f"Unused {category} with {confidence} confidence: {target}." - - -def _design_result_message( - *, - category: str, - facts: Mapping[str, object], - qualname: str, - items: Sequence[Mapping[str, object]], -) -> str: - metric_specs = { - CATEGORY_COHESION: ("lcom4", "Low cohesion class", "LCOM4"), - CATEGORY_COMPLEXITY: ( - "cyclomatic_complexity", - "High complexity function", - "CC", - ), - CATEGORY_COUPLING: ("cbo", "High coupling class", "CBO"), - } - spec = metric_specs.get(category) - if spec is not None: - fact_key, label, metric_label = spec - value = _as_int(facts.get(fact_key)) - return f"{label} ({metric_label}={value}): {qualname}." 
- if category == CATEGORY_COVERAGE: - coverage_status = _text(facts.get("coverage_status")) - threshold = _as_int(facts.get("hotspot_threshold_percent")) - if coverage_status == "missing_from_report": - return f"Coverage scope gap (not in coverage.xml): {qualname}." - coverage_pct = _as_int(facts.get("coverage_permille")) / 10.0 - return f"Coverage hotspot ({coverage_pct:.1f}% < {threshold}%): {qualname}." - modules = [_text(item.get("module")) for item in items if _text(item.get("module"))] - return f"Dependency cycle ({len(modules)} modules): {' -> '.join(modules)}." - - -def _result_message(group: Mapping[str, object]) -> str: - family = _text(group.get("family")) - category = _text(group.get("category")) - count = _as_int(group.get("count")) - spread = _as_mapping(group.get("spread")) - items = [_as_mapping(item) for item in _as_sequence(group.get("items"))] - first_item = items[0] if items else {} - qualname = _text(first_item.get("qualname")) - if family == FAMILY_CLONE: - return _clone_result_message( - group, - category=category, - count=count, - spread=spread, - ) - if family == FAMILY_STRUCTURAL: - return _structural_result_message( - group, - count=count, - qualname=qualname, - ) - if family == FAMILY_DEAD_CODE: - return _dead_code_result_message( - group, - category=category, - qualname=qualname, - relative_path=_text(first_item.get("relative_path")), - ) - return _design_result_message( - category=category, - facts=_as_mapping(group.get("facts")), - qualname=qualname, - items=items, - ) - - -def _logical_locations(item: Mapping[str, object]) -> list[dict[str, object]]: - qualname = _text(item.get("qualname")) - if qualname: - return [{"fullyQualifiedName": qualname}] - module = _text(item.get("module")) - if module: - return [{"fullyQualifiedName": module}] - return [] - - -def _location_message( - group: Mapping[str, object], - *, - related_id: int | None = None, -) -> str: - family = _text(group.get("family")) - category = 
_text(group.get("category")) - if family in {FAMILY_CLONE, FAMILY_STRUCTURAL}: - return ( - "Representative occurrence" - if related_id is None - else f"Related occurrence #{related_id}" - ) - if family == FAMILY_DEAD_CODE: - return ( - "Unused symbol declaration" - if related_id is None - else f"Related declaration #{related_id}" - ) - if category == CATEGORY_DEPENDENCY: - return ( - "Cycle member" - if related_id is None - else f"Related cycle member #{related_id}" - ) - return ( - "Primary location" if related_id is None else f"Related location #{related_id}" - ) - - -def _location_entry( - item: Mapping[str, object], - *, - related_id: int | None = None, - artifact_index_map: Mapping[str, int] | None = None, - use_uri_base_id: bool = False, - message_text: str = "", -) -> dict[str, object]: - relative_path = _text(item.get("relative_path")) - location: dict[str, object] = {} - if relative_path: - artifact_location: dict[str, object] = { - "uri": relative_path, - } - if use_uri_base_id: - artifact_location["uriBaseId"] = SARIF_SRCROOT_BASE_ID - if artifact_index_map and relative_path in artifact_index_map: - artifact_location["index"] = artifact_index_map[relative_path] - physical_location: dict[str, object] = { - "artifactLocation": artifact_location, - } - else: - physical_location = {} - start_line = _as_int(item.get("start_line")) - end_line = _as_int(item.get("end_line")) - if physical_location and start_line > 0: - region: dict[str, object] = {"startLine": start_line} - if end_line > 0: - region["endLine"] = end_line - physical_location["region"] = region - if physical_location: - location["physicalLocation"] = physical_location - logical_locations = _logical_locations(item) - if logical_locations: - location["logicalLocations"] = logical_locations - if message_text: - location["message"] = {"text": message_text} - if related_id is not None: - location["id"] = related_id - return location - - -def _generic_properties(group: Mapping[str, object]) -> 
dict[str, object]: - source_scope = _as_mapping(group.get("source_scope")) - spread = _as_mapping(group.get("spread")) - properties: dict[str, object] = { - "findingId": _text(group.get("id")), - "family": _text(group.get("family")), - "category": _text(group.get("category")), - "kind": _text(group.get("kind")), - "confidence": _text(group.get("confidence")), - "priority": round(_as_float(group.get("priority")), 2), - "impactScope": _text(source_scope.get("impact_scope")), - "sourceKind": _text(source_scope.get("dominant_kind")), - "spreadFiles": _as_int(spread.get("files")), - "spreadFunctions": _as_int(spread.get("functions")), - "helpUri": DOCS_URL, - } - return properties - - -def _clone_result_properties( - props: dict[str, object], - group: Mapping[str, object], -) -> dict[str, object]: - props.update( - { - "novelty": _text(group.get("novelty")), - "cloneKind": _text(group.get("clone_kind")), - "cloneType": _text(group.get("clone_type")), - "groupArity": _as_int(group.get("count")), - } - ) - return props - - -def _structural_signature_properties( - signature: Mapping[str, object], -) -> dict[str, object]: - signature_family = _text(signature.get("family")) - if signature_family == STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: - return { - "cohortId": _text(signature.get("cohort_id")), - "majorityGuardCount": _as_int( - signature.get("majority_guard_count"), - ), - "majorityTerminalKind": _text( - signature.get("majority_terminal_kind"), - ), - } - if signature_family == STRUCTURAL_KIND_CLONE_COHORT_DRIFT: - return { - "cohortId": _text(signature.get("cohort_id")), - "driftFields": [ - _text(field) for field in _as_sequence(signature.get("drift_fields")) - ], - } - return { - "statementShape": _text(signature.get("stmt_shape")), - "terminalKind": _text(signature.get("terminal_kind")), - } - - -def _structural_result_properties( - props: dict[str, object], - group: Mapping[str, object], -) -> dict[str, object]: - signature = _structural_signature(group) - 
props["occurrenceCount"] = _as_int(group.get("count")) - props.update(_structural_signature_properties(signature)) - return props - - -def _design_result_properties( - props: dict[str, object], - *, - facts: Mapping[str, object], -) -> dict[str, object]: - for key in ( - "lcom4", - "method_count", - "instance_var_count", - "cbo", - "cyclomatic_complexity", - "nesting_depth", - "cycle_length", - "coverage_permille", - "covered_lines", - "executable_lines", - "hotspot_threshold_percent", - "coverage_status", - ): - if key in facts: - props[key] = facts[key] - return props - - -def _result_properties(group: Mapping[str, object]) -> dict[str, object]: - props = _generic_properties(group) - family = _text(group.get("family")) - if family == FAMILY_CLONE: - return _clone_result_properties(props, group) - if family == FAMILY_STRUCTURAL: - return _structural_result_properties(props, group) - if family == FAMILY_DESIGN: - return _design_result_properties( - props, - facts=_as_mapping(group.get("facts")), - ) - return props - - -def _partial_fingerprints( - *, - rule_id: str, - group: Mapping[str, object], - primary_item: Mapping[str, object], -) -> dict[str, str]: - finding_id = _text(group.get("id")) - path = _text(primary_item.get("relative_path")) - qualname = _text(primary_item.get("qualname")) - start_line = _as_int(primary_item.get("start_line")) - if path and start_line > 0: - fingerprint_material = "\0".join( - ( - rule_id, - finding_id, - path, - qualname, - ) - ) - return { - "primaryLocationLineHash": ( - f"{hashlib.sha256(fingerprint_material.encode('utf-8')).hexdigest()[:16]}" - f":{start_line}" - ) - } - return {} - - -def _primary_location_properties( - primary_item: Mapping[str, object], -) -> dict[str, object]: - path = _text(primary_item.get("relative_path")) - qualname = _text(primary_item.get("qualname")) - start_line = _as_int(primary_item.get("start_line")) - end_line = _as_int(primary_item.get("end_line")) - props: dict[str, object] = {} - if path: - 
props["primaryPath"] = path - if qualname: - props["primaryQualname"] = qualname - if start_line > 0: - props["primaryRegion"] = f"{start_line}-{end_line or start_line}" - return props - - -def _baseline_state(group: Mapping[str, object]) -> str: - novelty = _text(group.get("novelty")) - if novelty == "new": - return "new" - if novelty == "known": - return "unchanged" - return "" - - -def _result_entry( - *, - group: Mapping[str, object], - rule_id: str, - rule_index: int, - artifact_index_map: Mapping[str, int], - use_uri_base_id: bool, -) -> dict[str, object]: - items = [_as_mapping(item) for item in _as_sequence(group.get("items"))] - primary_item = items[0] if items else {} - primary_location = ( - _location_entry( - primary_item, - artifact_index_map=artifact_index_map, - use_uri_base_id=use_uri_base_id, - message_text=_location_message(group), - ) - if primary_item - else {} - ) - result: dict[str, object] = { - "ruleId": rule_id, - "ruleIndex": rule_index, - "kind": "fail", - "level": _severity_to_level(_text(group.get("severity"))), - "message": { - "text": _result_message(group), - }, - "locations": [primary_location] if primary_location else [], - "fingerprints": { - "codecloneFindingId": _text(group.get("id")), - }, - "partialFingerprints": _partial_fingerprints( - rule_id=rule_id, - group=group, - primary_item=primary_item, - ), - "properties": _result_properties(group), - } - if primary_item: - properties = cast(dict[str, object], result["properties"]) - properties.update(_primary_location_properties(primary_item)) - baseline_state = _baseline_state(group) - if baseline_state: - result["baselineState"] = baseline_state - related_items = items[1:] - if related_items: - related_locations = [ - _location_entry( - item, - related_id=index, - artifact_index_map=artifact_index_map, - use_uri_base_id=use_uri_base_id, - message_text=_location_message(group, related_id=index), - ) - for index, item in enumerate(related_items, start=1) - ] - 
result["relatedLocations"] = [ - location for location in related_locations if location - ] - return result - - -def render_sarif_report_document(payload: Mapping[str, object]) -> str: - meta = _as_mapping(payload.get("meta")) - runtime = _as_mapping(meta.get("runtime")) - analysis_started_at = _text(runtime.get("analysis_started_at_utc")) - generated_at = _text(runtime.get("report_generated_at_utc")) - analysis_mode = _text(meta.get("analysis_mode")) or "full" - findings = sorted( - _flatten_findings(payload), - key=lambda group: ( - _rule_spec(group).rule_id, - _text(group.get("id")), - ), - ) - scan_root_uri = _scan_root_uri(payload) - use_uri_base_id = bool(scan_root_uri) - artifacts, artifact_index_map = _artifact_catalog( - findings, - use_uri_base_id=use_uri_base_id, - ) - used_rule_specs = { - spec.rule_id: spec for spec in (_rule_spec(group) for group in findings) - } - ordered_rule_specs = [used_rule_specs[key] for key in sorted(used_rule_specs)] - rule_index_map = { - spec.rule_id: index for index, spec in enumerate(ordered_rule_specs) - } - results = [ - _result_entry( - group=group, - rule_id=rule.rule_id, - rule_index=rule_index_map[rule.rule_id], - artifact_index_map=artifact_index_map, - use_uri_base_id=use_uri_base_id, - ) - for group in findings - for rule in (_rule_spec(group),) - ] - invocation: dict[str, object] = { - "executionSuccessful": True, - **({"startTimeUtc": analysis_started_at} if analysis_started_at else {}), - **({"endTimeUtc": generated_at} if generated_at else {}), - } - if scan_root_uri: - invocation["workingDirectory"] = {"uri": scan_root_uri} - run: dict[str, object] = { - "tool": { - "driver": { - "name": "codeclone", - "version": _text(meta.get("codeclone_version")), - "informationUri": REPOSITORY_URL, - "rules": [ - { - "id": spec.rule_id, - "name": _rule_name(spec), - "shortDescription": {"text": spec.short_description}, - "fullDescription": {"text": spec.full_description}, - "help": _rule_help(spec), - 
"defaultConfiguration": {"level": spec.default_level}, - "helpUri": DOCS_URL, - "properties": { - "category": spec.category, - "kind": spec.kind, - "precision": spec.precision, - "tags": [spec.category, spec.kind, spec.precision], - }, - } - for spec in ordered_rule_specs - ], - } - }, - "automationDetails": { - "id": "/".join( - part - for part in ( - "codeclone", - analysis_mode, - generated_at - or _text( - _as_mapping( - _as_mapping(payload.get("integrity")).get("digest") - ).get("value") - )[:12], - ) - if part - ), - }, - **( - { - "originalUriBaseIds": { - SARIF_SRCROOT_BASE_ID: { - "uri": scan_root_uri, - "description": {"text": "The root of the scanned source tree."}, - } - } - } - if scan_root_uri - else {} - ), - "artifacts": artifacts, - "results": results, - "invocations": [invocation], - "properties": { - "profileVersion": SARIF_PROFILE_VERSION, - "reportSchemaVersion": _text(payload.get("report_schema_version")), - "analysisMode": analysis_mode, - "reportMode": _text(meta.get("report_mode")), - "canonicalDigestSha256": _text( - _as_mapping(_as_mapping(payload.get("integrity")).get("digest")).get( - "value" - ) - ), - **({"reportGeneratedAtUtc": generated_at} if generated_at else {}), - }, - } - return orjson.dumps( - { - "$schema": SARIF_SCHEMA_URL, - "version": SARIF_VERSION, - "runs": [run], - }, - option=orjson.OPT_INDENT_2, - ).decode("utf-8") - def to_sarif_report( *, @@ -988,3 +64,23 @@ def to_sarif_report( structural_findings=tuple(structural_findings or ()), ) return render_sarif_report_document(payload) + + +__all__ = [ + "_baseline_state", + "_location_entry", + "_location_message", + "_logical_locations", + "_partial_fingerprints", + "_primary_location_properties", + "_result_entry", + "_result_message", + "_result_properties", + "_rule_name", + "_rule_spec", + "_scan_root_uri", + "_severity_to_level", + "_text", + "render_sarif_report_document", + "to_sarif_report", +] diff --git a/codeclone/report/serialize.py 
b/codeclone/report/serialize.py index 80c37d4..57a8be2 100644 --- a/codeclone/report/serialize.py +++ b/codeclone/report/serialize.py @@ -6,873 +6,26 @@ from __future__ import annotations -from collections.abc import Mapping, Sequence - -import orjson - -from .._coerce import as_int, as_mapping, as_sequence -from ..domain.source_scope import IMPACT_SCOPE_NON_RUNTIME, SOURCE_KIND_OTHER -from ._formatting import format_spread_text - -_as_int = as_int -_as_mapping = as_mapping -_as_sequence = as_sequence - - -def render_json_report_document(payload: Mapping[str, object]) -> str: - return orjson.dumps(payload, option=orjson.OPT_INDENT_2).decode("utf-8") - - -def format_meta_text_value(value: object) -> str: - if isinstance(value, bool): - return "true" if value else "false" - if value is None: - return "(none)" - if isinstance(value, float): - return f"{value:.2f}".rstrip("0").rstrip(".") or "0" - if isinstance(value, Sequence) and not isinstance( - value, - (str, bytes, bytearray), - ): - formatted = [format_meta_text_value(item) for item in value] - return ", ".join(formatted) if formatted else "(none)" - text = str(value).strip() - return text if text else "(none)" - - -def _format_key_values( - mapping: Mapping[str, object], - keys: Sequence[str], - *, - skip_empty: bool = False, -) -> str: - parts: list[str] = [] - for key in keys: - if key not in mapping: - continue - formatted = format_meta_text_value(mapping.get(key)) - if not skip_empty or formatted != "(none)": - parts.append(f"{key}={formatted}") - return " ".join(parts) if parts else "(none)" - - -def _spread_text(spread: Mapping[str, object]) -> str: - return format_spread_text( - _as_int(spread.get("files")), - _as_int(spread.get("functions")), - ) - - -def _scope_text(source_scope: Mapping[str, object]) -> str: - dominant = str(source_scope.get("dominant_kind", "")).strip() or SOURCE_KIND_OTHER - impact = ( - str(source_scope.get("impact_scope", "")).strip() or IMPACT_SCOPE_NON_RUNTIME - ) - return 
f"{dominant}/{impact}" - - -def _structural_kind_label(kind: object) -> str: - kind_text = str(kind).strip() - match kind_text: - case "duplicated_branches": - return "Duplicated branches" - case "clone_guard_exit_divergence": - return "Clone guard/exit divergence" - case "clone_cohort_drift": - return "Clone cohort drift" - case _: - return kind_text or "(none)" - - -def _location_line( - item: Mapping[str, object], - *, - metric_name: str | None = None, -) -> str: - metric_suffix = "" - if metric_name is not None and metric_name in item: - metric_suffix = ( - f" {metric_name}={format_meta_text_value(item.get(metric_name))}" - ) - return ( - f"- {format_meta_text_value(item.get('qualname'))} " - f"{format_meta_text_value(item.get('relative_path'))}:" - f"{format_meta_text_value(item.get('start_line'))}-" - f"{format_meta_text_value(item.get('end_line'))}" - f"{metric_suffix}" - ) - - -def _append_clone_section( - lines: list[str], - *, - title: str, - groups: Sequence[object], - novelty: str, - metric_name: str, -) -> None: - section_groups = [ - _as_mapping(group) - for group in groups - if str(_as_mapping(group).get("novelty", "")) == novelty - ] - lines.append(f"{title} ({novelty.upper()}) (groups={len(section_groups)})") - if not section_groups: - lines.append("(none)") - return - for idx, group in enumerate(section_groups, start=1): - lines.append(f"=== Clone group #{idx} ===") - lines.append( - "id=" - f"{format_meta_text_value(group.get('id'))} " - f"clone_type={format_meta_text_value(group.get('clone_type'))} " - f"severity={format_meta_text_value(group.get('severity'))} " - f"count={format_meta_text_value(group.get('count'))} " - f"spread={_spread_text(_as_mapping(group.get('spread')))} " - f"scope={_scope_text(_as_mapping(group.get('source_scope')))}" - ) - facts = _as_mapping(group.get("facts")) - if facts: - lines.append( - "facts: " - + _format_key_values( - facts, - tuple(sorted(str(key) for key in facts)), - skip_empty=True, - ) - ) - display_facts 
= _as_mapping(group.get("display_facts")) - if display_facts: - lines.append( - "display_facts: " - + _format_key_values( - display_facts, - tuple(sorted(str(key) for key in display_facts)), - skip_empty=True, - ) - ) - lines.extend( - _location_line(item, metric_name=metric_name) - for item in map(_as_mapping, _as_sequence(group.get("items"))) - ) - lines.append("") - if lines[-1] == "": - lines.pop() - - -def _append_suppressed_clone_section( - lines: list[str], - *, - title: str, - groups: Sequence[object], - metric_name: str, -) -> None: - section_groups = [_as_mapping(group) for group in groups] - lines.append(f"{title} (groups={len(section_groups)})") - if not section_groups: - lines.append("(none)") - return - for idx, group in enumerate(section_groups, start=1): - lines.append(f"=== Suppressed clone group #{idx} ===") - lines.append( - "id=" - f"{format_meta_text_value(group.get('id'))} " - f"clone_type={format_meta_text_value(group.get('clone_type'))} " - f"severity={format_meta_text_value(group.get('severity'))} " - f"count={format_meta_text_value(group.get('count'))} " - f"spread={_spread_text(_as_mapping(group.get('spread')))} " - f"scope={_scope_text(_as_mapping(group.get('source_scope')))} " - "suppressed_by=" - f"{format_meta_text_value(group.get('suppression_rule'))}" - "@" - f"{format_meta_text_value(group.get('suppression_source'))} " - "matched_patterns=" - f"{format_meta_text_value(group.get('matched_patterns'))}" - ) - facts = _as_mapping(group.get("facts")) - if facts: - lines.append( - "facts: " - + _format_key_values( - facts, - tuple(sorted(str(key) for key in facts)), - skip_empty=True, - ) - ) - lines.extend( - _location_line(item, metric_name=metric_name) - for item in map(_as_mapping, _as_sequence(group.get("items"))) - ) - lines.append("") - if lines[-1] == "": - lines.pop() - - -def _append_structural_findings(lines: list[str], groups: Sequence[object]) -> None: - structural_groups = [_as_mapping(group) for group in groups] - 
lines.append(f"STRUCTURAL FINDINGS (groups={len(structural_groups)})") - if not structural_groups: - lines.append("(none)") - return - for idx, group in enumerate(structural_groups, start=1): - lines.append(f"=== Structural finding #{idx} ===") - signature = _as_mapping(group.get("signature")) - stable = _as_mapping(signature.get("stable")) - control_flow = _as_mapping(stable.get("control_flow")) - lines.append( - "id=" - f"{format_meta_text_value(group.get('id'))} " - f"kind={format_meta_text_value(group.get('kind'))} " - f"label={_structural_kind_label(group.get('kind'))} " - f"severity={format_meta_text_value(group.get('severity'))} " - f"confidence={format_meta_text_value(group.get('confidence'))} " - f"count={format_meta_text_value(group.get('count'))} " - f"spread={_spread_text(_as_mapping(group.get('spread')))} " - f"scope={_scope_text(_as_mapping(group.get('source_scope')))}" - ) - stable_family = str(stable.get("family", "")).strip() - match stable_family: - case "clone_guard_exit_divergence": - lines.append( - "signature: " - f"cohort_id={format_meta_text_value(stable.get('cohort_id'))} " - f"majority_guard_count=" - f"{format_meta_text_value(stable.get('majority_guard_count'))} " - f"majority_terminal_kind=" - f"{format_meta_text_value(stable.get('majority_terminal_kind'))}" - ) - case "clone_cohort_drift": - majority_profile = _as_mapping(stable.get("majority_profile")) - lines.append( - "signature: " - f"cohort_id={format_meta_text_value(stable.get('cohort_id'))} " - f"drift_fields=" - f"{format_meta_text_value(stable.get('drift_fields'))} " - f"majority_terminal_kind=" - f"{format_meta_text_value(majority_profile.get('terminal_kind'))}" - ) - case _: - lines.append( - "signature: " - f"stmt_shape={format_meta_text_value(stable.get('stmt_shape'))} " - f"terminal_kind=" - f"{format_meta_text_value(stable.get('terminal_kind'))} " - f"has_loop={format_meta_text_value(control_flow.get('has_loop'))} " - 
f"has_try={format_meta_text_value(control_flow.get('has_try'))} " - f"nested_if={format_meta_text_value(control_flow.get('nested_if'))}" - ) - facts = _as_mapping(group.get("facts")) - if facts: - lines.append( - "facts: " - + _format_key_values( - facts, - tuple(sorted(str(key) for key in facts)), - skip_empty=True, - ) - ) - items = list(map(_as_mapping, _as_sequence(group.get("items")))) - visible_items = items[:3] - lines.extend(_location_line(item) for item in visible_items) - if len(items) > len(visible_items): - lines.append(f"... and {len(items) - len(visible_items)} more occurrences") - lines.append("") - if lines[-1] == "": - lines.pop() - - -def _append_single_item_findings( - lines: list[str], - *, - title: str, - groups: Sequence[object], - fact_keys: Sequence[str], -) -> None: - finding_groups = [_as_mapping(group) for group in groups] - lines.append(f"{title} (groups={len(finding_groups)})") - if not finding_groups: - lines.append("(none)") - return - for idx, group in enumerate(finding_groups, start=1): - lines.append(f"=== Finding #{idx} ===") - lines.append( - "id=" - f"{format_meta_text_value(group.get('id'))} " - f"category={format_meta_text_value(group.get('category'))} " - f"kind={format_meta_text_value(group.get('kind'))} " - f"severity={format_meta_text_value(group.get('severity'))} " - f"confidence={format_meta_text_value(group.get('confidence'))} " - f"scope={_scope_text(_as_mapping(group.get('source_scope')))}" - ) - facts = _as_mapping(group.get("facts")) - if facts: - lines.append( - f"facts: {_format_key_values(facts, fact_keys, skip_empty=True)}" - ) - lines.extend( - _location_line(item) - for item in map(_as_mapping, _as_sequence(group.get("items"))) - ) - lines.append("") - if lines[-1] == "": - lines.pop() - - -def _suppression_bindings_text(item: Mapping[str, object]) -> str: - bindings = [ - _as_mapping(binding) - for binding in _as_sequence(item.get("suppressed_by")) - if isinstance(binding, Mapping) - ] - if bindings: - parts 
= [] - for binding in bindings: - rule = str(binding.get("rule", "")).strip() or "unknown" - source = str(binding.get("source", "")).strip() or "unknown" - parts.append(f"{rule}@{source}") - return ",".join(parts) - rule = str(item.get("suppression_rule", "")).strip() - source = str(item.get("suppression_source", "")).strip() - if rule or source: - return f"{rule or 'unknown'}@{source or 'unknown'}" - return "(none)" - - -def _append_suppressed_dead_code_items( - lines: list[str], - *, - items: Sequence[object], -) -> None: - suppressed_items = [_as_mapping(item) for item in items] - lines.append(f"SUPPRESSED DEAD CODE (items={len(suppressed_items)})") - if not suppressed_items: - lines.append("(none)") - return - for idx, item in enumerate(suppressed_items, start=1): - lines.append(f"=== Suppressed dead-code item #{idx} ===") - lines.append( - "kind=" - f"{format_meta_text_value(item.get('kind'))} " - f"confidence={format_meta_text_value(item.get('confidence'))} " - f"suppressed_by={_suppression_bindings_text(item)}" - ) - lines.append(_location_line(item)) - lines.append("") - if lines[-1] == "": - lines.pop() - - -def _flatten_findings(findings: Mapping[str, object]) -> list[Mapping[str, object]]: - groups = _as_mapping(findings.get("groups")) - clone_groups = _as_mapping(groups.get("clones")) - flat_groups = [ - *map(_as_mapping, _as_sequence(clone_groups.get("functions"))), - *map(_as_mapping, _as_sequence(clone_groups.get("blocks"))), - *map(_as_mapping, _as_sequence(clone_groups.get("segments"))), - *map( - _as_mapping, - _as_sequence(_as_mapping(groups.get("structural")).get("groups")), - ), - *map( - _as_mapping, - _as_sequence(_as_mapping(groups.get("dead_code")).get("groups")), - ), - *map( - _as_mapping, - _as_sequence(_as_mapping(groups.get("design")).get("groups")), - ), - ] - return flat_groups - - -def _append_suggestions( - lines: list[str], - *, - suggestions: Sequence[object], - findings: Mapping[str, object], -) -> None: - suggestion_rows = 
[_as_mapping(item) for item in suggestions] - finding_index = { - str(group.get("id")): group for group in _flatten_findings(findings) - } - lines.append(f"SUGGESTIONS (count={len(suggestion_rows)})") - if not suggestion_rows: - lines.append("(none)") - return - for idx, suggestion in enumerate(suggestion_rows, start=1): - finding = finding_index.get(str(suggestion.get("finding_id")), {}) - lines.append( - f"{idx}. " - f"[{format_meta_text_value(finding.get('severity'))}] " - f"{format_meta_text_value(suggestion.get('title'))}" - ) - lines.append( - " " - f"finding_id={format_meta_text_value(suggestion.get('finding_id'))} " - f"effort={format_meta_text_value(_as_mapping(suggestion.get('action')).get('effort'))}" - ) - summary = str(suggestion.get("summary", "")).strip() - if summary: - lines.append(f" summary: {summary}") - lines.append( - f" location: {format_meta_text_value(suggestion.get('location_label'))}" - ) - representative = list( - map(_as_mapping, _as_sequence(suggestion.get("representative_locations"))) - ) - if representative: - lines.append(f" example: {_location_line(representative[0])[2:]}") - steps = [ - str(step).strip() - for step in _as_sequence(_as_mapping(suggestion.get("action")).get("steps")) - if str(step).strip() - ] - lines.extend(f" - {step}" for step in steps[:2]) - - -def _append_overview( - lines: list[str], - overview: Mapping[str, object], - hotlists: Mapping[str, object], -) -> None: - lines.append("DERIVED OVERVIEW") - families = _as_mapping(overview.get("families")) - lines.append( - "Families: " - + _format_key_values( - families, - ("clones", "structural", "dead_code", "design"), - ) - ) - source_breakdown = _as_mapping(overview.get("source_scope_breakdown")) - lines.append( - "Source scope breakdown: " - + _format_key_values( - source_breakdown, - ("production", "tests", "fixtures", "other"), - ) - ) - health_snapshot = _as_mapping(overview.get("health_snapshot")) - lines.append( - "Health snapshot: " - + _format_key_values( - 
health_snapshot, - ("score", "grade", "strongest_dimension", "weakest_dimension"), - ) - ) - hotlist_counts = { - "most_actionable": len(_as_sequence(hotlists.get("most_actionable_ids"))), - "highest_spread": len(_as_sequence(hotlists.get("highest_spread_ids"))), - "production_hotspots": len( - _as_sequence(hotlists.get("production_hotspot_ids")) - ), - "test_fixture_hotspots": len( - _as_sequence(hotlists.get("test_fixture_hotspot_ids")) - ), - } - lines.append( - "Hotlists: " - + _format_key_values( - hotlist_counts, - ( - "most_actionable", - "highest_spread", - "production_hotspots", - "test_fixture_hotspots", - ), - ) - ) - top_risks = list(map(_as_mapping, _as_sequence(overview.get("top_risks")))) - if not top_risks: - lines.append("Top risks: (none)") - return - lines.append("Top risks:") - lines.extend( - ( - "- " - f"{format_meta_text_value(risk.get('family'))} " - f"count={format_meta_text_value(risk.get('count'))} " - f"scope={format_meta_text_value(risk.get('scope'))} " - f"label={format_meta_text_value(risk.get('label'))}" - ) - for risk in top_risks - ) - - -def render_text_report_document(payload: Mapping[str, object]) -> str: - meta_payload = _as_mapping(payload.get("meta")) - baseline = _as_mapping(meta_payload.get("baseline")) - cache = _as_mapping(meta_payload.get("cache")) - metrics_baseline = _as_mapping(meta_payload.get("metrics_baseline")) - inventory_payload = _as_mapping(payload.get("inventory")) - inventory_files = _as_mapping(inventory_payload.get("files")) - inventory_code = _as_mapping(inventory_payload.get("code")) - file_registry = _as_mapping(inventory_payload.get("file_registry")) - findings = _as_mapping(payload.get("findings")) - findings_summary = _as_mapping(findings.get("summary")) - findings_families = _as_mapping(findings_summary.get("families")) - findings_severity = _as_mapping(findings_summary.get("severity")) - findings_impact_scope = _as_mapping(findings_summary.get("impact_scope")) - findings_clones = 
_as_mapping(findings_summary.get("clones")) - findings_suppressed = _as_mapping(findings_summary.get("suppressed")) - metrics_payload = _as_mapping(payload.get("metrics")) - metrics_summary = _as_mapping(metrics_payload.get("summary")) - metrics_families = _as_mapping(metrics_payload.get("families")) - derived = _as_mapping(payload.get("derived")) - overview = _as_mapping(derived.get("overview")) - hotlists = _as_mapping(derived.get("hotlists")) - suggestions_payload = _as_sequence(derived.get("suggestions")) - integrity = _as_mapping(payload.get("integrity")) - canonicalization = _as_mapping(integrity.get("canonicalization")) - digest = _as_mapping(integrity.get("digest")) - findings_groups = _as_mapping(findings.get("groups")) - clone_groups = _as_mapping(findings_groups.get("clones")) - suppressed_clone_groups = _as_mapping(clone_groups.get("suppressed")) - runtime_meta = _as_mapping(meta_payload.get("runtime")) - clone_summary_keys: list[str] = ["functions", "blocks", "segments", "new", "known"] - if "suppressed" in findings_clones: - clone_summary_keys.append("suppressed") - suppressed_summary_keys: list[str] = ["dead_code"] - if "clones" in findings_suppressed: - suppressed_summary_keys.append("clones") - - lines = [ - "REPORT METADATA", - "Report schema version: " - f"{format_meta_text_value(payload.get('report_schema_version'))}", - "CodeClone version: " - f"{format_meta_text_value(meta_payload.get('codeclone_version'))}", - f"Project name: {format_meta_text_value(meta_payload.get('project_name'))}", - f"Scan root: {format_meta_text_value(meta_payload.get('scan_root'))}", - f"Python version: {format_meta_text_value(meta_payload.get('python_version'))}", - f"Python tag: {format_meta_text_value(meta_payload.get('python_tag'))}", - f"Analysis mode: {format_meta_text_value(meta_payload.get('analysis_mode'))}", - f"Report mode: {format_meta_text_value(meta_payload.get('report_mode'))}", - "Report generated (UTC): " - 
f"{format_meta_text_value(runtime_meta.get('report_generated_at_utc'))}", - "Computed metric families: " - f"{format_meta_text_value(meta_payload.get('computed_metric_families'))}", - f"Baseline path: {format_meta_text_value(baseline.get('path'))}", - "Baseline fingerprint version: " - f"{format_meta_text_value(baseline.get('fingerprint_version'))}", - "Baseline schema version: " - f"{format_meta_text_value(baseline.get('schema_version'))}", - f"Baseline Python tag: {format_meta_text_value(baseline.get('python_tag'))}", - "Baseline generator name: " - f"{format_meta_text_value(baseline.get('generator_name'))}", - "Baseline generator version: " - f"{format_meta_text_value(baseline.get('generator_version'))}", - "Baseline payload sha256: " - f"{format_meta_text_value(baseline.get('payload_sha256'))}", - "Baseline payload verified: " - f"{format_meta_text_value(baseline.get('payload_sha256_verified'))}", - f"Baseline loaded: {format_meta_text_value(baseline.get('loaded'))}", - f"Baseline status: {format_meta_text_value(baseline.get('status'))}", - f"Cache path: {format_meta_text_value(cache.get('path'))}", - f"Cache schema version: {format_meta_text_value(cache.get('schema_version'))}", - f"Cache status: {format_meta_text_value(cache.get('status'))}", - f"Cache used: {format_meta_text_value(cache.get('used'))}", - "Metrics baseline path: " - f"{format_meta_text_value(metrics_baseline.get('path'))}", - "Metrics baseline loaded: " - f"{format_meta_text_value(metrics_baseline.get('loaded'))}", - "Metrics baseline status: " - f"{format_meta_text_value(metrics_baseline.get('status'))}", - "Metrics baseline schema version: " - f"{format_meta_text_value(metrics_baseline.get('schema_version'))}", - "Metrics baseline payload sha256: " - f"{format_meta_text_value(metrics_baseline.get('payload_sha256'))}", - "Metrics baseline payload verified: " - f"{format_meta_text_value(metrics_baseline.get('payload_sha256_verified'))}", - ] - - if ( - baseline.get("loaded") is not True - or 
str(baseline.get("status", "")).strip().lower() != "ok" - ): - lines.append("Note: baseline is untrusted; all groups are treated as NEW.") - - lines.extend( - [ - "", - "INVENTORY", - "Files: " - + _format_key_values( - inventory_files, - ( - "total_found", - "analyzed", - "cached", - "skipped", - "source_io_skipped", - ), - ), - "Code: " - + _format_key_values( - inventory_code, - ("scope", "parsed_lines", "functions", "methods", "classes"), - ), - "File registry: " - f"encoding={format_meta_text_value(file_registry.get('encoding'))} " - f"count={len(_as_sequence(file_registry.get('items')))}", - "", - "FINDINGS SUMMARY", - f"Total groups: {format_meta_text_value(findings_summary.get('total'))}", - "Families: " - + _format_key_values( - findings_families, - ("clones", "structural", "dead_code", "design"), - ), - "Severity: " - + _format_key_values( - findings_severity, - ("critical", "warning", "info"), - ), - "Impact scope: " - + _format_key_values( - findings_impact_scope, - ("runtime", "non_runtime", "mixed"), - ), - "Clones: " - + _format_key_values( - findings_clones, - tuple(clone_summary_keys), - ), - "Suppressed: " - + _format_key_values( - findings_suppressed, - tuple(suppressed_summary_keys), - ), - "", - "METRICS SUMMARY", - ] - ) - for family_name in ( - "complexity", - "coupling", - "cohesion", - "coverage_join", - "overloaded_modules", - "dependencies", - "dead_code", - "health", - ): - family_summary = _as_mapping(metrics_summary.get(family_name)) - if family_name == "coverage_join" and not family_summary: - continue - keys: Sequence[str] - match family_name: - case "complexity" | "coupling": - keys = ("total", "average", "max", "high_risk") - case "cohesion": - keys = ("total", "average", "max", "low_cohesion") - case "coverage_join": - keys = ( - "status", - "source", - "units", - "measured_units", - "overall_permille", - "coverage_hotspots", - "scope_gap_hotspots", - "hotspot_threshold_percent", - ) - case "dependencies": - keys = ("modules", 
"edges", "cycles", "max_depth") - case "overloaded_modules": - keys = ( - "total", - "candidates", - "population_status", - "top_score", - "average_score", - ) - case "dead_code": - keys = ("total", "high_confidence", "suppressed") - case _: - keys = ("score", "grade") - lines.append(f"{family_name}: {_format_key_values(family_summary, keys)}") - - coverage_join_family = _as_mapping(metrics_families.get("coverage_join")) - coverage_join_items = _as_sequence(coverage_join_family.get("items")) - if coverage_join_family: - lines.extend( - [ - "", - "COVERAGE JOIN (top 10)", - ] - ) - if not coverage_join_items: - lines.append("(none)") - else: - lines.extend( - "- " - + _format_key_values( - item, - ( - "relative_path", - "qualname", - "coverage_status", - "risk", - "coverage_permille", - "cyclomatic_complexity", - "coverage_hotspot", - "scope_gap_hotspot", - ), - ) - for item in map(_as_mapping, coverage_join_items[:10]) - ) - - overloaded_modules_family = _as_mapping(metrics_families.get("overloaded_modules")) - if not overloaded_modules_family: - overloaded_modules_family = _as_mapping(metrics_families.get("god_modules")) - overloaded_module_items = _as_sequence(overloaded_modules_family.get("items")) - lines.extend( - [ - "", - "OVERLOADED MODULES (top 10)", - ] - ) - if not overloaded_module_items: - lines.append("(none)") - else: - lines.extend( - "- " - + _format_key_values( - item, - ( - "module", - "relative_path", - "source_kind", - "score", - "candidate_status", - "loc", - "fan_in", - "fan_out", - "complexity_total", - ), - ) - for item in map(_as_mapping, overloaded_module_items[:10]) - ) - - lines.append("") - _append_overview(lines, overview, hotlists) - - lines.append("") - _append_suggestions(lines, suggestions=suggestions_payload, findings=findings) - - lines.append("") - _append_clone_section( - lines, - title="FUNCTION CLONES", - groups=_as_sequence(clone_groups.get("functions")), - novelty="new", - metric_name="loc", - ) - lines.append("") - 
_append_clone_section( - lines, - title="FUNCTION CLONES", - groups=_as_sequence(clone_groups.get("functions")), - novelty="known", - metric_name="loc", - ) - lines.append("") - _append_clone_section( - lines, - title="BLOCK CLONES", - groups=_as_sequence(clone_groups.get("blocks")), - novelty="new", - metric_name="size", - ) - lines.append("") - _append_clone_section( - lines, - title="BLOCK CLONES", - groups=_as_sequence(clone_groups.get("blocks")), - novelty="known", - metric_name="size", - ) - lines.append("") - _append_clone_section( - lines, - title="SEGMENT CLONES", - groups=_as_sequence(clone_groups.get("segments")), - novelty="new", - metric_name="size", - ) - lines.append("") - _append_clone_section( - lines, - title="SEGMENT CLONES", - groups=_as_sequence(clone_groups.get("segments")), - novelty="known", - metric_name="size", - ) - if suppressed_clone_groups: - lines.append("") - _append_suppressed_clone_section( - lines, - title="SUPPRESSED FUNCTION CLONES", - groups=_as_sequence(suppressed_clone_groups.get("functions")), - metric_name="loc", - ) - lines.append("") - _append_suppressed_clone_section( - lines, - title="SUPPRESSED BLOCK CLONES", - groups=_as_sequence(suppressed_clone_groups.get("blocks")), - metric_name="size", - ) - lines.append("") - _append_suppressed_clone_section( - lines, - title="SUPPRESSED SEGMENT CLONES", - groups=_as_sequence(suppressed_clone_groups.get("segments")), - metric_name="size", - ) - lines.append("") - _append_structural_findings( - lines, - _as_sequence(_as_mapping(findings_groups.get("structural")).get("groups")), - ) - lines.append("") - _append_single_item_findings( - lines, - title="DEAD CODE FINDINGS", - groups=_as_sequence( - _as_mapping(findings_groups.get("dead_code")).get("groups") - ), - fact_keys=("kind", "confidence"), - ) - lines.append("") - dead_code_family = _as_mapping(metrics_families.get("dead_code")) - _append_suppressed_dead_code_items( - lines, - 
items=_as_sequence(dead_code_family.get("suppressed_items")), - ) - lines.append("") - _append_single_item_findings( - lines, - title="DESIGN FINDINGS", - groups=_as_sequence(_as_mapping(findings_groups.get("design")).get("groups")), - fact_keys=("lcom4", "method_count", "instance_var_count", "fan_out", "risk"), - ) - lines.extend( - [ - "", - "INTEGRITY", - "Canonicalization: " - + _format_key_values( - canonicalization, - ("version", "scope", "sections"), - ), - "Digest: " - + _format_key_values( - digest, - ("algorithm", "verified", "value"), - ), - ] - ) - - return "\n".join(lines).rstrip() + "\n" +from .renderers.json import render_json_report_document +from .renderers.text import ( + _append_clone_section, + _append_single_item_findings, + _append_structural_findings, + _append_suggestions, + _append_suppressed_dead_code_items, + _as_int, + _structural_kind_label, + render_text_report_document, +) + +__all__ = [ + "_append_clone_section", + "_append_single_item_findings", + "_append_structural_findings", + "_append_suggestions", + "_append_suppressed_dead_code_items", + "_as_int", + "_structural_kind_label", + "render_json_report_document", + "render_text_report_document", +] diff --git a/codeclone/report/suggestions.py b/codeclone/report/suggestions.py index 3715353..85ff186 100644 --- a/codeclone/report/suggestions.py +++ b/codeclone/report/suggestions.py @@ -8,7 +8,6 @@ from typing import TYPE_CHECKING, Literal -from .._coerce import as_int, as_str from ..domain.findings import ( CATEGORY_CLONE, CATEGORY_COHESION, @@ -36,6 +35,7 @@ SEVERITY_RANK, SEVERITY_WARNING, ) +from ..findings.structural.detectors import normalize_structural_findings from ..models import ( ClassMetrics, GroupItemLike, @@ -49,7 +49,7 @@ BLOCK_HINT_ASSERT_ONLY, BLOCK_PATTERN_REPEATED_STMT_HASH, ) -from ..structural_findings import normalize_structural_findings +from ..utils.coerce import as_int, as_str from .derived import ( combine_source_kinds, format_group_location_label, diff --git 
a/codeclone/scanner.py b/codeclone/scanner/__init__.py similarity index 98% rename from codeclone/scanner.py rename to codeclone/scanner/__init__.py index a9c65a9..8f05ffc 100644 --- a/codeclone/scanner.py +++ b/codeclone/scanner/__init__.py @@ -11,7 +11,7 @@ from pathlib import Path from typing import TYPE_CHECKING -from .errors import ValidationError +from ..contracts.errors import ValidationError if TYPE_CHECKING: from collections.abc import Iterable diff --git a/codeclone/suppressions.py b/codeclone/suppressions.py index 0b45987..de49616 100644 --- a/codeclone/suppressions.py +++ b/codeclone/suppressions.py @@ -31,7 +31,7 @@ SuppressionTargetKey = tuple[str, str, int, int, DeclarationKind] _SUPPRESSION_DIRECTIVE_PATTERN: Final[re.Pattern[str]] = re.compile( - r"^\s*#\s*codeclone\s*:\s*ignore\s*\[(?P[^\]]+)\]\s*$" + r"^\s*#\s*codeclone\s*:\s*ignore\s*\[(?P[^]]+)]\s*$" ) _RULE_ID_PATTERN: Final[re.Pattern[str]] = re.compile(r"^[a-z0-9][a-z0-9-]*$") @@ -174,7 +174,7 @@ def _declaration_inline_lines(target: DeclarationTarget) -> tuple[int, ...]: end_line = target.declaration_end_line or target.start_line if end_line <= 0 or end_line == target.start_line: return (target.start_line,) - return (target.start_line, end_line) + return target.start_line, end_line def _bound_inline_rules( @@ -250,7 +250,7 @@ def suppression_target_key( end_line: int, kind: DeclarationKind, ) -> SuppressionTargetKey: - return (filepath, qualname, start_line, end_line, kind) + return filepath, qualname, start_line, end_line, kind def build_suppression_index( diff --git a/codeclone/surfaces/__init__.py b/codeclone/surfaces/__init__.py new file mode 100644 index 0000000..557317f --- /dev/null +++ b/codeclone/surfaces/__init__.py @@ -0,0 +1,4 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 diff --git a/codeclone/surfaces/cli/__init__.py b/codeclone/surfaces/cli/__init__.py new file mode 100644 index 0000000..9135843 --- /dev/null +++ b/codeclone/surfaces/cli/__init__.py @@ -0,0 +1,5 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy diff --git a/codeclone/_cli_baselines.py b/codeclone/surfaces/cli/baseline_state.py similarity index 86% rename from codeclone/_cli_baselines.py rename to codeclone/surfaces/cli/baseline_state.py index 2be5a59..a7bc631 100644 --- a/codeclone/_cli_baselines.py +++ b/codeclone/surfaces/cli/baseline_state.py @@ -10,38 +10,46 @@ from dataclasses import dataclass from json import JSONDecodeError from pathlib import Path -from typing import TYPE_CHECKING, Protocol +from typing import TYPE_CHECKING, Any, Protocol, cast import orjson -from . import ui_messages as ui -from .baseline import ( +from ... import __version__ +from ... import ui_messages as ui +from ...baseline import ( BASELINE_UNTRUSTED_STATUSES, Baseline, BaselineStatus, coerce_baseline_status, current_python_tag, ) -from .contracts import ( - BASELINE_FINGERPRINT_VERSION, - BASELINE_SCHEMA_VERSION, - ExitCode, -) -from .errors import BaselineValidationError -from .metrics_baseline import ( +from ...baseline.metrics_baseline import ( METRICS_BASELINE_UNTRUSTED_STATUSES, MetricsBaseline, MetricsBaselineStatus, coerce_metrics_baseline_status, ) +from ...contracts import ( + BASELINE_FINGERPRINT_VERSION, + BASELINE_SCHEMA_VERSION, + ExitCode, +) +from ...contracts.errors import BaselineValidationError +from . 
import state as cli_state if TYPE_CHECKING: - from .models import GroupMapLike, ProjectMetrics + from ...models import GroupMapLike, ProjectMetrics __all__ = [ "CloneBaselineState", "MetricsBaselineSectionProbe", "MetricsBaselineState", + "_CloneBaselineState", + "_MetricsBaselineSectionProbe", + "_MetricsBaselineState", + "_probe_metrics_baseline_section", + "_resolve_clone_baseline_state", + "_resolve_metrics_baseline_state", "probe_metrics_baseline_section", "resolve_clone_baseline_state", "resolve_metrics_baseline_state", @@ -100,6 +108,11 @@ class MetricsBaselineSectionProbe: payload: dict[str, object] | None +_CloneBaselineState = CloneBaselineState +_MetricsBaselineSectionProbe = MetricsBaselineSectionProbe +_MetricsBaselineState = MetricsBaselineState + + def probe_metrics_baseline_section(path: Path) -> MetricsBaselineSectionProbe: if not path.exists(): return MetricsBaselineSectionProbe( @@ -457,3 +470,49 @@ def _enforce_metrics_gate_schema_requirements( "--update-metrics-baseline first." 
) ) + + +def _probe_metrics_baseline_section(path: Path) -> _MetricsBaselineSectionProbe: + return probe_metrics_baseline_section(path) + + +def _resolve_clone_baseline_state( + *, + args: object, + baseline_path: Path, + baseline_exists: bool, + analysis: object, + shared_baseline_payload: dict[str, object] | None = None, +) -> _CloneBaselineState: + analysis_obj = cast("Any", analysis) + return resolve_clone_baseline_state( + args=cast("Any", args), + baseline_path=baseline_path, + baseline_exists=baseline_exists, + func_groups=analysis_obj.func_groups, + block_groups=analysis_obj.block_groups, + codeclone_version=__version__, + console=cast("_PrinterLike", cli_state.get_console()), + shared_baseline_payload=shared_baseline_payload, + ) + + +def _resolve_metrics_baseline_state( + *, + args: object, + metrics_baseline_path: Path, + metrics_baseline_exists: bool, + baseline_updated_path: Path | None, + analysis: object, + shared_baseline_payload: dict[str, object] | None = None, +) -> _MetricsBaselineState: + analysis_obj = cast("Any", analysis) + return resolve_metrics_baseline_state( + args=cast("Any", args), + metrics_baseline_path=metrics_baseline_path, + metrics_baseline_exists=metrics_baseline_exists, + baseline_updated_path=baseline_updated_path, + project_metrics=analysis_obj.project_metrics, + console=cast("_PrinterLike", cli_state.get_console()), + shared_baseline_payload=shared_baseline_payload, + ) diff --git a/codeclone/surfaces/cli/changed_scope.py b/codeclone/surfaces/cli/changed_scope.py new file mode 100644 index 0000000..5088744 --- /dev/null +++ b/codeclone/surfaces/cli/changed_scope.py @@ -0,0 +1,220 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import subprocess +import sys +from collections.abc import Mapping, Sequence +from pathlib import Path +from typing import Any, cast + +from ... import ui_messages as ui +from ...contracts import ExitCode +from ...utils import coerce as _coerce +from ...utils.git_diff import validate_git_diff_ref +from . import state as cli_state +from .types import ChangedCloneGate + +_as_mapping = _coerce.as_mapping +_as_sequence = _coerce.as_sequence + +__all__ = ["ChangedCloneGate"] + + +def _validate_changed_scope_args(*, args: object) -> str | None: + args_obj = cast("Any", args) + console = cast("Any", cli_state.get_console()) + if args_obj.diff_against and args_obj.paths_from_git_diff: + console.print( + ui.fmt_contract_error( + "Use --diff-against or --paths-from-git-diff, not both." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if args_obj.paths_from_git_diff: + args_obj.changed_only = True + return str(args_obj.paths_from_git_diff) + if args_obj.diff_against and not args_obj.changed_only: + console.print(ui.fmt_contract_error("--diff-against requires --changed-only.")) + sys.exit(ExitCode.CONTRACT_ERROR) + if args_obj.changed_only and not args_obj.diff_against: + console.print( + ui.fmt_contract_error( + "--changed-only requires --diff-against or --paths-from-git-diff." 
+ ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + return str(args_obj.diff_against) if args_obj.diff_against else None + + +def _normalize_changed_paths( + *, + root_path: Path, + paths: Sequence[str], +) -> tuple[str, ...]: + console = cast("Any", cli_state.get_console()) + normalized: set[str] = set() + for raw_path in paths: + candidate = raw_path.strip() + if not candidate: + continue + candidate_path = Path(candidate) + try: + absolute_path = ( + candidate_path.resolve() + if candidate_path.is_absolute() + else (root_path / candidate_path).resolve() + ) + except OSError as exc: + console.print( + ui.fmt_contract_error( + f"Unable to resolve changed path '{candidate}': {exc}" + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + try: + relative_path = absolute_path.relative_to(root_path) + except ValueError: + console.print( + ui.fmt_contract_error( + f"Changed path '{candidate}' is outside the scan root." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + cleaned = str(relative_path).replace("\\", "/").strip("/") + if cleaned: + normalized.add(cleaned) + return tuple(sorted(normalized)) + + +def _git_diff_changed_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ...]: + console = cast("Any", cli_state.get_console()) + try: + validated_ref = validate_git_diff_ref(git_diff_ref) + except ValueError as exc: + console.print(ui.fmt_contract_error(str(exc))) + sys.exit(ExitCode.CONTRACT_ERROR) + try: + completed = subprocess.run( + ["git", "diff", "--name-only", validated_ref, "--"], + cwd=str(root_path), + check=True, + capture_output=True, + text=True, + timeout=30, + ) + except ( + FileNotFoundError, + subprocess.CalledProcessError, + subprocess.TimeoutExpired, + ) as exc: + console.print( + ui.fmt_contract_error( + "Unable to resolve changed files from git diff ref " + f"'{validated_ref}': {exc}" + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + lines = [line.strip() for line in completed.stdout.splitlines() if line.strip()] + return 
_normalize_changed_paths(root_path=root_path, paths=lines) + + +def _path_matches(relative_path: str, changed_paths: Sequence[str]) -> bool: + return any( + relative_path == candidate or relative_path.startswith(candidate + "/") + for candidate in changed_paths + ) + + +def _flatten_report_findings( + report_document: Mapping[str, object], +) -> list[dict[str, object]]: + findings = _as_mapping(report_document.get("findings")) + groups = _as_mapping(findings.get("groups")) + clone_groups = _as_mapping(groups.get("clones")) + return [ + *[ + dict(_as_mapping(item)) + for item in _as_sequence(clone_groups.get("functions")) + ], + *[dict(_as_mapping(item)) for item in _as_sequence(clone_groups.get("blocks"))], + *[ + dict(_as_mapping(item)) + for item in _as_sequence(clone_groups.get("segments")) + ], + *[ + dict(_as_mapping(item)) + for item in _as_sequence( + _as_mapping(groups.get("structural")).get("groups") + ) + ], + *[ + dict(_as_mapping(item)) + for item in _as_sequence(_as_mapping(groups.get("dead_code")).get("groups")) + ], + *[ + dict(_as_mapping(item)) + for item in _as_sequence(_as_mapping(groups.get("design")).get("groups")) + ], + ] + + +def _finding_touches_changed_paths( + finding: Mapping[str, object], + *, + changed_paths: Sequence[str], +) -> bool: + for item in _as_sequence(finding.get("items")): + relative_path = str(_as_mapping(item).get("relative_path", "")).strip() + if relative_path and _path_matches(relative_path, changed_paths): + return True + return False + + +def _changed_clone_gate_from_report( + report_document: Mapping[str, object], + *, + changed_paths: Sequence[str], +) -> ChangedCloneGate: + findings = [ + finding + for finding in _flatten_report_findings(report_document) + if _finding_touches_changed_paths(finding, changed_paths=changed_paths) + ] + clone_findings = [ + finding + for finding in findings + if str(finding.get("family", "")).strip() == "clone" + and str(finding.get("category", "")).strip() in {"function", "block"} + 
] + new_func = frozenset( + str(finding.get("id", "")) + for finding in clone_findings + if str(finding.get("category", "")).strip() == "function" + and str(finding.get("novelty", "")).strip() == "new" + ) + new_block = frozenset( + str(finding.get("id", "")) + for finding in clone_findings + if str(finding.get("category", "")).strip() == "block" + and str(finding.get("novelty", "")).strip() == "new" + ) + findings_new = sum( + 1 for finding in findings if str(finding.get("novelty", "")).strip() == "new" + ) + findings_known = sum( + 1 for finding in findings if str(finding.get("novelty", "")).strip() == "known" + ) + return ChangedCloneGate( + changed_paths=tuple(changed_paths), + new_func=new_func, + new_block=new_block, + total_clone_groups=len(clone_findings), + findings_total=len(findings), + findings_new=findings_new, + findings_known=findings_known, + ) diff --git a/codeclone/_cli_rich.py b/codeclone/surfaces/cli/console.py similarity index 58% rename from codeclone/_cli_rich.py rename to codeclone/surfaces/cli/console.py index 88f9d00..59be3dd 100644 --- a/codeclone/_cli_rich.py +++ b/codeclone/surfaces/cli/console.py @@ -6,10 +6,19 @@ from __future__ import annotations +import os import re +import sys +from collections.abc import Callable, Mapping, Sequence from contextlib import AbstractContextManager, nullcontext from functools import lru_cache -from typing import TYPE_CHECKING, Protocol +from pathlib import Path +from typing import TYPE_CHECKING, Any, Protocol, cast + +from ... import __version__ +from ... import ui_messages as ui +from ...report.gates import reasons as gate_reasons +from . 
import state as cli_state if TYPE_CHECKING: from rich.console import Console as RichConsole @@ -30,15 +39,6 @@ } _RICH_MARKUP_TAG_RE = re.compile(r"\[/?[a-zA-Z][a-zA-Z0-9_ .#:-]*]") -__all__ = [ - "PlainConsole", - "make_console", - "make_plain_console", - "print_banner", - "rich_console_symbols", - "rich_progress_symbols", -] - class _PrinterLike(Protocol): def print(self, *objects: object, **kwargs: object) -> None: ... @@ -110,7 +110,7 @@ def make_plain_console() -> PlainConsole: return PlainConsole() -def print_banner( +def _render_banner( *, console: _PrinterLike, banner_title: str, @@ -129,3 +129,78 @@ def print_banner( ) if root_display is not None: console.print(f" [dim]Root:[/dim] [dim]{root_display}[/dim]") + + +def _console() -> _PrinterLike: + return cast("_PrinterLike", cli_state.get_console()) + + +def _rich_progress_symbols() -> tuple[type[object], ...]: + return cast("tuple[type[object], ...]", rich_progress_symbols()) + + +def _make_console(*, no_color: bool) -> object: + return make_console(no_color=no_color, width=ui.CLI_LAYOUT_MAX_WIDTH) + + +def _make_plain_console() -> PlainConsole: + return make_plain_console() + + +def _parse_metric_reason_entry(reason: str) -> tuple[str, str]: + return gate_reasons.parse_metric_reason_entry(reason) + + +def _print_gating_failure_block( + *, + code: str, + entries: Sequence[tuple[str, object]], + args: object, +) -> None: + gate_reasons.print_gating_failure_block( + console=_console(), + code=code, + entries=list(entries), + args=cast("Any", args), + ) + + +def build_html_report(*args: object, **kwargs: object) -> str: + from ...report.html import build_html_report as _build_html_report + + html_builder: Callable[..., str] = _build_html_report + return html_builder(*args, **kwargs) + + +def _print_verbose_clone_hashes( + console: _PrinterLike, + *, + label: str, + clone_hashes: set[str], +) -> None: + if not clone_hashes: + return + console.print(f"\n {label}:") + for clone_hash in sorted(clone_hashes): + 
console.print(f" - {clone_hash}") + + +def print_banner(*, root: Path | None = None) -> None: + _render_banner( + console=_console(), + banner_title=ui.banner_title(__version__), + project_name=(root.name if root is not None else None), + root_display=(str(root) if root is not None else None), + ) + + +def _is_debug_enabled( + *, + argv: Sequence[str] | None = None, + environ: Mapping[str, str] | None = None, +) -> bool: + args = list(sys.argv[1:] if argv is None else argv) + debug_from_flag = any(arg == "--debug" for arg in args) + env = os.environ if environ is None else environ + debug_from_env = env.get("CODECLONE_DEBUG") == "1" + return debug_from_flag or debug_from_env diff --git a/codeclone/surfaces/cli/main.py b/codeclone/surfaces/cli/main.py new file mode 100644 index 0000000..ecabd32 --- /dev/null +++ b/codeclone/surfaces/cli/main.py @@ -0,0 +1,1324 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +import sys +import time +from collections.abc import Collection, Mapping, Sequence +from dataclasses import dataclass, replace +from pathlib import Path +from typing import Any, NoReturn, Protocol, cast + +from ... import __version__ +from ... 
import ui_messages as ui +from ...baseline import Baseline +from ...cache import Cache, CacheStatus, build_segment_report_projection +from ...config import ( + ConfigValidationError, + apply_pyproject_config_overrides, + build_parser, + collect_explicit_cli_dests, + load_pyproject_config, +) +from ...contracts import ( + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ISSUES_URL, + ExitCode, +) +from ...contracts.errors import CacheError +from ...core import ( + MAX_FILE_SIZE, + AnalysisResult, + BootstrapResult, + DiscoveryResult, + analyze, + bootstrap, + discover, + gate, + process, + process_file, + report, +) +from ...core._types import FileProcessResult as ProcessingResult +from ...core._types import ProcessingResult as PipelineProcessingResult +from . import report_meta as cli_meta_mod +from . import state as cli_state +from .baseline_state import ( + CloneBaselineState as _CloneBaselineState, +) +from .baseline_state import ( + MetricsBaselineSectionProbe as _MetricsBaselineSectionProbe, +) +from .baseline_state import ( + MetricsBaselineState as _MetricsBaselineState, +) +from .baseline_state import ( + probe_metrics_baseline_section as _probe_metrics_baseline_section_impl, +) +from .baseline_state import ( + resolve_clone_baseline_state as _resolve_clone_baseline_state_impl, +) +from .baseline_state import ( + resolve_metrics_baseline_state as _resolve_metrics_baseline_state_impl, +) +from .changed_scope import ( + ChangedCloneGate, +) +from .changed_scope import ( + _changed_clone_gate_from_report as _changed_clone_gate_from_report_impl, +) +from .changed_scope import ( + _git_diff_changed_paths as _git_diff_changed_paths_impl, +) +from .changed_scope import ( + _normalize_changed_paths as _normalize_changed_paths_impl, +) +from .changed_scope import ( + _validate_changed_scope_args as _validate_changed_scope_args_impl, +) +from .console import ( + PlainConsole, + 
_is_debug_enabled, + _parse_metric_reason_entry, + _print_verbose_clone_hashes, + _rich_progress_symbols, + build_html_report, +) +from .console import ( + _print_gating_failure_block as _print_gating_failure_block_impl, +) +from .console import ( + make_console as _make_rich_console, +) +from .console import ( + make_plain_console as _make_plain_console_impl, +) +from .console import ( + print_banner as _print_banner_impl, +) +from .reports_output import ( + _report_path_origins as _report_path_origins_impl, +) +from .reports_output import ( + _resolve_output_paths as _resolve_output_paths_impl, +) +from .reports_output import ( + _timestamped_report_path as _timestamped_report_path_impl, +) +from .reports_output import ( + _validate_report_ui_flags as _validate_report_ui_flags_impl, +) +from .reports_output import ( + _write_report_outputs as _write_report_outputs_impl, +) +from .runtime import ( + _configure_metrics_mode as _configure_metrics_mode_impl, +) +from .runtime import ( + _metrics_computed as _metrics_computed_impl, +) +from .runtime import ( + _print_failed_files as _print_failed_files_impl, +) +from .runtime import ( + _resolve_cache_path as _resolve_cache_path_impl, +) +from .runtime import ( + _resolve_cache_status as _resolve_cache_status_impl, +) +from .runtime import ( + _validate_numeric_args as _validate_numeric_args_impl, +) +from .summary import ( + ChangedScopeSnapshot, + _print_changed_scope, + _print_metrics, + _print_summary, + build_metrics_snapshot, + build_summary_counts, +) +from .types import OutputPaths, ReportPathOrigin + +__all__ = [ + "LEGACY_CACHE_PATH", + "MAX_FILE_SIZE", + "Baseline", + "Cache", + "ChangedCloneGate", + "ConfigValidationError", + "ExitCode", + "ProcessingResult", + "_changed_clone_gate_from_report", + "_configure_metrics_mode", + "_enforce_gating", + "_git_diff_changed_paths", + "_main_impl", + "_make_console", + "_make_plain_console", + "_make_rich_console", + "_metrics_computed", + 
"_normalize_changed_paths", + "_parse_metric_reason_entry", + "_print_changed_scope", + "_print_failed_files", + "_print_gating_failure_block", + "_print_metrics", + "_print_summary", + "_print_verbose_clone_hashes", + "_probe_metrics_baseline_section", + "_report_path_origins", + "_resolve_cache_path", + "_resolve_cache_status", + "_resolve_clone_baseline_state", + "_resolve_metrics_baseline_state", + "_resolve_output_paths", + "_run_analysis_stages", + "_timestamped_report_path", + "_validate_changed_scope_args", + "_validate_numeric_args", + "_validate_report_ui_flags", + "_write_report_outputs", + "analyze", + "bootstrap", + "build_html_report", + "console", + "discover", + "gate", + "main", + "print_banner", + "process", + "process_file", + "report", +] + + +class _PrinterLike(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... + + def status(self, *objects: object, **kwargs: object) -> object: ... + + +@dataclass(frozen=True, slots=True) +class _ResolvedBaselineInputs: + baseline_path: Path + baseline_exists: bool + metrics_baseline_path: Path + metrics_baseline_exists: bool + shared_baseline_payload: dict[str, object] | None + + +@dataclass(frozen=True, slots=True) +class _DiffContext: + new_func: set[str] + new_block: set[str] + new_clones_count: int + metrics_diff: object | None + coverage_adoption_diff_available: bool + api_surface_diff_available: bool + + +def _set_console(value: object) -> object: + cli_state.set_console(value) + return value + + +def _console() -> _PrinterLike: + return cast("_PrinterLike", _set_console(console)) + + +def _make_console(*, no_color: bool) -> object: + return _make_rich_console( + no_color=no_color, + width=ui.CLI_LAYOUT_MAX_WIDTH, + ) + + +def _make_plain_console() -> PlainConsole: + return _make_plain_console_impl() + + +console: object = _make_plain_console() +_set_console(console) +LEGACY_CACHE_PATH = cli_state.LEGACY_CACHE_PATH + + +def print_banner(*, root: Path | None = None) -> None: + 
_set_console(console) + _print_banner_impl(root=root) + + +def _report_path_origins(argv: Sequence[str]) -> dict[str, ReportPathOrigin | None]: + return _report_path_origins_impl(argv) + + +def _timestamped_report_path(path: Path, *, report_generated_at_utc: str) -> Path: + return _timestamped_report_path_impl( + path, + report_generated_at_utc=report_generated_at_utc, + ) + + +def _validate_changed_scope_args(*, args: object) -> str | None: + _set_console(console) + return _validate_changed_scope_args_impl(args=args) + + +def _normalize_changed_paths( + *, + root_path: Path, + paths: Sequence[str], +) -> tuple[str, ...]: + _set_console(console) + return _normalize_changed_paths_impl(root_path=root_path, paths=paths) + + +def _git_diff_changed_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ...]: + _set_console(console) + return _git_diff_changed_paths_impl(root_path=root_path, git_diff_ref=git_diff_ref) + + +def _changed_clone_gate_from_report( + report_document: Mapping[str, object], + *, + changed_paths: Sequence[str], +) -> ChangedCloneGate: + return _changed_clone_gate_from_report_impl( + report_document, + changed_paths=changed_paths, + ) + + +def _resolve_output_paths( + args: object, + *, + report_path_origins: Mapping[str, ReportPathOrigin | None], + report_generated_at_utc: str, +) -> OutputPaths: + _set_console(console) + return _resolve_output_paths_impl( + args, + report_path_origins=report_path_origins, + report_generated_at_utc=report_generated_at_utc, + ) + + +def _validate_report_ui_flags(*, args: object, output_paths: OutputPaths) -> None: + _set_console(console) + _validate_report_ui_flags_impl(args=args, output_paths=output_paths) + + +def _resolve_cache_path(*, root_path: Path, args: object, from_args: bool) -> Path: + cli_state.LEGACY_CACHE_PATH = LEGACY_CACHE_PATH + _set_console(console) + return _resolve_cache_path_impl( + root_path=root_path, + args=args, + from_args=from_args, + ) + + +def _validate_numeric_args(args: object) -> 
bool: + return _validate_numeric_args_impl(args) + + +def _configure_metrics_mode(*, args: object, metrics_baseline_exists: bool) -> None: + _set_console(console) + _configure_metrics_mode_impl( + args=args, + metrics_baseline_exists=metrics_baseline_exists, + ) + + +def _print_failed_files(failed_files: Sequence[str]) -> None: + _set_console(console) + _print_failed_files_impl(tuple(failed_files)) + + +def _metrics_computed(args: object) -> tuple[str, ...]: + return _metrics_computed_impl(args) + + +def _probe_metrics_baseline_section(path: Path) -> _MetricsBaselineSectionProbe: + return _probe_metrics_baseline_section_impl(path) + + +def _resolve_clone_baseline_state( + *, + args: object, + baseline_path: Path, + baseline_exists: bool, + analysis: AnalysisResult, + shared_baseline_payload: dict[str, object] | None = None, +) -> _CloneBaselineState: + return _resolve_clone_baseline_state_impl( + args=cast("Any", args), + baseline_path=baseline_path, + baseline_exists=baseline_exists, + func_groups=analysis.func_groups, + block_groups=analysis.block_groups, + codeclone_version=__version__, + console=_console(), + shared_baseline_payload=shared_baseline_payload, + ) + + +def _resolve_metrics_baseline_state( + *, + args: object, + metrics_baseline_path: Path, + metrics_baseline_exists: bool, + baseline_updated_path: Path | None, + analysis: AnalysisResult, + shared_baseline_payload: dict[str, object] | None = None, +) -> _MetricsBaselineState: + return _resolve_metrics_baseline_state_impl( + args=cast("Any", args), + metrics_baseline_path=metrics_baseline_path, + metrics_baseline_exists=metrics_baseline_exists, + baseline_updated_path=baseline_updated_path, + project_metrics=analysis.project_metrics, + console=_console(), + shared_baseline_payload=shared_baseline_payload, + ) + + +def _resolve_cache_status(cache: Cache) -> tuple[CacheStatus, str | None]: + return _resolve_cache_status_impl(cache) + + +def _print_gating_failure_block( + *, + code: str, + entries: 
Sequence[tuple[str, object]], + args: object, +) -> None: + _set_console(console) + _print_gating_failure_block_impl( + code=code, + entries=entries, + args=args, + ) + + +def _write_report_outputs( + *, + args: object, + output_paths: OutputPaths, + report_artifacts: object, + open_html_report: bool = False, +) -> str | None: + _set_console(console) + return _write_report_outputs_impl( + args=args, + output_paths=output_paths, + report_artifacts=report_artifacts, + open_html_report=open_html_report, + ) + + +def _resolve_runtime_path_arg( + *, + root_path: Path, + raw_path: str, + from_cli: bool, +) -> Path: + candidate_path = Path(raw_path).expanduser() + if from_cli or candidate_path.is_absolute(): + return candidate_path.resolve() + return (root_path / candidate_path).resolve() + + +def _exit_contract_error( + message: str, + *, + cause: BaseException | None = None, +) -> NoReturn: + _console().print(ui.fmt_contract_error(message)) + if cause is None: + raise SystemExit(ExitCode.CONTRACT_ERROR) + raise SystemExit(ExitCode.CONTRACT_ERROR) from cause + + +def _resolve_existing_root_path(args: object) -> Path: + args_obj = cast("Any", args) + try: + root_path = Path(args_obj.root).resolve() + except OSError as exc: + _exit_contract_error(ui.ERR_INVALID_ROOT_PATH.format(error=exc), cause=exc) + if not root_path.exists(): + _exit_contract_error(ui.ERR_ROOT_NOT_FOUND.format(path=root_path)) + return root_path + + +def _load_pyproject_config_or_exit(root_path: Path) -> dict[str, object]: + try: + return load_pyproject_config(root_path) + except ConfigValidationError as exc: + _exit_contract_error(str(exc), cause=exc) + + +def _configure_runtime_flags(args: object) -> None: + args_obj = cast("Any", args) + if args_obj.debug: + os.environ["CODECLONE_DEBUG"] = "1" + if args_obj.ci: + args_obj.fail_on_new = True + args_obj.no_color = True + args_obj.quiet = True + + +def _configure_runtime_console(args: object) -> None: + global console + + args_obj = cast("Any", args) + 
console = ( + _make_plain_console() + if args_obj.quiet + else _make_console(no_color=args_obj.no_color) + ) + _set_console(console) + + +def _validate_numeric_args_or_exit(args: object) -> None: + if _validate_numeric_args(args): + return + _exit_contract_error( + "Size limits must be non-negative integers (MB), " + "threshold flags must be >= 0 or -1, and coverage thresholds " + "must be between 0 and 100." + ) + + +def _resolve_baseline_inputs( + *, + ap: object, + args: object, + root_path: Path, + baseline_path_from_args: bool, + metrics_path_from_args: bool, +) -> _ResolvedBaselineInputs: + args_obj = cast("Any", args) + ap_obj = cast("Any", ap) + + baseline_arg_path = Path(args_obj.baseline).expanduser() + try: + baseline_path = _resolve_runtime_path_arg( + root_path=root_path, + raw_path=args_obj.baseline, + from_cli=baseline_path_from_args, + ) + baseline_exists = baseline_path.exists() + except OSError as exc: + _exit_contract_error( + ui.fmt_invalid_baseline_path(path=baseline_arg_path, error=exc), + cause=exc, + ) + + shared_baseline_payload: dict[str, object] | None = None + default_metrics_baseline = ap_obj.get_default("metrics_baseline") + metrics_path_overridden = metrics_path_from_args or ( + args_obj.metrics_baseline != default_metrics_baseline + ) + metrics_baseline_raw_path = ( + args_obj.metrics_baseline if metrics_path_overridden else args_obj.baseline + ) + metrics_baseline_arg_path = Path(metrics_baseline_raw_path).expanduser() + try: + metrics_baseline_path = _resolve_runtime_path_arg( + root_path=root_path, + raw_path=metrics_baseline_raw_path, + from_cli=metrics_path_from_args, + ) + if metrics_baseline_path == baseline_path: + probe = _probe_metrics_baseline_section(metrics_baseline_path) + metrics_baseline_exists = probe.has_metrics_section + shared_baseline_payload = probe.payload + else: + metrics_baseline_exists = metrics_baseline_path.exists() + except OSError as exc: + _exit_contract_error( + ui.fmt_invalid_baseline_path( + 
path=metrics_baseline_arg_path, + error=exc, + ), + cause=exc, + ) + + return _ResolvedBaselineInputs( + baseline_path=baseline_path, + baseline_exists=baseline_exists, + metrics_baseline_path=metrics_baseline_path, + metrics_baseline_exists=metrics_baseline_exists, + shared_baseline_payload=shared_baseline_payload, + ) + + +def _prepare_metrics_mode_and_ui( + *, + args: object, + root_path: Path, + baseline_path: Path, + baseline_exists: bool, + metrics_baseline_path: Path, + metrics_baseline_exists: bool, +) -> None: + args_obj = cast("Any", args) + if ( + args_obj.update_baseline + and not args_obj.skip_metrics + and not args_obj.update_metrics_baseline + ): + args_obj.update_metrics_baseline = True + _configure_metrics_mode( + args=args_obj, + metrics_baseline_exists=metrics_baseline_exists, + ) + if ( + args_obj.update_metrics_baseline + and metrics_baseline_path == baseline_path + and not baseline_exists + and not args_obj.update_baseline + ): + args_obj.update_baseline = True + if args_obj.quiet: + args_obj.no_progress = True + return + print_banner(root=root_path) + + +def _resolve_report_cache_path(cache_path: Path) -> Path: + try: + return cache_path.resolve() + except OSError: + return cache_path + + +def _gating_mode_enabled(args: object) -> bool: + args_obj = cast("Any", args) + return bool( + args_obj.fail_on_new + or args_obj.fail_threshold >= 0 + or args_obj.fail_complexity >= 0 + or args_obj.fail_coupling >= 0 + or args_obj.fail_cohesion >= 0 + or args_obj.fail_cycles + or args_obj.fail_dead_code + or args_obj.fail_health >= 0 + or args_obj.fail_on_new_metrics + or args_obj.fail_on_typing_regression + or args_obj.fail_on_docstring_regression + or args_obj.fail_on_api_break + or args_obj.min_typing_coverage >= 0 + or args_obj.min_docstring_coverage >= 0 + ) + + +def _build_diff_context( + *, + analysis: AnalysisResult, + baseline_path: Path, + baseline_state: _CloneBaselineState, + metrics_baseline_state: _MetricsBaselineState, +) -> _DiffContext: + 
baseline_for_diff = ( + baseline_state.baseline + if baseline_state.trusted_for_diff + else Baseline(baseline_path) + ) + raw_new_func, raw_new_block = baseline_for_diff.diff( + analysis.func_groups, + analysis.block_groups, + ) + metrics_diff = None + if analysis.project_metrics is not None and metrics_baseline_state.trusted_for_diff: + metrics_diff = metrics_baseline_state.baseline.diff(analysis.project_metrics) + return _DiffContext( + new_func=set(raw_new_func), + new_block=set(raw_new_block), + new_clones_count=len(raw_new_func) + len(raw_new_block), + metrics_diff=metrics_diff, + coverage_adoption_diff_available=bool( + metrics_baseline_state.trusted_for_diff + and getattr( + metrics_baseline_state.baseline, + "has_coverage_adoption_snapshot", + False, + ) + ), + api_surface_diff_available=bool( + metrics_baseline_state.trusted_for_diff + and getattr(metrics_baseline_state.baseline, "api_surface_snapshot", None) + is not None + ), + ) + + +def _print_metrics_if_available( + *, + args: object, + analysis: AnalysisResult, + metrics_diff: object | None, + api_surface_diff_available: bool, +) -> None: + args_obj = cast("Any", args) + if analysis.project_metrics is None: + return + _print_metrics( + console=_console(), + quiet=args_obj.quiet, + metrics=build_metrics_snapshot( + analysis_result=analysis, + metrics_diff=metrics_diff, + api_surface_diff_available=api_surface_diff_available, + ), + ) + + +def _resolve_changed_clone_gate( + *, + args: object, + report_document: Mapping[str, object] | None, + changed_paths: Collection[str], +) -> ChangedCloneGate | None: + args_obj = cast("Any", args) + if not args_obj.changed_only or report_document is None: + return None + return _changed_clone_gate_from_report( + report_document, + changed_paths=tuple(changed_paths), + ) + + +def _maybe_print_changed_scope_snapshot( + *, + args: object, + changed_clone_gate: ChangedCloneGate | None, +) -> None: + args_obj = cast("Any", args) + if changed_clone_gate is None: + return 
+ _print_changed_scope( + console=_console(), + quiet=args_obj.quiet, + changed_scope=ChangedScopeSnapshot( + paths_count=len(changed_clone_gate.changed_paths), + findings_total=changed_clone_gate.findings_total, + findings_new=changed_clone_gate.findings_new, + findings_known=changed_clone_gate.findings_known, + ), + ) + + +def _warn_new_clones_without_fail( + *, + args: object, + notice_new_clones_count: int, +) -> None: + args_obj = cast("Any", args) + if args_obj.update_baseline or args_obj.fail_on_new or notice_new_clones_count <= 0: + return + _console().print(ui.WARN_NEW_CLONES_WITHOUT_FAIL) + + +def _print_pipeline_done_if_needed(*, args: object, run_started_at: float) -> None: + args_obj = cast("Any", args) + if args_obj.quiet: + return + elapsed = time.monotonic() - run_started_at + _console().print() + _console().print(ui.fmt_pipeline_done(elapsed)) + + +def _cache_update_segment_projection(cache: Cache, analysis: AnalysisResult) -> None: + if not hasattr(cache, "segment_report_projection"): + return + new_projection = build_segment_report_projection( + digest=analysis.segment_groups_raw_digest, + suppressed=analysis.suppressed_segment_groups, + groups=analysis.segment_groups, + ) + if new_projection != cache.segment_report_projection: + cache.segment_report_projection = new_projection + cache._dirty = True + + +def _run_analysis_stages( + *, + args: object, + boot: BootstrapResult, + cache: Cache, +) -> tuple[DiscoveryResult, PipelineProcessingResult, AnalysisResult]: + def _require_rich_console(value: object) -> object: + if isinstance(value, PlainConsole): + raise RuntimeError("Rich console is required when progress UI is enabled.") + return value + + args_obj = cast("Any", args) + printer = _console() + use_status = not args_obj.quiet and not args_obj.no_progress + + try: + if use_status: + with cast("Any", printer).status(ui.STATUS_DISCOVERING, spinner="dots"): + discovery_result = discover(boot=boot, cache=cache) + else: + discovery_result = 
discover(boot=boot, cache=cache) + except OSError as exc: + printer.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=exc))) + sys.exit(ExitCode.CONTRACT_ERROR) + + for warning in discovery_result.skipped_warnings: + printer.print(f"[warning]{warning}[/warning]") + + total_files = len(discovery_result.files_to_process) + if total_files > 0 and not args_obj.quiet and args_obj.no_progress: + printer.print(ui.fmt_processing_changed(total_files)) + + if total_files > 0 and not args_obj.no_progress: + ( + progress_cls, + spinner_column_cls, + text_column_cls, + bar_column_cls, + time_elapsed_column_cls, + ) = _rich_progress_symbols() + + progress_factory = cast("Any", progress_cls) + with progress_factory( + cast("Any", spinner_column_cls)(), + cast("Any", text_column_cls)("[progress.description]{task.description}"), + cast("Any", bar_column_cls)(), + cast("Any", text_column_cls)( + "[progress.percentage]{task.percentage:>3.0f}%" + ), + cast("Any", time_elapsed_column_cls)(), + console=_require_rich_console(console), + ) as progress_ui: + progress_ui_any = cast("Any", progress_ui) + task_id = progress_ui_any.add_task( + f"Analyzing {total_files} files...", + total=total_files, + ) + processing_result = process( + boot=boot, + discovery=discovery_result, + cache=cache, + on_advance=lambda: progress_ui_any.advance(task_id), + on_worker_error=lambda reason: printer.print( + ui.fmt_worker_failed(reason) + ), + on_parallel_fallback=lambda exc: printer.print( + ui.fmt_parallel_fallback(exc) + ), + ) + else: + processing_result = process( + boot=boot, + discovery=discovery_result, + cache=cache, + on_worker_error=( + (lambda reason: printer.print(ui.fmt_batch_item_failed(reason))) + if args_obj.no_progress + else (lambda reason: printer.print(ui.fmt_worker_failed(reason))) + ), + on_parallel_fallback=lambda exc: printer.print( + ui.fmt_parallel_fallback(exc) + ), + ) + + _print_failed_files(processing_result.failed_files) + if not processing_result.failed_files and 
processing_result.source_read_failures: + _print_failed_files(processing_result.source_read_failures) + + if use_status: + with cast("Any", printer).status(ui.STATUS_GROUPING, spinner="dots"): + analysis_result = analyze( + boot=boot, + discovery=discovery_result, + processing=processing_result, + ) + _cache_update_segment_projection(cache, analysis_result) + try: + cache.save() + except CacheError as exc: + printer.print(ui.fmt_cache_save_failed(exc)) + else: + analysis_result = analyze( + boot=boot, + discovery=discovery_result, + processing=processing_result, + ) + _cache_update_segment_projection(cache, analysis_result) + try: + cache.save() + except CacheError as exc: + printer.print(ui.fmt_cache_save_failed(exc)) + + coverage_join = getattr(analysis_result, "coverage_join", None) + if ( + coverage_join is not None + and coverage_join.status != "ok" + and coverage_join.invalid_reason + ): + printer.print(ui.fmt_coverage_join_ignored(coverage_join.invalid_reason)) + + return discovery_result, processing_result, analysis_result + + +def _enforce_gating( + *, + args: object, + boot: BootstrapResult, + analysis: AnalysisResult, + processing: PipelineProcessingResult, + source_read_contract_failure: bool, + baseline_failure_code: ExitCode | None, + metrics_baseline_failure_code: ExitCode | None, + new_func: set[str], + new_block: set[str], + metrics_diff: object | None, + html_report_path: str | None, + clone_threshold_total: int | None = None, +) -> None: + args_obj = cast("Any", args) + printer = _console() + + if source_read_contract_failure: + printer.print( + ui.fmt_contract_error( + ui.fmt_unreadable_source_in_gating( + count=len(processing.source_read_failures) + ) + ) + ) + for failure in processing.source_read_failures[:10]: + printer.print(f" • {failure}") + if len(processing.source_read_failures) > 10: + printer.print(f" ... 
and {len(processing.source_read_failures) - 10} more") + sys.exit(ExitCode.CONTRACT_ERROR) + + if baseline_failure_code is not None: + printer.print(ui.fmt_contract_error(ui.ERR_BASELINE_GATING_REQUIRES_TRUSTED)) + sys.exit(baseline_failure_code) + + if metrics_baseline_failure_code is not None: + printer.print( + ui.fmt_contract_error( + "Metrics baseline is untrusted or missing for requested metrics gating." + ) + ) + sys.exit(metrics_baseline_failure_code) + + if bool(getattr(args_obj, "fail_on_untested_hotspots", False)): + if analysis.coverage_join is None: + printer.print( + ui.fmt_contract_error( + "--fail-on-untested-hotspots requires --coverage." + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if analysis.coverage_join.status != "ok": + detail = analysis.coverage_join.invalid_reason or "invalid coverage input" + printer.print( + ui.fmt_contract_error( + "Coverage gating requires a valid Cobertura XML input.\n" + f"Reason: {detail}" + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + gating_analysis = analysis + if clone_threshold_total is not None: + preserved_block_count = min( + max(analysis.block_clones_count, 0), + max(clone_threshold_total, 0), + ) + gating_analysis = replace( + analysis, + func_clones_count=max(clone_threshold_total - preserved_block_count, 0), + block_clones_count=preserved_block_count, + ) + + gate_result = gate( + boot=boot, + analysis=gating_analysis, + new_func=new_func, + new_block=new_block, + metrics_diff=cast("Any", metrics_diff), + ) + + metric_reasons = [ + reason[len("metric:") :] + for reason in gate_result.reasons + if reason.startswith("metric:") + ] + if metric_reasons: + _print_gating_failure_block( + code="metrics", + entries=[_parse_metric_reason_entry(reason) for reason in metric_reasons], + args=args_obj, + ) + sys.exit(ExitCode.GATING_FAILURE) + + if "clone:new" in gate_result.reasons: + default_report = Path(".cache/codeclone/report.html") + resolved_html_report_path = html_report_path + if resolved_html_report_path 
is None and default_report.exists(): + resolved_html_report_path = str(default_report) + + clone_entries: list[tuple[str, object]] = [ + ("new_function_clone_groups", len(new_func)), + ("new_block_clone_groups", len(new_block)), + ] + if resolved_html_report_path: + clone_entries.append(("report", resolved_html_report_path)) + clone_entries.append(("accept", "codeclone . --update-baseline")) + _print_gating_failure_block( + code="new-clones", + entries=clone_entries, + args=args_obj, + ) + + if args_obj.verbose: + _print_verbose_clone_hashes( + printer, + label="Function clone hashes", + clone_hashes=new_func, + ) + _print_verbose_clone_hashes( + printer, + label="Block clone hashes", + clone_hashes=new_block, + ) + + sys.exit(ExitCode.GATING_FAILURE) + + threshold_reason = next( + ( + reason + for reason in gate_result.reasons + if reason.startswith("clone:threshold:") + ), + None, + ) + if threshold_reason is not None: + _, _, total_raw, threshold_raw = threshold_reason.split(":", maxsplit=3) + _print_gating_failure_block( + code="threshold", + entries=( + ("clone_groups_total", int(total_raw)), + ("clone_groups_limit", int(threshold_raw)), + ), + args=args_obj, + ) + sys.exit(ExitCode.GATING_FAILURE) + + +def _main_impl() -> None: + run_started_at = time.monotonic() + analysis_started_at_utc = cli_meta_mod._current_report_timestamp_utc() + ap = build_parser(__version__) + + raw_argv = tuple(sys.argv[1:]) + explicit_cli_dests = collect_explicit_cli_dests(ap, argv=raw_argv) + report_path_origins = _report_path_origins(raw_argv) + report_generated_at_utc = cli_meta_mod._current_report_timestamp_utc() + cache_path_from_args = any( + arg in {"--cache-dir", "--cache-path"} + or arg.startswith(("--cache-dir=", "--cache-path=")) + for arg in sys.argv + ) + baseline_path_from_args = any( + arg == "--baseline" or arg.startswith("--baseline=") for arg in sys.argv + ) + metrics_path_from_args = any( + arg == "--metrics-baseline" or arg.startswith("--metrics-baseline=") + 
for arg in sys.argv + ) + args = ap.parse_args() + + root_path = _resolve_existing_root_path(args) + pyproject_config = _load_pyproject_config_or_exit(root_path) + apply_pyproject_config_overrides( + args=args, + config_values=pyproject_config, + explicit_cli_dests=explicit_cli_dests, + ) + git_diff_ref = _validate_changed_scope_args(args=args) + changed_paths = ( + _git_diff_changed_paths(root_path=root_path, git_diff_ref=git_diff_ref) + if git_diff_ref is not None + else () + ) + _configure_runtime_flags(args) + _configure_runtime_console(args) + _validate_numeric_args_or_exit(args) + baseline_inputs = _resolve_baseline_inputs( + ap=ap, + args=args, + root_path=root_path, + baseline_path_from_args=baseline_path_from_args, + metrics_path_from_args=metrics_path_from_args, + ) + ( + baseline_path, + baseline_exists, + metrics_baseline_path, + metrics_baseline_exists, + ) = ( + baseline_inputs.baseline_path, + baseline_inputs.baseline_exists, + baseline_inputs.metrics_baseline_path, + baseline_inputs.metrics_baseline_exists, + ) + shared_baseline_payload = baseline_inputs.shared_baseline_payload + + _prepare_metrics_mode_and_ui( + args=args, + root_path=root_path, + baseline_path=baseline_path, + baseline_exists=baseline_exists, + metrics_baseline_path=metrics_baseline_path, + metrics_baseline_exists=metrics_baseline_exists, + ) + + output_paths = _resolve_output_paths( + args, + report_path_origins=report_path_origins, + report_generated_at_utc=report_generated_at_utc, + ) + _validate_report_ui_flags(args=args, output_paths=output_paths) + cache_path = _resolve_cache_path( + root_path=root_path, + args=args, + from_args=cache_path_from_args, + ) + + cache = Cache( + cache_path, + root=root_path, + max_size_bytes=args.max_cache_size_mb * 1024 * 1024, + min_loc=args.min_loc, + min_stmt=args.min_stmt, + block_min_loc=args.block_min_loc, + block_min_stmt=args.block_min_stmt, + segment_min_loc=args.segment_min_loc, + segment_min_stmt=args.segment_min_stmt, + 
collect_api_surface=bool(args.api_surface), + ) + cache.load() + if cache.load_warning: + _console().print(f"[warning]{cache.load_warning}[/warning]") + + boot = bootstrap( + args=args, + root=root_path, + output_paths=output_paths, + cache_path=cache_path, + ) + discovery_result, processing_result, analysis_result = _run_analysis_stages( + args=args, + boot=boot, + cache=cache, + ) + + gating_mode = _gating_mode_enabled(args) + source_read_contract_failure = ( + bool(processing_result.source_read_failures) + and gating_mode + and not args.update_baseline + ) + baseline_state = _resolve_clone_baseline_state( + args=args, + baseline_path=baseline_path, + baseline_exists=baseline_exists, + analysis=analysis_result, + shared_baseline_payload=( + shared_baseline_payload if metrics_baseline_path == baseline_path else None + ), + ) + metrics_baseline_state = _resolve_metrics_baseline_state( + args=args, + metrics_baseline_path=metrics_baseline_path, + metrics_baseline_exists=metrics_baseline_exists, + baseline_updated_path=baseline_state.updated_path, + analysis=analysis_result, + shared_baseline_payload=( + shared_baseline_payload if metrics_baseline_path == baseline_path else None + ), + ) + + report_cache_path = _resolve_report_cache_path(cache_path) + + cache_status, cache_schema_version = _resolve_cache_status(cache) + report_meta = cli_meta_mod._build_report_meta( + codeclone_version=__version__, + scan_root=root_path, + baseline_path=baseline_path, + baseline=baseline_state.baseline, + baseline_loaded=baseline_state.loaded, + baseline_status=baseline_state.status.value, + cache_path=report_cache_path, + cache_used=cache_status == CacheStatus.OK, + cache_status=cache_status.value, + cache_schema_version=cache_schema_version, + files_skipped_source_io=len(processing_result.source_read_failures), + metrics_baseline_path=metrics_baseline_path, + metrics_baseline=metrics_baseline_state.baseline, + metrics_baseline_loaded=metrics_baseline_state.loaded, + 
metrics_baseline_status=metrics_baseline_state.status.value, + health_score=( + analysis_result.project_metrics.health.total + if analysis_result.project_metrics + else None + ), + health_grade=( + analysis_result.project_metrics.health.grade + if analysis_result.project_metrics + else None + ), + analysis_mode=("clones_only" if args.skip_metrics else "full"), + metrics_computed=_metrics_computed(args), + min_loc=args.min_loc, + min_stmt=args.min_stmt, + block_min_loc=args.block_min_loc, + block_min_stmt=args.block_min_stmt, + segment_min_loc=args.segment_min_loc, + segment_min_stmt=args.segment_min_stmt, + design_complexity_threshold=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + design_coupling_threshold=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + design_cohesion_threshold=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + analysis_started_at_utc=analysis_started_at_utc, + report_generated_at_utc=report_generated_at_utc, + ) + + diff_context = _build_diff_context( + analysis=analysis_result, + baseline_path=baseline_path, + baseline_state=baseline_state, + metrics_baseline_state=metrics_baseline_state, + ) + + summary_counts = build_summary_counts( + discovery_result=discovery_result, + processing_result=processing_result, + ) + _print_summary( + console=_console(), + quiet=args.quiet, + files_found=discovery_result.files_found, + files_analyzed=processing_result.files_analyzed, + cache_hits=discovery_result.cache_hits, + files_skipped=processing_result.files_skipped, + analyzed_lines=summary_counts["analyzed_lines"], + analyzed_functions=summary_counts["analyzed_functions"], + analyzed_methods=summary_counts["analyzed_methods"], + analyzed_classes=summary_counts["analyzed_classes"], + func_clones_count=analysis_result.func_clones_count, + block_clones_count=analysis_result.block_clones_count, + segment_clones_count=analysis_result.segment_clones_count, + suppressed_golden_fixture_groups=len( + getattr(analysis_result, "suppressed_clone_groups", ()) + ), + 
suppressed_segment_groups=analysis_result.suppressed_segment_groups, + new_clones_count=diff_context.new_clones_count, + ) + _print_metrics_if_available( + args=args, + analysis=analysis_result, + metrics_diff=diff_context.metrics_diff, + api_surface_diff_available=diff_context.api_surface_diff_available, + ) + + report_artifacts = report( + boot=boot, + discovery=discovery_result, + processing=processing_result, + analysis=analysis_result, + report_meta=report_meta, + new_func=diff_context.new_func, + new_block=diff_context.new_block, + html_builder=build_html_report, + metrics_diff=diff_context.metrics_diff, + coverage_adoption_diff_available=diff_context.coverage_adoption_diff_available, + api_surface_diff_available=diff_context.api_surface_diff_available, + include_report_document=bool(changed_paths), + ) + changed_clone_gate = _resolve_changed_clone_gate( + args=args, + report_document=report_artifacts.report_document, + changed_paths=changed_paths, + ) + _maybe_print_changed_scope_snapshot( + args=args, + changed_clone_gate=changed_clone_gate, + ) + html_report_path = _write_report_outputs( + args=args, + output_paths=output_paths, + report_artifacts=report_artifacts, + open_html_report=args.open_html_report, + ) + + _enforce_gating( + args=args, + boot=boot, + analysis=analysis_result, + processing=processing_result, + source_read_contract_failure=source_read_contract_failure, + baseline_failure_code=baseline_state.failure_code, + metrics_baseline_failure_code=metrics_baseline_state.failure_code, + new_func=( + set(changed_clone_gate.new_func) + if changed_clone_gate + else diff_context.new_func + ), + new_block=( + set(changed_clone_gate.new_block) + if changed_clone_gate + else diff_context.new_block + ), + metrics_diff=diff_context.metrics_diff, + html_report_path=html_report_path, + clone_threshold_total=( + changed_clone_gate.total_clone_groups if changed_clone_gate else None + ), + ) + + notice_new_clones_count = ( + len(changed_clone_gate.new_func) + 
len(changed_clone_gate.new_block) + if changed_clone_gate is not None + else diff_context.new_clones_count + ) + _warn_new_clones_without_fail( + args=args, + notice_new_clones_count=notice_new_clones_count, + ) + _print_pipeline_done_if_needed(args=args, run_started_at=run_started_at) + + +def main() -> None: + try: + _main_impl() + except SystemExit: + raise + except Exception as exc: + _console().print( + ui.fmt_internal_error( + exc, + issues_url=ISSUES_URL, + debug=_is_debug_enabled(), + ) + ) + sys.exit(ExitCode.INTERNAL_ERROR) + + +if __name__ == "__main__": + main() diff --git a/codeclone/_cli_meta.py b/codeclone/surfaces/cli/report_meta.py similarity index 68% rename from codeclone/_cli_meta.py rename to codeclone/surfaces/cli/report_meta.py index ffa9245..d533f2d 100644 --- a/codeclone/_cli_meta.py +++ b/codeclone/surfaces/cli/report_meta.py @@ -8,28 +8,21 @@ import sys from datetime import datetime, timezone -from typing import TYPE_CHECKING, TypedDict +from typing import TYPE_CHECKING -from .baseline import Baseline, current_python_tag -from .contracts import ( +from ...baseline.clone_baseline import Baseline +from ...baseline.trust import current_python_tag +from ...contracts import ( DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, ) +from ...contracts.schemas import ReportMeta if TYPE_CHECKING: from pathlib import Path - from .metrics_baseline import MetricsBaseline - - -class AnalysisProfileMeta(TypedDict): - min_loc: int - min_stmt: int - block_min_loc: int - block_min_stmt: int - segment_min_loc: int - segment_min_stmt: int + from ...baseline.metrics_baseline import MetricsBaseline def _current_python_version() -> str: @@ -42,56 +35,6 @@ def _current_report_timestamp_utc() -> str: ) -class ReportMeta(TypedDict): - """ - Canonical report metadata contract shared by HTML, JSON, and TXT reports. 
- - Key semantics: - - python_version: runtime major.minor string for human readability (e.g. "3.13") - - python_tag: runtime compatibility tag used by baseline/cache contracts - (e.g. "cp313") - - baseline_*: values loaded from baseline metadata for audit/provenance - - cache_*: cache status/provenance for run transparency - """ - - codeclone_version: str - project_name: str - scan_root: str - python_version: str - python_tag: str - baseline_path: str - baseline_fingerprint_version: str | None - baseline_schema_version: str | None - baseline_python_tag: str | None - baseline_generator_name: str | None - baseline_generator_version: str | None - baseline_payload_sha256: str | None - baseline_payload_sha256_verified: bool - baseline_loaded: bool - baseline_status: str - cache_path: str - cache_used: bool - cache_status: str - cache_schema_version: str | None - files_skipped_source_io: int - metrics_baseline_path: str - metrics_baseline_loaded: bool - metrics_baseline_status: str - metrics_baseline_schema_version: str | None - metrics_baseline_payload_sha256: str | None - metrics_baseline_payload_sha256_verified: bool - health_score: int | None - health_grade: str | None - analysis_mode: str - metrics_computed: list[str] - analysis_profile: AnalysisProfileMeta - design_complexity_threshold: int - design_coupling_threshold: int - design_cohesion_threshold: int - analysis_started_at_utc: str | None - report_generated_at_utc: str - - def _build_report_meta( *, codeclone_version: str, diff --git a/codeclone/surfaces/cli/reports_output.py b/codeclone/surfaces/cli/reports_output.py new file mode 100644 index 0000000..350893b --- /dev/null +++ b/codeclone/surfaces/cli/reports_output.py @@ -0,0 +1,324 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sys +import webbrowser +from collections.abc import Callable, Mapping, Sequence +from pathlib import Path +from typing import Any, Protocol, cast + +from ... import ui_messages as ui +from ...contracts import ExitCode +from . import state as cli_state +from .types import OutputPaths, ReportPathOrigin + + +class _PrinterLike(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... + + +class _QuietArgs(Protocol): + quiet: bool + + +def _path_attr(obj: object, name: str) -> Path | None: + value = getattr(obj, name, None) + return value if isinstance(value, Path) else None + + +def _text_attr(obj: object, name: str) -> str | None: + value = getattr(obj, name, None) + return value if isinstance(value, str) else None + + +def _write_report_output( + *, + out: Path, + content: str, + label: str, + console: _PrinterLike, +) -> None: + try: + out.parent.mkdir(parents=True, exist_ok=True) + out.write_text(content, "utf-8") + except OSError as exc: + console.print( + ui.fmt_contract_error( + ui.fmt_report_write_failed(label=label, path=out, error=exc) + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + +def _open_html_report_in_browser(*, path: Path) -> None: + if not webbrowser.open_new_tab(path.as_uri()): + raise OSError("no browser handler available") + + +def write_report_outputs( + *, + args: _QuietArgs, + output_paths: object, + report_artifacts: object, + console: _PrinterLike, + open_html_report: bool = False, +) -> str | None: + html_report_path: str | None = None + saved_reports: list[tuple[str, Path]] = [] + html_path = _path_attr(output_paths, "html") + json_path = _path_attr(output_paths, "json") + md_path = _path_attr(output_paths, "md") + sarif_path = _path_attr(output_paths, "sarif") + text_path = _path_attr(output_paths, "text") + html_report = _text_attr(report_artifacts, "html") + json_report = 
_text_attr(report_artifacts, "json") + md_report = _text_attr(report_artifacts, "md") + sarif_report = _text_attr(report_artifacts, "sarif") + text_report = _text_attr(report_artifacts, "text") + + if html_path and html_report is not None: + out = html_path + _write_report_output( + out=out, + content=html_report, + label="HTML", + console=console, + ) + html_report_path = str(out) + saved_reports.append(("HTML", out)) + + if json_path and json_report is not None: + out = json_path + _write_report_output( + out=out, + content=json_report, + label="JSON", + console=console, + ) + saved_reports.append(("JSON", out)) + + if md_path and md_report is not None: + out = md_path + _write_report_output( + out=out, + content=md_report, + label="Markdown", + console=console, + ) + saved_reports.append(("Markdown", out)) + + if sarif_path and sarif_report is not None: + out = sarif_path + _write_report_output( + out=out, + content=sarif_report, + label="SARIF", + console=console, + ) + saved_reports.append(("SARIF", out)) + + if text_path and text_report is not None: + out = text_path + _write_report_output( + out=out, + content=text_report, + label="text", + console=console, + ) + saved_reports.append(("Text", out)) + + if saved_reports and not args.quiet: + cwd = Path.cwd() + console.print() + for label, path in saved_reports: + try: + display = path.relative_to(cwd) + except ValueError: + display = path + console.print(f" [bold]{label} report saved:[/bold] [dim]{display}[/dim]") + + if open_html_report and html_path is not None: + try: + _open_html_report_in_browser(path=html_path) + except Exception as exc: + console.print(ui.fmt_html_report_open_failed(path=html_path, error=exc)) + + return html_report_path + + +def _validate_output_path( + path: str, + *, + expected_suffix: str, + label: str, + console: _PrinterLike, + invalid_message: Callable[..., str], + invalid_path_message: Callable[..., str], +) -> Path: + out = Path(path).expanduser() + if out.suffix.lower() != 
expected_suffix: + console.print( + ui.fmt_contract_error( + invalid_message(label=label, path=out, expected_suffix=expected_suffix) + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + try: + return out.resolve() + except OSError as exc: + console.print( + ui.fmt_contract_error( + invalid_path_message(label=label, path=out, error=exc) + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + +def _report_path_origins(argv: Sequence[str]) -> dict[str, ReportPathOrigin | None]: + origins: dict[str, ReportPathOrigin | None] = { + "html": None, + "json": None, + "md": None, + "sarif": None, + "text": None, + } + flag_to_field = { + "--html": "html", + "--json": "json", + "--md": "md", + "--sarif": "sarif", + "--text": "text", + } + index = 0 + while index < len(argv): + token = argv[index] + if token == "--": + break + if "=" in token: + flag, _value = token.split("=", maxsplit=1) + field_name = flag_to_field.get(flag) + if field_name is not None: + origins[field_name] = "explicit" + index += 1 + continue + field_name = flag_to_field.get(token) + if field_name is None: + index += 1 + continue + next_token = argv[index + 1] if index + 1 < len(argv) else None + if next_token is None or next_token.startswith("-"): + origins[field_name] = "default" + index += 1 + continue + origins[field_name] = "explicit" + index += 2 + return origins + + +def _report_path_timestamp_slug(report_generated_at_utc: str) -> str: + return report_generated_at_utc.replace("-", "").replace(":", "") + + +def _timestamped_report_path(path: Path, *, report_generated_at_utc: str) -> Path: + suffix = path.suffix + stem = path.name[: -len(suffix)] if suffix else path.name + return path.with_name( + f"{stem}-{_report_path_timestamp_slug(report_generated_at_utc)}{suffix}" + ) + + +def _resolve_output_paths( + args: object, + *, + report_path_origins: Mapping[str, ReportPathOrigin | None], + report_generated_at_utc: str, +) -> OutputPaths: + args_obj = cast("Any", args) + printer = cast("_PrinterLike", 
cli_state.get_console()) + resolved: dict[str, Path | None] = { + "html": None, + "json": None, + "md": None, + "sarif": None, + "text": None, + } + output_specs = ( + ("html", "html_out", ".html", "HTML"), + ("json", "json_out", ".json", "JSON"), + ("md", "md_out", ".md", "Markdown"), + ("sarif", "sarif_out", ".sarif", "SARIF"), + ("text", "text_out", ".txt", "text"), + ) + + for field_name, arg_name, expected_suffix, label in output_specs: + raw_value = getattr(args_obj, arg_name, None) + if not raw_value: + continue + path = _validate_output_path( + raw_value, + expected_suffix=expected_suffix, + label=label, + console=printer, + invalid_message=ui.fmt_invalid_output_extension, + invalid_path_message=ui.fmt_invalid_output_path, + ) + if ( + args_obj.timestamped_report_paths + and report_path_origins.get(field_name) == "default" + ): + path = _timestamped_report_path( + path, + report_generated_at_utc=report_generated_at_utc, + ) + resolved[field_name] = path + + return OutputPaths( + html=resolved["html"], + json=resolved["json"], + text=resolved["text"], + md=resolved["md"], + sarif=resolved["sarif"], + ) + + +def _validate_report_ui_flags(*, args: object, output_paths: OutputPaths) -> None: + args_obj = cast("Any", args) + console = cast("_PrinterLike", cli_state.get_console()) + if args_obj.open_html_report and output_paths.html is None: + console.print(ui.fmt_contract_error(ui.ERR_OPEN_HTML_REPORT_REQUIRES_HTML)) + sys.exit(ExitCode.CONTRACT_ERROR) + + if args_obj.timestamped_report_paths and not any( + ( + output_paths.html, + output_paths.json, + output_paths.md, + output_paths.sarif, + output_paths.text, + ) + ): + console.print( + ui.fmt_contract_error(ui.ERR_TIMESTAMPED_REPORT_PATHS_REQUIRES_REPORT) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + +def _write_report_outputs( + *, + args: object, + output_paths: OutputPaths, + report_artifacts: object, + open_html_report: bool = False, +) -> str | None: + return write_report_outputs( + args=cast("Any", args), 
+ output_paths=output_paths, + report_artifacts=report_artifacts, + console=cast("_PrinterLike", cli_state.get_console()), + open_html_report=open_html_report, + ) diff --git a/codeclone/_cli_runtime.py b/codeclone/surfaces/cli/runtime.py similarity index 82% rename from codeclone/_cli_runtime.py rename to codeclone/surfaces/cli/runtime.py index 28ca869..79b019d 100644 --- a/codeclone/_cli_runtime.py +++ b/codeclone/surfaces/cli/runtime.py @@ -8,20 +8,12 @@ import sys from pathlib import Path -from typing import Protocol +from typing import Any, Protocol, cast -from . import ui_messages as ui -from .cache import CacheStatus -from .contracts import ExitCode - -__all__ = [ - "configure_metrics_mode", - "metrics_computed", - "print_failed_files", - "resolve_cache_path", - "resolve_cache_status", - "validate_numeric_args", -] +from ... import ui_messages as ui +from ...cache import Cache, CacheStatus +from ...contracts import ExitCode +from . import state as cli_state class _RuntimeArgs(Protocol): @@ -220,3 +212,40 @@ def print_failed_files(*, failed_files: tuple[str, ...], console: _PrinterLike) console.print(f" • {failure}") if len(failed_files) > 10: console.print(f" ... and {len(failed_files) - 10} more") + + +def _resolve_cache_path(*, root_path: Path, args: object, from_args: bool) -> Path: + return resolve_cache_path( + root_path=root_path, + args=cast("Any", args), + from_args=from_args, + legacy_cache_path=cli_state.LEGACY_CACHE_PATH, + console=cast("_PrinterLike", cli_state.get_console()), + ) + + +def _validate_numeric_args(args: object) -> bool: + return validate_numeric_args(cast("Any", args)) + + +def _configure_metrics_mode(*, args: object, metrics_baseline_exists: bool) -> None: + configure_metrics_mode( + args=cast("Any", args), + metrics_baseline_exists=metrics_baseline_exists, + console=cast("_PrinterLike", cli_state.get_console()), + ) + + +def _print_failed_files(failed_files: tuple[str, ...] 
| list[str]) -> None: + print_failed_files( + failed_files=tuple(failed_files), + console=cast("_PrinterLike", cli_state.get_console()), + ) + + +def _metrics_computed(args: object) -> tuple[str, ...]: + return metrics_computed(cast("Any", args)) + + +def _resolve_cache_status(cache: Cache) -> tuple[CacheStatus, str | None]: + return resolve_cache_status(cache) diff --git a/codeclone/surfaces/cli/state.py b/codeclone/surfaces/cli/state.py new file mode 100644 index 0000000..1c9aa9e --- /dev/null +++ b/codeclone/surfaces/cli/state.py @@ -0,0 +1,25 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from pathlib import Path + +console: object | None = None +LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser() + + +def get_console() -> object: + global console + if console is None: + from .console import make_plain_console + + console = make_plain_console() + return console + + +def set_console(value: object) -> None: + global console + console = value diff --git a/codeclone/_cli_summary.py b/codeclone/surfaces/cli/summary.py similarity index 68% rename from codeclone/_cli_summary.py rename to codeclone/surfaces/cli/summary.py index 5b849a8..f94be07 100644 --- a/codeclone/_cli_summary.py +++ b/codeclone/surfaces/cli/summary.py @@ -7,9 +7,13 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Protocol +from typing import Any, Protocol -from . import ui_messages as ui +from ... import ui_messages as ui +from ...utils import coerce as _coerce + +_as_int = _coerce.as_int +_as_mapping = _coerce.as_mapping @dataclass(frozen=True, slots=True) @@ -59,6 +63,117 @@ class _Printer(Protocol): def print(self, *objects: object, **kwargs: object) -> None: ... 
+def build_summary_counts( + *, + discovery_result: Any, + processing_result: Any, +) -> dict[str, int]: + return { + "analyzed_lines": processing_result.analyzed_lines + + int(getattr(discovery_result, "cached_lines", 0)), + "analyzed_functions": processing_result.analyzed_functions + + int(getattr(discovery_result, "cached_functions", 0)), + "analyzed_methods": processing_result.analyzed_methods + + int(getattr(discovery_result, "cached_methods", 0)), + "analyzed_classes": processing_result.analyzed_classes + + int(getattr(discovery_result, "cached_classes", 0)), + } + + +def build_metrics_snapshot( + *, + analysis_result: Any, + metrics_diff: Any | None, + api_surface_diff_available: bool, +) -> MetricsSnapshot: + project_metrics = analysis_result.project_metrics + metrics_payload_map = _as_mapping(analysis_result.metrics_payload) + overloaded_modules_summary = _as_mapping( + _as_mapping(metrics_payload_map.get("overloaded_modules")).get("summary") + ) + adoption_summary = _as_mapping( + _as_mapping(metrics_payload_map.get("coverage_adoption")).get("summary") + ) + api_surface_summary = _as_mapping( + _as_mapping(metrics_payload_map.get("api_surface")).get("summary") + ) + coverage_join_summary = _as_mapping( + _as_mapping(metrics_payload_map.get("coverage_join")).get("summary") + ) + coverage_join_source = str(coverage_join_summary.get("source", "")).strip() + return MetricsSnapshot( + complexity_avg=project_metrics.complexity_avg, + complexity_max=project_metrics.complexity_max, + high_risk_count=len(project_metrics.high_risk_functions), + coupling_avg=project_metrics.coupling_avg, + coupling_max=project_metrics.coupling_max, + cohesion_avg=project_metrics.cohesion_avg, + cohesion_max=project_metrics.cohesion_max, + cycles_count=len(project_metrics.dependency_cycles), + dead_code_count=len(project_metrics.dead_code), + health_total=project_metrics.health.total, + health_grade=project_metrics.health.grade, + 
suppressed_dead_code_count=analysis_result.suppressed_dead_code_items, + overloaded_modules_candidates=_as_int( + overloaded_modules_summary.get("candidates") + ), + overloaded_modules_total=_as_int(overloaded_modules_summary.get("total")), + overloaded_modules_population_status=str( + overloaded_modules_summary.get("population_status", "") + ), + overloaded_modules_top_score=_coerce.as_float( + overloaded_modules_summary.get("top_score") + ), + adoption_param_permille=( + _as_int(adoption_summary.get("param_permille")) + if adoption_summary + else None + ), + adoption_return_permille=( + _as_int(adoption_summary.get("return_permille")) + if adoption_summary + else None + ), + adoption_docstring_permille=( + _as_int(adoption_summary.get("docstring_permille")) + if adoption_summary + else None + ), + adoption_any_annotation_count=_as_int(adoption_summary.get("typing_any_count")), + api_surface_enabled=bool(api_surface_summary.get("enabled")), + api_surface_modules=_as_int(api_surface_summary.get("modules")), + api_surface_public_symbols=_as_int(api_surface_summary.get("public_symbols")), + api_surface_added=( + len(metrics_diff.new_api_symbols) + if metrics_diff is not None and api_surface_diff_available + else 0 + ), + api_surface_breaking=( + len(metrics_diff.new_api_breaking_changes) + if metrics_diff is not None and api_surface_diff_available + else 0 + ), + coverage_join_status=str(coverage_join_summary.get("status", "")).strip(), + coverage_join_overall_permille=_as_int( + coverage_join_summary.get("overall_permille") + ), + coverage_join_coverage_hotspots=_as_int( + coverage_join_summary.get("coverage_hotspots") + ), + coverage_join_scope_gap_hotspots=_as_int( + coverage_join_summary.get("scope_gap_hotspots") + ), + coverage_join_threshold_percent=_as_int( + coverage_join_summary.get("hotspot_threshold_percent") + ), + coverage_join_source_label=( + coverage_join_source.rsplit("/", maxsplit=1)[-1] + if coverage_join_source + else "" + ), + ) + + def 
_print_summary( *, console: _Printer, diff --git a/codeclone/surfaces/cli/types.py b/codeclone/surfaces/cli/types.py new file mode 100644 index 0000000..8d4e640 --- /dev/null +++ b/codeclone/surfaces/cli/types.py @@ -0,0 +1,34 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal + +from ...core._types import FileProcessResult as ProcessingResult +from ...core._types import OutputPaths + +ReportPathOrigin = Literal["default", "explicit"] + + +@dataclass(frozen=True, slots=True) +class ChangedCloneGate: + changed_paths: tuple[str, ...] + new_func: frozenset[str] + new_block: frozenset[str] + total_clone_groups: int + findings_total: int + findings_new: int + findings_known: int + + +__all__ = [ + "ChangedCloneGate", + "OutputPaths", + "ProcessingResult", + "ReportPathOrigin", +] diff --git a/codeclone/surfaces/mcp/__init__.py b/codeclone/surfaces/mcp/__init__.py new file mode 100644 index 0000000..2d2fb1f --- /dev/null +++ b/codeclone/surfaces/mcp/__init__.py @@ -0,0 +1,58 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 + +from .service import CodeCloneMCPService +from .session import ( + DEFAULT_MCP_HISTORY_LIMIT, + MAX_MCP_HISTORY_LIMIT, + MCPAnalysisRequest, + MCPFindingNotFoundError, + MCPGateRequest, + MCPGitDiffError, + MCPRunNotFoundError, + MCPRunRecord, + MCPServiceContractError, + MCPServiceError, + _base_short_finding_id_payload, + _BufferConsole, + _clone_short_id_entry_payload, + _CloneShortIdEntry, + _disambiguated_clone_short_ids_payload, + _disambiguated_short_finding_id_payload, + _git_diff_lines_payload, + _json_text_payload, + _leaf_symbol_name_payload, + _load_report_document_payload, + _partitioned_short_id, + _suggestion_finding_id_payload, + _validated_history_limit, +) + +__all__ = [ + "DEFAULT_MCP_HISTORY_LIMIT", + "MAX_MCP_HISTORY_LIMIT", + "CodeCloneMCPService", + "MCPAnalysisRequest", + "MCPFindingNotFoundError", + "MCPGateRequest", + "MCPGitDiffError", + "MCPRunNotFoundError", + "MCPRunRecord", + "MCPServiceContractError", + "MCPServiceError", + "_BufferConsole", + "_CloneShortIdEntry", + "_base_short_finding_id_payload", + "_clone_short_id_entry_payload", + "_disambiguated_clone_short_ids_payload", + "_disambiguated_short_finding_id_payload", + "_git_diff_lines_payload", + "_json_text_payload", + "_leaf_symbol_name_payload", + "_load_report_document_payload", + "_partitioned_short_id", + "_suggestion_finding_id_payload", + "_validated_history_limit", +] diff --git a/codeclone/surfaces/mcp/__main__.py b/codeclone/surfaces/mcp/__main__.py new file mode 100644 index 0000000..75e1386 --- /dev/null +++ b/codeclone/surfaces/mcp/__main__.py @@ -0,0 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from .server import main + +main() diff --git a/codeclone/surfaces/mcp/payloads.py b/codeclone/surfaces/mcp/payloads.py new file mode 100644 index 0000000..a984206 --- /dev/null +++ b/codeclone/surfaces/mcp/payloads.py @@ -0,0 +1,56 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from typing import Generic, TypeVar + +T = TypeVar("T") + + +@dataclass(frozen=True, slots=True) +class PageWindow(Generic[T]): + items: list[T] + offset: int + limit: int + total: int + next_offset: int | None + + +def paginate( + items: Sequence[T], + *, + offset: int, + limit: int, + max_limit: int, +) -> PageWindow[T]: + normalized_offset = max(0, offset) + normalized_limit = max(1, min(limit, max_limit)) + page = list(items[normalized_offset : normalized_offset + normalized_limit]) + next_offset = normalized_offset + len(page) + return PageWindow( + items=page, + offset=normalized_offset, + limit=normalized_limit, + total=len(items), + next_offset=(next_offset if next_offset < len(items) else None), + ) + + +def resolve_finding_id( + *, + canonical_to_short: Mapping[str, str], + short_to_canonical: Mapping[str, str], + finding_id: str, +) -> str | None: + if finding_id in canonical_to_short: + return finding_id + return short_to_canonical.get(finding_id) + + +def short_id(value: str, *, length: int = 8) -> str: + return value[:length] diff --git a/codeclone/mcp_server.py b/codeclone/surfaces/mcp/server.py similarity index 95% rename from codeclone/mcp_server.py rename to codeclone/surfaces/mcp/server.py index ee7a6fc..7f4d718 100644 --- a/codeclone/mcp_server.py +++ 
b/codeclone/surfaces/mcp/server.py @@ -12,12 +12,14 @@ from collections.abc import Callable from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast -from . import __version__ -from .contracts import DOCS_URL -from .mcp_service import ( +from ... import __version__ +from ...contracts import DOCS_URL +from .service import CodeCloneMCPService +from .session import ( DEFAULT_MCP_HISTORY_LIMIT, MAX_MCP_HISTORY_LIMIT, - CodeCloneMCPService, + AnalysisMode, + CachePolicy, MCPAnalysisRequest, MCPGateRequest, _validated_history_limit, @@ -175,7 +177,7 @@ def analyze_repository( return service.analyze_repository( MCPAnalysisRequest( root=root, - analysis_mode=analysis_mode, # type: ignore[arg-type] + analysis_mode=cast("AnalysisMode", analysis_mode), respect_pyproject=respect_pyproject, changed_paths=tuple(changed_paths or ()), git_diff_ref=git_diff_ref, @@ -195,7 +197,7 @@ def analyze_repository( baseline_path=baseline_path, metrics_baseline_path=metrics_baseline_path, max_baseline_size_mb=max_baseline_size_mb, - cache_policy=cache_policy, # type: ignore[arg-type] + cache_policy=cast("CachePolicy", cache_policy), cache_path=cache_path, max_cache_size_mb=max_cache_size_mb, ) @@ -247,7 +249,7 @@ def analyze_changed_paths( root=root, changed_paths=tuple(changed_paths or ()), git_diff_ref=git_diff_ref, - analysis_mode=analysis_mode, # type: ignore[arg-type] + analysis_mode=cast("AnalysisMode", analysis_mode), respect_pyproject=respect_pyproject, processes=processes, min_loc=min_loc, @@ -265,7 +267,7 @@ def analyze_changed_paths( baseline_path=baseline_path, metrics_baseline_path=metrics_baseline_path, max_baseline_size_mb=max_baseline_size_mb, - cache_policy=cache_policy, # type: ignore[arg-type] + cache_policy=cast("CachePolicy", cache_policy), cache_path=cache_path, max_cache_size_mb=max_cache_size_mb, ) @@ -323,8 +325,8 @@ def help( detail: str = "compact", ) -> dict[str, object]: return service.get_help( - topic=topic, # type: ignore[arg-type] - detail=detail, # type: 
ignore[arg-type] + topic=topic, + detail=detail, ) @tool( @@ -399,8 +401,8 @@ def get_report_section( ) -> dict[str, object]: return service.get_report_section( run_id=run_id, - section=section, # type: ignore[arg-type] - family=family, # type: ignore[arg-type] + section=section, + family=family, path=path, offset=offset, limit=limit, @@ -435,13 +437,13 @@ def list_findings( ) -> dict[str, object]: return service.list_findings( run_id=run_id, - family=family, # type: ignore[arg-type] + family=family, category=category, severity=severity, source_kind=source_kind, - novelty=novelty, # type: ignore[arg-type] - sort_by=sort_by, # type: ignore[arg-type] - detail_level=detail_level, # type: ignore[arg-type] + novelty=novelty, + sort_by=sort_by, + detail_level=detail_level, changed_paths=tuple(changed_paths or ()), git_diff_ref=git_diff_ref, exclude_reviewed=exclude_reviewed, @@ -469,7 +471,7 @@ def get_finding( return service.get_finding( finding_id=finding_id, run_id=run_id, - detail_level=detail_level, # type: ignore[arg-type] + detail_level=detail_level, ) @tool( @@ -490,7 +492,7 @@ def get_remediation( return service.get_remediation( finding_id=finding_id, run_id=run_id, - detail_level=detail_level, # type: ignore[arg-type] + detail_level=detail_level, ) @tool( @@ -514,9 +516,9 @@ def list_hotspots( max_results: int | None = None, ) -> dict[str, object]: return service.list_hotspots( - kind=kind, # type: ignore[arg-type] + kind=kind, run_id=run_id, - detail_level=detail_level, # type: ignore[arg-type] + detail_level=detail_level, changed_paths=tuple(changed_paths or ()), git_diff_ref=git_diff_ref, exclude_reviewed=exclude_reviewed, @@ -542,7 +544,7 @@ def compare_runs( return service.compare_runs( run_id_before=run_id_before, run_id_after=run_id_after, - focus=focus, # type: ignore[arg-type] + focus=focus, ) @tool( @@ -571,7 +573,7 @@ def check_complexity( path=path, min_complexity=min_complexity, max_results=max_results, - detail_level=detail_level, # type: 
ignore[arg-type] + detail_level=detail_level, ) @tool( @@ -602,7 +604,7 @@ def check_clones( clone_type=clone_type, source_kind=source_kind, max_results=max_results, - detail_level=detail_level, # type: ignore[arg-type] + detail_level=detail_level, ) @tool( @@ -629,7 +631,7 @@ def check_coupling( root=root, path=path, max_results=max_results, - detail_level=detail_level, # type: ignore[arg-type] + detail_level=detail_level, ) @tool( @@ -656,7 +658,7 @@ def check_cohesion( root=root, path=path, max_results=max_results, - detail_level=detail_level, # type: ignore[arg-type] + detail_level=detail_level, ) @tool( @@ -685,7 +687,7 @@ def check_dead_code( path=path, min_severity=min_severity, max_results=max_results, - detail_level=detail_level, # type: ignore[arg-type] + detail_level=detail_level, ) @tool( @@ -708,7 +710,7 @@ def generate_pr_summary( run_id=run_id, changed_paths=tuple(changed_paths or ()), git_diff_ref=git_diff_ref, - format=format, # type: ignore[arg-type] + format=format, ) @tool( diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py new file mode 100644 index 0000000..d1f16ca --- /dev/null +++ b/codeclone/surfaces/mcp/service.py @@ -0,0 +1,261 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +import inspect +from collections.abc import Mapping +from typing import Any, cast + +from .session import ( + DEFAULT_MCP_HISTORY_LIMIT, + MCPAnalysisRequest, + MCPGateRequest, + MCPSession, +) +from .tools import MCP_TOOLS_BY_NAME +from .tools._base import MCPTool + + +class CodeCloneMCPService: + def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: + self.session = MCPSession(history_limit=history_limit) + self._tools: Mapping[str, MCPTool] = MCP_TOOLS_BY_NAME + + def __getattr__(self, name: str) -> Any: + return getattr(self.session, name) + + def _dispatch(self, name: str, **params: object) -> object: + return self._tools[name].run(self.session, params) + + def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: + return cast( + "dict[str, object]", + self._dispatch("analyze_repository", request=request), + ) + + def analyze_changed_paths(self, request: MCPAnalysisRequest) -> dict[str, object]: + return cast( + "dict[str, object]", + self._dispatch("analyze_changed_paths", request=request), + ) + + def get_run_summary(self, run_id: str | None = None) -> dict[str, object]: + return cast( + "dict[str, object]", + self._dispatch("get_run_summary", run_id=run_id), + ) + + def compare_runs(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("compare_runs", **params)) + + def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: + return cast( + "dict[str, object]", + self._dispatch("evaluate_gates", request=request), + ) + + def get_report_section(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("get_report_section", **params)) + + def list_findings(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("list_findings", **params)) + + def get_finding(self, **params: object) -> dict[str, object]: + 
return cast("dict[str, object]", self._dispatch("get_finding", **params)) + + def get_remediation(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("get_remediation", **params)) + + def list_hotspots(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("list_hotspots", **params)) + + def get_production_triage(self, **params: object) -> dict[str, object]: + return cast( + "dict[str, object]", + self._dispatch("get_production_triage", **params), + ) + + def get_help(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("help", **params)) + + def generate_pr_summary(self, **params: object) -> dict[str, object]: + return cast( + "dict[str, object]", + self._dispatch("generate_pr_summary", **params), + ) + + def mark_finding_reviewed(self, **params: object) -> dict[str, object]: + return cast( + "dict[str, object]", + self._dispatch("mark_finding_reviewed", **params), + ) + + def list_reviewed_findings(self, **params: object) -> dict[str, object]: + return cast( + "dict[str, object]", + self._dispatch("list_reviewed_findings", **params), + ) + + def clear_session_runs(self) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("clear_session_runs")) + + def check_complexity(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("check_complexity", **params)) + + def check_clones(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("check_clones", **params)) + + def check_coupling(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("check_coupling", **params)) + + def check_cohesion(self, **params: object) -> dict[str, object]: + return cast("dict[str, object]", self._dispatch("check_cohesion", **params)) + + def check_dead_code(self, **params: object) -> dict[str, object]: + return cast("dict[str, 
object]", self._dispatch("check_dead_code", **params)) + + def read_resource(self, uri: str) -> str: + return self.session.read_resource(uri) + + +_EMPTY = inspect.Signature.empty + + +def _kwonly( + name: str, + annotation: str, + default: object = _EMPTY, +) -> inspect.Parameter: + return inspect.Parameter( + name, + inspect.Parameter.KEYWORD_ONLY, + default=default, + annotation=annotation, + ) + + +def _apply_public_method_signatures() -> None: + signature_specs: dict[str, tuple[inspect.Parameter, ...]] = { + "check_clones": ( + _kwonly("run_id", "str | None", None), + _kwonly("root", "str | None", None), + _kwonly("path", "str | None", None), + _kwonly("clone_type", "str | None", None), + _kwonly("source_kind", "str | None", None), + _kwonly("max_results", "int", 10), + _kwonly("detail_level", "DetailLevel", "summary"), + ), + "check_cohesion": ( + _kwonly("run_id", "str | None", None), + _kwonly("root", "str | None", None), + _kwonly("path", "str | None", None), + _kwonly("max_results", "int", 10), + _kwonly("detail_level", "DetailLevel", "summary"), + ), + "check_complexity": ( + _kwonly("run_id", "str | None", None), + _kwonly("root", "str | None", None), + _kwonly("path", "str | None", None), + _kwonly("min_complexity", "int | None", None), + _kwonly("max_results", "int", 10), + _kwonly("detail_level", "DetailLevel", "summary"), + ), + "check_coupling": ( + _kwonly("run_id", "str | None", None), + _kwonly("root", "str | None", None), + _kwonly("path", "str | None", None), + _kwonly("max_results", "int", 10), + _kwonly("detail_level", "DetailLevel", "summary"), + ), + "check_dead_code": ( + _kwonly("run_id", "str | None", None), + _kwonly("root", "str | None", None), + _kwonly("path", "str | None", None), + _kwonly("min_severity", "str | None", None), + _kwonly("max_results", "int", 10), + _kwonly("detail_level", "DetailLevel", "summary"), + ), + "compare_runs": ( + _kwonly("run_id_before", "str"), + _kwonly("run_id_after", "str | None", None), + 
_kwonly("focus", "ComparisonFocus", "all"), + ), + "generate_pr_summary": ( + _kwonly("run_id", "str | None", None), + _kwonly("changed_paths", "Sequence[str]", ()), + _kwonly("git_diff_ref", "str | None", None), + _kwonly("format", "PRSummaryFormat", "markdown"), + ), + "get_finding": ( + _kwonly("finding_id", "str"), + _kwonly("run_id", "str | None", None), + _kwonly("detail_level", "DetailLevel", "normal"), + ), + "get_help": ( + _kwonly("topic", "HelpTopic"), + _kwonly("detail", "HelpDetail", "compact"), + ), + "get_production_triage": ( + _kwonly("run_id", "str | None", None), + _kwonly("max_hotspots", "int", 3), + _kwonly("max_suggestions", "int", 3), + ), + "get_remediation": ( + _kwonly("finding_id", "str"), + _kwonly("run_id", "str | None", None), + _kwonly("detail_level", "DetailLevel", "normal"), + ), + "get_report_section": ( + _kwonly("run_id", "str | None", None), + _kwonly("section", "ReportSection", "all"), + _kwonly("family", "MetricsDetailFamily | None", None), + _kwonly("path", "str | None", None), + _kwonly("offset", "int", 0), + _kwonly("limit", "int", 50), + ), + "list_findings": ( + _kwonly("run_id", "str | None", None), + _kwonly("family", "FindingFamilyFilter", "all"), + _kwonly("category", "str | None", None), + _kwonly("severity", "str | None", None), + _kwonly("source_kind", "str | None", None), + _kwonly("novelty", "FindingNoveltyFilter", "all"), + _kwonly("sort_by", "FindingSort", "default"), + _kwonly("detail_level", "DetailLevel", "summary"), + _kwonly("changed_paths", "Sequence[str]", ()), + _kwonly("git_diff_ref", "str | None", None), + _kwonly("exclude_reviewed", "bool", False), + _kwonly("offset", "int", 0), + _kwonly("limit", "int", 50), + _kwonly("max_results", "int | None", None), + ), + "list_hotspots": ( + _kwonly("kind", "HotlistKind"), + _kwonly("run_id", "str | None", None), + _kwonly("detail_level", "DetailLevel", "summary"), + _kwonly("changed_paths", "Sequence[str]", ()), + _kwonly("git_diff_ref", "str | None", None), 
+ _kwonly("exclude_reviewed", "bool", False), + _kwonly("limit", "int", 10), + _kwonly("max_results", "int | None", None), + ), + "list_reviewed_findings": (_kwonly("run_id", "str | None", None),), + "mark_finding_reviewed": ( + _kwonly("finding_id", "str"), + _kwonly("run_id", "str | None", None), + _kwonly("note", "str | None", None), + ), + } + self_param = inspect.Parameter("self", inspect.Parameter.POSITIONAL_OR_KEYWORD) + for name, params in signature_specs.items(): + method = getattr(CodeCloneMCPService, name) + method.__signature__ = inspect.Signature( + parameters=(self_param, *params), + return_annotation="dict[str, object]", + ) + + +_apply_public_method_signatures() diff --git a/codeclone/mcp_service.py b/codeclone/surfaces/mcp/session.py similarity index 96% rename from codeclone/mcp_service.py rename to codeclone/surfaces/mcp/session.py index cbed02b..fd29abf 100644 --- a/codeclone/mcp_service.py +++ b/codeclone/surfaces/mcp/session.py @@ -15,12 +15,14 @@ from json import JSONDecodeError from pathlib import Path from threading import RLock -from typing import Final, Literal, cast +from typing import Any, Final, Literal, cast import orjson -from . import __version__ -from ._cli_args import ( +from ... 
import __version__ +from ...baseline import Baseline +from ...cache import Cache, CacheStatus +from ...config import ( DEFAULT_BASELINE_PATH, DEFAULT_BLOCK_MIN_LOC, DEFAULT_BLOCK_MIN_STMT, @@ -30,35 +32,25 @@ DEFAULT_MIN_STMT, DEFAULT_SEGMENT_MIN_LOC, DEFAULT_SEGMENT_MIN_STMT, + ConfigValidationError, + load_pyproject_config, ) -from ._cli_baselines import ( - CloneBaselineState, - MetricsBaselineState, - probe_metrics_baseline_section, - resolve_clone_baseline_state, - resolve_metrics_baseline_state, -) -from ._cli_config import ConfigValidationError, load_pyproject_config -from ._cli_meta import _build_report_meta, _current_report_timestamp_utc -from ._cli_runtime import ( - resolve_cache_path, - resolve_cache_status, - validate_numeric_args, -) -from ._coerce import as_float as _as_float -from ._coerce import as_int as _as_int -from ._git_diff import validate_git_diff_ref -from .baseline import Baseline -from .cache import Cache, CacheStatus -from .contracts import ( +from ...contracts import ( DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, DOCS_URL, REPORT_SCHEMA_VERSION, - ExitCode, ) -from .domain.findings import ( +from ...core import ( + OutputPaths, + analyze, + bootstrap, + discover, + process, + report, +) +from ...domain.findings import ( CATEGORY_CLONE, CATEGORY_COHESION, CATEGORY_COMPLEXITY, @@ -73,7 +65,7 @@ FAMILY_DESIGN, FAMILY_STRUCTURAL, ) -from .domain.quality import ( +from ...domain.quality import ( CONFIDENCE_HIGH, CONFIDENCE_LOW, CONFIDENCE_MEDIUM, @@ -84,7 +76,7 @@ SEVERITY_INFO, SEVERITY_WARNING, ) -from .domain.source_scope import ( +from ...domain.source_scope import ( SOURCE_KIND_FIXTURES, SOURCE_KIND_MIXED, SOURCE_KIND_ORDER, @@ -92,24 +84,34 @@ SOURCE_KIND_PRODUCTION, SOURCE_KIND_TESTS, ) -from .models import CoverageJoinResult, MetricsDiff, ProjectMetrics, Suggestion -from .pipeline import ( - GatingResult, - MetricGateConfig, - OutputPaths, - analyze, - 
bootstrap, - discover, - metric_gate_reasons, - process, - report, -) -from .report.json_contract import ( +from ...findings.ids import ( clone_group_id, dead_code_group_id, design_group_id, structural_group_id, ) +from ...models import CoverageJoinResult, MetricsDiff, ProjectMetrics, Suggestion +from ...report.gates.evaluator import GateResult as GatingResult +from ...report.gates.evaluator import MetricGateConfig +from ...report.gates.evaluator import evaluate_gates as _evaluate_report_gates +from ...report.gates.evaluator import summarize_metrics_diff as _summarize_metrics_diff +from ...utils.coerce import as_float as _as_float +from ...utils.coerce import as_int as _as_int +from ...utils.git_diff import validate_git_diff_ref +from ..cli.baseline_state import ( + CloneBaselineState, + MetricsBaselineState, + probe_metrics_baseline_section, + resolve_clone_baseline_state, + resolve_metrics_baseline_state, +) +from ..cli.report_meta import _build_report_meta, _current_report_timestamp_utc +from ..cli.runtime import ( + resolve_cache_path, + resolve_cache_status, + validate_numeric_args, +) +from .payloads import paginate, resolve_finding_id, short_id AnalysisMode = Literal["full", "clones_only"] CachePolicy = Literal["reuse", "refresh", "off"] @@ -1072,7 +1074,7 @@ def clear(self) -> tuple[str, ...]: return removed_run_ids -class CodeCloneMCPService: +class MCPSession: def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._runs = CodeCloneMCPRunStore(history_limit=history_limit) self._state_lock = RLock() @@ -1229,15 +1231,18 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: analysis_result.project_metrics ) - report_artifacts = report( - boot=boot, - discovery=discovery_result, - processing=processing_result, - analysis=analysis_result, - report_meta=report_meta, - new_func=new_func, - new_block=new_block, - metrics_diff=metrics_diff, + report_artifacts = cast( + "Any", + report( + boot=boot, + 
discovery=discovery_result, + processing=processing_result, + analysis=analysis_result, + report_meta=report_meta, + new_func=new_func, + new_block=new_block, + metrics_diff=metrics_diff, + ), ) report_json = report_artifacts.json if report_json is None: @@ -1460,7 +1465,6 @@ def _evaluate_gate_snapshot( record: MCPRunRecord, request: MCPGateRequest, ) -> GatingResult: - reasons: list[str] = [] if request.fail_on_untested_hotspots: if record.coverage_join is None: raise MCPServiceContractError( @@ -1472,45 +1476,35 @@ def _evaluate_gate_snapshot( "Coverage gating requires a valid Cobertura XML input. " f"Reason: {detail}" ) - if record.project_metrics is not None: - metric_reasons = metric_gate_reasons( - project_metrics=record.project_metrics, - coverage_join=record.coverage_join, - metrics_diff=record.metrics_diff, - config=MetricGateConfig( - fail_complexity=request.fail_complexity, - fail_coupling=request.fail_coupling, - fail_cohesion=request.fail_cohesion, - fail_cycles=request.fail_cycles, - fail_dead_code=request.fail_dead_code, - fail_health=request.fail_health, - fail_on_new_metrics=request.fail_on_new_metrics, - fail_on_typing_regression=request.fail_on_typing_regression, - fail_on_docstring_regression=request.fail_on_docstring_regression, - fail_on_api_break=request.fail_on_api_break, - fail_on_untested_hotspots=request.fail_on_untested_hotspots, - min_typing_coverage=request.min_typing_coverage, - min_docstring_coverage=request.min_docstring_coverage, - coverage_min=request.coverage_min, - ), - ) - reasons.extend(f"metric:{reason}" for reason in metric_reasons) - - if request.fail_on_new and (record.new_func or record.new_block): - reasons.append("clone:new") - - total_clone_groups = record.func_clones_count + record.block_clones_count - if 0 <= request.fail_threshold < total_clone_groups: - reasons.append( - f"clone:threshold:{total_clone_groups}:{request.fail_threshold}" - ) - - if reasons: - return GatingResult( - 
exit_code=int(ExitCode.GATING_FAILURE), - reasons=tuple(reasons), - ) - return GatingResult(exit_code=int(ExitCode.SUCCESS), reasons=()) + return _evaluate_report_gates( + report_document=record.report_document, + config=MetricGateConfig( + fail_complexity=request.fail_complexity, + fail_coupling=request.fail_coupling, + fail_cohesion=request.fail_cohesion, + fail_cycles=request.fail_cycles, + fail_dead_code=request.fail_dead_code, + fail_health=request.fail_health, + fail_on_new_metrics=request.fail_on_new_metrics, + fail_on_typing_regression=request.fail_on_typing_regression, + fail_on_docstring_regression=request.fail_on_docstring_regression, + fail_on_api_break=request.fail_on_api_break, + fail_on_untested_hotspots=request.fail_on_untested_hotspots, + min_typing_coverage=request.min_typing_coverage, + min_docstring_coverage=request.min_docstring_coverage, + coverage_min=request.coverage_min, + fail_on_new=request.fail_on_new, + fail_threshold=request.fail_threshold, + ), + baseline_status=str( + self._as_mapping( + self._as_mapping(record.report_document.get("meta")).get("baseline") + ).get("status", "") + ), + metrics_diff=record.metrics_diff, + clone_new_count=len(record.new_func) + len(record.new_block), + clone_total=record.func_clones_count + record.block_clones_count, + ) def get_report_section( self, @@ -1636,21 +1630,23 @@ def list_findings( changed_paths=paths_filter, exclude_reviewed=exclude_reviewed, ) - total = len(filtered) - normalized_offset = max(0, offset) - items = filtered[normalized_offset : normalized_offset + normalized_limit] - next_offset = normalized_offset + len(items) + page = paginate( + filtered, + offset=offset, + limit=normalized_limit, + max_limit=200, + ) return { "run_id": self._short_run_id(record.run_id), "detail_level": validated_detail, "sort_by": validated_sort, "changed_paths": list(paths_filter), - "offset": normalized_offset, - "limit": normalized_limit, - "returned": len(items), - "total": total, - "next_offset": 
next_offset if next_offset < total else None, - "items": items, + "offset": page.offset, + "limit": page.limit, + "returned": len(page.items), + "total": page.total, + "next_offset": page.next_offset, + "items": page.items, } def get_finding( @@ -1678,6 +1674,19 @@ def get_finding( f"'{self._short_run_id(record.run_id)}'." ) + def _service_get_finding( + self, + *, + finding_id: str, + run_id: str | None = None, + detail_level: DetailLevel = "normal", + ) -> dict[str, object]: + return self.get_finding( + finding_id=finding_id, + run_id=run_id, + detail_level=detail_level, + ) + def get_remediation( self, *, @@ -1691,7 +1700,7 @@ def get_remediation( ) record = self._runs.get(run_id) canonical_id = self._resolve_canonical_finding_id(record, finding_id) - finding = self.get_finding( + finding = self._service_get_finding( finding_id=canonical_id, run_id=record.run_id, detail_level="full", @@ -1946,7 +1955,7 @@ def mark_finding_reviewed( ) -> dict[str, object]: record = self._runs.get(run_id) canonical_id = self._resolve_canonical_finding_id(record, finding_id) - self.get_finding( + self._service_get_finding( finding_id=canonical_id, run_id=record.run_id, detail_level="normal", @@ -1978,7 +1987,10 @@ def list_reviewed_findings( items = [] for finding_id, note in review_items: try: - finding = self.get_finding(finding_id=finding_id, run_id=record.run_id) + finding = self._service_get_finding( + finding_id=finding_id, + run_id=record.run_id, + ) except MCPFindingNotFoundError: continue items.append( @@ -2303,7 +2315,10 @@ def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: if suffix.startswith(finding_prefix): finding_id = suffix[len(finding_prefix) :] return _json_text_payload( - self.get_finding(run_id=record.run_id, finding_id=finding_id) + self._service_get_finding( + run_id=record.run_id, + finding_id=finding_id, + ) ) raise MCPServiceContractError( f"Unsupported CodeClone resource suffix '{suffix}'." 
@@ -2420,7 +2435,7 @@ def _summary_health_payload( @staticmethod def _short_run_id(run_id: str) -> str: - return run_id[:_SHORT_RUN_ID_LENGTH] + return short_id(run_id, length=_SHORT_RUN_ID_LENGTH) def _finding_id_maps( self, @@ -2436,15 +2451,15 @@ def _finding_id_maps( for canonical_id in canonical_ids } grouped: dict[str, list[str]] = {} - for canonical_id, short_id in base_ids.items(): - grouped.setdefault(short_id, []).append(canonical_id) + for canonical_id, short_name in base_ids.items(): + grouped.setdefault(short_name, []).append(canonical_id) canonical_to_short: dict[str, str] = {} short_to_canonical: dict[str, str] = {} - for short_id, group in grouped.items(): + for short_name, group in grouped.items(): if len(group) == 1: canonical_id = group[0] - canonical_to_short[canonical_id] = short_id - short_to_canonical[short_id] = canonical_id + canonical_to_short[canonical_id] = short_name + short_to_canonical[short_name] = canonical_id continue disambiguated_ids = self._disambiguated_short_finding_ids(group) for canonical_id, disambiguated in disambiguated_ids.items(): @@ -2492,9 +2507,11 @@ def _resolve_canonical_finding_id( finding_id: str, ) -> str: canonical_to_short, short_to_canonical = self._finding_id_maps(record) - if finding_id in canonical_to_short: - return finding_id - canonical = short_to_canonical.get(finding_id) + canonical = resolve_finding_id( + canonical_to_short=canonical_to_short, + short_to_canonical=short_to_canonical, + finding_id=finding_id, + ) if canonical is not None: return canonical raise MCPFindingNotFoundError( @@ -4586,8 +4603,6 @@ def _metrics_detail_payload( "summary": summary, "_hint": "Use family and/or path parameters to access per-item detail.", } - normalized_offset = max(0, offset) - normalized_limit = max(1, min(limit, 200)) family_names: Sequence[str] = ( (family,) if family is not None else tuple(sorted(families)) ) @@ -4614,16 +4629,16 @@ def _metrics_detail_payload( _as_int(item.get("start_line", 0), 0), ) ) - 
page = items[normalized_offset : normalized_offset + normalized_limit] + page = paginate(items, offset=offset, limit=limit, max_limit=200) return { "family": family, "path": normalized_path or None, - "offset": normalized_offset, - "limit": normalized_limit, - "returned": len(page), - "total": len(items), - "has_more": normalized_offset + len(page) < len(items), - "items": page, + "offset": page.offset, + "limit": page.limit, + "returned": len(page.items), + "total": page.total, + "has_more": page.next_offset is not None, + "items": page.items, } def _metric_item_matches_path( @@ -4666,49 +4681,8 @@ def _compact_metrics_item( def _metrics_diff_payload( metrics_diff: MetricsDiff | None, ) -> dict[str, object] | None: - if metrics_diff is None: - return None - new_high_risk_functions = tuple( - cast(Sequence[str], getattr(metrics_diff, "new_high_risk_functions", ())) - ) - new_high_coupling_classes = tuple( - cast(Sequence[str], getattr(metrics_diff, "new_high_coupling_classes", ())) - ) - new_cycles = tuple( - cast(Sequence[object], getattr(metrics_diff, "new_cycles", ())) - ) - new_dead_code = tuple( - cast(Sequence[str], getattr(metrics_diff, "new_dead_code", ())) - ) - health_delta = getattr(metrics_diff, "health_delta", 0) - return { - "new_high_risk_functions": len(new_high_risk_functions), - "new_high_coupling_classes": len(new_high_coupling_classes), - "new_cycles": len(new_cycles), - "new_dead_code": len(new_dead_code), - "health_delta": _as_int(health_delta, 0), - "typing_param_permille_delta": _as_int( - getattr(metrics_diff, "typing_param_permille_delta", 0), - 0, - ), - "typing_return_permille_delta": _as_int( - getattr(metrics_diff, "typing_return_permille_delta", 0), - 0, - ), - "docstring_permille_delta": _as_int( - getattr(metrics_diff, "docstring_permille_delta", 0), - 0, - ), - "api_breaking_changes": len( - tuple( - cast( - Sequence[object], - getattr(metrics_diff, "new_api_breaking_changes", ()), - ) - ) - ), - "new_api_symbols": 
len(tuple(getattr(metrics_diff, "new_api_symbols", ()))), - } + payload = _summarize_metrics_diff(metrics_diff) + return dict(payload) if payload is not None else None def _dict_list(self, value: object) -> list[dict[str, object]]: return [dict(self._as_mapping(item)) for item in self._as_sequence(value)] diff --git a/codeclone/surfaces/mcp/tools/__init__.py b/codeclone/surfaces/mcp/tools/__init__.py new file mode 100644 index 0000000..fa68bb9 --- /dev/null +++ b/codeclone/surfaces/mcp/tools/__init__.py @@ -0,0 +1,33 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from ._base import MCPTool +from .analyze import TOOLS as ANALYZE_TOOLS +from .checks import TOOLS as CHECK_TOOLS +from .compare import TOOLS as COMPARE_TOOLS +from .findings import TOOLS as FINDING_TOOLS +from .gates import TOOLS as GATE_TOOLS +from .help import TOOLS as HELP_TOOLS +from .hotspots import TOOLS as HOTSPOT_TOOLS +from .pr import TOOLS as PR_TOOLS +from .report_section import TOOLS as REPORT_SECTION_TOOLS +from .runs import TOOLS as RUN_TOOLS + +MCP_TOOLS: tuple[MCPTool, ...] = ( + *ANALYZE_TOOLS, + *RUN_TOOLS, + *FINDING_TOOLS, + *CHECK_TOOLS, + *HOTSPOT_TOOLS, + *REPORT_SECTION_TOOLS, + *COMPARE_TOOLS, + *GATE_TOOLS, + *PR_TOOLS, + *HELP_TOOLS, +) + +MCP_TOOLS_BY_NAME: dict[str, MCPTool] = {tool.name: tool for tool in MCP_TOOLS} diff --git a/codeclone/surfaces/mcp/tools/_base.py b/codeclone/surfaces/mcp/tools/_base.py new file mode 100644 index 0000000..64c8a51 --- /dev/null +++ b/codeclone/surfaces/mcp/tools/_base.py @@ -0,0 +1,44 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Callable, Mapping +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Protocol, cast + +if TYPE_CHECKING: + from ..session import MCPSession + + +@dataclass(frozen=True, slots=True) +class MCPToolSchema: + title: str + description: str = "" + + +class MCPTool(Protocol): + @property + def name(self) -> str: ... + + @property + def schema(self) -> MCPToolSchema: ... + + def run(self, session: MCPSession, params: Mapping[str, object]) -> object: ... + + +@dataclass(frozen=True, slots=True) +class SimpleMCPTool: + name: str + schema: MCPToolSchema + runner: Callable[[MCPSession, Mapping[str, object]], object] + + def run(self, session: MCPSession, params: Mapping[str, object]) -> object: + return self.runner(session, params) + + +def run_kw(bound: object, params: Mapping[str, object]) -> object: + return cast("Any", bound)(**dict(params)) diff --git a/codeclone/surfaces/mcp/tools/analyze.py b/codeclone/surfaces/mcp/tools/analyze.py new file mode 100644 index 0000000..4ebe7aa --- /dev/null +++ b/codeclone/surfaces/mcp/tools/analyze.py @@ -0,0 +1,28 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from typing import cast + +from ..session import MCPAnalysisRequest +from ._base import MCPToolSchema, SimpleMCPTool + +TOOLS = ( + SimpleMCPTool( + name="analyze_repository", + schema=MCPToolSchema(title="Analyze Repository"), + runner=lambda session, params: session.analyze_repository( + cast("MCPAnalysisRequest", params["request"]) + ), + ), + SimpleMCPTool( + name="analyze_changed_paths", + schema=MCPToolSchema(title="Analyze Changed Paths"), + runner=lambda session, params: session.analyze_changed_paths( + cast("MCPAnalysisRequest", params["request"]) + ), + ), +) diff --git a/codeclone/surfaces/mcp/tools/checks.py b/codeclone/surfaces/mcp/tools/checks.py new file mode 100644 index 0000000..cfbc772 --- /dev/null +++ b/codeclone/surfaces/mcp/tools/checks.py @@ -0,0 +1,36 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from ._base import MCPToolSchema, SimpleMCPTool, run_kw + +TOOLS = ( + SimpleMCPTool( + name="check_complexity", + schema=MCPToolSchema(title="Check Complexity"), + runner=lambda session, params: run_kw(session.check_complexity, params), + ), + SimpleMCPTool( + name="check_clones", + schema=MCPToolSchema(title="Check Clones"), + runner=lambda session, params: run_kw(session.check_clones, params), + ), + SimpleMCPTool( + name="check_coupling", + schema=MCPToolSchema(title="Check Coupling"), + runner=lambda session, params: run_kw(session.check_coupling, params), + ), + SimpleMCPTool( + name="check_cohesion", + schema=MCPToolSchema(title="Check Cohesion"), + runner=lambda session, params: run_kw(session.check_cohesion, params), + ), + SimpleMCPTool( + name="check_dead_code", + schema=MCPToolSchema(title="Check Dead Code"), + runner=lambda session, params: run_kw(session.check_dead_code, params), + ), +) diff --git a/codeclone/surfaces/mcp/tools/compare.py b/codeclone/surfaces/mcp/tools/compare.py new file mode 100644 index 0000000..7967800 --- /dev/null +++ b/codeclone/surfaces/mcp/tools/compare.py @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from ._base import MCPToolSchema, SimpleMCPTool, run_kw + +TOOLS = ( + SimpleMCPTool( + name="compare_runs", + schema=MCPToolSchema(title="Compare Runs"), + runner=lambda session, params: run_kw(session.compare_runs, params), + ), +) diff --git a/codeclone/surfaces/mcp/tools/findings.py b/codeclone/surfaces/mcp/tools/findings.py new file mode 100644 index 0000000..0897afd --- /dev/null +++ b/codeclone/surfaces/mcp/tools/findings.py @@ -0,0 +1,36 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from ._base import MCPToolSchema, SimpleMCPTool, run_kw + +TOOLS = ( + SimpleMCPTool( + name="list_findings", + schema=MCPToolSchema(title="List Findings"), + runner=lambda session, params: run_kw(session.list_findings, params), + ), + SimpleMCPTool( + name="get_finding", + schema=MCPToolSchema(title="Get Finding"), + runner=lambda session, params: run_kw(session.get_finding, params), + ), + SimpleMCPTool( + name="get_remediation", + schema=MCPToolSchema(title="Get Remediation"), + runner=lambda session, params: run_kw(session.get_remediation, params), + ), + SimpleMCPTool( + name="mark_finding_reviewed", + schema=MCPToolSchema(title="Mark Finding Reviewed"), + runner=lambda session, params: run_kw(session.mark_finding_reviewed, params), + ), + SimpleMCPTool( + name="list_reviewed_findings", + schema=MCPToolSchema(title="List Reviewed Findings"), + runner=lambda session, params: run_kw(session.list_reviewed_findings, params), + ), +) diff --git a/codeclone/surfaces/mcp/tools/gates.py b/codeclone/surfaces/mcp/tools/gates.py new file mode 100644 index 0000000..78c6255 --- /dev/null +++ b/codeclone/surfaces/mcp/tools/gates.py @@ -0,0 +1,21 @@ +# This Source Code Form is 
subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from typing import cast + +from ..session import MCPGateRequest +from ._base import MCPToolSchema, SimpleMCPTool + +TOOLS = ( + SimpleMCPTool( + name="evaluate_gates", + schema=MCPToolSchema(title="Evaluate Gates"), + runner=lambda session, params: session.evaluate_gates( + cast("MCPGateRequest", params["request"]) + ), + ), +) diff --git a/codeclone/surfaces/mcp/tools/help.py b/codeclone/surfaces/mcp/tools/help.py new file mode 100644 index 0000000..2b64784 --- /dev/null +++ b/codeclone/surfaces/mcp/tools/help.py @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from ._base import MCPToolSchema, SimpleMCPTool, run_kw + +TOOLS = ( + SimpleMCPTool( + name="help", + schema=MCPToolSchema(title="Help"), + runner=lambda session, params: run_kw(session.get_help, params), + ), +) diff --git a/codeclone/surfaces/mcp/tools/hotspots.py b/codeclone/surfaces/mcp/tools/hotspots.py new file mode 100644 index 0000000..2b95dfa --- /dev/null +++ b/codeclone/surfaces/mcp/tools/hotspots.py @@ -0,0 +1,21 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from ._base import MCPToolSchema, SimpleMCPTool, run_kw + +TOOLS = ( + SimpleMCPTool( + name="list_hotspots", + schema=MCPToolSchema(title="List Hotspots"), + runner=lambda session, params: run_kw(session.list_hotspots, params), + ), + SimpleMCPTool( + name="get_production_triage", + schema=MCPToolSchema(title="Get Production Triage"), + runner=lambda session, params: run_kw(session.get_production_triage, params), + ), +) diff --git a/codeclone/surfaces/mcp/tools/pr.py b/codeclone/surfaces/mcp/tools/pr.py new file mode 100644 index 0000000..8561072 --- /dev/null +++ b/codeclone/surfaces/mcp/tools/pr.py @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from ._base import MCPToolSchema, SimpleMCPTool, run_kw + +TOOLS = ( + SimpleMCPTool( + name="generate_pr_summary", + schema=MCPToolSchema(title="Generate PR Summary"), + runner=lambda session, params: run_kw(session.generate_pr_summary, params), + ), +) diff --git a/codeclone/surfaces/mcp/tools/report_section.py b/codeclone/surfaces/mcp/tools/report_section.py new file mode 100644 index 0000000..e53f50b --- /dev/null +++ b/codeclone/surfaces/mcp/tools/report_section.py @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from ._base import MCPToolSchema, SimpleMCPTool, run_kw + +TOOLS = ( + SimpleMCPTool( + name="get_report_section", + schema=MCPToolSchema(title="Get Report Section"), + runner=lambda session, params: run_kw(session.get_report_section, params), + ), +) diff --git a/codeclone/surfaces/mcp/tools/runs.py b/codeclone/surfaces/mcp/tools/runs.py new file mode 100644 index 0000000..ae6caa5 --- /dev/null +++ b/codeclone/surfaces/mcp/tools/runs.py @@ -0,0 +1,25 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 + +from __future__ import annotations + +from typing import cast + +from ._base import MCPToolSchema, SimpleMCPTool + +TOOLS = ( + SimpleMCPTool( + name="get_run_summary", + schema=MCPToolSchema(title="Get Run Summary"), + runner=lambda session, params: session.get_run_summary( + cast("str | None", params.get("run_id")) + ), + ), + SimpleMCPTool( + name="clear_session_runs", + schema=MCPToolSchema(title="Clear Session Runs"), + runner=lambda session, _params: session.clear_session_runs(), + ), +) diff --git a/codeclone/ui_messages.py b/codeclone/ui_messages/__init__.py similarity index 99% rename from codeclone/ui_messages.py rename to codeclone/ui_messages/__init__.py index 998fb52..a945dde 100644 --- a/codeclone/ui_messages.py +++ b/codeclone/ui_messages/__init__.py @@ -12,9 +12,9 @@ import traceback from pathlib import Path -from . import __version__ -from .contracts import ISSUES_URL -from .domain.quality import ( +from .. 
import __version__ +from ..contracts import ISSUES_URL +from ..domain.quality import ( HEALTH_GRADE_A, HEALTH_GRADE_B, HEALTH_GRADE_C, diff --git a/codeclone/_html_report/__init__.py b/codeclone/utils/__init__.py similarity index 69% rename from codeclone/_html_report/__init__.py rename to codeclone/utils/__init__.py index 69b89c1..b7eef7e 100644 --- a/codeclone/_html_report/__init__.py +++ b/codeclone/utils/__init__.py @@ -4,10 +4,8 @@ # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy -"""New HTML report package — component-based architecture.""" +"""Shared internal utility helpers.""" from __future__ import annotations -from ._assemble import build_html_report - -__all__ = ["build_html_report"] +__all__ = ["coerce", "git_diff", "json_io", "schema_validation"] diff --git a/codeclone/_coerce.py b/codeclone/utils/coerce.py similarity index 100% rename from codeclone/_coerce.py rename to codeclone/utils/coerce.py diff --git a/codeclone/_git_diff.py b/codeclone/utils/git_diff.py similarity index 100% rename from codeclone/_git_diff.py rename to codeclone/utils/git_diff.py diff --git a/codeclone/_json_io.py b/codeclone/utils/json_io.py similarity index 100% rename from codeclone/_json_io.py rename to codeclone/utils/json_io.py diff --git a/codeclone/_schema_validation.py b/codeclone/utils/schema_validation.py similarity index 95% rename from codeclone/_schema_validation.py rename to codeclone/utils/schema_validation.py index e90404f..8233eeb 100644 --- a/codeclone/_schema_validation.py +++ b/codeclone/utils/schema_validation.py @@ -8,7 +8,7 @@ from typing import TYPE_CHECKING -from .errors import BaselineValidationError +from ..contracts.errors import BaselineValidationError if TYPE_CHECKING: from collections.abc import Mapping, Set diff --git a/pyproject.toml b/pyproject.toml index fa7abdd..c38c1ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "codeclone" -version = 
"2.0.0b5" +version = "2.0.0b6" description = "Structural code quality analysis for Python" readme = { file = "README.md", content-type = "text/markdown" } license = "MPL-2.0 AND MIT" @@ -76,17 +76,43 @@ dev = [ ] [project.scripts] -codeclone = "codeclone.cli:main" -codeclone-mcp = "codeclone.mcp_server:main" +codeclone = "codeclone.main:main" +codeclone-mcp = "codeclone.surfaces.mcp.server:main" [tool.setuptools] packages = [ "codeclone", - "codeclone._html_report", - "codeclone._html_report._sections", + "codeclone.analysis", + "codeclone.baseline", + "codeclone.blocks", + "codeclone.cache", + "codeclone.config", + "codeclone.contracts", + "codeclone.core", + "codeclone.findings", + "codeclone.findings.clones", + "codeclone.findings.structural", "codeclone.domain", + "codeclone.meta_markers", "codeclone.metrics", + "codeclone.paths", + "codeclone.qualnames", "codeclone.report", + "codeclone.report.document", + "codeclone.report.gates", + "codeclone.report.html", + "codeclone.report.html.assets", + "codeclone.report.html.primitives", + "codeclone.report.html.sections", + "codeclone.report.html.widgets", + "codeclone.report.renderers", + "codeclone.scanner", + "codeclone.surfaces", + "codeclone.surfaces.cli", + "codeclone.surfaces.mcp", + "codeclone.surfaces.mcp.tools", + "codeclone.ui_messages", + "codeclone.utils", ] [tool.setuptools.package-data] @@ -107,7 +133,7 @@ min_stmt = 4 fail_on_new = true fail_cycles = true fail_dead_code = true -fail_health = 87 +fail_health = 80 fail_on_new_metrics = true api_surface = false golden_fixture_paths = ["tests/fixtures/golden_*"] @@ -138,7 +164,8 @@ select = ["E", "F", "W", "I", "B", "UP", "SIM", "C4", "PIE", "PERF", "RUF"] [tool.ruff.lint.per-file-ignores] "codeclone/_html_css.py" = ["E501"] "codeclone/_html_js.py" = ["E501"] -"codeclone/_html_report/_sections/*.py" = ["E501"] +"codeclone/report/html/assets/*.py" = ["E501"] +"codeclone/report/html/sections/*.py" = ["E501"] [tool.ruff.format] quote-style = "double" diff 
--git a/scripts/gen_options_doc.py b/scripts/gen_options_doc.py new file mode 100644 index 0000000..51787ca --- /dev/null +++ b/scripts/gen_options_doc.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +from codeclone.config.spec import OPTIONS + + +def _default_repr(value: object) -> str: + if value is None: + return "`None`" + if isinstance(value, tuple): + return "`()`" if not value else f"`{list(value)!r}`" + if isinstance(value, str): + return f"`{value}`" + return f"`{value}`" + + +def main() -> int: + print("| Group | CLI | Pyproject | Default | Help |") + print("| --- | --- | --- | --- | --- |") + for option in OPTIONS: + if option.flags: + cli = ", ".join(option.flags) + elif option.cli_kind == "positional": + cli = "(positional)" + else: + cli = "-" + pyproject = option.pyproject_key or "-" + default = _default_repr(option.default) if option.has_default else "-" + help_text = (option.help_text or "").replace("\n", " ") + print( + f"| {option.group or '-'} | `{cli}` | `{pyproject}` | " + f"{default} | {help_text} |" + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/_ast_metrics_helpers.py b/tests/_ast_metrics_helpers.py index cd75a99..fe5221d 100644 --- a/tests/_ast_metrics_helpers.py +++ b/tests/_ast_metrics_helpers.py @@ -8,7 +8,7 @@ import ast -from codeclone import extractor +from codeclone.analysis import _module_walk as module_walk_mod from codeclone.qualnames import QualnameCollector @@ -20,7 +20,7 @@ def tree_collector_and_imports( tree = ast.parse(source) collector = QualnameCollector() collector.visit(tree) - walk = extractor._collect_module_walk_data( + walk = module_walk_mod._collect_module_walk_data( tree=tree, module_name=module_name, collector=collector, diff --git a/tests/_contract_snapshots.py b/tests/_contract_snapshots.py new file mode 100644 index 0000000..b6842fb --- /dev/null +++ b/tests/_contract_snapshots.py @@ -0,0 +1,18 @@ +from __future__ import 
annotations + +import json +from pathlib import Path + +_CONTRACT_SNAPSHOT_ROOT = ( + Path(__file__).resolve().parent / "fixtures" / "contract_snapshots" +) + + +def load_json_snapshot(name: str) -> object: + path = _CONTRACT_SNAPSHOT_ROOT / name + return json.loads(path.read_text(encoding="utf-8")) + + +def load_text_snapshot(name: str) -> str: + path = _CONTRACT_SNAPSHOT_ROOT / name + return path.read_text(encoding="utf-8").replace("\r\n", "\n") diff --git a/tests/_import_graph.py b/tests/_import_graph.py new file mode 100644 index 0000000..186941d --- /dev/null +++ b/tests/_import_graph.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +import ast +from pathlib import Path + + +def _module_name_from_path(path: Path) -> str: + parts = list(path.with_suffix("").parts) + return ".".join(parts) + + +def _resolve_import(module_name: str, node: ast.ImportFrom) -> str: + if node.level == 0: + return node.module or "" + + parts = module_name.split(".") + prefix_parts = parts[: -node.level] + if node.module: + return ".".join([*prefix_parts, node.module]) + return ".".join(prefix_parts) + + +def _iter_local_imports(module_name: str, source: str) -> list[str]: + tree = ast.parse(source) + imports: list[str] = [] + for node in ast.walk(tree): + if isinstance(node, ast.Import): + imports.extend(alias.name for alias in node.names) + elif isinstance(node, ast.ImportFrom): + imports.append(_resolve_import(module_name, node)) + return [name for name in imports if name.startswith("codeclone")] diff --git a/tests/fixtures/contract_snapshots/cli_help.txt b/tests/fixtures/contract_snapshots/cli_help.txt new file mode 100644 index 0000000..5f5de73 --- /dev/null +++ b/tests/fixtures/contract_snapshots/cli_help.txt @@ -0,0 +1,197 @@ +usage: codeclone [--min-loc MIN_LOC] [--min-stmt MIN_STMT] + [--processes PROCESSES] [--changed-only | --no-changed-only] + [--diff-against GIT_REF] [--paths-from-git-diff GIT_REF] + [--cache-path [FILE]] [--cache-dir [FILE]] + 
[--max-cache-size-mb MB] [--baseline [FILE]] + [--max-baseline-size-mb MB] + [--update-baseline | --no-update-baseline] + [--metrics-baseline [FILE]] + [--update-metrics-baseline | --no-update-metrics-baseline] + [--ci | --no-ci] [--api-surface | --no-api-surface] + [--coverage FILE] [--fail-on-new | --no-fail-on-new] + [--fail-on-new-metrics | --no-fail-on-new-metrics] + [--fail-threshold MAX_CLONES] [--fail-complexity [CC_MAX]] + [--fail-coupling [CBO_MAX]] [--fail-cohesion [LCOM4_MAX]] + [--fail-cycles | --no-fail-cycles] + [--fail-dead-code | --no-fail-dead-code] + [--fail-health [SCORE_MIN]] + [--fail-on-typing-regression | --no-fail-on-typing-regression] + [--fail-on-docstring-regression | --no-fail-on-docstring-regression] + [--fail-on-api-break | --no-fail-on-api-break] + [--fail-on-untested-hotspots | --no-fail-on-untested-hotspots] + [--min-typing-coverage PERCENT] + [--min-docstring-coverage PERCENT] [--coverage-min PERCENT] + [--skip-metrics | --no-skip-metrics] + [--skip-dead-code | --no-skip-dead-code] + [--skip-dependencies | --no-skip-dependencies] + [--html [FILE]] [--json [FILE]] [--md [FILE]] + [--sarif [FILE]] [--text [FILE]] + [--timestamped-report-paths | --no-timestamped-report-paths] + [--open-html-report | --no-open-html-report] [--no-progress] + [--progress] [--no-color] [--color] [--quiet | --no-quiet] + [--verbose | --no-verbose] [--debug | --no-debug] [-h] + [--version] + [root] + +Structural code quality analysis for Python. + +Target: + root Project root directory to scan. + Defaults to the current directory. + +Analysis: + --min-loc MIN_LOC Minimum Lines of Code (LOC) required for clone analysis. + Default: 10. + --min-stmt MIN_STMT Minimum AST statement count required for clone analysis. + Default: 6. + --processes PROCESSES + Number of parallel worker processes. + Default: 4. + --changed-only, --no-changed-only + Limit clone gating and changed-scope summaries to findings that touch + files from a git diff selection. 
+ --diff-against GIT_REF + Resolve changed files from `git diff --name-only `. + Use together with --changed-only. + --paths-from-git-diff GIT_REF + Shorthand for --changed-only using `git diff --name-only `. + Useful for PR and CI review flows. + --cache-path [FILE] Path to the cache file. + If FILE is omitted, uses /.cache/codeclone/cache.json. + --cache-dir [FILE] Legacy alias for --cache-path. + Prefer --cache-path in new configurations. + --max-cache-size-mb MB + Maximum cache file size in MB. + Default: 50. + +Baselines and CI: + --baseline [FILE] Path to the clone baseline. + If FILE is omitted, uses codeclone.baseline.json. + --max-baseline-size-mb MB + Maximum allowed baseline size in MB. + Default: 5. + --update-baseline, --no-update-baseline + Overwrite the clone baseline with current results. + Disabled by default. + --metrics-baseline [FILE] + Path to the metrics baseline. + If FILE is omitted, uses codeclone.baseline.json. + --update-metrics-baseline, --no-update-metrics-baseline + Overwrite the metrics baseline with current metrics. + Disabled by default. + --ci, --no-ci Enable CI preset. + Equivalent to: --fail-on-new --no-color --quiet. + When a trusted metrics baseline is available, CI mode also enables + metrics regression gating. + --api-surface, --no-api-surface + Collect public API surface facts for baseline-aware compatibility review. + Disabled by default. + --coverage FILE Join external Cobertura XML line coverage to function spans. + Pass a `coverage xml` report path. + +Quality gates: + --fail-on-new, --no-fail-on-new + Exit with code 3 if NEW clone findings not present in the baseline + are detected. + --fail-on-new-metrics, --no-fail-on-new-metrics + Exit with code 3 if new metrics violations appear relative to the + metrics baseline. + --fail-threshold MAX_CLONES + Exit with code 3 if the total number of function + block clone groups + exceeds this value. + Disabled unless set. 
+ --fail-complexity [CC_MAX] + Exit with code 3 if any function exceeds the cyclomatic complexity + threshold. + If enabled without a value, uses 20. + --fail-coupling [CBO_MAX] + Exit with code 3 if any class exceeds the coupling threshold. + If enabled without a value, uses 10. + --fail-cohesion [LCOM4_MAX] + Exit with code 3 if any class exceeds the cohesion threshold. + If enabled without a value, uses 4. + --fail-cycles, --no-fail-cycles + Exit with code 3 if circular module dependencies are detected. + --fail-dead-code, --no-fail-dead-code + Exit with code 3 if high-confidence dead code is detected. + --fail-health [SCORE_MIN] + Exit with code 3 if the overall health score falls below the threshold. + If enabled without a value, uses 60. + --fail-on-typing-regression, --no-fail-on-typing-regression + Exit with code 3 if typing adoption coverage regresses relative to the + metrics baseline. + --fail-on-docstring-regression, --no-fail-on-docstring-regression + Exit with code 3 if public docstring coverage regresses relative to the + metrics baseline. + --fail-on-api-break, --no-fail-on-api-break + Exit with code 3 if public API removals or signature breaks are detected + relative to the metrics baseline. + --fail-on-untested-hotspots, --no-fail-on-untested-hotspots + Exit with code 3 if medium/high-risk functions measured by Coverage Join + fall below the joined coverage threshold. + Requires --coverage. + --min-typing-coverage PERCENT + Exit with code 3 if parameter typing coverage falls below the threshold. + Threshold is a whole percent from 0 to 100. + --min-docstring-coverage PERCENT + Exit with code 3 if public docstring coverage falls below the threshold. + Threshold is a whole percent from 0 to 100. + --coverage-min PERCENT + Coverage threshold for untested hotspot detection. + Threshold is a whole percent from 0 to 100. + Default: 50. + +Analysis stages: + --skip-metrics, --no-skip-metrics + Skip full metrics analysis and run in clone-only mode. 
+ --skip-dead-code, --no-skip-dead-code + Skip dead code detection. + --skip-dependencies, --no-skip-dependencies + Skip dependency graph analysis. + +Reporting: + --html [FILE] Generate an HTML report. + If FILE is omitted, writes to .cache/codeclone/report.html. + --json [FILE] Generate the canonical JSON report. + If FILE is omitted, writes to .cache/codeclone/report.json. + --md [FILE] Generate a Markdown report. + If FILE is omitted, writes to .cache/codeclone/report.md. + --sarif [FILE] Generate a SARIF 2.1.0 report. + If FILE is omitted, writes to .cache/codeclone/report.sarif. + --text [FILE] Generate a plain-text report. + If FILE is omitted, writes to .cache/codeclone/report.txt. + --timestamped-report-paths, --no-timestamped-report-paths + Append a UTC timestamp to default report filenames. + Applies only to report flags passed without FILE. + +Output and UI: + --open-html-report, --no-open-html-report + Open the generated HTML report in the default browser. + Requires --html. + --no-progress Disable progress output. + Recommended for CI logs. + --progress Force-enable progress output. + --no-color Disable ANSI colors. + --color Force-enable ANSI colors. + --quiet, --no-quiet Reduce output to warnings, errors, and essential summaries. + --verbose, --no-verbose + Include detailed identifiers for NEW clone findings. + --debug, --no-debug Print debug details for internal errors, including traceback and + environment information. + +General: + -h, --help Show this help message and exit. + --version Print the CodeClone version and exit. + +Exit codes: + 0 Success. + 2 Contract error: untrusted or invalid baseline, invalid output + configuration, incompatible versions, or unreadable sources in + CI/gating mode. + 3 Gating failure: new clones, threshold violations, or metrics + quality gate failures. + 5 Internal error: unexpected exception. 
+ +Repository: https://github.com/orenlab/codeclone +Issues: https://github.com/orenlab/codeclone/issues +Docs: https://orenlab.github.io/codeclone/ diff --git a/tests/fixtures/contract_snapshots/mcp_tool_schemas.json b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json new file mode 100644 index 0000000..4cf9e3b --- /dev/null +++ b/tests/fixtures/contract_snapshots/mcp_tool_schemas.json @@ -0,0 +1,1541 @@ +[ + { + "input_schema": { + "properties": { + "analysis_mode": { + "default": "full", + "title": "Analysis Mode", + "type": "string" + }, + "api_surface": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Api Surface" + }, + "baseline_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Baseline Path" + }, + "block_min_loc": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Block Min Loc" + }, + "block_min_stmt": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Block Min Stmt" + }, + "cache_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cache Path" + }, + "cache_policy": { + "default": "reuse", + "title": "Cache Policy", + "type": "string" + }, + "changed_paths": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Changed Paths" + }, + "cohesion_threshold": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cohesion Threshold" + }, + "complexity_threshold": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Complexity Threshold" + }, + "coupling_threshold": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Coupling Threshold" + 
}, + "coverage_min": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Coverage Min" + }, + "coverage_xml": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Coverage Xml" + }, + "git_diff_ref": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Git Diff Ref" + }, + "max_baseline_size_mb": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Baseline Size Mb" + }, + "max_cache_size_mb": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Cache Size Mb" + }, + "metrics_baseline_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Metrics Baseline Path" + }, + "min_loc": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Min Loc" + }, + "min_stmt": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Min Stmt" + }, + "processes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Processes" + }, + "respect_pyproject": { + "default": true, + "title": "Respect Pyproject", + "type": "boolean" + }, + "root": { + "title": "Root", + "type": "string" + }, + "segment_min_loc": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Segment Min Loc" + }, + "segment_min_stmt": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Segment Min Stmt" + } + }, + "required": [ + "root" + ], + "title": "analyze_changed_pathsArguments", + "type": "object" + }, + "name": "analyze_changed_paths" + }, + { + "input_schema": { + "properties": { + "analysis_mode": { + "default": "full", + "title": "Analysis 
Mode", + "type": "string" + }, + "api_surface": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Api Surface" + }, + "baseline_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Baseline Path" + }, + "block_min_loc": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Block Min Loc" + }, + "block_min_stmt": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Block Min Stmt" + }, + "cache_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cache Path" + }, + "cache_policy": { + "default": "reuse", + "title": "Cache Policy", + "type": "string" + }, + "changed_paths": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Changed Paths" + }, + "cohesion_threshold": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cohesion Threshold" + }, + "complexity_threshold": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Complexity Threshold" + }, + "coupling_threshold": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Coupling Threshold" + }, + "coverage_min": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Coverage Min" + }, + "coverage_xml": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Coverage Xml" + }, + "git_diff_ref": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Git Diff Ref" + }, + "max_baseline_size_mb": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + 
], + "default": null, + "title": "Max Baseline Size Mb" + }, + "max_cache_size_mb": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Cache Size Mb" + }, + "metrics_baseline_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Metrics Baseline Path" + }, + "min_loc": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Min Loc" + }, + "min_stmt": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Min Stmt" + }, + "processes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Processes" + }, + "respect_pyproject": { + "default": true, + "title": "Respect Pyproject", + "type": "boolean" + }, + "root": { + "title": "Root", + "type": "string" + }, + "segment_min_loc": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Segment Min Loc" + }, + "segment_min_stmt": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Segment Min Stmt" + } + }, + "required": [ + "root" + ], + "title": "analyze_repositoryArguments", + "type": "object" + }, + "name": "analyze_repository" + }, + { + "input_schema": { + "properties": { + "clone_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Clone Type" + }, + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Path" + }, + "root": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Root" + }, + "run_id": { + "anyOf": 
[ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + }, + "source_kind": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Source Kind" + } + }, + "title": "check_clonesArguments", + "type": "object" + }, + "name": "check_clones" + }, + { + "input_schema": { + "properties": { + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Path" + }, + "root": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Root" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "title": "check_cohesionArguments", + "type": "object" + }, + "name": "check_cohesion" + }, + { + "input_schema": { + "properties": { + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "min_complexity": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Min Complexity" + }, + "path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Path" + }, + "root": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Root" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "title": "check_complexityArguments", + "type": "object" + }, + "name": "check_complexity" + }, + { + "input_schema": { + "properties": { + "detail_level": { + "default": "summary", + "title": "Detail 
Level", + "type": "string" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Path" + }, + "root": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Root" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "title": "check_couplingArguments", + "type": "object" + }, + "name": "check_coupling" + }, + { + "input_schema": { + "properties": { + "detail_level": { + "default": "normal", + "title": "Detail Level", + "type": "string" + }, + "max_results": { + "default": 10, + "title": "Max Results", + "type": "integer" + }, + "min_severity": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Min Severity" + }, + "path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Path" + }, + "root": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Root" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "title": "check_dead_codeArguments", + "type": "object" + }, + "name": "check_dead_code" + }, + { + "input_schema": { + "properties": {}, + "title": "clear_session_runsArguments", + "type": "object" + }, + "name": "clear_session_runs" + }, + { + "input_schema": { + "properties": { + "focus": { + "default": "all", + "title": "Focus", + "type": "string" + }, + "run_id_after": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id After" + }, + "run_id_before": { + "title": "Run Id Before", + "type": "string" + } + }, + "required": [ + "run_id_before" + ], + "title": "compare_runsArguments", + 
"type": "object" + }, + "name": "compare_runs" + }, + { + "input_schema": { + "properties": { + "coverage_min": { + "default": 50, + "title": "Coverage Min", + "type": "integer" + }, + "fail_cohesion": { + "default": -1, + "title": "Fail Cohesion", + "type": "integer" + }, + "fail_complexity": { + "default": -1, + "title": "Fail Complexity", + "type": "integer" + }, + "fail_coupling": { + "default": -1, + "title": "Fail Coupling", + "type": "integer" + }, + "fail_cycles": { + "default": false, + "title": "Fail Cycles", + "type": "boolean" + }, + "fail_dead_code": { + "default": false, + "title": "Fail Dead Code", + "type": "boolean" + }, + "fail_health": { + "default": -1, + "title": "Fail Health", + "type": "integer" + }, + "fail_on_api_break": { + "default": false, + "title": "Fail On Api Break", + "type": "boolean" + }, + "fail_on_docstring_regression": { + "default": false, + "title": "Fail On Docstring Regression", + "type": "boolean" + }, + "fail_on_new": { + "default": false, + "title": "Fail On New", + "type": "boolean" + }, + "fail_on_new_metrics": { + "default": false, + "title": "Fail On New Metrics", + "type": "boolean" + }, + "fail_on_typing_regression": { + "default": false, + "title": "Fail On Typing Regression", + "type": "boolean" + }, + "fail_on_untested_hotspots": { + "default": false, + "title": "Fail On Untested Hotspots", + "type": "boolean" + }, + "fail_threshold": { + "default": -1, + "title": "Fail Threshold", + "type": "integer" + }, + "min_docstring_coverage": { + "default": -1, + "title": "Min Docstring Coverage", + "type": "integer" + }, + "min_typing_coverage": { + "default": -1, + "title": "Min Typing Coverage", + "type": "integer" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "title": "evaluate_gatesArguments", + "type": "object" + }, + "name": "evaluate_gates" + }, + { + "input_schema": { + "properties": { + "changed_paths": { + "anyOf": [ 
+ { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Changed Paths" + }, + "format": { + "default": "markdown", + "title": "Format", + "type": "string" + }, + "git_diff_ref": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Git Diff Ref" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "title": "generate_pr_summaryArguments", + "type": "object" + }, + "name": "generate_pr_summary" + }, + { + "input_schema": { + "properties": { + "detail_level": { + "default": "normal", + "title": "Detail Level", + "type": "string" + }, + "finding_id": { + "title": "Finding Id", + "type": "string" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "required": [ + "finding_id" + ], + "title": "get_findingArguments", + "type": "object" + }, + "name": "get_finding" + }, + { + "input_schema": { + "properties": { + "max_hotspots": { + "default": 3, + "title": "Max Hotspots", + "type": "integer" + }, + "max_suggestions": { + "default": 3, + "title": "Max Suggestions", + "type": "integer" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "title": "get_production_triageArguments", + "type": "object" + }, + "name": "get_production_triage" + }, + { + "input_schema": { + "properties": { + "detail_level": { + "default": "normal", + "title": "Detail Level", + "type": "string" + }, + "finding_id": { + "title": "Finding Id", + "type": "string" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "required": [ + "finding_id" + ], + "title": "get_remediationArguments", + "type": "object" + }, + "name": "get_remediation" + }, 
+ { + "input_schema": { + "properties": { + "family": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Family" + }, + "limit": { + "default": 50, + "title": "Limit", + "type": "integer" + }, + "offset": { + "default": 0, + "title": "Offset", + "type": "integer" + }, + "path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Path" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + }, + "section": { + "default": "all", + "title": "Section", + "type": "string" + } + }, + "title": "get_report_sectionArguments", + "type": "object" + }, + "name": "get_report_section" + }, + { + "input_schema": { + "properties": { + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "title": "get_run_summaryArguments", + "type": "object" + }, + "name": "get_run_summary" + }, + { + "input_schema": { + "properties": { + "detail": { + "default": "compact", + "title": "Detail", + "type": "string" + }, + "topic": { + "title": "Topic", + "type": "string" + } + }, + "required": [ + "topic" + ], + "title": "helpArguments", + "type": "object" + }, + "name": "help" + }, + { + "input_schema": { + "properties": { + "category": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Category" + }, + "changed_paths": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Changed Paths" + }, + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" + }, + "exclude_reviewed": { + "default": false, + "title": "Exclude Reviewed", + "type": "boolean" + }, + "family": { + "default": "all", + "title": "Family", + "type": "string" + }, + "git_diff_ref": { + "anyOf": [ + { + "type": 
"string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Git Diff Ref" + }, + "limit": { + "default": 50, + "title": "Limit", + "type": "integer" + }, + "max_results": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Results" + }, + "novelty": { + "default": "all", + "title": "Novelty", + "type": "string" + }, + "offset": { + "default": 0, + "title": "Offset", + "type": "integer" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + }, + "severity": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Severity" + }, + "sort_by": { + "default": "default", + "title": "Sort By", + "type": "string" + }, + "source_kind": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Source Kind" + } + }, + "title": "list_findingsArguments", + "type": "object" + }, + "name": "list_findings" + }, + { + "input_schema": { + "properties": { + "changed_paths": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Changed Paths" + }, + "detail_level": { + "default": "summary", + "title": "Detail Level", + "type": "string" + }, + "exclude_reviewed": { + "default": false, + "title": "Exclude Reviewed", + "type": "boolean" + }, + "git_diff_ref": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Git Diff Ref" + }, + "kind": { + "title": "Kind", + "type": "string" + }, + "limit": { + "default": 10, + "title": "Limit", + "type": "integer" + }, + "max_results": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Results" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" 
+ } + }, + "required": [ + "kind" + ], + "title": "list_hotspotsArguments", + "type": "object" + }, + "name": "list_hotspots" + }, + { + "input_schema": { + "properties": { + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "title": "list_reviewed_findingsArguments", + "type": "object" + }, + "name": "list_reviewed_findings" + }, + { + "input_schema": { + "properties": { + "finding_id": { + "title": "Finding Id", + "type": "string" + }, + "note": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Note" + }, + "run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Run Id" + } + }, + "required": [ + "finding_id" + ], + "title": "mark_finding_reviewedArguments", + "type": "object" + }, + "name": "mark_finding_reviewed" + } +] diff --git a/tests/fixtures/contract_snapshots/public_api_surface.json b/tests/fixtures/contract_snapshots/public_api_surface.json new file mode 100644 index 0000000..4cb866f --- /dev/null +++ b/tests/fixtures/contract_snapshots/public_api_surface.json @@ -0,0 +1,99 @@ +{ + "main_exports": [ + "main" + ], + "main_signature": "() -> 'None'", + "codeclone_exports": [ + "__version__" + ], + "mcp_service_public_methods": [ + { + "name": "analyze_changed_paths", + "signature": "(self, request: 'MCPAnalysisRequest') -> 'dict[str, object]'" + }, + { + "name": "analyze_repository", + "signature": "(self, request: 'MCPAnalysisRequest') -> 'dict[str, object]'" + }, + { + "name": "check_clones", + "signature": "(self, *, run_id: 'str | None' = None, root: 'str | None' = None, path: 'str | None' = None, clone_type: 'str | None' = None, source_kind: 'str | None' = None, max_results: 'int' = 10, detail_level: 'DetailLevel' = 'summary') -> 'dict[str, object]'" + }, + { + "name": "check_cohesion", + "signature": "(self, *, run_id: 'str | None' = None, root: 'str | None' 
= None, path: 'str | None' = None, max_results: 'int' = 10, detail_level: 'DetailLevel' = 'summary') -> 'dict[str, object]'" + }, + { + "name": "check_complexity", + "signature": "(self, *, run_id: 'str | None' = None, root: 'str | None' = None, path: 'str | None' = None, min_complexity: 'int | None' = None, max_results: 'int' = 10, detail_level: 'DetailLevel' = 'summary') -> 'dict[str, object]'" + }, + { + "name": "check_coupling", + "signature": "(self, *, run_id: 'str | None' = None, root: 'str | None' = None, path: 'str | None' = None, max_results: 'int' = 10, detail_level: 'DetailLevel' = 'summary') -> 'dict[str, object]'" + }, + { + "name": "check_dead_code", + "signature": "(self, *, run_id: 'str | None' = None, root: 'str | None' = None, path: 'str | None' = None, min_severity: 'str | None' = None, max_results: 'int' = 10, detail_level: 'DetailLevel' = 'summary') -> 'dict[str, object]'" + }, + { + "name": "clear_session_runs", + "signature": "(self) -> 'dict[str, object]'" + }, + { + "name": "compare_runs", + "signature": "(self, *, run_id_before: 'str', run_id_after: 'str | None' = None, focus: 'ComparisonFocus' = 'all') -> 'dict[str, object]'" + }, + { + "name": "evaluate_gates", + "signature": "(self, request: 'MCPGateRequest') -> 'dict[str, object]'" + }, + { + "name": "generate_pr_summary", + "signature": "(self, *, run_id: 'str | None' = None, changed_paths: 'Sequence[str]' = (), git_diff_ref: 'str | None' = None, format: 'PRSummaryFormat' = 'markdown') -> 'dict[str, object]'" + }, + { + "name": "get_finding", + "signature": "(self, *, finding_id: 'str', run_id: 'str | None' = None, detail_level: 'DetailLevel' = 'normal') -> 'dict[str, object]'" + }, + { + "name": "get_help", + "signature": "(self, *, topic: 'HelpTopic', detail: 'HelpDetail' = 'compact') -> 'dict[str, object]'" + }, + { + "name": "get_production_triage", + "signature": "(self, *, run_id: 'str | None' = None, max_hotspots: 'int' = 3, max_suggestions: 'int' = 3) -> 'dict[str, object]'" 
+ }, + { + "name": "get_remediation", + "signature": "(self, *, finding_id: 'str', run_id: 'str | None' = None, detail_level: 'DetailLevel' = 'normal') -> 'dict[str, object]'" + }, + { + "name": "get_report_section", + "signature": "(self, *, run_id: 'str | None' = None, section: 'ReportSection' = 'all', family: 'MetricsDetailFamily | None' = None, path: 'str | None' = None, offset: 'int' = 0, limit: 'int' = 50) -> 'dict[str, object]'" + }, + { + "name": "get_run_summary", + "signature": "(self, run_id: 'str | None' = None) -> 'dict[str, object]'" + }, + { + "name": "list_findings", + "signature": "(self, *, run_id: 'str | None' = None, family: 'FindingFamilyFilter' = 'all', category: 'str | None' = None, severity: 'str | None' = None, source_kind: 'str | None' = None, novelty: 'FindingNoveltyFilter' = 'all', sort_by: 'FindingSort' = 'default', detail_level: 'DetailLevel' = 'summary', changed_paths: 'Sequence[str]' = (), git_diff_ref: 'str | None' = None, exclude_reviewed: 'bool' = False, offset: 'int' = 0, limit: 'int' = 50, max_results: 'int | None' = None) -> 'dict[str, object]'" + }, + { + "name": "list_hotspots", + "signature": "(self, *, kind: 'HotlistKind', run_id: 'str | None' = None, detail_level: 'DetailLevel' = 'summary', changed_paths: 'Sequence[str]' = (), git_diff_ref: 'str | None' = None, exclude_reviewed: 'bool' = False, limit: 'int' = 10, max_results: 'int | None' = None) -> 'dict[str, object]'" + }, + { + "name": "list_reviewed_findings", + "signature": "(self, *, run_id: 'str | None' = None) -> 'dict[str, object]'" + }, + { + "name": "mark_finding_reviewed", + "signature": "(self, *, finding_id: 'str', run_id: 'str | None' = None, note: 'str | None' = None) -> 'dict[str, object]'" + }, + { + "name": "read_resource", + "signature": "(self, uri: 'str') -> 'str'" + } + ] +} diff --git a/tests/test_architecture.py b/tests/test_architecture.py index fc71506..48b8a6a 100644 --- a/tests/test_architecture.py +++ b/tests/test_architecture.py @@ -6,24 +6,9 
@@ from __future__ import annotations -import ast from pathlib import Path - -def _module_name_from_path(path: Path) -> str: - parts = list(path.with_suffix("").parts) - return ".".join(parts) - - -def _resolve_import(module_name: str, node: ast.ImportFrom) -> str: - if node.level == 0: - return node.module or "" - - parts = module_name.split(".") - prefix_parts = parts[: -node.level] - if node.module: - return ".".join([*prefix_parts, node.module]) - return ".".join(prefix_parts) +from tests._import_graph import _iter_local_imports, _module_name_from_path def _iter_codeclone_modules(root: Path) -> list[tuple[str, Path]]: @@ -33,19 +18,6 @@ def _iter_codeclone_modules(root: Path) -> list[tuple[str, Path]]: ] -def _iter_local_imports(module_name: str, source: str) -> list[str]: - tree = ast.parse(source) - imports: list[str] = [] - for node in ast.walk(tree): - if isinstance(node, ast.Import): - imports.extend(alias.name for alias in node.names) - elif isinstance(node, ast.ImportFrom): - imports.append(_resolve_import(module_name, node)) - return [ - import_name for import_name in imports if import_name.startswith("codeclone") - ] - - def _violates(import_name: str, forbidden_prefixes: tuple[str, ...]) -> bool: return any( import_name == prefix or import_name.startswith(prefix + ".") @@ -73,51 +45,101 @@ def test_architecture_layer_violations() -> None: "codeclone.report.", ( "codeclone.ui_messages", - "codeclone.html_report", - "codeclone.cli", + "codeclone.report.html", + "codeclone.surfaces.cli", "codeclone._html_", - "codeclone._html_report", + "codeclone.report.html", ), ), ( "codeclone.extractor", ( "codeclone.report", - "codeclone.cli", + "codeclone.surfaces.cli", "codeclone.baseline", ), ), ( "codeclone.grouping", ( - "codeclone.cli", + "codeclone.surfaces.cli", "codeclone.baseline", - "codeclone.html_report", + "codeclone.report.html", ), ), ( "codeclone.baseline", ( - "codeclone.cli", + "codeclone.surfaces.cli", "codeclone.ui_messages", - 
"codeclone.html_report", + "codeclone.report.html", ), ), ( "codeclone.cache", ( - "codeclone.cli", + "codeclone.surfaces.cli", "codeclone.ui_messages", - "codeclone.html_report", + "codeclone.report.html", + ), + ), + ( + "codeclone.core", + ( + "codeclone.surfaces", + "codeclone.config", + ), + ), + ( + "codeclone.analysis", + ( + "codeclone.report", + "codeclone.surfaces", + "codeclone.config", + ), + ), + ( + "codeclone.metrics", + ( + "codeclone.report.document", + "codeclone.report.renderers", + "codeclone.surfaces", + "codeclone.config", + ), + ), + ( + "codeclone.findings", + ( + "codeclone.report", + "codeclone.surfaces", + "codeclone.config", + ), + ), + ( + "codeclone.report.document", + ( + "codeclone.surfaces", + "codeclone.config", + ), + ), + ( + "codeclone.report.renderers", + ( + "codeclone.core", + "codeclone.analysis", + "codeclone.metrics", + "codeclone.findings", + "codeclone.surfaces", + "codeclone.config", ), ), ( "codeclone.domain.", ( - "codeclone.cli", + "codeclone.surfaces.cli", "codeclone.pipeline", "codeclone.report", - "codeclone.html_report", + "codeclone.report.html", "codeclone.ui_messages", "codeclone.baseline", "codeclone.cache", @@ -130,6 +152,10 @@ def test_architecture_layer_violations() -> None: for module_prefix, forbidden_prefixes in forbidden_by_module_prefix: if _matches_module_prefix(module_name, module_prefix): + if module_prefix == "codeclone.report." 
and module_name.startswith( + "codeclone.report.html" + ): + continue violations.extend( [ ( @@ -142,7 +168,7 @@ def test_architecture_layer_violations() -> None: ) if module_name == "codeclone.models": - allowed_prefixes = ("codeclone.contracts", "codeclone.errors") + allowed_prefixes = ("codeclone.contracts",) unexpected_imports = [ import_name for import_name in imports diff --git a/tests/test_baseline.py b/tests/test_baseline.py index 859b8c1..04bee4c 100644 --- a/tests/test_baseline.py +++ b/tests/test_baseline.py @@ -12,9 +12,11 @@ import pytest import codeclone.baseline as baseline_mod +import codeclone.baseline.clone_baseline as clone_baseline_mod +import codeclone.baseline.trust as baseline_trust_mod from codeclone.baseline import Baseline, BaselineStatus, coerce_baseline_status from codeclone.contracts import BASELINE_FINGERPRINT_VERSION, BASELINE_SCHEMA_VERSION -from codeclone.errors import BaselineValidationError +from codeclone.contracts.errors import BaselineValidationError def _python_tag() -> str: @@ -47,7 +49,7 @@ def _trusted_payload( created_at: str | None = "2026-02-08T11:43:16Z", generator_version: str = "1.4.0", ) -> dict[str, object]: - payload = baseline_mod._baseline_payload( + payload = clone_baseline_mod._baseline_payload( functions=set(functions or [_func_id()]), blocks=set(blocks or [_block_id()]), generator="codeclone", @@ -164,7 +166,7 @@ def test_baseline_load_too_large( ) -> None: baseline_path = tmp_path / "baseline.json" _write_payload(baseline_path, _trusted_payload()) - monkeypatch.setattr(baseline_mod, "MAX_BASELINE_SIZE_BYTES", 1) + monkeypatch.setattr(baseline_trust_mod, "MAX_BASELINE_SIZE_BYTES", 1) baseline = Baseline(baseline_path) with pytest.raises(BaselineValidationError, match="too large") as exc: baseline.load() @@ -576,13 +578,13 @@ def test_baseline_payload_fields_contract_invariant(tmp_path: Path) -> None: def test_baseline_hash_canonical_determinism() -> None: - hash_a = baseline_mod._compute_payload_sha256( + 
hash_a = baseline_trust_mod._compute_payload_sha256( functions={"a" * 40 + "|0-19", "b" * 40 + "|0-19"}, blocks={_block_id()}, fingerprint_version="1", python_tag="cp313", ) - hash_b = baseline_mod._compute_payload_sha256( + hash_b = baseline_trust_mod._compute_payload_sha256( functions={"b" * 40 + "|0-19", "a" * 40 + "|0-19"}, blocks={_block_id()}, fingerprint_version="1", @@ -803,7 +805,7 @@ def _boom_stat(self: Path) -> object: with pytest.raises( BaselineValidationError, match="Cannot stat baseline file" ) as exc: - baseline_mod._safe_stat_size(path) + baseline_trust_mod._safe_stat_size(path) assert exc.value.status == "invalid_type" @@ -818,10 +820,10 @@ def _boom_replace(src: str | Path, dst: str | Path) -> None: temp_holder["path"] = Path(src) raise OSError("replace failed") - monkeypatch.setattr("codeclone._json_io.os.replace", _boom_replace) + monkeypatch.setattr("codeclone.utils.json_io.os.replace", _boom_replace) with pytest.raises(OSError, match="replace failed"): - baseline_mod._atomic_write_json(path, _trusted_payload()) + clone_baseline_mod._atomic_write_json(path, _trusted_payload()) assert temp_holder["path"].exists() is False @@ -841,18 +843,18 @@ def _boom_read(self: Path, *_args: object, **_kwargs: object) -> str: with pytest.raises( BaselineValidationError, match="Cannot read baseline file" ) as exc: - baseline_mod._load_json_object(path) + baseline_trust_mod._load_json_object(path) assert exc.value.status == "invalid_json" def test_baseline_optional_str_paths(tmp_path: Path) -> None: path = tmp_path / "baseline.json" - assert baseline_mod._optional_str({}, "generator_version", path=path) is None + assert baseline_trust_mod._optional_str({}, "generator_version", path=path) is None with pytest.raises( BaselineValidationError, match="'generator_version' must be string", ) as exc: - baseline_mod._optional_str( + baseline_trust_mod._optional_str( {"generator_version": 1}, "generator_version", path=path, @@ -868,7 +870,7 @@ def 
test_baseline_require_utc_iso8601_z_rejects_invalid_calendar_date( BaselineValidationError, match="'created_at' must be UTC ISO-8601 with Z", ) as exc: - baseline_mod._require_utc_iso8601_z( + baseline_trust_mod._require_utc_iso8601_z( {"created_at": "2026-02-31T00:00:00Z"}, "created_at", path=path, @@ -894,7 +896,7 @@ def test_baseline_load_legacy_codeclone_version_alias(tmp_path: Path) -> None: def test_parse_generator_meta_string_legacy_alias(tmp_path: Path) -> None: path = tmp_path / "baseline.json" - name, version = baseline_mod._parse_generator_meta( + name, version = baseline_trust_mod._parse_generator_meta( { "generator": "codeclone", "codeclone_version": "1.4.0", @@ -907,7 +909,7 @@ def test_parse_generator_meta_string_legacy_alias(tmp_path: Path) -> None: def test_parse_generator_meta_string_prefers_generator_version(tmp_path: Path) -> None: path = tmp_path / "baseline.json" - name, version = baseline_mod._parse_generator_meta( + name, version = baseline_trust_mod._parse_generator_meta( { "generator": "codeclone", "generator_version": "1.4.2", @@ -921,7 +923,7 @@ def test_parse_generator_meta_string_prefers_generator_version(tmp_path: Path) - def test_parse_generator_meta_object_top_level_fallback(tmp_path: Path) -> None: path = tmp_path / "baseline.json" - name, version = baseline_mod._parse_generator_meta( + name, version = baseline_trust_mod._parse_generator_meta( { "generator": {"name": "codeclone"}, "generator_version": "1.4.1", @@ -937,7 +939,7 @@ def test_parse_generator_meta_rejects_extra_generator_keys(tmp_path: Path) -> No with pytest.raises( BaselineValidationError, match="unexpected generator keys" ) as exc: - baseline_mod._parse_generator_meta( + baseline_trust_mod._parse_generator_meta( {"generator": {"name": "codeclone", "version": "1.4.0", "extra": "x"}}, path=path, ) @@ -946,7 +948,11 @@ def test_parse_generator_meta_rejects_extra_generator_keys(tmp_path: Path) -> No def test_baseline_parse_semver_three_parts(tmp_path: Path) -> None: path 
= tmp_path / "baseline.json" - assert baseline_mod._parse_semver("1.2.3", key="schema_version", path=path) == ( + assert baseline_trust_mod._parse_semver( + "1.2.3", + key="schema_version", + path=path, + ) == ( 1, 2, 3, @@ -959,10 +965,10 @@ def test_baseline_require_sorted_unique_ids_non_string(tmp_path: Path) -> None: BaselineValidationError, match="'functions' must be list\\[str\\]", ) as exc: - baseline_mod._require_sorted_unique_ids( + baseline_trust_mod._require_sorted_unique_ids( {"functions": [1]}, "functions", - pattern=baseline_mod._FUNCTION_ID_RE, + pattern=clone_baseline_mod._FUNCTION_ID_RE, path=path, ) assert exc.value.status == "invalid_type" @@ -1050,7 +1056,12 @@ def test_baseline_save_preserves_embedded_metrics_without_hash(tmp_path: Path) - def test_preserve_embedded_metrics_variants(tmp_path: Path) -> None: path = tmp_path / "baseline.json" _write_payload(path, {"meta": {}, "clones": {"functions": [], "blocks": []}}) - assert baseline_mod._preserve_embedded_metrics(path) == (None, None, None, None) + assert clone_baseline_mod._preserve_embedded_metrics(path) == ( + None, + None, + None, + None, + ) _write_payload( path, @@ -1060,7 +1071,7 @@ def test_preserve_embedded_metrics_variants(tmp_path: Path) -> None: "metrics": {"x": 1}, }, ) - assert baseline_mod._preserve_embedded_metrics(path) == ( + assert clone_baseline_mod._preserve_embedded_metrics(path) == ( {"x": 1}, None, None, @@ -1075,7 +1086,7 @@ def test_preserve_embedded_metrics_variants(tmp_path: Path) -> None: "metrics": {"x": 2}, }, ) - assert baseline_mod._preserve_embedded_metrics(path) == ( + assert clone_baseline_mod._preserve_embedded_metrics(path) == ( {"x": 2}, None, None, @@ -1090,7 +1101,7 @@ def test_preserve_embedded_metrics_variants(tmp_path: Path) -> None: "metrics": {"x": 3}, }, ) - assert baseline_mod._preserve_embedded_metrics(path) == ( + assert clone_baseline_mod._preserve_embedded_metrics(path) == ( {"x": 3}, "a" * 64, None, @@ -1109,7 +1120,7 @@ def 
test_preserve_embedded_metrics_variants(tmp_path: Path) -> None: "api_surface": {"modules": [{"module": "pkg.mod"}]}, }, ) - assert baseline_mod._preserve_embedded_metrics(path) == ( + assert clone_baseline_mod._preserve_embedded_metrics(path) == ( {"x": 3}, "a" * 64, {"modules": [{"module": "pkg.mod"}]}, @@ -1133,9 +1144,9 @@ def _payload(**_kwargs: object) -> dict[str, object]: "clones": {"functions": [], "blocks": []}, } - monkeypatch.setattr(baseline_mod, "_baseline_payload", _payload) + monkeypatch.setattr(clone_baseline_mod, "_baseline_payload", _payload) monkeypatch.setattr( - baseline_mod, + clone_baseline_mod, "_preserve_embedded_metrics", lambda _path: ({"health_score": 1}, "a" * 64, None, None), ) @@ -1167,7 +1178,7 @@ def _payload(**_kwargs: object) -> dict[str, object]: "clones": {"functions": [], "blocks": []}, } - monkeypatch.setattr(baseline_mod, "_baseline_payload", _payload) + monkeypatch.setattr(clone_baseline_mod, "_baseline_payload", _payload) baseline.save() _assert_baseline_runtime_meta( @@ -1207,7 +1218,7 @@ def _payload(**_kwargs: object) -> dict[str, object]: "clones": {"functions": [], "blocks": []}, } - monkeypatch.setattr(baseline_mod, "_baseline_payload", _payload) + monkeypatch.setattr(clone_baseline_mod, "_baseline_payload", _payload) baseline.save() _assert_baseline_runtime_meta( @@ -1263,7 +1274,7 @@ def _payload(**_kwargs: object) -> dict[str, object]: "clones": {"functions": [], "blocks": []}, } - monkeypatch.setattr(baseline_mod, "_baseline_payload", _payload) + monkeypatch.setattr(clone_baseline_mod, "_baseline_payload", _payload) baseline.save() assert baseline.generator == "keep-generator" diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index c921cb9..2a9435e 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -12,6 +12,7 @@ BENCHMARK_NEUTRAL_ARGS, RunMeasurement, Scenario, + _timing_regressions, _validate_inventory_sample, ) @@ -33,6 +34,27 @@ def _measurement( ) +def _benchmark_payload( + *, 
+ cold_full: float, + warm_full: float, + warm_clones_only: float, +) -> dict[str, object]: + def _scenario(name: str, median: float) -> dict[str, object]: + return { + "name": name, + "stats_seconds": {"median": median}, + } + + return { + "scenarios": [ + _scenario("cold_full", cold_full), + _scenario("warm_full", warm_full), + _scenario("warm_clones_only", warm_clones_only), + ] + } + + def test_benchmark_inventory_validation_accepts_valid_cold_and_warm_samples() -> None: _validate_inventory_sample( scenario=Scenario(name="cold_full", mode="cold", extra_args=()), @@ -90,3 +112,53 @@ def test_benchmark_inventory_validation_rejects_invalid_samples( scenario=scenario, measurement=measurement, ) + + +def test_benchmark_timing_regressions_accept_within_tolerance() -> None: + baseline = _benchmark_payload( + cold_full=1.0, + warm_full=0.30, + warm_clones_only=0.25, + ) + current = _benchmark_payload( + cold_full=1.04, + warm_full=0.31, + warm_clones_only=0.24, + ) + + assert ( + _timing_regressions( + current_payload=current, + baseline_payload=baseline, + max_regression_pct=5.0, + ) + == [] + ) + + +def test_benchmark_timing_regressions_report_excess_slowdown() -> None: + baseline = _benchmark_payload( + cold_full=1.0, + warm_full=0.30, + warm_clones_only=0.25, + ) + current = _benchmark_payload( + cold_full=1.07, + warm_full=0.32, + warm_clones_only=0.27, + ) + + regressions = _timing_regressions( + current_payload=current, + baseline_payload=baseline, + max_regression_pct=5.0, + ) + + assert regressions == [ + "cold_full: median 1.0700s exceeds baseline 1.0000s by 7.00% (allowed 5.00%)", + ( + "warm_clones_only: median 0.2700s exceeds baseline 0.2500s " + "by 8.00% (allowed 5.00%)" + ), + "warm_full: median 0.3200s exceeds baseline 0.3000s by 6.67% (allowed 5.00%)", + ] diff --git a/tests/test_blocks.py b/tests/test_blocks.py index a875635..64379d8 100644 --- a/tests/test_blocks.py +++ b/tests/test_blocks.py @@ -6,8 +6,8 @@ import ast +from 
codeclone.analysis.normalizer import NormalizationConfig from codeclone.blocks import extract_blocks -from codeclone.normalize import NormalizationConfig def test_extracts_non_overlapping_blocks() -> None: diff --git a/tests/test_cache.py b/tests/test_cache.py index b449f48..069dece 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -17,11 +17,19 @@ import codeclone.cache as cache_mod from codeclone.blocks import BlockUnit, SegmentUnit from codeclone.cache import Cache, CacheStatus -from codeclone.cache_io import sign_cache_payload -from codeclone.cache_paths import runtime_filepath_from_wire, wire_filepath_from_runtime -from codeclone.errors import CacheError -from codeclone.extractor import Unit -from codeclone.models import ApiParamSpec, FileMetrics, ModuleApiSurface, PublicSymbol +from codeclone.cache.integrity import sign_cache_payload +from codeclone.cache.projection import ( + runtime_filepath_from_wire, + wire_filepath_from_runtime, +) +from codeclone.contracts.errors import CacheError +from codeclone.models import ( + ApiParamSpec, + FileMetrics, + ModuleApiSurface, + PublicSymbol, + Unit, +) def _make_unit(filepath: str) -> Unit: diff --git a/tests/test_cfg.py b/tests/test_cfg.py index 28a423e..7ea6c74 100644 --- a/tests/test_cfg.py +++ b/tests/test_cfg.py @@ -9,12 +9,12 @@ import pytest -from codeclone.cfg import CFG, CFGBuilder -from codeclone.cfg_model import CFG as CFGModel -from codeclone.cfg_model import Block -from codeclone.extractor import _cfg_fingerprint_and_complexity +from codeclone.analysis.cfg import CFG, CFGBuilder +from codeclone.analysis.cfg_model import CFG as CFGModel +from codeclone.analysis.cfg_model import Block +from codeclone.analysis.fingerprint import _cfg_fingerprint_and_complexity +from codeclone.analysis.normalizer import NormalizationConfig from codeclone.meta_markers import CFG_META_PREFIX -from codeclone.normalize import NormalizationConfig from tests._ast_helpers import fix_missing_single_function diff --git 
a/tests/test_cfg_model.py b/tests/test_cfg_model.py index 36c4eee..f6cb564 100644 --- a/tests/test_cfg_model.py +++ b/tests/test_cfg_model.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy -from codeclone.cfg_model import CFG, Block +from codeclone.analysis.cfg_model import CFG, Block def test_block_hash_and_eq() -> None: diff --git a/tests/test_cli_config.py b/tests/test_cli_config.py index 365d5f1..790f7e6 100644 --- a/tests/test_cli_config.py +++ b/tests/test_cli_config.py @@ -13,8 +13,10 @@ import pytest -import codeclone._cli_config as cfg_mod -from codeclone._cli_config import ConfigValidationError +import codeclone.config.pyproject_loader as loader_mod +import codeclone.config.resolver as resolver_mod +import codeclone.config.spec as spec_mod +from codeclone.config.pyproject_loader import ConfigValidationError def _write_pyproject(path: Path, content: str) -> None: @@ -26,7 +28,7 @@ def test_collect_explicit_cli_dests_stops_on_double_dash() -> None: parser.add_argument("--min-loc", dest="min_loc", type=int, default=20) parser.add_argument("--quiet", action="store_true") parser.add_argument("--json", dest="json_out") - explicit = cfg_mod.collect_explicit_cli_dests( + explicit = resolver_mod.collect_explicit_cli_dests( parser, argv=("--min-loc=10", "--quiet", "--", "--json", "report.json"), ) @@ -34,7 +36,7 @@ def test_collect_explicit_cli_dests_stops_on_double_dash() -> None: def test_load_pyproject_config_missing_file_returns_empty(tmp_path: Path) -> None: - assert cfg_mod.load_pyproject_config(tmp_path) == {} + assert loader_mod.load_pyproject_config(tmp_path) == {} def test_load_pyproject_config_raises_on_loader_errors( @@ -46,19 +48,19 @@ def test_load_pyproject_config_raises_on_loader_errors( def _raise_oserror(_path: Path) -> object: raise OSError("denied") - monkeypatch.setattr(cfg_mod, "_load_toml", _raise_oserror) + monkeypatch.setattr(loader_mod, "_load_toml", _raise_oserror) with pytest.raises( 
ConfigValidationError, match=r"Cannot read pyproject\.toml", ): - cfg_mod.load_pyproject_config(tmp_path) + loader_mod.load_pyproject_config(tmp_path) def _raise_value_error(_path: Path) -> object: raise ValueError("broken") - monkeypatch.setattr(cfg_mod, "_load_toml", _raise_value_error) + monkeypatch.setattr(loader_mod, "_load_toml", _raise_value_error) with pytest.raises(ConfigValidationError, match="Invalid TOML"): - cfg_mod.load_pyproject_config(tmp_path) + loader_mod.load_pyproject_config(tmp_path) def test_load_pyproject_config_validates_tool_structure( @@ -67,31 +69,37 @@ def test_load_pyproject_config_validates_tool_structure( pyproject = tmp_path / "pyproject.toml" _write_pyproject(pyproject, "[tool]\n") - monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: []) + monkeypatch.setattr(loader_mod, "_load_toml", lambda _path: []) with pytest.raises(ConfigValidationError, match="root must be object"): - cfg_mod.load_pyproject_config(tmp_path) + loader_mod.load_pyproject_config(tmp_path) - monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: {"tool": "bad"}) + monkeypatch.setattr(loader_mod, "_load_toml", lambda _path: {"tool": "bad"}) with pytest.raises(ConfigValidationError, match="'tool' must be object"): - cfg_mod.load_pyproject_config(tmp_path) + loader_mod.load_pyproject_config(tmp_path) monkeypatch.setattr( - cfg_mod, "_load_toml", lambda _path: {"tool": {"codeclone": []}} + loader_mod, + "_load_toml", + lambda _path: {"tool": {"codeclone": []}}, ) with pytest.raises( ConfigValidationError, match=r"'tool\.codeclone' must be object", ): - cfg_mod.load_pyproject_config(tmp_path) + loader_mod.load_pyproject_config(tmp_path) - monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: {"tool": {}}) - assert cfg_mod.load_pyproject_config(tmp_path) == {} + monkeypatch.setattr(loader_mod, "_load_toml", lambda _path: {"tool": {}}) + assert loader_mod.load_pyproject_config(tmp_path) == {} - monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: {"tool": 
None}) - assert cfg_mod.load_pyproject_config(tmp_path) == {} + monkeypatch.setattr(loader_mod, "_load_toml", lambda _path: {"tool": None}) + assert loader_mod.load_pyproject_config(tmp_path) == {} - monkeypatch.setattr(cfg_mod, "_load_toml", lambda _path: {"tool": {"other": {}}}) - assert cfg_mod.load_pyproject_config(tmp_path) == {} + monkeypatch.setattr( + loader_mod, + "_load_toml", + lambda _path: {"tool": {"other": {}}}, + ) + assert loader_mod.load_pyproject_config(tmp_path) == {} def test_load_pyproject_config_unknown_key_rejected( @@ -100,12 +108,12 @@ def test_load_pyproject_config_unknown_key_rejected( pyproject = tmp_path / "pyproject.toml" _write_pyproject(pyproject, "[tool]\n") monkeypatch.setattr( - cfg_mod, + loader_mod, "_load_toml", lambda _path: {"tool": {"codeclone": {"unknown_option": 1}}}, ) with pytest.raises(ConfigValidationError, match="Unknown key\\(s\\)"): - cfg_mod.load_pyproject_config(tmp_path) + loader_mod.load_pyproject_config(tmp_path) def test_load_pyproject_config_normalizes_relative_and_absolute_paths( @@ -122,7 +130,7 @@ def test_load_pyproject_config_normalizes_relative_and_absolute_paths( sarif_out = "reports/report.sarif" """.strip(), ) - loaded = cfg_mod.load_pyproject_config(tmp_path) + loaded = loader_mod.load_pyproject_config(tmp_path) assert loaded["min_loc"] == 5 assert loaded["cache_path"] == str(tmp_path / ".cache/codeclone/cache.json") assert loaded["json_out"] == "/tmp/report.json" @@ -132,7 +140,7 @@ def test_load_pyproject_config_normalizes_relative_and_absolute_paths( def test_apply_pyproject_config_overrides_respects_explicit_cli_flags() -> None: args = argparse.Namespace(min_loc=10, quiet=False) - cfg_mod.apply_pyproject_config_overrides( + resolver_mod.apply_pyproject_config_overrides( args=args, config_values={"min_loc": 42, "quiet": True}, explicit_cli_dests={"quiet"}, @@ -158,7 +166,7 @@ def test_apply_pyproject_config_overrides_respects_explicit_cli_flags() -> None: def 
test_validate_config_value_accepts_expected_types( key: str, value: object, expected: object ) -> None: - assert cfg_mod._validate_config_value(key=key, value=value) == expected + assert loader_mod.validate_config_value(key=key, value=value) == expected @pytest.mark.parametrize( @@ -168,7 +176,11 @@ def test_validate_config_value_accepts_expected_types( ("update_baseline", "yes", "expected bool"), ("min_loc", True, "expected int"), ("baseline", 1, "expected str"), - ("golden_fixture_paths", "tests/fixtures/golden_*", "expected list\\[str\\]"), + ( + "golden_fixture_paths", + "tests/fixtures/golden_*", + "expected list\\[str\\]", + ), ( "golden_fixture_paths", ["tests/fixtures/golden_*", 1], @@ -181,24 +193,24 @@ def test_validate_config_value_rejects_invalid_types( key: str, value: object, error_fragment: str ) -> None: with pytest.raises(ConfigValidationError, match=error_fragment): - cfg_mod._validate_config_value(key=key, value=value) + loader_mod.validate_config_value(key=key, value=value) def test_validate_config_value_unsupported_spec_raises( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setitem( - cfg_mod._CONFIG_KEY_SPECS, + loader_mod.CONFIG_KEY_SPECS, "_unsupported", - cfg_mod._ConfigKeySpec(tuple), + spec_mod.ConfigKeySpec(tuple), ) with pytest.raises(ConfigValidationError, match="Unsupported config key spec"): - cfg_mod._validate_config_value(key="_unsupported", value=("x",)) + loader_mod.validate_config_value(key="_unsupported", value=("x",)) def test_normalize_path_config_value_behaviour(tmp_path: Path) -> None: assert ( - cfg_mod._normalize_path_config_value( + loader_mod.normalize_path_config_value( key="min_loc", value=10, root_path=tmp_path, @@ -206,20 +218,20 @@ def test_normalize_path_config_value_behaviour(tmp_path: Path) -> None: == 10 ) assert ( - cfg_mod._normalize_path_config_value( + loader_mod.normalize_path_config_value( key="cache_path", value=123, root_path=tmp_path, ) == 123 ) - assert cfg_mod._normalize_path_config_value( + 
assert loader_mod.normalize_path_config_value( key="cache_path", value="relative/cache.json", root_path=tmp_path, ) == str(tmp_path / "relative/cache.json") assert ( - cfg_mod._normalize_path_config_value( + loader_mod.normalize_path_config_value( key="cache_path", value="/tmp/absolute-cache.json", root_path=tmp_path, @@ -228,7 +240,7 @@ def test_normalize_path_config_value_behaviour(tmp_path: Path) -> None: ) patterns = ("tests/fixtures/golden_*",) assert ( - cfg_mod._normalize_path_config_value( + loader_mod.normalize_path_config_value( key="golden_fixture_paths", value=patterns, root_path=tmp_path, @@ -248,7 +260,7 @@ def test_load_pyproject_config_accepts_golden_fixture_paths(tmp_path: Path) -> N ] """.strip(), ) - loaded = cfg_mod.load_pyproject_config(tmp_path) + loaded = loader_mod.load_pyproject_config(tmp_path) assert loaded["golden_fixture_paths"] == ("tests/fixtures/golden_*",) @@ -257,18 +269,18 @@ def test_load_toml_py310_missing_tomli_raises( ) -> None: toml_path = tmp_path / "pyproject.toml" _write_pyproject(toml_path, "[tool]\n") - monkeypatch.setattr(cfg_mod, "sys", SimpleNamespace(version_info=(3, 10, 14))) + monkeypatch.setattr(loader_mod, "sys", SimpleNamespace(version_info=(3, 10, 14))) def _raise_module_not_found(_name: str) -> object: raise ModuleNotFoundError("tomli") monkeypatch.setattr( - cfg_mod, + loader_mod, "importlib", SimpleNamespace(import_module=_raise_module_not_found), ) with pytest.raises(ConfigValidationError, match="requires dependency 'tomli'"): - cfg_mod._load_toml(toml_path) + loader_mod._load_toml(toml_path) def test_load_toml_py310_invalid_tomli_module_raises( @@ -276,14 +288,14 @@ def test_load_toml_py310_invalid_tomli_module_raises( ) -> None: toml_path = tmp_path / "pyproject.toml" _write_pyproject(toml_path, "[tool]\n") - monkeypatch.setattr(cfg_mod, "sys", SimpleNamespace(version_info=(3, 10, 14))) + monkeypatch.setattr(loader_mod, "sys", SimpleNamespace(version_info=(3, 10, 14))) monkeypatch.setattr( - cfg_mod, + 
loader_mod, "importlib", SimpleNamespace(import_module=lambda _name: object()), ) with pytest.raises(ConfigValidationError, match="missing callable 'load'"): - cfg_mod._load_toml(toml_path) + loader_mod._load_toml(toml_path) def test_load_toml_py310_uses_tomli_load( @@ -291,7 +303,7 @@ def test_load_toml_py310_uses_tomli_load( ) -> None: toml_path = tmp_path / "pyproject.toml" _write_pyproject(toml_path, "[tool]\n") - monkeypatch.setattr(cfg_mod, "sys", SimpleNamespace(version_info=(3, 10, 14))) + monkeypatch.setattr(loader_mod, "sys", SimpleNamespace(version_info=(3, 10, 14))) class _FakeTomli: @staticmethod @@ -301,8 +313,8 @@ def load(file_obj: Any) -> dict[str, object]: return {"tool": {}} monkeypatch.setattr( - cfg_mod, + loader_mod, "importlib", SimpleNamespace(import_module=lambda _name: _FakeTomli), ) - assert cfg_mod._load_toml(toml_path) == {"tool": {}} + assert loader_mod._load_toml(toml_path) == {"tool": {}} diff --git a/tests/test_cli_help_snapshot.py b/tests/test_cli_help_snapshot.py new file mode 100644 index 0000000..ce9e818 --- /dev/null +++ b/tests/test_cli_help_snapshot.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + +from tests._contract_snapshots import load_text_snapshot + + +def test_cli_help_snapshot() -> None: + root_dir = Path(__file__).resolve().parents[1] + env = os.environ.copy() + env["PYTHONPATH"] = str(root_dir) + os.pathsep + env.get("PYTHONPATH", "") + result = subprocess.run( + [sys.executable, "-m", "codeclone.main", "--help"], + capture_output=True, + text=True, + env=env, + check=False, + ) + + assert result.returncode == 0 + assert result.stderr == "" + assert result.stdout.replace("\r\n", "\n") == load_text_snapshot("cli_help.txt") diff --git a/tests/test_cli_inprocess.py b/tests/test_cli_inprocess.py index c7f2f30..4ae574d 100644 --- a/tests/test_cli_inprocess.py +++ b/tests/test_cli_inprocess.py @@ -16,12 +16,17 @@ import pytest -import 
codeclone._cli_meta as cli_meta -import codeclone._cli_reports as cli_reports import codeclone.baseline as baseline -import codeclone.pipeline as pipeline -from codeclone import __version__, cli -from codeclone._cli_gating import parse_metric_reason_entry +import codeclone.baseline.trust as baseline_trust +import codeclone.core as pipeline +import codeclone.core.discovery as core_discovery +import codeclone.core.parallelism as core_parallelism +import codeclone.core.pipeline as core_pipeline +import codeclone.core.worker as core_worker +import codeclone.surfaces.cli.main as cli +import codeclone.surfaces.cli.report_meta as cli_meta +import codeclone.surfaces.cli.reports_output as cli_reports +from codeclone import __version__ from codeclone.cache import Cache, file_stat_signature from codeclone.contracts import ( BASELINE_FINGERPRINT_VERSION, @@ -29,8 +34,9 @@ CACHE_VERSION, REPORT_SCHEMA_VERSION, ) -from codeclone.errors import CacheError +from codeclone.contracts.errors import CacheError from codeclone.models import Unit +from codeclone.report.gates.reasons import parse_metric_reason_entry from tests._assertions import ( assert_contains_all, assert_mapping_entries, @@ -167,8 +173,8 @@ def _patch_dummy_progress(monkeypatch: pytest.MonkeyPatch) -> None: def _patch_parallel(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _DummyExecutor) - monkeypatch.setattr(pipeline, "as_completed", lambda futures: futures) + monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _DummyExecutor) + monkeypatch.setattr(core_parallelism, "as_completed", lambda futures: futures) def _run_main(monkeypatch: pytest.MonkeyPatch, args: Iterable[str]) -> None: @@ -261,11 +267,11 @@ def _patch_fixed_executor( monkeypatch: pytest.MonkeyPatch, future: _FixedFuture ) -> None: monkeypatch.setattr( - pipeline, + core_parallelism, "ProcessPoolExecutor", lambda *args, **kwargs: _FixedExecutor(future), ) - monkeypatch.setattr(pipeline, 
"as_completed", lambda futures: futures) + monkeypatch.setattr(core_parallelism, "as_completed", lambda futures: futures) def _baseline_payload( @@ -309,7 +315,7 @@ def _baseline_payload( and isinstance(meta_python_tag, str) and payload_sha256 is None ): - hash_value = baseline._compute_payload_sha256( + hash_value = baseline_trust._compute_payload_sha256( functions=set(function_list), blocks=set(block_list), fingerprint_version=meta_fingerprint, @@ -554,8 +560,8 @@ def __exit__( if not no_progress: _patch_dummy_progress(monkeypatch) - monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailExec) - monkeypatch.setattr(pipeline, "process_file", _boom) + monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _FailExec) + monkeypatch.setattr(core_worker, "process_file", _boom) args = [str(tmp_path)] if no_progress: args.append("--no-progress") @@ -821,7 +827,7 @@ def test_cli_cache_not_shared_between_projects( legacy_cache.parent.mkdir(parents=True, exist_ok=True) legacy_cache.write_text("{}", "utf-8") - monkeypatch.setattr(pipeline, "iter_py_files", lambda _root: []) + monkeypatch.setattr(core_discovery, "iter_py_files", lambda _root: []) _patch_parallel(monkeypatch) _run_main(monkeypatch, [str(root2), "--no-progress"]) out = capsys.readouterr().out @@ -1040,7 +1046,7 @@ def test_cli_main_progress_fallback( for idx in range(pipeline._parallel_min_files(2) + 1): src = tmp_path / f"a{idx}.py" src.write_text("def f():\n return 1\n", "utf-8") - monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailingExecutor) + monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _FailingExecutor) _run_main(monkeypatch, [str(tmp_path), "--processes", "2"]) out = capsys.readouterr().out assert "falling back to sequential" in out @@ -1054,7 +1060,7 @@ def test_cli_main_no_progress_fallback( for idx in range(pipeline._parallel_min_files(2) + 1): src = tmp_path / f"a{idx}.py" src.write_text("def f():\n return 1\n", "utf-8") - monkeypatch.setattr(pipeline, 
"ProcessPoolExecutor", _FailingExecutor) + monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _FailingExecutor) _run_main(monkeypatch, [str(tmp_path), "--processes", "2", "--no-progress"]) out = capsys.readouterr().out assert "falling back to sequential" in out @@ -1071,7 +1077,7 @@ def test_cli_main_no_progress_fallback_quiet( tmp_path / "baseline.json", python_version=f"{sys.version_info.major}.{sys.version_info.minor}", ) - monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailingExecutor) + monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _FailingExecutor) _run_main( monkeypatch, [ @@ -1129,7 +1135,7 @@ def _boom(*_args: object, **_kwargs: object) -> object: raise RuntimeError("boom") _patch_parallel(monkeypatch) - monkeypatch.setattr(pipeline, "build_groups", _boom) + monkeypatch.setattr(core_pipeline, "build_groups", _boom) with pytest.raises(SystemExit) as exc: _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) assert exc.value.code == 5 @@ -3067,7 +3073,7 @@ def test_cli_discovery_skip_oserror( def _bad_stat(_path: str) -> dict[str, int]: raise OSError("nope") - monkeypatch.setattr(pipeline, "file_stat_signature", _bad_stat) + monkeypatch.setattr(core_discovery, "file_stat_signature", _bad_stat) _patch_parallel(monkeypatch) args = [str(tmp_path), *extra_args] if "--ci" in extra_args: @@ -3106,7 +3112,7 @@ def _source_read_error( ) -> cli.ProcessingResult: return _source_read_error_result(fp) - monkeypatch.setattr(pipeline, "process_file", _source_read_error) + monkeypatch.setattr(core_worker, "process_file", _source_read_error) _run_parallel_main( monkeypatch, [ @@ -3140,7 +3146,7 @@ def _source_read_error( ) -> cli.ProcessingResult: return _source_read_error_result(fp) - monkeypatch.setattr(pipeline, "process_file", _source_read_error) + monkeypatch.setattr(core_worker, "process_file", _source_read_error) _patch_parallel(monkeypatch) with pytest.raises(SystemExit) as exc: _run_main( @@ -3204,7 +3210,7 @@ def _diff( ) -> 
tuple[set[str], set[str]]: return {"f1"}, set() - monkeypatch.setattr(pipeline, "process_file", _source_read_error) + monkeypatch.setattr(core_worker, "process_file", _source_read_error) monkeypatch.setattr(baseline.Baseline, "diff", _diff) _patch_parallel(monkeypatch) with pytest.raises(SystemExit) as exc: @@ -3244,7 +3250,7 @@ def _source_read_error( ) -> cli.ProcessingResult: return _source_read_error_result(fp) - monkeypatch.setattr(pipeline, "process_file", _source_read_error) + monkeypatch.setattr(core_worker, "process_file", _source_read_error) _patch_parallel(monkeypatch) with pytest.raises(SystemExit) as exc: _run_main( @@ -3664,7 +3670,7 @@ def test_cli_scan_failed_is_internal_error( def _boom(_root: str) -> Iterable[str]: raise RuntimeError("scan failed") - monkeypatch.setattr(pipeline, "iter_py_files", _boom) + monkeypatch.setattr(core_discovery, "iter_py_files", _boom) with pytest.raises(SystemExit) as exc: _run_main(monkeypatch, [str(tmp_path)]) assert exc.value.code == 5 @@ -3680,7 +3686,7 @@ def test_cli_scan_oserror_is_contract_error( def _boom(_root: str) -> Iterable[str]: raise OSError("scan denied") - monkeypatch.setattr(pipeline, "iter_py_files", _boom) + monkeypatch.setattr(core_discovery, "iter_py_files", _boom) with pytest.raises(SystemExit) as exc: _run_main(monkeypatch, [str(tmp_path)]) assert exc.value.code == 2 @@ -3702,7 +3708,7 @@ def _bad_process( ) -> cli.ProcessingResult: return cli.ProcessingResult(filepath=_fp, success=False, error="bad") - monkeypatch.setattr(pipeline, "process_file", _bad_process) + monkeypatch.setattr(core_worker, "process_file", _bad_process) _patch_parallel(monkeypatch) _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) out = capsys.readouterr().out @@ -3723,7 +3729,7 @@ def _bad_process( ) -> cli.ProcessingResult: return cli.ProcessingResult(filepath=_fp, success=False, error="bad") - monkeypatch.setattr(pipeline, "process_file", _bad_process) + monkeypatch.setattr(core_worker, "process_file", 
_bad_process) _patch_parallel(monkeypatch) _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) out = capsys.readouterr().out @@ -3742,7 +3748,7 @@ def test_cli_worker_failed( def _boom(*_args: object, **_kwargs: object) -> cli.ProcessingResult: raise RuntimeError("boom") - monkeypatch.setattr(pipeline, "process_file", _boom) + monkeypatch.setattr(core_worker, "process_file", _boom) _patch_parallel(monkeypatch) with pytest.raises(SystemExit) as exc: _run_main(monkeypatch, [str(tmp_path), "--no-progress"]) diff --git a/tests/test_cli_smoke.py b/tests/test_cli_smoke.py index ea3399f..77ec213 100644 --- a/tests/test_cli_smoke.py +++ b/tests/test_cli_smoke.py @@ -33,7 +33,7 @@ def run_cli( [ executable, "-m", - "codeclone.cli", + "codeclone.main", *args, "--processes", "1", diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index 112c528..cc481f8 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ -17,24 +17,24 @@ import pytest -import codeclone._cli_baselines as cli_baselines_mod -import codeclone._cli_meta as cli_meta_mod -import codeclone._cli_reports as cli_reports -import codeclone._cli_summary as cli_summary import codeclone.baseline as baseline_mod -import codeclone.cli as cli -import codeclone.metrics_baseline as metrics_baseline_mod -import codeclone.pipeline as pipeline +import codeclone.baseline.metrics_baseline as metrics_baseline_mod +import codeclone.core as pipeline +import codeclone.core.worker as core_worker +import codeclone.surfaces.cli.baseline_state as cli_baselines_mod +import codeclone.surfaces.cli.main as cli +import codeclone.surfaces.cli.report_meta as cli_meta_mod +import codeclone.surfaces.cli.reports_output as cli_reports +import codeclone.surfaces.cli.summary as cli_summary from codeclone import __version__ from codeclone import ui_messages as ui -from codeclone._cli_args import build_parser -from codeclone._cli_config import ConfigValidationError +from codeclone.analysis.normalizer import NormalizationConfig 
from codeclone.cache import Cache -from codeclone.cli import process_file +from codeclone.config import ConfigValidationError, build_parser from codeclone.contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL -from codeclone.errors import BaselineValidationError +from codeclone.contracts.errors import BaselineValidationError +from codeclone.core.worker import process_file from codeclone.models import HealthScore, ProjectMetrics -from codeclone.normalize import NormalizationConfig from tests._assertions import assert_contains_all @@ -134,7 +134,7 @@ def test_process_file_unexpected_error( def _boom(*_args: object, **_kwargs: object) -> object: raise RuntimeError("boom") - monkeypatch.setattr(pipeline, "extract_units_and_stats_from_source", _boom) + monkeypatch.setattr(core_worker, "extract_units_and_stats_from_source", _boom) result = process_file(str(src), str(tmp_path), NormalizationConfig(), 1, 1) assert result.success is False assert result.error is not None @@ -686,15 +686,32 @@ def test_enforce_gating_rewrites_clone_threshold_for_changed_scope( ) -> None: cli.console = cli._make_console(no_color=True) observed: dict[str, object] = {} + analysis = pipeline.AnalysisResult( + func_groups={}, + block_groups={}, + block_groups_report={}, + segment_groups={}, + suppressed_segment_groups=0, + block_group_facts={}, + func_clones_count=8, + block_clones_count=0, + segment_clones_count=0, + files_analyzed_or_cached=0, + project_metrics=None, + metrics_payload=None, + suggestions=(), + segment_groups_raw_digest="", + ) - monkeypatch.setattr( - cli, - "gate", - lambda **_kwargs: pipeline.GatingResult( + def _fake_gate(**kwargs: object) -> pipeline.GatingResult: + gate_analysis = cast("pipeline.AnalysisResult", kwargs["analysis"]) + observed["clone_threshold_total"] = gate_analysis.func_clones_count + return pipeline.GatingResult( exit_code=3, - reasons=("clone:threshold:8:1",), - ), - ) + reasons=("clone:threshold:2:1",), + ) + + monkeypatch.setattr(cli, "gate", 
_fake_gate) monkeypatch.setattr( cli, "_print_gating_failure_block", @@ -707,7 +724,7 @@ def test_enforce_gating_rewrites_clone_threshold_for_changed_scope( cli._enforce_gating( args=Namespace(fail_threshold=1, verbose=False), boot=cast("pipeline.BootstrapResult", object()), - analysis=cast("pipeline.AnalysisResult", object()), + analysis=analysis, processing=cast(Any, Namespace(source_read_failures=[])), source_read_contract_failure=False, baseline_failure_code=None, @@ -720,6 +737,7 @@ def test_enforce_gating_rewrites_clone_threshold_for_changed_scope( ) assert exc.value.code == 3 + assert observed["clone_threshold_total"] == 2 assert observed["code"] == "threshold" assert observed["entries"] == ( ("clone_groups_total", 2), @@ -732,15 +750,29 @@ def test_enforce_gating_drops_rewritten_threshold_when_changed_scope_is_within_l ) -> None: cli.console = cli._make_console(no_color=True) observed: dict[str, object] = {} - - monkeypatch.setattr( - cli, - "gate", - lambda **_kwargs: pipeline.GatingResult( - exit_code=3, - reasons=("clone:threshold:8:1",), - ), + analysis = pipeline.AnalysisResult( + func_groups={}, + block_groups={}, + block_groups_report={}, + segment_groups={}, + suppressed_segment_groups=0, + block_group_facts={}, + func_clones_count=8, + block_clones_count=0, + segment_clones_count=0, + files_analyzed_or_cached=0, + project_metrics=None, + metrics_payload=None, + suggestions=(), + segment_groups_raw_digest="", ) + + def _fake_gate(**kwargs: object) -> pipeline.GatingResult: + gate_analysis = cast("pipeline.AnalysisResult", kwargs["analysis"]) + observed["clone_threshold_total"] = gate_analysis.func_clones_count + return pipeline.GatingResult(exit_code=0, reasons=()) + + monkeypatch.setattr(cli, "gate", _fake_gate) monkeypatch.setattr( cli, "_print_gating_failure_block", @@ -750,7 +782,7 @@ def test_enforce_gating_drops_rewritten_threshold_when_changed_scope_is_within_l cli._enforce_gating( args=Namespace(fail_threshold=5, verbose=False), 
boot=cast("pipeline.BootstrapResult", object()), - analysis=cast("pipeline.AnalysisResult", object()), + analysis=analysis, processing=cast(Any, Namespace(source_read_failures=[])), source_read_contract_failure=False, baseline_failure_code=None, @@ -762,7 +794,7 @@ def test_enforce_gating_drops_rewritten_threshold_when_changed_scope_is_within_l clone_threshold_total=2, ) - assert observed == {} + assert observed == {"clone_threshold_total": 2} def test_main_impl_prints_changed_scope_when_changed_projection_is_available( diff --git a/tests/test_coerce.py b/tests/test_coerce.py index 9b7b0c0..d8d034a 100644 --- a/tests/test_coerce.py +++ b/tests/test_coerce.py @@ -8,7 +8,7 @@ from collections.abc import Mapping, Sequence -from codeclone import _coerce +from codeclone.utils import coerce as _coerce def test_as_int_handles_bool_int_str_and_default() -> None: diff --git a/tests/test_core_branch_coverage.py b/tests/test_core_branch_coverage.py index b26fdb6..0a92b25 100644 --- a/tests/test_core_branch_coverage.py +++ b/tests/test_core_branch_coverage.py @@ -14,9 +14,11 @@ import orjson import pytest -import codeclone.cli as cli -import codeclone.pipeline as pipeline -from codeclone._cli_gating import policy_context +import codeclone.core as pipeline +import codeclone.core.discovery as core_discovery +import codeclone.core.pipeline as core_pipeline +import codeclone.surfaces.cli.main as cli +from codeclone.analysis.normalizer import NormalizationConfig from codeclone.cache import ( Cache, CacheEntry, @@ -34,9 +36,17 @@ _is_dead_candidate_dict, build_segment_report_projection, ) -from codeclone.cache_segments import decode_segment_report_projection -from codeclone.errors import CacheError -from codeclone.grouping import build_segment_groups +from codeclone.cache.projection import decode_segment_report_projection +from codeclone.contracts.errors import CacheError +from codeclone.core._types import ( + _coerce_segment_report_projection, + _segment_groups_digest, +) +from 
codeclone.core.discovery_cache import ( + _cache_entry_source_stats, + decode_cached_structural_finding_group, +) +from codeclone.findings.clones.grouping import build_segment_groups from codeclone.models import ( BlockUnit, ClassMetrics, @@ -45,7 +55,7 @@ ModuleDep, SegmentUnit, ) -from codeclone.normalize import NormalizationConfig +from codeclone.report.gates.reasons import policy_context from tests._assertions import assert_contains_all @@ -484,7 +494,7 @@ def test_pipeline_analyze_uses_cached_segment_projection( "size": 6, } raw_groups = build_segment_groups((seg_item_a, seg_item_b)) - digest = pipeline._segment_groups_digest(raw_groups) + digest = _segment_groups_digest(raw_groups) cached_projection = { "digest": digest, "suppressed": 7, @@ -522,7 +532,7 @@ def _must_not_run( ) -> tuple[dict[str, list[dict[str, object]]], int]: raise AssertionError("prepare_segment_report_groups must not be called") - monkeypatch.setattr(pipeline, "prepare_segment_report_groups", _must_not_run) + monkeypatch.setattr(core_pipeline, "prepare_segment_report_groups", _must_not_run) boot = pipeline.BootstrapResult( root=Path("."), @@ -582,15 +592,13 @@ def _must_not_run( def test_pipeline_coerce_segment_projection_invalid_shapes() -> None: - assert pipeline._coerce_segment_report_projection("bad") is None + assert _coerce_segment_report_projection("bad") is None assert ( - pipeline._coerce_segment_report_projection( - {"digest": 1, "suppressed": 0, "groups": {}} - ) + _coerce_segment_report_projection({"digest": 1, "suppressed": 0, "groups": {}}) is None ) assert ( - pipeline._coerce_segment_report_projection( + _coerce_segment_report_projection( {"digest": "d", "suppressed": 0, "groups": {"k": "bad"}} ) is None @@ -672,7 +680,7 @@ def test_pipeline_analyze_tracks_suppressed_dead_code_candidates() -> None: def test_pipeline_decode_cached_structural_group() -> None: - decoded = pipeline._decode_cached_structural_finding_group( + decoded = decode_cached_structural_finding_group( { 
"finding_kind": "duplicated_branches", "finding_key": "k", @@ -708,8 +716,8 @@ def get_file_entry(self, _path: str) -> dict[str, object]: output_paths=pipeline.OutputPaths(), cache_path=tmp_path / "cache.json", ) - monkeypatch.setattr(pipeline, "iter_py_files", lambda _root: [filepath]) - monkeypatch.setattr(pipeline, "file_stat_signature", lambda _path: stat) + monkeypatch.setattr(core_discovery, "iter_py_files", lambda _root: [filepath]) + monkeypatch.setattr(core_discovery, "file_stat_signature", lambda _path: stat) return pipeline.discover(boot=boot, cache=cast(Cache, _FakeCache())) @@ -830,9 +838,9 @@ def test_pipeline_discover_cache_admission_branches( def test_pipeline_cached_source_stats_helper_invalid_shapes() -> None: - assert pipeline._cache_entry_source_stats(cast(CacheEntry, {})) is None + assert _cache_entry_source_stats(cast(CacheEntry, {})) is None assert ( - pipeline._cache_entry_source_stats( + _cache_entry_source_stats( cast( CacheEntry, { diff --git a/tests/test_detector_golden.py b/tests/test_detector_golden.py index d03e103..9d05df2 100644 --- a/tests/test_detector_golden.py +++ b/tests/test_detector_golden.py @@ -12,9 +12,9 @@ import pytest -from codeclone import extractor +from codeclone.analysis.normalizer import NormalizationConfig +from codeclone.analysis.units import extract_units_and_stats_from_source from codeclone.baseline import current_python_tag -from codeclone.normalize import NormalizationConfig from codeclone.report import build_block_groups, build_groups from codeclone.scanner import module_name_from_path from tests._assertions import snapshot_python_tag @@ -29,7 +29,7 @@ def _detect_group_keys(project_root: Path) -> tuple[list[str], list[str]]: source = path.read_text("utf-8") module_name = module_name_from_path(str(project_root), str(path)) units, blocks, _segments, _source_stats, _file_metrics, _sf = ( - extractor.extract_units_and_stats_from_source( + extract_units_and_stats_from_source( source=source, filepath=str(path), 
module_name=module_name, diff --git a/tests/test_extractor.py b/tests/test_extractor.py index 33e89fe..b1fcdf8 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -15,11 +15,14 @@ import pytest -from codeclone import extractor, qualnames -from codeclone.errors import ParseError +import codeclone.analysis._module_walk as module_walk_mod +import codeclone.analysis.parser as parser_mod +import codeclone.analysis.units as units_mod +from codeclone import qualnames +from codeclone.analysis.normalizer import NormalizationConfig +from codeclone.contracts.errors import ParseError from codeclone.metrics import find_unused -from codeclone.models import BlockUnit, ClassMetrics, ModuleDep, SegmentUnit -from codeclone.normalize import NormalizationConfig +from codeclone.models import BlockUnit, ClassMetrics, ModuleDep, SegmentUnit, Unit from codeclone.qualnames import FunctionNode, QualnameCollector @@ -36,12 +39,12 @@ def extract_units_from_source( segment_min_loc: int = 20, segment_min_stmt: int = 10, ) -> tuple[ - list[extractor.Unit], + list[Unit], list[BlockUnit], list[SegmentUnit], ]: units, blocks, segments, _source_stats, _file_metrics, _sf = ( - extractor.extract_units_and_stats_from_source( + units_mod.extract_units_and_stats_from_source( source=source, filepath=filepath, module_name=module_name, @@ -71,9 +74,9 @@ def _collect_module_walk( *, module_name: str = "pkg.mod", collect_referenced_names: bool = True, -) -> tuple[ast.Module, QualnameCollector, extractor._ModuleWalkResult]: +) -> tuple[ast.Module, QualnameCollector, module_walk_mod._ModuleWalkResult]: tree, collector = _parse_tree_and_collector(source) - walk = extractor._collect_module_walk_data( + walk = module_walk_mod._collect_module_walk_data( tree=tree, module_name=module_name, collector=collector, @@ -88,7 +91,7 @@ def _dead_qualnames_from_source( filepath: str = "pkg/mod.py", module_name: str = "pkg.mod", ) -> tuple[str, ...]: - _, _, _, _, file_metrics, _ = 
extractor.extract_units_and_stats_from_source( + _, _, _, _, file_metrics, _ = units_mod.extract_units_and_stats_from_source( source=source, filepath=filepath, module_name=module_name, @@ -131,13 +134,13 @@ def foo(): def test_source_tokens_returns_empty_on_tokenize_error() -> None: - assert extractor._source_tokens('"""') == () + assert parser_mod._source_tokens('"""') == () def test_declaration_token_index_returns_none_when_start_token_is_missing() -> None: - tokens = extractor._source_tokens("value = 1\n") + tokens = parser_mod._source_tokens("value = 1\n") assert ( - extractor._declaration_token_index( + parser_mod._declaration_token_index( source_tokens=tokens, start_line=1, start_col=0, @@ -148,11 +151,11 @@ def test_declaration_token_index_returns_none_when_start_token_is_missing() -> N def test_declaration_token_index_uses_prebuilt_index() -> None: - tokens = extractor._source_tokens("async def demo():\n return 1\n") - token_index = extractor._build_declaration_token_index(tokens) + tokens = parser_mod._source_tokens("async def demo():\n return 1\n") + token_index = parser_mod._build_declaration_token_index(tokens) assert ( - extractor._declaration_token_index( + parser_mod._declaration_token_index( source_tokens=tokens, start_line=1, start_col=0, @@ -171,11 +174,11 @@ async def demo(): """ ).body[0] assert isinstance(async_node, ast.AsyncFunctionDef) - assert extractor._declaration_token_name(async_node) == "async" + assert parser_mod._declaration_token_name(async_node) == "async" - tokens = extractor._source_tokens("def demo():\n return 1\n") + tokens = parser_mod._source_tokens("def demo():\n return 1\n") assert ( - extractor._declaration_token_index( + parser_mod._declaration_token_index( source_tokens=tokens, start_line=1, start_col=0, @@ -184,22 +187,22 @@ async def demo(): == 0 ) - nested_tokens = extractor._source_tokens( + nested_tokens = parser_mod._source_tokens( "def demo(arg: tuple[int, int]) -> tuple[int, int]:\n return arg\n" ) assert ( - 
extractor._scan_declaration_colon_line( + parser_mod._scan_declaration_colon_line( source_tokens=nested_tokens, start_index=0, ) == 1 ) - default_tokens = extractor._source_tokens( + default_tokens = parser_mod._source_tokens( "def demo(arg=(1, [2])):\n return arg\n" ) assert ( - extractor._scan_declaration_colon_line( + parser_mod._scan_declaration_colon_line( source_tokens=default_tokens, start_index=0, ) @@ -212,7 +215,7 @@ async def demo(): tokenize.TokenInfo(tokenize.OP, "(", (1, 8), (1, 9), "def demo("), ) assert ( - extractor._scan_declaration_colon_line( + parser_mod._scan_declaration_colon_line( source_tokens=eof_tokens, start_index=0, ) @@ -224,7 +227,7 @@ async def demo(): tokenize.TokenInfo(tokenize.OP, ")", (1, 8), (1, 9), "def demo)"), ) assert ( - extractor._scan_declaration_colon_line( + parser_mod._scan_declaration_colon_line( source_tokens=unmatched_close_tokens, start_index=0, ) @@ -233,9 +236,9 @@ async def demo(): def test_scan_declaration_colon_line_returns_none_when_header_is_incomplete() -> None: - tokens = extractor._source_tokens("def broken\n") + tokens = parser_mod._source_tokens("def broken\n") assert ( - extractor._scan_declaration_colon_line( + parser_mod._scan_declaration_colon_line( source_tokens=tokens, start_index=0, ) @@ -251,7 +254,7 @@ class Demo: """ ).body[0] assert isinstance(node, ast.ClassDef) - assert extractor._declaration_end_line(node, source_tokens=()) == 2 + assert parser_mod._declaration_end_line(node, source_tokens=()) == 2 def test_declaration_end_line_returns_zero_for_invalid_start_line() -> None: @@ -263,7 +266,7 @@ def broken(): ).body[0] assert isinstance(node, ast.FunctionDef) node.lineno = 0 - assert extractor._declaration_end_line(node, source_tokens=()) == 0 + assert parser_mod._declaration_end_line(node, source_tokens=()) == 0 def test_declaration_fallback_helpers_cover_empty_and_same_line_bodies() -> None: @@ -275,7 +278,7 @@ def demo(): ).body[0] assert isinstance(empty_body_node, ast.FunctionDef) 
empty_body_node.body = [] - assert extractor._fallback_declaration_end_line(empty_body_node, start_line=2) == 2 + assert parser_mod._fallback_declaration_end_line(empty_body_node, start_line=2) == 2 inline_body_node = ast.parse( """ @@ -285,14 +288,16 @@ def demo(): ).body[0] assert isinstance(inline_body_node, ast.FunctionDef) inline_body_node.body[0].lineno = 2 - assert extractor._fallback_declaration_end_line(inline_body_node, start_line=2) == 2 + assert ( + parser_mod._fallback_declaration_end_line(inline_body_node, start_line=2) == 2 + ) no_colon_tokens = ( tokenize.TokenInfo(tokenize.NAME, "def", (2, 0), (2, 3), "def demo"), tokenize.TokenInfo(tokenize.NAME, "demo", (2, 4), (2, 8), "def demo"), ) assert ( - extractor._declaration_end_line( + parser_mod._declaration_end_line( inline_body_node, source_tokens=no_colon_tokens, ) @@ -351,7 +356,7 @@ def test_extract_units_skips_suppression_tokenization_without_inline_directives( source: str, ) -> None: monkeypatch.setattr( - extractor, + module_walk_mod, "_source_tokens", lambda _source: (_ for _ in ()).throw( AssertionError("_source_tokens should not be called") @@ -376,14 +381,17 @@ def test_extract_units_tokenizes_when_inline_suppressions_exist( monkeypatch: pytest.MonkeyPatch, ) -> None: calls = 0 - original_source_tokens = extractor._source_tokens + original_source_tokens = cast( + "Callable[[str], tuple[tokenize.TokenInfo, ...]]", + module_walk_mod.__dict__["_source_tokens"], + ) def _record_tokens(source: str) -> tuple[tokenize.TokenInfo, ...]: nonlocal calls calls += 1 return original_source_tokens(source) - monkeypatch.setattr(extractor, "_source_tokens", _record_tokens) + monkeypatch.setattr(module_walk_mod, "_source_tokens", _record_tokens) units, blocks, segments = extract_units_from_source( source=""" @@ -424,7 +432,7 @@ def foo(x): return a + b + c + d + e """ _units, _blocks, _segments, _source_stats, _file_metrics, sf = ( - extractor.extract_units_and_stats_from_source( + 
units_mod.extract_units_and_stats_from_source( source=src, filepath="x.py", module_name="mod", @@ -440,19 +448,19 @@ def foo(x): def test_parse_timeout_raises(monkeypatch: pytest.MonkeyPatch) -> None: @contextmanager def _boom(_timeout_s: int) -> Iterator[None]: - raise extractor._ParseTimeoutError("AST parsing timeout") + raise parser_mod._ParseTimeoutError("AST parsing timeout") if False: yield - monkeypatch.setattr(extractor, "_parse_limits", _boom) + monkeypatch.setattr(parser_mod, "_parse_limits", _boom) with pytest.raises(ParseError, match="AST parsing timeout"): - extractor._parse_with_limits("x = 1", 1) + parser_mod._parse_with_limits("x = 1", 1) def test_parse_limits_no_timeout() -> None: - with extractor._parse_limits(0): - tree = extractor._parse_with_limits("x = 1", 0) + with parser_mod._parse_limits(0): + tree = parser_mod._parse_with_limits("x = 1", 0) assert tree is not None @@ -481,8 +489,8 @@ def setrlimit(_key: int, _val: tuple[int, int]) -> None: _patch_posix_parse_limits(monkeypatch, _DummyResource) - with extractor._parse_limits(1): - tree = extractor._parse_with_limits("x = 1", 1) + with parser_mod._parse_limits(1): + tree = parser_mod._parse_with_limits("x = 1", 1) assert tree is not None @@ -505,7 +513,7 @@ def setrlimit(_key: int, val: tuple[int, int]) -> None: _patch_posix_parse_limits(monkeypatch, _DummyResource) - with extractor._parse_limits(5): + with parser_mod._parse_limits(5): pass assert calls @@ -546,7 +554,7 @@ def getrusage(_who: int) -> _DummyUsage: _patch_posix_parse_limits(monkeypatch, _DummyResource) - with extractor._parse_limits(5): + with parser_mod._parse_limits(5): pass assert calls @@ -586,7 +594,7 @@ def getrusage(_who: int) -> _DummyUsage: _patch_posix_parse_limits(monkeypatch, _DummyResource) - with extractor._parse_limits(5): + with parser_mod._parse_limits(5): pass # Raised from 2 to ceil(10)+5 to avoid immediate SIGXCPU. 
@@ -613,7 +621,7 @@ def setrlimit(_key: int, val: tuple[int, int]) -> None: _patch_posix_parse_limits(monkeypatch, _DummyResource) - with extractor._parse_limits(5): + with parser_mod._parse_limits(5): pass # Finite soft limits are never lowered. @@ -646,22 +654,26 @@ def setrlimit(_key: int, _val: tuple[int, int]) -> None: monkeypatch.setitem(sys.modules, "resource", _DummyResource) # Should not raise even if restoring old limits fails. - with extractor._parse_limits(5): + with parser_mod._parse_limits(5): pass def test_resolve_import_target_absolute_and_relative() -> None: absolute = ast.ImportFrom(module="pkg.util", names=[], level=0) - assert extractor._resolve_import_target("root.mod.sub", absolute) == "pkg.util" + assert ( + module_walk_mod._resolve_import_target("root.mod.sub", absolute) == "pkg.util" + ) relative = ast.ImportFrom(module="helpers", names=[], level=1) assert ( - extractor._resolve_import_target("root.mod.sub", relative) == "root.mod.helpers" + module_walk_mod._resolve_import_target("root.mod.sub", relative) + == "root.mod.helpers" ) relative_no_module = ast.ImportFrom(module=None, names=[], level=2) assert ( - extractor._resolve_import_target("root.mod.sub", relative_no_module) == "root" + module_walk_mod._resolve_import_target("root.mod.sub", relative_no_module) + == "root" ) @@ -680,7 +692,7 @@ def test_collect_module_walk_data_imports_and_references() -> None: ) collector = QualnameCollector() collector.visit(tree) - walk = extractor._collect_module_walk_data( + walk = module_walk_mod._collect_module_walk_data( tree=tree, module_name="root.mod.sub", collector=collector, @@ -720,7 +732,7 @@ def test_collect_module_walk_data_edge_branches() -> None: tree = ast.parse("from .... 
import parent") collector = QualnameCollector() collector.visit(tree) - walk = extractor._collect_module_walk_data( + walk = module_walk_mod._collect_module_walk_data( tree=tree, module_name="pkg.mod", collector=collector, @@ -733,7 +745,7 @@ def test_collect_module_walk_data_edge_branches() -> None: lambda_call_tree = ast.parse("(lambda x: x)(1)") lambda_collector = QualnameCollector() lambda_collector.visit(lambda_call_tree) - lambda_walk = extractor._collect_module_walk_data( + lambda_walk = module_walk_mod._collect_module_walk_data( tree=lambda_call_tree, module_name="pkg.mod", collector=lambda_collector, @@ -752,7 +764,7 @@ def test_collect_module_walk_data_without_referenced_name_collection() -> None: ) collector = QualnameCollector() collector.visit(tree) - walk = extractor._collect_module_walk_data( + walk = module_walk_mod._collect_module_walk_data( tree=tree, module_name="root.mod.sub", collector=collector, @@ -777,12 +789,12 @@ def test_collect_module_walk_data_without_referenced_name_collection() -> None: def test_module_walk_helpers_cover_import_and_reference_branches() -> None: - state = extractor._ModuleWalkState() + state = module_walk_mod._ModuleWalkState() import_node = cast( ast.Import, ast.parse("import typing_extensions as te").body[0], ) - extractor._collect_import_node( + module_walk_mod._collect_import_node( node=import_node, module_name="pkg.mod", state=state, @@ -796,7 +808,7 @@ def test_module_walk_helpers_cover_import_and_reference_branches() -> None: ast.ImportFrom, ast.parse("from typing import Protocol as Proto, Thing as Alias").body[0], ) - extractor._collect_import_from_node( + module_walk_mod._collect_import_from_node( node=import_from_node, module_name="pkg.mod", state=state, @@ -810,7 +822,7 @@ def test_module_walk_helpers_cover_import_and_reference_branches() -> None: names=[ast.alias(name="parent", asname=None)], level=4, ) - extractor._collect_import_from_node( + module_walk_mod._collect_import_from_node( 
node=unresolved_import, module_name="pkg.mod", state=state, @@ -820,9 +832,9 @@ def test_module_walk_helpers_cover_import_and_reference_branches() -> None: name_node = cast(ast.Name, ast.parse("value", mode="eval").body) attr_node = cast(ast.Attribute, ast.parse("obj.attr", mode="eval").body) - extractor._collect_load_reference_node(node=name_node, state=state) - extractor._collect_load_reference_node(node=attr_node, state=state) - extractor._collect_load_reference_node( + module_walk_mod._collect_load_reference_node(node=name_node, state=state) + module_walk_mod._collect_load_reference_node(node=attr_node, state=state) + module_walk_mod._collect_load_reference_node( node=cast(ast.Constant, ast.parse("1", mode="eval").body), state=state, ) @@ -832,8 +844,11 @@ def test_module_walk_helpers_cover_import_and_reference_branches() -> None: def test_dotted_expr_protocol_detection_and_runtime_candidate_edges() -> None: dotted_expr = ast.parse("pkg.helpers.decorate", mode="eval").body - assert extractor._dotted_expr_name(dotted_expr) == "pkg.helpers.decorate" - assert extractor._dotted_expr_name(ast.parse("custom()", mode="eval").body) is None + assert module_walk_mod._dotted_expr_name(dotted_expr) == "pkg.helpers.decorate" + assert ( + module_walk_mod._dotted_expr_name(ast.parse("custom()", mode="eval").body) + is None + ) tree = ast.parse( """ @@ -848,7 +863,7 @@ class B(te.Protocol[int]): ) collector = QualnameCollector() collector.visit(tree) - walk = extractor._collect_module_walk_data( + walk = module_walk_mod._collect_module_walk_data( tree=tree, module_name="pkg.mod", collector=collector, @@ -859,12 +874,12 @@ class B(te.Protocol[int]): assert "te" in protocol_module_aliases classes = [node for node in tree.body if isinstance(node, ast.ClassDef)] class_a, class_b = classes - assert extractor._is_protocol_class( + assert module_walk_mod._is_protocol_class( class_a, protocol_symbol_aliases=protocol_symbol_aliases, protocol_module_aliases=protocol_module_aliases, ) - 
assert not extractor._is_protocol_class( + assert not module_walk_mod._is_protocol_class( class_b, protocol_symbol_aliases=protocol_symbol_aliases, protocol_module_aliases=protocol_module_aliases, @@ -880,7 +895,7 @@ def f(x): """.strip() ).body[0] assert isinstance(runtime_candidate, ast.FunctionDef) - assert extractor._is_non_runtime_candidate(runtime_candidate) + assert module_walk_mod._is_non_runtime_candidate(runtime_candidate) def test_resolve_referenced_qualnames_covers_module_class_and_attr_branches() -> None: @@ -899,26 +914,26 @@ def hook(self) -> int: dynamic = factory().attr """ tree, collector = _parse_tree_and_collector(src) - state = extractor._ModuleWalkState() + state = module_walk_mod._ModuleWalkState() for node in ast.walk(tree): if isinstance(node, ast.Import): - extractor._collect_import_node( + module_walk_mod._collect_import_node( node=node, module_name="pkg.mod", state=state, collect_referenced_names=True, ) elif isinstance(node, ast.ImportFrom): - extractor._collect_import_from_node( + module_walk_mod._collect_import_from_node( node=node, module_name="pkg.mod", state=state, collect_referenced_names=True, ) else: - extractor._collect_load_reference_node(node=node, state=state) + module_walk_mod._collect_load_reference_node(node=node, state=state) - resolved = extractor._resolve_referenced_qualnames( + resolved = module_walk_mod._resolve_referenced_qualnames( module_name="pkg.mod", collector=collector, state=state, @@ -961,7 +976,7 @@ def test_extractor_private_helper_branches_cover_invalid_protocol_and_declaratio attr="method", ctx=ast.Load(), ) - assert extractor._dotted_expr_name(expr) is None + assert module_walk_mod._dotted_expr_name(expr) is None protocol_class = ast.parse( """ @@ -971,7 +986,7 @@ class Demo(Unknown, alias.Protocol): ).body[0] assert isinstance(protocol_class, ast.ClassDef) assert ( - extractor._is_protocol_class( + module_walk_mod._is_protocol_class( protocol_class, protocol_symbol_aliases=frozenset({"Protocol"}), 
protocol_module_aliases=frozenset({"typing"}), @@ -988,7 +1003,7 @@ def demo(): assert isinstance(bad_span_node, ast.FunctionDef) bad_span_node.lineno = 3 bad_span_node.end_lineno = 2 - assert extractor._eligible_unit_shape(bad_span_node, min_loc=1, min_stmt=1) is None + assert units_mod._eligible_unit_shape(bad_span_node, min_loc=1, min_stmt=1) is None _, missing_method_collector, missing_method_walk = _collect_module_walk( """ @@ -1012,7 +1027,7 @@ def work(self) -> int: declaration_collector.units[0][1].end_lineno = 0 declaration_collector.class_nodes[0][1].end_lineno = 0 assert ( - extractor._collect_declaration_targets( + module_walk_mod._collect_declaration_targets( filepath="pkg/mod.py", module_name="pkg.mod", collector=declaration_collector, @@ -1025,8 +1040,8 @@ def demo(): # codeclone: ignore[dead-code] return 1 """ _, suppression_collector = _parse_tree_and_collector(suppression_source) - monkeypatch.setattr(extractor, "_source_tokens", lambda _source: ()) - suppression_index = extractor._build_suppression_index_for_source( + monkeypatch.setattr(module_walk_mod, "_source_tokens", lambda _source: ()) + suppression_index = module_walk_mod._build_suppression_index_for_source( source=suppression_source, filepath="pkg/mod.py", module_name="pkg.mod", @@ -1041,7 +1056,7 @@ def test_extract_stats_drops_referenced_names_for_test_filepaths() -> None: live() """ - _, _, _, _, test_metrics, _ = extractor.extract_units_and_stats_from_source( + _, _, _, _, test_metrics, _ = units_mod.extract_units_and_stats_from_source( source=src, filepath="pkg/tests/test_usage.py", module_name="pkg.tests.test_usage", @@ -1049,7 +1064,7 @@ def test_extract_stats_drops_referenced_names_for_test_filepaths() -> None: min_loc=1, min_stmt=1, ) - _, _, _, _, regular_metrics, _ = extractor.extract_units_and_stats_from_source( + _, _, _, _, regular_metrics, _ = units_mod.extract_units_and_stats_from_source( source=src, filepath="pkg/usage.py", module_name="pkg.usage", @@ -1086,7 +1101,7 @@ 
def verify(self): def make(): return Service() """ - _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + _, _, _, _, file_metrics, _ = units_mod.extract_units_and_stats_from_source( source=src, filepath="pkg/service.py", module_name="pkg.service", @@ -1123,7 +1138,7 @@ def test_orphan_usage(): assert orphan() == 1 """ - _, _, _, _, prod_metrics, _ = extractor.extract_units_and_stats_from_source( + _, _, _, _, prod_metrics, _ = units_mod.extract_units_and_stats_from_source( source=src_prod, filepath="pkg/mod.py", module_name="pkg.mod", @@ -1131,7 +1146,7 @@ def test_orphan_usage(): min_loc=1, min_stmt=1, ) - _, _, _, _, test_metrics, _ = extractor.extract_units_and_stats_from_source( + _, _, _, _, test_metrics, _ = units_mod.extract_units_and_stats_from_source( source=src_test, filepath="pkg/tests/test_mod.py", module_name="pkg.tests.test_mod", @@ -1264,7 +1279,7 @@ def used(): broken_class.lineno = 0 broken_class.end_lineno = 0 collector.class_nodes.append(("Broken", broken_class)) - dead = extractor._collect_dead_candidates( + dead = module_walk_mod._collect_dead_candidates( filepath="pkg/mod.py", module_name="pkg.mod", collector=collector, @@ -1283,7 +1298,7 @@ def visit(self, _tree: ast.AST) -> None: return None monkeypatch.setattr(qualnames, "QualnameCollector", _CollectorNoClassMetrics) - _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + _, _, _, _, file_metrics, _ = units_mod.extract_units_and_stats_from_source( source="class Broken:\n pass\n", filepath="pkg/mod.py", module_name="pkg.mod", @@ -1303,7 +1318,7 @@ def wrapper(): value = _run_impl() return helpers.decorate(value) """ - _, _, _, _, file_metrics, _ = extractor.extract_units_and_stats_from_source( + _, _, _, _, file_metrics, _ = units_mod.extract_units_and_stats_from_source( source=src, filepath="pkg/cli.py", module_name="pkg.cli", @@ -1335,7 +1350,7 @@ def parse_value(value: object) -> str: return str(value) """ _tree, collector, walk = 
_collect_module_walk(src) - dead = extractor._collect_dead_candidates( + dead = module_walk_mod._collect_dead_candidates( filepath="pkg/mod.py", module_name="pkg.mod", collector=collector, @@ -1515,7 +1530,7 @@ def _extract_with_thresholds( stmt_count: int, lines_per_stmt: int, **thresholds: int, - ) -> tuple[list[extractor.Unit], list[BlockUnit], list[SegmentUnit]]: + ) -> tuple[list[Unit], list[BlockUnit], list[SegmentUnit]]: return extract_units_from_source( source=self._make_func( stmt_count=stmt_count, @@ -1691,10 +1706,10 @@ def _fake_extract_segments( captured_hashes["value"] = precomputed_hashes return [] - monkeypatch.setattr(extractor, "_parse_with_limits", _fake_parse) - monkeypatch.setattr(extractor, "_stmt_count", lambda _node: 12) - monkeypatch.setattr(extractor, "_cfg_fingerprint_and_complexity", _fake_fingerprint) - monkeypatch.setattr(extractor, "extract_segments", _fake_extract_segments) + monkeypatch.setattr(units_mod, "_parse_with_limits", _fake_parse) + monkeypatch.setattr(units_mod, "_stmt_count", lambda _node: 12) + monkeypatch.setattr(units_mod, "_cfg_fingerprint_and_complexity", _fake_fingerprint) + monkeypatch.setattr(units_mod, "extract_segments", _fake_extract_segments) units, blocks, segments = extract_units_from_source( source="def f():\n pass\n", @@ -1725,7 +1740,7 @@ def f(): def _fake_parse(_source: str, _timeout_s: int) -> ast.AST: return tree - monkeypatch.setattr(extractor, "_parse_with_limits", _fake_parse) + monkeypatch.setattr(units_mod, "_parse_with_limits", _fake_parse) units, blocks, segments = extract_units_from_source( source="def f():\n return 1\n", filepath="x.py", @@ -1771,4 +1786,4 @@ def _fake_signal(_sig: int, handler: Callable[[int, object], None] | None) -> No monkeypatch.setattr(signal, "setitimer", lambda *_args, **_kwargs: None) with pytest.raises(ParseError, match="AST parsing timeout"): - extractor._parse_with_limits("x = 1", 1) + parser_mod._parse_with_limits("x = 1", 1) diff --git 
a/tests/test_fingerprint.py b/tests/test_fingerprint.py index a785d15..278107e 100644 --- a/tests/test_fingerprint.py +++ b/tests/test_fingerprint.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy -from codeclone.fingerprint import bucket_loc, sha1 +from codeclone.analysis.fingerprint import bucket_loc, sha1 def test_sha1_stable() -> None: diff --git a/tests/test_gating.py b/tests/test_gating.py new file mode 100644 index 0000000..04d2ada --- /dev/null +++ b/tests/test_gating.py @@ -0,0 +1,218 @@ +from __future__ import annotations + +from argparse import Namespace +from pathlib import Path + +from codeclone.analysis.normalizer import NormalizationConfig +from codeclone.core._types import AnalysisResult, BootstrapResult, OutputPaths +from codeclone.core.reporting import gate as cli_gate +from codeclone.models import ( + DeadItem, + HealthScore, + MetricsDiff, + ModuleDep, + ProjectMetrics, +) +from codeclone.report.gates import MetricGateConfig, evaluate_gates +from codeclone.surfaces.mcp.service import CodeCloneMCPService +from codeclone.surfaces.mcp.session import ( + MCPAnalysisRequest, + MCPGateRequest, + MCPRunRecord, +) + + +def _project_metrics() -> ProjectMetrics: + return ProjectMetrics( + complexity_avg=10.0, + complexity_max=30, + high_risk_functions=("pkg.mod:hot",), + coupling_avg=5.0, + coupling_max=12, + high_risk_classes=("pkg.mod:Service",), + cohesion_avg=2.5, + cohesion_max=4, + low_cohesion_classes=("pkg.mod:Service",), + dependency_modules=2, + dependency_edges=1, + dependency_edge_list=( + ModuleDep(source="pkg.mod", target="pkg.dep", import_type="import", line=1), + ), + dependency_cycles=(), + dependency_max_depth=1, + dependency_longest_chains=(), + dead_code=( + DeadItem( + qualname="pkg.mod:unused", + filepath="pkg/mod.py", + start_line=1, + end_line=2, + kind="function", + confidence="high", + ), + ), + health=HealthScore(total=90, grade="A", dimensions={"health": 90}), + ) + + +def 
_report_document() -> dict[str, object]: + return { + "meta": {"baseline": {"status": "ok"}}, + "findings": { + "groups": { + "clones": { + "functions": [{"id": "clone:function:new", "novelty": "new"}], + "blocks": [], + "segments": [], + } + } + }, + "metrics": { + "families": { + "complexity": {"summary": {"max": 30}}, + "coupling": {"summary": {"max": 12}}, + "cohesion": {"summary": {"max": 4}}, + "dependencies": {"summary": {"cycles": 0}}, + "dead_code": {"summary": {"high_confidence": 1}}, + "health": {"summary": {"score": 90}}, + "coverage_adoption": { + "summary": { + "param_permille": 1000, + "docstring_permille": 1000, + "param_delta": 0, + "return_delta": 0, + "docstring_delta": 0, + } + }, + "api_surface": {"summary": {"breaking": 0}}, + "coverage_join": {"summary": {"status": "", "coverage_hotspots": 0}}, + } + }, + } + + +def test_cli_and_mcp_gate_results_match_for_same_inputs(tmp_path: Path) -> None: + report_document = _report_document() + project_metrics = _project_metrics() + metrics_diff = MetricsDiff( + new_high_risk_functions=(), + new_high_coupling_classes=(), + new_cycles=(), + new_dead_code=("pkg.mod:unused",), + health_delta=-1, + ) + config = MetricGateConfig( + fail_complexity=20, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=True, + fail_health=-1, + fail_on_new_metrics=True, + fail_on_new=True, + fail_threshold=0, + ) + + args = Namespace( + fail_complexity=config.fail_complexity, + fail_coupling=config.fail_coupling, + fail_cohesion=config.fail_cohesion, + fail_cycles=config.fail_cycles, + fail_dead_code=config.fail_dead_code, + fail_health=config.fail_health, + fail_on_new_metrics=config.fail_on_new_metrics, + fail_on_typing_regression=config.fail_on_typing_regression, + fail_on_docstring_regression=config.fail_on_docstring_regression, + fail_on_api_break=config.fail_on_api_break, + fail_on_untested_hotspots=config.fail_on_untested_hotspots, + min_typing_coverage=config.min_typing_coverage, + 
min_docstring_coverage=config.min_docstring_coverage, + coverage_min=config.coverage_min, + fail_on_new=config.fail_on_new, + fail_threshold=config.fail_threshold, + ) + boot = BootstrapResult( + root=tmp_path, + config=NormalizationConfig(), + args=args, + output_paths=OutputPaths(), + cache_path=tmp_path / "cache.json", + ) + analysis = AnalysisResult( + func_groups={}, + block_groups={}, + block_groups_report={}, + segment_groups={}, + suppressed_segment_groups=0, + block_group_facts={}, + func_clones_count=1, + block_clones_count=0, + segment_clones_count=0, + files_analyzed_or_cached=1, + project_metrics=project_metrics, + metrics_payload=None, + suggestions=(), + segment_groups_raw_digest="", + ) + + cli_result = cli_gate( + boot=boot, + analysis=analysis, + new_func={"clone:function:new"}, + new_block=set(), + metrics_diff=metrics_diff, + ) + + service = CodeCloneMCPService(history_limit=2) + request = MCPAnalysisRequest(root=str(tmp_path), respect_pyproject=False) + record = MCPRunRecord( + run_id="gate-parity", + root=tmp_path, + request=request, + comparison_settings=(), + report_document=report_document, + summary={}, + changed_paths=(), + changed_projection=None, + warnings=(), + failures=(), + func_clones_count=1, + block_clones_count=0, + project_metrics=project_metrics, + coverage_join=None, + suggestions=(), + new_func=frozenset({"clone:function:new"}), + new_block=frozenset(), + metrics_diff=metrics_diff, + ) + mcp_result = service._evaluate_gate_snapshot( + record=record, + request=MCPGateRequest( + fail_complexity=20, + fail_dead_code=True, + fail_on_new_metrics=True, + fail_on_new=True, + fail_threshold=0, + ), + ) + + evaluator_result = evaluate_gates( + report_document=report_document, + config=config, + baseline_status="ok", + metrics_diff=metrics_diff, + clone_new_count=1, + clone_total=1, + ) + + expected_reasons = ( + "metric:Complexity threshold exceeded: max CC=30, threshold=20.", + "metric:Dead code detected (high confidence): 1 
item(s).", + "metric:New dead code items vs metrics baseline: 1.", + "metric:Health score regressed vs metrics baseline: delta=-1.", + "clone:new", + "clone:threshold:1:0", + ) + + assert cli_result == mcp_result == evaluator_result + assert cli_result.reasons == expected_reasons diff --git a/tests/test_golden_v2.py b/tests/test_golden_v2.py index 3de17ea..4ef283c 100644 --- a/tests/test_golden_v2.py +++ b/tests/test_golden_v2.py @@ -16,16 +16,22 @@ import pytest -import codeclone.pipeline as pipeline -from codeclone import cli +import codeclone.core.parallelism as core_parallelism +import codeclone.main as cli +from codeclone.analysis.normalizer import NormalizationConfig +from codeclone.analysis.units import extract_units_and_stats_from_source from codeclone.baseline import current_python_tag -from codeclone.extractor import extract_units_and_stats_from_source -from codeclone.grouping import build_block_groups, build_groups, build_segment_groups +from codeclone.core.pipeline import compute_project_metrics +from codeclone.findings.clones.grouping import ( + build_block_groups, + build_groups, + build_segment_groups, +) +from codeclone.findings.structural.detectors import ( + build_clone_cohort_structural_findings, +) from codeclone.models import ClassMetrics, DeadCandidate, ModuleDep -from codeclone.normalize import NormalizationConfig -from codeclone.pipeline import compute_project_metrics from codeclone.scanner import iter_py_files, module_name_from_path -from codeclone.structural_findings import build_clone_cohort_structural_findings from tests._assertions import snapshot_python_tag _GOLDEN_V2_ROOT = Path("tests/fixtures/golden_v2").resolve() @@ -59,8 +65,12 @@ def _dummy_process_pool_executor( def _patch_parallel(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _dummy_process_pool_executor) - monkeypatch.setattr(pipeline, "as_completed", lambda futures: futures) + monkeypatch.setattr( + core_parallelism, + 
"ProcessPoolExecutor", + _dummy_process_pool_executor, + ) + monkeypatch.setattr(core_parallelism, "as_completed", lambda futures: futures) def _relative_to_root(path: str, root: Path) -> str: diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 45dfd3b..6405311 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -12,7 +12,6 @@ import pytest -from codeclone._html_badges import _tab_empty_info from codeclone.contracts import ( CACHE_VERSION, DOCS_URL, @@ -20,16 +19,7 @@ REPORT_SCHEMA_VERSION, REPOSITORY_URL, ) -from codeclone.errors import FileProcessingError -from codeclone.html_report import ( - _FileCache, - _pygments_css, - _render_code_block, - _try_pygments, -) -from codeclone.html_report import ( - build_html_report as _core_build_html_report, -) +from codeclone.contracts.errors import FileProcessingError from codeclone.models import ( StructuralFindingGroup, StructuralFindingOccurrence, @@ -37,6 +27,16 @@ SuppressedCloneGroup, ) from codeclone.report import build_block_group_facts +from codeclone.report.html import ( + _FileCache, + _pygments_css, + _render_code_block, + _try_pygments, +) +from codeclone.report.html import ( + build_html_report as _core_build_html_report, +) +from codeclone.report.html.widgets.badges import _tab_empty_info from codeclone.report.json_contract import ( build_report_document, clone_group_id, @@ -1276,7 +1276,7 @@ def test_try_pygments_ok() -> None: def test_render_code_block_without_pygments_uses_escaped_fallback( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: - import codeclone._html_snippets as snippets + import codeclone.report.html.widgets.snippets as snippets src = tmp_path / "a.py" src.write_text("x = ''\n", "utf-8") @@ -1327,7 +1327,7 @@ def test_html_report_with_blocks(tmp_path: Path) -> None: def test_html_report_pygments_fallback(monkeypatch: pytest.MonkeyPatch) -> None: - import codeclone.html_report as hr + import codeclone.report.html as hr def _fake_css(name: 
str) -> str: if name in ("github-dark", "github-light"): @@ -1460,7 +1460,7 @@ def test_render_code_block_truncates_and_fallback( f = tmp_path / "a.py" f.write_text("\n".join([f"line{i}" for i in range(1, 30)]), "utf-8") - import codeclone.html_report as hr + import codeclone.report.html as hr monkeypatch.setattr(hr, "_try_pygments", lambda _text: None) cache = _FileCache(maxsize=2) diff --git a/tests/test_html_report_helpers.py b/tests/test_html_report_helpers.py index 86ab5ad..3d90b86 100644 --- a/tests/test_html_report_helpers.py +++ b/tests/test_html_report_helpers.py @@ -10,33 +10,29 @@ import pytest -import codeclone._html_report._assemble as assemble_mod -import codeclone._html_report._sections._suggestions as suggestions_section -from codeclone._html_badges import _quality_badge_html, _stat_card -from codeclone._html_report._components import ( - overview_source_breakdown_html, - overview_summary_item_html, -) -from codeclone._html_report._icons import section_icon_html -from codeclone._html_report._sections._clones import ( +import codeclone.report.html.assemble as assemble_mod +import codeclone.report.html.sections._suggestions as suggestions_section +from codeclone.contracts import REPORT_SCHEMA_VERSION +from codeclone.models import MetricsDiff, ReportLocation, Suggestion +from codeclone.report.html.sections._clones import ( _derive_group_display_name, _render_group_explanation, ) -from codeclone._html_report._sections._dead_code import render_dead_code_panel -from codeclone._html_report._sections._dependencies import ( +from codeclone.report.html.sections._dead_code import render_dead_code_panel +from codeclone.report.html.sections._dependencies import ( _hub_threshold, _render_dep_nodes_and_labels, _select_dep_nodes, ) -from codeclone._html_report._sections._meta import _path_basename, render_meta_panel -from codeclone._html_report._sections._overview import ( +from codeclone.report.html.sections._meta import _path_basename, render_meta_panel +from 
codeclone.report.html.sections._overview import ( _directory_hotspot_bucket_body, _directory_kind_meta_parts, _health_gauge_html, _issue_breakdown_html, render_overview_panel, ) -from codeclone._html_report._sections._suggestions import ( +from codeclone.report.html.sections._suggestions import ( _format_source_breakdown, _priority_badge_label, _render_card, @@ -44,10 +40,14 @@ _spread_label, _suggestion_context_labels, ) -from codeclone._html_report._tabs import render_split_tabs -from codeclone._html_snippets import _FileCache -from codeclone.contracts import REPORT_SCHEMA_VERSION -from codeclone.models import MetricsDiff, ReportLocation, Suggestion +from codeclone.report.html.widgets.badges import _quality_badge_html, _stat_card +from codeclone.report.html.widgets.components import ( + overview_source_breakdown_html, + overview_summary_item_html, +) +from codeclone.report.html.widgets.icons import section_icon_html +from codeclone.report.html.widgets.snippets import _FileCache +from codeclone.report.html.widgets.tabs import render_split_tabs from tests._assertions import assert_contains_none diff --git a/tests/test_cli_main_guard.py b/tests/test_main_entrypoint.py similarity index 56% rename from tests/test_cli_main_guard.py rename to tests/test_main_entrypoint.py index 904c2b3..89ff0e8 100644 --- a/tests/test_cli_main_guard.py +++ b/tests/test_main_entrypoint.py @@ -5,19 +5,30 @@ # Copyright (c) 2026 Den Rozhnovskiy import os +import runpy import subprocess import sys from pathlib import Path +import pytest -def test_cli_main_guard_runs() -> None: + +def test_main_module_guard_runs() -> None: root_dir = Path(__file__).parents[1] env = os.environ.copy() env["PYTHONPATH"] = str(root_dir) + os.pathsep + env.get("PYTHONPATH", "") result = subprocess.run( - [sys.executable, "-m", "codeclone.cli", "--help"], + [sys.executable, "-m", "codeclone.main", "--help"], capture_output=True, text=True, env=env, ) assert result.returncode == 0 + + +def 
test_main_module_guard_runpy(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delitem(sys.modules, "codeclone.main", raising=False) + monkeypatch.setattr(sys, "argv", ["codeclone", "--help"]) + with pytest.raises(SystemExit) as exc: + runpy.run_module("codeclone.main", run_name="__main__") + assert exc.value.code == 0 diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 5cb23f9..60a7250 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -18,9 +18,9 @@ import pytest from codeclone import __version__ as CODECLONE_VERSION -from codeclone import mcp_server from codeclone.contracts import REPORT_SCHEMA_VERSION -from codeclone.mcp_server import MCPDependencyError, build_mcp_server +from codeclone.surfaces.mcp import server as mcp_server +from codeclone.surfaces.mcp.server import MCPDependencyError, build_mcp_server from tests._mcp_fixtures import write_quality_fixture as _write_shared_quality_fixture diff --git a/tests/test_cli_main_guard_runpy.py b/tests/test_mcp_server_main_guard_runpy.py similarity index 50% rename from tests/test_cli_main_guard_runpy.py rename to tests/test_mcp_server_main_guard_runpy.py index 9685a8a..3649a73 100644 --- a/tests/test_cli_main_guard_runpy.py +++ b/tests/test_mcp_server_main_guard_runpy.py @@ -10,9 +10,10 @@ import pytest -def test_cli_main_guard_runpy(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.delitem(sys.modules, "codeclone.cli", raising=False) - monkeypatch.setattr(sys, "argv", ["codeclone", "--help"]) +def test_mcp_server_main_guard_runpy(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delitem(sys.modules, "codeclone.surfaces.mcp", raising=False) + monkeypatch.delitem(sys.modules, "codeclone.surfaces.mcp.__main__", raising=False) + monkeypatch.setattr(sys, "argv", ["codeclone-mcp", "--help"]) with pytest.raises(SystemExit) as exc: - runpy.run_module("codeclone.cli", run_name="__main__") + runpy.run_module("codeclone.surfaces.mcp", run_name="__main__") assert 
exc.value.code == 0 diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index d497c94..96bbd8b 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -18,13 +18,14 @@ import pytest -from codeclone import mcp_service as mcp_service_mod -from codeclone._cli_config import ConfigValidationError from codeclone.baseline import Baseline, current_python_tag from codeclone.cache import Cache +from codeclone.config import ConfigValidationError from codeclone.contracts import REPORT_SCHEMA_VERSION -from codeclone.mcp_service import ( - CodeCloneMCPService, +from codeclone.models import MetricsDiff +from codeclone.surfaces.mcp import session as mcp_service_mod +from codeclone.surfaces.mcp.service import CodeCloneMCPService +from codeclone.surfaces.mcp.session import ( DetailLevel, MCPAnalysisRequest, MCPFindingNotFoundError, @@ -35,7 +36,6 @@ MCPServiceContractError, MCPServiceError, ) -from codeclone.models import MetricsDiff from tests._mcp_fixtures import write_quality_fixture as _write_shared_quality_fixture @@ -441,10 +441,10 @@ def test_mcp_service_help_validates_topic_and_detail() -> None: service = CodeCloneMCPService(history_limit=4) with pytest.raises(MCPServiceContractError, match="Invalid value for topic"): - service.get_help(topic="gates") # type: ignore[arg-type] + service.get_help(topic="gates") with pytest.raises(MCPServiceContractError, match="Invalid value for detail"): - service.get_help(topic="baseline", detail="full") # type: ignore[arg-type] + service.get_help(topic="baseline", detail="full") def test_mcp_service_summary_inventory_is_compact_and_report_inventory_stays_canonical( @@ -1242,7 +1242,7 @@ def test_mcp_service_reports_contract_errors_for_resources_and_findings( assert overview["run_id"] == run_id with pytest.raises(MCPServiceContractError): - service.get_report_section(section=cast("object", "unknown")) # type: ignore[arg-type] + service.get_report_section(section=cast("object", "unknown")) with 
pytest.raises(MCPFindingNotFoundError): service.get_finding(run_id=run_id, finding_id="missing") with pytest.raises(MCPServiceContractError): @@ -1699,7 +1699,10 @@ def test_mcp_service_short_finding_ids_remain_unique_for_overlapping_clones( def test_mcp_service_reports_missing_json_artifact(tmp_path: Path) -> None: _write_clone_fixture(tmp_path) service = CodeCloneMCPService(history_limit=4) - service_module = cast(Any, importlib.import_module("codeclone.mcp_service")) + service_module = cast( + Any, + importlib.import_module("codeclone.surfaces.mcp.session"), + ) original_report = service_module.report def _fake_report(**kwargs: Any) -> object: @@ -1713,7 +1716,7 @@ def _fake_report(**kwargs: Any) -> object: ) monkeypatch = pytest.MonkeyPatch() - monkeypatch.setattr("codeclone.mcp_service.report", _fake_report) + monkeypatch.setattr("codeclone.surfaces.mcp.session.report", _fake_report) try: with pytest.raises(MCPServiceError): service.analyze_repository( @@ -2301,20 +2304,20 @@ def test_mcp_service_additional_projection_and_error_branches( ) ).startswith("design:coupling:") - original_service_get = service.get_finding + original_session_get = service.session.get_finding original_runs_get = service._runs.get monkeypatch.setattr( - service, + service.session, "get_finding", lambda **kwargs: {"id": "no-remediation"}, ) monkeypatch.setattr(service._runs, "get", lambda run_id=None: record) with pytest.raises(MCPFindingNotFoundError): service.get_remediation(finding_id="no-remediation", run_id=run_id) - monkeypatch.setattr(service, "get_finding", original_service_get) + monkeypatch.setattr(service.session, "get_finding", original_session_get) monkeypatch.setattr(service._runs, "get", original_runs_get) - original_get_finding = service.get_finding + original_get_finding = service.session.get_finding def _patched_get_finding( *, @@ -2330,7 +2333,7 @@ def _patched_get_finding( detail_level=detail_level, ) - monkeypatch.setattr(service, "get_finding", 
_patched_get_finding) + monkeypatch.setattr(service.session, "get_finding", _patched_get_finding) service._review_state[record.run_id] = OrderedDict([("missing", None)]) reviewed_items = service.list_reviewed_findings(run_id=run_id) assert reviewed_items["reviewed_count"] == 0 @@ -2859,7 +2862,11 @@ def test_mcp_service_metrics_diff_warning_and_projection_branches( max_size_bytes=1024 * 1024, ) cache_with_warning.load_warning = "cache warning" - monkeypatch.setattr(service, "_build_cache", lambda **kwargs: cache_with_warning) + monkeypatch.setattr( + service.session, + "_build_cache", + lambda **kwargs: cache_with_warning, + ) summary = service.analyze_repository( MCPAnalysisRequest( @@ -3324,17 +3331,17 @@ def test_mcp_service_short_id_and_comparison_helper_branches( collision_service = CodeCloneMCPService(history_limit=4) monkeypatch.setattr( - collision_service, + collision_service.session, "_base_findings", lambda _record: [{"id": "clone:block:one"}, {"id": "clone:block:two"}], ) monkeypatch.setattr( - collision_service, + collision_service.session, "_base_short_finding_id", lambda _cid: "blk:dup|x1", ) monkeypatch.setattr( - collision_service, + collision_service.session, "_disambiguated_short_finding_ids", lambda _ids: { "clone:block:one": "blk:resolved1|x1", @@ -3454,12 +3461,12 @@ def test_mcp_service_payload_and_resolution_helper_fallbacks( missing_record = _dummy_run_record(tmp_path, "missing-finding") service._runs.register(missing_record) monkeypatch.setattr( - service, + service.session, "_resolve_canonical_finding_id", lambda _record, _finding_id: "design:cohesion:pkg.mod:Runner", ) monkeypatch.setattr( - service, + service.session, "_base_findings", lambda _record: [{"id": "design:cohesion:pkg.mod:Other"}], ) @@ -3469,7 +3476,7 @@ def test_mcp_service_payload_and_resolution_helper_fallbacks( ) monkeypatch.setattr( - service, + service.session, "get_finding", lambda **_kwargs: {"id": "design:cohesion:pkg.mod:Runner"}, ) @@ -3576,7 +3583,7 @@ def 
test_mcp_service_payload_and_resolution_helper_fallbacks( metrics_diff=None, ) monkeypatch.setattr( - service, + service.session, "_resolve_canonical_finding_id", lambda _record, _finding_id: (_ for _ in ()).throw( MCPFindingNotFoundError("missing") @@ -3644,7 +3651,7 @@ def test_mcp_service_summary_and_metrics_detail_helper_fallbacks( record = _dummy_run_record(tmp_path, "summary-helper") monkeypatch.setattr( - service, + service.session, "_base_findings", lambda _record: [ { diff --git a/tests/test_mcp_tool_schema_snapshot.py b/tests/test_mcp_tool_schema_snapshot.py new file mode 100644 index 0000000..2fc1787 --- /dev/null +++ b/tests/test_mcp_tool_schema_snapshot.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import asyncio +from typing import cast + +import pytest + +from codeclone.surfaces.mcp.server import build_mcp_server +from tests._contract_snapshots import load_json_snapshot + + +def test_mcp_tool_schema_snapshot() -> None: + pytest.importorskip("mcp.server.fastmcp") + + server = build_mcp_server(history_limit=4) + tools = asyncio.run(server.list_tools()) + snapshot = [ + { + "name": tool.name, + "input_schema": tool.inputSchema, + } + for tool in sorted(tools, key=lambda item: item.name) + ] + expected = cast( + "list[dict[str, object]]", + load_json_snapshot("mcp_tool_schemas.json"), + ) + assert snapshot == expected diff --git a/tests/test_metrics_baseline.py b/tests/test_metrics_baseline.py index 4ad8781..a71544e 100644 --- a/tests/test_metrics_baseline.py +++ b/tests/test_metrics_baseline.py @@ -15,9 +15,14 @@ import pytest import codeclone.baseline as baseline_mod -import codeclone.metrics_baseline as mb_mod -from codeclone.errors import BaselineValidationError -from codeclone.metrics_baseline import MetricsBaseline, MetricsBaselineStatus +import codeclone.baseline._metrics_baseline_payload as mb_payload +import codeclone.baseline._metrics_baseline_validation as mb_validate +import codeclone.baseline.metrics_baseline as mb_mod +from 
codeclone.baseline.metrics_baseline import ( + MetricsBaseline, + MetricsBaselineStatus, +) +from codeclone.contracts.errors import BaselineValidationError from codeclone.models import ( ApiParamSpec, ApiSurfaceSnapshot, @@ -217,7 +222,7 @@ def _valid_payload( schema_version: str = mb_mod.METRICS_BASELINE_SCHEMA_VERSION, python_tag: str | None = None, ) -> dict[str, object]: - return mb_mod._build_payload( + return mb_payload._build_payload( snapshot=_snapshot(), schema_version=schema_version, python_tag=python_tag or mb_mod.current_python_tag(), @@ -238,7 +243,7 @@ def _ready_metrics_baseline( ) -> MetricsBaseline: baseline = MetricsBaseline(path) baseline.snapshot = _snapshot() - baseline.payload_sha256 = mb_mod._compute_payload_sha256(_snapshot()) + baseline.payload_sha256 = mb_payload._compute_payload_sha256(_snapshot()) baseline.has_coverage_adoption_snapshot = has_adoption baseline.generator_name = generator_name baseline.schema_version = schema_version @@ -444,14 +449,14 @@ def test_api_surface_payload_hashes_are_order_independent() -> None: ) ) - assert mb_mod._compute_api_surface_payload_sha256( + assert mb_payload._compute_api_surface_payload_sha256( reordered - ) == mb_mod._compute_api_surface_payload_sha256( + ) == mb_payload._compute_api_surface_payload_sha256( _api_surface_snapshot(include_added=True) ) - assert mb_mod._compute_legacy_api_surface_payload_sha256( + assert mb_payload._compute_legacy_api_surface_payload_sha256( reordered - ) == mb_mod._compute_legacy_api_surface_payload_sha256( + ) == mb_payload._compute_legacy_api_surface_payload_sha256( _api_surface_snapshot(include_added=True) ) @@ -481,10 +486,10 @@ def _boom_replace(src: str | Path, dst: str | Path) -> None: temp_holder["path"] = Path(src) raise OSError("replace failed") - monkeypatch.setattr("codeclone._json_io.os.replace", _boom_replace) + monkeypatch.setattr("codeclone.utils.json_io.os.replace", _boom_replace) with pytest.raises(OSError, match="replace failed"): - 
mb_mod._atomic_write_json(path, payload) + mb_validate._atomic_write_json(path, payload) assert temp_holder["path"].exists() is False @@ -678,7 +683,7 @@ def test_metrics_baseline_load_tracks_adoption_snapshot_presence( metrics.pop("typing_return_permille") metrics.pop("docstring_permille") metrics.pop("typing_any_count") - meta["payload_sha256"] = mb_mod._compute_payload_sha256( + meta["payload_sha256"] = mb_payload._compute_payload_sha256( _snapshot(), include_adoption=False, ) @@ -767,7 +772,7 @@ def test_metrics_baseline_load_accepts_legacy_api_surface_qualnames( path = tmp_path / "metrics-baseline.json" snapshot = _snapshot() api_surface_snapshot = _api_surface_snapshot(include_added=True) - payload = mb_mod._build_payload( + payload = mb_payload._build_payload( snapshot=snapshot, schema_version=mb_mod.METRICS_BASELINE_SCHEMA_VERSION, python_tag=mb_mod.current_python_tag(), @@ -784,7 +789,7 @@ def test_metrics_baseline_load_accepts_legacy_api_surface_qualnames( symbol["qualname"] = f"pkg.mod:{local_name}" meta = cast(dict[str, object], payload["meta"]) meta["api_surface_payload_sha256"] = ( - mb_mod._compute_legacy_api_surface_payload_sha256( + mb_payload._compute_legacy_api_surface_payload_sha256( api_surface_snapshot, root=path.parent, ) @@ -808,7 +813,7 @@ def test_metrics_baseline_load_accepts_absolute_api_surface_filepaths( path, absolute_filepath = _repo_metrics_baseline_path_and_abs_filepath(tmp_path) snapshot = _snapshot() api_surface_snapshot = _api_surface_snapshot_with_filepath(absolute_filepath) - payload = mb_mod._build_payload( + payload = mb_payload._build_payload( snapshot=snapshot, schema_version=mb_mod.METRICS_BASELINE_SCHEMA_VERSION, python_tag=mb_mod.current_python_tag(), @@ -822,7 +827,7 @@ def test_metrics_baseline_load_accepts_absolute_api_surface_filepaths( modules = cast(list[dict[str, object]], api_surface["modules"]) modules[0]["filepath"] = absolute_filepath meta = cast(dict[str, object], payload["meta"]) - 
meta["api_surface_payload_sha256"] = mb_mod._compute_api_surface_payload_sha256( + meta["api_surface_payload_sha256"] = mb_payload._compute_api_surface_payload_sha256( api_surface_snapshot ) baseline = _load_written_metrics_baseline(path, payload) @@ -836,33 +841,40 @@ def test_metrics_baseline_json_and_structure_validators(tmp_path: Path) -> None: path = tmp_path / "metrics-baseline.json" path.write_text("[]", "utf-8") with pytest.raises(BaselineValidationError, match="must be an object"): - mb_mod._load_json_object(path) + mb_validate._load_json_object(path) - mb_mod._validate_top_level_structure(_valid_payload(), path=path) + mb_validate._validate_top_level_structure(_valid_payload(), path=path) with pytest.raises(BaselineValidationError, match="unexpected top-level keys"): - mb_mod._validate_top_level_structure( + mb_validate._validate_top_level_structure( {**_valid_payload(), "extra": 1}, path=path, ) with pytest.raises(BaselineValidationError, match="missing required fields"): - mb_mod._validate_required_keys( + mb_validate._validate_required_keys( {"only": "one"}, frozenset({"required"}), path=path ) with pytest.raises(BaselineValidationError, match="unexpected fields"): - mb_mod._validate_exact_keys({"a": 1, "b": 2}, frozenset({"a"}), path=path) + mb_validate._validate_exact_keys( + {"a": 1, "b": 2}, + frozenset({"a"}), + path=path, + ) def test_metrics_baseline_field_parsers_and_cycle_parser(tmp_path: Path) -> None: path = tmp_path / "metrics-baseline.json" with pytest.raises(BaselineValidationError, match="'name' must be str"): - mb_mod._require_str({"name": 1}, "name", path=path) + mb_validate._require_str({"name": 1}, "name", path=path) assert ( - mb_mod._extract_metrics_payload_sha256({"payload_sha256": "x"}, path=path) + mb_validate._extract_metrics_payload_sha256( + {"payload_sha256": "x"}, + path=path, + ) == "x" ) assert ( - mb_mod._extract_metrics_payload_sha256( + mb_validate._extract_metrics_payload_sha256( {"metrics_payload_sha256": "y", 
"payload_sha256": "x"}, path=path, ) @@ -870,23 +882,23 @@ def test_metrics_baseline_field_parsers_and_cycle_parser(tmp_path: Path) -> None ) with pytest.raises(BaselineValidationError, match="must be int"): - mb_mod._require_int({"value": True}, "value", path=path) + mb_validate._require_int({"value": True}, "value", path=path) with pytest.raises(BaselineValidationError, match="must be int"): - mb_mod._require_int({"value": "1"}, "value", path=path) + mb_validate._require_int({"value": "1"}, "value", path=path) with pytest.raises(BaselineValidationError, match="must be list\\[str\\]"): - mb_mod._require_str_list({"items": "bad"}, "items", path=path) + mb_validate._require_str_list({"items": "bad"}, "items", path=path) with pytest.raises(BaselineValidationError, match="must be list\\[str\\]"): - mb_mod._require_str_list({"items": [1]}, "items", path=path) + mb_validate._require_str_list({"items": [1]}, "items", path=path) with pytest.raises(BaselineValidationError, match="must be list"): - mb_mod._parse_cycles( + mb_validate._parse_cycles( {"dependency_cycles": "bad"}, key="dependency_cycles", path=path ) with pytest.raises( BaselineValidationError, match="cycle item must be list\\[str\\]" ): - mb_mod._parse_cycles( + mb_validate._parse_cycles( {"dependency_cycles": ["bad"]}, key="dependency_cycles", path=path, @@ -894,12 +906,12 @@ def test_metrics_baseline_field_parsers_and_cycle_parser(tmp_path: Path) -> None with pytest.raises( BaselineValidationError, match="cycle item must be list\\[str\\]" ): - mb_mod._parse_cycles( + mb_validate._parse_cycles( {"dependency_cycles": [[1]]}, key="dependency_cycles", path=path, ) - assert mb_mod._parse_cycles( + assert mb_validate._parse_cycles( {"dependency_cycles": [["b", "a"], ["a", "b"], ["b", "a"]]}, key="dependency_cycles", path=path, @@ -908,31 +920,31 @@ def test_metrics_baseline_field_parsers_and_cycle_parser(tmp_path: Path) -> None def test_metrics_baseline_parse_generator_variants(tmp_path: Path) -> None: path = 
tmp_path / "metrics-baseline.json" - assert mb_mod._parse_generator({"generator": "codeclone"}, path=path) == ( + assert mb_validate._parse_generator({"generator": "codeclone"}, path=path) == ( "codeclone", None, ) - assert mb_mod._parse_generator( + assert mb_validate._parse_generator( {"generator": "codeclone", "codeclone_version": "1.0.0"}, path=path, ) == ("codeclone", "1.0.0") with pytest.raises(BaselineValidationError, match="generator_version must be str"): - mb_mod._parse_generator( + mb_validate._parse_generator( {"generator": "codeclone", "generator_version": 1}, path=path, ) - assert mb_mod._parse_generator( + assert mb_validate._parse_generator( {"generator": {"name": "codeclone", "version": "2.0.0"}}, path=path, ) == ("codeclone", "2.0.0") with pytest.raises(BaselineValidationError, match="unexpected generator keys"): - mb_mod._parse_generator( + mb_validate._parse_generator( {"generator": {"name": "codeclone", "extra": 1}}, path=path, ) with pytest.raises(BaselineValidationError, match=r"generator\.name must be str"): - mb_mod._parse_generator( + mb_validate._parse_generator( {"generator": {"name": 1, "version": "2.0.0"}}, path=path, ) @@ -940,14 +952,14 @@ def test_metrics_baseline_parse_generator_variants(tmp_path: Path) -> None: BaselineValidationError, match=r"generator\.version must be str", ): - mb_mod._parse_generator( + mb_validate._parse_generator( {"generator": {"name": "codeclone", "version": 2}}, path=path, ) with pytest.raises( BaselineValidationError, match="generator must be object or str" ): - mb_mod._parse_generator({"generator": 1}, path=path) + mb_validate._parse_generator({"generator": 1}, path=path) def test_metrics_baseline_embedded_clone_payload_and_schema_resolution( @@ -967,17 +979,17 @@ def test_metrics_baseline_embedded_clone_payload_and_schema_resolution( "blocks": ["|".join(["a" * 40, "b" * 40, "c" * 40, "d" * 40])], }, } - meta_obj, clones_obj = mb_mod._require_embedded_clone_baseline_payload( + meta_obj, clones_obj = 
mb_validate._require_embedded_clone_baseline_payload( valid_embedded, path=path ) assert "schema_version" in meta_obj assert "functions" in clones_obj assert ( - mb_mod._resolve_embedded_schema_version(meta_obj, path=path) + mb_validate._resolve_embedded_schema_version(meta_obj, path=path) == mb_mod.BASELINE_SCHEMA_VERSION ) assert ( - mb_mod._resolve_embedded_schema_version( + mb_validate._resolve_embedded_schema_version( {**meta_obj, "schema_version": "2.1"}, path=path, ) @@ -985,12 +997,12 @@ def test_metrics_baseline_embedded_clone_payload_and_schema_resolution( ) with pytest.raises(BaselineValidationError, match="'meta' must be object"): - mb_mod._require_embedded_clone_baseline_payload( + mb_validate._require_embedded_clone_baseline_payload( {"meta": [], "clones": {}}, path=path, ) with pytest.raises(BaselineValidationError, match="'clones' must be object"): - mb_mod._require_embedded_clone_baseline_payload( + mb_validate._require_embedded_clone_baseline_payload( {"meta": {}, "clones": []}, path=path, ) @@ -998,7 +1010,7 @@ def test_metrics_baseline_embedded_clone_payload_and_schema_resolution( BaselineValidationError, match=r"'clones\.functions' must be list\[str\]", ): - mb_mod._require_embedded_clone_baseline_payload( + mb_validate._require_embedded_clone_baseline_payload( { "meta": valid_embedded["meta"], "clones": {"functions": [1], "blocks": []}, @@ -1009,7 +1021,7 @@ def test_metrics_baseline_embedded_clone_payload_and_schema_resolution( BaselineValidationError, match=r"'clones\.blocks' must be list\[str\]", ): - mb_mod._require_embedded_clone_baseline_payload( + mb_validate._require_embedded_clone_baseline_payload( { "meta": valid_embedded["meta"], "clones": {"functions": [], "blocks": [1]}, @@ -1017,7 +1029,7 @@ def test_metrics_baseline_embedded_clone_payload_and_schema_resolution( path=path, ) with pytest.raises(BaselineValidationError, match="must be semver string"): - mb_mod._resolve_embedded_schema_version( + 
mb_validate._resolve_embedded_schema_version( {**meta_obj, "schema_version": "broken"}, path=path, ) @@ -1025,10 +1037,10 @@ def test_metrics_baseline_embedded_clone_payload_and_schema_resolution( def test_metrics_baseline_parse_snapshot_grade_validation(tmp_path: Path) -> None: path = tmp_path / "metrics-baseline.json" - payload = mb_mod._snapshot_payload(_snapshot()) + payload = mb_payload._snapshot_payload(_snapshot()) payload["health_grade"] = "Z" with pytest.raises(BaselineValidationError, match="must be one of A/B/C/D/F"): - mb_mod._parse_snapshot(payload, path=path) + mb_validate._parse_snapshot(payload, path=path) def test_metrics_baseline_load_json_read_oserror_status( @@ -1044,4 +1056,4 @@ def _boom_read(_self: Path, _encoding: str) -> str: with pytest.raises( BaselineValidationError, match="Cannot read metrics baseline file" ): - mb_mod._load_json_object(path) + mb_validate._load_json_object(path) diff --git a/tests/test_metrics_modules.py b/tests/test_metrics_modules.py index 5c3f090..bc22d1d 100644 --- a/tests/test_metrics_modules.py +++ b/tests/test_metrics_modules.py @@ -10,7 +10,7 @@ import pytest -from codeclone.cfg_model import CFG +from codeclone.analysis.cfg_model import CFG from codeclone.metrics import ( HealthInputs, build_dep_graph, diff --git a/tests/test_metrics_registry.py b/tests/test_metrics_registry.py new file mode 100644 index 0000000..1b54841 --- /dev/null +++ b/tests/test_metrics_registry.py @@ -0,0 +1,22 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from codeclone.metrics import METRIC_FAMILIES, MetricFamily + + +def test_registered_metric_families_define_contract_metadata() -> None: + assert METRIC_FAMILIES + report_sections: set[str] = set() + for family_name, family in METRIC_FAMILIES.items(): + assert isinstance(family, MetricFamily) + assert family.name == family_name + assert callable(family.compute) + assert callable(family.aggregate) + assert family.report_section + assert family.report_section not in report_sections + report_sections.add(family.report_section) diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 4bbbd10..cff53ec 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -9,13 +9,13 @@ import pytest -import codeclone.normalize as normalize_mod -from codeclone.meta_markers import CFG_META_PREFIX -from codeclone.normalize import ( +import codeclone.analysis.normalizer as normalize_mod +from codeclone.analysis.normalizer import ( NormalizationConfig, normalized_ast_dump_from_list, stmt_hashes, ) +from codeclone.meta_markers import CFG_META_PREFIX from tests._assertions import assert_contains_all from tests._ast_helpers import fix_missing_single_function diff --git a/tests/test_options_spec_coverage.py b/tests/test_options_spec_coverage.py new file mode 100644 index 0000000..ac138ad --- /dev/null +++ b/tests/test_options_spec_coverage.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codeclone.config import ( + build_parser, + collect_explicit_cli_dests, + load_pyproject_config, + resolve_config, +) +from codeclone.config.spec import PYPROJECT_OPTIONS, TESTABLE_CLI_OPTIONS, OptionSpec + + +def _option_id(option: OptionSpec) -> str: + if option.flags: + return f"{option.dest}:{option.flags[0]}" + return f"{option.dest}:positional" + + +def _cli_sample(option: OptionSpec) -> tuple[tuple[str, 
...], object]: + if option.cli_kind == "positional": + return (("sample-root",), "sample-root") + if option.cli_kind == "bool_optional": + return ((option.flags[0],), True) + if option.cli_kind == "store_true": + return ((option.flags[0],), True) + if option.cli_kind == "store_false": + return ((option.flags[0],), False) + if option.value_type is int: + return ((option.flags[0], "7"), 7) + return ((option.flags[0], "sample-value"), "sample-value") + + +def _pyproject_sample(option: OptionSpec, root_path: Path) -> tuple[str, object]: + config_spec = option.config_spec + assert config_spec is not None + expected_type = config_spec.expected_type + + if expected_type is bool: + return ("true", True) + if expected_type is int: + return ("7", 7) + if expected_type is str: + raw_value = "reports/output.json" if option.path_value else "sample-value" + expected = str(root_path / raw_value) if option.path_value else raw_value + return (f'"{raw_value}"', expected) + if expected_type is list: + return ('["./tests/fixtures/golden_*"]', ("tests/fixtures/golden_*",)) + raise AssertionError(f"Unsupported sample type for {option.pyproject_key}") + + +def test_resolve_config_prefers_explicit_cli_values() -> None: + parser = build_parser("2.0.0") + argv = ["--min-loc", "11"] + args = parser.parse_args(argv) + + resolved = resolve_config( + args=args, + config_values={"min_loc": 42, "max_cache_size_mb": 77}, + explicit_cli_dests=collect_explicit_cli_dests(parser, argv=argv), + ) + + assert resolved.values["min_loc"] == 11 + assert resolved.values["max_cache_size_mb"] == 77 + assert resolved.explicit_cli_dests == frozenset({"min_loc"}) + + +def test_pyproject_option_count_matches_declared_specs() -> None: + pyproject_keys = [option.pyproject_key for option in PYPROJECT_OPTIONS] + assert all(key is not None for key in pyproject_keys) + assert len(pyproject_keys) == len(set(pyproject_keys)) + + +@pytest.mark.parametrize("option", TESTABLE_CLI_OPTIONS, ids=_option_id) +def 
test_option_specs_have_cli_parsing_coverage(option: OptionSpec) -> None: + parser = build_parser("2.0.0") + argv, expected = _cli_sample(option) + args = parser.parse_args(list(argv)) + assert getattr(args, option.dest) == expected + + +@pytest.mark.parametrize( + "option", + [ + option + for option in TESTABLE_CLI_OPTIONS + if option.const is not None and option.flags + ], + ids=_option_id, +) +def test_option_specs_cover_cli_const_behaviour(option: OptionSpec) -> None: + parser = build_parser("2.0.0") + args = parser.parse_args([option.flags[0]]) + assert getattr(args, option.dest) == option.const + + +@pytest.mark.parametrize("option", PYPROJECT_OPTIONS, ids=_option_id) +def test_option_specs_have_pyproject_loading_coverage( + option: OptionSpec, + tmp_path: Path, +) -> None: + pyproject_key = option.pyproject_key + assert pyproject_key is not None + raw_value, expected = _pyproject_sample(option, tmp_path) + (tmp_path / "pyproject.toml").write_text( + f"[tool.codeclone]\n{pyproject_key} = {raw_value}\n", + encoding="utf-8", + ) + + loaded = load_pyproject_config(tmp_path) + assert loaded[pyproject_key] == expected diff --git a/tests/test_pipeline_metrics.py b/tests/test_pipeline_metrics.py index 9f31bf8..54b5412 100644 --- a/tests/test_pipeline_metrics.py +++ b/tests/test_pipeline_metrics.py @@ -18,6 +18,32 @@ ModuleApiSurfaceDict, PublicSymbolDict, ) +from codeclone.core._types import ( + _as_sorted_str_tuple, + _class_metric_sort_key, + _module_dep_sort_key, + _module_names_from_units, +) +from codeclone.core.bootstrap import _resolve_optional_runtime_path +from codeclone.core.coverage_payload import _coverage_join_rows, _coverage_join_summary +from codeclone.core.discovery_cache import ( + _api_param_spec_from_cache_dict, + _api_surface_from_cache_dict, + _cache_dict_int_fields, + _cache_dict_module_fields, + _docstring_coverage_from_cache_dict, + _public_symbol_from_cache_dict, + _typing_coverage_from_cache_dict, +) +from codeclone.core.discovery_cache 
import ( + load_cached_metrics_extended as _load_cached_metrics_extended, +) +from codeclone.core.metrics_payload import ( + _enrich_metrics_report_payload, + build_metrics_report_payload, +) +from codeclone.core.parallelism import _should_use_parallel +from codeclone.core.pipeline import compute_project_metrics from codeclone.metrics import build_overloaded_modules_payload from codeclone.models import ( ApiBreakingChange, @@ -37,31 +63,13 @@ PublicSymbol, UnitCoverageFact, ) -from codeclone.pipeline import ( +from codeclone.report.gates import ( MetricGateConfig, - _api_param_spec_from_cache_dict, - _api_surface_from_cache_dict, - _as_int, - _as_sorted_str_tuple, - _as_str, - _cache_dict_int_fields, - _cache_dict_module_fields, - _class_metric_sort_key, - _coverage_join_rows, - _coverage_join_summary, - _docstring_coverage_from_cache_dict, - _enrich_metrics_report_payload, - _load_cached_metrics_extended, - _module_dep_sort_key, - _module_names_from_units, - _public_symbol_from_cache_dict, - _resolve_optional_runtime_path, - _should_use_parallel, - _typing_coverage_from_cache_dict, - build_metrics_report_payload, - compute_project_metrics, - metric_gate_reasons, + gate_state_from_project_metrics, + metric_gate_reasons_for_state, ) +from codeclone.utils.coerce import as_int as _as_int +from codeclone.utils.coerce import as_str as _as_str def _project_metrics(*, dead_confidence: str = "high") -> ProjectMetrics: @@ -155,6 +163,21 @@ def _project_metrics_with_adoption_and_api() -> ProjectMetrics: ) +def _metric_gate_reasons_from_metrics( + *, + project_metrics: ProjectMetrics, + coverage_join: CoverageJoinResult | None, + metrics_diff: MetricsDiff | None, + config: MetricGateConfig, +) -> tuple[str, ...]: + state = gate_state_from_project_metrics( + project_metrics=project_metrics, + coverage_join=coverage_join, + metrics_diff=metrics_diff, + ) + return metric_gate_reasons_for_state(state=state, config=config) + + def test_pipeline_basic_helpers_and_sort_keys() -> 
None: assert _as_int(True) == 1 assert _as_int("15") == 15 @@ -722,7 +745,7 @@ def test_load_cached_metrics_extended_decodes_adoption_and_api_surface() -> None def test_metric_gate_reasons_collects_all_enabled_reasons() -> None: - reasons = metric_gate_reasons( + reasons = _metric_gate_reasons_from_metrics( project_metrics=_project_metrics(dead_confidence="high"), coverage_join=None, metrics_diff=MetricsDiff( @@ -865,7 +888,7 @@ def test_enrich_metrics_report_payload_hides_api_diff_without_api_baseline() -> def test_metric_gate_reasons_skip_disabled_and_non_critical_paths() -> None: - reasons = metric_gate_reasons( + reasons = _metric_gate_reasons_from_metrics( project_metrics=_project_metrics(dead_confidence="medium"), coverage_join=None, metrics_diff=None, @@ -883,7 +906,7 @@ def test_metric_gate_reasons_skip_disabled_and_non_critical_paths() -> None: def test_metric_gate_reasons_partial_new_metrics_paths() -> None: - reasons = metric_gate_reasons( + reasons = _metric_gate_reasons_from_metrics( project_metrics=_project_metrics(dead_confidence="medium"), coverage_join=None, metrics_diff=MetricsDiff( @@ -910,7 +933,7 @@ def test_metric_gate_reasons_partial_new_metrics_paths() -> None: def test_metric_gate_reasons_new_metrics_optional_buckets_empty() -> None: - reasons = metric_gate_reasons( + reasons = _metric_gate_reasons_from_metrics( project_metrics=_project_metrics(dead_confidence="medium"), coverage_join=None, metrics_diff=MetricsDiff( @@ -937,7 +960,7 @@ def test_metric_gate_reasons_new_metrics_optional_buckets_empty() -> None: def test_metric_gate_reasons_include_adoption_and_api_surface_contracts() -> None: - reasons = metric_gate_reasons( + reasons = _metric_gate_reasons_from_metrics( project_metrics=_project_metrics(dead_confidence="medium"), coverage_join=None, metrics_diff=MetricsDiff( @@ -1066,7 +1089,7 @@ def test_coverage_join_summary_rows_and_gate_reasons() -> None: == [] ) - reasons = metric_gate_reasons( + reasons = 
_metric_gate_reasons_from_metrics( project_metrics=_project_metrics(dead_confidence="medium"), coverage_join=coverage_join, metrics_diff=None, @@ -1084,7 +1107,7 @@ def test_coverage_join_summary_rows_and_gate_reasons() -> None: ) assert reasons == ("Coverage hotspots detected: hotspots=1, threshold=50%.",) - invalid_reasons = metric_gate_reasons( + invalid_reasons = _metric_gate_reasons_from_metrics( project_metrics=_project_metrics(dead_confidence="medium"), coverage_join=CoverageJoinResult( coverage_xml="/repo/broken.xml", diff --git a/tests/test_pipeline_process.py b/tests/test_pipeline_process.py index cfe3ad5..d5a9c99 100644 --- a/tests/test_pipeline_process.py +++ b/tests/test_pipeline_process.py @@ -14,10 +14,14 @@ import pytest -import codeclone.pipeline as pipeline +import codeclone.core as pipeline +import codeclone.core.parallelism as core_parallelism +import codeclone.core.pipeline as core_pipeline +import codeclone.core.worker as core_worker +from codeclone.analysis.normalizer import NormalizationConfig from codeclone.cache import Cache, CacheEntry, SourceStatsDict, file_stat_signature +from codeclone.core.discovery_cache import usable_cached_source_stats from codeclone.models import HealthScore, ProjectMetrics -from codeclone.normalize import NormalizationConfig class _FailExec: @@ -217,9 +221,9 @@ def test_process_parallel_fallback_without_callback_uses_sequential( ) -> None: boot, discovery, cache, filepaths = _build_large_batch_case(tmp_path) - monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailExec) + monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _FailExec) monkeypatch.setattr( - pipeline, + core_worker, "process_file", _stub_process_file( expected_root=str(tmp_path), @@ -249,9 +253,9 @@ def test_process_small_batch_skips_parallel_executor( cache = Cache(tmp_path / "cache.json", root=tmp_path) callbacks: list[str] = [] - monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _UnexpectedExec) + 
monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _UnexpectedExec) monkeypatch.setattr( - pipeline, + core_worker, "process_file", _stub_process_file(expected_root=str(tmp_path)), ) @@ -273,9 +277,9 @@ def test_process_parallel_failure_large_batch_invokes_fallback_callback( boot, discovery, cache, filepaths = _build_large_batch_case(tmp_path) callbacks: list[str] = [] - monkeypatch.setattr(pipeline, "ProcessPoolExecutor", _FailExec) + monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _FailExec) monkeypatch.setattr( - pipeline, + core_worker, "process_file", _stub_process_file(expected_root=str(tmp_path)), ) @@ -415,13 +419,13 @@ def test_usable_cached_source_stats_respects_required_sections() -> None: "class_names": [], "structural_findings": [], } - assert pipeline._usable_cached_source_stats( + assert usable_cached_source_stats( complete_entry, skip_metrics=False, collect_structural_findings=True, ) == (5, 2, 1, 1) assert ( - pipeline._usable_cached_source_stats( + usable_cached_source_stats( base_entry, skip_metrics=False, collect_structural_findings=False, @@ -429,7 +433,7 @@ def test_usable_cached_source_stats_respects_required_sections() -> None: is None ) assert ( - pipeline._usable_cached_source_stats( + usable_cached_source_stats( { **base_entry, "class_metrics": [], @@ -541,20 +545,20 @@ def test_analyze_skips_suppressed_dead_code_scan_when_dead_code_is_disabled( ) monkeypatch.setattr( - pipeline, + core_pipeline, "compute_project_metrics", lambda **kwargs: (project_metrics, None, ()), ) monkeypatch.setattr( - pipeline, + core_pipeline, "find_suppressed_unused", lambda **kwargs: (_ for _ in ()).throw( AssertionError("should not compute suppressed dead-code items") ), ) - monkeypatch.setattr(pipeline, "compute_suggestions", lambda **kwargs: ()) + monkeypatch.setattr(core_pipeline, "compute_suggestions", lambda **kwargs: ()) monkeypatch.setattr( - pipeline, + core_pipeline, "build_metrics_report_payload", lambda **kwargs: {"health": 
{"score": 100, "grade": "A", "dimensions": {}}}, ) diff --git a/tests/test_public_api_surface.py b/tests/test_public_api_surface.py new file mode 100644 index 0000000..4e052df --- /dev/null +++ b/tests/test_public_api_surface.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import inspect +from typing import cast + +import codeclone +import codeclone.main as main_module +from codeclone.surfaces.mcp.service import CodeCloneMCPService +from tests._contract_snapshots import load_json_snapshot + + +def test_public_api_surface_snapshot() -> None: + snapshot = { + "codeclone_exports": list(getattr(codeclone, "__all__", ())), + "main_exports": list(getattr(main_module, "__all__", ())), + "main_signature": str(inspect.signature(main_module.main)), + "mcp_service_public_methods": [ + { + "name": name, + "signature": str(inspect.signature(member)), + } + for name, member in inspect.getmembers( + CodeCloneMCPService, + predicate=inspect.isfunction, + ) + if not name.startswith("_") + ], + } + expected = cast( + "dict[str, object]", + load_json_snapshot("public_api_surface.json"), + ) + assert snapshot == expected diff --git a/tests/test_renderer_isolation.py b/tests/test_renderer_isolation.py new file mode 100644 index 0000000..eb72a49 --- /dev/null +++ b/tests/test_renderer_isolation.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from pathlib import Path + +from tests._import_graph import _iter_local_imports, _module_name_from_path + + +def test_report_renderers_do_not_import_pipeline_layers() -> None: + root = Path(__file__).resolve().parents[1] + renderer_root = root / "codeclone" / "report" / "renderers" + forbidden_prefixes = ( + "codeclone.core", + "codeclone.analysis", + "codeclone.metrics", + "codeclone.findings", + ) + violations: list[str] = [] + + for path in sorted(renderer_root.glob("*.py")): + module_name = _module_name_from_path(path.relative_to(root)) + violations.extend( + [ + f"{module_name} -> {import_name}" + for import_name in 
_iter_local_imports( + module_name, + path.read_text("utf-8"), + ) + if any( + import_name == prefix or import_name.startswith(prefix + ".") + for prefix in forbidden_prefixes + ) + ] + ) + + assert violations == [] diff --git a/tests/test_report.py b/tests/test_report.py index 28132ba..a960a49 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -16,11 +16,6 @@ import codeclone.report.merge as merge_mod import codeclone.report.overview as overview_mod import codeclone.report.serialize as serialize_mod -from codeclone._html_report._sections._structural import ( - _finding_why_template_html, - build_structural_findings_html_panel, -) -from codeclone._html_snippets import _FileCache from codeclone.contracts import CACHE_VERSION, REPORT_SCHEMA_VERSION from codeclone.models import ( StructuralFindingGroup, @@ -39,6 +34,11 @@ to_markdown_report, to_sarif_report, ) +from codeclone.report.html.sections._structural import ( + _finding_why_template_html, + build_structural_findings_html_panel, +) +from codeclone.report.html.widgets.snippets import _FileCache from codeclone.report.json_contract import build_report_document from codeclone.report.overview import materialize_report_overview from codeclone.report.segments import ( diff --git a/tests/test_report_branch_invariants.py b/tests/test_report_branch_invariants.py index 558e1be..13c6009 100644 --- a/tests/test_report_branch_invariants.py +++ b/tests/test_report_branch_invariants.py @@ -6,12 +6,6 @@ from __future__ import annotations -from codeclone._html_report._sections._structural import ( - _finding_matters_html, - _finding_why_template_html, - _occurrences_table_html, -) -from codeclone._html_snippets import _FileCache from codeclone.models import StructuralFindingGroup, StructuralFindingOccurrence from codeclone.report.explain_contract import ( BLOCK_HINT_ASSERT_ONLY, @@ -21,6 +15,12 @@ _dedupe_items, _finding_scope_text, ) +from codeclone.report.html.sections._structural import ( + _finding_matters_html, 
+ _finding_why_template_html, + _occurrences_table_html, +) +from codeclone.report.html.widgets.snippets import _FileCache from codeclone.report.markdown import ( _append_findings_section, _append_metric_items, diff --git a/tests/test_report_contract_coverage.py b/tests/test_report_contract_coverage.py index 47b722e..fe86b4f 100644 --- a/tests/test_report_contract_coverage.py +++ b/tests/test_report_contract_coverage.py @@ -14,7 +14,6 @@ import pytest import codeclone.report.json_contract as json_contract_mod -from codeclone import _coerce from codeclone.contracts import REPORT_SCHEMA_VERSION from codeclone.models import ( ReportLocation, @@ -103,6 +102,7 @@ _structural_kind_label, render_text_report_document, ) +from codeclone.utils import coerce as _coerce from tests._assertions import assert_contains_all, assert_mapping_entries diff --git a/tests/test_scanner_extra.py b/tests/test_scanner_extra.py index a7a77ca..e00bff7 100644 --- a/tests/test_scanner_extra.py +++ b/tests/test_scanner_extra.py @@ -12,7 +12,7 @@ import pytest import codeclone.scanner as scanner -from codeclone.errors import ValidationError +from codeclone.contracts.errors import ValidationError from codeclone.scanner import iter_py_files, module_name_from_path diff --git a/tests/test_security.py b/tests/test_security.py index 599c092..edc73ad 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -11,11 +11,12 @@ import pytest -from codeclone.cli import MAX_FILE_SIZE, process_file -from codeclone.errors import ValidationError -from codeclone.html_report import build_html_report -from codeclone.normalize import NormalizationConfig +from codeclone.analysis.normalizer import NormalizationConfig +from codeclone.contracts.errors import ValidationError +from codeclone.core._types import MAX_FILE_SIZE +from codeclone.core.worker import process_file from codeclone.report import build_block_group_facts +from codeclone.report.html import build_html_report from codeclone.scanner import 
iter_py_files diff --git a/tests/test_segments.py b/tests/test_segments.py index 6253233..fa821b4 100644 --- a/tests/test_segments.py +++ b/tests/test_segments.py @@ -6,8 +6,8 @@ import ast +from codeclone.analysis.normalizer import NormalizationConfig from codeclone.blocks import extract_segments -from codeclone.normalize import NormalizationConfig def test_extract_segments_windows() -> None: diff --git a/tests/test_structural_findings.py b/tests/test_structural_findings.py index 1837a0d..942792c 100644 --- a/tests/test_structural_findings.py +++ b/tests/test_structural_findings.py @@ -14,13 +14,13 @@ import pytest -import codeclone.structural_findings as sf -from codeclone import _coerce -from codeclone.models import StructuralFindingGroup, StructuralFindingOccurrence -from codeclone.structural_findings import ( +import codeclone.findings.structural.detectors as sf +from codeclone.findings.structural.detectors import ( build_clone_cohort_structural_findings, scan_function_structure, ) +from codeclone.models import StructuralFindingGroup, StructuralFindingOccurrence +from codeclone.utils import coerce as _coerce # --------------------------------------------------------------------------- # Helpers diff --git a/tests/test_target_module_map_imports.py b/tests/test_target_module_map_imports.py new file mode 100644 index 0000000..fe628a3 --- /dev/null +++ b/tests/test_target_module_map_imports.py @@ -0,0 +1,100 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import importlib.util + +from codeclone import baseline as baseline_pkg +from codeclone.analysis import _module_walk as analysis_module_walk +from codeclone.analysis import parser as analysis_parser +from codeclone.analysis import units as analysis_units +from codeclone.analysis.cfg import CFGBuilder +from codeclone.analysis.cfg import CFGBuilder as AnalysisCFGBuilder +from codeclone.analysis.cfg_model import CFG as AnalysisCFG +from codeclone.analysis.cfg_model import Block as AnalysisBlock +from codeclone.analysis.fingerprint import bucket_loc, sha1 +from codeclone.analysis.fingerprint import bucket_loc as analysis_bucket_loc +from codeclone.analysis.fingerprint import sha1 as analysis_sha1 +from codeclone.analysis.normalizer import NormalizationConfig +from codeclone.analysis.normalizer import ( + NormalizationConfig as AnalysisNormalizationConfig, +) +from codeclone.baseline.clone_baseline import Baseline +from codeclone.baseline.metrics_baseline import MetricsBaseline +from codeclone.contracts.errors import BaselineValidationError +from codeclone.contracts.schemas import AnalysisProfile, ReportMeta +from codeclone.findings.clones.grouping import build_groups as canonical_build_groups +from codeclone.findings.structural.detectors import ( + scan_function_structure as canonical_scan_function_structure, +) +from codeclone.report.html import build_html_report +from codeclone.surfaces.mcp.server import build_mcp_server +from codeclone.surfaces.mcp.service import CodeCloneMCPService + + +def test_analysis_canonical_imports_are_stable() -> None: + assert CFGBuilder is AnalysisCFGBuilder + assert AnalysisCFG.__module__ == "codeclone.analysis.cfg_model" + assert AnalysisBlock.__module__ == "codeclone.analysis.cfg_model" + assert NormalizationConfig is AnalysisNormalizationConfig + assert sha1 is analysis_sha1 + assert bucket_loc is analysis_bucket_loc + + 
+def test_baseline_canonical_imports_match_compat_packages() -> None: + assert Baseline is baseline_pkg.Baseline + assert MetricsBaseline.__module__ == "codeclone.baseline.metrics_baseline" + + +def test_old_analysis_and_findings_paths_are_gone() -> None: + assert importlib.util.find_spec("codeclone.cli") is None + assert importlib.util.find_spec("codeclone.cfg") is None + assert importlib.util.find_spec("codeclone.errors") is None + assert importlib.util.find_spec("codeclone.extractor") is None + assert importlib.util.find_spec("codeclone.metrics_baseline") is None + assert importlib.util.find_spec("codeclone.normalize") is None + assert importlib.util.find_spec("codeclone.fingerprint") is None + assert importlib.util.find_spec("codeclone.grouping") is None + assert importlib.util.find_spec("codeclone.pipeline") is None + assert importlib.util.find_spec("codeclone.structural_findings") is None + assert callable(canonical_build_groups) + assert callable(canonical_scan_function_structure) + + +def test_extractor_canonical_helpers_live_in_analysis_modules() -> None: + assert ( + analysis_module_walk._collect_module_walk_data.__module__ + == "codeclone.analysis._module_walk" + ) + assert ( + analysis_module_walk._resolve_import_target.__module__ + == "codeclone.analysis._module_walk" + ) + assert ( + analysis_parser._declaration_token_index.__module__ + == "codeclone.analysis.parser" + ) + assert analysis_units._eligible_unit_shape.__module__ == "codeclone.analysis.units" + + +def test_html_report_is_canonical_report_subpackage() -> None: + assert importlib.util.find_spec("codeclone.html_report") is None + assert importlib.util.find_spec("codeclone._html_report") is None + assert callable(build_html_report) + + +def test_mcp_is_canonical_surfaces_subpackage() -> None: + assert importlib.util.find_spec("codeclone.mcp_service") is None + assert importlib.util.find_spec("codeclone.mcp_server") is None + assert callable(build_mcp_server) + assert 
CodeCloneMCPService.__module__ == "codeclone.surfaces.mcp.service" + + +def test_contracts_are_canonical_contracts_package() -> None: + assert BaselineValidationError.__module__ == "codeclone.contracts.errors" + assert AnalysisProfile.__module__ == "codeclone.contracts.schemas" + assert ReportMeta.__module__ == "codeclone.contracts.schemas" diff --git a/uv.lock b/uv.lock index 1e7ac3a..2085084 100644 --- a/uv.lock +++ b/uv.lock @@ -278,7 +278,7 @@ wheels = [ [[package]] name = "codeclone" -version = "2.0.0b5" +version = "2.0.0b6" source = { editable = "." } dependencies = [ { name = "orjson" }, @@ -537,11 +537,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.28.0" +version = "3.29.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/17/6e8890271880903e3538660a21d63a6c1fea969ac71d0d6b608b78727fa9/filelock-3.28.0.tar.gz", hash = "sha256:4ed1010aae813c4ee8d9c660e4792475ee60c4a0ba76073ceaf862bd317e3ca6", size = 56474, upload-time = "2026-04-14T22:54:33.625Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/fe/997687a931ab51049acce6fa1f23e8f01216374ea81374ddee763c493db5/filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", size = 57571, upload-time = "2026-04-19T15:39:10.068Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/21/2f728888c45033d34a417bfcd248ea2564c9e08ab1bfd301377cf05d5586/filelock-3.28.0-py3-none-any.whl", hash = "sha256:de9af6712788e7171df1b28b15eba2446c69721433fa427a9bee07b17820a9db", size = 39189, upload-time = "2026-04-14T22:54:32.037Z" }, + { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" }, ] [[package]] @@ -604,11 +604,11 @@ wheels = [ [[package]] name = 
"identify" -version = "2.6.18" +version = "2.6.19" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/46/c4/7fb4db12296cdb11893d61c92048fe617ee853f8523b9b296ac03b43757e/identify-2.6.18.tar.gz", hash = "sha256:873ac56a5e3fd63e7438a7ecbc4d91aca692eb3fefa4534db2b7913f3fc352fd", size = 99580, upload-time = "2026-03-15T18:39:50.319Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/63/51723b5f116cc04b061cb6f5a561790abf249d25931d515cd375e063e0f4/identify-2.6.19.tar.gz", hash = "sha256:6be5020c38fcb07da56c53733538a3081ea5aa70d36a156f83044bfbf9173842", size = 99567, upload-time = "2026-04-17T18:39:50.265Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/46/33/92ef41c6fad0233e41d3d84ba8e8ad18d1780f1e5d99b3c683e6d7f98b63/identify-2.6.18-py2.py3-none-any.whl", hash = "sha256:8db9d3c8ea9079db92cafb0ebf97abdc09d52e97f4dcf773a2e694048b7cd737", size = 99394, upload-time = "2026-03-15T18:39:48.915Z" }, + { url = "https://files.pythonhosted.org/packages/94/84/d9273cd09688070a6523c4aee4663a8538721b2b755c4962aafae0011e72/identify-2.6.19-py2.py3-none-any.whl", hash = "sha256:20e6a87f786f768c092a721ad107fc9df0eb89347be9396cadf3f4abbd1fb78a", size = 99397, upload-time = "2026-04-17T18:39:49.221Z" }, ] [[package]] @@ -1125,7 +1125,7 @@ wheels = [ [[package]] name = "pydantic" -version = "2.13.1" +version = "2.13.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, @@ -1133,125 +1133,125 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f3/6b/1353beb3d1cd5cf61cdec5b6f87a9872399de3bc5cae0b7ce07ff4de2ab0/pydantic-2.13.1.tar.gz", hash = "sha256:a0f829b279ddd1e39291133fe2539d2aa46cc6b150c1706a270ff0879e3774d2", size = 843746, upload-time = "2026-04-15T14:57:19.398Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/09/e5/06d23afac9973109d1e3c8ad38e1547a12e860610e327c05ee686827dc37/pydantic-2.13.2.tar.gz", hash = "sha256:b418196607e61081c3226dcd4f0672f2a194828abb9109e9cfb84026564df2d1", size = 843836, upload-time = "2026-04-17T09:31:59.636Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/5a/2225f4c176dbfed0d809e848b50ef08f70e61daa667b7fa14b0d311ae44d/pydantic-2.13.1-py3-none-any.whl", hash = "sha256:9557ecc2806faaf6037f85b1fbd963d01e30511c48085f0d573650fdeaad378a", size = 471917, upload-time = "2026-04-15T14:57:17.277Z" }, + { url = "https://files.pythonhosted.org/packages/77/ca/b45c378e6e8d0b90577288b533e04e95b7afd61bb1d51b6c263176435489/pydantic-2.13.2-py3-none-any.whl", hash = "sha256:a525087f4c03d7e7456a3de89b64cd693d2229933bb1068b9af6befd5563694e", size = 471947, upload-time = "2026-04-17T09:31:57.541Z" }, ] [[package]] name = "pydantic-core" -version = "2.46.1" +version = "2.46.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/93/f97a86a7eb28faa1d038af2fd5d6166418b4433659108a4c311b57128b2d/pydantic_core-2.46.1.tar.gz", hash = "sha256:d408153772d9f298098fb5d620f045bdf0f017af0d5cb6e309ef8c205540caa4", size = 471230, upload-time = "2026-04-15T14:49:34.52Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/a0/07f275411355b567b994e565bc5ea9dbf522978060c18e3b7edf646c0fc2/pydantic_core-2.46.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:84eb5414871fd0293c38d2075802f95030ff11a92cf2189942bf76fd181af77b", size = 2123782, upload-time = "2026-04-15T14:52:57.172Z" }, - { url = "https://files.pythonhosted.org/packages/ab/71/d027c7de46df5b9287ed6f0ef02346c84d61348326253a4f13695d54d66f/pydantic_core-2.46.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5c75fb25db086bf504c55730442e471c12bc9bfae817dd359b1a36bc93049d34", size = 1948561, upload-time = "2026-04-15T14:53:12.07Z" }, - { url 
= "https://files.pythonhosted.org/packages/77/74/cba894bea0d51a3b2dcada9eb3af9c4cfaa271bf21123372dc82ccef029f/pydantic_core-2.46.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dc09f0221425453fd9f73fd70bba15817d25b95858282702d7305a08d37306", size = 1974387, upload-time = "2026-04-15T14:50:14.048Z" }, - { url = "https://files.pythonhosted.org/packages/3b/ad/cc122887d6f20ac5d997928b0bf3016ac9c7bae07dce089333aa0c2e868b/pydantic_core-2.46.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:139fd6722abc5e6513aa0a27b06ebeb997838c5b179cf5e83862ace45f281c56", size = 2054868, upload-time = "2026-04-15T14:49:51.912Z" }, - { url = "https://files.pythonhosted.org/packages/9f/09/22049b22d65a67253cbdced88dbce0e97162f35cc433917df37df794ede8/pydantic_core-2.46.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba723fd8ef6011af71f92ed54adb604e7699d172f4273e4b46f1cfb8ee8d72fd", size = 2228717, upload-time = "2026-04-15T14:49:27.384Z" }, - { url = "https://files.pythonhosted.org/packages/e6/98/b35a8a187cf977462668b5064c606e290c88c2561e053883d86193ab9c51/pydantic_core-2.46.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:828410e082555e55da9bbb5e6c17617386fe1415c4d42765a90d372ed9cce813", size = 2298261, upload-time = "2026-04-15T14:52:20.463Z" }, - { url = "https://files.pythonhosted.org/packages/98/ae/46f8d693caefc09d8e2d3f19a6b4f2252cf6542f0b555759f2b5ec2b4ca5/pydantic_core-2.46.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb5cd53264c9906c163a71b489e9ac71b0ae13a2dd0241e6129f4df38ba1c814", size = 2094496, upload-time = "2026-04-15T14:49:59.711Z" }, - { url = "https://files.pythonhosted.org/packages/ee/40/7e4013639d316d2cb67dae288c768d49cc4a7a4b16ef869e486880db1a1f/pydantic_core-2.46.1-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:4530a6594883d9d4a9c7ef68464ef6b4a88d839e3531c089a3942c78bffe0a66", size = 2144795, upload-time = 
"2026-04-15T14:52:44.731Z" }, - { url = "https://files.pythonhosted.org/packages/0d/87/c00f6450059804faf30f568009c8c98e72e6802c1ccd8b562da57953ad81/pydantic_core-2.46.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ed1c71f60abbf9c9a440dc8fc6b1180c45dcab3a5e311250de99744a0166bc95", size = 2173108, upload-time = "2026-04-15T14:51:37.806Z" }, - { url = "https://files.pythonhosted.org/packages/46/15/7a8fb06c109a07dbc1f5f272b2da1290c8a25f5900a579086e433049fc1a/pydantic_core-2.46.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:254253491f1b8e3ba18c15fe924bb9b175f1a48413b74e8f0c67b8f51b6f726b", size = 2185687, upload-time = "2026-04-15T14:51:33.125Z" }, - { url = "https://files.pythonhosted.org/packages/d9/38/c52ead78febf23d32db898c7022173c674226cf3c8ee1645220ab9516931/pydantic_core-2.46.1-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:dfcf6485ac38698a5b45f37467b8eb2f4f8e3edd5790e2579c5d52fdfffb2e3d", size = 2326273, upload-time = "2026-04-15T14:51:10.614Z" }, - { url = "https://files.pythonhosted.org/packages/1e/af/cb5ea2336e9938b3a0536ce4bfed4a342285caa8a6b8ff449a7bc2f179ec/pydantic_core-2.46.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:592b39150ab5b5a2cb2eb885097ee4c2e4d54e3b902f6ae32528f7e6e42c00fc", size = 2368428, upload-time = "2026-04-15T14:49:25.804Z" }, - { url = "https://files.pythonhosted.org/packages/a2/99/adcfbcbd96556120e7d795aab4fd77f5104a49051929c3805a9d736ec48f/pydantic_core-2.46.1-cp310-cp310-win32.whl", hash = "sha256:eb37b1369ad39ec046a36dc81ffd76870766bda2073f57448bbcb1fd3e4c5ad0", size = 1993405, upload-time = "2026-04-15T14:50:51.082Z" }, - { url = "https://files.pythonhosted.org/packages/c4/ff/2767be513a250293f80748740ce73b0f0677711fc791b1afab3499734dd2/pydantic_core-2.46.1-cp310-cp310-win_amd64.whl", hash = "sha256:c330dab8254d422880177436a5892ac6d9337afff9fe383fb1f8c6caedb685e1", size = 2068177, upload-time = "2026-04-15T14:52:29.899Z" }, - { url = 
"https://files.pythonhosted.org/packages/37/96/d83d23fc3c822326d808b8c0457d4f7afb1552e741a7c2378a974c522c63/pydantic_core-2.46.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f0f84431981c6ae217ebb96c3eca8212f6f5edf116f62f62cc6c7d72971f826c", size = 2121938, upload-time = "2026-04-15T14:49:21.568Z" }, - { url = "https://files.pythonhosted.org/packages/11/44/94b1251825560f5d90e25ebcd457c4772e1f3e1a378f438c040fe2148f3e/pydantic_core-2.46.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a05f60b36549f59ab585924410187276ec17a94bae939273a213cea252c8471e", size = 1946541, upload-time = "2026-04-15T14:49:57.925Z" }, - { url = "https://files.pythonhosted.org/packages/d6/8f/79aff4c8bd6fb49001ffe4747c775c0f066add9da13dec180eb0023ada34/pydantic_core-2.46.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2c93fd1693afdfae7b2897f7530ed3f180d9fc92ee105df3ebdff24d5061cc8", size = 1973067, upload-time = "2026-04-15T14:51:14.765Z" }, - { url = "https://files.pythonhosted.org/packages/56/01/826ab3afb1d43cbfdc2aa592bff0f1f6f4b90f5a801478ba07bde74e706f/pydantic_core-2.46.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c19983759394c702a776f42f33df8d7bb7883aefaa44a69ba86356a9fd67367", size = 2053146, upload-time = "2026-04-15T14:51:48.847Z" }, - { url = "https://files.pythonhosted.org/packages/6c/32/be20ec48ccbd85cac3f8d96ca0a0f87d5c14fbf1eb438da0ac733f2546f2/pydantic_core-2.46.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6e8debf586d7d800a718194417497db5126d4f4302885a2dff721e9df3f4851c", size = 2227393, upload-time = "2026-04-15T14:51:53.218Z" }, - { url = "https://files.pythonhosted.org/packages/b5/8e/1fae21c887f363ed1a5cf9f267027700c796b7435313c21723cd3e8aeeb3/pydantic_core-2.46.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:54160da754d63da7780b76e5743d44f026b9daffc6b8c9696a756368c0a298c9", size = 2296193, upload-time = "2026-04-15T14:50:31.065Z" }, - { 
url = "https://files.pythonhosted.org/packages/0a/29/e5637b539458ffb60ba9c204fc16c52ea36828427fa667e4f9c7d83cfea9/pydantic_core-2.46.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74cee962c8b4df9a9b0bb63582e51986127ee2316f0c49143b2996f4b201bd9c", size = 2092156, upload-time = "2026-04-15T14:52:37.227Z" }, - { url = "https://files.pythonhosted.org/packages/bc/fa/3a453934af019c72652fb75489c504ae689de632fa2e037fec3195cd6948/pydantic_core-2.46.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:0ba3462872a678ebe21b15bd78eff40298b43ea50c26f230ec535c00cf93ec7e", size = 2142845, upload-time = "2026-04-15T14:51:04.847Z" }, - { url = "https://files.pythonhosted.org/packages/36/c2/71b56fa10a80b98036f4bf0fbb912833f8e9c61b15e66c236fadaf54c27c/pydantic_core-2.46.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b718873a966d91514c5252775f568985401b54a220919ab22b19a6c4edd8c053", size = 2170756, upload-time = "2026-04-15T14:50:17.16Z" }, - { url = "https://files.pythonhosted.org/packages/e1/da/a4c761dc8d982e2c53f991c0c36d37f6fe308e149bf0a101c25b0750a893/pydantic_core-2.46.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cb1310a9fd722da8cceec1fb59875e1c86bee37f0d8a9c667220f00ee722cc8f", size = 2183579, upload-time = "2026-04-15T14:51:20.888Z" }, - { url = "https://files.pythonhosted.org/packages/e5/d4/b0a6c00622e4afd9a807b8bb05ba8f1a0b69ca068ac138d9d36700fe767b/pydantic_core-2.46.1-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:98e3ede76eb4b9db8e7b5efea07a3f3315135485794a5df91e3adf56c4d573b6", size = 2324516, upload-time = "2026-04-15T14:52:32.521Z" }, - { url = "https://files.pythonhosted.org/packages/45/f1/a4bace0c98b0774b02de99233882c48d94b399ba4394dd5e209665d05062/pydantic_core-2.46.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:780b8f24ff286e21fd010247011a68ea902c34b1eee7d775b598bc28f5f28ab6", size = 2367084, upload-time = "2026-04-15T14:50:37.832Z" }, - { url = 
"https://files.pythonhosted.org/packages/3a/54/ae827a3976b136d1c9a9a56c2299a8053605a69facaa0c7354ba167305eb/pydantic_core-2.46.1-cp311-cp311-win32.whl", hash = "sha256:1d452f4cad0f39a94414ca68cda7cc55ff4c3801b5ab0bc99818284a3d39f889", size = 1992061, upload-time = "2026-04-15T14:51:44.704Z" }, - { url = "https://files.pythonhosted.org/packages/55/ae/d85de69e0fdfafc0e87d88bd5d0c157a5443efaaef24eed152a8a8f8dfb6/pydantic_core-2.46.1-cp311-cp311-win_amd64.whl", hash = "sha256:f463fd6a67138d70200d2627676e9efbb0cee26d98a5d3042a35aa20f95ec129", size = 2065497, upload-time = "2026-04-15T14:51:17.077Z" }, - { url = "https://files.pythonhosted.org/packages/46/a7/9eb3b1038db630e1550924e81d1211b0dd70ac3740901fd95f30f5497990/pydantic_core-2.46.1-cp311-cp311-win_arm64.whl", hash = "sha256:155aec0a117140e86775eec113b574c1c299358bfd99467b2ea7b2ea26db2614", size = 2045914, upload-time = "2026-04-15T14:51:24.782Z" }, - { url = "https://files.pythonhosted.org/packages/ce/fb/caaa8ee23861c170f07dbd58fc2be3a2c02a32637693cbb23eef02e84808/pydantic_core-2.46.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ae8c8c5eb4c796944f3166f2f0dab6c761c2c2cc5bd20e5f692128be8600b9a4", size = 2119472, upload-time = "2026-04-15T14:49:45.946Z" }, - { url = "https://files.pythonhosted.org/packages/fa/61/bcffaa52894489ff89e5e1cdde67429914bf083c0db7296bef153020f786/pydantic_core-2.46.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:daba6f5f5b986aa0682623a1a4f8d1ecb0ec00ce09cfa9ca71a3b742bc383e3a", size = 1951230, upload-time = "2026-04-15T14:52:27.646Z" }, - { url = "https://files.pythonhosted.org/packages/f8/95/80d2f43a2a1a1e3220fd329d614aa5a39e0a75d24353a3aaf226e605f1c2/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0265f3a2460539ecc97817a80c7a23c458dd84191229b655522a2674f701f14e", size = 1976394, upload-time = "2026-04-15T14:50:32.742Z" }, - { url = 
"https://files.pythonhosted.org/packages/8d/31/2c5b1a207926b5fc1961a2d11da940129bc3841c36cc4df03014195b2966/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb16c0156c4b4e94aa3719138cc43c53d30ff21126b6a3af63786dcc0757b56e", size = 2068455, upload-time = "2026-04-15T14:50:01.286Z" }, - { url = "https://files.pythonhosted.org/packages/7d/36/c6aa07274359a51ac62895895325ce90107e811c6cea39d2617a99ef10d7/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b42d80fad8e4b283e1e4138f1142f0d038c46d137aad2f9824ad9086080dd41", size = 2239049, upload-time = "2026-04-15T14:53:02.216Z" }, - { url = "https://files.pythonhosted.org/packages/0a/3f/77cdd0db8bddc714842dfd93f737c863751cf02001c993341504f6b0cd53/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9cced85896d5b795293bc36b7e2fb0347a36c828551b50cbba510510d928548c", size = 2318681, upload-time = "2026-04-15T14:50:04.539Z" }, - { url = "https://files.pythonhosted.org/packages/a1/a3/09d929a40e6727274b0b500ad06e1b3f35d4f4665ae1c8ba65acbb17e9b5/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a641cb1e74b44c418adaf9f5f450670dbec53511f030d8cde8d8accb66edc363", size = 2096527, upload-time = "2026-04-15T14:53:14.766Z" }, - { url = "https://files.pythonhosted.org/packages/89/ae/544c3a82456ebc254a9fcbe2715bab76c70acf9d291aaea24391147943e4/pydantic_core-2.46.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:191e7a122ab14eb12415fe3f92610fc06c7f1d2b4b9101d24d490d447ac92506", size = 2170407, upload-time = "2026-04-15T14:51:27.138Z" }, - { url = "https://files.pythonhosted.org/packages/9d/ce/0dfd881c7af4c522f47b325707bd9a2cdcf4f40e4f2fd30df0e9a3e8d393/pydantic_core-2.46.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4fe4ff660f7938b5d92f21529ce331b011aa35e481ab64b7cd03f52384e544bb", size = 2188578, upload-time = 
"2026-04-15T14:50:39.655Z" }, - { url = "https://files.pythonhosted.org/packages/a1/e9/980ea2a6d5114dd1a62ecc5f56feb3d34555f33bd11043f042e5f7f0724a/pydantic_core-2.46.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:18fcea085b3adc3868d8d19606da52d7a52d8bccd8e28652b0778dbe5e6a6660", size = 2188959, upload-time = "2026-04-15T14:52:42.243Z" }, - { url = "https://files.pythonhosted.org/packages/e7/f1/595e0f50f4bfc56cde2fe558f2b0978f29f2865da894c6226231e17464a5/pydantic_core-2.46.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:e8e589e7c9466e022d79e13c5764c2239b2e5a7993ba727822b021234f89b56b", size = 2339973, upload-time = "2026-04-15T14:52:10.642Z" }, - { url = "https://files.pythonhosted.org/packages/49/44/be9f979a6ab6b8c36865ccd92c3a38a760c66055e1f384665f35525134c4/pydantic_core-2.46.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f78eb3d4027963bdc9baccd177f02a98bf8714bc51fe17153d8b51218918b5bc", size = 2385228, upload-time = "2026-04-15T14:51:00.77Z" }, - { url = "https://files.pythonhosted.org/packages/5b/d4/c826cd711787d240219f01d0d3ca116cb55516b8b95277820aa9c85e1882/pydantic_core-2.46.1-cp312-cp312-win32.whl", hash = "sha256:54fe30c20cab03844dc63bdc6ddca67f74a2eb8482df69c1e5f68396856241be", size = 1978828, upload-time = "2026-04-15T14:50:29.362Z" }, - { url = "https://files.pythonhosted.org/packages/22/05/8a1fcf8181be4c7a9cfc34e5fbf2d9c3866edc9dfd3c48d5401806e0a523/pydantic_core-2.46.1-cp312-cp312-win_amd64.whl", hash = "sha256:aea4e22ed4c53f2774221435e39969a54d2e783f4aee902cdd6c8011415de893", size = 2070015, upload-time = "2026-04-15T14:49:47.301Z" }, - { url = "https://files.pythonhosted.org/packages/61/d5/fea36ad2882b99c174ef4ffbc7ea6523f6abe26060fbc1f77d6441670232/pydantic_core-2.46.1-cp312-cp312-win_arm64.whl", hash = "sha256:f76fb49c34b4d66aa6e552ce9e852ea97a3a06301a9f01ae82f23e449e3a55f8", size = 2030176, upload-time = "2026-04-15T14:50:47.307Z" }, - { url = 
"https://files.pythonhosted.org/packages/ff/d2/bda39bad2f426cb5078e6ad28076614d3926704196efe0d7a2a19a99025d/pydantic_core-2.46.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:cdc8a5762a9c4b9d86e204d555444e3227507c92daba06259ee66595834de47a", size = 2119092, upload-time = "2026-04-15T14:49:50.392Z" }, - { url = "https://files.pythonhosted.org/packages/ee/f3/69631e64d69cb3481494b2bddefe0ddd07771209f74e9106d066f9138c2a/pydantic_core-2.46.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ba381dfe9c85692c566ecb60fa5a77a697a2a8eebe274ec5e4d6ec15fafad799", size = 1951400, upload-time = "2026-04-15T14:51:06.588Z" }, - { url = "https://files.pythonhosted.org/packages/53/1c/21cb3db6ae997df31be8e91f213081f72ffa641cb45c89b8a1986832b1f9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1593d8de98207466dc070118322fef68307a0cc6a5625e7b386f6fdae57f9ab6", size = 1976864, upload-time = "2026-04-15T14:50:54.804Z" }, - { url = "https://files.pythonhosted.org/packages/91/9c/05c819f734318ce5a6ca24da300d93696c105af4adb90494ee571303afd8/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8262c74a1af5b0fdf795f5537f7145785a63f9fbf9e15405f547440c30017ed8", size = 2066669, upload-time = "2026-04-15T14:51:42.346Z" }, - { url = "https://files.pythonhosted.org/packages/cb/23/fadddf1c7f2f517f58731aea9b35c914e6005250f08dac9b8e53904cdbaa/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b88949a24182e83fbbb3f7ca9b7858d0d37b735700ea91081434b7d37b3b444", size = 2238737, upload-time = "2026-04-15T14:50:45.558Z" }, - { url = "https://files.pythonhosted.org/packages/23/07/0cd4f95cb0359c8b1ec71e89c3777e7932c8dfeb9cd54740289f310aaead/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8f3708cd55537aeaf3fd0ea55df0d68d0da51dcb07cbc8508745b34acc4c6e0", size = 2316258, upload-time = "2026-04-15T14:51:08.471Z" }, - { 
url = "https://files.pythonhosted.org/packages/0c/40/6fc24c3766a19c222a0d60d652b78f0283339d4cd4c173fab06b7ee76571/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f79292435fff1d4f0c18d9cfaf214025cc88e4f5104bfaed53f173621da1c743", size = 2097474, upload-time = "2026-04-15T14:49:56.543Z" }, - { url = "https://files.pythonhosted.org/packages/4b/af/f39795d1ce549e35d0841382b9c616ae211caffb88863147369a8d74fba9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:a2e607aeb59cf4575bb364470288db3b9a1f0e7415d053a322e3e154c1a0802e", size = 2168383, upload-time = "2026-04-15T14:51:29.269Z" }, - { url = "https://files.pythonhosted.org/packages/e6/32/0d563f74582795779df6cc270c3fc220f49f4daf7860d74a5a6cda8491ff/pydantic_core-2.46.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec5ca190b75878a9f6ae1fc8f5eb678497934475aef3d93204c9fa01e97370b6", size = 2186182, upload-time = "2026-04-15T14:50:19.097Z" }, - { url = "https://files.pythonhosted.org/packages/5c/07/1c10d5ce312fc4cf86d1e50bdcdbb8ef248409597b099cab1b4bb3a093f7/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:1f80535259dcdd517d7b8ca588d5ca24b4f337228e583bebedf7a3adcdf5f721", size = 2187859, upload-time = "2026-04-15T14:49:22.974Z" }, - { url = "https://files.pythonhosted.org/packages/92/01/e1f62d4cb39f0913dbf5c95b9b119ef30ddba9493dff8c2b012f0cdd67dc/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:24820b3c82c43df61eca30147e42853e6c127d8b868afdc0c162df829e011eb4", size = 2338372, upload-time = "2026-04-15T14:49:53.316Z" }, - { url = "https://files.pythonhosted.org/packages/44/ed/218dfeea6127fb1781a6ceca241ec6edf00e8a8933ff331af2215975a534/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f12794b1dd8ac9fb66619e0b3a0427189f5d5638e55a3de1385121a9b7bf9b39", size = 2384039, upload-time = "2026-04-15T14:53:04.929Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/1e/011e763cd059238249fbd5780e0f8d0b04b47f86c8925e22784f3e5fc977/pydantic_core-2.46.1-cp313-cp313-win32.whl", hash = "sha256:9bc09aed935cdf50f09e908923f9efbcca54e9244bd14a5a0e2a6c8d2c21b4e9", size = 1977943, upload-time = "2026-04-15T14:52:17.969Z" }, - { url = "https://files.pythonhosted.org/packages/8c/06/b559a490d3ed106e9b1777b8d5c8112dd8d31716243cd662616f66c1f8ea/pydantic_core-2.46.1-cp313-cp313-win_amd64.whl", hash = "sha256:fac2d6c8615b8b42bee14677861ba09d56ee076ba4a65cfb9c3c3d0cc89042f2", size = 2068729, upload-time = "2026-04-15T14:53:07.288Z" }, - { url = "https://files.pythonhosted.org/packages/9f/52/32a198946e2e19508532aa9da02a61419eb15bd2d96bab57f810f2713e31/pydantic_core-2.46.1-cp313-cp313-win_arm64.whl", hash = "sha256:f978329f12ace9f3cb814a5e44d98bbeced2e36f633132bafa06d2d71332e33e", size = 2029550, upload-time = "2026-04-15T14:52:22.707Z" }, - { url = "https://files.pythonhosted.org/packages/bd/2b/6793fe89ab66cb2d3d6e5768044eab80bba1d0fae8fd904d0a1574712e17/pydantic_core-2.46.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:9917cb61effac7ec0f448ef491ec7584526d2193be84ff981e85cbf18b68c42a", size = 2118110, upload-time = "2026-04-15T14:50:52.947Z" }, - { url = "https://files.pythonhosted.org/packages/d2/87/e9a905ddfcc2fd7bd862b340c02be6ab1f827922822d425513635d0ac774/pydantic_core-2.46.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0e749679ca9f8a9d0bff95fb7f6b57bb53f2207fa42ffcc1ec86de7e0029ab89", size = 1948645, upload-time = "2026-04-15T14:51:55.577Z" }, - { url = "https://files.pythonhosted.org/packages/15/23/26e67f86ed62ac9d6f7f3091ee5220bf14b5ac36fb811851d601365ef896/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2ecacee70941e233a2dad23f7796a06f86cc10cc2fbd1c97c7dd5b5a79ffa4f", size = 1977576, upload-time = "2026-04-15T14:49:37.58Z" }, - { url = 
"https://files.pythonhosted.org/packages/b8/78/813c13c0de323d4de54ee2e6fdd69a0271c09ac8dd65a8a000931aa487a5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:647d0a2475b8ed471962eed92fa69145b864942f9c6daa10f95ac70676637ae7", size = 2060358, upload-time = "2026-04-15T14:51:40.087Z" }, - { url = "https://files.pythonhosted.org/packages/09/5e/4caf2a15149271fbd2b4d968899a450853c800b85152abcf54b11531417f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac9cde61965b0697fce6e6cc372df9e1ad93734828aac36e9c1c42a22ad02897", size = 2235980, upload-time = "2026-04-15T14:50:34.535Z" }, - { url = "https://files.pythonhosted.org/packages/c2/c1/a2cdabb5da6f5cb63a3558bcafffc20f790fa14ccffbefbfb1370fadc93f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a2eb0864085f8b641fb3f54a2fb35c58aff24b175b80bc8a945050fcde03204", size = 2316800, upload-time = "2026-04-15T14:52:46.999Z" }, - { url = "https://files.pythonhosted.org/packages/76/fd/19d711e4e9331f9d77f222bffc202bf30ea0d74f6419046376bb82f244c8/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b83ce9fede4bc4fb649281d9857f06d30198b8f70168f18b987518d713111572", size = 2101762, upload-time = "2026-04-15T14:49:24.278Z" }, - { url = "https://files.pythonhosted.org/packages/dc/64/ce95625448e1a4e219390a2923fd594f3fa368599c6b42ac71a5df7238c9/pydantic_core-2.46.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:cb33192753c60f269d2f4a1db8253c95b0df6e04f2989631a8cc1b0f4f6e2e92", size = 2167737, upload-time = "2026-04-15T14:50:41.637Z" }, - { url = "https://files.pythonhosted.org/packages/ad/31/413572d03ca3e73b408f00f54418b91a8be6401451bc791eaeff210328e5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:96611d51f953f87e1ae97637c01ee596a08b7f494ea00a5afb67ea6547b9f53b", size = 2185658, upload-time = 
"2026-04-15T14:51:46.799Z" }, - { url = "https://files.pythonhosted.org/packages/36/09/e4f581353bdf3f0c7de8a8b27afd14fc761da29d78146376315a6fedc487/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9b176fa55f9107db5e6c86099aa5bfd934f1d3ba6a8b43f714ddeebaed3f42b7", size = 2184154, upload-time = "2026-04-15T14:52:49.629Z" }, - { url = "https://files.pythonhosted.org/packages/1a/a4/d0d52849933f5a4bf1ad9d8da612792f96469b37e286a269e3ee9c60bbb1/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:79a59f63a4ce4f3330e27e6f3ce281dd1099453b637350e97d7cf24c207cd120", size = 2332379, upload-time = "2026-04-15T14:49:55.009Z" }, - { url = "https://files.pythonhosted.org/packages/30/93/25bfb08fdbef419f73290e573899ce938a327628c34e8f3a4bafeea30126/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:f200fce071808a385a314b7343f5e3688d7c45746be3d64dc71ee2d3e2a13268", size = 2377964, upload-time = "2026-04-15T14:51:59.649Z" }, - { url = "https://files.pythonhosted.org/packages/15/36/b777766ff83fef1cf97473d64764cd44f38e0d8c269ed06faace9ae17666/pydantic_core-2.46.1-cp314-cp314-win32.whl", hash = "sha256:3a07eccc0559fb9acc26d55b16bf8ebecd7f237c74a9e2c5741367db4e6d8aff", size = 1976450, upload-time = "2026-04-15T14:51:57.665Z" }, - { url = "https://files.pythonhosted.org/packages/7b/4b/4cd19d2437acfc18ca166db5a2067040334991eb862c4ecf2db098c91fbf/pydantic_core-2.46.1-cp314-cp314-win_amd64.whl", hash = "sha256:1706d270309ac7d071ffe393988c471363705feb3d009186e55d17786ada9622", size = 2067750, upload-time = "2026-04-15T14:49:38.941Z" }, - { url = "https://files.pythonhosted.org/packages/7f/a0/490751c0ef8f5b27aae81731859aed1508e72c1a9b5774c6034269db773b/pydantic_core-2.46.1-cp314-cp314-win_arm64.whl", hash = "sha256:22d4e7457ade8af06528012f382bc994a97cc2ce6e119305a70b3deff1e409d6", size = 2021109, upload-time = "2026-04-15T14:50:27.728Z" }, - { url = 
"https://files.pythonhosted.org/packages/36/3a/2a018968245fffd25d5f1972714121ad309ff2de19d80019ad93494844f9/pydantic_core-2.46.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:607ff9db0b7e2012e7eef78465e69f9a0d7d1c3e7c6a84cf0c4011db0fcc3feb", size = 2111548, upload-time = "2026-04-15T14:52:08.273Z" }, - { url = "https://files.pythonhosted.org/packages/77/5b/4103b6192213217e874e764e5467d2ff10d8873c1147d01fa432ac281880/pydantic_core-2.46.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8cda3eacaea13bd02a1bea7e457cc9fc30b91c5a91245cef9b215140f80dd78c", size = 1926745, upload-time = "2026-04-15T14:50:03.045Z" }, - { url = "https://files.pythonhosted.org/packages/c3/70/602a667cf4be4bec6c3334512b12ae4ea79ce9bfe41dc51be1fd34434453/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9493279cdc7997fe19e5ed9b41f30cbc3806bd4722adb402fedb6f6d41bd72a", size = 1965922, upload-time = "2026-04-15T14:51:12.555Z" }, - { url = "https://files.pythonhosted.org/packages/a9/24/06a89ce5323e755b7d2812189f9706b87aaebe49b34d247b380502f7992c/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3644e5e10059999202355b6c6616e624909e23773717d8f76deb8a6e2a72328c", size = 2043221, upload-time = "2026-04-15T14:51:18.995Z" }, - { url = "https://files.pythonhosted.org/packages/2c/6e/b1d9ad907d9d76964903903349fd2e33c87db4b993cc44713edcad0fc488/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ad6c9de57683e26c92730991960c0c3571b8053263b042de2d3e105930b2767", size = 2243655, upload-time = "2026-04-15T14:50:10.718Z" }, - { url = "https://files.pythonhosted.org/packages/ef/73/787abfaad51174641abb04c8aa125322279b40ad7ce23c495f5a69f76554/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:557ebaa27c7617e7088002318c679a8ce685fa048523417cd1ca52b7f516d955", size = 2295976, upload-time = "2026-04-15T14:53:09.694Z" 
}, - { url = "https://files.pythonhosted.org/packages/56/0b/b7c5a631b6d5153d4a1ea4923b139aea256dc3bd99c8e6c7b312c7733146/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cd37e39b22b796ba0298fe81e9421dd7b65f97acfbb0fb19b33ffdda7b9a7b4", size = 2103439, upload-time = "2026-04-15T14:50:08.32Z" }, - { url = "https://files.pythonhosted.org/packages/2a/3f/952ee470df69e5674cdec1cbde22331adf643b5cc2ff79f4292d80146ee4/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:6689443b59714992e67d62505cdd2f952d6cf1c14cc9fd9aeec6719befc6f23b", size = 2132871, upload-time = "2026-04-15T14:50:24.445Z" }, - { url = "https://files.pythonhosted.org/packages/e3/8b/1dea3b1e683c60c77a60f710215f90f486755962aa8939dbcb7c0f975ac3/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f32c41ca1e3456b5dd691827b7c1433c12d5f0058cc186afbb3615bc07d97b8", size = 2168658, upload-time = "2026-04-15T14:52:24.897Z" }, - { url = "https://files.pythonhosted.org/packages/67/97/32ae283810910d274d5ba9f48f856f5f2f612410b78b249f302d297816f5/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:88cd1355578852db83954dc36e4f58f299646916da976147c20cf6892ba5dc43", size = 2171184, upload-time = "2026-04-15T14:52:34.854Z" }, - { url = "https://files.pythonhosted.org/packages/a2/57/c9a855527fe56c2072070640221f53095b0b19eaf651f3c77643c9cabbe3/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:a170fefdb068279a473cc9d34848b85e61d68bfcc2668415b172c5dfc6f213bf", size = 2316573, upload-time = "2026-04-15T14:52:12.871Z" }, - { url = "https://files.pythonhosted.org/packages/37/b3/14c39ffc7399819c5448007c7bcb4e6da5669850cfb7dcbb727594290b48/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:556a63ff1006934dba4eed7ea31b58274c227e29298ec398e4275eda4b905e95", size = 2378340, upload-time = "2026-04-15T14:51:02.619Z" }, - { url = 
"https://files.pythonhosted.org/packages/01/55/a37461fbb29c053ea4e62cfc5c2d56425cb5efbef8316e63f6d84ae45718/pydantic_core-2.46.1-cp314-cp314t-win32.whl", hash = "sha256:3b146d8336a995f7d7da6d36e4a779b7e7dff2719ac00a1eb8bd3ded00bec87b", size = 1960843, upload-time = "2026-04-15T14:52:06.103Z" }, - { url = "https://files.pythonhosted.org/packages/22/d7/97e1221197d17a27f768363f87ec061519eeeed15bbd315d2e9d1429ff03/pydantic_core-2.46.1-cp314-cp314t-win_amd64.whl", hash = "sha256:f1bc856c958e6fe9ec071e210afe6feb695f2e2e81fd8d2b102f558d364c4c17", size = 2048696, upload-time = "2026-04-15T14:52:52.154Z" }, - { url = "https://files.pythonhosted.org/packages/19/d5/4eac95255c7d35094b46a32ec1e4d80eac94729c694726ee1d69948bd5f0/pydantic_core-2.46.1-cp314-cp314t-win_arm64.whl", hash = "sha256:21a5bfd8a1aa4de60494cdf66b0c912b1495f26a8899896040021fbd6038d989", size = 2022343, upload-time = "2026-04-15T14:49:49.036Z" }, - { url = "https://files.pythonhosted.org/packages/44/4b/1952d38a091aa7572c13460db4439d5610a524a1a533fb131e17d8eff9c2/pydantic_core-2.46.1-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:c56887c0ffa05318128a80303c95066a9d819e5e66d75ff24311d9e0a58d6930", size = 2123089, upload-time = "2026-04-15T14:50:20.658Z" }, - { url = "https://files.pythonhosted.org/packages/90/06/f3623aa98e2d7cb4ed0ae0b164c5d8a1b86e5aca01744eba980eefcd5da4/pydantic_core-2.46.1-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:614b24b875c1072631065fa85e195b40700586afecb0b27767602007920dacf8", size = 1945481, upload-time = "2026-04-15T14:50:56.945Z" }, - { url = "https://files.pythonhosted.org/packages/69/f9/a9224203b8426893e22db2cf0da27cd930ad7d76e0a611ebd707e5e6c916/pydantic_core-2.46.1-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6382f6967c48519b6194e9e1e579e5898598b682556260eeaf05910400d827e", size = 1986294, upload-time = "2026-04-15T14:49:31.839Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/29/954d2174db68b9f14292cef3ae8a05a25255735909adfcf45ca768023713/pydantic_core-2.46.1-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93cb8aa6c93fb833bb53f3a2841fbea6b4dc077453cd5b30c0634af3dee69369", size = 2144185, upload-time = "2026-04-15T14:52:39.449Z" }, - { url = "https://files.pythonhosted.org/packages/f4/97/95de673a1356a88b2efdaa120eb6af357a81555c35f6809a7a1423ff7aef/pydantic_core-2.46.1-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:5f9107a24a4bc00293434dfa95cf8968751ad0dd703b26ea83a75a56f7326041", size = 2107564, upload-time = "2026-04-15T14:50:49.14Z" }, - { url = "https://files.pythonhosted.org/packages/00/fc/a7c16d85211ea9accddc693b7d049f20b0c06440d9264d1e1c074394ee6c/pydantic_core-2.46.1-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:2b1801ba99876984d0a03362782819238141c4d0f3f67f69093663691332fc35", size = 1939925, upload-time = "2026-04-15T14:50:36.188Z" }, - { url = "https://files.pythonhosted.org/packages/2e/23/87841169d77820ddabeb81d82002c95dcb82163846666d74f5bdeeaec750/pydantic_core-2.46.1-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7fd82a91a20ed6d54fa8c91e7a98255b1ff45bf09b051bfe7fe04eb411e232e", size = 1995313, upload-time = "2026-04-15T14:50:22.538Z" }, - { url = "https://files.pythonhosted.org/packages/ea/96/b46609359a354fa9cd336fc5d93334f1c358b756cc81e4b397347a88fa6f/pydantic_core-2.46.1-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f135bf07c92c93def97008bc4496d16934da9efefd7204e5f22a2c92523cb1f", size = 2151197, upload-time = "2026-04-15T14:51:22.925Z" }, - { url = "https://files.pythonhosted.org/packages/f5/e7/3d1d2999ad8e78b124c752e4fc583ecd98f3bea7cc42045add2fb6e31b62/pydantic_core-2.46.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:b44b44537efbff2df9567cd6ba51b554d6c009260a021ab25629c81e066f1683", size = 2121103, upload-time = "2026-04-15T14:52:59.537Z" }, - { url = "https://files.pythonhosted.org/packages/de/08/50a56632994007c7a58c86f782accccbe2f3bb7ca80f462533e26424cd18/pydantic_core-2.46.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8f9ca3af687cc6a5c89aeaa00323222fcbceb4c3cdc78efdac86f46028160c04", size = 1952464, upload-time = "2026-04-15T14:52:04.001Z" }, - { url = "https://files.pythonhosted.org/packages/75/0b/3cf631e33a55b1788add3e42ac921744bd1f39279082a027b4ef6f48bd32/pydantic_core-2.46.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2678a4cbc205f00a44542dca19d15c11ccddd7440fd9df0e322e2cae55bb67a", size = 2138504, upload-time = "2026-04-15T14:52:01.812Z" }, - { url = "https://files.pythonhosted.org/packages/fa/69/f96f3dfc939450b9aeb80d3fe1943e7bc0614b14e9447d84f48d65153e0c/pydantic_core-2.46.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5a98cbb03a8a7983b0fb954e0af5e7016587f612e6332c6a4453f413f1d1851", size = 2165467, upload-time = "2026-04-15T14:52:15.455Z" }, - { url = "https://files.pythonhosted.org/packages/a8/22/bb61cccddc2ce85b179cd81a580a1746e880870060fbf4bf6024dab7e8aa/pydantic_core-2.46.1-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b2f098b08860bd149e090ad232f27fffb5ecf1bfd9377015445c8e17355ec2d1", size = 2183882, upload-time = "2026-04-15T14:51:50.868Z" }, - { url = "https://files.pythonhosted.org/packages/0e/01/b9039da255c5fd3a7fd85344fda8861c847ad6d8fdd115580fa4505b2022/pydantic_core-2.46.1-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:d2623606145b55a96efdd181b015c0356804116b2f14d3c2af4832fe4f45ed5f", size = 2323011, upload-time = "2026-04-15T14:49:40.32Z" }, - { url = "https://files.pythonhosted.org/packages/24/b1/f426b20cb72d0235718ccc4de3bc6d6c0d0c2a91a3fd2f32ae11b624bcc9/pydantic_core-2.46.1-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:420f515c42aaec607ff720867b300235bd393abd709b26b190ceacb57a9bfc17", size = 2365696, upload-time = "2026-04-15T14:49:41.936Z" }, - { url = "https://files.pythonhosted.org/packages/ef/d2/d2b0025246481aa2ce6db8ba196e29b92063343ac76e675b3a1fa478ed4d/pydantic_core-2.46.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:375cfdd2a1049910c82ba2ff24f948e93599a529e0fdb066d747975ca31fc663", size = 2190970, upload-time = "2026-04-15T14:49:33.111Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/43/bb/4742f05b739b2478459bb16fa8470549518c802e06ddcf3f106c5081315e/pydantic_core-2.46.2.tar.gz", hash = "sha256:37bb079f9ee3f1a519392b73fda2a96379b31f2013c6b467fe693e7f2987f596", size = 471269, upload-time = "2026-04-17T09:10:07.017Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/f2/98f37e836c5ba0335432768e0d8645e6f50a3c838b48a74d9256256784fc/pydantic_core-2.46.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:160ef93541f4f84e3e5068e6c1f64d8fd6f57586e5853d609b467d3333f8146a", size = 2108178, upload-time = "2026-04-17T09:10:24.689Z" }, + { url = "https://files.pythonhosted.org/packages/55/69/975458de8e5453322cfc57d6c7029c3e66d9e7a4389c53ddd5ad02d5e5da/pydantic_core-2.46.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a9124b63f4f40a12a0666df57450b4c24b98407ff74349221b869ec085a5d8e", size = 1949232, upload-time = "2026-04-17T09:11:39.536Z" }, + { url = "https://files.pythonhosted.org/packages/94/8d/938175e6e82d051ac4644765680db06571d7e106a42f760da09bd90f6525/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de12004a7da7f1eb67ece37439a5a23a915636085dd042176fda362e006e6940", size = 1974741, upload-time = "2026-04-17T09:13:01.922Z" }, + { url = "https://files.pythonhosted.org/packages/f2/38/7329f8ac5c732bddf15f939c2add40b95170e0ecca5ef124c12def3f78ba/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:a070c7769fec277409ad0b3d55b2f0a3703a6f00cf5031fe93090f155bf56382", size = 2041905, upload-time = "2026-04-17T09:11:11.94Z" }, + { url = "https://files.pythonhosted.org/packages/99/2c/47cfd069937ee5cbc0d9e18fa9795c8f80c49a6b4fc777d4cd870f2ade7b/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41d701bb34f81f0b11c724cc544b9a10b26a28f4d0d1197f2037c91225708706", size = 2222703, upload-time = "2026-04-17T09:10:31.196Z" }, + { url = "https://files.pythonhosted.org/packages/83/b0/7ed83ca8cd92c99bcab90cf42ed953723fbc19d8a20c8c12bb68c51febc1/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19631e7350b7a574fb6b6db222f4b17e8bd31803074b3307d07df62379d2b2e4", size = 2276317, upload-time = "2026-04-17T09:09:53.263Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/50b1b62990996e7916aae2852b29cbf3ecc3fdae78209eb284cd61e2c918/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48b1059e4f2a6ec3e41983148eb1eec5ef9fa3a80bbc4ac0893ac76b115fe039", size = 2092152, upload-time = "2026-04-17T09:10:44.683Z" }, + { url = "https://files.pythonhosted.org/packages/c1/51/a062864e6b34ada7e343ad9ed29368e495620a8ef1c009b47a68b46e1634/pydantic_core-2.46.2-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:df73724fce8ad53c670358c905b37930bd7b9d92e57db640a65c53b2706eee00", size = 2118091, upload-time = "2026-04-17T09:10:05.083Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/fcc97c4d0319615dc0b5b132b420904639652f8514e9c76482acb70ea1d4/pydantic_core-2.46.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a0891a9be0def16fb320af21a198ece052eed72bf44d73d8ff43f702bd26fd6b", size = 2174304, upload-time = "2026-04-17T09:11:00.54Z" }, + { url = "https://files.pythonhosted.org/packages/00/52/28f53796ca74b7e3dd45938f300517f04970e985ad600d0d0f36a11378bd/pydantic_core-2.46.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash 
= "sha256:2ca790779aa1cba1329b8dc42ccebada441d9ac1d932de980183d544682c646d", size = 2181444, upload-time = "2026-04-17T09:11:45.442Z" }, + { url = "https://files.pythonhosted.org/packages/22/49/164d5d3a7356d2607a72e77264a3b252a7c7d9362a81fc9df47bef7ae3aa/pydantic_core-2.46.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:6b865eb702c3af71cf7331919a787563ce2413f7a54ef49ec6709a01b4f22ce6", size = 2328611, upload-time = "2026-04-17T09:10:08.574Z" }, + { url = "https://files.pythonhosted.org/packages/6b/77/6266bb3b79c27b533e5ee02c1e3da5848872112178880cc5006a84e857ac/pydantic_core-2.46.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:631bec5f951a30a4b332b4a57d0cdd5a2c8187eb71301f966425f2e54a697855", size = 2351070, upload-time = "2026-04-17T09:13:34.92Z" }, + { url = "https://files.pythonhosted.org/packages/10/7f/d4233852d16d8e85b034a524d8017e051a0aa4acd04c64c3a69a1a2a0ba6/pydantic_core-2.46.2-cp310-cp310-win32.whl", hash = "sha256:8cbd9d67357f3a925f2af1d44db3e8ef1ce1a293ea0add98081b072d4a12e3b4", size = 1976750, upload-time = "2026-04-17T09:13:15.537Z" }, + { url = "https://files.pythonhosted.org/packages/70/31/d65117cf5f89d81705da5b1dcdad8efa0a0b65dbbc7f13cafbabb7d01615/pydantic_core-2.46.2-cp310-cp310-win_amd64.whl", hash = "sha256:dd51dd16182b4bfdcefd27b39b856aa4a57b77f15b231a2d10c45391b0a02028", size = 2073989, upload-time = "2026-04-17T09:12:17.315Z" }, + { url = "https://files.pythonhosted.org/packages/89/91/089f517a725f29084364169437833ab0ae4da4d7a6ed9d4474db7f1412e6/pydantic_core-2.46.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8060f42db3cd204871db0afd51fef54a13fa544c4dd48cdcae2e174ef40c8ba", size = 2106218, upload-time = "2026-04-17T09:10:48.023Z" }, + { url = "https://files.pythonhosted.org/packages/a0/92/23858ed1b58f2a134e50c2fdd0e34ea72721ccb257e1e9346514e1ccb5b9/pydantic_core-2.46.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:73a9d2809bd8d4a7cda4d336dc996a565eb4feaaa39932f9d85a65fa18382f28", size = 1948087, upload-time = 
"2026-04-17T09:11:58.639Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ac/e2240fccb4794e965817593d5a46cf5ea22f2001b73fe360b7578925b7d8/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b0a2dee92dfaabcfb93629188c3e9cf74fdfc0f22e7c369cb444a98814a1e50", size = 1972931, upload-time = "2026-04-17T09:13:13.304Z" }, + { url = "https://files.pythonhosted.org/packages/1a/da/3b11dab2aa15c5c8ed20a01eb7aa432a78b8e3a4713659f7e58490a020a5/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3098446ba8cf774f61cb8d4008c1dba14a30426a15169cd95ac3392a461193b1", size = 2040454, upload-time = "2026-04-17T09:13:47.895Z" }, + { url = "https://files.pythonhosted.org/packages/d7/39/c4cf5e1f1c6c34c53c0902039c95d81dc15cdd1f03634bd1a93f33e70a72/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57c584af6c375ea3f826d8131a94cb212b3d9926eaff67117e3711bbff3a83a5", size = 2221320, upload-time = "2026-04-17T09:13:08.568Z" }, + { url = "https://files.pythonhosted.org/packages/c7/46/891035bc9e93538e754c3188424d24b5a69ec3ae5210fa01d483e99b3302/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:547381cca999be88b4715a0ed7afa11f07fc7e53cb1883687b190d25a92c56cf", size = 2274559, upload-time = "2026-04-17T09:11:10.257Z" }, + { url = "https://files.pythonhosted.org/packages/ab/d0/7af0b905b3148152c159c9caf203e7ecd9b90b76389f0862e6ab0cf1b2a3/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:caeed15dcb1233a5a94bc6ff37ef5393cf5b33a45e4bdfb2d6042f3d24e1cb27", size = 2089239, upload-time = "2026-04-17T09:13:06.326Z" }, + { url = "https://files.pythonhosted.org/packages/c5/bc/566afe02ba2de37712eece74ac7bfba322abd7916410bf90504f1b17ddad/pydantic_core-2.46.2-cp311-cp311-manylinux_2_31_riscv64.whl", hash = 
"sha256:c05f53362568c75476b5c96659377a5dfd982cfbe5a5c07de5106d08a04efc4f", size = 2116182, upload-time = "2026-04-17T09:11:33.738Z" }, + { url = "https://files.pythonhosted.org/packages/4e/5b/3fcb3a229bbfa23b0e3c65014057af0f9d51ec7a2d9f7adb282f41ff5ac8/pydantic_core-2.46.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2643ac7eae296200dbd48762a1c852cf2cad5f5e3eba34e652053cebf03becf8", size = 2172346, upload-time = "2026-04-17T09:10:46.472Z" }, + { url = "https://files.pythonhosted.org/packages/43/9a/baa9e3aa70ea7bbcb9db0f87162a371649ac80c03e43eb54af193390cf17/pydantic_core-2.46.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dc4620a47c6fe6a39f89392c00833a82fc050ce90169798f78a25a8d4df03b6e", size = 2179540, upload-time = "2026-04-17T09:11:21.881Z" }, + { url = "https://files.pythonhosted.org/packages/bd/46/912047a5427f949c909495704b3c8b9ead9d1c66f87e96606011beab1fcb/pydantic_core-2.46.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:78cb0d2453b50bf2035f85fd0d9cfabdb98c47f9c53ddb7c23873cd83da9560b", size = 2327423, upload-time = "2026-04-17T09:13:40.291Z" }, + { url = "https://files.pythonhosted.org/packages/e9/bf/c5e661451dc9411c2ab88a244c1ba57644950c971486040dc200f77b69f4/pydantic_core-2.46.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f0c1cbb7d6112932cc188c6be007a5e2867005a069e47f42fe67bf5f122b0908", size = 2348652, upload-time = "2026-04-17T09:10:37.76Z" }, + { url = "https://files.pythonhosted.org/packages/77/b3/3219e7c522af54b010cf7422dcb11cc6616a4414d1ccd628b0d3f61c6af6/pydantic_core-2.46.2-cp311-cp311-win32.whl", hash = "sha256:c1ce5b2366f85cfdbf7f0907755043707f86d09a5b1b1acebbb7bf1600d75c64", size = 1974410, upload-time = "2026-04-17T09:13:27.392Z" }, + { url = "https://files.pythonhosted.org/packages/e5/29/e5cfac8a74c59873dfd47d3a1477c39ad9247639a7120d3e251a9ff12417/pydantic_core-2.46.2-cp311-cp311-win_amd64.whl", hash = "sha256:f1a6197eadff5bd0bb932f12bb038d403cb75db5b0b391e70e816a647745ddaf", size = 2071158, 
upload-time = "2026-04-17T09:09:57.69Z" }, + { url = "https://files.pythonhosted.org/packages/6f/8b/b7b19b717cdb3675cb109de143f62d4dc62f5d4a0b9879b6f1ace62c6654/pydantic_core-2.46.2-cp311-cp311-win_arm64.whl", hash = "sha256:15e42885b283f87846ee79e161002c5c496ef747a73f6e47054f45a13d9035bc", size = 2043507, upload-time = "2026-04-17T09:09:51.828Z" }, + { url = "https://files.pythonhosted.org/packages/97/ec/2fafa4c86f5d2a69372c7cddef30925fd0e370b1efaf556609c1a0196d8a/pydantic_core-2.46.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ea1ad8c89da31512fe2d249cf0638fb666925bda341901541bc5f3311c6fcc9e", size = 2101729, upload-time = "2026-04-17T09:12:30.042Z" }, + { url = "https://files.pythonhosted.org/packages/cf/55/be5386c2c4b49af346e8a26b748194ff25757bbb6cf544130854e997af7a/pydantic_core-2.46.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b308da17b92481e0587244631c5529e5d91d04cb2b08194825627b1eca28e21e", size = 1951546, upload-time = "2026-04-17T09:10:10.585Z" }, + { url = "https://files.pythonhosted.org/packages/29/92/89e273a055ce440e6636c756379af35ad86da9d336a560049c3ba5e41c80/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d333a50bdd814a917d8d6a7ee35ba2395d53ddaa882613bc24e54a9d8b129095", size = 1976178, upload-time = "2026-04-17T09:11:49.619Z" }, + { url = "https://files.pythonhosted.org/packages/91/b3/e4664469cf70c0cb0f7b2f5719d64e5968bb6f38217042c2afa3d3c4ba17/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1d00b99590c5bd1fabbc5d28b170923e32c1b1071b1f1de1851a4d14d89eb192", size = 2051697, upload-time = "2026-04-17T09:12:04.917Z" }, + { url = "https://files.pythonhosted.org/packages/98/58/dbf68213ee06ce51cdd6d8c95f97980e646858c45bd96bd2dfb40433be73/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9f0e686960ffe9e65066395af856ac2d52c159043144433602c50c221d81c1ba", size = 2233160, upload-time = 
"2026-04-17T09:12:00.956Z" }, + { url = "https://files.pythonhosted.org/packages/f5/d3/68092aa0ee6c60ff4de4740eb82db3d4ce338ec89b3cecb978c532472f12/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d1128da41c9cb474e0a4701f9c363ec645c9d1a02229904c76bf4e0a194fde2", size = 2298398, upload-time = "2026-04-17T09:10:29.694Z" }, + { url = "https://files.pythonhosted.org/packages/e4/51/5d6155eb737db55b0ad354ca5f333ef009f75feb67df2d79a84bace45af6/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48649cf2d8c358d79586e9fb2f8235902fcaa2d969ec1c5301f2d1873b2f8321", size = 2094058, upload-time = "2026-04-17T09:12:10.995Z" }, + { url = "https://files.pythonhosted.org/packages/6b/f3/eb4a986197d71319430464ff181226c95adc8f06d932189b158bae5a82f5/pydantic_core-2.46.2-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:b902f0fc7c2cf503865a05718b68147c6cd5d0a3867af38c527be574a9fa6e9d", size = 2130388, upload-time = "2026-04-17T09:12:41.159Z" }, + { url = "https://files.pythonhosted.org/packages/56/00/44a9c4fe6d0f64b5786d6a8c649d6f0e34ba6c89b3663add1066e54451a2/pydantic_core-2.46.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e80011f808b03d1d87a8f1e76ae3da19a18eb706c823e17981dcf1fae43744fc", size = 2184245, upload-time = "2026-04-17T09:12:36.532Z" }, + { url = "https://files.pythonhosted.org/packages/78/6b/685b98a834d5e3d1c34a1bde1627525559dd223b75075bc7490cdb24eb33/pydantic_core-2.46.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b839d5c802e31348b949b6473f8190cddbf7d47475856d8ac995a373ee16ec59", size = 2186842, upload-time = "2026-04-17T09:13:04.054Z" }, + { url = "https://files.pythonhosted.org/packages/22/64/caa2f5a2ac8b6113adaa410ccdf31ba7f54897a6e54cd0d726fc7e780c88/pydantic_core-2.46.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:c6b1064f3f9cf9072e1d59dd2936f9f3b668bec1c37039708c9222db703c0d5b", size = 2336066, upload-time = 
"2026-04-17T09:12:13.006Z" }, + { url = "https://files.pythonhosted.org/packages/ee/f9/7d2701bf82945b5b9e7df8347be97ef6a36da2846bfe5b4afec299ffe27b/pydantic_core-2.46.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:37a68e6f2ac95578ce3c0564802404b27b24988649616e556c07e77111ed3f1d", size = 2363691, upload-time = "2026-04-17T09:13:42.972Z" }, + { url = "https://files.pythonhosted.org/packages/3b/65/0dab11574101522941055109419db3cc09db871643dc3fc74e2413215e5b/pydantic_core-2.46.2-cp312-cp312-win32.whl", hash = "sha256:d9ffa75a7ef4b97d6e5e205fabd4304ef01fec09e6f1bdde04b9ad1b07d20289", size = 1958801, upload-time = "2026-04-17T09:11:31.981Z" }, + { url = "https://files.pythonhosted.org/packages/13/2b/df84baa609c676f6450b8ecad44ea59146c805e3371b7b52443c0899f989/pydantic_core-2.46.2-cp312-cp312-win_amd64.whl", hash = "sha256:0551f2d2ddb68af5a00e26497f8025c538f73ef3cb698f8e5a487042cd2792a8", size = 2072634, upload-time = "2026-04-17T09:11:02.407Z" }, + { url = "https://files.pythonhosted.org/packages/d1/4e/e1ce8029fc438086a946739bf9d596f70ff470aad4a8345555920618cabe/pydantic_core-2.46.2-cp312-cp312-win_arm64.whl", hash = "sha256:83aef30f106edcc21a6a4cc44b82d3169a1dbe255508db788e778f3c804d3583", size = 2026188, upload-time = "2026-04-17T09:13:11.083Z" }, + { url = "https://files.pythonhosted.org/packages/07/2b/662e48254479a2d3450ba24b1e25061108b64339794232f503990c519144/pydantic_core-2.46.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:d26e9eea3715008a09a74585fe9becd0c67fbb145dc4df9756d597d7230a652c", size = 2101762, upload-time = "2026-04-17T09:10:13.87Z" }, + { url = "https://files.pythonhosted.org/packages/73/ab/bafd7c7503757ccc8ec4d1911e106fe474c629443648c51a88f08b0fe91a/pydantic_core-2.46.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:48b36e3235140510dc7861f0cd58b714b1cdd3d48f75e10ce52e69866b746f10", size = 1951814, upload-time = "2026-04-17T09:12:25.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/cc/7549c2d57ba2e9a42caa5861a2d398dbe31c02c6aca783253ace59ce84f8/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36b1f99dc451f1a3981f236151465bcf995bbe712d0727c9f7b236fe228a8133", size = 1977329, upload-time = "2026-04-17T09:13:37.605Z" }, + { url = "https://files.pythonhosted.org/packages/18/50/7ed4a8a0d478a4dca8f0134a5efa7193f03cc8520dd4c9509339fb2e5002/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8641c8d535c2d95b45c2e19b646ecd23ebba35d461e0ae48a3498277006250ab", size = 2051832, upload-time = "2026-04-17T09:12:49.771Z" }, + { url = "https://files.pythonhosted.org/packages/dc/16/bb35b193741c0298ddc5f5e4234269efdc0c65e2bcd198aa0de9b68845e4/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:20fb194788a0a50993e87013e693494ba183a2af5b44e99cf060bbae10912b11", size = 2233127, upload-time = "2026-04-17T09:11:04.449Z" }, + { url = "https://files.pythonhosted.org/packages/91/a5/98f4b637149185addea19e1785ea20c373cca31b202f589111d8209d9873/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9262d11d0cd11ee3303a95156939402bed6cedfe5ed0e331b95a283a4da6eb8b", size = 2297418, upload-time = "2026-04-17T09:11:25.929Z" }, + { url = "https://files.pythonhosted.org/packages/36/90/93a5d21990b152da7b7507b7fddb0b935f6a0984d57ac3ec45a6e17777a2/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac204542736aa295fa25f713b7fad6fc50b46ab7764d16087575c85f085174f3", size = 2093735, upload-time = "2026-04-17T09:12:06.908Z" }, + { url = "https://files.pythonhosted.org/packages/14/22/b8b1ffdddf08b4e84380bcb67f41dbbf4c171377c1d36fc6290794bb2094/pydantic_core-2.46.2-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9a7c43a0584742dface3ca0daf6f719d46c1ac2f87cf080050f9ae052c75e1b2", size = 2127570, upload-time = 
"2026-04-17T09:11:53.906Z" }, + { url = "https://files.pythonhosted.org/packages/c6/26/e60d72b4e2d0ce1fa811044a974412ac1c567fe067d97b3e6b290530786e/pydantic_core-2.46.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fd05e1edb6a90ad446fa268ab09e59202766b837597b714b2492db11ee87fab9", size = 2183524, upload-time = "2026-04-17T09:11:30.092Z" }, + { url = "https://files.pythonhosted.org/packages/35/32/36bec7584a1eefb17dec4dfa1c946d3fe4440f466c5705b8adfda69c9a9f/pydantic_core-2.46.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:91155b110788b5501abc7ea954f1d08606219e4e28e3c73a94124307c06efb80", size = 2185408, upload-time = "2026-04-17T09:10:57.228Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d6/1a5689d873620efd67d6b163db0c444c056adb0849b5bc33e2b9f09665a6/pydantic_core-2.46.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:e4e2c72a529fa03ff228be1d2b76944013f428220b764e03cc50ada67e17a42c", size = 2335171, upload-time = "2026-04-17T09:11:43.369Z" }, + { url = "https://files.pythonhosted.org/packages/3e/8e/675104802abe8ef502b072050ee5f2e915251aa1a3af87e1015ce31ec42d/pydantic_core-2.46.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:56291ec1a11c3499890c99a8fd9053b47e60fe837a77ec72c0671b1b8b3dce24", size = 2362743, upload-time = "2026-04-17T09:10:18.333Z" }, + { url = "https://files.pythonhosted.org/packages/8d/bc/86c5dde4fa6e24467680eef5047da3c1a19be0a527d0d8e14aa76b39307c/pydantic_core-2.46.2-cp313-cp313-win32.whl", hash = "sha256:b50f9c5f826ddca1246f055148df939f5f3f2d0d96db73de28e2233f22210d4c", size = 1958074, upload-time = "2026-04-17T09:12:38.622Z" }, + { url = "https://files.pythonhosted.org/packages/2a/97/2537e8c1282b2c4eb062580c0d7a4339e10b072b803d1ee0b7f1f0a5c22c/pydantic_core-2.46.2-cp313-cp313-win_amd64.whl", hash = "sha256:251a57788823230ca8cbc99e6245d1a2ed6e180ec4864f251c94182c580c7f2e", size = 2071741, upload-time = "2026-04-17T09:13:32.405Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/aa/2ee75798706f9dbc4e76dbe59e41a396c5c311e3d6223b9cf6a5fa7780be/pydantic_core-2.46.2-cp313-cp313-win_arm64.whl", hash = "sha256:315d32d1a71494d6b4e1e14a9fa7a4329597b4c4340088ad7e1a9dafbeed92a9", size = 2025955, upload-time = "2026-04-17T09:10:15.567Z" }, + { url = "https://files.pythonhosted.org/packages/d0/96/a50ccb6b539ae780f73cea74905468777680e30c6c3bdf714b9d4c116ea0/pydantic_core-2.46.2-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:4f59b45f3ef8650c0c736a57f59031d47ed9df4c0a64e83796849d7d14863a2d", size = 2097111, upload-time = "2026-04-17T09:10:49.617Z" }, + { url = "https://files.pythonhosted.org/packages/34/5f/fdead7b3afa822ab6e5a18ee0ecffd54937de1877c01ed13a342e0fb3f07/pydantic_core-2.46.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3a075a29ebef752784a91532a1a85be6b234ccffec0a9d7978a92696387c3da6", size = 1951904, upload-time = "2026-04-17T09:12:32.062Z" }, + { url = "https://files.pythonhosted.org/packages/95/e0/1c5d547e550cdab1bec737492aa08865337af6fe7fc9b96f7f45f17d9519/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d12d786e30c04a9d307c5d7080bf720d9bac7f1668191d8e37633a9562749e2", size = 1978667, upload-time = "2026-04-17T09:11:35.589Z" }, + { url = "https://files.pythonhosted.org/packages/0e/cb/665ce629e218c8228302cb94beff4f6531082a2c87d3ecc3d5e63a26f392/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0d5e6d6343b0b5dcacb3503b5de90022968da8ed0ab9ab39d3eda71c20cbf84e", size = 2046721, upload-time = "2026-04-17T09:11:47.725Z" }, + { url = "https://files.pythonhosted.org/packages/77/e9/6cb2cf60f54c1472bbdfce19d957553b43dbba79d1d7b2930a195c594785/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:233eebac0999b6b9ba76eb56f3ec8fce13164aa16b6d2225a36a79e0f95b5973", size = 2228483, upload-time = "2026-04-17T09:12:08.837Z" }, + { url = 
"https://files.pythonhosted.org/packages/0d/2a/93e018dd5571f781ebaeda8c0cf65398489d5bee9b1f484df0b6149b43b9/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9cc0eee720dd2f14f3b7c349469402b99ad81a174ab49d3533974529e9d93992", size = 2294663, upload-time = "2026-04-17T09:12:52.053Z" }, + { url = "https://files.pythonhosted.org/packages/5e/4f/49e57ca55c770c93d9bb046666a54949b42e3c9099a0c5fe94557873fe30/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83ee76bf2c9910513dbc19e7d82367131fa7508dedd6186a462393071cc11059", size = 2098742, upload-time = "2026-04-17T09:13:45.472Z" }, + { url = "https://files.pythonhosted.org/packages/c6/b0/6e46b5cd3332af665f794b8cdeea206618a8630bd9e7bcc36864518fce81/pydantic_core-2.46.2-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:d61db38eb4ee5192f0c261b7f2d38e420b554df8912245e3546aee5c45e2fd78", size = 2125922, upload-time = "2026-04-17T09:12:54.304Z" }, + { url = "https://files.pythonhosted.org/packages/06/d1/40850c81585be443a2abfdf7f795f8fae831baf8e2f9b2133c8246ac671c/pydantic_core-2.46.2-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8f09a713d17bcd55da8ab02ebd9110c5246a49c44182af213b5212800af8bc83", size = 2183000, upload-time = "2026-04-17T09:10:59.027Z" }, + { url = "https://files.pythonhosted.org/packages/04/af/8493d7dfa03ebb7866909e577c6aa65ea0de7377b86023cc51d0c8e11db3/pydantic_core-2.46.2-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:30cacc5fb696e64b8ef6fd31d9549d394dd7d52760db072eecb98e37e3af1677", size = 2180335, upload-time = "2026-04-17T09:12:57.01Z" }, + { url = "https://files.pythonhosted.org/packages/72/5b/1f6a344c4ffdf284da41c6067b82d5ebcbd11ce1b515ae4b662d4adb6f61/pydantic_core-2.46.2-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:7ccfb105fcfe91a22bbb5563ad3dc124bc1aa75bfd2e53a780ab05f78cdf6108", size = 2330002, upload-time = "2026-04-17T09:12:02.958Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/ff/9a694126c12d6d2f48a0cafa6f8eef88ef0d8825600e18d03ff2e896c3b2/pydantic_core-2.46.2-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:13ffef637dc8370c249e5b26bd18e9a80a4fca3d809618c44e18ec834a7ca7a8", size = 2359920, upload-time = "2026-04-17T09:10:27.764Z" }, + { url = "https://files.pythonhosted.org/packages/51/c8/3a35c763d68a9cb2675eb10ef242cf66c5d4701b28ae12e688d67d2c180e/pydantic_core-2.46.2-cp314-cp314-win32.whl", hash = "sha256:1b0ab6d756ca2704a938e6c31b53f290c2f9c10d3914235410302a149de1a83e", size = 1953701, upload-time = "2026-04-17T09:13:30.021Z" }, + { url = "https://files.pythonhosted.org/packages/1a/6a/f2726a780365f7dfd89d62036f984f7acb99978c60c5e1fa7c0cb898ed11/pydantic_core-2.46.2-cp314-cp314-win_amd64.whl", hash = "sha256:99ebade8c9ada4df975372d8dd25883daa0e379a05f1cd0c99aa0c04368d01a6", size = 2071867, upload-time = "2026-04-17T09:10:39.205Z" }, + { url = "https://files.pythonhosted.org/packages/e1/79/76baacb9feba3d7c399b245ca1a29c74ea0db04ea693811374827eec2290/pydantic_core-2.46.2-cp314-cp314-win_arm64.whl", hash = "sha256:de87422197cf7f83db91d89c86a21660d749b3cd76cd8a45d115b8e675670f02", size = 2017252, upload-time = "2026-04-17T09:10:26.175Z" }, + { url = "https://files.pythonhosted.org/packages/f1/3b/77c26938f817668d9ad9bab1a905cb23f11d9a3d4bf724d429b3e55a8eaf/pydantic_core-2.46.2-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:236f22b4a206b5b61db955396b7cf9e2e1ff77f372efe9570128ccfcd6a525eb", size = 2094545, upload-time = "2026-04-17T09:12:19.339Z" }, + { url = "https://files.pythonhosted.org/packages/fe/de/42c13f590e3c260966aa49bcdb1674774f975467c49abd51191e502bea28/pydantic_core-2.46.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c2012f64d2cd7cca50f49f22445aa5a88691ac2b4498ee0a9a977f8ca4f7289f", size = 1933953, upload-time = "2026-04-17T09:09:55.889Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/84/ebe3ebb3e2d8db656937cfa6f97f544cb7132f2307a4a7dfdcd0ea102a12/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d07d6c63106d3a9c9a333e2636f9c82c703b1a9e3b079299e58747964e4fdb72", size = 1974435, upload-time = "2026-04-17T09:10:12.371Z" }, + { url = "https://files.pythonhosted.org/packages/b9/15/0bf51ca6709477cd4ef86148b6d7844f3308f029eac361dd0383f1e17b1a/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c326a2b4b85e959d9a1fc3a11f32f84611b6ec07c053e1828a860edf8d068208", size = 2031113, upload-time = "2026-04-17T09:10:00.752Z" }, + { url = "https://files.pythonhosted.org/packages/02/ae/b7b5af9b79db036d9e61a44c481c17a213dc8fc4b8b71fe6875a72fc778b/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac8a65e798f2462552c00d2e013d532c94d646729dda98458beaf51f9ec7b120", size = 2236325, upload-time = "2026-04-17T09:10:33.227Z" }, + { url = "https://files.pythonhosted.org/packages/a6/ae/ecef7477b5a03d4a499708f7e75d2836452ebb70b776c2d64612b334f57a/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a3c2bc1cc8164bedbc160b7bb1e8cc1e8b9c27f69ae4f9ae2b976cdae02b2dd", size = 2278135, upload-time = "2026-04-17T09:10:23.287Z" }, + { url = "https://files.pythonhosted.org/packages/db/e4/2f9d82faa47af6c39fc3f120145fd915971e1e0cb6b55b494fad9fdf8275/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e69aa5e10b7e8b1bb4a6888650fd12fcbf11d396ca11d4a44de1450875702830", size = 2109071, upload-time = "2026-04-17T09:11:06.149Z" }, + { url = "https://files.pythonhosted.org/packages/f1/9c/677cf10873fbd0b116575ab7b97c90482b21564f8a8040beb18edef7a577/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:4e6df5c3301e65fb42bc5338bf9a1027a02b0a31dc7f54c33775229af474daf0", size = 2106028, upload-time = 
"2026-04-17T09:10:51.525Z" }, + { url = "https://files.pythonhosted.org/packages/d6/53/6a06183544daba51c059123a2064a99039df25f115a06bdb26f2ea177038/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2c2f6e32548ac8d559b47944effcf8ae4d81c161f6b6c885edc53bc08b8f192d", size = 2164816, upload-time = "2026-04-17T09:11:56.187Z" }, + { url = "https://files.pythonhosted.org/packages/57/6f/10fcdd9e3eca66fc828eef0f6f5850f2dd3bca2c59e6e041fb8bc3da39be/pydantic_core-2.46.2-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:b089a81c58e6ea0485562bbbbbca4f65c0549521606d5ef27fba217aac9b665a", size = 2166130, upload-time = "2026-04-17T09:10:03.804Z" }, + { url = "https://files.pythonhosted.org/packages/29/83/92d3fd0e0156cad2e3cb5c26de73794af78ac9fa0c22ab666e566dd67061/pydantic_core-2.46.2-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:7f700a6d6f64112ae9193709b84303bbab84424ad4b47d0253301aabce9dfc70", size = 2316605, upload-time = "2026-04-17T09:12:45.249Z" }, + { url = "https://files.pythonhosted.org/packages/97/f1/facffdb970981068219582e499b8d0871ed163ffcc6b347de5c412669e4c/pydantic_core-2.46.2-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:67db6814beaa5fefe91101ec7eb9efda613795767be96f7cf58b1ca8c9ca9972", size = 2358385, upload-time = "2026-04-17T09:09:54.657Z" }, + { url = "https://files.pythonhosted.org/packages/8b/a1/b8160b2f22b2199467bc68581a4ed380643c16b348a27d6165c6c242d694/pydantic_core-2.46.2-cp314-cp314t-win32.whl", hash = "sha256:32fbc7447be8e3be99bf7869f7066308f16be55b61f9882c2cefc7931f5c7664", size = 1942373, upload-time = "2026-04-17T09:12:59.594Z" }, + { url = "https://files.pythonhosted.org/packages/0d/90/db89acabe5b150e11d1b59fe3d947dda2ef6abbfef5c82f056ff63802f5d/pydantic_core-2.46.2-cp314-cp314t-win_amd64.whl", hash = "sha256:b317a2b97019c0b95ce99f4f901ae383f40132da6706cdf1731066a73394c25c", size = 2052078, upload-time = "2026-04-17T09:10:19.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/32/e19b83ceb07a3f1bb21798407790bbc9a31740158fd132b94139cb84e16c/pydantic_core-2.46.2-cp314-cp314t-win_arm64.whl", hash = "sha256:7dcb9d40930dfad7ab6b20bcc6ca9d2b030b0f347a0cd9909b54bd53ead521b1", size = 2016941, upload-time = "2026-04-17T09:12:34.447Z" }, + { url = "https://files.pythonhosted.org/packages/25/ec/e91aa08df1c33d5e3c2b60c07a1eca9f21809728a824c7b467bb3bda68b5/pydantic_core-2.46.2-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:7c5a5b3dbb9e8918e223be6580da5ffcf861c0505bbc196ebed7176ce05b7b4e", size = 2105046, upload-time = "2026-04-17T09:10:55.614Z" }, + { url = "https://files.pythonhosted.org/packages/f0/73/27112400a0452e375290e7c40aef5cc9844ac0920fb1029238cfc68121fa/pydantic_core-2.46.2-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:bc1e8ce33d5a337f2ba862e0719b8201cd54aaed967406c748e009191d47efdd", size = 1940029, upload-time = "2026-04-17T09:12:21.5Z" }, + { url = "https://files.pythonhosted.org/packages/b1/44/3d39f782bc82ddd0b2d82bde83b408aa40a332cdf6f3018acb34e3d4dcfc/pydantic_core-2.46.2-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b737c0b280f41143266445de2689c0e49c79307e51c44ce3a77fef2bedad4994", size = 1987772, upload-time = "2026-04-17T09:10:02.357Z" }, + { url = "https://files.pythonhosted.org/packages/c4/1a/0242e5b7b6cf51dbccc065029f0420107b6bf7e191fcb918f5cb71218acf/pydantic_core-2.46.2-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b877d597afb82b4898e35354bba55de6f7f048421ae0edadbb9886ec137b532", size = 2138468, upload-time = "2026-04-17T09:11:51.546Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d2/66c146f421178641bda880b0267c0d57dd84f5fec9ecc8e46be17b480742/pydantic_core-2.46.2-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e9fcabd1857492b5bf16f90258babde50f618f55d046b1309972da2396321ff9", size = 2091621, 
upload-time = "2026-04-17T09:12:47.501Z" }, + { url = "https://files.pythonhosted.org/packages/ee/b2/c28419aa9fc8055f4ac8e801d1d11c6357351bfa4321ed9bafab3eb98087/pydantic_core-2.46.2-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:fb3ec2c7f54c07b30d89983ce78dc32c37dd06a972448b8716d609493802d628", size = 1937059, upload-time = "2026-04-17T09:10:53.554Z" }, + { url = "https://files.pythonhosted.org/packages/30/ce/cd0824a2db213dc17113291b7a09b9b0ccd9fbf97daa4b81548703341baf/pydantic_core-2.46.2-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:130a6c837d819ef33e8c2bf702ed2c3429237ea69807f1140943d6f4bdaf52fa", size = 1997278, upload-time = "2026-04-17T09:12:23.784Z" }, + { url = "https://files.pythonhosted.org/packages/c9/69/47283fe3c0c967d3e9e9cd6c42b70907610c8a6f8d6e8381f1bb55f8006c/pydantic_core-2.46.2-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2e25417cec5cd9bddb151e33cb08c50160f317479ecc02b22a95ec18f8fe004", size = 2147096, upload-time = "2026-04-17T09:12:43.124Z" }, + { url = "https://files.pythonhosted.org/packages/16/d5/dec7c127fa722ff56e1ccf1e960ae1318a9f66742135e97bf9771447216f/pydantic_core-2.46.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3ad79ed32004d9de91cacd4b5faaff44d56051392fe1d5526feda596f01af25", size = 2107613, upload-time = "2026-04-17T09:10:36.269Z" }, + { url = "https://files.pythonhosted.org/packages/bc/35/975c109b337260a71c93198baf663982b6b39fe3e584e279548a0969e5d4/pydantic_core-2.46.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d157c48d28eebe5d46906de06a6a2f2c9e00b67d3e42de1f1b9c2d42b810f77c", size = 1947099, upload-time = "2026-04-17T09:12:15.304Z" }, + { url = "https://files.pythonhosted.org/packages/4e/11/52a971a0f9218631690274be533f05e5ddde5547f0823bb3e9dfd1be49f6/pydantic_core-2.46.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7b42c6471288dedc979ac8400d9c9770f03967dd187db1f8d3405d4d182cc714", size = 2133866, upload-time = "2026-04-17T09:12:27.994Z" }, + { url = "https://files.pythonhosted.org/packages/fe/7a/33d94d0698602b2d1712e78c703a33952eb2ca69e02e8e4b208e7f6602b5/pydantic_core-2.46.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4f27bc4801358dc070d6697b41237fce9923d8e69a1ce1e95606ac36c1552dc1", size = 2161721, upload-time = "2026-04-17T09:11:16.111Z" }, + { url = "https://files.pythonhosted.org/packages/b0/cb/0df7ee0a148e9ce0968a80787967ddca9f6b3f8a49152a881b88da262701/pydantic_core-2.46.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e094a8f85db41aa7f6a45c5dac2950afc9862e66832934231962252b5d284eed", size = 2180175, upload-time = "2026-04-17T09:11:41.577Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a8/258a32878140347532be4e44c6f3b1ace3b52b9c9ca7548a65ce18adf4b4/pydantic_core-2.46.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:807eeda5551f6884d3b4421578be37be50ddb7a58832348e99617a6714a73748", size = 2319882, upload-time = "2026-04-17T09:10:21.872Z" }, + { url = "https://files.pythonhosted.org/packages/13/b9/5071c298a0f91314a5402b8c56e0efbcebe77085327d0b4df7dc9cb0b674/pydantic_core-2.46.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fcaa1c3c846a7f6686b38fe493d1b2e8007380e293bfef6a9354563c026cbf36", size = 2348065, upload-time = "2026-04-17T09:11:08.263Z" }, + { url = "https://files.pythonhosted.org/packages/75/f3/0a7087e5f861d66ca64ce927230b397cc264c87b712156e6a93b26a459c8/pydantic_core-2.46.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:154dbfdfb11b8cbd8ff4d00d0b81e3d19f4cb4bedd5aa9f091060ba071474c6a", size = 2192159, upload-time = "2026-04-17T09:11:20.123Z" }, ] [[package]] @@ -1662,27 +1662,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.10" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/e7/d9/aa3f7d59a10ef6b14fe3431706f854dbf03c5976be614a9796d36326810c/ruff-0.15.10.tar.gz", hash = "sha256:d1f86e67ebfdef88e00faefa1552b5e510e1d35f3be7d423dc7e84e63788c94e", size = 4631728, upload-time = "2026-04-09T14:06:09.884Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/00/a1c2fdc9939b2c03691edbda290afcd297f1f389196172826b03d6b6a595/ruff-0.15.10-py3-none-linux_armv6l.whl", hash = "sha256:0744e31482f8f7d0d10a11fcbf897af272fefdfcb10f5af907b18c2813ff4d5f", size = 10563362, upload-time = "2026-04-09T14:06:21.189Z" }, - { url = "https://files.pythonhosted.org/packages/5c/15/006990029aea0bebe9d33c73c3e28c80c391ebdba408d1b08496f00d422d/ruff-0.15.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b1e7c16ea0ff5a53b7c2df52d947e685973049be1cdfe2b59a9c43601897b22e", size = 10951122, upload-time = "2026-04-09T14:06:02.236Z" }, - { url = "https://files.pythonhosted.org/packages/f2/c0/4ac978fe874d0618c7da647862afe697b281c2806f13ce904ad652fa87e4/ruff-0.15.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:93cc06a19e5155b4441dd72808fdf84290d84ad8a39ca3b0f994363ade4cebb1", size = 10314005, upload-time = "2026-04-09T14:06:00.026Z" }, - { url = "https://files.pythonhosted.org/packages/da/73/c209138a5c98c0d321266372fc4e33ad43d506d7e5dd817dd89b60a8548f/ruff-0.15.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83e1dd04312997c99ea6965df66a14fb4f03ba978564574ffc68b0d61fd3989e", size = 10643450, upload-time = "2026-04-09T14:05:42.137Z" }, - { url = "https://files.pythonhosted.org/packages/ec/76/0deec355d8ec10709653635b1f90856735302cb8e149acfdf6f82a5feb70/ruff-0.15.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8154d43684e4333360fedd11aaa40b1b08a4e37d8ffa9d95fee6fa5b37b6fab1", size = 10379597, upload-time = "2026-04-09T14:05:49.984Z" }, - { url = 
"https://files.pythonhosted.org/packages/dc/be/86bba8fc8798c081e28a4b3bb6d143ccad3fd5f6f024f02002b8f08a9fa3/ruff-0.15.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ab88715f3a6deb6bde6c227f3a123410bec7b855c3ae331b4c006189e895cef", size = 11146645, upload-time = "2026-04-09T14:06:12.246Z" }, - { url = "https://files.pythonhosted.org/packages/a8/89/140025e65911b281c57be1d385ba1d932c2366ca88ae6663685aed8d4881/ruff-0.15.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a768ff5969b4f44c349d48edf4ab4f91eddb27fd9d77799598e130fb628aa158", size = 12030289, upload-time = "2026-04-09T14:06:04.776Z" }, - { url = "https://files.pythonhosted.org/packages/88/de/ddacca9545a5e01332567db01d44bd8cf725f2db3b3d61a80550b48308ea/ruff-0.15.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ee3ef42dab7078bda5ff6a1bcba8539e9857deb447132ad5566a038674540d0", size = 11496266, upload-time = "2026-04-09T14:05:55.485Z" }, - { url = "https://files.pythonhosted.org/packages/bc/bb/7ddb00a83760ff4a83c4e2fc231fd63937cc7317c10c82f583302e0f6586/ruff-0.15.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51cb8cc943e891ba99989dd92d61e29b1d231e14811db9be6440ecf25d5c1609", size = 11256418, upload-time = "2026-04-09T14:05:57.69Z" }, - { url = "https://files.pythonhosted.org/packages/dc/8d/55de0d35aacf6cd50b6ee91ee0f291672080021896543776f4170fc5c454/ruff-0.15.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:e59c9bdc056a320fb9ea1700a8d591718b8faf78af065484e801258d3a76bc3f", size = 11288416, upload-time = "2026-04-09T14:05:44.695Z" }, - { url = "https://files.pythonhosted.org/packages/68/cf/9438b1a27426ec46a80e0a718093c7f958ef72f43eb3111862949ead3cc1/ruff-0.15.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:136c00ca2f47b0018b073f28cb5c1506642a830ea941a60354b0e8bc8076b151", size = 10621053, upload-time = "2026-04-09T14:05:52.782Z" }, - { url = 
"https://files.pythonhosted.org/packages/4c/50/e29be6e2c135e9cd4cb15fbade49d6a2717e009dff3766dd080fcb82e251/ruff-0.15.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8b80a2f3c9c8a950d6237f2ca12b206bccff626139be9fa005f14feb881a1ae8", size = 10378302, upload-time = "2026-04-09T14:06:14.361Z" }, - { url = "https://files.pythonhosted.org/packages/18/2f/e0b36a6f99c51bb89f3a30239bc7bf97e87a37ae80aa2d6542d6e5150364/ruff-0.15.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:e3e53c588164dc025b671c9df2462429d60357ea91af7e92e9d56c565a9f1b07", size = 10850074, upload-time = "2026-04-09T14:06:16.581Z" }, - { url = "https://files.pythonhosted.org/packages/11/08/874da392558ce087a0f9b709dc6ec0d60cbc694c1c772dab8d5f31efe8cb/ruff-0.15.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b0c52744cf9f143a393e284125d2576140b68264a93c6716464e129a3e9adb48", size = 11358051, upload-time = "2026-04-09T14:06:18.948Z" }, - { url = "https://files.pythonhosted.org/packages/e4/46/602938f030adfa043e67112b73821024dc79f3ab4df5474c25fa4c1d2d14/ruff-0.15.10-py3-none-win32.whl", hash = "sha256:d4272e87e801e9a27a2e8df7b21011c909d9ddd82f4f3281d269b6ba19789ca5", size = 10588964, upload-time = "2026-04-09T14:06:07.14Z" }, - { url = "https://files.pythonhosted.org/packages/25/b6/261225b875d7a13b33a6d02508c39c28450b2041bb01d0f7f1a83d569512/ruff-0.15.10-py3-none-win_amd64.whl", hash = "sha256:28cb32d53203242d403d819fd6983152489b12e4a3ae44993543d6fe62ab42ed", size = 11745044, upload-time = "2026-04-09T14:05:39.473Z" }, - { url = "https://files.pythonhosted.org/packages/58/ed/dea90a65b7d9e69888890fb14c90d7f51bf0c1e82ad800aeb0160e4bacfd/ruff-0.15.10-py3-none-win_arm64.whl", hash = "sha256:601d1610a9e1f1c2165a4f561eeaa2e2ea1e97f3287c5aa258d3dab8b57c6188", size = 11035607, upload-time = "2026-04-09T14:05:47.593Z" }, +version = "0.15.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e4/8d/192f3d7103816158dfd5ea50d098ef2aec19194e6cbccd4b3485bdb2eb2d/ruff-0.15.11.tar.gz", hash = "sha256:f092b21708bf0e7437ce9ada249dfe688ff9a0954fc94abab05dcea7dcd29c33", size = 4637264, upload-time = "2026-04-16T18:46:26.58Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/1e/6aca3427f751295ab011828e15e9bf452200ac74484f1db4be0197b8170b/ruff-0.15.11-py3-none-linux_armv6l.whl", hash = "sha256:e927cfff503135c558eb581a0c9792264aae9507904eb27809cdcff2f2c847b7", size = 10607943, upload-time = "2026-04-16T18:46:05.967Z" }, + { url = "https://files.pythonhosted.org/packages/e7/26/1341c262e74f36d4e84f3d6f4df0ac68cd53331a66bfc5080daa17c84c0b/ruff-0.15.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7a1b5b2938d8f890b76084d4fa843604d787a912541eae85fd7e233398bbb73e", size = 10988592, upload-time = "2026-04-16T18:46:00.742Z" }, + { url = "https://files.pythonhosted.org/packages/03/71/850b1d6ffa9564fbb6740429bad53df1094082fe515c8c1e74b6d8d05f18/ruff-0.15.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d4176f3d194afbdaee6e41b9ccb1a2c287dba8700047df474abfbe773825d1cb", size = 10338501, upload-time = "2026-04-16T18:46:03.723Z" }, + { url = "https://files.pythonhosted.org/packages/f2/11/cc1284d3e298c45a817a6aadb6c3e1d70b45c9b36d8d9cce3387b495a03a/ruff-0.15.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b17c886fb88203ced3afe7f14e8d5ae96e9d2f4ccc0ee66aa19f2c2675a27e4", size = 10670693, upload-time = "2026-04-16T18:46:41.941Z" }, + { url = "https://files.pythonhosted.org/packages/ce/9e/f8288b034ab72b371513c13f9a41d9ba3effac54e24bfb467b007daee2ca/ruff-0.15.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:49fafa220220afe7758a487b048de4c8f9f767f37dfefad46b9dd06759d003eb", size = 10416177, upload-time = "2026-04-16T18:46:21.717Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/71/504d79abfd3d92532ba6bbe3d1c19fada03e494332a59e37c7c2dabae427/ruff-0.15.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2ab8427e74a00d93b8bda1307b1e60970d40f304af38bccb218e056c220120d", size = 11221886, upload-time = "2026-04-16T18:46:15.086Z" }, + { url = "https://files.pythonhosted.org/packages/43/5a/947e6ab7a5ad603d65b474be15a4cbc6d29832db5d762cd142e4e3a74164/ruff-0.15.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:195072c0c8e1fc8f940652073df082e37a5d9cb43b4ab1e4d0566ab8977a13b7", size = 12075183, upload-time = "2026-04-16T18:46:07.944Z" }, + { url = "https://files.pythonhosted.org/packages/9f/a1/0b7bb6268775fdd3a0818aee8efd8f5b4e231d24dd4d528ced2534023182/ruff-0.15.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3a0996d486af3920dec930a2e7daed4847dfc12649b537a9335585ada163e9e", size = 11516575, upload-time = "2026-04-16T18:46:31.687Z" }, + { url = "https://files.pythonhosted.org/packages/30/c3/bb5168fc4d233cc06e95f482770d0f3c87945a0cd9f614b90ea8dc2f2833/ruff-0.15.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bef2cb556d509259f1fe440bb9cd33c756222cf0a7afe90d15edf0866702431", size = 11306537, upload-time = "2026-04-16T18:46:36.988Z" }, + { url = "https://files.pythonhosted.org/packages/e4/92/4cfae6441f3967317946f3b788136eecf093729b94d6561f963ed810c82e/ruff-0.15.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:030d921a836d7d4a12cf6e8d984a88b66094ccb0e0f17ddd55067c331191bf19", size = 11296813, upload-time = "2026-04-16T18:46:24.182Z" }, + { url = "https://files.pythonhosted.org/packages/43/26/972784c5dde8313acde8ac71ba8ac65475b85db4a2352a76c9934361f9bc/ruff-0.15.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0e783b599b4577788dbbb66b9addcef87e9a8832f4ce0c19e34bf55543a2f890", size = 10633136, upload-time = "2026-04-16T18:46:39.802Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/53/3985a4f185020c2f367f2e08a103032e12564829742a1b417980ce1514a0/ruff-0.15.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ae90592246625ba4a34349d68ec28d4400d75182b71baa196ddb9f82db025ef5", size = 10424701, upload-time = "2026-04-16T18:46:10.381Z" }, + { url = "https://files.pythonhosted.org/packages/d3/57/bf0dfb32241b56c83bb663a826133da4bf17f682ba8c096973065f6e6a68/ruff-0.15.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1f111d62e3c983ed20e0ca2e800f8d77433a5b1161947df99a5c2a3fb60514f0", size = 10873887, upload-time = "2026-04-16T18:46:29.157Z" }, + { url = "https://files.pythonhosted.org/packages/02/05/e48076b2a57dc33ee8c7a957296f97c744ca891a8ffb4ffb1aaa3b3f517d/ruff-0.15.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:06f483d6646f59eaffba9ae30956370d3a886625f511a3108994000480621d1c", size = 11404316, upload-time = "2026-04-16T18:46:19.462Z" }, + { url = "https://files.pythonhosted.org/packages/88/27/0195d15fe7a897cbcba0904792c4b7c9fdd958456c3a17d2ea6093716a9a/ruff-0.15.11-py3-none-win32.whl", hash = "sha256:476a2aa56b7da0b73a3ee80b6b2f0e19cce544245479adde7baa65466664d5f3", size = 10655535, upload-time = "2026-04-16T18:46:12.47Z" }, + { url = "https://files.pythonhosted.org/packages/3a/5e/c927b325bd4c1d3620211a4b96f47864633199feed60fa936025ab27e090/ruff-0.15.11-py3-none-win_amd64.whl", hash = "sha256:8b6756d88d7e234fb0c98c91511aae3cd519d5e3ed271cae31b20f39cb2a12a3", size = 11779692, upload-time = "2026-04-16T18:46:17.268Z" }, + { url = "https://files.pythonhosted.org/packages/63/b6/aeadee5443e49baa2facd51131159fd6301cc4ccfc1541e4df7b021c37dd/ruff-0.15.11-py3-none-win_arm64.whl", hash = "sha256:063fed18cc1bbe0ee7393957284a6fe8b588c6a406a285af3ee3f46da2391ee4", size = 11032614, upload-time = "2026-04-16T18:46:34.487Z" }, ] [[package]] From 254bee3023098dc496902600d67e6f1793dd713b Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 21 Apr 2026 18:09:10 +0500 Subject: [PATCH 02/32] refactor(app): global 
and comprehensive refactoring (Stage #2). --- codeclone/analysis/__init__.py | 10 +- codeclone/analysis/class_metrics.py | 3 +- codeclone/analysis/fingerprint.py | 2 +- codeclone/analysis/units.py | 2 +- codeclone/cache/__init__.py | 268 ---- codeclone/cache/store.py | 14 +- codeclone/config/__init__.py | 87 +- codeclone/core/__init__.py | 57 - codeclone/core/_types.py | 3 +- codeclone/core/discovery.py | 2 +- codeclone/core/discovery_cache.py | 2 +- codeclone/core/metrics_payload.py | 2 +- codeclone/core/parallelism.py | 3 +- codeclone/core/pipeline.py | 9 +- codeclone/core/reporting.py | 6 +- codeclone/core/worker.py | 2 +- codeclone/domain/__init__.py | 132 -- codeclone/findings/__init__.py | 16 - codeclone/findings/clones/__init__.py | 4 - codeclone/findings/structural/__init__.py | 16 - codeclone/metrics/__init__.py | 47 - codeclone/metrics/health.py | 2 +- codeclone/report/__init__.py | 44 - codeclone/report/document/__init__.py | 59 - codeclone/report/document/metrics.py | 2 +- codeclone/report/gates/__init__.py | 34 - codeclone/report/gates/evaluator.py | 2 +- codeclone/report/html/assemble.py | 2 +- codeclone/report/html/sections/_structural.py | 2 +- codeclone/report/json_contract.py | 97 -- codeclone/report/markdown.py | 70 - codeclone/report/renderers/__init__.py | 14 - codeclone/report/renderers/markdown.py | 46 +- codeclone/report/renderers/sarif.py | 40 +- codeclone/report/sarif.py | 86 -- codeclone/report/serialize.py | 31 - codeclone/surfaces/cli/console.py | 9 +- codeclone/surfaces/cli/execution.py | 318 ++++ codeclone/surfaces/cli/main.py | 1314 +---------------- codeclone/surfaces/cli/post_run.py | 143 ++ codeclone/surfaces/cli/report_meta.py | 58 +- codeclone/surfaces/cli/runtime.py | 65 +- codeclone/surfaces/cli/startup.py | 189 +++ codeclone/surfaces/cli/workflow.py | 532 +++++++ codeclone/surfaces/mcp/__init__.py | 54 - codeclone/surfaces/mcp/service.py | 91 +- codeclone/surfaces/mcp/session.py | 25 +- pyproject.toml | 1 + 
tests/test_cache.py | 260 ++-- tests/test_cli_inprocess.py | 43 +- tests/test_cli_unit.py | 140 +- tests/test_core_branch_coverage.py | 94 +- tests/test_coverage_edges.py | 244 +++ tests/test_detector_golden.py | 2 +- tests/test_extractor.py | 2 +- tests/test_gating.py | 2 +- tests/test_html_report.py | 11 +- tests/test_mcp_server.py | 2 +- tests/test_mcp_service.py | 8 +- tests/test_metrics_modules.py | 55 +- tests/test_metrics_registry.py | 3 +- tests/test_options_spec_coverage.py | 9 +- tests/test_pipeline_metrics.py | 6 +- tests/test_pipeline_process.py | 95 +- tests/test_report.py | 40 +- tests/test_report_branch_invariants.py | 8 +- tests/test_report_contract_coverage.py | 56 +- tests/test_report_explain.py | 2 +- tests/test_security.py | 2 +- uv.lock | 1025 +++++++++++-- 70 files changed, 3102 insertions(+), 3024 deletions(-) delete mode 100644 codeclone/report/json_contract.py delete mode 100644 codeclone/report/markdown.py delete mode 100644 codeclone/report/sarif.py delete mode 100644 codeclone/report/serialize.py create mode 100644 codeclone/surfaces/cli/execution.py create mode 100644 codeclone/surfaces/cli/post_run.py create mode 100644 codeclone/surfaces/cli/startup.py create mode 100644 codeclone/surfaces/cli/workflow.py create mode 100644 tests/test_coverage_edges.py diff --git a/codeclone/analysis/__init__.py b/codeclone/analysis/__init__.py index 6938ec3..a521754 100644 --- a/codeclone/analysis/__init__.py +++ b/codeclone/analysis/__init__.py @@ -8,15 +8,7 @@ from .cfg import CFG, CFGBuilder from .fingerprint import bucket_loc, sha1 from .normalizer import AstNormalizer, NormalizationConfig, stmt_hashes - - -def __getattr__(name: str) -> object: - if name == "extract_units_and_stats_from_source": - from .units import extract_units_and_stats_from_source - - return extract_units_and_stats_from_source - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") - +from .units import extract_units_and_stats_from_source __all__ = [ "CFG", 
diff --git a/codeclone/analysis/class_metrics.py b/codeclone/analysis/class_metrics.py index 2d28d84..d343ec7 100644 --- a/codeclone/analysis/class_metrics.py +++ b/codeclone/analysis/class_metrics.py @@ -8,7 +8,8 @@ import ast -from ..metrics import cohesion_risk, compute_cbo, compute_lcom4, coupling_risk +from ..metrics.cohesion import cohesion_risk, compute_lcom4 +from ..metrics.coupling import compute_cbo, coupling_risk from ..models import ClassMetrics diff --git a/codeclone/analysis/fingerprint.py b/codeclone/analysis/fingerprint.py index 277f724..a33ebe4 100644 --- a/codeclone/analysis/fingerprint.py +++ b/codeclone/analysis/fingerprint.py @@ -10,7 +10,7 @@ from typing import TYPE_CHECKING from .. import qualnames as _qualnames -from ..metrics import cyclomatic_complexity +from ..metrics.complexity import cyclomatic_complexity from .cfg import CFGBuilder from .normalizer import ( AstNormalizer, diff --git a/codeclone/analysis/units.py b/codeclone/analysis/units.py index fc5ce5b..de68bf8 100644 --- a/codeclone/analysis/units.py +++ b/codeclone/analysis/units.py @@ -13,9 +13,9 @@ from ..blocks import extract_blocks, extract_segments from ..contracts.errors import ParseError from ..findings.structural.detectors import scan_function_structure -from ..metrics import risk_level from ..metrics.adoption import collect_module_adoption from ..metrics.api_surface import collect_module_api_surface +from ..metrics.complexity import risk_level from ..models import ( BlockUnit, ClassMetrics, diff --git a/codeclone/cache/__init__.py b/codeclone/cache/__init__.py index bf55c08..9135843 100644 --- a/codeclone/cache/__init__.py +++ b/codeclone/cache/__init__.py @@ -3,271 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from ._canonicalize import ( - _as_file_stat_dict, - _as_module_api_surface_dict, - _as_module_docstring_coverage_dict, - _as_module_typing_coverage_dict, - _as_source_stats_dict, - _attach_optional_cache_sections, - _canonicalize_cache_entry, - _decode_optional_cache_sections, - _has_cache_entry_container_shape, - _is_canonical_cache_entry, - _normalized_optional_string_list, -) -from ._validators import ( - _has_typed_fields, - _is_api_param_spec_dict, - _is_block_dict, - _is_class_metrics_dict, - _is_dead_candidate_dict, - _is_file_stat_dict, - _is_module_api_surface_dict, - _is_module_dep_dict, - _is_module_docstring_coverage_dict, - _is_module_typing_coverage_dict, - _is_public_symbol_dict, - _is_segment_dict, - _is_source_stats_dict, - _is_string_list, - _is_unit_dict, -) -from ._wire_decode import ( - _decode_optional_wire_api_surface, - _decode_optional_wire_docstring_coverage, - _decode_optional_wire_module_ints, - _decode_optional_wire_source_stats, - _decode_optional_wire_typing_coverage, - _decode_wire_api_param_spec, - _decode_wire_api_surface_symbol, - _decode_wire_block, - _decode_wire_class_metric, - _decode_wire_dead_candidate, - _decode_wire_file_entry, - _decode_wire_file_sections, - _decode_wire_module_dep, - _decode_wire_name_sections, - _decode_wire_segment, - _decode_wire_stat, - _decode_wire_structural_findings_optional, - _decode_wire_structural_group, - _decode_wire_structural_occurrence, - _decode_wire_structural_signature, - _decode_wire_unit, -) -from ._wire_encode import _encode_wire_file_entry -from ._wire_helpers import ( - _decode_optional_wire_coupled_classes, - _decode_optional_wire_names, - _decode_wire_class_metric_fields, - _decode_wire_int_fields, - _decode_wire_named_sized_span, - _decode_wire_named_span, - _decode_wire_qualname_span, - _decode_wire_qualname_span_size, - _decode_wire_row, - _decode_wire_str_fields, 
- _decode_wire_unit_core_fields, - _decode_wire_unit_flow_profiles, -) -from .entries import ( - ApiParamSpecDict, - BlockDict, - CacheEntry, - CacheEntryBase, - ClassMetricsDict, - DeadCandidateDict, - FileStat, - ModuleApiSurfaceDict, - ModuleDepDict, - ModuleDocstringCoverageDict, - ModuleTypingCoverageDict, - PublicSymbolDict, - SegmentDict, - SourceStatsDict, - StructuralFindingGroupDict, - StructuralFindingOccurrenceDict, - UnitDict, - _api_surface_dict_from_model, - _as_risk_literal, - _block_dict_from_model, - _class_metrics_dict_from_model, - _dead_candidate_dict_from_model, - _docstring_coverage_dict_from_model, - _module_dep_dict_from_model, - _new_optional_metrics_payload, - _normalize_cached_structural_group, - _normalize_cached_structural_groups, - _segment_dict_from_model, - _structural_group_dict_from_model, - _structural_occurrence_dict_from_model, - _typing_coverage_dict_from_model, - _unit_dict_from_model, -) -from .integrity import ( - as_int_or_none, - as_object_list, - as_str_dict, - as_str_or_none, - canonical_json, - read_json_document, - sign_cache_payload, - verify_cache_payload_signature, - write_json_document_atomically, -) -from .projection import ( - SegmentReportProjection, - build_segment_report_projection, - decode_segment_report_projection, - encode_segment_report_projection, - runtime_filepath_from_wire, - wire_filepath_from_runtime, -) -from .store import Cache, file_stat_signature -from .versioning import ( - _DEFAULT_WIRE_UNIT_FLOW_PROFILES, - CACHE_VERSION, - LEGACY_CACHE_SECRET_FILENAME, - MAX_CACHE_SIZE_BYTES, - AnalysisProfile, - CacheData, - CacheStatus, - _as_analysis_profile, - _empty_cache_data, - _resolve_root, -) - -_as_str = as_str_or_none -_as_int = as_int_or_none -_as_list = as_object_list -_as_str_dict = as_str_dict - -__all__ = [ - "CACHE_VERSION", - "LEGACY_CACHE_SECRET_FILENAME", - "MAX_CACHE_SIZE_BYTES", - "_DEFAULT_WIRE_UNIT_FLOW_PROFILES", - "AnalysisProfile", - "ApiParamSpecDict", - "BlockDict", - "Cache", 
- "CacheData", - "CacheEntry", - "CacheEntryBase", - "CacheStatus", - "ClassMetricsDict", - "DeadCandidateDict", - "FileStat", - "ModuleApiSurfaceDict", - "ModuleDepDict", - "ModuleDocstringCoverageDict", - "ModuleTypingCoverageDict", - "PublicSymbolDict", - "SegmentDict", - "SegmentReportProjection", - "SourceStatsDict", - "StructuralFindingGroupDict", - "StructuralFindingOccurrenceDict", - "UnitDict", - "_api_surface_dict_from_model", - "_as_analysis_profile", - "_as_file_stat_dict", - "_as_int", - "_as_list", - "_as_module_api_surface_dict", - "_as_module_docstring_coverage_dict", - "_as_module_typing_coverage_dict", - "_as_risk_literal", - "_as_source_stats_dict", - "_as_str", - "_as_str_dict", - "_attach_optional_cache_sections", - "_block_dict_from_model", - "_canonicalize_cache_entry", - "_class_metrics_dict_from_model", - "_dead_candidate_dict_from_model", - "_decode_optional_cache_sections", - "_decode_optional_wire_api_surface", - "_decode_optional_wire_coupled_classes", - "_decode_optional_wire_docstring_coverage", - "_decode_optional_wire_module_ints", - "_decode_optional_wire_names", - "_decode_optional_wire_source_stats", - "_decode_optional_wire_typing_coverage", - "_decode_wire_api_param_spec", - "_decode_wire_api_surface_symbol", - "_decode_wire_block", - "_decode_wire_class_metric", - "_decode_wire_class_metric_fields", - "_decode_wire_dead_candidate", - "_decode_wire_file_entry", - "_decode_wire_file_sections", - "_decode_wire_int_fields", - "_decode_wire_module_dep", - "_decode_wire_name_sections", - "_decode_wire_named_sized_span", - "_decode_wire_named_span", - "_decode_wire_qualname_span", - "_decode_wire_qualname_span_size", - "_decode_wire_row", - "_decode_wire_segment", - "_decode_wire_stat", - "_decode_wire_str_fields", - "_decode_wire_structural_findings_optional", - "_decode_wire_structural_group", - "_decode_wire_structural_occurrence", - "_decode_wire_structural_signature", - "_decode_wire_unit", - "_decode_wire_unit_core_fields", - 
"_decode_wire_unit_flow_profiles", - "_docstring_coverage_dict_from_model", - "_empty_cache_data", - "_encode_wire_file_entry", - "_has_cache_entry_container_shape", - "_has_typed_fields", - "_is_api_param_spec_dict", - "_is_block_dict", - "_is_canonical_cache_entry", - "_is_class_metrics_dict", - "_is_dead_candidate_dict", - "_is_file_stat_dict", - "_is_module_api_surface_dict", - "_is_module_dep_dict", - "_is_module_docstring_coverage_dict", - "_is_module_typing_coverage_dict", - "_is_public_symbol_dict", - "_is_segment_dict", - "_is_source_stats_dict", - "_is_string_list", - "_is_unit_dict", - "_module_dep_dict_from_model", - "_new_optional_metrics_payload", - "_normalize_cached_structural_group", - "_normalize_cached_structural_groups", - "_normalized_optional_string_list", - "_resolve_root", - "_segment_dict_from_model", - "_structural_group_dict_from_model", - "_structural_occurrence_dict_from_model", - "_typing_coverage_dict_from_model", - "_unit_dict_from_model", - "as_int_or_none", - "as_object_list", - "as_str_dict", - "as_str_or_none", - "build_segment_report_projection", - "canonical_json", - "decode_segment_report_projection", - "encode_segment_report_projection", - "file_stat_signature", - "read_json_document", - "runtime_filepath_from_wire", - "sign_cache_payload", - "verify_cache_payload_signature", - "wire_filepath_from_runtime", - "write_json_document_atomically", -] diff --git a/codeclone/cache/store.py b/codeclone/cache/store.py index 0ad7612..ce85709 100644 --- a/codeclone/cache/store.py +++ b/codeclone/cache/store.py @@ -7,7 +7,6 @@ from __future__ import annotations import os -import sys from json import JSONDecodeError from pathlib import Path @@ -75,15 +74,6 @@ ) -def _default_max_cache_size_bytes() -> int: - public_module = sys.modules.get("codeclone.cache") - if public_module is not None: - candidate = getattr(public_module, "MAX_CACHE_SIZE_BYTES", MAX_CACHE_SIZE_BYTES) - if isinstance(candidate, int): - return candidate - return 
MAX_CACHE_SIZE_BYTES - - class Cache: __slots__ = ( "_canonical_runtime_paths", @@ -141,9 +131,7 @@ def __init__( self.load_status = CacheStatus.MISSING self.load_warning: str | None = self.legacy_secret_warning self.max_size_bytes = ( - _default_max_cache_size_bytes() - if max_size_bytes is None - else max_size_bytes + MAX_CACHE_SIZE_BYTES if max_size_bytes is None else max_size_bytes ) self.segment_report_projection: SegmentReportProjection | None = None self._dirty: bool = True diff --git a/codeclone/config/__init__.py b/codeclone/config/__init__.py index 8040155..557317f 100644 --- a/codeclone/config/__init__.py +++ b/codeclone/config/__init__.py @@ -1,83 +1,4 @@ -from .argparse_builder import _ArgumentParser, _HelpFormatter, build_parser -from .pyproject_loader import ( - CONFIG_KEY_SPECS, - PATH_CONFIG_KEYS, - ConfigValidationError, - _load_toml, - load_pyproject_config, - normalize_path_config_value, - validate_config_value, -) -from .resolver import ( - ResolvedConfig, - apply_pyproject_config_overrides, - apply_resolved_config, - collect_explicit_cli_dests, - resolve_config, -) -from .spec import ( - ARGUMENT_GROUP_TITLES, - DEFAULT_BASELINE_PATH, - DEFAULT_BLOCK_MIN_LOC, - DEFAULT_BLOCK_MIN_STMT, - DEFAULT_HTML_REPORT_PATH, - DEFAULT_JSON_REPORT_PATH, - DEFAULT_MARKDOWN_REPORT_PATH, - DEFAULT_MAX_BASELINE_SIZE_MB, - DEFAULT_MAX_CACHE_SIZE_MB, - DEFAULT_MIN_LOC, - DEFAULT_MIN_STMT, - DEFAULT_PROCESSES, - DEFAULT_ROOT, - DEFAULT_SARIF_REPORT_PATH, - DEFAULT_SEGMENT_MIN_LOC, - DEFAULT_SEGMENT_MIN_STMT, - DEFAULT_TEXT_REPORT_PATH, - DEFAULTS_BY_DEST, - OPTIONS, - PYPROJECT_OPTIONS, - TESTABLE_CLI_OPTIONS, - ConfigKeySpec, - OptionSpec, -) - -__all__ = [ - "ARGUMENT_GROUP_TITLES", - "CONFIG_KEY_SPECS", - "DEFAULTS_BY_DEST", - "DEFAULT_BASELINE_PATH", - "DEFAULT_BLOCK_MIN_LOC", - "DEFAULT_BLOCK_MIN_STMT", - "DEFAULT_HTML_REPORT_PATH", - "DEFAULT_JSON_REPORT_PATH", - "DEFAULT_MARKDOWN_REPORT_PATH", - "DEFAULT_MAX_BASELINE_SIZE_MB", - "DEFAULT_MAX_CACHE_SIZE_MB", 
- "DEFAULT_MIN_LOC", - "DEFAULT_MIN_STMT", - "DEFAULT_PROCESSES", - "DEFAULT_ROOT", - "DEFAULT_SARIF_REPORT_PATH", - "DEFAULT_SEGMENT_MIN_LOC", - "DEFAULT_SEGMENT_MIN_STMT", - "DEFAULT_TEXT_REPORT_PATH", - "OPTIONS", - "PATH_CONFIG_KEYS", - "PYPROJECT_OPTIONS", - "TESTABLE_CLI_OPTIONS", - "ConfigKeySpec", - "ConfigValidationError", - "OptionSpec", - "ResolvedConfig", - "_ArgumentParser", - "_HelpFormatter", - "_load_toml", - "apply_pyproject_config_overrides", - "apply_resolved_config", - "build_parser", - "collect_explicit_cli_dests", - "load_pyproject_config", - "normalize_path_config_value", - "resolve_config", - "validate_config_value", -] +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 diff --git a/codeclone/core/__init__.py b/codeclone/core/__init__.py index f684ebc..557317f 100644 --- a/codeclone/core/__init__.py +++ b/codeclone/core/__init__.py @@ -2,60 +2,3 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0 - -from ._types import ( - DEFAULT_BATCH_SIZE, - DEFAULT_RUNTIME_PROCESSES, - MAX_FILE_SIZE, - PARALLEL_MIN_FILES_FLOOR, - PARALLEL_MIN_FILES_PER_WORKER, - AnalysisResult, - BootstrapResult, - DiscoveryResult, - FileProcessResult, - OutputPaths, - ProcessingResult, - ReportArtifacts, -) -from .bootstrap import _resolve_optional_runtime_path, bootstrap -from .discovery import discover -from .parallelism import ( - _parallel_min_files, - _resolve_process_count, - _should_use_parallel, - process, -) -from .pipeline import analyze, compute_project_metrics, compute_suggestions -from .reporting import GatingResult, MetricGateConfig, gate, report -from .worker import _invoke_process_file, process_file - -__all__ = [ - "DEFAULT_BATCH_SIZE", - "DEFAULT_RUNTIME_PROCESSES", - "MAX_FILE_SIZE", - "PARALLEL_MIN_FILES_FLOOR", - "PARALLEL_MIN_FILES_PER_WORKER", - "AnalysisResult", - "BootstrapResult", - "DiscoveryResult", - "FileProcessResult", - "GatingResult", - "MetricGateConfig", - "OutputPaths", - "ProcessingResult", - "ReportArtifacts", - "_invoke_process_file", - "_parallel_min_files", - "_resolve_optional_runtime_path", - "_resolve_process_count", - "_should_use_parallel", - "analyze", - "bootstrap", - "compute_project_metrics", - "compute_suggestions", - "discover", - "gate", - "process", - "process_file", - "report", -] diff --git a/codeclone/core/_types.py b/codeclone/core/_types.py index 5d76c9c..ecedcfe 100644 --- a/codeclone/core/_types.py +++ b/codeclone/core/_types.py @@ -16,7 +16,8 @@ import orjson from ..analysis.normalizer import NormalizationConfig -from ..cache import FileStat, SegmentReportProjection +from ..cache.entries import FileStat +from ..cache.projection import SegmentReportProjection from ..models import ( BlockUnit, ClassMetrics, diff --git a/codeclone/core/discovery.py b/codeclone/core/discovery.py index 9ebd83d..4296fc5 100644 --- a/codeclone/core/discovery.py +++ b/codeclone/core/discovery.py @@ -8,7 +8,7 @@ 
from typing import cast -from ..cache import Cache, file_stat_signature +from ..cache.store import Cache, file_stat_signature from ..models import ( ClassMetrics, DeadCandidate, diff --git a/codeclone/core/discovery_cache.py b/codeclone/core/discovery_cache.py index d6d9470..dbec1ac 100644 --- a/codeclone/core/discovery_cache.py +++ b/codeclone/core/discovery_cache.py @@ -9,7 +9,7 @@ from collections.abc import Mapping from typing import Literal, cast -from ..cache import ( +from ..cache.entries import ( ApiParamSpecDict, CacheEntry, ClassMetricsDict, diff --git a/codeclone/core/metrics_payload.py b/codeclone/core/metrics_payload.py index b675b8f..5776d96 100644 --- a/codeclone/core/metrics_payload.py +++ b/codeclone/core/metrics_payload.py @@ -11,7 +11,7 @@ from ..domain.findings import CATEGORY_COHESION, CATEGORY_COMPLEXITY, CATEGORY_COUPLING from ..domain.quality import CONFIDENCE_HIGH, RISK_LOW -from ..metrics import build_overloaded_modules_payload +from ..metrics.overloaded_modules import build_overloaded_modules_payload from ..models import ( ClassMetrics, CoverageJoinResult, diff --git a/codeclone/core/parallelism.py b/codeclone/core/parallelism.py index b4df298..bc748c5 100644 --- a/codeclone/core/parallelism.py +++ b/codeclone/core/parallelism.py @@ -9,7 +9,8 @@ from collections.abc import Callable, Sequence from concurrent.futures import ProcessPoolExecutor, as_completed -from ..cache import Cache, SourceStatsDict +from ..cache.entries import SourceStatsDict +from ..cache.store import Cache from ..models import ( ClassMetrics, DeadCandidate, diff --git a/codeclone/core/pipeline.py b/codeclone/core/pipeline.py index d974c3f..d6c5cd8 100644 --- a/codeclone/core/pipeline.py +++ b/codeclone/core/pipeline.py @@ -21,13 +21,12 @@ build_suppressed_clone_groups, split_clone_groups_for_golden_fixtures, ) -from ..metrics import ( +from ..metrics._base import MetricProjectContext +from ..metrics.coverage_join import CoverageJoinParseError, build_coverage_join +from 
..metrics.dead_code import find_suppressed_unused +from ..metrics.registry import ( METRIC_FAMILIES, - CoverageJoinParseError, - MetricProjectContext, - build_coverage_join, build_project_metrics, - find_suppressed_unused, project_metrics_defaults, ) from ..models import ( diff --git a/codeclone/core/reporting.py b/codeclone/core/reporting.py index 01c21cb..05cc83b 100644 --- a/codeclone/core/reporting.py +++ b/codeclone/core/reporting.py @@ -10,7 +10,7 @@ from typing import cast from ..models import MetricsDiff -from ..report.document import build_report_document +from ..report.document.builder import build_report_document from ..report.gates.evaluator import GateResult, GateState from ..report.gates.evaluator import MetricGateConfig as _MetricGateConfig from ..report.gates.evaluator import evaluate_gate_state as _evaluate_gate_state @@ -33,13 +33,13 @@ def _load_markdown_report_renderer() -> Callable[..., str]: - from ..report.markdown import to_markdown_report + from ..report.renderers.markdown import to_markdown_report return to_markdown_report def _load_sarif_report_renderer() -> Callable[..., str]: - from ..report.sarif import to_sarif_report + from ..report.renderers.sarif import to_sarif_report return to_sarif_report diff --git a/codeclone/core/worker.py b/codeclone/core/worker.py index 07fdd1d..079e083 100644 --- a/codeclone/core/worker.py +++ b/codeclone/core/worker.py @@ -14,7 +14,7 @@ from ..analysis.normalizer import NormalizationConfig from ..analysis.units import extract_units_and_stats_from_source -from ..cache import FileStat +from ..cache.entries import FileStat from ..scanner import module_name_from_path from ._types import MAX_FILE_SIZE, FileProcessResult diff --git a/codeclone/domain/__init__.py b/codeclone/domain/__init__.py index 61cd04f..9135843 100644 --- a/codeclone/domain/__init__.py +++ b/codeclone/domain/__init__.py @@ -3,135 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from .findings import ( - CATEGORY_CLONE, - CATEGORY_COHESION, - CATEGORY_COMPLEXITY, - CATEGORY_COUPLING, - CATEGORY_DEAD_CODE, - CATEGORY_DEPENDENCY, - CATEGORY_STRUCTURAL, - CLONE_KIND_BLOCK, - CLONE_KIND_FUNCTION, - CLONE_KIND_SEGMENT, - CLONE_NOVELTY_KNOWN, - CLONE_NOVELTY_NEW, - FAMILY_CLONE, - FAMILY_CLONES, - FAMILY_DEAD_CODE, - FAMILY_DESIGN, - FAMILY_METRICS, - FAMILY_STRUCTURAL, - FINDING_KIND_CLASS_HOTSPOT, - FINDING_KIND_CLONE_GROUP, - FINDING_KIND_CYCLE, - FINDING_KIND_FUNCTION_HOTSPOT, - FINDING_KIND_UNUSED_SYMBOL, - STRUCTURAL_KIND_CLONE_COHORT_DRIFT, - STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE, - STRUCTURAL_KIND_DUPLICATED_BRANCHES, - SYMBOL_KIND_CLASS, - SYMBOL_KIND_FUNCTION, - SYMBOL_KIND_IMPORT, - SYMBOL_KIND_METHOD, -) -from .quality import ( - CONFIDENCE_HIGH, - CONFIDENCE_LOW, - CONFIDENCE_MEDIUM, - EFFORT_EASY, - EFFORT_HARD, - EFFORT_MODERATE, - EFFORT_WEIGHT, - HEALTH_GRADE_A, - HEALTH_GRADE_B, - HEALTH_GRADE_C, - HEALTH_GRADE_D, - HEALTH_GRADE_F, - HEALTH_GRADES, - RISK_HIGH, - RISK_LOW, - RISK_MEDIUM, - SEVERITY_CRITICAL, - SEVERITY_INFO, - SEVERITY_ORDER, - SEVERITY_RANK, - SEVERITY_WARNING, -) -from .source_scope import ( - IMPACT_SCOPE_MIXED, - IMPACT_SCOPE_NON_RUNTIME, - IMPACT_SCOPE_RUNTIME, - SOURCE_KIND_BREAKDOWN_KEYS, - SOURCE_KIND_FIXTURES, - SOURCE_KIND_MIXED, - SOURCE_KIND_ORDER, - SOURCE_KIND_OTHER, - SOURCE_KIND_PRODUCTION, - SOURCE_KIND_TESTS, -) - -__all__ = [ - "CATEGORY_CLONE", - "CATEGORY_COHESION", - "CATEGORY_COMPLEXITY", - "CATEGORY_COUPLING", - "CATEGORY_DEAD_CODE", - "CATEGORY_DEPENDENCY", - "CATEGORY_STRUCTURAL", - "CLONE_KIND_BLOCK", - "CLONE_KIND_FUNCTION", - "CLONE_KIND_SEGMENT", - "CLONE_NOVELTY_KNOWN", - "CLONE_NOVELTY_NEW", - "CONFIDENCE_HIGH", - "CONFIDENCE_LOW", - "CONFIDENCE_MEDIUM", - "EFFORT_EASY", - "EFFORT_HARD", - "EFFORT_MODERATE", - "EFFORT_WEIGHT", - "FAMILY_CLONE", - "FAMILY_CLONES", - "FAMILY_DEAD_CODE", - 
"FAMILY_DESIGN", - "FAMILY_METRICS", - "FAMILY_STRUCTURAL", - "FINDING_KIND_CLASS_HOTSPOT", - "FINDING_KIND_CLONE_GROUP", - "FINDING_KIND_CYCLE", - "FINDING_KIND_FUNCTION_HOTSPOT", - "FINDING_KIND_UNUSED_SYMBOL", - "HEALTH_GRADES", - "HEALTH_GRADE_A", - "HEALTH_GRADE_B", - "HEALTH_GRADE_C", - "HEALTH_GRADE_D", - "HEALTH_GRADE_F", - "IMPACT_SCOPE_MIXED", - "IMPACT_SCOPE_NON_RUNTIME", - "IMPACT_SCOPE_RUNTIME", - "RISK_HIGH", - "RISK_LOW", - "RISK_MEDIUM", - "SEVERITY_CRITICAL", - "SEVERITY_INFO", - "SEVERITY_ORDER", - "SEVERITY_RANK", - "SEVERITY_WARNING", - "SOURCE_KIND_BREAKDOWN_KEYS", - "SOURCE_KIND_FIXTURES", - "SOURCE_KIND_MIXED", - "SOURCE_KIND_ORDER", - "SOURCE_KIND_OTHER", - "SOURCE_KIND_PRODUCTION", - "SOURCE_KIND_TESTS", - "STRUCTURAL_KIND_CLONE_COHORT_DRIFT", - "STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE", - "STRUCTURAL_KIND_DUPLICATED_BRANCHES", - "SYMBOL_KIND_CLASS", - "SYMBOL_KIND_FUNCTION", - "SYMBOL_KIND_IMPORT", - "SYMBOL_KIND_METHOD", -] diff --git a/codeclone/findings/__init__.py b/codeclone/findings/__init__.py index ed88cfe..9135843 100644 --- a/codeclone/findings/__init__.py +++ b/codeclone/findings/__init__.py @@ -3,19 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from .ids import ( - clone_group_id, - dead_code_group_id, - design_group_id, - structural_group_id, -) - -__all__ = [ - "clone_group_id", - "dead_code_group_id", - "design_group_id", - "structural_group_id", -] diff --git a/codeclone/findings/clones/__init__.py b/codeclone/findings/clones/__init__.py index e4f7372..9135843 100644 --- a/codeclone/findings/clones/__init__.py +++ b/codeclone/findings/clones/__init__.py @@ -3,7 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from .grouping import build_block_groups, build_groups, build_segment_groups - -__all__ = ["build_block_groups", "build_groups", "build_segment_groups"] diff --git a/codeclone/findings/structural/__init__.py b/codeclone/findings/structural/__init__.py index b64196d..9135843 100644 --- a/codeclone/findings/structural/__init__.py +++ b/codeclone/findings/structural/__init__.py @@ -3,19 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from .detectors import ( - build_clone_cohort_structural_findings, - is_reportable_structural_signature, - normalize_structural_finding_group, - normalize_structural_findings, - scan_function_structure, -) - -__all__ = [ - "build_clone_cohort_structural_findings", - "is_reportable_structural_signature", - "normalize_structural_finding_group", - "normalize_structural_findings", - "scan_function_structure", -] diff --git a/codeclone/metrics/__init__.py b/codeclone/metrics/__init__.py index 25efd2e..9135843 100644 --- a/codeclone/metrics/__init__.py +++ b/codeclone/metrics/__init__.py @@ -3,50 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from ._base import MetricAggregate, MetricFamily, MetricProjectContext -from .cohesion import cohesion_risk, compute_lcom4 -from .complexity import cyclomatic_complexity, nesting_depth, risk_level -from .coupling import compute_cbo, coupling_risk -from .coverage_join import CoverageJoinParseError, build_coverage_join -from .dead_code import find_suppressed_unused, find_unused -from .dependencies import ( - build_dep_graph, - build_import_graph, - find_cycles, - longest_chains, - max_depth, -) -from .health import HealthInputs, compute_health -from .overloaded_modules import build_overloaded_modules_payload -from .registry import METRIC_FAMILIES, build_project_metrics, project_metrics_defaults - -__all__ = [ - "METRIC_FAMILIES", - "CoverageJoinParseError", - "HealthInputs", - "MetricAggregate", - "MetricFamily", - "MetricProjectContext", - "build_coverage_join", - "build_dep_graph", - "build_import_graph", - "build_overloaded_modules_payload", - "build_project_metrics", - "cohesion_risk", - "compute_cbo", - "compute_health", - "compute_lcom4", - "coupling_risk", - "cyclomatic_complexity", - "find_cycles", - "find_suppressed_unused", - "find_unused", - "longest_chains", - "max_depth", - "nesting_depth", - "project_metrics_defaults", - "risk_level", -] diff --git a/codeclone/metrics/health.py b/codeclone/metrics/health.py index 9f0ab67..1433f01 100644 --- a/codeclone/metrics/health.py +++ b/codeclone/metrics/health.py @@ -105,7 +105,7 @@ def compute_health(inputs: HealthInputs) -> HealthScore: dependency_score = _clamp_score( 100 - inputs.dependency_cycles * 25 - - max(0, inputs.dependency_max_depth - 6) * 4 + - max(0, inputs.dependency_max_depth - 8) * 4 ) coverage_score = _clamp_score( _safe_div(inputs.files_analyzed_or_cached * 100.0, max(1, inputs.files_found)) diff --git a/codeclone/report/__init__.py b/codeclone/report/__init__.py index 
4b3685e..9135843 100644 --- a/codeclone/report/__init__.py +++ b/codeclone/report/__init__.py @@ -3,47 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from ..findings.clones.grouping import ( - build_block_groups, - build_groups, - build_segment_groups, -) -from .blocks import prepare_block_report_groups -from .document import build_report_document -from .explain import build_block_group_facts -from .markdown import render_markdown_report_document, to_markdown_report -from .sarif import render_sarif_report_document, to_sarif_report -from .segments import ( - SEGMENT_MIN_UNIQUE_STMT_TYPES, - prepare_segment_report_groups, -) -from .serialize import ( - render_json_report_document, - render_text_report_document, -) -from .suggestions import classify_clone_type, generate_suggestions -from .types import GroupItem, GroupMap - -__all__ = [ - "SEGMENT_MIN_UNIQUE_STMT_TYPES", - "GroupItem", - "GroupMap", - "build_block_group_facts", - "build_block_groups", - "build_groups", - "build_report_document", - "build_segment_groups", - "classify_clone_type", - "generate_suggestions", - "prepare_block_report_groups", - "prepare_segment_report_groups", - "render_json_report_document", - "render_markdown_report_document", - "render_sarif_report_document", - "render_text_report_document", - "to_markdown_report", - "to_sarif_report", -] diff --git a/codeclone/report/document/__init__.py b/codeclone/report/document/__init__.py index f56610a..9135843 100644 --- a/codeclone/report/document/__init__.py +++ b/codeclone/report/document/__init__.py @@ -3,62 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from ...findings.ids import ( - clone_group_id, - dead_code_group_id, - design_group_id, - structural_group_id, -) -from ._common import ( - _collect_paths_from_metrics, - _collect_report_file_list, - _contract_path, - _count_file_lines, - _count_file_lines_for_path, - _is_absolute_path, - _normalize_block_machine_facts, - _normalize_nested_string_rows, - _parse_ratio_percent, - _source_scope_from_filepaths, - _source_scope_from_locations, -) -from ._design_groups import _build_design_groups -from ._findings_groups import ( - _clone_group_assessment, - _csv_values, - _structural_group_assessment, -) -from .builder import build_report_document -from .derived import _combined_impact_scope, _suggestion_finding_id -from .findings import _findings_summary -from .inventory import _derive_inventory_code_counts - -__all__ = [ - "_build_design_groups", - "_clone_group_assessment", - "_collect_paths_from_metrics", - "_collect_report_file_list", - "_combined_impact_scope", - "_contract_path", - "_count_file_lines", - "_count_file_lines_for_path", - "_csv_values", - "_derive_inventory_code_counts", - "_findings_summary", - "_is_absolute_path", - "_normalize_block_machine_facts", - "_normalize_nested_string_rows", - "_parse_ratio_percent", - "_source_scope_from_filepaths", - "_source_scope_from_locations", - "_structural_group_assessment", - "_suggestion_finding_id", - "build_report_document", - "clone_group_id", - "dead_code_group_id", - "design_group_id", - "structural_group_id", -] diff --git a/codeclone/report/document/metrics.py b/codeclone/report/document/metrics.py index 3ca942b..e1472a5 100644 --- a/codeclone/report/document/metrics.py +++ b/codeclone/report/document/metrics.py @@ -23,7 +23,7 @@ from ...domain.source_scope import ( SOURCE_KIND_OTHER, ) -from ...metrics import METRIC_FAMILIES +from ...metrics.registry import METRIC_FAMILIES from 
...suppressions import INLINE_CODECLONE_SUPPRESSION_SOURCE from ...utils.coerce import as_float as _as_float from ...utils.coerce import as_int as _as_int diff --git a/codeclone/report/gates/__init__.py b/codeclone/report/gates/__init__.py index 151bd5e..9135843 100644 --- a/codeclone/report/gates/__init__.py +++ b/codeclone/report/gates/__init__.py @@ -3,37 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from .evaluator import ( - GateResult, - GateState, - MetricGateConfig, - evaluate_gate_state, - evaluate_gates, - gate_state_from_project_metrics, - metric_gate_reasons, - metric_gate_reasons_for_state, - summarize_metrics_diff, -) -from .reasons import ( - parse_metric_reason_entry, - policy_context, - print_gating_failure_block, -) - -__all__ = [ - "GateResult", - "GateState", - "MetricGateConfig", - "evaluate_gate_state", - "evaluate_gates", - "gate_state_from_project_metrics", - "metric_gate_reasons", - "metric_gate_reasons_for_state", - "parse_metric_reason_entry", - "policy_context", - "print_gating_failure_block", - "summarize_metrics_diff", -] diff --git a/codeclone/report/gates/evaluator.py b/codeclone/report/gates/evaluator.py index c632c50..4e8c203 100644 --- a/codeclone/report/gates/evaluator.py +++ b/codeclone/report/gates/evaluator.py @@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, cast from ...contracts import ExitCode -from ...metrics import METRIC_FAMILIES +from ...metrics.registry import METRIC_FAMILIES from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping from ...utils.coerce import as_sequence as _as_sequence diff --git a/codeclone/report/html/assemble.py b/codeclone/report/html/assemble.py index 860d852..f8e40cd 100644 --- a/codeclone/report/html/assemble.py +++ b/codeclone/report/html/assemble.py @@ -14,7 +14,7 @@ from ... 
import __version__ from ...contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL from ...domain.quality import CONFIDENCE_HIGH -from ...findings.structural import normalize_structural_findings +from ...findings.structural.detectors import normalize_structural_findings from ...templates import FONT_CSS_URL, REPORT_TEMPLATE from ...utils import coerce as _coerce from ._context import _meta_pick, build_context diff --git a/codeclone/report/html/sections/_structural.py b/codeclone/report/html/sections/_structural.py index 9f194b1..e2ba87d 100644 --- a/codeclone/report/html/sections/_structural.py +++ b/codeclone/report/html/sections/_structural.py @@ -17,7 +17,7 @@ ) from codeclone.domain.quality import RISK_HIGH, RISK_LOW from codeclone.findings.ids import structural_group_id -from codeclone.findings.structural import normalize_structural_findings +from codeclone.findings.structural.detectors import normalize_structural_findings from ..._source_kinds import SOURCE_KIND_FILTER_VALUES, source_kind_label from ...derived import ( diff --git a/codeclone/report/json_contract.py b/codeclone/report/json_contract.py deleted file mode 100644 index 0529ae7..0000000 --- a/codeclone/report/json_contract.py +++ /dev/null @@ -1,97 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from typing import TYPE_CHECKING - -from ..findings.structural.detectors import normalize_structural_findings -from .document import ( - _build_design_groups, - _clone_group_assessment, - _collect_paths_from_metrics, - _combined_impact_scope, - _contract_path, - _count_file_lines, - _count_file_lines_for_path, - _csv_values, - _derive_inventory_code_counts, - _findings_summary, - _is_absolute_path, - _normalize_block_machine_facts, - _normalize_nested_string_rows, - _parse_ratio_percent, - _source_scope_from_filepaths, - _source_scope_from_locations, - _structural_group_assessment, - _suggestion_finding_id, - build_report_document, - clone_group_id, - dead_code_group_id, - design_group_id, - structural_group_id, -) -from .document import _common as _document_common - -if TYPE_CHECKING: - from collections.abc import Mapping, Sequence - - from ..models import GroupMapLike, StructuralFindingGroup, SuppressedCloneGroup - - -def _collect_report_file_list( - *, - inventory: Mapping[str, object] | None, - func_groups: GroupMapLike, - block_groups: GroupMapLike, - segment_groups: GroupMapLike, - suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None = None, - metrics: Mapping[str, object] | None, - structural_findings: Sequence[StructuralFindingGroup] | None, -) -> list[str]: - original = _document_common.normalize_structural_findings - _document_common.normalize_structural_findings = normalize_structural_findings - try: - return _document_common._collect_report_file_list( - inventory=inventory, - func_groups=func_groups, - block_groups=block_groups, - segment_groups=segment_groups, - suppressed_clone_groups=suppressed_clone_groups, - metrics=metrics, - structural_findings=structural_findings, - ) - finally: - _document_common.normalize_structural_findings = original - - -__all__ = [ - "_build_design_groups", - "_clone_group_assessment", - 
"_collect_paths_from_metrics", - "_collect_report_file_list", - "_combined_impact_scope", - "_contract_path", - "_count_file_lines", - "_count_file_lines_for_path", - "_csv_values", - "_derive_inventory_code_counts", - "_findings_summary", - "_is_absolute_path", - "_normalize_block_machine_facts", - "_normalize_nested_string_rows", - "_parse_ratio_percent", - "_source_scope_from_filepaths", - "_source_scope_from_locations", - "_structural_group_assessment", - "_suggestion_finding_id", - "build_report_document", - "clone_group_id", - "dead_code_group_id", - "design_group_id", - "normalize_structural_findings", - "structural_group_id", -] diff --git a/codeclone/report/markdown.py b/codeclone/report/markdown.py deleted file mode 100644 index 6395c70..0000000 --- a/codeclone/report/markdown.py +++ /dev/null @@ -1,70 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from collections.abc import Collection, Mapping, Sequence -from typing import TYPE_CHECKING - -from .document import build_report_document -from .renderers.markdown import ( - MARKDOWN_SCHEMA_VERSION, - _append_findings_section, - _append_metric_items, - _as_float, - _location_text, - render_markdown_report_document, -) - -if TYPE_CHECKING: - from ..models import StructuralFindingGroup, Suggestion, SuppressedCloneGroup - from .types import GroupMapLike - - -def to_markdown_report( - *, - report_document: Mapping[str, object] | None = None, - meta: Mapping[str, object], - inventory: Mapping[str, object] | None = None, - func_groups: GroupMapLike, - block_groups: GroupMapLike, - segment_groups: GroupMapLike, - block_facts: Mapping[str, Mapping[str, str]] | None = None, - new_function_group_keys: Collection[str] | None = None, - new_block_group_keys: Collection[str] | None = None, - new_segment_group_keys: Collection[str] | None = None, - suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None = None, - metrics: Mapping[str, object] | None = None, - suggestions: Collection[Suggestion] | None = None, - structural_findings: Sequence[StructuralFindingGroup] | None = None, -) -> str: - payload = report_document or build_report_document( - func_groups=func_groups, - block_groups=block_groups, - segment_groups=segment_groups, - meta=meta, - inventory=inventory, - block_facts=block_facts or {}, - new_function_group_keys=new_function_group_keys, - new_block_group_keys=new_block_group_keys, - new_segment_group_keys=new_segment_group_keys, - suppressed_clone_groups=suppressed_clone_groups, - metrics=metrics, - suggestions=tuple(suggestions or ()), - structural_findings=tuple(structural_findings or ()), - ) - return render_markdown_report_document(payload) - - -__all__ = [ - "MARKDOWN_SCHEMA_VERSION", - "_append_findings_section", - "_append_metric_items", - 
"_as_float", - "_location_text", - "render_markdown_report_document", - "to_markdown_report", -] diff --git a/codeclone/report/renderers/__init__.py b/codeclone/report/renderers/__init__.py index b744533..9135843 100644 --- a/codeclone/report/renderers/__init__.py +++ b/codeclone/report/renderers/__init__.py @@ -3,17 +3,3 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from .json import render_json_report_document -from .markdown import render_markdown_report_document -from .sarif import render_sarif_report_document -from .text import render_text_report_document - -__all__ = [ - "render_json_report_document", - "render_markdown_report_document", - "render_sarif_report_document", - "render_text_report_document", -] diff --git a/codeclone/report/renderers/markdown.py b/codeclone/report/renderers/markdown.py index 8788cf4..6cd5e62 100644 --- a/codeclone/report/renderers/markdown.py +++ b/codeclone/report/renderers/markdown.py @@ -6,15 +6,17 @@ from __future__ import annotations -from collections.abc import Mapping, Sequence +from collections.abc import Collection, Mapping, Sequence from typing import TYPE_CHECKING from ...domain.findings import FAMILY_CLONE, FAMILY_DEAD_CODE, FAMILY_STRUCTURAL from ...utils.coerce import as_float, as_int, as_mapping, as_sequence from .._formatting import format_spread_text +from ..document.builder import build_report_document if TYPE_CHECKING: - pass + from ...models import StructuralFindingGroup, Suggestion, SuppressedCloneGroup + from ..types import GroupMapLike MARKDOWN_SCHEMA_VERSION = "1.0" _MAX_FINDING_LOCATIONS = 5 @@ -622,7 +624,47 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str: return "\n".join(lines).rstrip() + "\n" +def to_markdown_report( + *, + report_document: Mapping[str, object] | None = None, + meta: Mapping[str, object], + inventory: Mapping[str, object] | None 
= None, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + block_facts: Mapping[str, Mapping[str, str]] | None = None, + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + new_segment_group_keys: Collection[str] | None = None, + suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: Collection[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, +) -> str: + payload = report_document or build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + meta=meta, + inventory=inventory, + block_facts=block_facts or {}, + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + new_segment_group_keys=new_segment_group_keys, + suppressed_clone_groups=suppressed_clone_groups, + metrics=metrics, + suggestions=tuple(suggestions or ()), + structural_findings=tuple(structural_findings or ()), + ) + return render_markdown_report_document(payload) + + __all__ = [ "MARKDOWN_SCHEMA_VERSION", + "_append_findings_section", + "_append_metric_items", + "_as_float", + "_location_text", "render_markdown_report_document", + "to_markdown_report", ] diff --git a/codeclone/report/renderers/sarif.py b/codeclone/report/renderers/sarif.py index 8a760f0..bfa0bbc 100644 --- a/codeclone/report/renderers/sarif.py +++ b/codeclone/report/renderers/sarif.py @@ -7,7 +7,7 @@ from __future__ import annotations import hashlib -from collections.abc import Mapping, Sequence +from collections.abc import Collection, Mapping, Sequence from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING, cast @@ -52,9 +52,11 @@ from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping from ...utils.coerce import as_sequence as 
_as_sequence +from ..document.builder import build_report_document if TYPE_CHECKING: - pass + from ...models import StructuralFindingGroup, Suggestion + from ..types import GroupMapLike SARIF_VERSION = "2.1.0" SARIF_PROFILE_VERSION = "1.0" @@ -955,6 +957,39 @@ def render_sarif_report_document(payload: Mapping[str, object]) -> str: ).decode("utf-8") +def to_sarif_report( + *, + report_document: Mapping[str, object] | None = None, + meta: Mapping[str, object], + inventory: Mapping[str, object] | None = None, + func_groups: GroupMapLike, + block_groups: GroupMapLike, + segment_groups: GroupMapLike, + block_facts: Mapping[str, Mapping[str, str]] | None = None, + new_function_group_keys: Collection[str] | None = None, + new_block_group_keys: Collection[str] | None = None, + new_segment_group_keys: Collection[str] | None = None, + metrics: Mapping[str, object] | None = None, + suggestions: Collection[Suggestion] | None = None, + structural_findings: Sequence[StructuralFindingGroup] | None = None, +) -> str: + payload = report_document or build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + meta=meta, + inventory=inventory, + block_facts=block_facts or {}, + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + new_segment_group_keys=new_segment_group_keys, + metrics=metrics, + suggestions=tuple(suggestions or ()), + structural_findings=tuple(structural_findings or ()), + ) + return render_sarif_report_document(payload) + + __all__ = [ "_baseline_state", "_location_entry", @@ -971,4 +1006,5 @@ def render_sarif_report_document(payload: Mapping[str, object]) -> str: "_severity_to_level", "_text", "render_sarif_report_document", + "to_sarif_report", ] diff --git a/codeclone/report/sarif.py b/codeclone/report/sarif.py deleted file mode 100644 index 0e7cbde..0000000 --- a/codeclone/report/sarif.py +++ /dev/null @@ -1,86 +0,0 @@ -# This Source Code Form is subject to the terms 
of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from collections.abc import Collection, Mapping, Sequence -from typing import TYPE_CHECKING - -from .document import build_report_document -from .renderers.sarif import ( - _baseline_state, - _location_entry, - _location_message, - _logical_locations, - _partial_fingerprints, - _primary_location_properties, - _result_entry, - _result_message, - _result_properties, - _rule_name, - _rule_spec, - _scan_root_uri, - _severity_to_level, - _text, - render_sarif_report_document, -) - -if TYPE_CHECKING: - from ..models import StructuralFindingGroup, Suggestion - from .types import GroupMapLike - - -def to_sarif_report( - *, - report_document: Mapping[str, object] | None = None, - meta: Mapping[str, object], - inventory: Mapping[str, object] | None = None, - func_groups: GroupMapLike, - block_groups: GroupMapLike, - segment_groups: GroupMapLike, - block_facts: Mapping[str, Mapping[str, str]] | None = None, - new_function_group_keys: Collection[str] | None = None, - new_block_group_keys: Collection[str] | None = None, - new_segment_group_keys: Collection[str] | None = None, - metrics: Mapping[str, object] | None = None, - suggestions: Collection[Suggestion] | None = None, - structural_findings: Sequence[StructuralFindingGroup] | None = None, -) -> str: - payload = report_document or build_report_document( - func_groups=func_groups, - block_groups=block_groups, - segment_groups=segment_groups, - meta=meta, - inventory=inventory, - block_facts=block_facts or {}, - new_function_group_keys=new_function_group_keys, - new_block_group_keys=new_block_group_keys, - new_segment_group_keys=new_segment_group_keys, - metrics=metrics, - suggestions=tuple(suggestions or ()), - structural_findings=tuple(structural_findings or 
()), - ) - return render_sarif_report_document(payload) - - -__all__ = [ - "_baseline_state", - "_location_entry", - "_location_message", - "_logical_locations", - "_partial_fingerprints", - "_primary_location_properties", - "_result_entry", - "_result_message", - "_result_properties", - "_rule_name", - "_rule_spec", - "_scan_root_uri", - "_severity_to_level", - "_text", - "render_sarif_report_document", - "to_sarif_report", -] diff --git a/codeclone/report/serialize.py b/codeclone/report/serialize.py deleted file mode 100644 index 57a8be2..0000000 --- a/codeclone/report/serialize.py +++ /dev/null @@ -1,31 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from .renderers.json import render_json_report_document -from .renderers.text import ( - _append_clone_section, - _append_single_item_findings, - _append_structural_findings, - _append_suggestions, - _append_suppressed_dead_code_items, - _as_int, - _structural_kind_label, - render_text_report_document, -) - -__all__ = [ - "_append_clone_section", - "_append_single_item_findings", - "_append_structural_findings", - "_append_suggestions", - "_append_suppressed_dead_code_items", - "_as_int", - "_structural_kind_label", - "render_json_report_document", - "render_text_report_document", -] diff --git a/codeclone/surfaces/cli/console.py b/codeclone/surfaces/cli/console.py index 59be3dd..422e7f2 100644 --- a/codeclone/surfaces/cli/console.py +++ b/codeclone/surfaces/cli/console.py @@ -9,7 +9,7 @@ import os import re import sys -from collections.abc import Callable, Mapping, Sequence +from collections.abc import Mapping, Sequence from contextlib import AbstractContextManager, nullcontext from functools import lru_cache from pathlib import Path @@ 
-165,13 +165,6 @@ def _print_gating_failure_block( ) -def build_html_report(*args: object, **kwargs: object) -> str: - from ...report.html import build_html_report as _build_html_report - - html_builder: Callable[..., str] = _build_html_report - return html_builder(*args, **kwargs) - - def _print_verbose_clone_hashes( console: _PrinterLike, *, diff --git a/codeclone/surfaces/cli/execution.py b/codeclone/surfaces/cli/execution.py new file mode 100644 index 0000000..d243392 --- /dev/null +++ b/codeclone/surfaces/cli/execution.py @@ -0,0 +1,318 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sys +import time +from dataclasses import replace +from pathlib import Path +from typing import Any, Protocol, cast + +from ... import ui_messages as ui +from ...cache.store import Cache +from ...contracts import ExitCode +from ...contracts.errors import CacheError +from ...core._types import AnalysisResult, BootstrapResult, DiscoveryResult +from ...core._types import ProcessingResult as PipelineProcessingResult +from . import state as cli_state +from .console import PlainConsole + + +class _PrinterLike(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... 
+ + +def run_analysis_stages( + *, + args: object, + boot: BootstrapResult, + cache: Cache, + discover_fn: Any, + process_fn: Any, + analyze_fn: Any, + print_failed_files_fn: Any, + cache_update_segment_projection_fn: Any, + rich_progress_symbols_fn: Any, +) -> tuple[DiscoveryResult, PipelineProcessingResult, AnalysisResult]: + def _require_rich_console(value: object) -> object: + if isinstance(value, PlainConsole): + raise RuntimeError("Rich console is required when progress UI is enabled.") + return value + + args_obj = cast("Any", args) + printer = cast("_PrinterLike", cli_state.get_console()) + use_status = not args_obj.quiet and not args_obj.no_progress + + try: + if use_status: + with cast("Any", printer).status(ui.STATUS_DISCOVERING, spinner="dots"): + discovery_result = discover_fn(boot=boot, cache=cache) + else: + discovery_result = discover_fn(boot=boot, cache=cache) + except OSError as exc: + printer.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=exc))) + sys.exit(ExitCode.CONTRACT_ERROR) + + for warning in discovery_result.skipped_warnings: + printer.print(f"[warning]{warning}[/warning]") + + total_files = len(discovery_result.files_to_process) + if total_files > 0 and not args_obj.quiet and args_obj.no_progress: + printer.print(ui.fmt_processing_changed(total_files)) + + if total_files > 0 and not args_obj.no_progress: + ( + progress_cls, + spinner_column_cls, + text_column_cls, + bar_column_cls, + time_elapsed_column_cls, + ) = rich_progress_symbols_fn() + + progress_factory = cast("Any", progress_cls) + with progress_factory( + cast("Any", spinner_column_cls)(), + cast("Any", text_column_cls)("[progress.description]{task.description}"), + cast("Any", bar_column_cls)(), + cast("Any", text_column_cls)( + "[progress.percentage]{task.percentage:>3.0f}%" + ), + cast("Any", time_elapsed_column_cls)(), + console=_require_rich_console(cli_state.get_console()), + ) as progress_ui: + progress_ui_any = cast("Any", progress_ui) + task_id = 
progress_ui_any.add_task( + f"Analyzing {total_files} files...", + total=total_files, + ) + processing_result = process_fn( + boot=boot, + discovery=discovery_result, + cache=cache, + on_advance=lambda: progress_ui_any.advance(task_id), + on_worker_error=lambda reason: printer.print( + ui.fmt_worker_failed(reason) + ), + on_parallel_fallback=lambda exc: printer.print( + ui.fmt_parallel_fallback(exc) + ), + ) + else: + processing_result = process_fn( + boot=boot, + discovery=discovery_result, + cache=cache, + on_worker_error=( + (lambda reason: printer.print(ui.fmt_batch_item_failed(reason))) + if args_obj.no_progress + else (lambda reason: printer.print(ui.fmt_worker_failed(reason))) + ), + on_parallel_fallback=lambda exc: printer.print( + ui.fmt_parallel_fallback(exc) + ), + ) + + print_failed_files_fn(tuple(processing_result.failed_files)) + if not processing_result.failed_files and processing_result.source_read_failures: + print_failed_files_fn(tuple(processing_result.source_read_failures)) + + if use_status: + with cast("Any", printer).status(ui.STATUS_GROUPING, spinner="dots"): + analysis_result = analyze_fn( + boot=boot, + discovery=discovery_result, + processing=processing_result, + ) + cache_update_segment_projection_fn(cache, analysis_result) + try: + cache.save() + except CacheError as exc: + printer.print(ui.fmt_cache_save_failed(exc)) + else: + analysis_result = analyze_fn( + boot=boot, + discovery=discovery_result, + processing=processing_result, + ) + cache_update_segment_projection_fn(cache, analysis_result) + try: + cache.save() + except CacheError as exc: + printer.print(ui.fmt_cache_save_failed(exc)) + + coverage_join = getattr(analysis_result, "coverage_join", None) + if ( + coverage_join is not None + and coverage_join.status != "ok" + and coverage_join.invalid_reason + ): + printer.print(ui.fmt_coverage_join_ignored(coverage_join.invalid_reason)) + + return discovery_result, processing_result, analysis_result + + +def enforce_gating( + *, + 
args: object, + boot: BootstrapResult, + analysis: AnalysisResult, + processing: PipelineProcessingResult, + source_read_contract_failure: bool, + baseline_failure_code: ExitCode | None, + metrics_baseline_failure_code: ExitCode | None, + new_func: set[str], + new_block: set[str], + metrics_diff: object | None, + html_report_path: str | None, + gate_fn: Any, + parse_metric_reason_entry_fn: Any, + print_gating_failure_block_fn: Any, + print_verbose_clone_hashes_fn: Any, + clone_threshold_total: int | None = None, +) -> None: + args_obj = cast("Any", args) + printer = cast("_PrinterLike", cli_state.get_console()) + + if source_read_contract_failure: + printer.print( + ui.fmt_contract_error( + ui.fmt_unreadable_source_in_gating( + count=len(processing.source_read_failures) + ) + ) + ) + for failure in processing.source_read_failures[:10]: + printer.print(f" • {failure}") + if len(processing.source_read_failures) > 10: + printer.print(f" ... and {len(processing.source_read_failures) - 10} more") + sys.exit(ExitCode.CONTRACT_ERROR) + + if baseline_failure_code is not None: + printer.print(ui.fmt_contract_error(ui.ERR_BASELINE_GATING_REQUIRES_TRUSTED)) + sys.exit(baseline_failure_code) + + if metrics_baseline_failure_code is not None: + printer.print( + ui.fmt_contract_error( + "Metrics baseline is untrusted or missing for requested metrics gating." + ) + ) + sys.exit(metrics_baseline_failure_code) + + if bool(getattr(args_obj, "fail_on_untested_hotspots", False)): + if analysis.coverage_join is None: + printer.print( + ui.fmt_contract_error( + "--fail-on-untested-hotspots requires --coverage." 
+ ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + if analysis.coverage_join.status != "ok": + detail = analysis.coverage_join.invalid_reason or "invalid coverage input" + printer.print( + ui.fmt_contract_error( + "Coverage gating requires a valid Cobertura XML input.\n" + f"Reason: {detail}" + ) + ) + sys.exit(ExitCode.CONTRACT_ERROR) + + gating_analysis = analysis + if clone_threshold_total is not None: + preserved_block_count = min( + max(analysis.block_clones_count, 0), + max(clone_threshold_total, 0), + ) + gating_analysis = replace( + analysis, + func_clones_count=max(clone_threshold_total - preserved_block_count, 0), + block_clones_count=preserved_block_count, + ) + + gate_result = gate_fn( + boot=boot, + analysis=gating_analysis, + new_func=new_func, + new_block=new_block, + metrics_diff=cast("Any", metrics_diff), + ) + + metric_reasons = [ + reason[len("metric:") :] + for reason in gate_result.reasons + if reason.startswith("metric:") + ] + if metric_reasons: + print_gating_failure_block_fn( + code="metrics", + entries=[parse_metric_reason_entry_fn(reason) for reason in metric_reasons], + args=args_obj, + ) + sys.exit(ExitCode.GATING_FAILURE) + + if "clone:new" in gate_result.reasons: + default_report = Path(".cache/codeclone/report.html") + resolved_html_report_path = html_report_path + if resolved_html_report_path is None and default_report.exists(): + resolved_html_report_path = str(default_report) + + clone_entries: list[tuple[str, object]] = [ + ("new_function_clone_groups", len(new_func)), + ("new_block_clone_groups", len(new_block)), + ] + if resolved_html_report_path: + clone_entries.append(("report", resolved_html_report_path)) + clone_entries.append(("accept", "codeclone . 
--update-baseline")) + print_gating_failure_block_fn( + code="new-clones", + entries=clone_entries, + args=args_obj, + ) + + if args_obj.verbose: + print_verbose_clone_hashes_fn( + printer, + label="Function clone hashes", + clone_hashes=new_func, + ) + print_verbose_clone_hashes_fn( + printer, + label="Block clone hashes", + clone_hashes=new_block, + ) + + sys.exit(ExitCode.GATING_FAILURE) + + threshold_reason = next( + ( + reason + for reason in gate_result.reasons + if reason.startswith("clone:threshold:") + ), + None, + ) + if threshold_reason is not None: + _, _, total_raw, threshold_raw = threshold_reason.split(":", maxsplit=3) + print_gating_failure_block_fn( + code="threshold", + entries=( + ("clone_groups_total", int(total_raw)), + ("clone_groups_limit", int(threshold_raw)), + ), + args=args_obj, + ) + sys.exit(ExitCode.GATING_FAILURE) + + +def print_pipeline_done_if_needed(*, args: object, run_started_at: float) -> None: + args_obj = cast("Any", args) + if args_obj.quiet: + return + elapsed = time.monotonic() - run_started_at + printer = cast("_PrinterLike", cli_state.get_console()) + printer.print() + printer.print(ui.fmt_pipeline_done(elapsed)) diff --git a/codeclone/surfaces/cli/main.py b/codeclone/surfaces/cli/main.py index ecabd32..ce049c3 100644 --- a/codeclone/surfaces/cli/main.py +++ b/codeclone/surfaces/cli/main.py @@ -6,1319 +6,9 @@ from __future__ import annotations -import os -import sys -import time -from collections.abc import Collection, Mapping, Sequence -from dataclasses import dataclass, replace -from pathlib import Path -from typing import Any, NoReturn, Protocol, cast - -from ... import __version__ -from ... 
import ui_messages as ui -from ...baseline import Baseline -from ...cache import Cache, CacheStatus, build_segment_report_projection -from ...config import ( - ConfigValidationError, - apply_pyproject_config_overrides, - build_parser, - collect_explicit_cli_dests, - load_pyproject_config, -) -from ...contracts import ( - DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - ISSUES_URL, - ExitCode, -) -from ...contracts.errors import CacheError -from ...core import ( - MAX_FILE_SIZE, - AnalysisResult, - BootstrapResult, - DiscoveryResult, - analyze, - bootstrap, - discover, - gate, - process, - process_file, - report, -) -from ...core._types import FileProcessResult as ProcessingResult -from ...core._types import ProcessingResult as PipelineProcessingResult -from . import report_meta as cli_meta_mod -from . import state as cli_state -from .baseline_state import ( - CloneBaselineState as _CloneBaselineState, -) -from .baseline_state import ( - MetricsBaselineSectionProbe as _MetricsBaselineSectionProbe, -) -from .baseline_state import ( - MetricsBaselineState as _MetricsBaselineState, -) -from .baseline_state import ( - probe_metrics_baseline_section as _probe_metrics_baseline_section_impl, -) -from .baseline_state import ( - resolve_clone_baseline_state as _resolve_clone_baseline_state_impl, -) -from .baseline_state import ( - resolve_metrics_baseline_state as _resolve_metrics_baseline_state_impl, -) -from .changed_scope import ( - ChangedCloneGate, -) -from .changed_scope import ( - _changed_clone_gate_from_report as _changed_clone_gate_from_report_impl, -) -from .changed_scope import ( - _git_diff_changed_paths as _git_diff_changed_paths_impl, -) -from .changed_scope import ( - _normalize_changed_paths as _normalize_changed_paths_impl, -) -from .changed_scope import ( - _validate_changed_scope_args as _validate_changed_scope_args_impl, -) -from .console import ( - PlainConsole, - 
_is_debug_enabled, - _parse_metric_reason_entry, - _print_verbose_clone_hashes, - _rich_progress_symbols, - build_html_report, -) -from .console import ( - _print_gating_failure_block as _print_gating_failure_block_impl, -) -from .console import ( - make_console as _make_rich_console, -) -from .console import ( - make_plain_console as _make_plain_console_impl, -) -from .console import ( - print_banner as _print_banner_impl, -) -from .reports_output import ( - _report_path_origins as _report_path_origins_impl, -) -from .reports_output import ( - _resolve_output_paths as _resolve_output_paths_impl, -) -from .reports_output import ( - _timestamped_report_path as _timestamped_report_path_impl, -) -from .reports_output import ( - _validate_report_ui_flags as _validate_report_ui_flags_impl, -) -from .reports_output import ( - _write_report_outputs as _write_report_outputs_impl, -) -from .runtime import ( - _configure_metrics_mode as _configure_metrics_mode_impl, -) -from .runtime import ( - _metrics_computed as _metrics_computed_impl, -) -from .runtime import ( - _print_failed_files as _print_failed_files_impl, -) -from .runtime import ( - _resolve_cache_path as _resolve_cache_path_impl, -) -from .runtime import ( - _resolve_cache_status as _resolve_cache_status_impl, -) -from .runtime import ( - _validate_numeric_args as _validate_numeric_args_impl, -) -from .summary import ( - ChangedScopeSnapshot, - _print_changed_scope, - _print_metrics, - _print_summary, - build_metrics_snapshot, - build_summary_counts, -) -from .types import OutputPaths, ReportPathOrigin - -__all__ = [ - "LEGACY_CACHE_PATH", - "MAX_FILE_SIZE", - "Baseline", - "Cache", - "ChangedCloneGate", - "ConfigValidationError", - "ExitCode", - "ProcessingResult", - "_changed_clone_gate_from_report", - "_configure_metrics_mode", - "_enforce_gating", - "_git_diff_changed_paths", - "_main_impl", - "_make_console", - "_make_plain_console", - "_make_rich_console", - "_metrics_computed", - 
"_normalize_changed_paths", - "_parse_metric_reason_entry", - "_print_changed_scope", - "_print_failed_files", - "_print_gating_failure_block", - "_print_metrics", - "_print_summary", - "_print_verbose_clone_hashes", - "_probe_metrics_baseline_section", - "_report_path_origins", - "_resolve_cache_path", - "_resolve_cache_status", - "_resolve_clone_baseline_state", - "_resolve_metrics_baseline_state", - "_resolve_output_paths", - "_run_analysis_stages", - "_timestamped_report_path", - "_validate_changed_scope_args", - "_validate_numeric_args", - "_validate_report_ui_flags", - "_write_report_outputs", - "analyze", - "bootstrap", - "build_html_report", - "console", - "discover", - "gate", - "main", - "print_banner", - "process", - "process_file", - "report", -] - - -class _PrinterLike(Protocol): - def print(self, *objects: object, **kwargs: object) -> None: ... - - def status(self, *objects: object, **kwargs: object) -> object: ... - - -@dataclass(frozen=True, slots=True) -class _ResolvedBaselineInputs: - baseline_path: Path - baseline_exists: bool - metrics_baseline_path: Path - metrics_baseline_exists: bool - shared_baseline_payload: dict[str, object] | None - - -@dataclass(frozen=True, slots=True) -class _DiffContext: - new_func: set[str] - new_block: set[str] - new_clones_count: int - metrics_diff: object | None - coverage_adoption_diff_available: bool - api_surface_diff_available: bool - - -def _set_console(value: object) -> object: - cli_state.set_console(value) - return value - - -def _console() -> _PrinterLike: - return cast("_PrinterLike", _set_console(console)) - - -def _make_console(*, no_color: bool) -> object: - return _make_rich_console( - no_color=no_color, - width=ui.CLI_LAYOUT_MAX_WIDTH, - ) - - -def _make_plain_console() -> PlainConsole: - return _make_plain_console_impl() - - -console: object = _make_plain_console() -_set_console(console) -LEGACY_CACHE_PATH = cli_state.LEGACY_CACHE_PATH - - -def print_banner(*, root: Path | None = None) -> None: - 
_set_console(console) - _print_banner_impl(root=root) - - -def _report_path_origins(argv: Sequence[str]) -> dict[str, ReportPathOrigin | None]: - return _report_path_origins_impl(argv) - - -def _timestamped_report_path(path: Path, *, report_generated_at_utc: str) -> Path: - return _timestamped_report_path_impl( - path, - report_generated_at_utc=report_generated_at_utc, - ) - - -def _validate_changed_scope_args(*, args: object) -> str | None: - _set_console(console) - return _validate_changed_scope_args_impl(args=args) - - -def _normalize_changed_paths( - *, - root_path: Path, - paths: Sequence[str], -) -> tuple[str, ...]: - _set_console(console) - return _normalize_changed_paths_impl(root_path=root_path, paths=paths) - - -def _git_diff_changed_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ...]: - _set_console(console) - return _git_diff_changed_paths_impl(root_path=root_path, git_diff_ref=git_diff_ref) - - -def _changed_clone_gate_from_report( - report_document: Mapping[str, object], - *, - changed_paths: Sequence[str], -) -> ChangedCloneGate: - return _changed_clone_gate_from_report_impl( - report_document, - changed_paths=changed_paths, - ) - - -def _resolve_output_paths( - args: object, - *, - report_path_origins: Mapping[str, ReportPathOrigin | None], - report_generated_at_utc: str, -) -> OutputPaths: - _set_console(console) - return _resolve_output_paths_impl( - args, - report_path_origins=report_path_origins, - report_generated_at_utc=report_generated_at_utc, - ) - - -def _validate_report_ui_flags(*, args: object, output_paths: OutputPaths) -> None: - _set_console(console) - _validate_report_ui_flags_impl(args=args, output_paths=output_paths) - - -def _resolve_cache_path(*, root_path: Path, args: object, from_args: bool) -> Path: - cli_state.LEGACY_CACHE_PATH = LEGACY_CACHE_PATH - _set_console(console) - return _resolve_cache_path_impl( - root_path=root_path, - args=args, - from_args=from_args, - ) - - -def _validate_numeric_args(args: object) -> 
bool: - return _validate_numeric_args_impl(args) - - -def _configure_metrics_mode(*, args: object, metrics_baseline_exists: bool) -> None: - _set_console(console) - _configure_metrics_mode_impl( - args=args, - metrics_baseline_exists=metrics_baseline_exists, - ) - - -def _print_failed_files(failed_files: Sequence[str]) -> None: - _set_console(console) - _print_failed_files_impl(tuple(failed_files)) - - -def _metrics_computed(args: object) -> tuple[str, ...]: - return _metrics_computed_impl(args) - - -def _probe_metrics_baseline_section(path: Path) -> _MetricsBaselineSectionProbe: - return _probe_metrics_baseline_section_impl(path) - - -def _resolve_clone_baseline_state( - *, - args: object, - baseline_path: Path, - baseline_exists: bool, - analysis: AnalysisResult, - shared_baseline_payload: dict[str, object] | None = None, -) -> _CloneBaselineState: - return _resolve_clone_baseline_state_impl( - args=cast("Any", args), - baseline_path=baseline_path, - baseline_exists=baseline_exists, - func_groups=analysis.func_groups, - block_groups=analysis.block_groups, - codeclone_version=__version__, - console=_console(), - shared_baseline_payload=shared_baseline_payload, - ) - - -def _resolve_metrics_baseline_state( - *, - args: object, - metrics_baseline_path: Path, - metrics_baseline_exists: bool, - baseline_updated_path: Path | None, - analysis: AnalysisResult, - shared_baseline_payload: dict[str, object] | None = None, -) -> _MetricsBaselineState: - return _resolve_metrics_baseline_state_impl( - args=cast("Any", args), - metrics_baseline_path=metrics_baseline_path, - metrics_baseline_exists=metrics_baseline_exists, - baseline_updated_path=baseline_updated_path, - project_metrics=analysis.project_metrics, - console=_console(), - shared_baseline_payload=shared_baseline_payload, - ) - - -def _resolve_cache_status(cache: Cache) -> tuple[CacheStatus, str | None]: - return _resolve_cache_status_impl(cache) - - -def _print_gating_failure_block( - *, - code: str, - entries: 
Sequence[tuple[str, object]], - args: object, -) -> None: - _set_console(console) - _print_gating_failure_block_impl( - code=code, - entries=entries, - args=args, - ) - - -def _write_report_outputs( - *, - args: object, - output_paths: OutputPaths, - report_artifacts: object, - open_html_report: bool = False, -) -> str | None: - _set_console(console) - return _write_report_outputs_impl( - args=args, - output_paths=output_paths, - report_artifacts=report_artifacts, - open_html_report=open_html_report, - ) - - -def _resolve_runtime_path_arg( - *, - root_path: Path, - raw_path: str, - from_cli: bool, -) -> Path: - candidate_path = Path(raw_path).expanduser() - if from_cli or candidate_path.is_absolute(): - return candidate_path.resolve() - return (root_path / candidate_path).resolve() - - -def _exit_contract_error( - message: str, - *, - cause: BaseException | None = None, -) -> NoReturn: - _console().print(ui.fmt_contract_error(message)) - if cause is None: - raise SystemExit(ExitCode.CONTRACT_ERROR) - raise SystemExit(ExitCode.CONTRACT_ERROR) from cause - - -def _resolve_existing_root_path(args: object) -> Path: - args_obj = cast("Any", args) - try: - root_path = Path(args_obj.root).resolve() - except OSError as exc: - _exit_contract_error(ui.ERR_INVALID_ROOT_PATH.format(error=exc), cause=exc) - if not root_path.exists(): - _exit_contract_error(ui.ERR_ROOT_NOT_FOUND.format(path=root_path)) - return root_path - - -def _load_pyproject_config_or_exit(root_path: Path) -> dict[str, object]: - try: - return load_pyproject_config(root_path) - except ConfigValidationError as exc: - _exit_contract_error(str(exc), cause=exc) - - -def _configure_runtime_flags(args: object) -> None: - args_obj = cast("Any", args) - if args_obj.debug: - os.environ["CODECLONE_DEBUG"] = "1" - if args_obj.ci: - args_obj.fail_on_new = True - args_obj.no_color = True - args_obj.quiet = True - - -def _configure_runtime_console(args: object) -> None: - global console - - args_obj = cast("Any", args) - 
console = ( - _make_plain_console() - if args_obj.quiet - else _make_console(no_color=args_obj.no_color) - ) - _set_console(console) - - -def _validate_numeric_args_or_exit(args: object) -> None: - if _validate_numeric_args(args): - return - _exit_contract_error( - "Size limits must be non-negative integers (MB), " - "threshold flags must be >= 0 or -1, and coverage thresholds " - "must be between 0 and 100." - ) - - -def _resolve_baseline_inputs( - *, - ap: object, - args: object, - root_path: Path, - baseline_path_from_args: bool, - metrics_path_from_args: bool, -) -> _ResolvedBaselineInputs: - args_obj = cast("Any", args) - ap_obj = cast("Any", ap) - - baseline_arg_path = Path(args_obj.baseline).expanduser() - try: - baseline_path = _resolve_runtime_path_arg( - root_path=root_path, - raw_path=args_obj.baseline, - from_cli=baseline_path_from_args, - ) - baseline_exists = baseline_path.exists() - except OSError as exc: - _exit_contract_error( - ui.fmt_invalid_baseline_path(path=baseline_arg_path, error=exc), - cause=exc, - ) - - shared_baseline_payload: dict[str, object] | None = None - default_metrics_baseline = ap_obj.get_default("metrics_baseline") - metrics_path_overridden = metrics_path_from_args or ( - args_obj.metrics_baseline != default_metrics_baseline - ) - metrics_baseline_raw_path = ( - args_obj.metrics_baseline if metrics_path_overridden else args_obj.baseline - ) - metrics_baseline_arg_path = Path(metrics_baseline_raw_path).expanduser() - try: - metrics_baseline_path = _resolve_runtime_path_arg( - root_path=root_path, - raw_path=metrics_baseline_raw_path, - from_cli=metrics_path_from_args, - ) - if metrics_baseline_path == baseline_path: - probe = _probe_metrics_baseline_section(metrics_baseline_path) - metrics_baseline_exists = probe.has_metrics_section - shared_baseline_payload = probe.payload - else: - metrics_baseline_exists = metrics_baseline_path.exists() - except OSError as exc: - _exit_contract_error( - ui.fmt_invalid_baseline_path( - 
path=metrics_baseline_arg_path, - error=exc, - ), - cause=exc, - ) - - return _ResolvedBaselineInputs( - baseline_path=baseline_path, - baseline_exists=baseline_exists, - metrics_baseline_path=metrics_baseline_path, - metrics_baseline_exists=metrics_baseline_exists, - shared_baseline_payload=shared_baseline_payload, - ) - - -def _prepare_metrics_mode_and_ui( - *, - args: object, - root_path: Path, - baseline_path: Path, - baseline_exists: bool, - metrics_baseline_path: Path, - metrics_baseline_exists: bool, -) -> None: - args_obj = cast("Any", args) - if ( - args_obj.update_baseline - and not args_obj.skip_metrics - and not args_obj.update_metrics_baseline - ): - args_obj.update_metrics_baseline = True - _configure_metrics_mode( - args=args_obj, - metrics_baseline_exists=metrics_baseline_exists, - ) - if ( - args_obj.update_metrics_baseline - and metrics_baseline_path == baseline_path - and not baseline_exists - and not args_obj.update_baseline - ): - args_obj.update_baseline = True - if args_obj.quiet: - args_obj.no_progress = True - return - print_banner(root=root_path) - - -def _resolve_report_cache_path(cache_path: Path) -> Path: - try: - return cache_path.resolve() - except OSError: - return cache_path - - -def _gating_mode_enabled(args: object) -> bool: - args_obj = cast("Any", args) - return bool( - args_obj.fail_on_new - or args_obj.fail_threshold >= 0 - or args_obj.fail_complexity >= 0 - or args_obj.fail_coupling >= 0 - or args_obj.fail_cohesion >= 0 - or args_obj.fail_cycles - or args_obj.fail_dead_code - or args_obj.fail_health >= 0 - or args_obj.fail_on_new_metrics - or args_obj.fail_on_typing_regression - or args_obj.fail_on_docstring_regression - or args_obj.fail_on_api_break - or args_obj.min_typing_coverage >= 0 - or args_obj.min_docstring_coverage >= 0 - ) - - -def _build_diff_context( - *, - analysis: AnalysisResult, - baseline_path: Path, - baseline_state: _CloneBaselineState, - metrics_baseline_state: _MetricsBaselineState, -) -> _DiffContext: - 
baseline_for_diff = ( - baseline_state.baseline - if baseline_state.trusted_for_diff - else Baseline(baseline_path) - ) - raw_new_func, raw_new_block = baseline_for_diff.diff( - analysis.func_groups, - analysis.block_groups, - ) - metrics_diff = None - if analysis.project_metrics is not None and metrics_baseline_state.trusted_for_diff: - metrics_diff = metrics_baseline_state.baseline.diff(analysis.project_metrics) - return _DiffContext( - new_func=set(raw_new_func), - new_block=set(raw_new_block), - new_clones_count=len(raw_new_func) + len(raw_new_block), - metrics_diff=metrics_diff, - coverage_adoption_diff_available=bool( - metrics_baseline_state.trusted_for_diff - and getattr( - metrics_baseline_state.baseline, - "has_coverage_adoption_snapshot", - False, - ) - ), - api_surface_diff_available=bool( - metrics_baseline_state.trusted_for_diff - and getattr(metrics_baseline_state.baseline, "api_surface_snapshot", None) - is not None - ), - ) - - -def _print_metrics_if_available( - *, - args: object, - analysis: AnalysisResult, - metrics_diff: object | None, - api_surface_diff_available: bool, -) -> None: - args_obj = cast("Any", args) - if analysis.project_metrics is None: - return - _print_metrics( - console=_console(), - quiet=args_obj.quiet, - metrics=build_metrics_snapshot( - analysis_result=analysis, - metrics_diff=metrics_diff, - api_surface_diff_available=api_surface_diff_available, - ), - ) - - -def _resolve_changed_clone_gate( - *, - args: object, - report_document: Mapping[str, object] | None, - changed_paths: Collection[str], -) -> ChangedCloneGate | None: - args_obj = cast("Any", args) - if not args_obj.changed_only or report_document is None: - return None - return _changed_clone_gate_from_report( - report_document, - changed_paths=tuple(changed_paths), - ) - - -def _maybe_print_changed_scope_snapshot( - *, - args: object, - changed_clone_gate: ChangedCloneGate | None, -) -> None: - args_obj = cast("Any", args) - if changed_clone_gate is None: - return 
- _print_changed_scope( - console=_console(), - quiet=args_obj.quiet, - changed_scope=ChangedScopeSnapshot( - paths_count=len(changed_clone_gate.changed_paths), - findings_total=changed_clone_gate.findings_total, - findings_new=changed_clone_gate.findings_new, - findings_known=changed_clone_gate.findings_known, - ), - ) - - -def _warn_new_clones_without_fail( - *, - args: object, - notice_new_clones_count: int, -) -> None: - args_obj = cast("Any", args) - if args_obj.update_baseline or args_obj.fail_on_new or notice_new_clones_count <= 0: - return - _console().print(ui.WARN_NEW_CLONES_WITHOUT_FAIL) - - -def _print_pipeline_done_if_needed(*, args: object, run_started_at: float) -> None: - args_obj = cast("Any", args) - if args_obj.quiet: - return - elapsed = time.monotonic() - run_started_at - _console().print() - _console().print(ui.fmt_pipeline_done(elapsed)) - - -def _cache_update_segment_projection(cache: Cache, analysis: AnalysisResult) -> None: - if not hasattr(cache, "segment_report_projection"): - return - new_projection = build_segment_report_projection( - digest=analysis.segment_groups_raw_digest, - suppressed=analysis.suppressed_segment_groups, - groups=analysis.segment_groups, - ) - if new_projection != cache.segment_report_projection: - cache.segment_report_projection = new_projection - cache._dirty = True - - -def _run_analysis_stages( - *, - args: object, - boot: BootstrapResult, - cache: Cache, -) -> tuple[DiscoveryResult, PipelineProcessingResult, AnalysisResult]: - def _require_rich_console(value: object) -> object: - if isinstance(value, PlainConsole): - raise RuntimeError("Rich console is required when progress UI is enabled.") - return value - - args_obj = cast("Any", args) - printer = _console() - use_status = not args_obj.quiet and not args_obj.no_progress - - try: - if use_status: - with cast("Any", printer).status(ui.STATUS_DISCOVERING, spinner="dots"): - discovery_result = discover(boot=boot, cache=cache) - else: - discovery_result = 
discover(boot=boot, cache=cache) - except OSError as exc: - printer.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=exc))) - sys.exit(ExitCode.CONTRACT_ERROR) - - for warning in discovery_result.skipped_warnings: - printer.print(f"[warning]{warning}[/warning]") - - total_files = len(discovery_result.files_to_process) - if total_files > 0 and not args_obj.quiet and args_obj.no_progress: - printer.print(ui.fmt_processing_changed(total_files)) - - if total_files > 0 and not args_obj.no_progress: - ( - progress_cls, - spinner_column_cls, - text_column_cls, - bar_column_cls, - time_elapsed_column_cls, - ) = _rich_progress_symbols() - - progress_factory = cast("Any", progress_cls) - with progress_factory( - cast("Any", spinner_column_cls)(), - cast("Any", text_column_cls)("[progress.description]{task.description}"), - cast("Any", bar_column_cls)(), - cast("Any", text_column_cls)( - "[progress.percentage]{task.percentage:>3.0f}%" - ), - cast("Any", time_elapsed_column_cls)(), - console=_require_rich_console(console), - ) as progress_ui: - progress_ui_any = cast("Any", progress_ui) - task_id = progress_ui_any.add_task( - f"Analyzing {total_files} files...", - total=total_files, - ) - processing_result = process( - boot=boot, - discovery=discovery_result, - cache=cache, - on_advance=lambda: progress_ui_any.advance(task_id), - on_worker_error=lambda reason: printer.print( - ui.fmt_worker_failed(reason) - ), - on_parallel_fallback=lambda exc: printer.print( - ui.fmt_parallel_fallback(exc) - ), - ) - else: - processing_result = process( - boot=boot, - discovery=discovery_result, - cache=cache, - on_worker_error=( - (lambda reason: printer.print(ui.fmt_batch_item_failed(reason))) - if args_obj.no_progress - else (lambda reason: printer.print(ui.fmt_worker_failed(reason))) - ), - on_parallel_fallback=lambda exc: printer.print( - ui.fmt_parallel_fallback(exc) - ), - ) - - _print_failed_files(processing_result.failed_files) - if not processing_result.failed_files and 
processing_result.source_read_failures: - _print_failed_files(processing_result.source_read_failures) - - if use_status: - with cast("Any", printer).status(ui.STATUS_GROUPING, spinner="dots"): - analysis_result = analyze( - boot=boot, - discovery=discovery_result, - processing=processing_result, - ) - _cache_update_segment_projection(cache, analysis_result) - try: - cache.save() - except CacheError as exc: - printer.print(ui.fmt_cache_save_failed(exc)) - else: - analysis_result = analyze( - boot=boot, - discovery=discovery_result, - processing=processing_result, - ) - _cache_update_segment_projection(cache, analysis_result) - try: - cache.save() - except CacheError as exc: - printer.print(ui.fmt_cache_save_failed(exc)) - - coverage_join = getattr(analysis_result, "coverage_join", None) - if ( - coverage_join is not None - and coverage_join.status != "ok" - and coverage_join.invalid_reason - ): - printer.print(ui.fmt_coverage_join_ignored(coverage_join.invalid_reason)) - - return discovery_result, processing_result, analysis_result - - -def _enforce_gating( - *, - args: object, - boot: BootstrapResult, - analysis: AnalysisResult, - processing: PipelineProcessingResult, - source_read_contract_failure: bool, - baseline_failure_code: ExitCode | None, - metrics_baseline_failure_code: ExitCode | None, - new_func: set[str], - new_block: set[str], - metrics_diff: object | None, - html_report_path: str | None, - clone_threshold_total: int | None = None, -) -> None: - args_obj = cast("Any", args) - printer = _console() - - if source_read_contract_failure: - printer.print( - ui.fmt_contract_error( - ui.fmt_unreadable_source_in_gating( - count=len(processing.source_read_failures) - ) - ) - ) - for failure in processing.source_read_failures[:10]: - printer.print(f" • {failure}") - if len(processing.source_read_failures) > 10: - printer.print(f" ... 
and {len(processing.source_read_failures) - 10} more") - sys.exit(ExitCode.CONTRACT_ERROR) - - if baseline_failure_code is not None: - printer.print(ui.fmt_contract_error(ui.ERR_BASELINE_GATING_REQUIRES_TRUSTED)) - sys.exit(baseline_failure_code) - - if metrics_baseline_failure_code is not None: - printer.print( - ui.fmt_contract_error( - "Metrics baseline is untrusted or missing for requested metrics gating." - ) - ) - sys.exit(metrics_baseline_failure_code) - - if bool(getattr(args_obj, "fail_on_untested_hotspots", False)): - if analysis.coverage_join is None: - printer.print( - ui.fmt_contract_error( - "--fail-on-untested-hotspots requires --coverage." - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - if analysis.coverage_join.status != "ok": - detail = analysis.coverage_join.invalid_reason or "invalid coverage input" - printer.print( - ui.fmt_contract_error( - "Coverage gating requires a valid Cobertura XML input.\n" - f"Reason: {detail}" - ) - ) - sys.exit(ExitCode.CONTRACT_ERROR) - - gating_analysis = analysis - if clone_threshold_total is not None: - preserved_block_count = min( - max(analysis.block_clones_count, 0), - max(clone_threshold_total, 0), - ) - gating_analysis = replace( - analysis, - func_clones_count=max(clone_threshold_total - preserved_block_count, 0), - block_clones_count=preserved_block_count, - ) - - gate_result = gate( - boot=boot, - analysis=gating_analysis, - new_func=new_func, - new_block=new_block, - metrics_diff=cast("Any", metrics_diff), - ) - - metric_reasons = [ - reason[len("metric:") :] - for reason in gate_result.reasons - if reason.startswith("metric:") - ] - if metric_reasons: - _print_gating_failure_block( - code="metrics", - entries=[_parse_metric_reason_entry(reason) for reason in metric_reasons], - args=args_obj, - ) - sys.exit(ExitCode.GATING_FAILURE) - - if "clone:new" in gate_result.reasons: - default_report = Path(".cache/codeclone/report.html") - resolved_html_report_path = html_report_path - if resolved_html_report_path 
is None and default_report.exists(): - resolved_html_report_path = str(default_report) - - clone_entries: list[tuple[str, object]] = [ - ("new_function_clone_groups", len(new_func)), - ("new_block_clone_groups", len(new_block)), - ] - if resolved_html_report_path: - clone_entries.append(("report", resolved_html_report_path)) - clone_entries.append(("accept", "codeclone . --update-baseline")) - _print_gating_failure_block( - code="new-clones", - entries=clone_entries, - args=args_obj, - ) - - if args_obj.verbose: - _print_verbose_clone_hashes( - printer, - label="Function clone hashes", - clone_hashes=new_func, - ) - _print_verbose_clone_hashes( - printer, - label="Block clone hashes", - clone_hashes=new_block, - ) - - sys.exit(ExitCode.GATING_FAILURE) - - threshold_reason = next( - ( - reason - for reason in gate_result.reasons - if reason.startswith("clone:threshold:") - ), - None, - ) - if threshold_reason is not None: - _, _, total_raw, threshold_raw = threshold_reason.split(":", maxsplit=3) - _print_gating_failure_block( - code="threshold", - entries=( - ("clone_groups_total", int(total_raw)), - ("clone_groups_limit", int(threshold_raw)), - ), - args=args_obj, - ) - sys.exit(ExitCode.GATING_FAILURE) - - -def _main_impl() -> None: - run_started_at = time.monotonic() - analysis_started_at_utc = cli_meta_mod._current_report_timestamp_utc() - ap = build_parser(__version__) - - raw_argv = tuple(sys.argv[1:]) - explicit_cli_dests = collect_explicit_cli_dests(ap, argv=raw_argv) - report_path_origins = _report_path_origins(raw_argv) - report_generated_at_utc = cli_meta_mod._current_report_timestamp_utc() - cache_path_from_args = any( - arg in {"--cache-dir", "--cache-path"} - or arg.startswith(("--cache-dir=", "--cache-path=")) - for arg in sys.argv - ) - baseline_path_from_args = any( - arg == "--baseline" or arg.startswith("--baseline=") for arg in sys.argv - ) - metrics_path_from_args = any( - arg == "--metrics-baseline" or arg.startswith("--metrics-baseline=") - 
for arg in sys.argv - ) - args = ap.parse_args() - - root_path = _resolve_existing_root_path(args) - pyproject_config = _load_pyproject_config_or_exit(root_path) - apply_pyproject_config_overrides( - args=args, - config_values=pyproject_config, - explicit_cli_dests=explicit_cli_dests, - ) - git_diff_ref = _validate_changed_scope_args(args=args) - changed_paths = ( - _git_diff_changed_paths(root_path=root_path, git_diff_ref=git_diff_ref) - if git_diff_ref is not None - else () - ) - _configure_runtime_flags(args) - _configure_runtime_console(args) - _validate_numeric_args_or_exit(args) - baseline_inputs = _resolve_baseline_inputs( - ap=ap, - args=args, - root_path=root_path, - baseline_path_from_args=baseline_path_from_args, - metrics_path_from_args=metrics_path_from_args, - ) - ( - baseline_path, - baseline_exists, - metrics_baseline_path, - metrics_baseline_exists, - ) = ( - baseline_inputs.baseline_path, - baseline_inputs.baseline_exists, - baseline_inputs.metrics_baseline_path, - baseline_inputs.metrics_baseline_exists, - ) - shared_baseline_payload = baseline_inputs.shared_baseline_payload - - _prepare_metrics_mode_and_ui( - args=args, - root_path=root_path, - baseline_path=baseline_path, - baseline_exists=baseline_exists, - metrics_baseline_path=metrics_baseline_path, - metrics_baseline_exists=metrics_baseline_exists, - ) - - output_paths = _resolve_output_paths( - args, - report_path_origins=report_path_origins, - report_generated_at_utc=report_generated_at_utc, - ) - _validate_report_ui_flags(args=args, output_paths=output_paths) - cache_path = _resolve_cache_path( - root_path=root_path, - args=args, - from_args=cache_path_from_args, - ) - - cache = Cache( - cache_path, - root=root_path, - max_size_bytes=args.max_cache_size_mb * 1024 * 1024, - min_loc=args.min_loc, - min_stmt=args.min_stmt, - block_min_loc=args.block_min_loc, - block_min_stmt=args.block_min_stmt, - segment_min_loc=args.segment_min_loc, - segment_min_stmt=args.segment_min_stmt, - 
collect_api_surface=bool(args.api_surface), - ) - cache.load() - if cache.load_warning: - _console().print(f"[warning]{cache.load_warning}[/warning]") - - boot = bootstrap( - args=args, - root=root_path, - output_paths=output_paths, - cache_path=cache_path, - ) - discovery_result, processing_result, analysis_result = _run_analysis_stages( - args=args, - boot=boot, - cache=cache, - ) - - gating_mode = _gating_mode_enabled(args) - source_read_contract_failure = ( - bool(processing_result.source_read_failures) - and gating_mode - and not args.update_baseline - ) - baseline_state = _resolve_clone_baseline_state( - args=args, - baseline_path=baseline_path, - baseline_exists=baseline_exists, - analysis=analysis_result, - shared_baseline_payload=( - shared_baseline_payload if metrics_baseline_path == baseline_path else None - ), - ) - metrics_baseline_state = _resolve_metrics_baseline_state( - args=args, - metrics_baseline_path=metrics_baseline_path, - metrics_baseline_exists=metrics_baseline_exists, - baseline_updated_path=baseline_state.updated_path, - analysis=analysis_result, - shared_baseline_payload=( - shared_baseline_payload if metrics_baseline_path == baseline_path else None - ), - ) - - report_cache_path = _resolve_report_cache_path(cache_path) - - cache_status, cache_schema_version = _resolve_cache_status(cache) - report_meta = cli_meta_mod._build_report_meta( - codeclone_version=__version__, - scan_root=root_path, - baseline_path=baseline_path, - baseline=baseline_state.baseline, - baseline_loaded=baseline_state.loaded, - baseline_status=baseline_state.status.value, - cache_path=report_cache_path, - cache_used=cache_status == CacheStatus.OK, - cache_status=cache_status.value, - cache_schema_version=cache_schema_version, - files_skipped_source_io=len(processing_result.source_read_failures), - metrics_baseline_path=metrics_baseline_path, - metrics_baseline=metrics_baseline_state.baseline, - metrics_baseline_loaded=metrics_baseline_state.loaded, - 
metrics_baseline_status=metrics_baseline_state.status.value, - health_score=( - analysis_result.project_metrics.health.total - if analysis_result.project_metrics - else None - ), - health_grade=( - analysis_result.project_metrics.health.grade - if analysis_result.project_metrics - else None - ), - analysis_mode=("clones_only" if args.skip_metrics else "full"), - metrics_computed=_metrics_computed(args), - min_loc=args.min_loc, - min_stmt=args.min_stmt, - block_min_loc=args.block_min_loc, - block_min_stmt=args.block_min_stmt, - segment_min_loc=args.segment_min_loc, - segment_min_stmt=args.segment_min_stmt, - design_complexity_threshold=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - design_coupling_threshold=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - design_cohesion_threshold=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - analysis_started_at_utc=analysis_started_at_utc, - report_generated_at_utc=report_generated_at_utc, - ) - - diff_context = _build_diff_context( - analysis=analysis_result, - baseline_path=baseline_path, - baseline_state=baseline_state, - metrics_baseline_state=metrics_baseline_state, - ) - - summary_counts = build_summary_counts( - discovery_result=discovery_result, - processing_result=processing_result, - ) - _print_summary( - console=_console(), - quiet=args.quiet, - files_found=discovery_result.files_found, - files_analyzed=processing_result.files_analyzed, - cache_hits=discovery_result.cache_hits, - files_skipped=processing_result.files_skipped, - analyzed_lines=summary_counts["analyzed_lines"], - analyzed_functions=summary_counts["analyzed_functions"], - analyzed_methods=summary_counts["analyzed_methods"], - analyzed_classes=summary_counts["analyzed_classes"], - func_clones_count=analysis_result.func_clones_count, - block_clones_count=analysis_result.block_clones_count, - segment_clones_count=analysis_result.segment_clones_count, - suppressed_golden_fixture_groups=len( - getattr(analysis_result, "suppressed_clone_groups", ()) - ), - 
suppressed_segment_groups=analysis_result.suppressed_segment_groups, - new_clones_count=diff_context.new_clones_count, - ) - _print_metrics_if_available( - args=args, - analysis=analysis_result, - metrics_diff=diff_context.metrics_diff, - api_surface_diff_available=diff_context.api_surface_diff_available, - ) - - report_artifacts = report( - boot=boot, - discovery=discovery_result, - processing=processing_result, - analysis=analysis_result, - report_meta=report_meta, - new_func=diff_context.new_func, - new_block=diff_context.new_block, - html_builder=build_html_report, - metrics_diff=diff_context.metrics_diff, - coverage_adoption_diff_available=diff_context.coverage_adoption_diff_available, - api_surface_diff_available=diff_context.api_surface_diff_available, - include_report_document=bool(changed_paths), - ) - changed_clone_gate = _resolve_changed_clone_gate( - args=args, - report_document=report_artifacts.report_document, - changed_paths=changed_paths, - ) - _maybe_print_changed_scope_snapshot( - args=args, - changed_clone_gate=changed_clone_gate, - ) - html_report_path = _write_report_outputs( - args=args, - output_paths=output_paths, - report_artifacts=report_artifacts, - open_html_report=args.open_html_report, - ) - - _enforce_gating( - args=args, - boot=boot, - analysis=analysis_result, - processing=processing_result, - source_read_contract_failure=source_read_contract_failure, - baseline_failure_code=baseline_state.failure_code, - metrics_baseline_failure_code=metrics_baseline_state.failure_code, - new_func=( - set(changed_clone_gate.new_func) - if changed_clone_gate - else diff_context.new_func - ), - new_block=( - set(changed_clone_gate.new_block) - if changed_clone_gate - else diff_context.new_block - ), - metrics_diff=diff_context.metrics_diff, - html_report_path=html_report_path, - clone_threshold_total=( - changed_clone_gate.total_clone_groups if changed_clone_gate else None - ), - ) - - notice_new_clones_count = ( - len(changed_clone_gate.new_func) + 
len(changed_clone_gate.new_block) - if changed_clone_gate is not None - else diff_context.new_clones_count - ) - _warn_new_clones_without_fail( - args=args, - notice_new_clones_count=notice_new_clones_count, - ) - _print_pipeline_done_if_needed(args=args, run_started_at=run_started_at) - - -def main() -> None: - try: - _main_impl() - except SystemExit: - raise - except Exception as exc: - _console().print( - ui.fmt_internal_error( - exc, - issues_url=ISSUES_URL, - debug=_is_debug_enabled(), - ) - ) - sys.exit(ExitCode.INTERNAL_ERROR) +from .workflow import main +__all__ = ["main"] if __name__ == "__main__": main() diff --git a/codeclone/surfaces/cli/post_run.py b/codeclone/surfaces/cli/post_run.py new file mode 100644 index 0000000..27ae1ee --- /dev/null +++ b/codeclone/surfaces/cli/post_run.py @@ -0,0 +1,143 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Collection, Mapping +from dataclasses import dataclass +from pathlib import Path +from typing import Any, cast + +from ... 
import ui_messages as ui +from ...baseline import Baseline +from ...core._types import AnalysisResult +from .baseline_state import CloneBaselineState, MetricsBaselineState +from .changed_scope import ChangedCloneGate +from .summary import ChangedScopeSnapshot + + +@dataclass(frozen=True, slots=True) +class DiffContext: + new_func: set[str] + new_block: set[str] + new_clones_count: int + metrics_diff: object | None + coverage_adoption_diff_available: bool + api_surface_diff_available: bool + + +def build_diff_context( + *, + analysis: AnalysisResult, + baseline_path: Path, + baseline_state: CloneBaselineState, + metrics_baseline_state: MetricsBaselineState, +) -> DiffContext: + baseline_for_diff = ( + baseline_state.baseline + if baseline_state.trusted_for_diff + else Baseline(baseline_path) + ) + raw_new_func, raw_new_block = baseline_for_diff.diff( + analysis.func_groups, + analysis.block_groups, + ) + metrics_diff = None + if analysis.project_metrics is not None and metrics_baseline_state.trusted_for_diff: + metrics_diff = metrics_baseline_state.baseline.diff(analysis.project_metrics) + return DiffContext( + new_func=set(raw_new_func), + new_block=set(raw_new_block), + new_clones_count=len(raw_new_func) + len(raw_new_block), + metrics_diff=metrics_diff, + coverage_adoption_diff_available=bool( + metrics_baseline_state.trusted_for_diff + and getattr( + metrics_baseline_state.baseline, + "has_coverage_adoption_snapshot", + False, + ) + ), + api_surface_diff_available=bool( + metrics_baseline_state.trusted_for_diff + and getattr(metrics_baseline_state.baseline, "api_surface_snapshot", None) + is not None + ), + ) + + +def print_metrics_if_available( + *, + args: object, + analysis: AnalysisResult, + metrics_diff: object | None, + api_surface_diff_available: bool, + console: Any, + build_metrics_snapshot_fn: Any, + print_metrics_fn: Any, +) -> None: + if analysis.project_metrics is None: + return + print_metrics_fn( + console=console, + quiet=bool(cast("Any", 
args).quiet), + metrics=build_metrics_snapshot_fn( + analysis_result=analysis, + metrics_diff=metrics_diff, + api_surface_diff_available=api_surface_diff_available, + ), + ) + + +def resolve_changed_clone_gate( + *, + args: object, + report_document: Mapping[str, object] | None, + changed_paths: Collection[str], + changed_clone_gate_from_report_fn: Any, +) -> ChangedCloneGate | None: + if not cast("Any", args).changed_only or report_document is None: + return None + return cast( + "ChangedCloneGate", + changed_clone_gate_from_report_fn( + report_document, + changed_paths=tuple(changed_paths), + ), + ) + + +def maybe_print_changed_scope_snapshot( + *, + args: object, + changed_clone_gate: ChangedCloneGate | None, + console: Any, + print_changed_scope_fn: Any, +) -> None: + if changed_clone_gate is None: + return + print_changed_scope_fn( + console=console, + quiet=bool(cast("Any", args).quiet), + changed_scope=ChangedScopeSnapshot( + paths_count=len(changed_clone_gate.changed_paths), + findings_total=changed_clone_gate.findings_total, + findings_new=changed_clone_gate.findings_new, + findings_known=changed_clone_gate.findings_known, + ), + ) + + +def warn_new_clones_without_fail( + *, + args: object, + notice_new_clones_count: int, + console: Any, +) -> None: + args_obj = cast("Any", args) + if args_obj.update_baseline or args_obj.fail_on_new or notice_new_clones_count <= 0: + return + console.print(ui.WARN_NEW_CLONES_WITHOUT_FAIL) diff --git a/codeclone/surfaces/cli/report_meta.py b/codeclone/surfaces/cli/report_meta.py index d533f2d..43b7c98 100644 --- a/codeclone/surfaces/cli/report_meta.py +++ b/codeclone/surfaces/cli/report_meta.py @@ -8,10 +8,11 @@ import sys from datetime import datetime, timezone -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, cast from ...baseline.clone_baseline import Baseline from ...baseline.trust import current_python_tag +from ...cache.versioning import CacheStatus from ...contracts import ( 
DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, @@ -23,6 +24,10 @@ from pathlib import Path from ...baseline.metrics_baseline import MetricsBaseline + from ...cache.versioning import CacheStatus + from ...core._types import AnalysisResult + from ...core._types import ProcessingResult as PipelineProcessingResult + from .baseline_state import CloneBaselineState, MetricsBaselineState def _current_python_version() -> str: @@ -122,3 +127,54 @@ def _build_report_meta( "analysis_started_at_utc": analysis_started_at_utc, "report_generated_at_utc": report_generated_at_utc, } + + +def build_cli_report_meta( + *, + codeclone_version: str, + scan_root: Path, + baseline_path: Path, + baseline_state: CloneBaselineState, + cache_path: Path, + cache_status: CacheStatus, + cache_schema_version: str | None, + processing_result: PipelineProcessingResult, + metrics_baseline_path: Path, + metrics_baseline_state: MetricsBaselineState, + analysis_result: AnalysisResult, + args: object, + metrics_computed: tuple[str, ...], + analysis_started_at_utc: str | None, + report_generated_at_utc: str, +) -> ReportMeta: + args_obj = cast("Any", args) + project_metrics = analysis_result.project_metrics + return _build_report_meta( + codeclone_version=codeclone_version, + scan_root=scan_root, + baseline_path=baseline_path, + baseline=baseline_state.baseline, + baseline_loaded=baseline_state.loaded, + baseline_status=baseline_state.status.value, + cache_path=cache_path, + cache_used=cache_status == CacheStatus.OK, + cache_status=cache_status.value, + cache_schema_version=cache_schema_version, + files_skipped_source_io=len(processing_result.source_read_failures), + metrics_baseline_path=metrics_baseline_path, + metrics_baseline=metrics_baseline_state.baseline, + metrics_baseline_loaded=metrics_baseline_state.loaded, + metrics_baseline_status=metrics_baseline_state.status.value, + health_score=(project_metrics.health.total if project_metrics else None), + 
health_grade=(project_metrics.health.grade if project_metrics else None), + analysis_mode=("clones_only" if args_obj.skip_metrics else "full"), + metrics_computed=metrics_computed, + min_loc=args_obj.min_loc, + min_stmt=args_obj.min_stmt, + block_min_loc=args_obj.block_min_loc, + block_min_stmt=args_obj.block_min_stmt, + segment_min_loc=args_obj.segment_min_loc, + segment_min_stmt=args_obj.segment_min_stmt, + analysis_started_at_utc=analysis_started_at_utc, + report_generated_at_utc=report_generated_at_utc, + ) diff --git a/codeclone/surfaces/cli/runtime.py b/codeclone/surfaces/cli/runtime.py index 79b019d..7c76594 100644 --- a/codeclone/surfaces/cli/runtime.py +++ b/codeclone/surfaces/cli/runtime.py @@ -11,7 +11,8 @@ from typing import Any, Protocol, cast from ... import ui_messages as ui -from ...cache import Cache, CacheStatus +from ...cache.store import Cache +from ...cache.versioning import CacheStatus from ...contracts import ExitCode from . import state as cli_state @@ -204,6 +205,68 @@ def resolve_cache_status(cache: _CacheLike) -> tuple[CacheStatus, str | None]: return cache_status, cache_schema_version +def resolve_report_cache_path(cache_path: Path) -> Path: + try: + return cache_path.resolve() + except OSError: + return cache_path + + +def prepare_metrics_mode_and_ui( + *, + args: object, + root_path: Path, + baseline_path: Path, + baseline_exists: bool, + metrics_baseline_path: Path, + metrics_baseline_exists: bool, + configure_metrics_mode: Any, + print_banner: Any, +) -> None: + args_obj = cast("Any", args) + if ( + args_obj.update_baseline + and not args_obj.skip_metrics + and not args_obj.update_metrics_baseline + ): + args_obj.update_metrics_baseline = True + configure_metrics_mode( + args=args_obj, + metrics_baseline_exists=metrics_baseline_exists, + ) + if ( + args_obj.update_metrics_baseline + and metrics_baseline_path == baseline_path + and not baseline_exists + and not args_obj.update_baseline + ): + args_obj.update_baseline = True + if 
args_obj.quiet: + args_obj.no_progress = True + return + print_banner(root=root_path) + + +def gating_mode_enabled(args: object) -> bool: + args_obj = cast("Any", args) + return bool( + args_obj.fail_on_new + or args_obj.fail_threshold >= 0 + or args_obj.fail_complexity >= 0 + or args_obj.fail_coupling >= 0 + or args_obj.fail_cohesion >= 0 + or args_obj.fail_cycles + or args_obj.fail_dead_code + or args_obj.fail_health >= 0 + or args_obj.fail_on_new_metrics + or args_obj.fail_on_typing_regression + or args_obj.fail_on_docstring_regression + or args_obj.fail_on_api_break + or args_obj.min_typing_coverage >= 0 + or args_obj.min_docstring_coverage >= 0 + ) + + def print_failed_files(*, failed_files: tuple[str, ...], console: _PrinterLike) -> None: if not failed_files: return diff --git a/codeclone/surfaces/cli/startup.py b/codeclone/surfaces/cli/startup.py new file mode 100644 index 0000000..51062ca --- /dev/null +++ b/codeclone/surfaces/cli/startup.py @@ -0,0 +1,189 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Any, NoReturn, cast + +from ... 
import ui_messages as ui +from ...config.pyproject_loader import ConfigValidationError +from ...contracts import ExitCode + + +@dataclass(frozen=True, slots=True) +class ResolvedBaselineInputs: + baseline_path: Path + baseline_exists: bool + metrics_baseline_path: Path + metrics_baseline_exists: bool + shared_baseline_payload: dict[str, object] | None + + +def resolve_runtime_path_arg( + *, + root_path: Path, + raw_path: str, + from_cli: bool, +) -> Path: + candidate_path = Path(raw_path).expanduser() + if from_cli or candidate_path.is_absolute(): + return candidate_path.resolve() + return (root_path / candidate_path).resolve() + + +def exit_contract_error( + message: str, + *, + printer: Any, + cause: BaseException | None = None, +) -> NoReturn: + printer.print(ui.fmt_contract_error(message)) + if cause is None: + raise SystemExit(ExitCode.CONTRACT_ERROR) + raise SystemExit(ExitCode.CONTRACT_ERROR) from cause + + +def resolve_existing_root_path(*, args: object, printer: Any) -> Path: + try: + root_path = Path(cast("Any", args).root).resolve() + except OSError as exc: + exit_contract_error( + ui.ERR_INVALID_ROOT_PATH.format(error=exc), + printer=printer, + cause=exc, + ) + if not root_path.exists(): + exit_contract_error( + ui.ERR_ROOT_NOT_FOUND.format(path=root_path), + printer=printer, + ) + return root_path + + +def load_pyproject_config_or_exit( + *, + root_path: Path, + load_pyproject_config_fn: Any, + printer: Any, +) -> dict[str, object]: + try: + return cast("dict[str, object]", load_pyproject_config_fn(root_path)) + except ConfigValidationError as exc: + exit_contract_error(str(exc), printer=printer, cause=exc) + + +def configure_runtime_flags(args: object) -> None: + args_obj = cast("Any", args) + if args_obj.debug: + os.environ["CODECLONE_DEBUG"] = "1" + if args_obj.ci: + args_obj.fail_on_new = True + args_obj.no_color = True + args_obj.quiet = True + + +def configure_runtime_console( + *, + args: object, + make_plain_console: Any, + make_console: Any, + 
set_console: Any, +) -> object: + args_obj = cast("Any", args) + console = ( + make_plain_console() + if args_obj.quiet + else make_console(no_color=args_obj.no_color) + ) + set_console(console) + return console + + +def validate_numeric_args_or_exit( + *, + args: object, + validate_numeric_args_fn: Any, + printer: Any, +) -> None: + if validate_numeric_args_fn(args): + return + exit_contract_error( + "Size limits must be non-negative integers (MB), " + "threshold flags must be >= 0 or -1, and coverage thresholds " + "must be between 0 and 100.", + printer=printer, + ) + + +def resolve_baseline_inputs( + *, + ap: object, + args: object, + root_path: Path, + baseline_path_from_args: bool, + metrics_path_from_args: bool, + probe_metrics_baseline_section_fn: Any, + printer: Any, +) -> ResolvedBaselineInputs: + args_obj = cast("Any", args) + ap_obj = cast("Any", ap) + + baseline_arg_path = Path(args_obj.baseline).expanduser() + try: + baseline_path = resolve_runtime_path_arg( + root_path=root_path, + raw_path=args_obj.baseline, + from_cli=baseline_path_from_args, + ) + baseline_exists = baseline_path.exists() + except OSError as exc: + exit_contract_error( + ui.fmt_invalid_baseline_path(path=baseline_arg_path, error=exc), + printer=printer, + cause=exc, + ) + + shared_baseline_payload: dict[str, object] | None = None + default_metrics_baseline = ap_obj.get_default("metrics_baseline") + metrics_path_overridden = metrics_path_from_args or ( + args_obj.metrics_baseline != default_metrics_baseline + ) + metrics_baseline_raw_path = ( + args_obj.metrics_baseline if metrics_path_overridden else args_obj.baseline + ) + metrics_baseline_arg_path = Path(metrics_baseline_raw_path).expanduser() + try: + metrics_baseline_path = resolve_runtime_path_arg( + root_path=root_path, + raw_path=metrics_baseline_raw_path, + from_cli=metrics_path_from_args, + ) + if metrics_baseline_path == baseline_path: + probe = probe_metrics_baseline_section_fn(metrics_baseline_path) + 
metrics_baseline_exists = probe.has_metrics_section + shared_baseline_payload = probe.payload + else: + metrics_baseline_exists = metrics_baseline_path.exists() + except OSError as exc: + exit_contract_error( + ui.fmt_invalid_baseline_path( + path=metrics_baseline_arg_path, + error=exc, + ), + printer=printer, + cause=exc, + ) + + return ResolvedBaselineInputs( + baseline_path=baseline_path, + baseline_exists=baseline_exists, + metrics_baseline_path=metrics_baseline_path, + metrics_baseline_exists=metrics_baseline_exists, + shared_baseline_payload=shared_baseline_payload, + ) diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py new file mode 100644 index 0000000..532d27e --- /dev/null +++ b/codeclone/surfaces/cli/workflow.py @@ -0,0 +1,532 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sys +import time +from pathlib import Path +from typing import Any, Protocol, cast + +from ... import __version__ +from ... 
import ui_messages as ui +from ...baseline import Baseline +from ...cache.projection import build_segment_report_projection +from ...cache.store import Cache +from ...config.argparse_builder import build_parser +from ...config.pyproject_loader import load_pyproject_config +from ...config.resolver import ( + apply_pyproject_config_overrides, + collect_explicit_cli_dests, +) +from ...contracts import ( + ISSUES_URL, + ExitCode, +) +from ...core._types import AnalysisResult, BootstrapResult, DiscoveryResult +from ...core._types import ProcessingResult as PipelineProcessingResult +from ...core.bootstrap import bootstrap +from ...core.discovery import discover +from ...core.parallelism import process +from ...core.pipeline import analyze +from ...core.reporting import gate, report +from ...report.html import build_html_report +from . import report_meta as cli_meta_mod +from . import state as cli_state +from .baseline_state import ( + _probe_metrics_baseline_section, + _resolve_clone_baseline_state, + _resolve_metrics_baseline_state, +) +from .changed_scope import ( + _changed_clone_gate_from_report, + _git_diff_changed_paths, + _validate_changed_scope_args, +) +from .console import ( + _is_debug_enabled, + _make_plain_console, + _parse_metric_reason_entry, + _print_gating_failure_block, + _print_verbose_clone_hashes, + _rich_progress_symbols, +) +from .console import make_console as _make_rich_console +from .console import print_banner as _print_banner_impl +from .execution import ( + enforce_gating, + print_pipeline_done_if_needed, + run_analysis_stages, +) +from .post_run import build_diff_context as _build_diff_context +from .post_run import ( + maybe_print_changed_scope_snapshot, + print_metrics_if_available, + resolve_changed_clone_gate, + warn_new_clones_without_fail, +) +from .reports_output import ( + _report_path_origins, + _resolve_output_paths, + _validate_report_ui_flags, + _write_report_outputs, +) +from .runtime import ( + _configure_metrics_mode, + 
_metrics_computed, + _print_failed_files, + _resolve_cache_status, + _validate_numeric_args, + gating_mode_enabled, + prepare_metrics_mode_and_ui, + resolve_report_cache_path, +) +from .runtime import _resolve_cache_path as _resolve_cache_path_impl +from .startup import configure_runtime_console as _configure_runtime_console_impl +from .startup import configure_runtime_flags as _configure_runtime_flags +from .startup import load_pyproject_config_or_exit as _load_pyproject_config_or_exit +from .startup import resolve_baseline_inputs as _resolve_baseline_inputs +from .startup import resolve_existing_root_path as _resolve_existing_root_path +from .startup import validate_numeric_args_or_exit as _validate_numeric_args_or_exit +from .summary import ( + _print_changed_scope, + _print_metrics, + _print_summary, + build_metrics_snapshot, + build_summary_counts, +) + +__all__ = [ + "LEGACY_CACHE_PATH", + "Baseline", + "Cache", + "ExitCode", + "_changed_clone_gate_from_report", + "_configure_metrics_mode", + "_enforce_gating", + "_git_diff_changed_paths", + "_main_impl", + "_make_console", + "_make_plain_console", + "_make_rich_console", + "_print_changed_scope", + "_print_failed_files", + "_print_gating_failure_block", + "_print_summary", + "_probe_metrics_baseline_section", + "_resolve_cache_path", + "_resolve_clone_baseline_state", + "_resolve_metrics_baseline_state", + "_rich_progress_symbols", + "_run_analysis_stages", + "_validate_report_ui_flags", + "_write_report_outputs", + "analyze", + "bootstrap", + "build_html_report", + "console", + "discover", + "gate", + "main", + "print_banner", + "process", + "report", +] + + +class _PrinterLike(Protocol): + def print(self, *objects: object, **kwargs: object) -> None: ... 
+ + +def _set_console(value: object) -> object: + cli_state.set_console(value) + return value + + +def _console() -> _PrinterLike: + return cast("_PrinterLike", _set_console(console)) + + +def _make_console(*, no_color: bool) -> object: + return _make_rich_console(no_color=no_color, width=ui.CLI_LAYOUT_MAX_WIDTH) + + +console: object = _make_plain_console() +_set_console(console) +LEGACY_CACHE_PATH = cli_state.LEGACY_CACHE_PATH + + +def print_banner(*, root: Path | None = None) -> None: + _set_console(console) + _print_banner_impl(root=root) + + +def _configure_runtime_console(args: object) -> None: + global console + console = _configure_runtime_console_impl( + args=args, + make_plain_console=_make_plain_console, + make_console=_make_console, + set_console=_set_console, + ) + + +def _resolve_cache_path(*, root_path: Path, args: object, from_args: bool) -> Path: + cli_state.LEGACY_CACHE_PATH = LEGACY_CACHE_PATH + _set_console(console) + return _resolve_cache_path_impl( + root_path=cast("Any", root_path), + args=args, + from_args=from_args, + ) + + +def _cache_update_segment_projection(cache: Cache, analysis: AnalysisResult) -> None: + if not hasattr(cache, "segment_report_projection"): + return + new_projection = build_segment_report_projection( + digest=analysis.segment_groups_raw_digest, + suppressed=analysis.suppressed_segment_groups, + groups=analysis.segment_groups, + ) + if new_projection != cache.segment_report_projection: + cache.segment_report_projection = new_projection + cache._dirty = True + + +def _run_analysis_stages( + *, + args: object, + boot: BootstrapResult, + cache: Cache, +) -> tuple[DiscoveryResult, PipelineProcessingResult, AnalysisResult]: + _set_console(console) + return run_analysis_stages( + args=args, + boot=boot, + cache=cache, + discover_fn=discover, + process_fn=process, + analyze_fn=analyze, + print_failed_files_fn=_print_failed_files, + cache_update_segment_projection_fn=_cache_update_segment_projection, + 
rich_progress_symbols_fn=_rich_progress_symbols, + ) + + +def _enforce_gating( + *, + args: object, + boot: BootstrapResult, + analysis: AnalysisResult, + processing: PipelineProcessingResult, + source_read_contract_failure: bool, + baseline_failure_code: ExitCode | None, + metrics_baseline_failure_code: ExitCode | None, + new_func: set[str], + new_block: set[str], + metrics_diff: object | None, + html_report_path: str | None, + clone_threshold_total: int | None = None, +) -> None: + _set_console(console) + enforce_gating( + args=args, + boot=boot, + analysis=analysis, + processing=processing, + source_read_contract_failure=source_read_contract_failure, + baseline_failure_code=baseline_failure_code, + metrics_baseline_failure_code=metrics_baseline_failure_code, + new_func=new_func, + new_block=new_block, + metrics_diff=metrics_diff, + html_report_path=html_report_path, + gate_fn=gate, + parse_metric_reason_entry_fn=_parse_metric_reason_entry, + print_gating_failure_block_fn=_print_gating_failure_block, + print_verbose_clone_hashes_fn=_print_verbose_clone_hashes, + clone_threshold_total=clone_threshold_total, + ) + + +def _main_impl() -> None: + run_started_at = time.monotonic() + analysis_started_at_utc = cli_meta_mod._current_report_timestamp_utc() + ap = build_parser(__version__) + + raw_argv = tuple(sys.argv[1:]) + explicit_cli_dests = collect_explicit_cli_dests(ap, argv=raw_argv) + report_path_origins = _report_path_origins(raw_argv) + report_generated_at_utc = cli_meta_mod._current_report_timestamp_utc() + cache_path_from_args = any( + arg in {"--cache-dir", "--cache-path"} + or arg.startswith(("--cache-dir=", "--cache-path=")) + for arg in sys.argv + ) + baseline_path_from_args = any( + arg == "--baseline" or arg.startswith("--baseline=") for arg in sys.argv + ) + metrics_path_from_args = any( + arg == "--metrics-baseline" or arg.startswith("--metrics-baseline=") + for arg in sys.argv + ) + args = ap.parse_args() + + root_path = 
_resolve_existing_root_path(args=args, printer=_console()) + pyproject_config = _load_pyproject_config_or_exit( + root_path=root_path, + load_pyproject_config_fn=load_pyproject_config, + printer=_console(), + ) + apply_pyproject_config_overrides( + args=args, + config_values=pyproject_config, + explicit_cli_dests=explicit_cli_dests, + ) + git_diff_ref = _validate_changed_scope_args(args=args) + changed_paths = ( + _git_diff_changed_paths(root_path=root_path, git_diff_ref=git_diff_ref) + if git_diff_ref is not None + else () + ) + _configure_runtime_flags(args) + _configure_runtime_console(args) + _validate_numeric_args_or_exit( + args=args, + validate_numeric_args_fn=_validate_numeric_args, + printer=_console(), + ) + baseline_inputs = _resolve_baseline_inputs( + ap=ap, + args=args, + root_path=root_path, + baseline_path_from_args=baseline_path_from_args, + metrics_path_from_args=metrics_path_from_args, + probe_metrics_baseline_section_fn=_probe_metrics_baseline_section, + printer=_console(), + ) + prepare_metrics_mode_and_ui( + args=args, + root_path=root_path, + baseline_path=baseline_inputs.baseline_path, + baseline_exists=baseline_inputs.baseline_exists, + metrics_baseline_path=baseline_inputs.metrics_baseline_path, + metrics_baseline_exists=baseline_inputs.metrics_baseline_exists, + configure_metrics_mode=_configure_metrics_mode, + print_banner=print_banner, + ) + + output_paths = _resolve_output_paths( + args, + report_path_origins=report_path_origins, + report_generated_at_utc=report_generated_at_utc, + ) + _validate_report_ui_flags(args=args, output_paths=output_paths) + cache_path = _resolve_cache_path( + root_path=root_path, + args=args, + from_args=cache_path_from_args, + ) + + cache = Cache( + cache_path, + root=root_path, + max_size_bytes=args.max_cache_size_mb * 1024 * 1024, + min_loc=args.min_loc, + min_stmt=args.min_stmt, + block_min_loc=args.block_min_loc, + block_min_stmt=args.block_min_stmt, + segment_min_loc=args.segment_min_loc, + 
segment_min_stmt=args.segment_min_stmt, + collect_api_surface=bool(args.api_surface), + ) + cache.load() + if cache.load_warning: + _console().print(f"[warning]{cache.load_warning}[/warning]") + + boot = bootstrap( + args=args, + root=root_path, + output_paths=output_paths, + cache_path=cache_path, + ) + discovery_result, processing_result, analysis_result = _run_analysis_stages( + args=args, + boot=boot, + cache=cache, + ) + + source_read_contract_failure = ( + bool(processing_result.source_read_failures) + and gating_mode_enabled(args) + and not args.update_baseline + ) + shared_baseline_payload = ( + baseline_inputs.shared_baseline_payload + if baseline_inputs.metrics_baseline_path == baseline_inputs.baseline_path + else None + ) + baseline_state = _resolve_clone_baseline_state( + args=args, + baseline_path=baseline_inputs.baseline_path, + baseline_exists=baseline_inputs.baseline_exists, + analysis=analysis_result, + shared_baseline_payload=shared_baseline_payload, + ) + metrics_baseline_state = _resolve_metrics_baseline_state( + args=args, + metrics_baseline_path=baseline_inputs.metrics_baseline_path, + metrics_baseline_exists=baseline_inputs.metrics_baseline_exists, + baseline_updated_path=baseline_state.updated_path, + analysis=analysis_result, + shared_baseline_payload=shared_baseline_payload, + ) + + cache_status, cache_schema_version = _resolve_cache_status(cache) + report_meta = cli_meta_mod.build_cli_report_meta( + codeclone_version=__version__, + scan_root=root_path, + baseline_path=baseline_inputs.baseline_path, + baseline_state=baseline_state, + cache_path=resolve_report_cache_path(cache_path), + cache_status=cache_status, + cache_schema_version=cache_schema_version, + processing_result=processing_result, + metrics_baseline_path=baseline_inputs.metrics_baseline_path, + metrics_baseline_state=metrics_baseline_state, + analysis_result=analysis_result, + args=args, + metrics_computed=_metrics_computed(args), + 
analysis_started_at_utc=analysis_started_at_utc, + report_generated_at_utc=report_generated_at_utc, + ) + + diff_context = _build_diff_context( + analysis=analysis_result, + baseline_path=baseline_inputs.baseline_path, + baseline_state=baseline_state, + metrics_baseline_state=metrics_baseline_state, + ) + summary_counts = build_summary_counts( + discovery_result=discovery_result, + processing_result=processing_result, + ) + _print_summary( + console=_console(), + quiet=args.quiet, + files_found=discovery_result.files_found, + files_analyzed=processing_result.files_analyzed, + cache_hits=discovery_result.cache_hits, + files_skipped=processing_result.files_skipped, + analyzed_lines=summary_counts["analyzed_lines"], + analyzed_functions=summary_counts["analyzed_functions"], + analyzed_methods=summary_counts["analyzed_methods"], + analyzed_classes=summary_counts["analyzed_classes"], + func_clones_count=analysis_result.func_clones_count, + block_clones_count=analysis_result.block_clones_count, + segment_clones_count=analysis_result.segment_clones_count, + suppressed_golden_fixture_groups=len( + getattr(analysis_result, "suppressed_clone_groups", ()) + ), + suppressed_segment_groups=analysis_result.suppressed_segment_groups, + new_clones_count=diff_context.new_clones_count, + ) + print_metrics_if_available( + args=args, + analysis=analysis_result, + metrics_diff=diff_context.metrics_diff, + api_surface_diff_available=diff_context.api_surface_diff_available, + console=_console(), + build_metrics_snapshot_fn=build_metrics_snapshot, + print_metrics_fn=_print_metrics, + ) + + report_artifacts = report( + boot=boot, + discovery=discovery_result, + processing=processing_result, + analysis=analysis_result, + report_meta=report_meta, + new_func=diff_context.new_func, + new_block=diff_context.new_block, + html_builder=build_html_report, + metrics_diff=diff_context.metrics_diff, + coverage_adoption_diff_available=diff_context.coverage_adoption_diff_available, + 
api_surface_diff_available=diff_context.api_surface_diff_available, + include_report_document=bool(changed_paths), + ) + changed_clone_gate = resolve_changed_clone_gate( + args=args, + report_document=report_artifacts.report_document, + changed_paths=changed_paths, + changed_clone_gate_from_report_fn=_changed_clone_gate_from_report, + ) + maybe_print_changed_scope_snapshot( + args=args, + changed_clone_gate=changed_clone_gate, + console=_console(), + print_changed_scope_fn=_print_changed_scope, + ) + html_report_path = _write_report_outputs( + args=args, + output_paths=output_paths, + report_artifacts=report_artifacts, + open_html_report=args.open_html_report, + ) + + _enforce_gating( + args=args, + boot=boot, + analysis=analysis_result, + processing=processing_result, + source_read_contract_failure=source_read_contract_failure, + baseline_failure_code=baseline_state.failure_code, + metrics_baseline_failure_code=metrics_baseline_state.failure_code, + new_func=( + set(changed_clone_gate.new_func) + if changed_clone_gate + else diff_context.new_func + ), + new_block=( + set(changed_clone_gate.new_block) + if changed_clone_gate + else diff_context.new_block + ), + metrics_diff=diff_context.metrics_diff, + html_report_path=html_report_path, + clone_threshold_total=( + changed_clone_gate.total_clone_groups if changed_clone_gate else None + ), + ) + + notice_new_clones_count = ( + len(changed_clone_gate.new_func) + len(changed_clone_gate.new_block) + if changed_clone_gate is not None + else diff_context.new_clones_count + ) + warn_new_clones_without_fail( + args=args, + notice_new_clones_count=notice_new_clones_count, + console=_console(), + ) + print_pipeline_done_if_needed(args=args, run_started_at=run_started_at) + + +def main() -> None: + try: + _main_impl() + except SystemExit: + raise + except Exception as exc: + _console().print( + ui.fmt_internal_error( + exc, + issues_url=ISSUES_URL, + debug=_is_debug_enabled(), + ) + ) + raise 
SystemExit(ExitCode.INTERNAL_ERROR) from exc diff --git a/codeclone/surfaces/mcp/__init__.py b/codeclone/surfaces/mcp/__init__.py index 2d2fb1f..557317f 100644 --- a/codeclone/surfaces/mcp/__init__.py +++ b/codeclone/surfaces/mcp/__init__.py @@ -2,57 +2,3 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. # SPDX-License-Identifier: MPL-2.0 - -from .service import CodeCloneMCPService -from .session import ( - DEFAULT_MCP_HISTORY_LIMIT, - MAX_MCP_HISTORY_LIMIT, - MCPAnalysisRequest, - MCPFindingNotFoundError, - MCPGateRequest, - MCPGitDiffError, - MCPRunNotFoundError, - MCPRunRecord, - MCPServiceContractError, - MCPServiceError, - _base_short_finding_id_payload, - _BufferConsole, - _clone_short_id_entry_payload, - _CloneShortIdEntry, - _disambiguated_clone_short_ids_payload, - _disambiguated_short_finding_id_payload, - _git_diff_lines_payload, - _json_text_payload, - _leaf_symbol_name_payload, - _load_report_document_payload, - _partitioned_short_id, - _suggestion_finding_id_payload, - _validated_history_limit, -) - -__all__ = [ - "DEFAULT_MCP_HISTORY_LIMIT", - "MAX_MCP_HISTORY_LIMIT", - "CodeCloneMCPService", - "MCPAnalysisRequest", - "MCPFindingNotFoundError", - "MCPGateRequest", - "MCPGitDiffError", - "MCPRunNotFoundError", - "MCPRunRecord", - "MCPServiceContractError", - "MCPServiceError", - "_BufferConsole", - "_CloneShortIdEntry", - "_base_short_finding_id_payload", - "_clone_short_id_entry_payload", - "_disambiguated_clone_short_ids_payload", - "_disambiguated_short_finding_id_payload", - "_git_diff_lines_payload", - "_json_text_payload", - "_leaf_symbol_name_payload", - "_load_report_document_payload", - "_partitioned_short_id", - "_suggestion_finding_id_payload", - "_validated_history_limit", -] diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index d1f16ca..4a9c0da 100644 --- a/codeclone/surfaces/mcp/service.py +++ 
b/codeclone/surfaces/mcp/service.py @@ -6,7 +6,6 @@ from __future__ import annotations import inspect -from collections.abc import Mapping from typing import Any, cast from .session import ( @@ -15,110 +14,110 @@ MCPGateRequest, MCPSession, ) -from .tools import MCP_TOOLS_BY_NAME -from .tools._base import MCPTool +from .tools._base import run_kw -class CodeCloneMCPService: +class CodeCloneMCPService(MCPSession): def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: - self.session = MCPSession(history_limit=history_limit) - self._tools: Mapping[str, MCPTool] = MCP_TOOLS_BY_NAME - - def __getattr__(self, name: str) -> Any: - return getattr(self.session, name) - - def _dispatch(self, name: str, **params: object) -> object: - return self._tools[name].run(self.session, params) + super().__init__(history_limit=history_limit) + # Keep a stable seam for tests and monkeypatch-based callers while the + # service itself now owns the real MCP session state. + self.session = self + + def _run_session_method( + self, + name: str, + /, + *args: object, + **kwargs: object, + ) -> object: + method = cast("Any", getattr(MCPSession, name)) + return method(self, *args, **kwargs) + + def _session_bound_method(self, name: str) -> object: + return cast("Any", getattr(MCPSession, name)).__get__(self, MCPSession) + + def _run_dict(self, name: str, **params: object) -> dict[str, object]: + bound = self._session_bound_method(name) + return cast("dict[str, object]", run_kw(bound, params)) def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: return cast( "dict[str, object]", - self._dispatch("analyze_repository", request=request), + self._run_session_method("analyze_repository", request), ) def analyze_changed_paths(self, request: MCPAnalysisRequest) -> dict[str, object]: return cast( "dict[str, object]", - self._dispatch("analyze_changed_paths", request=request), + self._run_session_method("analyze_changed_paths", request), ) def 
get_run_summary(self, run_id: str | None = None) -> dict[str, object]: return cast( "dict[str, object]", - self._dispatch("get_run_summary", run_id=run_id), + self._run_session_method("get_run_summary", run_id), ) def compare_runs(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("compare_runs", **params)) + return self._run_dict("compare_runs", **params) def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: return cast( "dict[str, object]", - self._dispatch("evaluate_gates", request=request), + self._run_session_method("evaluate_gates", request), ) def get_report_section(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("get_report_section", **params)) + return self._run_dict("get_report_section", **params) def list_findings(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("list_findings", **params)) + return self._run_dict("list_findings", **params) def get_finding(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("get_finding", **params)) + return self._run_dict("get_finding", **params) def get_remediation(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("get_remediation", **params)) + return self._run_dict("get_remediation", **params) def list_hotspots(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("list_hotspots", **params)) + return self._run_dict("list_hotspots", **params) def get_production_triage(self, **params: object) -> dict[str, object]: - return cast( - "dict[str, object]", - self._dispatch("get_production_triage", **params), - ) + return self._run_dict("get_production_triage", **params) def get_help(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("help", **params)) + return self._run_dict("get_help", **params) 
def generate_pr_summary(self, **params: object) -> dict[str, object]: - return cast( - "dict[str, object]", - self._dispatch("generate_pr_summary", **params), - ) + return self._run_dict("generate_pr_summary", **params) def mark_finding_reviewed(self, **params: object) -> dict[str, object]: - return cast( - "dict[str, object]", - self._dispatch("mark_finding_reviewed", **params), - ) + return self._run_dict("mark_finding_reviewed", **params) def list_reviewed_findings(self, **params: object) -> dict[str, object]: - return cast( - "dict[str, object]", - self._dispatch("list_reviewed_findings", **params), - ) + return self._run_dict("list_reviewed_findings", **params) def clear_session_runs(self) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("clear_session_runs")) + return cast("dict[str, object]", self._run_session_method("clear_session_runs")) def check_complexity(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("check_complexity", **params)) + return self._run_dict("check_complexity", **params) def check_clones(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("check_clones", **params)) + return self._run_dict("check_clones", **params) def check_coupling(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("check_coupling", **params)) + return self._run_dict("check_coupling", **params) def check_cohesion(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("check_cohesion", **params)) + return self._run_dict("check_cohesion", **params) def check_dead_code(self, **params: object) -> dict[str, object]: - return cast("dict[str, object]", self._dispatch("check_dead_code", **params)) + return self._run_dict("check_dead_code", **params) def read_resource(self, uri: str) -> str: - return self.session.read_resource(uri) + return cast("str", 
self._run_session_method("read_resource", uri)) _EMPTY = inspect.Signature.empty diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index fd29abf..55af87b 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -21,8 +21,13 @@ from ... import __version__ from ...baseline import Baseline -from ...cache import Cache, CacheStatus -from ...config import ( +from ...cache.store import Cache +from ...cache.versioning import CacheStatus +from ...config.pyproject_loader import ( + ConfigValidationError, + load_pyproject_config, +) +from ...config.spec import ( DEFAULT_BASELINE_PATH, DEFAULT_BLOCK_MIN_LOC, DEFAULT_BLOCK_MIN_STMT, @@ -32,8 +37,6 @@ DEFAULT_MIN_STMT, DEFAULT_SEGMENT_MIN_LOC, DEFAULT_SEGMENT_MIN_STMT, - ConfigValidationError, - load_pyproject_config, ) from ...contracts import ( DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, @@ -42,14 +45,12 @@ DOCS_URL, REPORT_SCHEMA_VERSION, ) -from ...core import ( - OutputPaths, - analyze, - bootstrap, - discover, - process, - report, -) +from ...core._types import OutputPaths +from ...core.bootstrap import bootstrap +from ...core.discovery import discover +from ...core.parallelism import process +from ...core.pipeline import analyze +from ...core.reporting import report from ...domain.findings import ( CATEGORY_CLONE, CATEGORY_COHESION, diff --git a/pyproject.toml b/pyproject.toml index c38c1ff..d2c6248 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,7 @@ dev = [ "mypy>=1.20.1", "ruff>=0.15.10", "pre-commit>=4.5.1", + "codegraphcontext>=0.4.2" ] [project.scripts] diff --git a/tests/test_cache.py b/tests/test_cache.py index 069dece..eafd041 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -14,20 +14,65 @@ import pytest -import codeclone.cache as cache_mod -from codeclone.blocks import BlockUnit, SegmentUnit -from codeclone.cache import Cache, CacheStatus +import codeclone.cache.store as cache_store +from codeclone.cache._canonicalize 
import ( + _as_module_api_surface_dict, + _as_module_docstring_coverage_dict, + _as_module_typing_coverage_dict, + _canonicalize_cache_entry, + _has_cache_entry_container_shape, +) +from codeclone.cache._validators import ( + _is_api_param_spec_dict, + _is_class_metrics_dict, + _is_dead_candidate_dict, + _is_module_dep_dict, + _is_public_symbol_dict, +) +from codeclone.cache._wire_decode import ( + _decode_optional_wire_api_surface, + _decode_optional_wire_module_ints, + _decode_optional_wire_source_stats, + _decode_wire_api_param_spec, + _decode_wire_api_surface_symbol, + _decode_wire_block, + _decode_wire_class_metric, + _decode_wire_dead_candidate, + _decode_wire_file_entry, + _decode_wire_file_sections, + _decode_wire_module_dep, + _decode_wire_name_sections, + _decode_wire_segment, + _decode_wire_unit, +) +from codeclone.cache._wire_encode import _encode_wire_file_entry +from codeclone.cache._wire_helpers import ( + _decode_optional_wire_coupled_classes, + _decode_wire_int_fields, + _decode_wire_qualname_span_size, +) +from codeclone.cache.entries import ( + CacheEntry, + _block_dict_from_model, + _segment_dict_from_model, + _unit_dict_from_model, +) +from codeclone.cache.integrity import as_str_dict as _as_str_dict from codeclone.cache.integrity import sign_cache_payload from codeclone.cache.projection import ( runtime_filepath_from_wire, wire_filepath_from_runtime, ) +from codeclone.cache.store import Cache, file_stat_signature +from codeclone.cache.versioning import CacheStatus, _as_analysis_profile, _resolve_root from codeclone.contracts.errors import CacheError from codeclone.models import ( ApiParamSpec, + BlockUnit, FileMetrics, ModuleApiSurface, PublicSymbol, + SegmentUnit, Unit, ) @@ -231,7 +276,7 @@ def test_cache_load_normalizes_stale_structural_findings(tmp_path: Path) -> None ) payload = _analysis_payload( cache, - files={"x.py": cache_mod._encode_wire_file_entry(entry)}, + files={"x.py": _encode_wire_file_entry(entry)}, ) signature = 
sign_cache_payload(payload) cache_path.write_text( @@ -292,7 +337,7 @@ def test_store_canonical_file_entry_marks_dirty_only_when_entry_changes( cache = Cache(tmp_path / "cache.json") canonical_entry = cast( Any, - cache_mod._canonicalize_cache_entry( + _canonicalize_cache_entry( { "stat": {"mtime_ns": 1, "size": 1}, "units": [], @@ -327,11 +372,11 @@ def test_store_canonical_file_entry_marks_dirty_only_when_entry_changes( def test_cache_helper_type_guards_and_wire_api_decoders_cover_invalid_inputs() -> None: - assert cache_mod._as_module_typing_coverage_dict({"module": "pkg"}) is None - assert cache_mod._as_module_docstring_coverage_dict({"module": "pkg"}) is None - assert cache_mod._as_module_api_surface_dict({"module": "pkg"}) is None + assert _as_module_typing_coverage_dict({"module": "pkg"}) is None + assert _as_module_docstring_coverage_dict({"module": "pkg"}) is None + assert _as_module_api_surface_dict({"module": "pkg"}) is None assert ( - cache_mod._has_cache_entry_container_shape( + _has_cache_entry_container_shape( { "stat": {"mtime_ns": 1, "size": 1}, "units": [], @@ -343,7 +388,7 @@ def test_cache_helper_type_guards_and_wire_api_decoders_cover_invalid_inputs() - is False ) assert ( - cache_mod._has_cache_entry_container_shape( + _has_cache_entry_container_shape( { "stat": {"mtime_ns": 1, "size": 1}, "units": [], @@ -355,7 +400,7 @@ def test_cache_helper_type_guards_and_wire_api_decoders_cover_invalid_inputs() - is False ) assert ( - cache_mod._has_cache_entry_container_shape( + _has_cache_entry_container_shape( { "stat": {"mtime_ns": 1, "size": 1}, "units": [], @@ -367,14 +412,14 @@ def test_cache_helper_type_guards_and_wire_api_decoders_cover_invalid_inputs() - is False ) assert ( - cache_mod._decode_optional_wire_api_surface( + _decode_optional_wire_api_surface( obj={"as": ["pkg.mod", ["run"], [None]]}, filepath="pkg/mod.py", ) is None ) assert ( - cache_mod._decode_optional_wire_module_ints( + _decode_optional_wire_module_ints( obj={"tc": ["pkg.mod", 
"bad"]}, key="tc", expected_len=2, @@ -382,18 +427,18 @@ def test_cache_helper_type_guards_and_wire_api_decoders_cover_invalid_inputs() - ) is None ) - assert cache_mod._decode_wire_api_surface_symbol(["pkg.mod:run"]) is None + assert _decode_wire_api_surface_symbol(["pkg.mod:run"]) is None assert ( - cache_mod._decode_wire_api_surface_symbol( + _decode_wire_api_surface_symbol( ["pkg.mod:run", "function", 1, 2, "name", "", [None]] ) is None ) - assert cache_mod._decode_wire_api_param_spec(["value"]) is None - assert cache_mod._is_api_param_spec_dict([]) is False - assert cache_mod._is_public_symbol_dict([]) is False + assert _decode_wire_api_param_spec(["value"]) is None + assert _is_api_param_spec_dict([]) is False + assert _is_public_symbol_dict([]) is False assert ( - cache_mod._is_public_symbol_dict( + _is_public_symbol_dict( { "qualname": "pkg.mod:run", "kind": "function", @@ -472,14 +517,12 @@ def test_cache_signature_validation_ignores_json_whitespace(tmp_path: Path) -> N def test_decode_wire_file_and_name_section_helpers_cover_valid_and_invalid() -> None: - encoded = cache_mod._encode_wire_file_entry( + encoded = _encode_wire_file_entry( { "stat": {"mtime_ns": 1, "size": 10}, - "units": [cache_mod._unit_dict_from_model(_make_unit("x.py"), "x.py")], - "blocks": [cache_mod._block_dict_from_model(_make_block("x.py"), "x.py")], - "segments": [ - cache_mod._segment_dict_from_model(_make_segment("x.py"), "x.py") - ], + "units": [_unit_dict_from_model(_make_unit("x.py"), "x.py")], + "blocks": [_block_dict_from_model(_make_block("x.py"), "x.py")], + "segments": [_segment_dict_from_model(_make_segment("x.py"), "x.py")], "class_metrics": [], "module_deps": [], "dead_candidates": [], @@ -491,7 +534,7 @@ def test_decode_wire_file_and_name_section_helpers_cover_valid_and_invalid() -> ) assert isinstance(encoded, dict) - file_sections = cache_mod._decode_wire_file_sections(obj=encoded, filepath="x.py") + file_sections = _decode_wire_file_sections(obj=encoded, 
filepath="x.py") assert file_sections is not None units, blocks, segments, class_metrics, module_deps, dead_candidates = file_sections assert units[0]["qualname"] == "mod:func" @@ -501,7 +544,7 @@ def test_decode_wire_file_and_name_section_helpers_cover_valid_and_invalid() -> assert module_deps == [] assert dead_candidates == [] - name_sections = cache_mod._decode_wire_name_sections(obj=encoded) + name_sections = _decode_wire_name_sections(obj=encoded) assert name_sections == ( ["used"], ["pkg.mod:used"], @@ -512,7 +555,7 @@ def test_decode_wire_file_and_name_section_helpers_cover_valid_and_invalid() -> invalid_sections = dict(encoded) invalid_sections["u"] = "bad" assert ( - cache_mod._decode_wire_file_sections( + _decode_wire_file_sections( obj=invalid_sections, filepath="x.py", ) @@ -521,7 +564,7 @@ def test_decode_wire_file_and_name_section_helpers_cover_valid_and_invalid() -> invalid_names = dict(encoded) invalid_names["rn"] = 1 - assert cache_mod._decode_wire_name_sections(obj=invalid_names) is None + assert _decode_wire_name_sections(obj=invalid_names) is None def test_cache_signature_mismatch_warns(tmp_path: Path) -> None: @@ -585,7 +628,7 @@ def test_cache_v_field_version_mismatch_warns(tmp_path: Path, version: str) -> N def test_cache_too_large_warns(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: cache_path = tmp_path / "cache.json" cache_path.write_text(json.dumps({"version": Cache._CACHE_VERSION, "files": {}})) - monkeypatch.setattr(cache_mod, "MAX_CACHE_SIZE_BYTES", 1) + monkeypatch.setattr(cache_store, "MAX_CACHE_SIZE_BYTES", 1) cache = Cache(cache_path) cache.load() assert cache.load_warning is not None @@ -822,7 +865,7 @@ def test_cache_entry_not_dict(tmp_path: Path) -> None: def test_file_stat_signature(tmp_path: Path) -> None: file_path = tmp_path / "x.py" file_path.write_text("print('x')\n", "utf-8") - stat = cache_mod.file_stat_signature(str(file_path)) + stat = file_stat_signature(str(file_path)) assert stat["size"] == 
file_path.stat().st_size assert isinstance(stat["mtime_ns"], int) @@ -1266,7 +1309,7 @@ def _resolve_with_error(self: Path, *, strict: bool = False) -> Path: def test_as_str_dict_rejects_non_string_keys() -> None: - assert cache_mod._as_str_dict({1: "x"}) is None + assert _as_str_dict({1: "x"}) is None @pytest.mark.parametrize( @@ -1285,31 +1328,29 @@ def test_as_str_dict_rejects_non_string_keys() -> None: ], ) def test_decode_wire_file_entry_invalid_variants(entry: object, filepath: str) -> None: - assert cache_mod._decode_wire_file_entry(entry, filepath) is None + assert _decode_wire_file_entry(entry, filepath) is None def test_decode_wire_item_type_failures() -> None: - assert cache_mod._decode_wire_unit(["q", 1, 2, 3, 4, "fp"], "x.py") is None - assert cache_mod._decode_wire_unit(["q", 1, 2, 3, 4, "fp", "0-19"], "x.py") is None - assert ( - cache_mod._decode_wire_unit(["q", "1", 2, 3, 4, "fp", "0-19"], "x.py") is None - ) - assert cache_mod._decode_wire_block(["q", 1, 2, 3], "x.py") is None - assert cache_mod._decode_wire_block(["q", 1, 2, "4", "hash"], "x.py") is None - assert cache_mod._decode_wire_segment(["q", 1, 2, 3, "h"], "x.py") is None - assert cache_mod._decode_wire_segment(["q", 1, 2, "3", "h", "sig"], "x.py") is None + assert _decode_wire_unit(["q", 1, 2, 3, 4, "fp"], "x.py") is None + assert _decode_wire_unit(["q", 1, 2, 3, 4, "fp", "0-19"], "x.py") is None + assert _decode_wire_unit(["q", "1", 2, 3, 4, "fp", "0-19"], "x.py") is None + assert _decode_wire_block(["q", 1, 2, 3], "x.py") is None + assert _decode_wire_block(["q", 1, 2, "4", "hash"], "x.py") is None + assert _decode_wire_segment(["q", 1, 2, 3, "h"], "x.py") is None + assert _decode_wire_segment(["q", 1, 2, "3", "h", "sig"], "x.py") is None def test_decode_wire_item_rejects_invalid_risk_fields() -> None: assert ( - cache_mod._decode_wire_unit( + _decode_wire_unit( ["q", 1, 2, 3, 4, "fp", "0-19", 2, 1, "critical", "raw"], "x.py", ) is None ) assert ( - cache_mod._decode_wire_class_metric( 
+ _decode_wire_class_metric( ["pkg.mod:Service", 1, 10, 3, 2, 4, 1, 7, 8], "x.py", ) @@ -1328,7 +1369,7 @@ def _resolve_with_error(self: Path, *, strict: bool = False) -> Path: return original_resolve(self, strict=strict) monkeypatch.setattr(Path, "resolve", _resolve_with_error) - assert cache_mod._resolve_root(tmp_path) is None + assert _resolve_root(tmp_path) is None def test_cache_entry_rejects_invalid_metrics_sections(tmp_path: Path) -> None: @@ -1352,46 +1393,36 @@ def test_cache_entry_rejects_invalid_metrics_sections(tmp_path: Path) -> None: def test_decode_wire_file_entry_rejects_metrics_related_invalid_sections() -> None: + assert _decode_wire_file_entry({"st": [1, 2], "cm": "bad"}, "x.py") is None assert ( - cache_mod._decode_wire_file_entry({"st": [1, 2], "cm": "bad"}, "x.py") is None - ) - assert ( - cache_mod._decode_wire_file_entry( + _decode_wire_file_entry( {"st": [1, 2], "cm": [["Q", 1, 2, 3, 4, 5, 6, "low"]]}, "x.py", ) is None ) + assert _decode_wire_file_entry({"st": [1, 2], "md": "bad"}, "x.py") is None assert ( - cache_mod._decode_wire_file_entry({"st": [1, 2], "md": "bad"}, "x.py") is None - ) - assert ( - cache_mod._decode_wire_file_entry( + _decode_wire_file_entry( {"st": [1, 2], "md": [["source", "target", "import"]]}, "x.py", ) is None ) - assert ( - cache_mod._decode_wire_file_entry({"st": [1, 2], "dc": "bad"}, "x.py") is None - ) - decoded = cache_mod._decode_wire_file_entry( + assert _decode_wire_file_entry({"st": [1, 2], "dc": "bad"}, "x.py") is None + decoded = _decode_wire_file_entry( {"st": [1, 2], "dc": [["q", "n", 1, 2, "function"]]}, "x.py", ) assert decoded is not None assert decoded["dead_candidates"][0]["filepath"] == "x.py" - assert cache_mod._decode_wire_file_entry({"st": [1, 2], "rn": [1]}, "x.py") is None - assert cache_mod._decode_wire_file_entry({"st": [1, 2], "in": [1]}, "x.py") is None - assert cache_mod._decode_wire_file_entry({"st": [1, 2], "cn": [1]}, "x.py") is None - assert ( - 
cache_mod._decode_wire_file_entry({"st": [1, 2], "cc": "bad"}, "x.py") is None - ) + assert _decode_wire_file_entry({"st": [1, 2], "rn": [1]}, "x.py") is None + assert _decode_wire_file_entry({"st": [1, 2], "in": [1]}, "x.py") is None + assert _decode_wire_file_entry({"st": [1, 2], "cn": [1]}, "x.py") is None + assert _decode_wire_file_entry({"st": [1, 2], "cc": "bad"}, "x.py") is None + assert _decode_wire_file_entry({"st": [1, 2], "cc": [["Q"]]}, "x.py") is None assert ( - cache_mod._decode_wire_file_entry({"st": [1, 2], "cc": [["Q"]]}, "x.py") is None - ) - assert ( - cache_mod._decode_wire_file_entry( + _decode_wire_file_entry( {"st": [1, 2], "cc": [["Q", ["A", 1]]]}, "x.py", ) @@ -1400,7 +1431,7 @@ def test_decode_wire_file_entry_rejects_metrics_related_invalid_sections() -> No def test_decode_wire_file_entry_accepts_metrics_sections() -> None: - decoded = cache_mod._decode_wire_file_entry( + decoded = _decode_wire_file_entry( { "st": [1, 2], "cm": [["pkg.mod:Service", 1, 10, 3, 2, 4, 1, "low", "medium"]], @@ -1423,7 +1454,7 @@ def test_decode_wire_file_entry_accepts_metrics_sections() -> None: def test_decode_wire_file_entry_optional_source_stats() -> None: - decoded = cache_mod._decode_wire_file_entry( + decoded = _decode_wire_file_entry( {"st": [1, 2], "ss": [10, 3, 1, 1]}, "x.py", ) @@ -1435,20 +1466,16 @@ def test_decode_wire_file_entry_optional_source_stats() -> None: "classes": 1, } - assert cache_mod._decode_optional_wire_source_stats(obj={"ss": "bad"}) is None - assert cache_mod._decode_optional_wire_source_stats(obj={"ss": [1, 2, 3]}) is None - assert ( - cache_mod._decode_optional_wire_source_stats(obj={"ss": [1, 2, -1, 0]}) is None - ) + assert _decode_optional_wire_source_stats(obj={"ss": "bad"}) is None + assert _decode_optional_wire_source_stats(obj={"ss": [1, 2, 3]}) is None + assert _decode_optional_wire_source_stats(obj={"ss": [1, 2, -1, 0]}) is None def test_cache_helpers_cover_invalid_analysis_profile_and_source_stats_shapes() -> None: + 
assert _decode_wire_qualname_span_size(["pkg.mod:fn", 1, 2, "bad"]) is None + assert _decode_wire_qualname_span_size([None, 1, 2, 4]) is None assert ( - cache_mod._decode_wire_qualname_span_size(["pkg.mod:fn", 1, 2, "bad"]) is None - ) - assert cache_mod._decode_wire_qualname_span_size([None, 1, 2, 4]) is None - assert ( - cache_mod._as_analysis_profile( + _as_analysis_profile( { "min_loc": 1, "min_stmt": 1, @@ -1460,16 +1487,13 @@ def test_cache_helpers_cover_invalid_analysis_profile_and_source_stats_shapes() ) is None ) - assert ( - cache_mod._decode_optional_wire_source_stats(obj={"ss": [1, 2, "bad", 0]}) - is None - ) + assert _decode_optional_wire_source_stats(obj={"ss": [1, 2, "bad", 0]}) is None def test_canonicalize_cache_entry_skips_invalid_dead_candidate_suppression_shape() -> ( None ): - normalized = cache_mod._canonicalize_cache_entry( + normalized = _canonicalize_cache_entry( cast( Any, { @@ -1511,7 +1535,7 @@ def test_canonicalize_cache_entry_skips_invalid_dead_candidate_suppression_shape def test_decode_optional_wire_coupled_classes_rejects_non_string_qualname() -> None: assert ( - cache_mod._decode_optional_wire_coupled_classes( + _decode_optional_wire_coupled_classes( obj={"cc": [[1, ["A"]]]}, key="cc", ) @@ -1520,7 +1544,7 @@ def test_decode_optional_wire_coupled_classes_rejects_non_string_qualname() -> N def test_decode_wire_file_entry_skips_empty_coupled_classes_mapping() -> None: - decoded = cache_mod._decode_wire_file_entry( + decoded = _decode_wire_file_entry( { "st": [1, 2], "cm": [["pkg.mod:Service", 1, 10, 3, 2, 4, 1, "low", "medium"]], @@ -1533,46 +1557,46 @@ def test_decode_wire_file_entry_skips_empty_coupled_classes_mapping() -> None: def test_decode_wire_metrics_items_and_deps_roundtrip_shape() -> None: - class_metric = cache_mod._decode_wire_class_metric( + class_metric = _decode_wire_class_metric( ["pkg.mod:Service", 1, 10, 3, 2, 4, 1, "low", "medium"], "x.py", ) assert class_metric is not None assert class_metric["filepath"] == 
"x.py" assert ( - cache_mod._decode_wire_class_metric( + _decode_wire_class_metric( ["pkg.mod:Service", "1", 10, 3, 2, 4, 1, "low", "medium"], "x.py", ) is None ) - module_dep = cache_mod._decode_wire_module_dep(["a", "b", "import", 1]) + module_dep = _decode_wire_module_dep(["a", "b", "import", 1]) assert module_dep is not None assert module_dep["source"] == "a" - assert cache_mod._decode_wire_module_dep(["a", "b", "import", "1"]) is None + assert _decode_wire_module_dep(["a", "b", "import", "1"]) is None - dead_candidate = cache_mod._decode_wire_dead_candidate( + dead_candidate = _decode_wire_dead_candidate( ["pkg.mod:unused", "unused", 1, 2, "function"], "fallback.py", ) assert dead_candidate is not None assert dead_candidate["filepath"] == "fallback.py" assert ( - cache_mod._decode_wire_dead_candidate( + _decode_wire_dead_candidate( ["pkg.mod:unused", "unused", "1", 2, "function"], "fallback.py", ) is None ) assert ( - cache_mod._decode_wire_dead_candidate( + _decode_wire_dead_candidate( ["pkg.mod:unused", "unused", 1, 2, "function", "legacy.py"], "fallback.py", ) is None ) - dead_candidate_with_suppression = cache_mod._decode_wire_dead_candidate( + dead_candidate_with_suppression = _decode_wire_dead_candidate( ["pkg.mod:unused", "unused", 1, 2, "function", ["dead-code", "dead-code"]], "fallback.py", ) @@ -1581,7 +1605,7 @@ def test_decode_wire_metrics_items_and_deps_roundtrip_shape() -> None: def test_encode_wire_file_entry_includes_optional_metrics_sections() -> None: - entry: cache_mod.CacheEntry = { + entry: CacheEntry = { "stat": {"mtime_ns": 1, "size": 2}, "units": [], "blocks": [], @@ -1609,7 +1633,7 @@ def test_encode_wire_file_entry_includes_optional_metrics_sections() -> None: "import_names": ["z", "a"], "class_names": ["B", "A"], } - wire = cache_mod._encode_wire_file_entry(entry) + wire = _encode_wire_file_entry(entry) assert "cm" in wire assert "cc" in wire assert "md" in wire @@ -1619,7 +1643,7 @@ def 
test_encode_wire_file_entry_includes_optional_metrics_sections() -> None: def test_encode_wire_file_entry_compacts_dead_candidate_filepaths() -> None: - entry: cache_mod.CacheEntry = { + entry: CacheEntry = { "stat": {"mtime_ns": 1, "size": 2}, "units": [], "blocks": [], @@ -1640,12 +1664,12 @@ def test_encode_wire_file_entry_compacts_dead_candidate_filepaths() -> None: "import_names": [], "class_names": [], } - wire = cache_mod._encode_wire_file_entry(entry) + wire = _encode_wire_file_entry(entry) assert wire["dc"] == [["pkg.mod:unused", "unused", 3, 4, "function"]] def test_encode_wire_file_entry_encodes_dead_candidate_suppressions() -> None: - entry: cache_mod.CacheEntry = { + entry: CacheEntry = { "stat": {"mtime_ns": 1, "size": 2}, "units": [], "blocks": [], @@ -1667,12 +1691,12 @@ def test_encode_wire_file_entry_encodes_dead_candidate_suppressions() -> None: "import_names": [], "class_names": [], } - wire = cache_mod._encode_wire_file_entry(entry) + wire = _encode_wire_file_entry(entry) assert wire["dc"] == [["pkg.mod:unused", "unused", 3, 4, "function", ["dead-code"]]] def test_encode_wire_file_entry_skips_empty_or_invalid_coupled_classes() -> None: - entry: cache_mod.CacheEntry = { + entry: CacheEntry = { "stat": {"mtime_ns": 1, "size": 2}, "units": [], "blocks": [], @@ -1711,7 +1735,7 @@ def test_encode_wire_file_entry_skips_empty_or_invalid_coupled_classes() -> None "import_names": [], "class_names": [], } - wire = cache_mod._encode_wire_file_entry(entry) + wire = _encode_wire_file_entry(entry) assert "cc" not in wire @@ -1772,7 +1796,7 @@ def test_get_file_entry_sorts_coupled_classes_in_runtime_payload( def test_cache_entry_container_shape_rejects_invalid_source_stats() -> None: assert ( - cache_mod._has_cache_entry_container_shape( + _has_cache_entry_container_shape( { "stat": {"mtime_ns": 1, "size": 1}, "source_stats": { @@ -1791,11 +1815,11 @@ def test_cache_entry_container_shape_rejects_invalid_source_stats() -> None: def 
test_cache_type_predicates_reject_non_dict_variants() -> None: - assert cache_mod._is_class_metrics_dict([]) is False - assert cache_mod._is_module_dep_dict([]) is False - assert cache_mod._is_dead_candidate_dict([]) is False + assert _is_class_metrics_dict([]) is False + assert _is_module_dep_dict([]) is False + assert _is_dead_candidate_dict([]) is False assert ( - cache_mod._is_dead_candidate_dict( + _is_dead_candidate_dict( { "qualname": "pkg.mod:broken", "local_name": "broken", @@ -1807,7 +1831,7 @@ def test_cache_type_predicates_reject_non_dict_variants() -> None: is False ) assert ( - cache_mod._is_dead_candidate_dict( + _is_dead_candidate_dict( { "qualname": "pkg.mod:unused", "local_name": "unused", @@ -1821,7 +1845,7 @@ def test_cache_type_predicates_reject_non_dict_variants() -> None: is True ) assert ( - cache_mod._is_dead_candidate_dict( + _is_dead_candidate_dict( { "qualname": "pkg.mod:unused", "local_name": "unused", @@ -1835,7 +1859,7 @@ def test_cache_type_predicates_reject_non_dict_variants() -> None: is False ) assert ( - cache_mod._is_class_metrics_dict( + _is_class_metrics_dict( { "qualname": "pkg.mod:Service", "filepath": "x.py", @@ -1852,7 +1876,7 @@ def test_cache_type_predicates_reject_non_dict_variants() -> None: is True ) assert ( - cache_mod._is_class_metrics_dict( + _is_class_metrics_dict( { "qualname": "pkg.mod:Service", "filepath": "x.py", @@ -1870,7 +1894,7 @@ def test_cache_type_predicates_reject_non_dict_variants() -> None: is True ) assert ( - cache_mod._is_class_metrics_dict( + _is_class_metrics_dict( { "qualname": "pkg.mod:Service", "filepath": "x.py", @@ -1887,9 +1911,9 @@ def test_cache_type_predicates_reject_non_dict_variants() -> None: ) is False ) - assert cache_mod._is_class_metrics_dict({"qualname": "pkg.mod:Service"}) is False + assert _is_class_metrics_dict({"qualname": "pkg.mod:Service"}) is False assert ( - cache_mod._is_module_dep_dict( + _is_module_dep_dict( { "source": "a", "target": "b", @@ -1902,12 +1926,12 @@ def 
test_cache_type_predicates_reject_non_dict_variants() -> None: def test_decode_wire_int_fields_rejects_non_int_values() -> None: - assert cache_mod._decode_wire_int_fields(["x", "nope"], 1) is None + assert _decode_wire_int_fields(["x", "nope"], 1) is None def test_decode_wire_block_rejects_missing_block_hash() -> None: assert ( - cache_mod._decode_wire_block( + _decode_wire_block( ["pkg.mod:func", 10, 12, 4, None], "pkg/mod.py", ) @@ -1917,7 +1941,7 @@ def test_decode_wire_block_rejects_missing_block_hash() -> None: def test_decode_wire_segment_rejects_missing_segment_signature() -> None: assert ( - cache_mod._decode_wire_segment( + _decode_wire_segment( ["pkg.mod:func", 10, 12, 4, "seg-hash", None], "pkg/mod.py", ) @@ -1926,4 +1950,4 @@ def test_decode_wire_segment_rejects_missing_segment_signature() -> None: def test_decode_wire_dead_candidate_rejects_invalid_rows() -> None: - assert cache_mod._decode_wire_dead_candidate(object(), "pkg/mod.py") is None + assert _decode_wire_dead_candidate(object(), "pkg/mod.py") is None diff --git a/tests/test_cli_inprocess.py b/tests/test_cli_inprocess.py index 4ae574d..c43795b 100644 --- a/tests/test_cli_inprocess.py +++ b/tests/test_cli_inprocess.py @@ -18,16 +18,15 @@ import codeclone.baseline as baseline import codeclone.baseline.trust as baseline_trust -import codeclone.core as pipeline import codeclone.core.discovery as core_discovery import codeclone.core.parallelism as core_parallelism import codeclone.core.pipeline as core_pipeline import codeclone.core.worker as core_worker -import codeclone.surfaces.cli.main as cli import codeclone.surfaces.cli.report_meta as cli_meta import codeclone.surfaces.cli.reports_output as cli_reports +import codeclone.surfaces.cli.workflow as cli from codeclone import __version__ -from codeclone.cache import Cache, file_stat_signature +from codeclone.cache.store import Cache, file_stat_signature from codeclone.contracts import ( BASELINE_FINGERPRINT_VERSION, BASELINE_SCHEMA_VERSION, @@ 
-35,6 +34,8 @@ REPORT_SCHEMA_VERSION, ) from codeclone.contracts.errors import CacheError +from codeclone.core._types import FileProcessResult as CliFileProcessResult +from codeclone.core.parallelism import _parallel_min_files from codeclone.models import Unit from codeclone.report.gates.reasons import parse_metric_reason_entry from tests._assertions import ( @@ -540,7 +541,7 @@ def _assert_worker_failure_internal_error( ) -> None: _write_default_source(tmp_path) - def _boom(*_args: object, **_kwargs: object) -> cli.ProcessingResult: + def _boom(*_args: object, **_kwargs: object) -> CliFileProcessResult: raise RuntimeError("boom") class _FailExec: @@ -692,8 +693,8 @@ def _prepare_single_source_cache(tmp_path: Path) -> tuple[Path, Path, Cache]: return src, cache_path, Cache(cache_path) -def _source_read_error_result(filepath: str) -> cli.ProcessingResult: - return cli.ProcessingResult( +def _source_read_error_result(filepath: str) -> CliFileProcessResult: + return CliFileProcessResult( filepath=filepath, success=False, error="Cannot read file: [Errno 13] Permission denied", @@ -1043,7 +1044,7 @@ def test_cli_main_progress_fallback( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - for idx in range(pipeline._parallel_min_files(2) + 1): + for idx in range(_parallel_min_files(2) + 1): src = tmp_path / f"a{idx}.py" src.write_text("def f():\n return 1\n", "utf-8") monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _FailingExecutor) @@ -1057,7 +1058,7 @@ def test_cli_main_no_progress_fallback( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - for idx in range(pipeline._parallel_min_files(2) + 1): + for idx in range(_parallel_min_files(2) + 1): src = tmp_path / f"a{idx}.py" src.write_text("def f():\n return 1\n", "utf-8") monkeypatch.setattr(core_parallelism, "ProcessPoolExecutor", _FailingExecutor) @@ -3109,7 +3110,7 @@ def test_cli_unreadable_source_normal_mode_warns_and_continues( def 
_source_read_error( fp: str, *_args: object, **_kwargs: object - ) -> cli.ProcessingResult: + ) -> CliFileProcessResult: return _source_read_error_result(fp) monkeypatch.setattr(core_worker, "process_file", _source_read_error) @@ -3143,7 +3144,7 @@ def test_cli_unreadable_source_fails_in_ci_with_contract_error( def _source_read_error( fp: str, *_args: object, **_kwargs: object - ) -> cli.ProcessingResult: + ) -> CliFileProcessResult: return _source_read_error_result(fp) monkeypatch.setattr(core_worker, "process_file", _source_read_error) @@ -3202,7 +3203,7 @@ def test_cli_contract_error_priority_over_gating_failure_for_unreadable_source( def _source_read_error( fp: str, *_args: object, **_kwargs: object - ) -> cli.ProcessingResult: + ) -> CliFileProcessResult: return _source_read_error_result(fp) def _diff( @@ -3247,7 +3248,7 @@ def test_cli_unreadable_source_ci_shows_overflow_summary( def _source_read_error( fp: str, *_args: object, **_kwargs: object - ) -> cli.ProcessingResult: + ) -> CliFileProcessResult: return _source_read_error_result(fp) monkeypatch.setattr(core_worker, "process_file", _source_read_error) @@ -3705,8 +3706,8 @@ def test_cli_failed_files_report( def _bad_process( _fp: str, *_args: object, **_kwargs: object - ) -> cli.ProcessingResult: - return cli.ProcessingResult(filepath=_fp, success=False, error="bad") + ) -> CliFileProcessResult: + return CliFileProcessResult(filepath=_fp, success=False, error="bad") monkeypatch.setattr(core_worker, "process_file", _bad_process) _patch_parallel(monkeypatch) @@ -3726,8 +3727,8 @@ def test_cli_failed_files_report_single( def _bad_process( _fp: str, *_args: object, **_kwargs: object - ) -> cli.ProcessingResult: - return cli.ProcessingResult(filepath=_fp, success=False, error="bad") + ) -> CliFileProcessResult: + return CliFileProcessResult(filepath=_fp, success=False, error="bad") monkeypatch.setattr(core_worker, "process_file", _bad_process) _patch_parallel(monkeypatch) @@ -3745,7 +3746,7 @@ def 
test_cli_worker_failed( src = tmp_path / "a.py" src.write_text("def f():\n return 1\n", "utf-8") - def _boom(*_args: object, **_kwargs: object) -> cli.ProcessingResult: + def _boom(*_args: object, **_kwargs: object) -> CliFileProcessResult: raise RuntimeError("boom") monkeypatch.setattr(core_worker, "process_file", _boom) @@ -3977,7 +3978,7 @@ def test_cli_batch_result_none_no_progress( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - for idx in range(pipeline._parallel_min_files(2) + 1): + for idx in range(_parallel_min_files(2) + 1): src = tmp_path / f"a{idx}.py" src.write_text("def f():\n return 1\n", "utf-8") _patch_fixed_executor(monkeypatch, _FixedFuture(value=None)) @@ -3991,7 +3992,7 @@ def test_cli_batch_result_none_progress( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - for idx in range(pipeline._parallel_min_files(2) + 1): + for idx in range(_parallel_min_files(2) + 1): src = tmp_path / f"a{idx}.py" src.write_text("def f():\n return 1\n", "utf-8") _patch_dummy_progress(monkeypatch) @@ -4006,7 +4007,7 @@ def test_cli_failed_batch_item_no_progress( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - for idx in range(pipeline._parallel_min_files(2) + 1): + for idx in range(_parallel_min_files(2) + 1): src = tmp_path / f"a{idx}.py" src.write_text("def f():\n return 1\n", "utf-8") _patch_fixed_executor(monkeypatch, _FixedFuture(error=RuntimeError("boom"))) @@ -4020,7 +4021,7 @@ def test_cli_failed_batch_item_progress( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - for idx in range(pipeline._parallel_min_files(2) + 1): + for idx in range(_parallel_min_files(2) + 1): src = tmp_path / f"a{idx}.py" src.write_text("def f():\n return 1\n", "utf-8") _patch_dummy_progress(monkeypatch) diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index cc481f8..6bb25dc 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ 
-19,20 +19,30 @@ import codeclone.baseline as baseline_mod import codeclone.baseline.metrics_baseline as metrics_baseline_mod -import codeclone.core as pipeline import codeclone.core.worker as core_worker import codeclone.surfaces.cli.baseline_state as cli_baselines_mod -import codeclone.surfaces.cli.main as cli +import codeclone.surfaces.cli.changed_scope as cli_changed_scope +import codeclone.surfaces.cli.console as cli_console import codeclone.surfaces.cli.report_meta as cli_meta_mod import codeclone.surfaces.cli.reports_output as cli_reports +import codeclone.surfaces.cli.runtime as cli_runtime import codeclone.surfaces.cli.summary as cli_summary +import codeclone.surfaces.cli.workflow as cli from codeclone import __version__ from codeclone import ui_messages as ui from codeclone.analysis.normalizer import NormalizationConfig -from codeclone.cache import Cache -from codeclone.config import ConfigValidationError, build_parser +from codeclone.cache.store import Cache +from codeclone.config.argparse_builder import build_parser +from codeclone.config.pyproject_loader import ConfigValidationError from codeclone.contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL from codeclone.contracts.errors import BaselineValidationError +from codeclone.core._types import ( + AnalysisResult, + BootstrapResult, + DiscoveryResult, + ProcessingResult, +) +from codeclone.core.reporting import GatingResult from codeclone.core.worker import process_file from codeclone.models import HealthScore, ProjectMetrics from tests._assertions import assert_contains_all @@ -230,7 +240,7 @@ def test_cli_help_text_consistency( def test_report_path_origins_distinguish_bare_and_explicit_flags() -> None: - assert cli._report_path_origins( + assert cli_reports._report_path_origins( ( "--html", "--json", @@ -249,7 +259,7 @@ def test_report_path_origins_distinguish_bare_and_explicit_flags() -> None: def test_report_path_origins_stops_at_double_dash() -> None: - assert 
cli._report_path_origins(("--json=out.json", "--", "--html")) == { + assert cli_reports._report_path_origins(("--json=out.json", "--", "--html")) == { "html": None, "json": "explicit", "md": None, @@ -260,7 +270,7 @@ def test_report_path_origins_stops_at_double_dash() -> None: def test_timestamped_report_path_appends_utc_slug() -> None: path = Path("/tmp/report.html") - assert cli._timestamped_report_path( + assert cli_reports._timestamped_report_path( path, report_generated_at_utc="2026-03-22T21:30:45Z", ) == Path("/tmp/report-20260322T213045Z.html") @@ -400,7 +410,7 @@ def test_validate_changed_scope_args_rejects_invalid_combinations( ) -> None: cli.console = cli._make_console(no_color=True) with pytest.raises(SystemExit) as exc: - cli._validate_changed_scope_args(args=args) + cli_changed_scope._validate_changed_scope_args(args=args) assert exc.value.code == 2 @@ -410,7 +420,7 @@ def test_validate_changed_scope_args_promotes_paths_from_git_diff() -> None: diff_against=None, paths_from_git_diff="HEAD~1", ) - assert cli._validate_changed_scope_args(args=args) == "HEAD~1" + assert cli_changed_scope._validate_changed_scope_args(args=args) == "HEAD~1" assert args.changed_only is True @@ -423,7 +433,7 @@ def test_normalize_changed_paths_relativizes_dedupes_and_sorts(tmp_path: Path) - first.write_text("pass\n", "utf-8") second.write_text("pass\n", "utf-8") - assert cli._normalize_changed_paths( + assert cli_changed_scope._normalize_changed_paths( root_path=root_path, paths=("pkg/b.py", str(second), " pkg/b.py ", ""), ) == ("pkg/a.py", "pkg/b.py") @@ -445,7 +455,11 @@ def _fake_relative_to(self: Path, *other: str | Path) -> Path: monkeypatch.setattr(Path, "relative_to", _fake_relative_to) assert ( - cli._normalize_changed_paths(root_path=root_path, paths=(str(candidate),)) == () + cli_changed_scope._normalize_changed_paths( + root_path=root_path, + paths=(str(candidate),), + ) + == () ) @@ -463,7 +477,10 @@ def _broken_resolve(self: Path, strict: bool = False) -> Path: 
monkeypatch.setattr(Path, "resolve", _broken_resolve) with pytest.raises(SystemExit) as exc: - cli._normalize_changed_paths(root_path=root_path, paths=("broken.py",)) + cli_changed_scope._normalize_changed_paths( + root_path=root_path, + paths=("broken.py",), + ) assert exc.value.code == 2 @@ -476,7 +493,10 @@ def test_normalize_changed_paths_rejects_outside_root(tmp_path: Path) -> None: outside_path.write_text("pass\n", "utf-8") with pytest.raises(SystemExit) as exc: - cli._normalize_changed_paths(root_path=root_path, paths=(str(outside_path),)) + cli_changed_scope._normalize_changed_paths( + root_path=root_path, + paths=(str(outside_path),), + ) assert exc.value.code == 2 @@ -498,7 +518,10 @@ def _run(*args: object, **kwargs: object) -> subprocess.CompletedProcess[str]: ) monkeypatch.setattr(subprocess, "run", _run) - assert cli._git_diff_changed_paths(root_path=root_path, git_diff_ref="HEAD~1") == ( + assert cli_changed_scope._git_diff_changed_paths( + root_path=root_path, + git_diff_ref="HEAD~1", + ) == ( "pkg/a.py", "pkg/b.py", ) @@ -514,14 +537,17 @@ def _run(*args: object, **kwargs: object) -> subprocess.CompletedProcess[str]: monkeypatch.setattr(subprocess, "run", _run) with pytest.raises(SystemExit) as exc: - cli._git_diff_changed_paths(root_path=tmp_path.resolve(), git_diff_ref="HEAD~1") + cli_changed_scope._git_diff_changed_paths( + root_path=tmp_path.resolve(), + git_diff_ref="HEAD~1", + ) assert exc.value.code == 2 def test_git_diff_changed_paths_rejects_option_like_ref(tmp_path: Path) -> None: cli.console = cli._make_console(no_color=True) with pytest.raises(SystemExit) as exc: - cli._git_diff_changed_paths( + cli_changed_scope._git_diff_changed_paths( root_path=tmp_path.resolve(), git_diff_ref="--cached" ) assert exc.value.code == 2 @@ -543,7 +569,7 @@ def test_git_diff_changed_paths_rejects_unsafe_ref_syntax( ) -> None: cli.console = cli._make_console(no_color=True) with pytest.raises(SystemExit) as exc: - cli._git_diff_changed_paths( + 
cli_changed_scope._git_diff_changed_paths( root_path=tmp_path.resolve(), git_diff_ref=git_diff_ref, ) @@ -551,7 +577,7 @@ def test_git_diff_changed_paths_rejects_unsafe_ref_syntax( def test_report_path_origins_ignores_unrelated_equals_tokens() -> None: - assert cli._report_path_origins(("--unknown=value", "--json=out.json")) == { + assert cli_reports._report_path_origins(("--unknown=value", "--json=out.json")) == { "html": None, "json": "explicit", "md": None, @@ -561,7 +587,7 @@ def test_report_path_origins_ignores_unrelated_equals_tokens() -> None: def test_changed_clone_gate_from_report_filters_changed_scope() -> None: - gate = cli._changed_clone_gate_from_report( + gate = cli_changed_scope._changed_clone_gate_from_report( { "findings": { "groups": { @@ -686,7 +712,7 @@ def test_enforce_gating_rewrites_clone_threshold_for_changed_scope( ) -> None: cli.console = cli._make_console(no_color=True) observed: dict[str, object] = {} - analysis = pipeline.AnalysisResult( + analysis = AnalysisResult( func_groups={}, block_groups={}, block_groups_report={}, @@ -703,10 +729,10 @@ def test_enforce_gating_rewrites_clone_threshold_for_changed_scope( segment_groups_raw_digest="", ) - def _fake_gate(**kwargs: object) -> pipeline.GatingResult: - gate_analysis = cast("pipeline.AnalysisResult", kwargs["analysis"]) + def _fake_gate(**kwargs: object) -> GatingResult: + gate_analysis = cast("AnalysisResult", kwargs["analysis"]) observed["clone_threshold_total"] = gate_analysis.func_clones_count - return pipeline.GatingResult( + return GatingResult( exit_code=3, reasons=("clone:threshold:2:1",), ) @@ -723,7 +749,7 @@ def _fake_gate(**kwargs: object) -> pipeline.GatingResult: with pytest.raises(SystemExit) as exc: cli._enforce_gating( args=Namespace(fail_threshold=1, verbose=False), - boot=cast("pipeline.BootstrapResult", object()), + boot=cast("BootstrapResult", object()), analysis=analysis, processing=cast(Any, Namespace(source_read_failures=[])), source_read_contract_failure=False, 
@@ -750,7 +776,7 @@ def test_enforce_gating_drops_rewritten_threshold_when_changed_scope_is_within_l ) -> None: cli.console = cli._make_console(no_color=True) observed: dict[str, object] = {} - analysis = pipeline.AnalysisResult( + analysis = AnalysisResult( func_groups={}, block_groups={}, block_groups_report={}, @@ -767,10 +793,10 @@ def test_enforce_gating_drops_rewritten_threshold_when_changed_scope_is_within_l segment_groups_raw_digest="", ) - def _fake_gate(**kwargs: object) -> pipeline.GatingResult: - gate_analysis = cast("pipeline.AnalysisResult", kwargs["analysis"]) + def _fake_gate(**kwargs: object) -> GatingResult: + gate_analysis = cast("AnalysisResult", kwargs["analysis"]) observed["clone_threshold_total"] = gate_analysis.func_clones_count - return pipeline.GatingResult(exit_code=0, reasons=()) + return GatingResult(exit_code=0, reasons=()) monkeypatch.setattr(cli, "gate", _fake_gate) monkeypatch.setattr( @@ -781,7 +807,7 @@ def _fake_gate(**kwargs: object) -> pipeline.GatingResult: cli._enforce_gating( args=Namespace(fail_threshold=5, verbose=False), - boot=cast("pipeline.BootstrapResult", object()), + boot=cast("BootstrapResult", object()), analysis=analysis, processing=cast(Any, Namespace(source_read_failures=[])), source_read_contract_failure=False, @@ -893,7 +919,7 @@ def test_main_impl_prints_changed_scope_when_changed_projection_is_available( monkeypatch.setattr( cli, "_changed_clone_gate_from_report", - lambda _report, changed_paths: cli.ChangedCloneGate( + lambda _report, changed_paths: cli_changed_scope.ChangedCloneGate( changed_paths=tuple(changed_paths), new_func=frozenset(), new_block=frozenset(), @@ -1358,7 +1384,7 @@ def test_configure_metrics_mode_rejects_skip_metrics_with_metrics_flags( skip_dependencies=False, ) with pytest.raises(SystemExit) as exc: - cli._configure_metrics_mode(args=args, metrics_baseline_exists=False) + cli_runtime._configure_metrics_mode(args=args, metrics_baseline_exists=False) assert exc.value.code == 2 @@ 
-1376,7 +1402,7 @@ def test_configure_metrics_mode_forces_dependency_and_dead_code_when_gated() -> skip_dead_code=True, skip_dependencies=True, ) - cli._configure_metrics_mode(args=args, metrics_baseline_exists=True) + cli_runtime._configure_metrics_mode(args=args, metrics_baseline_exists=True) assert args.skip_dead_code is False assert args.skip_dependencies is False @@ -1406,7 +1432,7 @@ def test_configure_metrics_mode_does_not_force_api_surface_for_baseline_update() coverage_xml=None, ) - cli._configure_metrics_mode(args=args, metrics_baseline_exists=True) + cli_runtime._configure_metrics_mode(args=args, metrics_baseline_exists=True) assert args.api_surface is False @@ -1434,7 +1460,7 @@ def test_configure_metrics_mode_forces_api_surface_for_api_break_gate() -> None: coverage_xml=None, ) - cli._configure_metrics_mode(args=args, metrics_baseline_exists=True) + cli_runtime._configure_metrics_mode(args=args, metrics_baseline_exists=True) assert args.api_surface is True @@ -1442,20 +1468,20 @@ def test_configure_metrics_mode_forces_api_surface_for_api_break_gate() -> None: def test_probe_metrics_baseline_section_for_non_object_payload(tmp_path: Path) -> None: path = tmp_path / "baseline.json" path.write_text("[]", "utf-8") - probe = cli._probe_metrics_baseline_section(path) + probe = cli_baselines_mod._probe_metrics_baseline_section(path) assert probe.has_metrics_section is True assert probe.payload is None def test_metrics_computed_respects_skip_switches() -> None: - assert cli._metrics_computed( + assert cli_runtime._metrics_computed( Namespace( skip_metrics=False, skip_dependencies=True, skip_dead_code=True, ) ) == ("complexity", "coupling", "cohesion", "health", "coverage_adoption") - assert cli._metrics_computed( + assert cli_runtime._metrics_computed( Namespace( skip_metrics=False, skip_dependencies=False, @@ -1473,7 +1499,7 @@ def test_metrics_computed_respects_skip_switches() -> None: def test_metrics_computed_includes_api_surface_only_when_enabled() -> 
None: - assert cli._metrics_computed( + assert cli_runtime._metrics_computed( Namespace( skip_metrics=False, skip_dependencies=True, @@ -1481,7 +1507,7 @@ def test_metrics_computed_includes_api_surface_only_when_enabled() -> None: api_surface=False, ) ) == ("complexity", "coupling", "cohesion", "health", "coverage_adoption") - assert cli._metrics_computed( + assert cli_runtime._metrics_computed( Namespace( skip_metrics=False, skip_dependencies=True, @@ -1499,7 +1525,7 @@ def test_metrics_computed_includes_api_surface_only_when_enabled() -> None: def test_metrics_computed_includes_coverage_join_only_with_xml() -> None: - assert cli._metrics_computed( + assert cli_runtime._metrics_computed( Namespace( skip_metrics=False, skip_dependencies=True, @@ -1508,7 +1534,7 @@ def test_metrics_computed_includes_coverage_join_only_with_xml() -> None: coverage_xml=None, ) ) == ("complexity", "coupling", "cohesion", "health", "coverage_adoption") - assert cli._metrics_computed( + assert cli_runtime._metrics_computed( Namespace( skip_metrics=False, skip_dependencies=True, @@ -1530,7 +1556,7 @@ def test_enforce_gating_requires_coverage_input_for_hotspot_gate( monkeypatch: pytest.MonkeyPatch, ) -> None: cli.console = cli._make_console(no_color=True) - monkeypatch.setattr(cli, "gate", lambda **_kwargs: pipeline.GatingResult(0, ())) + monkeypatch.setattr(cli, "gate", lambda **_kwargs: GatingResult(0, ())) with pytest.raises(SystemExit) as exc: cli._enforce_gating( args=Namespace( @@ -1538,7 +1564,7 @@ def test_enforce_gating_requires_coverage_input_for_hotspot_gate( fail_threshold=-1, verbose=False, ), - boot=cast("pipeline.BootstrapResult", object()), + boot=cast("BootstrapResult", object()), analysis=cast(Any, SimpleNamespace(coverage_join=None)), processing=cast(Any, Namespace(source_read_failures=[])), source_read_contract_failure=False, @@ -1556,7 +1582,7 @@ def test_enforce_gating_requires_valid_coverage_input_for_hotspot_gate( monkeypatch: pytest.MonkeyPatch, ) -> None: 
cli.console = cli._make_console(no_color=True) - monkeypatch.setattr(cli, "gate", lambda **_kwargs: pipeline.GatingResult(0, ())) + monkeypatch.setattr(cli, "gate", lambda **_kwargs: GatingResult(0, ())) with pytest.raises(SystemExit) as exc: cli._enforce_gating( args=Namespace( @@ -1564,7 +1590,7 @@ def test_enforce_gating_requires_valid_coverage_input_for_hotspot_gate( fail_threshold=-1, verbose=False, ), - boot=cast("pipeline.BootstrapResult", object()), + boot=cast("BootstrapResult", object()), analysis=cast( Any, SimpleNamespace( @@ -1633,8 +1659,8 @@ def _resolve(self: Path, *, strict: bool = False) -> Path: assert os.environ.get("CODECLONE_DEBUG") == "1" -def _stub_discovery_result() -> pipeline.DiscoveryResult: - return pipeline.DiscoveryResult( +def _stub_discovery_result() -> DiscoveryResult: + return DiscoveryResult( files_found=0, cache_hits=0, files_skipped=0, @@ -1651,8 +1677,8 @@ def _stub_discovery_result() -> pipeline.DiscoveryResult: ) -def _stub_processing_result() -> pipeline.ProcessingResult: - return pipeline.ProcessingResult( +def _stub_processing_result() -> ProcessingResult: + return ProcessingResult( units=(), blocks=(), segments=(), @@ -1674,8 +1700,8 @@ def _stub_processing_result() -> pipeline.ProcessingResult: def _stub_analysis_result( *, project_metrics: ProjectMetrics | None = None, -) -> pipeline.AnalysisResult: - return pipeline.AnalysisResult( +) -> AnalysisResult: + return AnalysisResult( func_groups={}, block_groups={}, block_groups_report={}, @@ -1840,7 +1866,7 @@ def test_main_impl_prints_metric_gate_reasons_and_exits_gating_failure( monkeypatch.setattr( cli, "gate", - lambda **_kwargs: pipeline.GatingResult( + lambda **_kwargs: GatingResult( exit_code=3, reasons=( "metric:Health score regressed vs metrics baseline: delta=-1.", @@ -2123,11 +2149,11 @@ def test_main_impl_ci_enables_fail_on_new_metrics_when_metrics_baseline_loaded( observed: dict[str, bool] = {} - def _capture_gate(**kwargs: object) -> pipeline.GatingResult: + 
def _capture_gate(**kwargs: object) -> GatingResult: boot = kwargs["boot"] - assert isinstance(boot, pipeline.BootstrapResult) + assert isinstance(boot, BootstrapResult) observed["fail_on_new_metrics"] = bool(boot.args.fail_on_new_metrics) - return pipeline.GatingResult(exit_code=0, reasons=()) + return GatingResult(exit_code=0, reasons=()) monkeypatch.setattr(cli, "gate", _capture_gate) monkeypatch.setattr( @@ -2151,7 +2177,7 @@ def _capture_gate(**kwargs: object) -> pipeline.GatingResult: def test_print_verbose_clone_hashes_noop_on_empty() -> None: printer = _RecordingPrinter() - cli._print_verbose_clone_hashes( + cli_console._print_verbose_clone_hashes( printer, label="Function clone hashes", clone_hashes=set(), @@ -2161,7 +2187,7 @@ def test_print_verbose_clone_hashes_noop_on_empty() -> None: def test_print_verbose_clone_hashes_prints_sorted_values() -> None: printer = _RecordingPrinter() - cli._print_verbose_clone_hashes( + cli_console._print_verbose_clone_hashes( printer, label="Block clone hashes", clone_hashes={"b-hash", "a-hash"}, diff --git a/tests/test_core_branch_coverage.py b/tests/test_core_branch_coverage.py index 0a92b25..1a058bf 100644 --- a/tests/test_core_branch_coverage.py +++ b/tests/test_core_branch_coverage.py @@ -14,38 +14,48 @@ import orjson import pytest -import codeclone.core as pipeline import codeclone.core.discovery as core_discovery import codeclone.core.pipeline as core_pipeline -import codeclone.surfaces.cli.main as cli +import codeclone.surfaces.cli.console as cli_console +import codeclone.surfaces.cli.workflow as cli from codeclone.analysis.normalizer import NormalizationConfig -from codeclone.cache import ( - Cache, - CacheEntry, - SegmentReportProjection, +from codeclone.cache._canonicalize import ( _as_file_stat_dict, - _as_risk_literal, + _has_cache_entry_container_shape, +) +from codeclone.cache._validators import _is_dead_candidate_dict +from codeclone.cache._wire_decode import ( _decode_wire_file_entry, 
_decode_wire_structural_findings_optional, _decode_wire_structural_group, _decode_wire_structural_occurrence, _decode_wire_structural_signature, _decode_wire_unit, - _encode_wire_file_entry, - _has_cache_entry_container_shape, - _is_dead_candidate_dict, +) +from codeclone.cache._wire_encode import _encode_wire_file_entry +from codeclone.cache.entries import CacheEntry, _as_risk_literal +from codeclone.cache.projection import ( + SegmentReportProjection, build_segment_report_projection, + decode_segment_report_projection, ) -from codeclone.cache.projection import decode_segment_report_projection +from codeclone.cache.store import Cache from codeclone.contracts.errors import CacheError from codeclone.core._types import ( + AnalysisResult, + BootstrapResult, + DiscoveryResult, + OutputPaths, + ProcessingResult, _coerce_segment_report_projection, _segment_groups_digest, ) +from codeclone.core.discovery import discover from codeclone.core.discovery_cache import ( _cache_entry_source_stats, decode_cached_structural_finding_group, ) +from codeclone.core.pipeline import analyze from codeclone.findings.clones.grouping import build_segment_groups from codeclone.models import ( BlockUnit, @@ -534,7 +544,7 @@ def _must_not_run( monkeypatch.setattr(core_pipeline, "prepare_segment_report_groups", _must_not_run) - boot = pipeline.BootstrapResult( + boot = BootstrapResult( root=Path("."), config=NormalizationConfig(), args=Namespace( @@ -545,10 +555,10 @@ def _must_not_run( min_stmt=1, processes=1, ), - output_paths=pipeline.OutputPaths(), + output_paths=OutputPaths(), cache_path=Path("cache.json"), ) - discovery = pipeline.DiscoveryResult( + discovery = DiscoveryResult( files_found=0, cache_hits=0, files_skipped=0, @@ -567,7 +577,7 @@ def _must_not_run( cached_projection, ), ) - processing = pipeline.ProcessingResult( + processing = ProcessingResult( units=(), blocks=(), segments=(seg_item_a, seg_item_b), @@ -585,7 +595,7 @@ def _must_not_run( source_read_failures=(), ) - result 
= pipeline.analyze(boot=boot, discovery=discovery, processing=processing) + result = analyze(boot=boot, discovery=discovery, processing=processing) assert result.suppressed_segment_groups == 7 assert result.segment_groups == cached_projection["groups"] assert result.segment_groups_raw_digest == digest @@ -606,7 +616,7 @@ def test_pipeline_coerce_segment_projection_invalid_shapes() -> None: def test_pipeline_analyze_tracks_suppressed_dead_code_candidates() -> None: - boot = pipeline.BootstrapResult( + boot = BootstrapResult( root=Path("."), config=NormalizationConfig(), args=Namespace( @@ -617,10 +627,10 @@ def test_pipeline_analyze_tracks_suppressed_dead_code_candidates() -> None: min_stmt=1, processes=1, ), - output_paths=pipeline.OutputPaths(), + output_paths=OutputPaths(), cache_path=Path("cache.json"), ) - discovery = pipeline.DiscoveryResult( + discovery = DiscoveryResult( files_found=1, cache_hits=0, files_skipped=0, @@ -635,7 +645,7 @@ def test_pipeline_analyze_tracks_suppressed_dead_code_candidates() -> None: files_to_process=(), skipped_warnings=(), ) - processing = pipeline.ProcessingResult( + processing = ProcessingResult( units=(), blocks=(), segments=(), @@ -663,7 +673,7 @@ def test_pipeline_analyze_tracks_suppressed_dead_code_candidates() -> None: source_read_failures=(), ) - result = pipeline.analyze(boot=boot, discovery=discovery, processing=processing) + result = analyze(boot=boot, discovery=discovery, processing=processing) assert result.project_metrics is not None assert result.project_metrics.dead_code == () assert result.suppressed_dead_code_items == 1 @@ -698,7 +708,7 @@ def _discover_with_single_cached_entry( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, cached_entry: dict[str, object], -) -> pipeline.DiscoveryResult: +) -> DiscoveryResult: source = tmp_path / "a.py" source.write_text("def f():\n return 1\n", "utf-8") filepath = str(source) @@ -709,16 +719,16 @@ class _FakeCache: def get_file_entry(self, _path: str) -> dict[str, object]: 
return cache_entry - boot = pipeline.BootstrapResult( + boot = BootstrapResult( root=tmp_path, config=NormalizationConfig(), args=Namespace(skip_metrics=False, min_loc=1, min_stmt=1, processes=1), - output_paths=pipeline.OutputPaths(), + output_paths=OutputPaths(), cache_path=tmp_path / "cache.json", ) monkeypatch.setattr(core_discovery, "iter_py_files", lambda _root: [filepath]) monkeypatch.setattr(core_discovery, "file_stat_signature", lambda _path: stat) - return pipeline.discover(boot=boot, cache=cast(Cache, _FakeCache())) + return discover(boot=boot, cache=cast(Cache, _FakeCache())) @pytest.mark.parametrize( @@ -858,40 +868,40 @@ def test_pipeline_cached_source_stats_helper_invalid_shapes() -> None: def test_cli_metric_reason_parser_and_policy_context() -> None: - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "New high-risk functions vs metrics baseline: 1." ) == ("new_high_risk_functions", "1") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "New high-coupling classes vs metrics baseline: 2." ) == ("new_high_coupling_classes", "2") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "New dependency cycles vs metrics baseline: 3." ) == ("new_dependency_cycles", "3") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "New dead code items vs metrics baseline: 4." ) == ("new_dead_code_items", "4") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "Health score regressed vs metrics baseline: delta=-7." ) == ("health_delta", "-7") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "Dependency cycles detected: 3 cycle(s)." ) == ("dependency_cycles", "3") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "Dead code detected (high confidence): 2 item(s)." 
) == ("dead_code_items", "2") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "Complexity threshold exceeded: max=11, threshold=10." ) == ("complexity_max", "11 (threshold=10)") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "Coupling threshold exceeded: max=12, threshold=9." ) == ("coupling_max", "12 (threshold=9)") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "Cohesion threshold exceeded: max=13, threshold=8." ) == ("cohesion_max", "13 (threshold=8)") - assert cli._parse_metric_reason_entry( + assert cli_console._parse_metric_reason_entry( "Health score below threshold: score=70, threshold=80." ) == ("health_score", "70 (threshold=80)") - assert cli._parse_metric_reason_entry("custom reason.") == ( + assert cli_console._parse_metric_reason_entry("custom reason.") == ( "detail", "custom reason", ) @@ -932,18 +942,18 @@ def test_cli_run_analysis_stages_handles_cache_save_error( monkeypatch: pytest.MonkeyPatch, ) -> None: args = Namespace(quiet=False, no_progress=False, skip_metrics=True) - boot = pipeline.BootstrapResult( + boot = BootstrapResult( root=Path("."), config=NormalizationConfig(), args=args, - output_paths=pipeline.OutputPaths(), + output_paths=OutputPaths(), cache_path=Path("cache.json"), ) monkeypatch.setattr( cli, "discover", - lambda **_kwargs: pipeline.DiscoveryResult( + lambda **_kwargs: DiscoveryResult( files_found=0, cache_hits=0, files_skipped=0, @@ -962,7 +972,7 @@ def test_cli_run_analysis_stages_handles_cache_save_error( monkeypatch.setattr( cli, "process", - lambda **_kwargs: pipeline.ProcessingResult( + lambda **_kwargs: ProcessingResult( units=(), blocks=(), segments=(), @@ -983,7 +993,7 @@ def test_cli_run_analysis_stages_handles_cache_save_error( monkeypatch.setattr( cli, "analyze", - lambda **_kwargs: pipeline.AnalysisResult( + lambda **_kwargs: AnalysisResult( func_groups={}, block_groups={}, 
block_groups_report={}, diff --git a/tests/test_coverage_edges.py b/tests/test_coverage_edges.py new file mode 100644 index 0000000..84a100e --- /dev/null +++ b/tests/test_coverage_edges.py @@ -0,0 +1,244 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import argparse +import ast +import operator +from typing import Any, cast + +import pytest + +import codeclone.analysis as analysis_mod +import codeclone.analysis.units as units_mod +import codeclone.config.argparse_builder as argparse_builder_mod +import codeclone.config.spec as spec_mod +import codeclone.report.gates.evaluator as evaluator_mod +import codeclone.surfaces.cli.console as cli_console_mod +import codeclone.surfaces.cli.state as cli_state_mod +from codeclone.analysis.normalizer import NormalizationConfig +from codeclone.config.spec import OptionSpec +from codeclone.contracts.errors import ParseError +from codeclone.report.gates.evaluator import MetricGateConfig +from codeclone.utils.git_diff import validate_git_diff_ref + + +def _report_document() -> dict[str, object]: + return { + "findings": { + "groups": { + "clones": { + "functions": [{"id": "clone:function:new", "novelty": "new"}], + "blocks": [], + } + } + }, + "metrics": { + "families": { + "complexity": {"summary": {"max": 30}}, + "coupling": {"summary": {"max": 12}}, + "cohesion": {"summary": {"max": 4}}, + "dependencies": {"summary": {"cycles": 0}}, + "dead_code": {"summary": {"high_confidence": 1}}, + "health": {"summary": {"score": 90}}, + "coverage_adoption": { + "summary": { + "param_permille": 1000, + "docstring_permille": 1000, + "param_delta": 0, + "return_delta": 0, + "docstring_delta": 0, + } + }, + "api_surface": {"summary": {"breaking": 0}}, + "coverage_join": 
{"summary": {"status": "", "coverage_hotspots": 0}}, + } + }, + } + + +def test_analysis_module_exports_extract_units_directly() -> None: + assert ( + analysis_mod.extract_units_and_stats_from_source + is units_mod.extract_units_and_stats_from_source + ) + with pytest.raises(AttributeError, match="has no attribute 'missing'"): + operator.attrgetter("missing")(analysis_mod) + + +def test_extract_units_rejects_non_module_ast_root( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(units_mod, "_parse_with_limits", lambda *_args: ast.Pass()) + with pytest.raises(ParseError, match="expected module AST root"): + units_mod.extract_units_and_stats_from_source( + source="pass\n", + filepath="pkg/mod.py", + module_name="pkg.mod", + cfg=NormalizationConfig(), + min_loc=1, + min_stmt=1, + ) + + +def test_cli_state_initializes_console_once_and_allows_override( + monkeypatch: pytest.MonkeyPatch, +) -> None: + sentinel = object() + monkeypatch.setattr(cli_state_mod, "console", None) + monkeypatch.setattr(cli_console_mod, "make_plain_console", lambda: sentinel) + + assert cli_state_mod.get_console() is sentinel + assert cli_state_mod.get_console() is sentinel + + replacement = object() + cli_state_mod.set_console(replacement) + assert cli_state_mod.get_console() is replacement + + +def test_validate_git_diff_ref_rejects_control_whitespace_characters() -> None: + with pytest.raises(ValueError, match="whitespace and control characters"): + validate_git_diff_ref("main\tHEAD") + + +def test_add_option_rejects_unsupported_cli_kind() -> None: + parser = argparse.ArgumentParser() + group = parser.add_argument_group("Example") + option = OptionSpec( + dest="broken", + group="Example", + cli_kind=cast(Any, "broken-kind"), + flags=("--broken",), + ) + + with pytest.raises(RuntimeError, match="Unsupported CLI option kind"): + argparse_builder_mod._add_option(group, option=option, version="2.0.0") + + +def 
test_config_spec_option_supports_explicit_pyproject_key_and_conflict_guard( + monkeypatch: pytest.MonkeyPatch, +) -> None: + explicit = spec_mod._option( + dest="baseline_path", + group="Example", + pyproject_type=str, + pyproject_key="baseline-file", + ) + assert explicit.pyproject_key == "baseline-file" + + monkeypatch.setattr( + spec_mod, + "OPTIONS", + ( + spec_mod._option( + dest="first", + group="Example", + pyproject_type=str, + pyproject_key="shared", + ), + spec_mod._option( + dest="second", + group="Example", + pyproject_type=int, + pyproject_key="shared", + ), + ), + ) + with pytest.raises(RuntimeError, match="Conflicting pyproject spec for shared"): + spec_mod._build_pyproject_specs() + + +def test_summarize_metrics_diff_accepts_mapping_payload() -> None: + summary = evaluator_mod.summarize_metrics_diff( + { + "new_high_risk_functions": 2, + "new_high_coupling_classes": 3, + "new_cycles": 4, + "new_dead_code": 5, + "health_delta": -2, + "typing_param_permille_delta": -100, + "typing_return_permille_delta": -200, + "docstring_permille_delta": -300, + "new_api_breaking_changes": 7, + } + ) + + assert summary == { + "new_high_risk_functions": 2, + "new_high_coupling_classes": 3, + "new_cycles": 4, + "new_dead_code": 5, + "health_delta": -2, + "typing_param_permille_delta": -100, + "typing_return_permille_delta": -200, + "docstring_permille_delta": -300, + "new_api_symbols": 0, + "api_breaking_changes": 7, + } + + +def test_metric_gate_reasons_wrapper_uses_report_document_snapshot() -> None: + reasons = evaluator_mod.metric_gate_reasons( + report_document=_report_document(), + config=MetricGateConfig( + fail_complexity=20, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=True, + fail_health=-1, + fail_on_new_metrics=False, + fail_on_typing_regression=False, + fail_on_docstring_regression=False, + fail_on_api_break=False, + fail_on_untested_hotspots=False, + min_typing_coverage=-1, + min_docstring_coverage=-1, + coverage_min=50, 
+ fail_on_new=True, + fail_threshold=0, + ), + metrics_diff={"new_dead_code": 1, "health_delta": -1}, + ) + + assert "Complexity threshold exceeded: max CC=30, threshold=20." in reasons + assert "Dead code detected (high confidence): 1 item(s)." in reasons + assert "New dead code items vs metrics baseline: 1." not in reasons + + +def test_metric_gate_reasons_for_state_skips_missing_builder( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delitem( + evaluator_mod._GATE_REASON_BUILDERS, + "complexity_threshold", + raising=False, + ) + + reasons = evaluator_mod.metric_gate_reasons_for_state( + state=evaluator_mod.GateState(complexity_max=10), + config=MetricGateConfig( + fail_complexity=5, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=False, + fail_health=-1, + fail_on_new_metrics=False, + fail_on_typing_regression=False, + fail_on_docstring_regression=False, + fail_on_api_break=False, + fail_on_untested_hotspots=False, + min_typing_coverage=-1, + min_docstring_coverage=-1, + coverage_min=50, + fail_on_new=False, + fail_threshold=-1, + ), + ) + + assert reasons == () diff --git a/tests/test_detector_golden.py b/tests/test_detector_golden.py index 9d05df2..fc1bebb 100644 --- a/tests/test_detector_golden.py +++ b/tests/test_detector_golden.py @@ -15,7 +15,7 @@ from codeclone.analysis.normalizer import NormalizationConfig from codeclone.analysis.units import extract_units_and_stats_from_source from codeclone.baseline import current_python_tag -from codeclone.report import build_block_groups, build_groups +from codeclone.findings.clones.grouping import build_block_groups, build_groups from codeclone.scanner import module_name_from_path from tests._assertions import snapshot_python_tag diff --git a/tests/test_extractor.py b/tests/test_extractor.py index b1fcdf8..82f327f 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -21,7 +21,7 @@ from codeclone import qualnames from codeclone.analysis.normalizer import 
NormalizationConfig from codeclone.contracts.errors import ParseError -from codeclone.metrics import find_unused +from codeclone.metrics.dead_code import find_unused from codeclone.models import BlockUnit, ClassMetrics, ModuleDep, SegmentUnit, Unit from codeclone.qualnames import FunctionNode, QualnameCollector diff --git a/tests/test_gating.py b/tests/test_gating.py index 04d2ada..641c24d 100644 --- a/tests/test_gating.py +++ b/tests/test_gating.py @@ -13,7 +13,7 @@ ModuleDep, ProjectMetrics, ) -from codeclone.report.gates import MetricGateConfig, evaluate_gates +from codeclone.report.gates.evaluator import MetricGateConfig, evaluate_gates from codeclone.surfaces.mcp.service import CodeCloneMCPService from codeclone.surfaces.mcp.session import ( MCPAnalysisRequest, diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 6405311..1e21f7b 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -20,13 +20,15 @@ REPOSITORY_URL, ) from codeclone.contracts.errors import FileProcessingError +from codeclone.findings.ids import clone_group_id, structural_group_id from codeclone.models import ( StructuralFindingGroup, StructuralFindingOccurrence, Suggestion, SuppressedCloneGroup, ) -from codeclone.report import build_block_group_facts +from codeclone.report.document.builder import build_report_document +from codeclone.report.explain import build_block_group_facts from codeclone.report.html import ( _FileCache, _pygments_css, @@ -37,12 +39,7 @@ build_html_report as _core_build_html_report, ) from codeclone.report.html.widgets.badges import _tab_empty_info -from codeclone.report.json_contract import ( - build_report_document, - clone_group_id, - structural_group_id, -) -from codeclone.report.serialize import render_json_report_document +from codeclone.report.renderers.json import render_json_report_document from tests._assertions import assert_contains_all from tests._report_fixtures import ( REPEATED_ASSERT_SOURCE, diff --git 
a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 60a7250..33ec49d 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -17,9 +17,9 @@ import pytest +import codeclone.surfaces.mcp.server as mcp_server from codeclone import __version__ as CODECLONE_VERSION from codeclone.contracts import REPORT_SCHEMA_VERSION -from codeclone.surfaces.mcp import server as mcp_server from codeclone.surfaces.mcp.server import MCPDependencyError, build_mcp_server from tests._mcp_fixtures import write_quality_fixture as _write_shared_quality_fixture diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index 96bbd8b..b0342b6 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -18,12 +18,12 @@ import pytest +import codeclone.surfaces.mcp.session as mcp_service_mod from codeclone.baseline import Baseline, current_python_tag -from codeclone.cache import Cache -from codeclone.config import ConfigValidationError +from codeclone.cache.store import Cache +from codeclone.config.pyproject_loader import ConfigValidationError from codeclone.contracts import REPORT_SCHEMA_VERSION from codeclone.models import MetricsDiff -from codeclone.surfaces.mcp import session as mcp_service_mod from codeclone.surfaces.mcp.service import CodeCloneMCPService from codeclone.surfaces.mcp.session import ( DetailLevel, @@ -1716,7 +1716,7 @@ def _fake_report(**kwargs: Any) -> object: ) monkeypatch = pytest.MonkeyPatch() - monkeypatch.setattr("codeclone.surfaces.mcp.session.report", _fake_report) + monkeypatch.setattr(mcp_service_mod, "report", _fake_report) try: with pytest.raises(MCPServiceError): service.analyze_repository( diff --git a/tests/test_metrics_modules.py b/tests/test_metrics_modules.py index bc22d1d..ddc8608 100644 --- a/tests/test_metrics_modules.py +++ b/tests/test_metrics_modules.py @@ -11,27 +11,25 @@ import pytest from codeclone.analysis.cfg_model import CFG -from codeclone.metrics import ( - HealthInputs, +from codeclone.metrics import 
complexity as complexity_mod +from codeclone.metrics import coupling as coupling_mod +from codeclone.metrics import health as health_mod +from codeclone.metrics.cohesion import cohesion_risk, compute_lcom4 +from codeclone.metrics.complexity import ( + cyclomatic_complexity, + nesting_depth, + risk_level, +) +from codeclone.metrics.coupling import compute_cbo, coupling_risk +from codeclone.metrics.dead_code import find_suppressed_unused, find_unused +from codeclone.metrics.dependencies import ( build_dep_graph, build_import_graph, - cohesion_risk, - compute_cbo, - compute_health, - compute_lcom4, - coupling_risk, - cyclomatic_complexity, find_cycles, - find_suppressed_unused, - find_unused, longest_chains, max_depth, - nesting_depth, - risk_level, ) -from codeclone.metrics import complexity as complexity_mod -from codeclone.metrics import coupling as coupling_mod -from codeclone.metrics import health as health_mod +from codeclone.metrics.health import HealthInputs, compute_health from codeclone.models import DeadCandidate, DeadItem, ModuleDep from codeclone.paths import is_test_filepath @@ -626,3 +624,30 @@ def test_health_helpers_and_compute_health_boundaries() -> None: "dependencies", "coverage", } + + +def test_health_dependency_depth_safe_zone_matches_html_threshold() -> None: + def _health_inputs(*, dependency_max_depth: int) -> HealthInputs: + return HealthInputs( + files_found=10, + files_analyzed_or_cached=10, + function_clone_groups=0, + block_clone_groups=0, + complexity_avg=0.0, + complexity_max=0, + high_risk_functions=0, + coupling_avg=0.0, + coupling_max=0, + high_risk_classes=0, + cohesion_avg=1.0, + low_cohesion_classes=0, + dependency_cycles=0, + dependency_max_depth=dependency_max_depth, + dead_code_items=0, + ) + + safe = compute_health(_health_inputs(dependency_max_depth=8)) + warn = compute_health(_health_inputs(dependency_max_depth=9)) + + assert safe.dimensions["dependencies"] == 100 + assert warn.dimensions["dependencies"] == 96 diff --git 
a/tests/test_metrics_registry.py b/tests/test_metrics_registry.py index 1b54841..00d737d 100644 --- a/tests/test_metrics_registry.py +++ b/tests/test_metrics_registry.py @@ -6,7 +6,8 @@ from __future__ import annotations -from codeclone.metrics import METRIC_FAMILIES, MetricFamily +from codeclone.metrics._base import MetricFamily +from codeclone.metrics.registry import METRIC_FAMILIES def test_registered_metric_families_define_contract_metadata() -> None: diff --git a/tests/test_options_spec_coverage.py b/tests/test_options_spec_coverage.py index ac138ad..f6ae2a0 100644 --- a/tests/test_options_spec_coverage.py +++ b/tests/test_options_spec_coverage.py @@ -4,12 +4,9 @@ import pytest -from codeclone.config import ( - build_parser, - collect_explicit_cli_dests, - load_pyproject_config, - resolve_config, -) +from codeclone.config.argparse_builder import build_parser +from codeclone.config.pyproject_loader import load_pyproject_config +from codeclone.config.resolver import collect_explicit_cli_dests, resolve_config from codeclone.config.spec import PYPROJECT_OPTIONS, TESTABLE_CLI_OPTIONS, OptionSpec diff --git a/tests/test_pipeline_metrics.py b/tests/test_pipeline_metrics.py index 54b5412..ca1ab38 100644 --- a/tests/test_pipeline_metrics.py +++ b/tests/test_pipeline_metrics.py @@ -12,7 +12,7 @@ import pytest -from codeclone.cache import ( +from codeclone.cache.entries import ( ApiParamSpecDict, CacheEntry, ModuleApiSurfaceDict, @@ -44,7 +44,7 @@ ) from codeclone.core.parallelism import _should_use_parallel from codeclone.core.pipeline import compute_project_metrics -from codeclone.metrics import build_overloaded_modules_payload +from codeclone.metrics.overloaded_modules import build_overloaded_modules_payload from codeclone.models import ( ApiBreakingChange, ApiParamSpec, @@ -63,7 +63,7 @@ PublicSymbol, UnitCoverageFact, ) -from codeclone.report.gates import ( +from codeclone.report.gates.evaluator import ( MetricGateConfig, gate_state_from_project_metrics, 
metric_gate_reasons_for_state, diff --git a/tests/test_pipeline_process.py b/tests/test_pipeline_process.py index d5a9c99..45b4588 100644 --- a/tests/test_pipeline_process.py +++ b/tests/test_pipeline_process.py @@ -14,13 +14,29 @@ import pytest -import codeclone.core as pipeline import codeclone.core.parallelism as core_parallelism import codeclone.core.pipeline as core_pipeline import codeclone.core.worker as core_worker from codeclone.analysis.normalizer import NormalizationConfig -from codeclone.cache import Cache, CacheEntry, SourceStatsDict, file_stat_signature +from codeclone.cache.entries import CacheEntry, SourceStatsDict +from codeclone.cache.store import Cache, file_stat_signature +from codeclone.core._types import ( + DEFAULT_RUNTIME_PROCESSES, + AnalysisResult, + BootstrapResult, + DiscoveryResult, + FileProcessResult, + OutputPaths, + ProcessingResult, +) from codeclone.core.discovery_cache import usable_cached_source_stats +from codeclone.core.parallelism import ( + _parallel_min_files, + _resolve_process_count, + process, +) +from codeclone.core.pipeline import analyze +from codeclone.core.reporting import report from codeclone.models import HealthScore, ProjectMetrics @@ -45,8 +61,8 @@ def __init__(self, *args: object, **kwargs: object) -> None: raise AssertionError("ProcessPoolExecutor should not be used for small batches") -def _build_boot(tmp_path: Path, *, processes: int) -> pipeline.BootstrapResult: - return pipeline.BootstrapResult( +def _build_boot(tmp_path: Path, *, processes: int) -> BootstrapResult: + return BootstrapResult( root=tmp_path, config=NormalizationConfig(), args=Namespace( @@ -59,19 +75,19 @@ def _build_boot(tmp_path: Path, *, processes: int) -> pipeline.BootstrapResult: segment_min_stmt=10, skip_metrics=True, ), - output_paths=pipeline.OutputPaths(html=None, json=None, text=None), + output_paths=OutputPaths(html=None, json=None, text=None), cache_path=tmp_path / "cache.json", ) def 
test_resolve_process_count_defaults_in_runtime() -> None: - assert pipeline._resolve_process_count(None) == pipeline.DEFAULT_RUNTIME_PROCESSES - assert pipeline._resolve_process_count(0) == 1 - assert pipeline._resolve_process_count(3) == 3 + assert _resolve_process_count(None) == DEFAULT_RUNTIME_PROCESSES + assert _resolve_process_count(0) == 1 + assert _resolve_process_count(3) == 3 -def _build_discovery(filepaths: tuple[str, ...]) -> pipeline.DiscoveryResult: - return pipeline.DiscoveryResult( +def _build_discovery(filepaths: tuple[str, ...]) -> DiscoveryResult: + return DiscoveryResult( files_found=len(filepaths), cache_hits=0, files_skipped=0, @@ -88,8 +104,8 @@ def _build_discovery(filepaths: tuple[str, ...]) -> pipeline.DiscoveryResult: ) -def _ok_result(filepath: str) -> pipeline.FileProcessResult: - return pipeline.FileProcessResult( +def _ok_result(filepath: str) -> FileProcessResult: + return FileProcessResult( filepath=filepath, success=True, units=[], @@ -119,7 +135,7 @@ def _process_file( block_min_stmt: int = 8, segment_min_loc: int = 20, segment_min_stmt: int = 10, - ) -> pipeline.FileProcessResult: + ) -> FileProcessResult: if expected_root is not None: assert root == expected_root if expected_filepath is not None: @@ -134,9 +150,9 @@ def _process_file( def _build_large_batch_case( tmp_path: Path, -) -> tuple[pipeline.BootstrapResult, pipeline.DiscoveryResult, Cache, list[str]]: +) -> tuple[BootstrapResult, DiscoveryResult, Cache, list[str]]: filepaths: list[str] = [] - for idx in range(pipeline._parallel_min_files(2) + 1): + for idx in range(_parallel_min_files(2) + 1): src = tmp_path / f"a{idx}.py" src.write_text("def f():\n return 1\n", "utf-8") filepaths.append(str(src)) @@ -149,7 +165,7 @@ def _build_large_batch_case( def _build_single_file_process_case( tmp_path: Path, -) -> tuple[str, pipeline.BootstrapResult, pipeline.DiscoveryResult]: +) -> tuple[str, BootstrapResult, DiscoveryResult]: src = tmp_path / "a.py" src.write_text("def f():\n 
return 1\n", "utf-8") filepath = str(src) @@ -163,16 +179,16 @@ def _build_report_case( md_out: bool = False, sarif_out: bool = False, ) -> tuple[ - pipeline.BootstrapResult, - pipeline.DiscoveryResult, - pipeline.ProcessingResult, - pipeline.AnalysisResult, + BootstrapResult, + DiscoveryResult, + ProcessingResult, + AnalysisResult, ]: - boot = pipeline.BootstrapResult( + boot = BootstrapResult( root=tmp_path, config=NormalizationConfig(), args=Namespace(), - output_paths=pipeline.OutputPaths( + output_paths=OutputPaths( json=tmp_path / "report.json" if json_out else None, md=tmp_path / "report.md" if md_out else None, sarif=tmp_path / "report.sarif" if sarif_out else None, @@ -180,7 +196,7 @@ def _build_report_case( cache_path=tmp_path / "cache.json", ) discovery = _build_discovery(()) - processing = pipeline.ProcessingResult( + processing = ProcessingResult( units=(), blocks=(), segments=(), @@ -197,7 +213,7 @@ def _build_report_case( failed_files=(), source_read_failures=(), ) - analysis = pipeline.AnalysisResult( + analysis = AnalysisResult( func_groups={}, block_groups={}, block_groups_report={}, @@ -230,7 +246,7 @@ def test_process_parallel_fallback_without_callback_uses_sequential( ), ) - result = pipeline.process( + result = process( boot=boot, discovery=discovery, cache=cache, @@ -259,7 +275,7 @@ def test_process_small_batch_skips_parallel_executor( "process_file", _stub_process_file(expected_root=str(tmp_path)), ) - result = pipeline.process( + result = process( boot=boot, discovery=discovery, cache=cache, @@ -283,7 +299,7 @@ def test_process_parallel_failure_large_batch_invokes_fallback_callback( "process_file", _stub_process_file(expected_root=str(tmp_path)), ) - result = pipeline.process( + result = process( boot=boot, discovery=discovery, cache=cache, @@ -298,7 +314,7 @@ def test_process_parallel_failure_large_batch_invokes_fallback_callback( def test_process_parallel_executor_analyzes_real_files(tmp_path: Path) -> None: boot, discovery, cache, 
filepaths = _build_large_batch_case(tmp_path) - result = pipeline.process( + result = process( boot=boot, discovery=discovery, cache=cache, @@ -337,7 +353,7 @@ def save(self) -> None: cache = _LegacyCache() monkeypatch.setattr( - pipeline, + core_worker, "process_file", _stub_process_file( expected_root=str(tmp_path), @@ -345,7 +361,7 @@ def save(self) -> None: ), ) - result = pipeline.process( + result = process( boot=boot, discovery=discovery, cache=cache, # type: ignore[arg-type] @@ -377,7 +393,7 @@ def put_file_entry( raise TypeError("broken cache write") monkeypatch.setattr( - pipeline, + core_worker, "process_file", _stub_process_file( expected_root=str(tmp_path), @@ -386,7 +402,7 @@ def put_file_entry( ) with pytest.raises(TypeError, match="broken cache write"): - pipeline.process( + process( boot=boot, discovery=discovery, cache=_BrokenCache(), # type: ignore[arg-type] @@ -465,13 +481,16 @@ def _guard_import( fromlist: tuple[str, ...] = (), level: int = 0, ) -> object: - if name in {"codeclone.report.markdown", "codeclone.report.sarif"}: + if name in { + "codeclone.report.renderers.markdown", + "codeclone.report.renderers.sarif", + }: raise AssertionError(f"unexpected import: {name}") return original_import(name, globals, locals, fromlist, level) monkeypatch.setattr(builtins, "__import__", _guard_import) - artifacts = pipeline.report( + artifacts = report( boot=boot, discovery=discovery, processing=processing, @@ -492,7 +511,7 @@ def test_analyze_skips_suppressed_dead_code_scan_when_dead_code_is_disabled( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - boot = pipeline.BootstrapResult( + boot = BootstrapResult( root=tmp_path, config=NormalizationConfig(), args=Namespace( @@ -501,11 +520,11 @@ def test_analyze_skips_suppressed_dead_code_scan_when_dead_code_is_disabled( skip_dead_code=True, skip_dependencies=True, ), - output_paths=pipeline.OutputPaths(), + output_paths=OutputPaths(), cache_path=tmp_path / "cache.json", ) discovery = 
_build_discovery(()) - processing = pipeline.ProcessingResult( + processing = ProcessingResult( units=(), blocks=(), segments=(), @@ -563,6 +582,6 @@ def test_analyze_skips_suppressed_dead_code_scan_when_dead_code_is_disabled( lambda **kwargs: {"health": {"score": 100, "grade": "A", "dimensions": {}}}, ) - analysis = pipeline.analyze(boot=boot, discovery=discovery, processing=processing) + analysis = analyze(boot=boot, discovery=discovery, processing=processing) assert analysis.project_metrics == project_metrics assert analysis.suppressed_dead_code_items == 0 diff --git a/tests/test_report.py b/tests/test_report.py index a960a49..54024e0 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -15,32 +15,32 @@ import codeclone.report.merge as merge_mod import codeclone.report.overview as overview_mod -import codeclone.report.serialize as serialize_mod +import codeclone.report.renderers.text as text_renderer_mod from codeclone.contracts import CACHE_VERSION, REPORT_SCHEMA_VERSION +from codeclone.findings.clones.grouping import ( + build_block_groups, + build_groups, + build_segment_groups, +) from codeclone.models import ( StructuralFindingGroup, StructuralFindingOccurrence, Suggestion, SuppressedCloneGroup, ) -from codeclone.report import ( - GroupMap, - build_block_group_facts, - build_block_groups, - build_groups, - build_segment_groups, - prepare_block_report_groups, - prepare_segment_report_groups, - to_markdown_report, - to_sarif_report, -) +from codeclone.report.blocks import prepare_block_report_groups +from codeclone.report.document.builder import build_report_document +from codeclone.report.explain import build_block_group_facts from codeclone.report.html.sections._structural import ( _finding_why_template_html, build_structural_findings_html_panel, ) from codeclone.report.html.widgets.snippets import _FileCache -from codeclone.report.json_contract import build_report_document from codeclone.report.overview import materialize_report_overview +from 
codeclone.report.renderers.json import render_json_report_document +from codeclone.report.renderers.markdown import to_markdown_report +from codeclone.report.renderers.sarif import to_sarif_report +from codeclone.report.renderers.text import render_text_report_document from codeclone.report.segments import ( analyze_segment_statements as _analyze_segment_statements, ) @@ -51,13 +51,11 @@ collect_file_functions as _collect_file_functions, ) from codeclone.report.segments import merge_segment_items as _merge_segment_items +from codeclone.report.segments import prepare_segment_report_groups from codeclone.report.segments import ( segment_statements as _segment_statements, ) -from codeclone.report.serialize import ( - render_json_report_document, - render_text_report_document, -) +from codeclone.report.types import GroupMap from tests._assertions import assert_contains_all, assert_mapping_entries from tests._report_access import ( report_clone_groups as _clone_groups, @@ -2620,10 +2618,10 @@ def test_collect_file_functions_class_and_async(tmp_path: Path) -> None: def test_report_serialize_helpers_and_text_metrics_section() -> None: assert merge_mod.coerce_positive_int(True) == 1 - assert serialize_mod._as_int(True) == 1 - assert serialize_mod._as_int("42") == 42 - assert serialize_mod._as_int("bad") == 0 - assert serialize_mod._as_int(1.2) == 0 + assert text_renderer_mod._as_int(True) == 1 + assert text_renderer_mod._as_int("42") == 42 + assert text_renderer_mod._as_int("bad") == 0 + assert text_renderer_mod._as_int(1.2) == 0 text_report = to_text_report( meta={}, diff --git a/tests/test_report_branch_invariants.py b/tests/test_report_branch_invariants.py index 13c6009..07dd194 100644 --- a/tests/test_report_branch_invariants.py +++ b/tests/test_report_branch_invariants.py @@ -21,16 +21,16 @@ _occurrences_table_html, ) from codeclone.report.html.widgets.snippets import _FileCache -from codeclone.report.markdown import ( +from codeclone.report.overview import 
_health_snapshot +from codeclone.report.renderers.markdown import ( _append_findings_section, _append_metric_items, _location_text, ) -from codeclone.report.markdown import ( +from codeclone.report.renderers.markdown import ( _as_float as _markdown_as_float, ) -from codeclone.report.overview import _health_snapshot -from codeclone.report.sarif import _result_properties +from codeclone.report.renderers.sarif import _result_properties from codeclone.report.suggestions import ( _clone_steps, _clone_summary, diff --git a/tests/test_report_contract_coverage.py b/tests/test_report_contract_coverage.py index fe86b4f..fc536b1 100644 --- a/tests/test_report_contract_coverage.py +++ b/tests/test_report_contract_coverage.py @@ -13,7 +13,7 @@ import pytest -import codeclone.report.json_contract as json_contract_mod +import codeclone.report.document._common as document_common_mod from codeclone.contracts import REPORT_SCHEMA_VERSION from codeclone.models import ( ReportLocation, @@ -23,77 +23,81 @@ ) from codeclone.report import derived as derived_mod from codeclone.report import overview as overview_mod -from codeclone.report.json_contract import ( - _build_design_groups, - _clone_group_assessment, +from codeclone.report.document._common import ( _collect_paths_from_metrics, _collect_report_file_list, - _combined_impact_scope, _contract_path, _count_file_lines, _count_file_lines_for_path, - _csv_values, - _derive_inventory_code_counts, - _findings_summary, _is_absolute_path, _normalize_block_machine_facts, _normalize_nested_string_rows, _parse_ratio_percent, _source_scope_from_filepaths, _source_scope_from_locations, +) +from codeclone.report.document._design_groups import _build_design_groups +from codeclone.report.document._findings_groups import ( + _clone_group_assessment, + _csv_values, _structural_group_assessment, +) +from codeclone.report.document.builder import build_report_document +from codeclone.report.document.derived import ( + _combined_impact_scope, 
_suggestion_finding_id, - build_report_document, ) -from codeclone.report.markdown import ( +from codeclone.report.document.findings import _findings_summary +from codeclone.report.document.inventory import _derive_inventory_code_counts +from codeclone.report.renderers.markdown import ( render_markdown_report_document, to_markdown_report, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _baseline_state as _sarif_baseline_state, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _location_entry as _sarif_location_entry, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _location_message as _sarif_location_message, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _logical_locations as _sarif_logical_locations, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _partial_fingerprints as _sarif_partial_fingerprints, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _primary_location_properties as _sarif_primary_location_properties, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _result_entry as _sarif_result_entry, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _result_message as _sarif_result_message, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _result_properties as _sarif_result_properties, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _rule_name as _sarif_rule_name, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _rule_spec as _sarif_rule_spec, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _scan_root_uri as _sarif_scan_root_uri, ) -from codeclone.report.sarif import ( +from 
codeclone.report.renderers.sarif import ( _severity_to_level, render_sarif_report_document, to_sarif_report, ) -from codeclone.report.sarif import ( +from codeclone.report.renderers.sarif import ( _text as _sarif_text, ) -from codeclone.report.serialize import ( +from codeclone.report.renderers.text import ( _append_clone_section, _append_single_item_findings, _append_structural_findings, @@ -2354,7 +2358,7 @@ def __init__(self, *paths: str) -> None: self.items = tuple(_Occurrence(path) for path in paths) monkeypatch.setattr( - json_contract_mod, + document_common_mod, "normalize_structural_findings", lambda _findings: [_Group("/repo/struct.py", "")], ) diff --git a/tests/test_report_explain.py b/tests/test_report_explain.py index 8fe4e8c..a060ce6 100644 --- a/tests/test_report_explain.py +++ b/tests/test_report_explain.py @@ -8,7 +8,7 @@ from pathlib import Path import codeclone.report.explain as explain_mod -from codeclone.report import build_block_group_facts +from codeclone.report.explain import build_block_group_facts from tests._report_fixtures import ( repeated_block_group_key, write_repeated_assert_source, diff --git a/tests/test_security.py b/tests/test_security.py index edc73ad..18a74c9 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -15,7 +15,7 @@ from codeclone.contracts.errors import ValidationError from codeclone.core._types import MAX_FILE_SIZE from codeclone.core.worker import process_file -from codeclone.report import build_block_group_facts +from codeclone.report.explain import build_block_group_facts from codeclone.report.html import build_html_report from codeclone.scanner import iter_py_files diff --git a/uv.lock b/uv.lock index 2085084..64d288d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,19 @@ version = 1 revision = 3 requires-python = ">=3.10" +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform != 'win32'", + "python_full_version < '3.12' or sys_platform == 'win32'", +] + +[[package]] +name = 
"annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] [[package]] name = "annotated-types" @@ -25,6 +38,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, ] +[[package]] +name = "async-timeout" +version = "5.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a5/ae/136395dfbfe00dfc94da3f3e136d0b13f394cba8f4841120e34226265780/async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3", size = 9274, upload-time = "2024-11-06T16:41:39.6Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" }, +] + [[package]] name = "attrs" version = "26.1.0" @@ -43,6 +65,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/fa/123043af240e49752f1c4bd24da5053b6bd00cad78c2be53c0d1e8b975bc/backports.tarfile-1.2.0-py3-none-any.whl", hash = 
"sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34", size = 30181, upload-time = "2024-05-28T17:01:53.112Z" }, ] +[[package]] +name = "beautifulsoup4" +version = "4.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, +] + +[[package]] +name = "bleach" +version = "6.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/07/18/3c8523962314be6bf4c8989c79ad9531c825210dd13a8669f6b84336e8bd/bleach-6.3.0.tar.gz", hash = "sha256:6f3b91b1c0a02bb9a78b5a454c92506aa0fdf197e1d5e114d2e00c6f64306d22", size = 203533, upload-time = "2025-10-27T17:57:39.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/3a/577b549de0cc09d95f11087ee63c739bba856cd3952697eec4c4bb91350a/bleach-6.3.0-py3-none-any.whl", hash = "sha256:fe10ec77c93ddf3d13a73b035abaac7a9f5e436513864ccdad516693213c65d6", size = 164437, upload-time = "2025-10-27T17:57:37.538Z" }, +] + +[package.optional-dependencies] +css = [ + { name = "tinycss2" }, +] + [[package]] name = "build" version = "1.4.3" @@ -290,6 +342,7 @@ dependencies = [ [package.optional-dependencies] dev = [ { name = "build" }, + { name = "codegraphcontext" }, { name = "mypy" }, { name = "pre-commit" }, { name = "pytest" }, @@ 
-304,6 +357,7 @@ mcp = [ [package.metadata] requires-dist = [ { name = "build", marker = "extra == 'dev'", specifier = ">=1.4.3" }, + { name = "codegraphcontext", marker = "extra == 'dev'", specifier = ">=0.4.2" }, { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.27.0,<2" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.20.1" }, { name = "orjson", specifier = ">=3.11.8" }, @@ -318,6 +372,37 @@ requires-dist = [ ] provides-extras = ["mcp", "dev"] +[[package]] +name = "codegraphcontext" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "falkordb" }, + { name = "falkordblite", marker = "python_full_version >= '3.12' and sys_platform != 'win32'" }, + { name = "fastapi" }, + { name = "inquirerpy" }, + { name = "kuzu", marker = "python_full_version < '3.12' or sys_platform == 'win32'" }, + { name = "nbconvert" }, + { name = "nbformat" }, + { name = "neo4j" }, + { name = "pathspec" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "rich" }, + { name = "stdlibs" }, + { name = "tree-sitter" }, + { name = "tree-sitter-c-sharp" }, + { name = "tree-sitter-language-pack" }, + { name = "typer" }, + { name = "uvicorn" }, + { name = "watchdog" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/de/d0e3923b7582f5b2c650d2a3aeafb07f312b813639fa33a265cdafe99651/codegraphcontext-0.4.2.tar.gz", hash = "sha256:3357cde680a67fc9176155788a931b0e927e7e76aec871680a3bbe8162a2b826", size = 5155083, upload-time = "2026-04-09T10:47:49.189Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/96/4580dba4718cf03741483d92477cb6116992b8b6963a3e4df27f72cb5a65/codegraphcontext-0.4.2-py3-none-any.whl", hash = "sha256:58ed372defd83614d1f7f1be4c4e0fa2730c67606ba94817747c2047ff8055f5", size = 5249182, upload-time = "2026-04-09T10:47:44.143Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -505,6 +590,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/20/2a/1b016902351a523aa2bd446b50a5bc1175d7a7d1cf90fe2ef904f9b84ebc/cryptography-46.0.7-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:258514877e15963bd43b558917bc9f54cf7cf866c38aa576ebf47a77ddbc43a4", size = 3412829, upload-time = "2026-04-08T01:57:48.874Z" }, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, +] + [[package]] name = "distlib" version = "0.4.0" @@ -528,13 +622,74 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "(python_full_version < '3.12' and sys_platform != 'win32') or (python_full_version < '3.13' and sys_platform == 'win32')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, 
upload-time = "2025-11-21T23:01:53.443Z" }, ] +[[package]] +name = "falkordb" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "redis" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/52/5495fcba8e21c269a605092a7c4a8b33ceecae283dbfe76fc53f7f5b50ab/falkordb-1.6.0.tar.gz", hash = "sha256:5c307d973f3fc3987a18478ebd5882f7e842d4225463a8ef5e026970ebfba8c6", size = 98157, upload-time = "2026-02-21T06:36:19.107Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/8b/59ec60885abd3b6b2b3a1e5917627c3cae656b4cff7f847c5217ec3dc952/falkordb-1.6.0-py3-none-any.whl", hash = "sha256:0f190e9d6104595fd51ece4f1e7b5d49d62cfee346d94151d7986a138fd90d89", size = 37378, upload-time = "2026-02-21T06:36:17.769Z" }, +] + +[[package]] +name = "falkordblite" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "falkordb", marker = "python_full_version >= '3.12' and sys_platform != 'win32'" }, + { name = "psutil", marker = "python_full_version >= '3.12' and sys_platform != 'win32'" }, + { name = "redis", marker = "python_full_version >= '3.12' and sys_platform != 'win32'" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6b/c8/b359a7ad9e80e2ea0a17de875b3923be25f6d8adb3118a67f608d163433d/falkordblite-0.9.0.tar.gz", hash = "sha256:14eb34f617a847927e1c55d54c02b01115ad3db87f16adae4ec54e2f540dac64", size = 18068099, upload-time = "2026-03-02T12:36:27.877Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/a0/eba50f288c35380bc93533107c8fd28397b3f24f135c3547887ec72282d4/falkordblite-0.9.0-cp312-cp312-macosx_10_13_x86_64.macosx_15_0_arm64.whl", hash = "sha256:09d8101b82acc35d8b7097bb6ec7243233e7aa48ad13edaaf507b12439b29c85", size = 14930337, upload-time = "2026-03-02T12:36:00.718Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/1d/451c45e3eaa6627d17e38844a1e55f5dcf4b4d735441b9926c5d9ee78374/falkordblite-0.9.0-cp312-cp312-manylinux_2_39_aarch64.whl", hash = "sha256:e473624290cfb06f986662121bdeb3c39aaf96c2e065158a2f5f1554b2a24ce3", size = 28176214, upload-time = "2026-03-02T12:36:03.24Z" }, + { url = "https://files.pythonhosted.org/packages/01/d4/c466e4bdbb22a604ac6d48427290206b85af6982b162f8a8f56aca22a968/falkordblite-0.9.0-cp312-cp312-manylinux_2_39_x86_64.whl", hash = "sha256:1a8643adc5c8fbac1081bd0840e4ee44369213e6ddb44588561f47da140f3e0c", size = 29610627, upload-time = "2026-03-02T12:36:06.251Z" }, + { url = "https://files.pythonhosted.org/packages/93/fb/65823b693fa3944de850b5b52ec6808c2b272b61a5105a28428ae6b89baa/falkordblite-0.9.0-cp313-cp313-macosx_10_13_x86_64.macosx_15_0_arm64.whl", hash = "sha256:e181aa80fa45efe05b64b717d78cb1ba728885d28ca94cf25afe60f3db69a2ee", size = 14930332, upload-time = "2026-03-02T12:36:08.985Z" }, + { url = "https://files.pythonhosted.org/packages/db/ee/6d84ad30e0f7e70a25791aaea32abde2a2cf893f451b307ac9e5b18b9db4/falkordblite-0.9.0-cp313-cp313-manylinux_2_39_aarch64.whl", hash = "sha256:dc3c1c96413c410eddea63730071bfdf46ddd7b6bf08ab7a112bf8911bd37a15", size = 28176213, upload-time = "2026-03-02T12:36:11.745Z" }, + { url = "https://files.pythonhosted.org/packages/8a/79/e132647168c135d1c92b67203d388a340ac2158ac77281d55021ae6adabd/falkordblite-0.9.0-cp313-cp313-manylinux_2_39_x86_64.whl", hash = "sha256:ac4fab5c9e3b2cd4991b4e7f214c07c4d682869182ae06ff8a52c092ae64208f", size = 29610624, upload-time = "2026-03-02T12:36:14.474Z" }, + { url = "https://files.pythonhosted.org/packages/2c/cf/5132ae79444a4cc5947e3d44d39e82ca8c2873bcea7ebd81c407a3366cdd/falkordblite-0.9.0-cp314-cp314-macosx_10_15_x86_64.macosx_15_0_arm64.whl", hash = "sha256:a594daeef3467e8df4a7381dff4e61560a2309a09558c115b045b7bdd049e63c", size = 14930341, upload-time = "2026-03-02T12:36:18.41Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/35/6a51b3f5ea0263294a7fd5c4e1e24e229374dc245520fd9358991e271349/falkordblite-0.9.0-cp314-cp314-manylinux_2_39_aarch64.whl", hash = "sha256:8395d20c520b58d2cbcf72497f5a18d21a51dac13cc683ed31323468d7f27c63", size = 28176212, upload-time = "2026-03-02T12:36:21.662Z" }, + { url = "https://files.pythonhosted.org/packages/84/ab/befc9a1bbb8fc1796fe57a2d91559e17df0207f1adc6d382bc9db27145a2/falkordblite-0.9.0-cp314-cp314-manylinux_2_39_x86_64.whl", hash = "sha256:dd448872b4a59b3df12fd8503938b27d9841be759b5bc41115358edea4c1513c", size = 29610625, upload-time = "2026-03-02T12:36:25.447Z" }, +] + +[[package]] +name = "fastapi" +version = "0.136.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4e/d9/e66315807e41e69e7f6a1b42a162dada2f249c5f06ad3f1a95f84ab336ef/fastapi-0.136.0.tar.gz", hash = "sha256:cf08e067cc66e106e102d9ba659463abfac245200752f8a5b7b1e813de4ff73e", size = 396607, upload-time = "2026-04-16T11:47:13.623Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/a3/0bd5f0cdb0bbc92650e8dc457e9250358411ee5d1b65e42b6632387daf81/fastapi-0.136.0-py3-none-any.whl", hash = "sha256:8793d44ec7378e2be07f8a013cf7f7aa47d6327d0dfe9804862688ec4541a6b4", size = 117556, upload-time = "2026-04-16T11:47:11.922Z" }, +] + +[[package]] +name = "fastjsonschema" +version = "2.21.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/b5/23b216d9d985a956623b6bd12d4086b60f0059b27799f23016af04a74ea1/fastjsonschema-2.21.2.tar.gz", hash = "sha256:b1eb43748041c880796cd077f1a07c3d94e93ae84bba5ed36800a33554ae05de", size = 374130, upload-time = "2025-08-14T18:49:36.666Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/cb/a8/20d0723294217e47de6d9e2e40fd4a9d2f7c4b6ef974babd482a59743694/fastjsonschema-2.21.2-py3-none-any.whl", hash = "sha256:1c797122d0a86c5cace2e54bf4e819c36223b552017172f32c5c024a6b77e463", size = 24024, upload-time = "2025-08-14T18:49:34.776Z" }, +] + [[package]] name = "filelock" version = "3.29.0" @@ -641,6 +796,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "inquirerpy" +version = "0.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pfzy" }, + { name = "prompt-toolkit" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/64/73/7570847b9da026e07053da3bbe2ac7ea6cde6bb2cbd3c7a5a950fa0ae40b/InquirerPy-0.3.4.tar.gz", hash = "sha256:89d2ada0111f337483cb41ae31073108b2ec1e618a49d7110b0d7ade89fc197e", size = 44431, upload-time = "2022-06-27T23:11:20.598Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/ff/3b59672c47c6284e8005b42e84ceba13864aa0f39f067c973d1af02f5d91/InquirerPy-0.3.4-py3-none-any.whl", hash = "sha256:c65fdfbac1fa00e3ee4fb10679f4d3ed7a012abf4833910e63c295827fe2a7d4", size = 67677, upload-time = "2022-06-27T23:11:17.723Z" }, +] + [[package]] name = "jaraco-classes" version = "3.4.0" @@ -686,6 +854,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, ] +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -713,6 +893,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] +[[package]] +name = "jupyter-client" +version = "8.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-core" }, + { name = "python-dateutil" }, + { name = "pyzmq" }, + { name = "tornado" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/e4/ba649102a3bc3fbca54e7239fb924fd434c766f855693d86de0b1f2bec81/jupyter_client-8.8.0.tar.gz", hash = "sha256:d556811419a4f2d96c869af34e854e3f059b7cc2d6d01a9cd9c85c267691be3e", size = 348020, upload-time = "2026-01-08T13:55:47.938Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/0b/ceb7694d864abc0a047649aec263878acb9f792e1fec3e676f22dc9015e3/jupyter_client-8.8.0-py3-none-any.whl", hash = "sha256:f93a5b99c5e23a507b773d3a1136bd6e16c67883ccdbd9a829b0bbdb98cd7d7a", size = 107371, upload-time = "2026-01-08T13:55:45.562Z" }, +] + +[[package]] +name = "jupyter-core" +version = "5.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "platformdirs" }, 
+ { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/49/9d1284d0dc65e2c757b74c6687b6d319b02f822ad039e5c512df9194d9dd/jupyter_core-5.9.1.tar.gz", hash = "sha256:4d09aaff303b9566c3ce657f580bd089ff5c91f5f89cf7d8846c3cdf465b5508", size = 89814, upload-time = "2025-10-16T19:19:18.444Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/e7/80988e32bf6f73919a113473a604f5a8f09094de312b9d52b79c2df7612b/jupyter_core-5.9.1-py3-none-any.whl", hash = "sha256:ebf87fdc6073d142e114c72c9e29a9d7ca03fad818c5d300ce2adc1fb0743407", size = 29032, upload-time = "2025-10-16T19:19:16.783Z" }, +] + +[[package]] +name = "jupyterlab-pygments" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/90/51/9187be60d989df97f5f0aba133fa54e7300f17616e065d1ada7d7646b6d6/jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d", size = 512900, upload-time = "2023-11-23T09:26:37.44Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780", size = 15884, upload-time = "2023-11-23T09:26:34.325Z" }, +] + [[package]] name = "keyring" version = "25.7.0" @@ -731,6 +949,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" }, ] +[[package]] +name = "kuzu" +version = "0.11.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/0c/f141a81485729a072dc527b474e7580d5632309c68ad1a5aa6ed9ac45387/kuzu-0.11.3.tar.gz", hash = 
"sha256:e7bea3ca30c4bb462792eedcaa7f2125c800b243bb4a872e1eedc16917c1967a", size = 19430620, upload-time = "2025-10-10T13:36:54.984Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/3d/830489670618ceb6094b6ddc322e3e6457f5cf4fd6916526b7b8cd6e2c1f/kuzu-0.11.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3d93131f3dc7b647da7a5124c975dc2cc207afc38a9fbb83badc23d2e25dbfec", size = 4093610, upload-time = "2025-10-10T13:35:47.691Z" }, + { url = "https://files.pythonhosted.org/packages/82/1c/7cf246a66a287d466c6cc2f01d5f4e12bd6d23350217c592dc6b51988ed5/kuzu-0.11.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:4f021a6b4e31867a0b49e369431c8f05ef4244cea1cb337c76114649975f56e3", size = 4517385, upload-time = "2025-10-10T13:35:49.471Z" }, + { url = "https://files.pythonhosted.org/packages/d6/35/ed37f146225167ed7a8573a09e2ee4ebafd0111b815cb0532dc823069729/kuzu-0.11.3-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ac7d2a81d7370dc12400431a3331c45954340b96439fcf2b730794cf670684a", size = 6795559, upload-time = "2025-10-10T13:35:50.977Z" }, + { url = "https://files.pythonhosted.org/packages/03/9f/2c8e3cf777aba73d515f73c9491ba65fbebb7852f41abedfc70a26bec229/kuzu-0.11.3-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8621bb470390f95810c7e79e36a670e9a2f27e189ef429f3df892626de63a652", size = 7616362, upload-time = "2025-10-10T13:35:52.717Z" }, + { url = "https://files.pythonhosted.org/packages/fb/02/22789dc00abb34206e6d96b0ae0eb1884b524fb7fdffd5121fc3178a9ed6/kuzu-0.11.3-cp310-cp310-win_amd64.whl", hash = "sha256:126efba7f8a504614f5b307bcb94ac0e6d9f87f7e34b4334ca769ad6cfe2215e", size = 4712323, upload-time = "2025-10-10T13:35:54.44Z" }, + { url = "https://files.pythonhosted.org/packages/46/1b/65d3974551f10d100ca4682b1e4beff23a9c5b7555c6ea552a3855555cc0/kuzu-0.11.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:836739dced9f61912a80bb7ad1df2159cef456c5b5cfe92f15394b9c51a785cb", size = 4094223, 
upload-time = "2025-10-10T13:35:56.023Z" }, + { url = "https://files.pythonhosted.org/packages/a9/e8/0efbc4812796468ca47273fc53c21c63706bc5f7bc4fa3459918d323ced8/kuzu-0.11.3-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:8d4f0e4085d3a85b0e7a8e337082bec6a3cf8c92c9a35209ffe53b2ed212ab08", size = 4519024, upload-time = "2025-10-10T13:35:57.665Z" }, + { url = "https://files.pythonhosted.org/packages/dd/b2/07e81d9f1858a592d1ddc1f02a483718cdfac3315bbca019b13b2ddd8c3e/kuzu-0.11.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1419ada227d15107c2b2536c66ae715c59876585d434b1918c17598956dcd5f7", size = 6796096, upload-time = "2025-10-10T13:35:59.174Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e0/8ea0d289ef6840fbd00e642657ed07d03690a97a01676e2b79d5c3e9ddf8/kuzu-0.11.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef094001d319804fcc8eb72775a184f1119d84af1bace29581a003bd806c36cd", size = 7616892, upload-time = "2025-10-10T13:36:00.866Z" }, + { url = "https://files.pythonhosted.org/packages/a3/d6/9ea65a74c9140e13d7f68dd9d8f95f42b55b9d7750e7a20df3d9b2f09734/kuzu-0.11.3-cp311-cp311-win_amd64.whl", hash = "sha256:eb0858ec8084b10badeae37e730fbe0c3b2846dfe3508001d123087de262efbb", size = 4712696, upload-time = "2025-10-10T13:36:02.607Z" }, + { url = "https://files.pythonhosted.org/packages/64/88/ed193fd0ddfdbdde6c79e96b96df3b760fe48b2626e7151d81a1ed90fd9f/kuzu-0.11.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d865ca31506867cf1ccf50c094c44de96de94bc77ffb350bfcaca0e4c5e469da", size = 4093637, upload-time = "2025-10-10T13:36:04.206Z" }, + { url = "https://files.pythonhosted.org/packages/d6/6d/06e02828b78297d6d99ff3dfb0ab7b5ec5d075053aae33b53189437bbb66/kuzu-0.11.3-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:109372bc16ce6724f88e0312bc686e34145e330d69b163b22ba92f4d3d96b48f", size = 4520482, upload-time = "2025-10-10T13:36:06.302Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/d5/0939a953860a8b373bef7b8a66a4571b27ff9faeb22672d2cd2cf3b6ba15/kuzu-0.11.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d6274da6c470c001b7d332ec78a076395b009c2f267914640884fd6fa78bf47d", size = 6795398, upload-time = "2025-10-10T13:36:08Z" }, + { url = "https://files.pythonhosted.org/packages/7d/5d/8e3dfb89aa3f70f63aa283c523f2dd2ac90a1b3ed990643e3a89909236f9/kuzu-0.11.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1bb3b833ca2d1d919423cb3e0150592c2587562ab85259277f622e6f06e0b487", size = 7615389, upload-time = "2025-10-10T13:36:09.809Z" }, + { url = "https://files.pythonhosted.org/packages/6c/19/c8e93185d6142f01b2e6daec4ad537dfd32afd1f69894889769b725b08c1/kuzu-0.11.3-cp312-cp312-win_amd64.whl", hash = "sha256:605909f744763775b8647014a03526d7f928a7b5a62a8b8c1d1e7bbdaf9dbb6c", size = 4714355, upload-time = "2025-10-10T13:36:11.527Z" }, + { url = "https://files.pythonhosted.org/packages/c9/e4/2c0e222a9b0605745234fec2774a25dd2e472699931f683f15d28ab8c076/kuzu-0.11.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e20ab3e3b20ccf75219872feb86582f959e313eeb59f51131adf4c91ebfabe30", size = 4093664, upload-time = "2025-10-10T13:36:13.116Z" }, + { url = "https://files.pythonhosted.org/packages/88/05/3020ed9a0a7b492597f211f805233b77ef37266a23c27efc40bb7cb37402/kuzu-0.11.3-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:054479d3ce71410b8af2f5fa6aa37883db7fea5b25606af8d3bd7cf717aa5395", size = 4520498, upload-time = "2025-10-10T13:36:14.933Z" }, + { url = "https://files.pythonhosted.org/packages/0f/66/1a502700a7f2863f8f60621a412a7074d7eda9e92f18fd1d8d86905aa4d3/kuzu-0.11.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:143a37f1ae38b6b4337ccfcf42fa4f779a897223fff9c6c29f1a5a5a86911300", size = 6795804, upload-time = "2025-10-10T13:36:16.55Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/c2/1ea8cdc05946cb5906a7ebb451d7268e501ebb51ebecc0437969f8c07450/kuzu-0.11.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:548bfb3045d89bce1fbe89f4a890d636789671abfa80cbde2054c671e6069133", size = 7615668, upload-time = "2025-10-10T13:36:18.882Z" }, + { url = "https://files.pythonhosted.org/packages/d1/c3/336d6181f8f50126cf3d7186b3c5479f9f49d973145f79bed45cf87a9bb7/kuzu-0.11.3-cp313-cp313-win_amd64.whl", hash = "sha256:87bf6c369f182a59e5b8a38b3ca288b90fab2827577d9b0d2170a202c42bc8f5", size = 4714372, upload-time = "2025-10-10T13:36:20.877Z" }, + { url = "https://files.pythonhosted.org/packages/94/db/e7e6cada6dc924eb8939bd35c5f724f5de4fc430a64d6d9e71b75cd0c271/kuzu-0.11.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb5c165bc5838059e498e8325939fc6bac075e1941157e8df6ebdd710135d43b", size = 6798556, upload-time = "2025-10-10T13:36:22.472Z" }, + { url = "https://files.pythonhosted.org/packages/cf/68/a0fa02134cb255c80b5ed5bb5f6130fbbc75a8ae8be4fd6ea6eb6bc8014b/kuzu-0.11.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:88c73dfd3d6a1fb374031050b725236fa9dd9a95424b09b20086a3d274bed51f", size = 7620378, upload-time = "2025-10-10T13:36:24.453Z" }, + { url = "https://files.pythonhosted.org/packages/d6/b7/2a4569984995f09476dbf1ef2e0a7298aa9fdb8896f2e8195d80e11786f4/kuzu-0.11.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9d2752a9e37adda6aef3bf041932ae3a1cf74ca7e893bbbacdd5e62b3ac6f8c2", size = 6795649, upload-time = "2025-10-10T13:36:26.15Z" }, + { url = "https://files.pythonhosted.org/packages/77/13/df6e06a7d7506743c3a6cfbe50ee3f9d3fc58228e2a2fcbe7e74e7c17b00/kuzu-0.11.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86be7d113e4e2c6761b1701079af8aeffc04c6981517f2d6aa393e883cc46036", size = 7615882, upload-time = "2025-10-10T13:36:28.215Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/85/c52c3b167edcc67da3b8788a20a2fb5b4f045060cbe1aed6121ce3ce83d3/kuzu-0.11.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d72ebd88e231b562e7a60ff88d200825d53e78a681bddd7f8d77b78126a5060c", size = 6798657, upload-time = "2025-10-10T13:36:29.935Z" }, + { url = "https://files.pythonhosted.org/packages/06/be/5b4ff168718165c2ff5848ab79e22ecce72ad00522afee6820d390cb0753/kuzu-0.11.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64c7ec822906bdee154eb38d93e64f184d8f94b30bbeaceaa252725f2b9efab3", size = 7620394, upload-time = "2025-10-10T13:36:31.69Z" }, +] + [[package]] name = "librt" version = "0.9.0" @@ -828,6 +1080,91 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, ] +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/4b/3541d44f3937ba468b75da9eebcae497dcf67adb65caa16760b0a6807ebb/markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559", size = 11631, upload-time = "2025-09-27T18:36:05.558Z" }, + { url = "https://files.pythonhosted.org/packages/98/1b/fbd8eed11021cabd9226c37342fa6ca4e8a98d8188a8d9b66740494960e4/markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419", size = 12057, upload-time = "2025-09-27T18:36:07.165Z" }, + { url = "https://files.pythonhosted.org/packages/40/01/e560d658dc0bb8ab762670ece35281dec7b6c1b33f5fbc09ebb57a185519/markupsafe-3.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695", size = 22050, upload-time = "2025-09-27T18:36:08.005Z" }, + { url = "https://files.pythonhosted.org/packages/af/cd/ce6e848bbf2c32314c9b237839119c5a564a59725b53157c856e90937b7a/markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591", size = 20681, upload-time = "2025-09-27T18:36:08.881Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2a/b5c12c809f1c3045c4d580b035a743d12fcde53cf685dbc44660826308da/markupsafe-3.0.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c", size = 20705, upload-time = "2025-09-27T18:36:10.131Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e3/9427a68c82728d0a88c50f890d0fc072a1484de2f3ac1ad0bfc1a7214fd5/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f", size = 21524, upload-time = "2025-09-27T18:36:11.324Z" }, + { url = "https://files.pythonhosted.org/packages/bc/36/23578f29e9e582a4d0278e009b38081dbe363c5e7165113fad546918a232/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6", size = 20282, upload-time = "2025-09-27T18:36:12.573Z" }, + { url = "https://files.pythonhosted.org/packages/56/21/dca11354e756ebd03e036bd8ad58d6d7168c80ce1fe5e75218e4945cbab7/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash 
= "sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1", size = 20745, upload-time = "2025-09-27T18:36:13.504Z" }, + { url = "https://files.pythonhosted.org/packages/87/99/faba9369a7ad6e4d10b6a5fbf71fa2a188fe4a593b15f0963b73859a1bbd/markupsafe-3.0.3-cp310-cp310-win32.whl", hash = "sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa", size = 14571, upload-time = "2025-09-27T18:36:14.779Z" }, + { url = "https://files.pythonhosted.org/packages/d6/25/55dc3ab959917602c96985cb1253efaa4ff42f71194bddeb61eb7278b8be/markupsafe-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8", size = 15056, upload-time = "2025-09-27T18:36:16.125Z" }, + { url = "https://files.pythonhosted.org/packages/d0/9e/0a02226640c255d1da0b8d12e24ac2aa6734da68bff14c05dd53b94a0fc3/markupsafe-3.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1", size = 13932, upload-time = "2025-09-27T18:36:17.311Z" }, + { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631, upload-time = "2025-09-27T18:36:18.185Z" }, + { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058, upload-time = "2025-09-27T18:36:19.444Z" }, + { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287, upload-time = 
"2025-09-27T18:36:20.768Z" }, + { url = "https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940, upload-time = "2025-09-27T18:36:22.249Z" }, + { url = "https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887, upload-time = "2025-09-27T18:36:23.535Z" }, + { url = "https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692, upload-time = "2025-09-27T18:36:24.823Z" }, + { url = "https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471, upload-time = "2025-09-27T18:36:25.95Z" }, + { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923, upload-time = "2025-09-27T18:36:27.109Z" }, + { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572, upload-time = "2025-09-27T18:36:28.045Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077, upload-time = "2025-09-27T18:36:29.025Z" }, + { url = "https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876, upload-time = "2025-09-27T18:36:29.954Z" }, + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { 
url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + 
{ url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + [[package]] name = "mcp" version = "1.27.0" @@ -862,6 +1199,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "mistune" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/55/d01f0c4b45ade6536c51170b9043db8b2ec6ddf4a35c7ea3f5f559ac935b/mistune-3.2.0.tar.gz", hash = "sha256:708487c8a8cdd99c9d90eb3ed4c3ed961246ff78ac82f03418f5183ab70e398a", size = 95467, upload-time = "2025-12-23T11:36:34.994Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/f7/4a5e785ec9fbd65146a27b6b70b6cdc161a66f2024e4b04ac06a67f5578b/mistune-3.2.0-py3-none-any.whl", hash = 
"sha256:febdc629a3c78616b94393c6580551e0e34cc289987ec6c35ed3f4be42d0eee1", size = 53598, upload-time = "2025-12-23T11:36:33.211Z" }, +] + [[package]] name = "more-itertools" version = "11.0.2" @@ -938,6 +1287,73 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "nbclient" +version = "0.10.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-client" }, + { name = "jupyter-core" }, + { name = "nbformat" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/91/1c1d5a4b9a9ebba2b4e32b8c852c2975c872aec1fe42ab5e516b2cecd193/nbclient-0.10.4.tar.gz", hash = "sha256:1e54091b16e6da39e297b0ece3e10f6f29f4ac4e8ee515d29f8a7099bd6553c9", size = 62554, upload-time = "2025-12-23T07:45:46.369Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/83/a0/5b0c2f11142ed1dddec842457d3f65eaf71a0080894eb6f018755b319c3a/nbclient-0.10.4-py3-none-any.whl", hash = "sha256:9162df5a7373d70d606527300a95a975a47c137776cd942e52d9c7e29ff83440", size = 25465, upload-time = "2025-12-23T07:45:44.51Z" }, +] + +[[package]] +name = "nbconvert" +version = "7.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "bleach", extra = ["css"] }, + { name = "defusedxml" }, + { name = "jinja2" }, + { name = "jupyter-core" }, + { name = "jupyterlab-pygments" }, + { name = "markupsafe" }, + { name = "mistune" }, + { name = "nbclient" }, + { name = "nbformat" }, + { name = "packaging" }, + { name = "pandocfilters" }, + { name = "pygments" }, + { name = "traitlets" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/01/b1/708e53fe2e429c103c6e6e159106bcf0357ac41aa4c28772bd8402339051/nbconvert-7.17.1.tar.gz", hash = "sha256:34d0d0a7e73ce3cbab6c5aae8f4f468797280b01fd8bd2ca746da8569eddd7d2", size = 865311, upload-time = "2026-04-08T00:44:14.914Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/f8/bb0a9d5f46819c821dc1f004aa2cc29b1d91453297dbf5ff20470f00f193/nbconvert-7.17.1-py3-none-any.whl", hash = "sha256:aa85c087b435e7bf1ffd03319f658e285f2b89eccab33bc1ba7025495ab3e7c8", size = 261927, upload-time = "2026-04-08T00:44:12.845Z" }, +] + +[[package]] +name = "nbformat" +version = "5.10.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastjsonschema" }, + { name = "jsonschema" }, + { name = "jupyter-core" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/fd/91545e604bc3dad7dca9ed03284086039b294c6b3d75c0d2fa45f9e9caf3/nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a", size = 142749, upload-time = "2024-04-04T11:20:37.371Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b", size = 78454, upload-time = "2024-04-04T11:20:34.895Z" }, +] + +[[package]] +name = "neo4j" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytz" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1b/01/d6ce65e4647f6cb2b9cca3b813978f7329b54b4e36660aaec1ddf0ccce7a/neo4j-6.1.0.tar.gz", hash = "sha256:b5dde8c0d8481e7b6ae3733569d990dd3e5befdc5d452f531ad1884ed3500b84", size = 239629, upload-time = "2026-01-12T11:27:34.777Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/70/5c/ee71e2dd955045425ef44283f40ba1da67673cf06404916ca2950ac0cd39/neo4j-6.1.0-py3-none-any.whl", hash = "sha256:3bd93941f3a3559af197031157220af9fd71f4f93a311db687bd69ffa417b67d", size = 325326, upload-time = "2026-01-12T11:27:33.196Z" }, +] + [[package]] name = "nh3" version = "0.3.4" @@ -1071,6 +1487,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/c2/920ef838e2f0028c8262f16101ec09ebd5969864e5a64c4c05fad0617c56/packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f", size = 95831, upload-time = "2026-04-14T21:12:47.56Z" }, ] +[[package]] +name = "pandocfilters" +version = "1.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/70/6f/3dd4940bbe001c06a65f88e36bad298bc7a0de5036115639926b0c5c0458/pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e", size = 8454, upload-time = "2024-01-18T20:08:13.726Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc", size = 8663, upload-time = "2024-01-18T20:08:11.28Z" }, +] + [[package]] name = "pathspec" version = "1.0.4" @@ -1080,6 +1505,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, ] +[[package]] +name = "pfzy" +version = "0.3.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/5a/32b50c077c86bfccc7bed4881c5a2b823518f5450a30e639db5d3711952e/pfzy-0.3.4.tar.gz", hash = 
"sha256:717ea765dd10b63618e7298b2d98efd819e0b30cd5905c9707223dceeb94b3f1", size = 8396, upload-time = "2022-01-28T02:26:17.946Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8c/d7/8ff98376b1acc4503253b685ea09981697385ce344d4e3935c2af49e044d/pfzy-0.3.4-py3-none-any.whl", hash = "sha256:5f50d5b2b3207fa72e7ec0ef08372ef652685470974a107d0d4999fc5a903a96", size = 8537, upload-time = "2022-01-28T02:26:16.047Z" }, +] + [[package]] name = "platformdirs" version = "4.9.6" @@ -1114,6 +1548,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" }, ] +[[package]] +name = "prompt-toolkit" +version = "3.0.52" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, +] + +[[package]] +name = "psutil" +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" }, + { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" }, + { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" }, + { url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" }, + { url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" }, + { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = 
"2026-01-28T18:15:27.794Z" }, + { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, +] + [[package]] name = "pycparser" version = "3.0" @@ -1125,7 +1593,7 @@ wheels = [ [[package]] name = "pydantic" -version = "2.13.2" +version = "2.13.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, @@ -1133,139 +1601,139 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/09/e5/06d23afac9973109d1e3c8ad38e1547a12e860610e327c05ee686827dc37/pydantic-2.13.2.tar.gz", hash = "sha256:b418196607e61081c3226dcd4f0672f2a194828abb9109e9cfb84026564df2d1", size = 843836, upload-time = "2026-04-17T09:31:59.636Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/e4/40d09941a2cebcb20609b86a559817d5b9291c49dd6f8c87e5feffbe703a/pydantic-2.13.3.tar.gz", hash = "sha256:af09e9d1d09f4e7fe37145c1f577e1d61ceb9a41924bf0094a36506285d0a84d", size = 844068, upload-time = "2026-04-20T14:46:43.632Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/77/ca/b45c378e6e8d0b90577288b533e04e95b7afd61bb1d51b6c263176435489/pydantic-2.13.2-py3-none-any.whl", hash = "sha256:a525087f4c03d7e7456a3de89b64cd693d2229933bb1068b9af6befd5563694e", size = 471947, upload-time = "2026-04-17T09:31:57.541Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/0a/fd7d723f8f8153418fb40cf9c940e82004fce7e987026b08a68a36dd3fe7/pydantic-2.13.3-py3-none-any.whl", hash = "sha256:6db14ac8dfc9a1e57f87ea2c0de670c251240f43cb0c30a5130e9720dc612927", size = 471981, upload-time = "2026-04-20T14:46:41.402Z" }, ] [[package]] name = "pydantic-core" -version = "2.46.2" +version = "2.46.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/43/bb/4742f05b739b2478459bb16fa8470549518c802e06ddcf3f106c5081315e/pydantic_core-2.46.2.tar.gz", hash = "sha256:37bb079f9ee3f1a519392b73fda2a96379b31f2013c6b467fe693e7f2987f596", size = 471269, upload-time = "2026-04-17T09:10:07.017Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/f2/98f37e836c5ba0335432768e0d8645e6f50a3c838b48a74d9256256784fc/pydantic_core-2.46.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:160ef93541f4f84e3e5068e6c1f64d8fd6f57586e5853d609b467d3333f8146a", size = 2108178, upload-time = "2026-04-17T09:10:24.689Z" }, - { url = "https://files.pythonhosted.org/packages/55/69/975458de8e5453322cfc57d6c7029c3e66d9e7a4389c53ddd5ad02d5e5da/pydantic_core-2.46.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a9124b63f4f40a12a0666df57450b4c24b98407ff74349221b869ec085a5d8e", size = 1949232, upload-time = "2026-04-17T09:11:39.536Z" }, - { url = "https://files.pythonhosted.org/packages/94/8d/938175e6e82d051ac4644765680db06571d7e106a42f760da09bd90f6525/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de12004a7da7f1eb67ece37439a5a23a915636085dd042176fda362e006e6940", size = 1974741, upload-time = "2026-04-17T09:13:01.922Z" }, - { url = "https://files.pythonhosted.org/packages/f2/38/7329f8ac5c732bddf15f939c2add40b95170e0ecca5ef124c12def3f78ba/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:a070c7769fec277409ad0b3d55b2f0a3703a6f00cf5031fe93090f155bf56382", size = 2041905, upload-time = "2026-04-17T09:11:11.94Z" }, - { url = "https://files.pythonhosted.org/packages/99/2c/47cfd069937ee5cbc0d9e18fa9795c8f80c49a6b4fc777d4cd870f2ade7b/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41d701bb34f81f0b11c724cc544b9a10b26a28f4d0d1197f2037c91225708706", size = 2222703, upload-time = "2026-04-17T09:10:31.196Z" }, - { url = "https://files.pythonhosted.org/packages/83/b0/7ed83ca8cd92c99bcab90cf42ed953723fbc19d8a20c8c12bb68c51febc1/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19631e7350b7a574fb6b6db222f4b17e8bd31803074b3307d07df62379d2b2e4", size = 2276317, upload-time = "2026-04-17T09:09:53.263Z" }, - { url = "https://files.pythonhosted.org/packages/85/70/50b1b62990996e7916aae2852b29cbf3ecc3fdae78209eb284cd61e2c918/pydantic_core-2.46.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48b1059e4f2a6ec3e41983148eb1eec5ef9fa3a80bbc4ac0893ac76b115fe039", size = 2092152, upload-time = "2026-04-17T09:10:44.683Z" }, - { url = "https://files.pythonhosted.org/packages/c1/51/a062864e6b34ada7e343ad9ed29368e495620a8ef1c009b47a68b46e1634/pydantic_core-2.46.2-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:df73724fce8ad53c670358c905b37930bd7b9d92e57db640a65c53b2706eee00", size = 2118091, upload-time = "2026-04-17T09:10:05.083Z" }, - { url = "https://files.pythonhosted.org/packages/07/e0/fcc97c4d0319615dc0b5b132b420904639652f8514e9c76482acb70ea1d4/pydantic_core-2.46.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a0891a9be0def16fb320af21a198ece052eed72bf44d73d8ff43f702bd26fd6b", size = 2174304, upload-time = "2026-04-17T09:11:00.54Z" }, - { url = "https://files.pythonhosted.org/packages/00/52/28f53796ca74b7e3dd45938f300517f04970e985ad600d0d0f36a11378bd/pydantic_core-2.46.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash 
= "sha256:2ca790779aa1cba1329b8dc42ccebada441d9ac1d932de980183d544682c646d", size = 2181444, upload-time = "2026-04-17T09:11:45.442Z" }, - { url = "https://files.pythonhosted.org/packages/22/49/164d5d3a7356d2607a72e77264a3b252a7c7d9362a81fc9df47bef7ae3aa/pydantic_core-2.46.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:6b865eb702c3af71cf7331919a787563ce2413f7a54ef49ec6709a01b4f22ce6", size = 2328611, upload-time = "2026-04-17T09:10:08.574Z" }, - { url = "https://files.pythonhosted.org/packages/6b/77/6266bb3b79c27b533e5ee02c1e3da5848872112178880cc5006a84e857ac/pydantic_core-2.46.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:631bec5f951a30a4b332b4a57d0cdd5a2c8187eb71301f966425f2e54a697855", size = 2351070, upload-time = "2026-04-17T09:13:34.92Z" }, - { url = "https://files.pythonhosted.org/packages/10/7f/d4233852d16d8e85b034a524d8017e051a0aa4acd04c64c3a69a1a2a0ba6/pydantic_core-2.46.2-cp310-cp310-win32.whl", hash = "sha256:8cbd9d67357f3a925f2af1d44db3e8ef1ce1a293ea0add98081b072d4a12e3b4", size = 1976750, upload-time = "2026-04-17T09:13:15.537Z" }, - { url = "https://files.pythonhosted.org/packages/70/31/d65117cf5f89d81705da5b1dcdad8efa0a0b65dbbc7f13cafbabb7d01615/pydantic_core-2.46.2-cp310-cp310-win_amd64.whl", hash = "sha256:dd51dd16182b4bfdcefd27b39b856aa4a57b77f15b231a2d10c45391b0a02028", size = 2073989, upload-time = "2026-04-17T09:12:17.315Z" }, - { url = "https://files.pythonhosted.org/packages/89/91/089f517a725f29084364169437833ab0ae4da4d7a6ed9d4474db7f1412e6/pydantic_core-2.46.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8060f42db3cd204871db0afd51fef54a13fa544c4dd48cdcae2e174ef40c8ba", size = 2106218, upload-time = "2026-04-17T09:10:48.023Z" }, - { url = "https://files.pythonhosted.org/packages/a0/92/23858ed1b58f2a134e50c2fdd0e34ea72721ccb257e1e9346514e1ccb5b9/pydantic_core-2.46.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:73a9d2809bd8d4a7cda4d336dc996a565eb4feaaa39932f9d85a65fa18382f28", size = 1948087, upload-time = 
"2026-04-17T09:11:58.639Z" }, - { url = "https://files.pythonhosted.org/packages/5d/ac/e2240fccb4794e965817593d5a46cf5ea22f2001b73fe360b7578925b7d8/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b0a2dee92dfaabcfb93629188c3e9cf74fdfc0f22e7c369cb444a98814a1e50", size = 1972931, upload-time = "2026-04-17T09:13:13.304Z" }, - { url = "https://files.pythonhosted.org/packages/1a/da/3b11dab2aa15c5c8ed20a01eb7aa432a78b8e3a4713659f7e58490a020a5/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3098446ba8cf774f61cb8d4008c1dba14a30426a15169cd95ac3392a461193b1", size = 2040454, upload-time = "2026-04-17T09:13:47.895Z" }, - { url = "https://files.pythonhosted.org/packages/d7/39/c4cf5e1f1c6c34c53c0902039c95d81dc15cdd1f03634bd1a93f33e70a72/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57c584af6c375ea3f826d8131a94cb212b3d9926eaff67117e3711bbff3a83a5", size = 2221320, upload-time = "2026-04-17T09:13:08.568Z" }, - { url = "https://files.pythonhosted.org/packages/c7/46/891035bc9e93538e754c3188424d24b5a69ec3ae5210fa01d483e99b3302/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:547381cca999be88b4715a0ed7afa11f07fc7e53cb1883687b190d25a92c56cf", size = 2274559, upload-time = "2026-04-17T09:11:10.257Z" }, - { url = "https://files.pythonhosted.org/packages/ab/d0/7af0b905b3148152c159c9caf203e7ecd9b90b76389f0862e6ab0cf1b2a3/pydantic_core-2.46.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:caeed15dcb1233a5a94bc6ff37ef5393cf5b33a45e4bdfb2d6042f3d24e1cb27", size = 2089239, upload-time = "2026-04-17T09:13:06.326Z" }, - { url = "https://files.pythonhosted.org/packages/c5/bc/566afe02ba2de37712eece74ac7bfba322abd7916410bf90504f1b17ddad/pydantic_core-2.46.2-cp311-cp311-manylinux_2_31_riscv64.whl", hash = 
"sha256:c05f53362568c75476b5c96659377a5dfd982cfbe5a5c07de5106d08a04efc4f", size = 2116182, upload-time = "2026-04-17T09:11:33.738Z" }, - { url = "https://files.pythonhosted.org/packages/4e/5b/3fcb3a229bbfa23b0e3c65014057af0f9d51ec7a2d9f7adb282f41ff5ac8/pydantic_core-2.46.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2643ac7eae296200dbd48762a1c852cf2cad5f5e3eba34e652053cebf03becf8", size = 2172346, upload-time = "2026-04-17T09:10:46.472Z" }, - { url = "https://files.pythonhosted.org/packages/43/9a/baa9e3aa70ea7bbcb9db0f87162a371649ac80c03e43eb54af193390cf17/pydantic_core-2.46.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dc4620a47c6fe6a39f89392c00833a82fc050ce90169798f78a25a8d4df03b6e", size = 2179540, upload-time = "2026-04-17T09:11:21.881Z" }, - { url = "https://files.pythonhosted.org/packages/bd/46/912047a5427f949c909495704b3c8b9ead9d1c66f87e96606011beab1fcb/pydantic_core-2.46.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:78cb0d2453b50bf2035f85fd0d9cfabdb98c47f9c53ddb7c23873cd83da9560b", size = 2327423, upload-time = "2026-04-17T09:13:40.291Z" }, - { url = "https://files.pythonhosted.org/packages/e9/bf/c5e661451dc9411c2ab88a244c1ba57644950c971486040dc200f77b69f4/pydantic_core-2.46.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f0c1cbb7d6112932cc188c6be007a5e2867005a069e47f42fe67bf5f122b0908", size = 2348652, upload-time = "2026-04-17T09:10:37.76Z" }, - { url = "https://files.pythonhosted.org/packages/77/b3/3219e7c522af54b010cf7422dcb11cc6616a4414d1ccd628b0d3f61c6af6/pydantic_core-2.46.2-cp311-cp311-win32.whl", hash = "sha256:c1ce5b2366f85cfdbf7f0907755043707f86d09a5b1b1acebbb7bf1600d75c64", size = 1974410, upload-time = "2026-04-17T09:13:27.392Z" }, - { url = "https://files.pythonhosted.org/packages/e5/29/e5cfac8a74c59873dfd47d3a1477c39ad9247639a7120d3e251a9ff12417/pydantic_core-2.46.2-cp311-cp311-win_amd64.whl", hash = "sha256:f1a6197eadff5bd0bb932f12bb038d403cb75db5b0b391e70e816a647745ddaf", size = 2071158, 
upload-time = "2026-04-17T09:09:57.69Z" }, - { url = "https://files.pythonhosted.org/packages/6f/8b/b7b19b717cdb3675cb109de143f62d4dc62f5d4a0b9879b6f1ace62c6654/pydantic_core-2.46.2-cp311-cp311-win_arm64.whl", hash = "sha256:15e42885b283f87846ee79e161002c5c496ef747a73f6e47054f45a13d9035bc", size = 2043507, upload-time = "2026-04-17T09:09:51.828Z" }, - { url = "https://files.pythonhosted.org/packages/97/ec/2fafa4c86f5d2a69372c7cddef30925fd0e370b1efaf556609c1a0196d8a/pydantic_core-2.46.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ea1ad8c89da31512fe2d249cf0638fb666925bda341901541bc5f3311c6fcc9e", size = 2101729, upload-time = "2026-04-17T09:12:30.042Z" }, - { url = "https://files.pythonhosted.org/packages/cf/55/be5386c2c4b49af346e8a26b748194ff25757bbb6cf544130854e997af7a/pydantic_core-2.46.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b308da17b92481e0587244631c5529e5d91d04cb2b08194825627b1eca28e21e", size = 1951546, upload-time = "2026-04-17T09:10:10.585Z" }, - { url = "https://files.pythonhosted.org/packages/29/92/89e273a055ce440e6636c756379af35ad86da9d336a560049c3ba5e41c80/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d333a50bdd814a917d8d6a7ee35ba2395d53ddaa882613bc24e54a9d8b129095", size = 1976178, upload-time = "2026-04-17T09:11:49.619Z" }, - { url = "https://files.pythonhosted.org/packages/91/b3/e4664469cf70c0cb0f7b2f5719d64e5968bb6f38217042c2afa3d3c4ba17/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1d00b99590c5bd1fabbc5d28b170923e32c1b1071b1f1de1851a4d14d89eb192", size = 2051697, upload-time = "2026-04-17T09:12:04.917Z" }, - { url = "https://files.pythonhosted.org/packages/98/58/dbf68213ee06ce51cdd6d8c95f97980e646858c45bd96bd2dfb40433be73/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9f0e686960ffe9e65066395af856ac2d52c159043144433602c50c221d81c1ba", size = 2233160, upload-time = 
"2026-04-17T09:12:00.956Z" }, - { url = "https://files.pythonhosted.org/packages/f5/d3/68092aa0ee6c60ff4de4740eb82db3d4ce338ec89b3cecb978c532472f12/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d1128da41c9cb474e0a4701f9c363ec645c9d1a02229904c76bf4e0a194fde2", size = 2298398, upload-time = "2026-04-17T09:10:29.694Z" }, - { url = "https://files.pythonhosted.org/packages/e4/51/5d6155eb737db55b0ad354ca5f333ef009f75feb67df2d79a84bace45af6/pydantic_core-2.46.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48649cf2d8c358d79586e9fb2f8235902fcaa2d969ec1c5301f2d1873b2f8321", size = 2094058, upload-time = "2026-04-17T09:12:10.995Z" }, - { url = "https://files.pythonhosted.org/packages/6b/f3/eb4a986197d71319430464ff181226c95adc8f06d932189b158bae5a82f5/pydantic_core-2.46.2-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:b902f0fc7c2cf503865a05718b68147c6cd5d0a3867af38c527be574a9fa6e9d", size = 2130388, upload-time = "2026-04-17T09:12:41.159Z" }, - { url = "https://files.pythonhosted.org/packages/56/00/44a9c4fe6d0f64b5786d6a8c649d6f0e34ba6c89b3663add1066e54451a2/pydantic_core-2.46.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e80011f808b03d1d87a8f1e76ae3da19a18eb706c823e17981dcf1fae43744fc", size = 2184245, upload-time = "2026-04-17T09:12:36.532Z" }, - { url = "https://files.pythonhosted.org/packages/78/6b/685b98a834d5e3d1c34a1bde1627525559dd223b75075bc7490cdb24eb33/pydantic_core-2.46.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b839d5c802e31348b949b6473f8190cddbf7d47475856d8ac995a373ee16ec59", size = 2186842, upload-time = "2026-04-17T09:13:04.054Z" }, - { url = "https://files.pythonhosted.org/packages/22/64/caa2f5a2ac8b6113adaa410ccdf31ba7f54897a6e54cd0d726fc7e780c88/pydantic_core-2.46.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:c6b1064f3f9cf9072e1d59dd2936f9f3b668bec1c37039708c9222db703c0d5b", size = 2336066, upload-time = 
"2026-04-17T09:12:13.006Z" }, - { url = "https://files.pythonhosted.org/packages/ee/f9/7d2701bf82945b5b9e7df8347be97ef6a36da2846bfe5b4afec299ffe27b/pydantic_core-2.46.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:37a68e6f2ac95578ce3c0564802404b27b24988649616e556c07e77111ed3f1d", size = 2363691, upload-time = "2026-04-17T09:13:42.972Z" }, - { url = "https://files.pythonhosted.org/packages/3b/65/0dab11574101522941055109419db3cc09db871643dc3fc74e2413215e5b/pydantic_core-2.46.2-cp312-cp312-win32.whl", hash = "sha256:d9ffa75a7ef4b97d6e5e205fabd4304ef01fec09e6f1bdde04b9ad1b07d20289", size = 1958801, upload-time = "2026-04-17T09:11:31.981Z" }, - { url = "https://files.pythonhosted.org/packages/13/2b/df84baa609c676f6450b8ecad44ea59146c805e3371b7b52443c0899f989/pydantic_core-2.46.2-cp312-cp312-win_amd64.whl", hash = "sha256:0551f2d2ddb68af5a00e26497f8025c538f73ef3cb698f8e5a487042cd2792a8", size = 2072634, upload-time = "2026-04-17T09:11:02.407Z" }, - { url = "https://files.pythonhosted.org/packages/d1/4e/e1ce8029fc438086a946739bf9d596f70ff470aad4a8345555920618cabe/pydantic_core-2.46.2-cp312-cp312-win_arm64.whl", hash = "sha256:83aef30f106edcc21a6a4cc44b82d3169a1dbe255508db788e778f3c804d3583", size = 2026188, upload-time = "2026-04-17T09:13:11.083Z" }, - { url = "https://files.pythonhosted.org/packages/07/2b/662e48254479a2d3450ba24b1e25061108b64339794232f503990c519144/pydantic_core-2.46.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:d26e9eea3715008a09a74585fe9becd0c67fbb145dc4df9756d597d7230a652c", size = 2101762, upload-time = "2026-04-17T09:10:13.87Z" }, - { url = "https://files.pythonhosted.org/packages/73/ab/bafd7c7503757ccc8ec4d1911e106fe474c629443648c51a88f08b0fe91a/pydantic_core-2.46.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:48b36e3235140510dc7861f0cd58b714b1cdd3d48f75e10ce52e69866b746f10", size = 1951814, upload-time = "2026-04-17T09:12:25.934Z" }, - { url = 
"https://files.pythonhosted.org/packages/92/cc/7549c2d57ba2e9a42caa5861a2d398dbe31c02c6aca783253ace59ce84f8/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36b1f99dc451f1a3981f236151465bcf995bbe712d0727c9f7b236fe228a8133", size = 1977329, upload-time = "2026-04-17T09:13:37.605Z" }, - { url = "https://files.pythonhosted.org/packages/18/50/7ed4a8a0d478a4dca8f0134a5efa7193f03cc8520dd4c9509339fb2e5002/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8641c8d535c2d95b45c2e19b646ecd23ebba35d461e0ae48a3498277006250ab", size = 2051832, upload-time = "2026-04-17T09:12:49.771Z" }, - { url = "https://files.pythonhosted.org/packages/dc/16/bb35b193741c0298ddc5f5e4234269efdc0c65e2bcd198aa0de9b68845e4/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:20fb194788a0a50993e87013e693494ba183a2af5b44e99cf060bbae10912b11", size = 2233127, upload-time = "2026-04-17T09:11:04.449Z" }, - { url = "https://files.pythonhosted.org/packages/91/a5/98f4b637149185addea19e1785ea20c373cca31b202f589111d8209d9873/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9262d11d0cd11ee3303a95156939402bed6cedfe5ed0e331b95a283a4da6eb8b", size = 2297418, upload-time = "2026-04-17T09:11:25.929Z" }, - { url = "https://files.pythonhosted.org/packages/36/90/93a5d21990b152da7b7507b7fddb0b935f6a0984d57ac3ec45a6e17777a2/pydantic_core-2.46.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac204542736aa295fa25f713b7fad6fc50b46ab7764d16087575c85f085174f3", size = 2093735, upload-time = "2026-04-17T09:12:06.908Z" }, - { url = "https://files.pythonhosted.org/packages/14/22/b8b1ffdddf08b4e84380bcb67f41dbbf4c171377c1d36fc6290794bb2094/pydantic_core-2.46.2-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9a7c43a0584742dface3ca0daf6f719d46c1ac2f87cf080050f9ae052c75e1b2", size = 2127570, upload-time = 
"2026-04-17T09:11:53.906Z" }, - { url = "https://files.pythonhosted.org/packages/c6/26/e60d72b4e2d0ce1fa811044a974412ac1c567fe067d97b3e6b290530786e/pydantic_core-2.46.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fd05e1edb6a90ad446fa268ab09e59202766b837597b714b2492db11ee87fab9", size = 2183524, upload-time = "2026-04-17T09:11:30.092Z" }, - { url = "https://files.pythonhosted.org/packages/35/32/36bec7584a1eefb17dec4dfa1c946d3fe4440f466c5705b8adfda69c9a9f/pydantic_core-2.46.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:91155b110788b5501abc7ea954f1d08606219e4e28e3c73a94124307c06efb80", size = 2185408, upload-time = "2026-04-17T09:10:57.228Z" }, - { url = "https://files.pythonhosted.org/packages/fc/d6/1a5689d873620efd67d6b163db0c444c056adb0849b5bc33e2b9f09665a6/pydantic_core-2.46.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:e4e2c72a529fa03ff228be1d2b76944013f428220b764e03cc50ada67e17a42c", size = 2335171, upload-time = "2026-04-17T09:11:43.369Z" }, - { url = "https://files.pythonhosted.org/packages/3e/8e/675104802abe8ef502b072050ee5f2e915251aa1a3af87e1015ce31ec42d/pydantic_core-2.46.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:56291ec1a11c3499890c99a8fd9053b47e60fe837a77ec72c0671b1b8b3dce24", size = 2362743, upload-time = "2026-04-17T09:10:18.333Z" }, - { url = "https://files.pythonhosted.org/packages/8d/bc/86c5dde4fa6e24467680eef5047da3c1a19be0a527d0d8e14aa76b39307c/pydantic_core-2.46.2-cp313-cp313-win32.whl", hash = "sha256:b50f9c5f826ddca1246f055148df939f5f3f2d0d96db73de28e2233f22210d4c", size = 1958074, upload-time = "2026-04-17T09:12:38.622Z" }, - { url = "https://files.pythonhosted.org/packages/2a/97/2537e8c1282b2c4eb062580c0d7a4339e10b072b803d1ee0b7f1f0a5c22c/pydantic_core-2.46.2-cp313-cp313-win_amd64.whl", hash = "sha256:251a57788823230ca8cbc99e6245d1a2ed6e180ec4864f251c94182c580c7f2e", size = 2071741, upload-time = "2026-04-17T09:13:32.405Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/aa/2ee75798706f9dbc4e76dbe59e41a396c5c311e3d6223b9cf6a5fa7780be/pydantic_core-2.46.2-cp313-cp313-win_arm64.whl", hash = "sha256:315d32d1a71494d6b4e1e14a9fa7a4329597b4c4340088ad7e1a9dafbeed92a9", size = 2025955, upload-time = "2026-04-17T09:10:15.567Z" }, - { url = "https://files.pythonhosted.org/packages/d0/96/a50ccb6b539ae780f73cea74905468777680e30c6c3bdf714b9d4c116ea0/pydantic_core-2.46.2-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:4f59b45f3ef8650c0c736a57f59031d47ed9df4c0a64e83796849d7d14863a2d", size = 2097111, upload-time = "2026-04-17T09:10:49.617Z" }, - { url = "https://files.pythonhosted.org/packages/34/5f/fdead7b3afa822ab6e5a18ee0ecffd54937de1877c01ed13a342e0fb3f07/pydantic_core-2.46.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3a075a29ebef752784a91532a1a85be6b234ccffec0a9d7978a92696387c3da6", size = 1951904, upload-time = "2026-04-17T09:12:32.062Z" }, - { url = "https://files.pythonhosted.org/packages/95/e0/1c5d547e550cdab1bec737492aa08865337af6fe7fc9b96f7f45f17d9519/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d12d786e30c04a9d307c5d7080bf720d9bac7f1668191d8e37633a9562749e2", size = 1978667, upload-time = "2026-04-17T09:11:35.589Z" }, - { url = "https://files.pythonhosted.org/packages/0e/cb/665ce629e218c8228302cb94beff4f6531082a2c87d3ecc3d5e63a26f392/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0d5e6d6343b0b5dcacb3503b5de90022968da8ed0ab9ab39d3eda71c20cbf84e", size = 2046721, upload-time = "2026-04-17T09:11:47.725Z" }, - { url = "https://files.pythonhosted.org/packages/77/e9/6cb2cf60f54c1472bbdfce19d957553b43dbba79d1d7b2930a195c594785/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:233eebac0999b6b9ba76eb56f3ec8fce13164aa16b6d2225a36a79e0f95b5973", size = 2228483, upload-time = "2026-04-17T09:12:08.837Z" }, - { url = 
"https://files.pythonhosted.org/packages/0d/2a/93e018dd5571f781ebaeda8c0cf65398489d5bee9b1f484df0b6149b43b9/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9cc0eee720dd2f14f3b7c349469402b99ad81a174ab49d3533974529e9d93992", size = 2294663, upload-time = "2026-04-17T09:12:52.053Z" }, - { url = "https://files.pythonhosted.org/packages/5e/4f/49e57ca55c770c93d9bb046666a54949b42e3c9099a0c5fe94557873fe30/pydantic_core-2.46.2-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83ee76bf2c9910513dbc19e7d82367131fa7508dedd6186a462393071cc11059", size = 2098742, upload-time = "2026-04-17T09:13:45.472Z" }, - { url = "https://files.pythonhosted.org/packages/c6/b0/6e46b5cd3332af665f794b8cdeea206618a8630bd9e7bcc36864518fce81/pydantic_core-2.46.2-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:d61db38eb4ee5192f0c261b7f2d38e420b554df8912245e3546aee5c45e2fd78", size = 2125922, upload-time = "2026-04-17T09:12:54.304Z" }, - { url = "https://files.pythonhosted.org/packages/06/d1/40850c81585be443a2abfdf7f795f8fae831baf8e2f9b2133c8246ac671c/pydantic_core-2.46.2-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8f09a713d17bcd55da8ab02ebd9110c5246a49c44182af213b5212800af8bc83", size = 2183000, upload-time = "2026-04-17T09:10:59.027Z" }, - { url = "https://files.pythonhosted.org/packages/04/af/8493d7dfa03ebb7866909e577c6aa65ea0de7377b86023cc51d0c8e11db3/pydantic_core-2.46.2-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:30cacc5fb696e64b8ef6fd31d9549d394dd7d52760db072eecb98e37e3af1677", size = 2180335, upload-time = "2026-04-17T09:12:57.01Z" }, - { url = "https://files.pythonhosted.org/packages/72/5b/1f6a344c4ffdf284da41c6067b82d5ebcbd11ce1b515ae4b662d4adb6f61/pydantic_core-2.46.2-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:7ccfb105fcfe91a22bbb5563ad3dc124bc1aa75bfd2e53a780ab05f78cdf6108", size = 2330002, upload-time = "2026-04-17T09:12:02.958Z" }, - { url = 
"https://files.pythonhosted.org/packages/25/ff/9a694126c12d6d2f48a0cafa6f8eef88ef0d8825600e18d03ff2e896c3b2/pydantic_core-2.46.2-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:13ffef637dc8370c249e5b26bd18e9a80a4fca3d809618c44e18ec834a7ca7a8", size = 2359920, upload-time = "2026-04-17T09:10:27.764Z" }, - { url = "https://files.pythonhosted.org/packages/51/c8/3a35c763d68a9cb2675eb10ef242cf66c5d4701b28ae12e688d67d2c180e/pydantic_core-2.46.2-cp314-cp314-win32.whl", hash = "sha256:1b0ab6d756ca2704a938e6c31b53f290c2f9c10d3914235410302a149de1a83e", size = 1953701, upload-time = "2026-04-17T09:13:30.021Z" }, - { url = "https://files.pythonhosted.org/packages/1a/6a/f2726a780365f7dfd89d62036f984f7acb99978c60c5e1fa7c0cb898ed11/pydantic_core-2.46.2-cp314-cp314-win_amd64.whl", hash = "sha256:99ebade8c9ada4df975372d8dd25883daa0e379a05f1cd0c99aa0c04368d01a6", size = 2071867, upload-time = "2026-04-17T09:10:39.205Z" }, - { url = "https://files.pythonhosted.org/packages/e1/79/76baacb9feba3d7c399b245ca1a29c74ea0db04ea693811374827eec2290/pydantic_core-2.46.2-cp314-cp314-win_arm64.whl", hash = "sha256:de87422197cf7f83db91d89c86a21660d749b3cd76cd8a45d115b8e675670f02", size = 2017252, upload-time = "2026-04-17T09:10:26.175Z" }, - { url = "https://files.pythonhosted.org/packages/f1/3b/77c26938f817668d9ad9bab1a905cb23f11d9a3d4bf724d429b3e55a8eaf/pydantic_core-2.46.2-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:236f22b4a206b5b61db955396b7cf9e2e1ff77f372efe9570128ccfcd6a525eb", size = 2094545, upload-time = "2026-04-17T09:12:19.339Z" }, - { url = "https://files.pythonhosted.org/packages/fe/de/42c13f590e3c260966aa49bcdb1674774f975467c49abd51191e502bea28/pydantic_core-2.46.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c2012f64d2cd7cca50f49f22445aa5a88691ac2b4498ee0a9a977f8ca4f7289f", size = 1933953, upload-time = "2026-04-17T09:09:55.889Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/84/ebe3ebb3e2d8db656937cfa6f97f544cb7132f2307a4a7dfdcd0ea102a12/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d07d6c63106d3a9c9a333e2636f9c82c703b1a9e3b079299e58747964e4fdb72", size = 1974435, upload-time = "2026-04-17T09:10:12.371Z" }, - { url = "https://files.pythonhosted.org/packages/b9/15/0bf51ca6709477cd4ef86148b6d7844f3308f029eac361dd0383f1e17b1a/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c326a2b4b85e959d9a1fc3a11f32f84611b6ec07c053e1828a860edf8d068208", size = 2031113, upload-time = "2026-04-17T09:10:00.752Z" }, - { url = "https://files.pythonhosted.org/packages/02/ae/b7b5af9b79db036d9e61a44c481c17a213dc8fc4b8b71fe6875a72fc778b/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac8a65e798f2462552c00d2e013d532c94d646729dda98458beaf51f9ec7b120", size = 2236325, upload-time = "2026-04-17T09:10:33.227Z" }, - { url = "https://files.pythonhosted.org/packages/a6/ae/ecef7477b5a03d4a499708f7e75d2836452ebb70b776c2d64612b334f57a/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a3c2bc1cc8164bedbc160b7bb1e8cc1e8b9c27f69ae4f9ae2b976cdae02b2dd", size = 2278135, upload-time = "2026-04-17T09:10:23.287Z" }, - { url = "https://files.pythonhosted.org/packages/db/e4/2f9d82faa47af6c39fc3f120145fd915971e1e0cb6b55b494fad9fdf8275/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e69aa5e10b7e8b1bb4a6888650fd12fcbf11d396ca11d4a44de1450875702830", size = 2109071, upload-time = "2026-04-17T09:11:06.149Z" }, - { url = "https://files.pythonhosted.org/packages/f1/9c/677cf10873fbd0b116575ab7b97c90482b21564f8a8040beb18edef7a577/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:4e6df5c3301e65fb42bc5338bf9a1027a02b0a31dc7f54c33775229af474daf0", size = 2106028, upload-time = 
"2026-04-17T09:10:51.525Z" }, - { url = "https://files.pythonhosted.org/packages/d6/53/6a06183544daba51c059123a2064a99039df25f115a06bdb26f2ea177038/pydantic_core-2.46.2-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2c2f6e32548ac8d559b47944effcf8ae4d81c161f6b6c885edc53bc08b8f192d", size = 2164816, upload-time = "2026-04-17T09:11:56.187Z" }, - { url = "https://files.pythonhosted.org/packages/57/6f/10fcdd9e3eca66fc828eef0f6f5850f2dd3bca2c59e6e041fb8bc3da39be/pydantic_core-2.46.2-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:b089a81c58e6ea0485562bbbbbca4f65c0549521606d5ef27fba217aac9b665a", size = 2166130, upload-time = "2026-04-17T09:10:03.804Z" }, - { url = "https://files.pythonhosted.org/packages/29/83/92d3fd0e0156cad2e3cb5c26de73794af78ac9fa0c22ab666e566dd67061/pydantic_core-2.46.2-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:7f700a6d6f64112ae9193709b84303bbab84424ad4b47d0253301aabce9dfc70", size = 2316605, upload-time = "2026-04-17T09:12:45.249Z" }, - { url = "https://files.pythonhosted.org/packages/97/f1/facffdb970981068219582e499b8d0871ed163ffcc6b347de5c412669e4c/pydantic_core-2.46.2-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:67db6814beaa5fefe91101ec7eb9efda613795767be96f7cf58b1ca8c9ca9972", size = 2358385, upload-time = "2026-04-17T09:09:54.657Z" }, - { url = "https://files.pythonhosted.org/packages/8b/a1/b8160b2f22b2199467bc68581a4ed380643c16b348a27d6165c6c242d694/pydantic_core-2.46.2-cp314-cp314t-win32.whl", hash = "sha256:32fbc7447be8e3be99bf7869f7066308f16be55b61f9882c2cefc7931f5c7664", size = 1942373, upload-time = "2026-04-17T09:12:59.594Z" }, - { url = "https://files.pythonhosted.org/packages/0d/90/db89acabe5b150e11d1b59fe3d947dda2ef6abbfef5c82f056ff63802f5d/pydantic_core-2.46.2-cp314-cp314t-win_amd64.whl", hash = "sha256:b317a2b97019c0b95ce99f4f901ae383f40132da6706cdf1731066a73394c25c", size = 2052078, upload-time = "2026-04-17T09:10:19.96Z" }, - { url = 
"https://files.pythonhosted.org/packages/97/32/e19b83ceb07a3f1bb21798407790bbc9a31740158fd132b94139cb84e16c/pydantic_core-2.46.2-cp314-cp314t-win_arm64.whl", hash = "sha256:7dcb9d40930dfad7ab6b20bcc6ca9d2b030b0f347a0cd9909b54bd53ead521b1", size = 2016941, upload-time = "2026-04-17T09:12:34.447Z" }, - { url = "https://files.pythonhosted.org/packages/25/ec/e91aa08df1c33d5e3c2b60c07a1eca9f21809728a824c7b467bb3bda68b5/pydantic_core-2.46.2-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:7c5a5b3dbb9e8918e223be6580da5ffcf861c0505bbc196ebed7176ce05b7b4e", size = 2105046, upload-time = "2026-04-17T09:10:55.614Z" }, - { url = "https://files.pythonhosted.org/packages/f0/73/27112400a0452e375290e7c40aef5cc9844ac0920fb1029238cfc68121fa/pydantic_core-2.46.2-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:bc1e8ce33d5a337f2ba862e0719b8201cd54aaed967406c748e009191d47efdd", size = 1940029, upload-time = "2026-04-17T09:12:21.5Z" }, - { url = "https://files.pythonhosted.org/packages/b1/44/3d39f782bc82ddd0b2d82bde83b408aa40a332cdf6f3018acb34e3d4dcfc/pydantic_core-2.46.2-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b737c0b280f41143266445de2689c0e49c79307e51c44ce3a77fef2bedad4994", size = 1987772, upload-time = "2026-04-17T09:10:02.357Z" }, - { url = "https://files.pythonhosted.org/packages/c4/1a/0242e5b7b6cf51dbccc065029f0420107b6bf7e191fcb918f5cb71218acf/pydantic_core-2.46.2-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b877d597afb82b4898e35354bba55de6f7f048421ae0edadbb9886ec137b532", size = 2138468, upload-time = "2026-04-17T09:11:51.546Z" }, - { url = "https://files.pythonhosted.org/packages/f3/d2/66c146f421178641bda880b0267c0d57dd84f5fec9ecc8e46be17b480742/pydantic_core-2.46.2-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e9fcabd1857492b5bf16f90258babde50f618f55d046b1309972da2396321ff9", size = 2091621, 
upload-time = "2026-04-17T09:12:47.501Z" }, - { url = "https://files.pythonhosted.org/packages/ee/b2/c28419aa9fc8055f4ac8e801d1d11c6357351bfa4321ed9bafab3eb98087/pydantic_core-2.46.2-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:fb3ec2c7f54c07b30d89983ce78dc32c37dd06a972448b8716d609493802d628", size = 1937059, upload-time = "2026-04-17T09:10:53.554Z" }, - { url = "https://files.pythonhosted.org/packages/30/ce/cd0824a2db213dc17113291b7a09b9b0ccd9fbf97daa4b81548703341baf/pydantic_core-2.46.2-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:130a6c837d819ef33e8c2bf702ed2c3429237ea69807f1140943d6f4bdaf52fa", size = 1997278, upload-time = "2026-04-17T09:12:23.784Z" }, - { url = "https://files.pythonhosted.org/packages/c9/69/47283fe3c0c967d3e9e9cd6c42b70907610c8a6f8d6e8381f1bb55f8006c/pydantic_core-2.46.2-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2e25417cec5cd9bddb151e33cb08c50160f317479ecc02b22a95ec18f8fe004", size = 2147096, upload-time = "2026-04-17T09:12:43.124Z" }, - { url = "https://files.pythonhosted.org/packages/16/d5/dec7c127fa722ff56e1ccf1e960ae1318a9f66742135e97bf9771447216f/pydantic_core-2.46.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3ad79ed32004d9de91cacd4b5faaff44d56051392fe1d5526feda596f01af25", size = 2107613, upload-time = "2026-04-17T09:10:36.269Z" }, - { url = "https://files.pythonhosted.org/packages/bc/35/975c109b337260a71c93198baf663982b6b39fe3e584e279548a0969e5d4/pydantic_core-2.46.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d157c48d28eebe5d46906de06a6a2f2c9e00b67d3e42de1f1b9c2d42b810f77c", size = 1947099, upload-time = "2026-04-17T09:12:15.304Z" }, - { url = "https://files.pythonhosted.org/packages/4e/11/52a971a0f9218631690274be533f05e5ddde5547f0823bb3e9dfd1be49f6/pydantic_core-2.46.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7b42c6471288dedc979ac8400d9c9770f03967dd187db1f8d3405d4d182cc714", size = 2133866, upload-time = "2026-04-17T09:12:27.994Z" }, - { url = "https://files.pythonhosted.org/packages/fe/7a/33d94d0698602b2d1712e78c703a33952eb2ca69e02e8e4b208e7f6602b5/pydantic_core-2.46.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4f27bc4801358dc070d6697b41237fce9923d8e69a1ce1e95606ac36c1552dc1", size = 2161721, upload-time = "2026-04-17T09:11:16.111Z" }, - { url = "https://files.pythonhosted.org/packages/b0/cb/0df7ee0a148e9ce0968a80787967ddca9f6b3f8a49152a881b88da262701/pydantic_core-2.46.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e094a8f85db41aa7f6a45c5dac2950afc9862e66832934231962252b5d284eed", size = 2180175, upload-time = "2026-04-17T09:11:41.577Z" }, - { url = "https://files.pythonhosted.org/packages/8e/a8/258a32878140347532be4e44c6f3b1ace3b52b9c9ca7548a65ce18adf4b4/pydantic_core-2.46.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:807eeda5551f6884d3b4421578be37be50ddb7a58832348e99617a6714a73748", size = 2319882, upload-time = "2026-04-17T09:10:21.872Z" }, - { url = "https://files.pythonhosted.org/packages/13/b9/5071c298a0f91314a5402b8c56e0efbcebe77085327d0b4df7dc9cb0b674/pydantic_core-2.46.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fcaa1c3c846a7f6686b38fe493d1b2e8007380e293bfef6a9354563c026cbf36", size = 2348065, upload-time = "2026-04-17T09:11:08.263Z" }, - { url = "https://files.pythonhosted.org/packages/75/f3/0a7087e5f861d66ca64ce927230b397cc264c87b712156e6a93b26a459c8/pydantic_core-2.46.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:154dbfdfb11b8cbd8ff4d00d0b81e3d19f4cb4bedd5aa9f091060ba071474c6a", size = 2192159, upload-time = "2026-04-17T09:11:20.123Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/2a/ef/f7abb56c49382a246fd2ce9c799691e3c3e7175ec74b14d99e798bcddb1a/pydantic_core-2.46.3.tar.gz", hash = 
"sha256:41c178f65b8c29807239d47e6050262eb6bf84eb695e41101e62e38df4a5bc2c", size = 471412, upload-time = "2026-04-20T14:40:56.672Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/98/b50eb9a411e87483b5c65dba4fa430a06bac4234d3403a40e5a9905ebcd0/pydantic_core-2.46.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:1da3786b8018e60349680720158cc19161cc3b4bdd815beb0a321cd5ce1ad5b1", size = 2108971, upload-time = "2026-04-20T14:43:51.945Z" }, + { url = "https://files.pythonhosted.org/packages/08/4b/f364b9d161718ff2217160a4b5d41ce38de60aed91c3689ebffa1c939d23/pydantic_core-2.46.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc0988cb29d21bf4a9d5cf2ef970b5c0e38d8d8e107a493278c05dc6c1dda69f", size = 1949588, upload-time = "2026-04-20T14:44:10.386Z" }, + { url = "https://files.pythonhosted.org/packages/8f/8b/30bd03ee83b2f5e29f5ba8e647ab3c456bf56f2ec72fdbcc0215484a0854/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f9067c3bfadd04c55484b89c0d267981b2f3512850f6f66e1e74204a4e4ce3", size = 1975986, upload-time = "2026-04-20T14:43:57.106Z" }, + { url = "https://files.pythonhosted.org/packages/3c/54/13ccf954d84ec275d5d023d5786e4aa48840bc9f161f2838dc98e1153518/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a642ac886ecf6402d9882d10c405dcf4b902abeb2972cd5fb4a48c83cd59279a", size = 2055830, upload-time = "2026-04-20T14:44:15.499Z" }, + { url = "https://files.pythonhosted.org/packages/be/0e/65f38125e660fdbd72aa858e7dfae893645cfa0e7b13d333e174a367cd23/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79f561438481f28681584b89e2effb22855e2179880314bcddbf5968e935e807", size = 2222340, upload-time = "2026-04-20T14:41:51.353Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/88/f3ab7739efe0e7e80777dbb84c59eb98518e3f57ea433206194c2e425272/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57a973eae4665352a47cf1a99b4ee864620f2fe663a217d7a8da68a1f3a5bfda", size = 2280727, upload-time = "2026-04-20T14:41:30.461Z" }, + { url = "https://files.pythonhosted.org/packages/2a/6d/c228219080817bec4982f9531cadb18da6aaa770fdeb114f49c237ac2c9f/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83d002b97072a53ea150d63e0a3adfae5670cef5aa8a6e490240e482d3b22e57", size = 2092158, upload-time = "2026-04-20T14:44:07.305Z" }, + { url = "https://files.pythonhosted.org/packages/0f/b1/525a16711e7c6d61635fac3b0bd54600b5c5d9f60c6fc5aaab26b64a2297/pydantic_core-2.46.3-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:b40ddd51e7c44b28cfaef746c9d3c506d658885e0a46f9eeef2ee815cbf8e045", size = 2116626, upload-time = "2026-04-20T14:42:34.118Z" }, + { url = "https://files.pythonhosted.org/packages/ef/7c/17d30673351439a6951bf54f564cf2443ab00ae264ec9df00e2efd710eb5/pydantic_core-2.46.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ac5ec7fb9b87f04ee839af2d53bcadea57ded7d229719f56c0ed895bff987943", size = 2160691, upload-time = "2026-04-20T14:41:14.023Z" }, + { url = "https://files.pythonhosted.org/packages/86/66/af8adbcbc0886ead7f1a116606a534d75a307e71e6e08226000d51b880d2/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a3b11c812f61b3129c4905781a2601dfdfdea5fe1e6c1cfb696b55d14e9c054f", size = 2182543, upload-time = "2026-04-20T14:40:48.886Z" }, + { url = "https://files.pythonhosted.org/packages/b0/37/6de71e0f54c54a4190010f57deb749e1ddf75c568ada3b1320b70067f121/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:1108da631e602e5b3c38d6d04fe5bb3bfa54349e6918e3ca6cf570b2e2b2f9d4", size = 2324513, upload-time = "2026-04-20T14:42:36.121Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/b1/9fc74ce94f603d5ef59ff258ca9c2c8fb902fb548d340a96f77f4d1c3b7f/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:de885175515bcfa98ae618c1df7a072f13d179f81376c8007112af20567fd08a", size = 2361853, upload-time = "2026-04-20T14:43:24.886Z" }, + { url = "https://files.pythonhosted.org/packages/40/d0/4c652fc592db35f100279ee751d5a145aca1b9a7984b9684ba7c1b5b0535/pydantic_core-2.46.3-cp310-cp310-win32.whl", hash = "sha256:d11058e3201527d41bc6b545c79187c9e4bf85e15a236a6007f0e991518882b7", size = 1980465, upload-time = "2026-04-20T14:44:46.239Z" }, + { url = "https://files.pythonhosted.org/packages/27/b8/a920453c38afbe1f355e1ea0b0d94a0a3e0b0879d32d793108755fa171d5/pydantic_core-2.46.3-cp310-cp310-win_amd64.whl", hash = "sha256:3612edf65c8ea67ac13616c4d23af12faef1ae435a8a93e5934c2a0cbbdd1fd6", size = 2073884, upload-time = "2026-04-20T14:43:01.201Z" }, + { url = "https://files.pythonhosted.org/packages/22/a2/1ba90a83e85a3f94c796b184f3efde9c72f2830dcda493eea8d59ba78e6d/pydantic_core-2.46.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ab124d49d0459b2373ecf54118a45c28a1e6d4192a533fbc915e70f556feb8e5", size = 2106740, upload-time = "2026-04-20T14:41:20.932Z" }, + { url = "https://files.pythonhosted.org/packages/b6/f6/99ae893c89a0b9d3daec9f95487aa676709aa83f67643b3f0abaf4ab628a/pydantic_core-2.46.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cca67d52a5c7a16aed2b3999e719c4bcf644074eac304a5d3d62dd70ae7d4b2c", size = 1948293, upload-time = "2026-04-20T14:43:42.115Z" }, + { url = "https://files.pythonhosted.org/packages/3e/b8/2e8e636dc9e3f16c2e16bf0849e24be82c5ee82c603c65fc0326666328fc/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c024e08c0ba23e6fd68c771a521e9d6a792f2ebb0fa734296b36394dc30390e", size = 1973222, upload-time = "2026-04-20T14:41:57.841Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/36/0e730beec4d83c5306f417afbd82ff237d9a21e83c5edf675f31ed84c1fe/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6645ce7eec4928e29a1e3b3d5c946621d105d3e79f0c9cddf07c2a9770949287", size = 2053852, upload-time = "2026-04-20T14:40:43.077Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f0/3071131f47e39136a17814576e0fada9168569f7f8c0e6ac4d1ede6a4958/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a712c7118e6c5ea96562f7b488435172abb94a3c53c22c9efc1412264a45cbbe", size = 2221134, upload-time = "2026-04-20T14:43:03.349Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a9/a2dc023eec5aa4b02a467874bad32e2446957d2adcab14e107eab502e978/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69a868ef3ff206343579021c40faf3b1edc64b1cc508ff243a28b0a514ccb050", size = 2279785, upload-time = "2026-04-20T14:41:19.285Z" }, + { url = "https://files.pythonhosted.org/packages/0a/44/93f489d16fb63fbd41c670441536541f6e8cfa1e5a69f40bc9c5d30d8c90/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc7e8c32db809aa0f6ea1d6869ebc8518a65d5150fdfad8bcae6a49ae32a22e2", size = 2089404, upload-time = "2026-04-20T14:43:10.108Z" }, + { url = "https://files.pythonhosted.org/packages/2a/78/8692e3aa72b2d004f7a5d937f1dfdc8552ba26caf0bec75f342c40f00dec/pydantic_core-2.46.3-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:3481bd1341dc85779ee506bc8e1196a277ace359d89d28588a9468c3ecbe63fa", size = 2114898, upload-time = "2026-04-20T14:44:51.475Z" }, + { url = "https://files.pythonhosted.org/packages/6a/62/e83133f2e7832532060175cebf1f13748f4c7e7e7165cdd1f611f174494b/pydantic_core-2.46.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8690eba565c6d68ffd3a8655525cbdd5246510b44a637ee2c6c03a7ebfe64d3c", size = 2157856, upload-time = 
"2026-04-20T14:43:46.64Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ec/6a500e3ad7718ee50583fae79c8651f5d37e3abce1fa9ae177ae65842c53/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4de88889d7e88d50d40ee5b39d5dac0bcaef9ba91f7e536ac064e6b2834ecccf", size = 2180168, upload-time = "2026-04-20T14:42:00.302Z" }, + { url = "https://files.pythonhosted.org/packages/d8/53/8267811054b1aa7fc1dc7ded93812372ef79a839f5e23558136a6afbfde1/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:e480080975c1ef7f780b8f99ed72337e7cc5efea2e518a20a692e8e7b278eb8b", size = 2322885, upload-time = "2026-04-20T14:41:05.253Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c1/1c0acdb3aa0856ddc4ecc55214578f896f2de16f400cf51627eb3c26c1c4/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:de3a5c376f8cd94da9a1b8fd3dd1c16c7a7b216ed31dc8ce9fd7a22bf13b836e", size = 2360328, upload-time = "2026-04-20T14:41:43.991Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d0/ef39cd0f4a926814f360e71c1adeab48ad214d9727e4deb48eedfb5bce1a/pydantic_core-2.46.3-cp311-cp311-win32.whl", hash = "sha256:fc331a5314ffddd5385b9ee9d0d2fee0b13c27e0e02dad71b1ae5d6561f51eeb", size = 1979464, upload-time = "2026-04-20T14:43:12.215Z" }, + { url = "https://files.pythonhosted.org/packages/18/9c/f41951b0d858e343f1cf09398b2a7b3014013799744f2c4a8ad6a3eec4f2/pydantic_core-2.46.3-cp311-cp311-win_amd64.whl", hash = "sha256:b5b9c6cf08a8a5e502698f5e153056d12c34b8fb30317e0c5fd06f45162a6346", size = 2070837, upload-time = "2026-04-20T14:41:47.707Z" }, + { url = "https://files.pythonhosted.org/packages/9f/1e/264a17cd582f6ed50950d4d03dd5fefd84e570e238afe1cb3e25cf238769/pydantic_core-2.46.3-cp311-cp311-win_arm64.whl", hash = "sha256:5dfd51cf457482f04ec49491811a2b8fd5b843b64b11eecd2d7a1ee596ea78a6", size = 2053647, upload-time = "2026-04-20T14:42:27.535Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/cb/5b47425556ecc1f3fe18ed2a0083188aa46e1dd812b06e406475b3a5d536/pydantic_core-2.46.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b11b59b3eee90a80a36701ddb4576d9ae31f93f05cb9e277ceaa09e6bf074a67", size = 2101946, upload-time = "2026-04-20T14:40:52.581Z" }, + { url = "https://files.pythonhosted.org/packages/a1/4f/2fb62c2267cae99b815bbf4a7b9283812c88ca3153ef29f7707200f1d4e5/pydantic_core-2.46.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af8653713055ea18a3abc1537fe2ebc42f5b0bbb768d1eb79fd74eb47c0ac089", size = 1951612, upload-time = "2026-04-20T14:42:42.996Z" }, + { url = "https://files.pythonhosted.org/packages/50/6e/b7348fd30d6556d132cddd5bd79f37f96f2601fe0608afac4f5fb01ec0b3/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75a519dab6d63c514f3a81053e5266c549679e4aa88f6ec57f2b7b854aceb1b0", size = 1977027, upload-time = "2026-04-20T14:42:02.001Z" }, + { url = "https://files.pythonhosted.org/packages/82/11/31d60ee2b45540d3fb0b29302a393dbc01cd771c473f5b5147bcd353e593/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6cd87cb1575b1ad05ba98894c5b5c96411ef678fa2f6ed2576607095b8d9789", size = 2063008, upload-time = "2026-04-20T14:44:17.952Z" }, + { url = "https://files.pythonhosted.org/packages/8a/db/3a9d1957181b59258f44a2300ab0f0be9d1e12d662a4f57bb31250455c52/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f80a55484b8d843c8ada81ebf70a682f3f00a3d40e378c06cf17ecb44d280d7d", size = 2233082, upload-time = "2026-04-20T14:40:57.934Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e1/3277c38792aeb5cfb18c2f0c5785a221d9ff4e149abbe1184d53d5f72273/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3861f1731b90c50a3266316b9044f5c9b405eecb8e299b0a7120596334e4fe9c", size = 2304615, upload-time = "2026-04-20T14:42:12.584Z" }, + { 
url = "https://files.pythonhosted.org/packages/5e/d5/e3d9717c9eba10855325650afd2a9cba8e607321697f18953af9d562da2f/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb528e295ed31570ac3dcc9bfdd6e0150bc11ce6168ac87a8082055cf1a67395", size = 2094380, upload-time = "2026-04-20T14:43:05.522Z" }, + { url = "https://files.pythonhosted.org/packages/a1/20/abac35dedcbfd66c6f0b03e4e3564511771d6c9b7ede10a362d03e110d9b/pydantic_core-2.46.3-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:367508faa4973b992b271ba1494acaab36eb7e8739d1e47be5035fb1ea225396", size = 2135429, upload-time = "2026-04-20T14:41:55.549Z" }, + { url = "https://files.pythonhosted.org/packages/6c/a5/41bfd1df69afad71b5cf0535055bccc73022715ad362edbc124bc1e021d7/pydantic_core-2.46.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ad3c826fe523e4becf4fe39baa44286cff85ef137c729a2c5e269afbfd0905d", size = 2174582, upload-time = "2026-04-20T14:41:45.96Z" }, + { url = "https://files.pythonhosted.org/packages/79/65/38d86ea056b29b2b10734eb23329b7a7672ca604df4f2b6e9c02d4ee22fe/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ec638c5d194ef8af27db69f16c954a09797c0dc25015ad6123eb2c73a4d271ca", size = 2187533, upload-time = "2026-04-20T14:40:55.367Z" }, + { url = "https://files.pythonhosted.org/packages/b6/55/a1129141678a2026badc539ad1dee0a71d06f54c2f06a4bd68c030ac781b/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:28ed528c45446062ee66edb1d33df5d88828ae167de76e773a3c7f64bd14e976", size = 2332985, upload-time = "2026-04-20T14:44:13.05Z" }, + { url = "https://files.pythonhosted.org/packages/d7/60/cb26f4077719f709e54819f4e8e1d43f4091f94e285eb6bd21e1190a7b7c/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aed19d0c783886d5bd86d80ae5030006b45e28464218747dcf83dabfdd092c7b", size = 2373670, upload-time = "2026-04-20T14:41:53.421Z" }, + { url = 
"https://files.pythonhosted.org/packages/6b/7e/c3f21882bdf1d8d086876f81b5e296206c69c6082551d776895de7801fa0/pydantic_core-2.46.3-cp312-cp312-win32.whl", hash = "sha256:06d5d8820cbbdb4147578c1fe7ffcd5b83f34508cb9f9ab76e807be7db6ff0a4", size = 1966722, upload-time = "2026-04-20T14:44:30.588Z" }, + { url = "https://files.pythonhosted.org/packages/57/be/6b5e757b859013ebfbd7adba02f23b428f37c86dcbf78b5bb0b4ffd36e99/pydantic_core-2.46.3-cp312-cp312-win_amd64.whl", hash = "sha256:c3212fda0ee959c1dd04c60b601ec31097aaa893573a3a1abd0a47bcac2968c1", size = 2072970, upload-time = "2026-04-20T14:42:54.248Z" }, + { url = "https://files.pythonhosted.org/packages/bf/f8/a989b21cc75e9a32d24192ef700eea606521221a89faa40c919ce884f2b1/pydantic_core-2.46.3-cp312-cp312-win_arm64.whl", hash = "sha256:f1f8338dd7a7f31761f1f1a3c47503a9a3b34eea3c8b01fa6ee96408affb5e72", size = 2035963, upload-time = "2026-04-20T14:44:20.4Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3c/9b5e8eb9821936d065439c3b0fb1490ffa64163bfe7e1595985a47896073/pydantic_core-2.46.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:12bc98de041458b80c86c56b24df1d23832f3e166cbaff011f25d187f5c62c37", size = 2102109, upload-time = "2026-04-20T14:41:24.219Z" }, + { url = "https://files.pythonhosted.org/packages/91/97/1c41d1f5a19f241d8069f1e249853bcce378cdb76eec8ab636d7bc426280/pydantic_core-2.46.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:85348b8f89d2c3508b65b16c3c33a4da22b8215138d8b996912bb1532868885f", size = 1951820, upload-time = "2026-04-20T14:42:14.236Z" }, + { url = "https://files.pythonhosted.org/packages/30/b4/d03a7ae14571bc2b6b3c7b122441154720619afe9a336fa3a95434df5e2f/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1105677a6df914b1fb71a81b96c8cce7726857e1717d86001f29be06a25ee6f8", size = 1977785, upload-time = "2026-04-20T14:42:31.648Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/0c/4086f808834b59e3c8f1aa26df8f4b6d998cdcf354a143d18ef41529d1fe/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87082cd65669a33adeba5470769e9704c7cf026cc30afb9cc77fd865578ebaad", size = 2062761, upload-time = "2026-04-20T14:40:37.093Z" }, + { url = "https://files.pythonhosted.org/packages/fa/71/a649be5a5064c2df0db06e0a512c2281134ed2fcc981f52a657936a7527c/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60e5f66e12c4f5212d08522963380eaaeac5ebd795826cfd19b2dfb0c7a52b9c", size = 2232989, upload-time = "2026-04-20T14:42:59.254Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/7756e75763e810b3a710f4724441d1ecc5883b94aacb07ca71c5fb5cfb69/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6cdf19bf84128d5e7c37e8a73a0c5c10d51103a650ac585d42dd6ae233f2b7f", size = 2303975, upload-time = "2026-04-20T14:41:32.287Z" }, + { url = "https://files.pythonhosted.org/packages/6c/35/68a762e0c1e31f35fa0dac733cbd9f5b118042853698de9509c8e5bf128b/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031bb17f4885a43773c8c763089499f242aee2ea85cf17154168775dccdecf35", size = 2095325, upload-time = "2026-04-20T14:42:47.685Z" }, + { url = "https://files.pythonhosted.org/packages/77/bf/1bf8c9a8e91836c926eae5e3e51dce009bf495a60ca56060689d3df3f340/pydantic_core-2.46.3-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:bcf2a8b2982a6673693eae7348ef3d8cf3979c1d63b54fca7c397a635cc68687", size = 2133368, upload-time = "2026-04-20T14:41:22.766Z" }, + { url = "https://files.pythonhosted.org/packages/e5/50/87d818d6bab915984995157ceb2380f5aac4e563dddbed6b56f0ed057aba/pydantic_core-2.46.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28e8cf2f52d72ced402a137145923a762cbb5081e48b34312f7a0c8f55928ec3", size = 2173908, upload-time = 
"2026-04-20T14:42:52.044Z" }, + { url = "https://files.pythonhosted.org/packages/91/88/a311fb306d0bd6185db41fa14ae888fb81d0baf648a761ae760d30819d33/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:17eaface65d9fc5abb940003020309c1bf7a211f5f608d7870297c367e6f9022", size = 2186422, upload-time = "2026-04-20T14:43:29.55Z" }, + { url = "https://files.pythonhosted.org/packages/8f/79/28fd0d81508525ab2054fef7c77a638c8b5b0afcbbaeee493cf7c3fef7e1/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:93fd339f23408a07e98950a89644f92c54d8729719a40b30c0a30bb9ebc55d23", size = 2332709, upload-time = "2026-04-20T14:42:16.134Z" }, + { url = "https://files.pythonhosted.org/packages/b3/21/795bf5fe5c0f379308b8ef19c50dedab2e7711dbc8d0c2acf08f1c7daa05/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:23cbdb3aaa74dfe0837975dbf69b469753bbde8eacace524519ffdb6b6e89eb7", size = 2372428, upload-time = "2026-04-20T14:41:10.974Z" }, + { url = "https://files.pythonhosted.org/packages/45/b3/ed14c659cbe7605e3ef063077680a64680aec81eb1a04763a05190d49b7f/pydantic_core-2.46.3-cp313-cp313-win32.whl", hash = "sha256:610eda2e3838f401105e6326ca304f5da1e15393ae25dacae5c5c63f2c275b13", size = 1965601, upload-time = "2026-04-20T14:41:42.128Z" }, + { url = "https://files.pythonhosted.org/packages/ef/bb/adb70d9a762ddd002d723fbf1bd492244d37da41e3af7b74ad212609027e/pydantic_core-2.46.3-cp313-cp313-win_amd64.whl", hash = "sha256:68cc7866ed863db34351294187f9b729964c371ba33e31c26f478471c52e1ed0", size = 2071517, upload-time = "2026-04-20T14:43:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/52/eb/66faefabebfe68bd7788339c9c9127231e680b11906368c67ce112fdb47f/pydantic_core-2.46.3-cp313-cp313-win_arm64.whl", hash = "sha256:f64b5537ac62b231572879cd08ec05600308636a5d63bcbdb15063a466977bec", size = 2035802, upload-time = "2026-04-20T14:43:38.507Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/db/a7bcb4940183fda36022cd18ba8dd12f2dff40740ec7b58ce7457befa416/pydantic_core-2.46.3-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:afa3aa644f74e290cdede48a7b0bee37d1c35e71b05105f6b340d484af536d9b", size = 2097614, upload-time = "2026-04-20T14:44:38.374Z" }, + { url = "https://files.pythonhosted.org/packages/24/35/e4066358a22e3e99519db370494c7528f5a2aa1367370e80e27e20283543/pydantic_core-2.46.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ced3310e51aa425f7f77da8bbbb5212616655bedbe82c70944320bc1dbe5e018", size = 1951896, upload-time = "2026-04-20T14:40:53.996Z" }, + { url = "https://files.pythonhosted.org/packages/87/92/37cf4049d1636996e4b888c05a501f40a43ff218983a551d57f9d5e14f0d/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e29908922ce9da1a30b4da490bd1d3d82c01dcfdf864d2a74aacee674d0bfa34", size = 1979314, upload-time = "2026-04-20T14:41:49.446Z" }, + { url = "https://files.pythonhosted.org/packages/d8/36/9ff4d676dfbdfb2d591cf43f3d90ded01e15b1404fd101180ed2d62a2fd3/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c9ff69140423eea8ed2d5477df3ba037f671f5e897d206d921bc9fdc39613e7", size = 2056133, upload-time = "2026-04-20T14:42:23.574Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f0/405b442a4d7ba855b06eec8b2bf9c617d43b8432d099dfdc7bf999293495/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b675ab0a0d5b1c8fdb81195dc5bcefea3f3c240871cdd7ff9a2de8aa50772eb2", size = 2228726, upload-time = "2026-04-20T14:44:22.816Z" }, + { url = "https://files.pythonhosted.org/packages/e7/f8/65cd92dd5a0bd89ba277a98ecbfaf6fc36bbd3300973c7a4b826d6ab1391/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0087084960f209a9a4af50ecd1fb063d9ad3658c07bb81a7a53f452dacbfb2ba", size = 2301214, upload-time = "2026-04-20T14:44:48.792Z" }, + { 
url = "https://files.pythonhosted.org/packages/fd/86/ef96a4c6e79e7a2d0410826a68fbc0eccc0fd44aa733be199d5fcac3bb87/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed42e6cc8e1b0e2b9b96e2276bad70ae625d10d6d524aed0c93de974ae029f9f", size = 2099927, upload-time = "2026-04-20T14:41:40.196Z" }, + { url = "https://files.pythonhosted.org/packages/6d/53/269caf30e0096e0a8a8f929d1982a27b3879872cca2d917d17c2f9fdf4fe/pydantic_core-2.46.3-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:f1771ce258afb3e4201e67d154edbbae712a76a6081079fe247c2f53c6322c22", size = 2128789, upload-time = "2026-04-20T14:41:15.868Z" }, + { url = "https://files.pythonhosted.org/packages/00/b0/1a6d9b6a587e118482910c244a1c5acf4d192604174132efd12bf0ac486f/pydantic_core-2.46.3-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a7610b6a5242a6c736d8ad47fd5fff87fcfe8f833b281b1c409c3d6835d9227f", size = 2173815, upload-time = "2026-04-20T14:44:25.152Z" }, + { url = "https://files.pythonhosted.org/packages/87/56/e7e00d4041a7e62b5a40815590114db3b535bf3ca0bf4dca9f16cef25246/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:ff5e7783bcc5476e1db448bf268f11cb257b1c276d3e89f00b5727be86dd0127", size = 2181608, upload-time = "2026-04-20T14:41:28.933Z" }, + { url = "https://files.pythonhosted.org/packages/e8/22/4bd23c3d41f7c185d60808a1de83c76cf5aeabf792f6c636a55c3b1ec7f9/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:9d2e32edcc143bc01e95300671915d9ca052d4f745aa0a49c48d4803f8a85f2c", size = 2326968, upload-time = "2026-04-20T14:42:03.962Z" }, + { url = "https://files.pythonhosted.org/packages/24/ac/66cd45129e3915e5ade3b292cb3bc7fd537f58f8f8dbdaba6170f7cabb74/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d83d1c6b87fa56b521479cff237e626a292f3b31b6345c15a99121b454c1", size = 2369842, upload-time = "2026-04-20T14:41:35.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/51/dd4248abb84113615473aa20d5545b7c4cd73c8644003b5259686f93996c/pydantic_core-2.46.3-cp314-cp314-win32.whl", hash = "sha256:07bc6d2a28c3adb4f7c6ae46aa4f2d2929af127f587ed44057af50bf1ce0f505", size = 1959661, upload-time = "2026-04-20T14:41:00.042Z" }, + { url = "https://files.pythonhosted.org/packages/20/eb/59980e5f1ae54a3b86372bd9f0fa373ea2d402e8cdcd3459334430f91e91/pydantic_core-2.46.3-cp314-cp314-win_amd64.whl", hash = "sha256:8940562319bc621da30714617e6a7eaa6b98c84e8c685bcdc02d7ed5e7c7c44e", size = 2071686, upload-time = "2026-04-20T14:43:16.471Z" }, + { url = "https://files.pythonhosted.org/packages/8c/db/1cf77e5247047dfee34bc01fa9bca134854f528c8eb053e144298893d370/pydantic_core-2.46.3-cp314-cp314-win_arm64.whl", hash = "sha256:5dcbbcf4d22210ced8f837c96db941bdb078f419543472aca5d9a0bb7cddc7df", size = 2026907, upload-time = "2026-04-20T14:43:31.732Z" }, + { url = "https://files.pythonhosted.org/packages/57/c0/b3df9f6a543276eadba0a48487b082ca1f201745329d97dbfa287034a230/pydantic_core-2.46.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:d0fe3dce1e836e418f912c1ad91c73357d03e556a4d286f441bf34fed2dbeecf", size = 2095047, upload-time = "2026-04-20T14:42:37.982Z" }, + { url = "https://files.pythonhosted.org/packages/66/57/886a938073b97556c168fd99e1a7305bb363cd30a6d2c76086bf0587b32a/pydantic_core-2.46.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9ce92e58abc722dac1bf835a6798a60b294e48eb0e625ec9fd994b932ac5feee", size = 1934329, upload-time = "2026-04-20T14:43:49.655Z" }, + { url = "https://files.pythonhosted.org/packages/0b/7c/b42eaa5c34b13b07ecb51da21761297a9b8eb43044c864a035999998f328/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a03e6467f0f5ab796a486146d1b887b2dc5e5f9b3288898c1b1c3ad974e53e4a", size = 1974847, upload-time = "2026-04-20T14:42:10.737Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/9b/92b42db6543e7de4f99ae977101a2967b63122d4b6cf7773812da2d7d5b5/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2798b6ba041b9d70acfb9071a2ea13c8456dd1e6a5555798e41ba7b0790e329c", size = 2041742, upload-time = "2026-04-20T14:40:44.262Z" }, + { url = "https://files.pythonhosted.org/packages/0f/19/46fbe1efabb5aa2834b43b9454e70f9a83ad9c338c1291e48bdc4fecf167/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9be3e221bdc6d69abf294dcf7aff6af19c31a5cdcc8f0aa3b14be29df4bd03b1", size = 2236235, upload-time = "2026-04-20T14:41:27.307Z" }, + { url = "https://files.pythonhosted.org/packages/77/da/b3f95bc009ad60ec53120f5d16c6faa8cabdbe8a20d83849a1f2b8728148/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f13936129ce841f2a5ddf6f126fea3c43cd128807b5a59588c37cf10178c2e64", size = 2282633, upload-time = "2026-04-20T14:44:33.271Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6e/401336117722e28f32fb8220df676769d28ebdf08f2f4469646d404c43a3/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28b5f2ef03416facccb1c6ef744c69793175fd27e44ef15669201601cf423acb", size = 2109679, upload-time = "2026-04-20T14:44:41.065Z" }, + { url = "https://files.pythonhosted.org/packages/fc/53/b289f9bc8756a32fe718c46f55afaeaf8d489ee18d1a1e7be1db73f42cc4/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:830d1247d77ad23852314f069e9d7ddafeec5f684baf9d7e7065ed46a049c4e6", size = 2108342, upload-time = "2026-04-20T14:42:50.144Z" }, + { url = "https://files.pythonhosted.org/packages/10/5b/8292fc7c1f9111f1b2b7c1b0dcf1179edcd014fc3ea4517499f50b829d71/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0793c90c1a3c74966e7975eaef3ed30ebdff3260a0f815a62a22adc17e4c01c", size = 2157208, upload-time = 
"2026-04-20T14:42:08.133Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9e/f80044e9ec07580f057a89fc131f78dda7a58751ddf52bbe05eaf31db50f/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:d2d0aead851b66f5245ec0c4fb2612ef457f8bbafefdf65a2bf9d6bac6140f47", size = 2167237, upload-time = "2026-04-20T14:42:25.412Z" }, + { url = "https://files.pythonhosted.org/packages/f8/84/6781a1b037f3b96be9227edbd1101f6d3946746056231bf4ac48cdff1a8d/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:2f40e4246676beb31c5ce77c38a55ca4e465c6b38d11ea1bd935420568e0b1ab", size = 2312540, upload-time = "2026-04-20T14:40:40.313Z" }, + { url = "https://files.pythonhosted.org/packages/3e/db/19c0839feeb728e7df03255581f198dfdf1c2aeb1e174a8420b63c5252e5/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:cf489cf8986c543939aeee17a09c04d6ffb43bfef8ca16fcbcc5cfdcbed24dba", size = 2369556, upload-time = "2026-04-20T14:41:09.427Z" }, + { url = "https://files.pythonhosted.org/packages/e0/15/3228774cb7cd45f5f721ddf1b2242747f4eb834d0c491f0c02d606f09fed/pydantic_core-2.46.3-cp314-cp314t-win32.whl", hash = "sha256:ffe0883b56cfc05798bf994164d2b2ff03efe2d22022a2bb080f3b626176dd56", size = 1949756, upload-time = "2026-04-20T14:41:25.717Z" }, + { url = "https://files.pythonhosted.org/packages/b8/2a/c79cf53fd91e5a87e30d481809f52f9a60dd221e39de66455cf04deaad37/pydantic_core-2.46.3-cp314-cp314t-win_amd64.whl", hash = "sha256:706d9d0ce9cf4593d07270d8e9f53b161f90c57d315aeec4fb4fd7a8b10240d8", size = 2051305, upload-time = "2026-04-20T14:43:18.627Z" }, + { url = "https://files.pythonhosted.org/packages/0b/db/d8182a7f1d9343a032265aae186eb063fe26ca4c40f256b21e8da4498e89/pydantic_core-2.46.3-cp314-cp314t-win_arm64.whl", hash = "sha256:77706aeb41df6a76568434701e0917da10692da28cb69d5fb6919ce5fdb07374", size = 2026310, upload-time = "2026-04-20T14:41:01.778Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/7f/03dbad45cd3aa9083fbc93c210ae8b005af67e4136a14186950a747c6874/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:9715525891ed524a0a1eb6d053c74d4d4ad5017677fb00af0b7c2644a31bae46", size = 2105683, upload-time = "2026-04-20T14:42:19.779Z" }, + { url = "https://files.pythonhosted.org/packages/26/22/4dc186ac8ea6b257e9855031f51b62a9637beac4d68ac06bee02f046f836/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:9d2f400712a99a013aff420ef1eb9be077f8189a36c1e3ef87660b4e1088a874", size = 1940052, upload-time = "2026-04-20T14:43:59.274Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ca/d376391a5aff1f2e8188960d7873543608130a870961c2b6b5236627c116/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd2aab0e2e9dc2daf36bd2686c982535d5e7b1d930a1344a7bb6e82baab42a76", size = 1988172, upload-time = "2026-04-20T14:41:17.469Z" }, + { url = "https://files.pythonhosted.org/packages/0e/6b/523b9f85c23788755d6ab949329de692a2e3a584bc6beb67fef5e035aa9d/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e9d76736da5f362fabfeea6a69b13b7f2be405c6d6966f06b2f6bfff7e64531", size = 2128596, upload-time = "2026-04-20T14:40:41.707Z" }, + { url = "https://files.pythonhosted.org/packages/34/42/f426db557e8ab2791bc7562052299944a118655496fbff99914e564c0a94/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:b12dd51f1187c2eb489af8e20f880362db98e954b54ab792fa5d92e8bcc6b803", size = 2091877, upload-time = "2026-04-20T14:43:27.091Z" }, + { url = "https://files.pythonhosted.org/packages/5c/4f/86a832a9d14df58e663bfdf4627dc00d3317c2bd583c4fb23390b0f04b8e/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = 
"sha256:f00a0961b125f1a47af7bcc17f00782e12f4cd056f83416006b30111d941dfa3", size = 1932428, upload-time = "2026-04-20T14:40:45.781Z" }, + { url = "https://files.pythonhosted.org/packages/11/1a/fe857968954d93fb78e0d4b6df5c988c74c4aaa67181c60be7cfe327c0ca/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57697d7c056aca4bbb680200f96563e841a6386ac1129370a0102592f4dddff5", size = 1997550, upload-time = "2026-04-20T14:44:02.425Z" }, + { url = "https://files.pythonhosted.org/packages/17/eb/9d89ad2d9b0ba8cd65393d434471621b98912abb10fbe1df08e480ba57b5/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd35aa21299def8db7ef4fe5c4ff862941a9a158ca7b63d61e66fe67d30416b4", size = 2137657, upload-time = "2026-04-20T14:42:45.149Z" }, + { url = "https://files.pythonhosted.org/packages/1f/da/99d40830684f81dec901cac521b5b91c095394cc1084b9433393cde1c2df/pydantic_core-2.46.3-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:13afdd885f3d71280cf286b13b310ee0f7ccfefd1dbbb661514a474b726e2f25", size = 2107973, upload-time = "2026-04-20T14:42:06.175Z" }, + { url = "https://files.pythonhosted.org/packages/99/a5/87024121818d75bbb2a98ddbaf638e40e7a18b5e0f5492c9ca4b1b316107/pydantic_core-2.46.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f91c0aff3e3ee0928edd1232c57f643a7a003e6edf1860bc3afcdc749cb513f3", size = 1947191, upload-time = "2026-04-20T14:43:14.319Z" }, + { url = "https://files.pythonhosted.org/packages/60/62/0c1acfe10945b83a6a59d19fbaa92f48825381509e5701b855c08f13db76/pydantic_core-2.46.3-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6529d1d128321a58d30afcc97b49e98836542f68dd41b33c2e972bb9e5290536", size = 2123791, upload-time = "2026-04-20T14:43:22.766Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/3e/3b2393b4c8f44285561dc30b00cf307a56a2eff7c483a824db3b8221ca51/pydantic_core-2.46.3-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:975c267cff4f7e7272eacbe50f6cc03ca9a3da4c4fbd66fffd89c94c1e311aa1", size = 2153197, upload-time = "2026-04-20T14:44:27.932Z" }, + { url = "https://files.pythonhosted.org/packages/ba/75/5af02fb35505051eee727c061f2881c555ab4f8ddb2d42da715a42c9731b/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:2b8e4f2bbdf71415c544b4b1138b8060db7b6611bc927e8064c769f64bed651c", size = 2181073, upload-time = "2026-04-20T14:43:20.729Z" }, + { url = "https://files.pythonhosted.org/packages/10/92/7e0e1bd9ca3c68305db037560ca2876f89b2647deb2f8b6319005de37505/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:e61ea8e9fff9606d09178f577ff8ccdd7206ff73d6552bcec18e1033c4254b85", size = 2315886, upload-time = "2026-04-20T14:44:04.826Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d8/101655f27eaf3e44558ead736b2795d12500598beed4683f279396fa186e/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b504bda01bafc69b6d3c7a0c7f039dcf60f47fab70e06fe23f57b5c75bdc82b8", size = 2360528, upload-time = "2026-04-20T14:40:47.431Z" }, + { url = "https://files.pythonhosted.org/packages/07/0f/1c34a74c8d07136f0d729ffe5e1fdab04fbdaa7684f61a92f92511a84a15/pydantic_core-2.46.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b00b76f7142fc60c762ce579bd29c8fa44aaa56592dd3c54fab3928d0d4ca6ff", size = 2184144, upload-time = "2026-04-20T14:42:57Z" }, ] [[package]] name = "pydantic-settings" -version = "2.13.1" +version = "2.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, { name = "typing-inspection" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } +sdist = { url = "https://files.pythonhosted.org/packages/42/98/c8345dccdc31de4228c039a98f6467a941e39558da41c1744fbe29fa5666/pydantic_settings-2.14.0.tar.gz", hash = "sha256:24285fd4b0e0c06507dd9fdfd331ee23794305352aaec8fc4eb92d4047aeb67d", size = 235709, upload-time = "2026-04-20T13:37:40.293Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, + { url = "https://files.pythonhosted.org/packages/01/dd/bebff3040138f00ae8a102d426b27349b9a49acc310fcae7f92112d867e3/pydantic_settings-2.14.0-py3-none-any.whl", hash = "sha256:fc8d5d692eb7092e43c8647c1c35a3ecd00e040fcf02ed86f4cb5458ca62182e", size = 60940, upload-time = "2026-04-20T13:37:38.586Z" }, ] [[package]] @@ -1335,6 +1803,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + [[package]] name = "python-discovery" version = "1.2.2" @@ -1366,6 +1846,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847, upload-time = "2026-04-10T14:09:58.131Z" }, ] +[[package]] +name = "pytz" +version = "2026.1.post1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/db/b8721d71d945e6a8ac63c0fc900b2067181dbb50805958d4d4661cf7d277/pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1", size = 321088, upload-time = "2026-03-03T07:47:50.683Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" }, +] + [[package]] name = "pywin32" version = "311" @@ -1461,6 +1950,79 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "pyzmq" +version = "27.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "implementation_name == 'pypy'" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/04/0b/3c9baedbdf613ecaa7aa07027780b8867f57b6293b6ee50de316c9f3222b/pyzmq-27.1.0.tar.gz", hash = "sha256:ac0765e3d44455adb6ddbf4417dcce460fc40a05978c08efdf2948072f6db540", size = 281750, upload-time = "2025-09-08T23:10:18.157Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/b9/52aa9ec2867528b54f1e60846728d8b4d84726630874fee3a91e66c7df81/pyzmq-27.1.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:508e23ec9bc44c0005c4946ea013d9317ae00ac67778bd47519fdf5a0e930ff4", size = 1329850, upload-time = "2025-09-08T23:07:26.274Z" }, + { url = "https://files.pythonhosted.org/packages/99/64/5653e7b7425b169f994835a2b2abf9486264401fdef18df91ddae47ce2cc/pyzmq-27.1.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:507b6f430bdcf0ee48c0d30e734ea89ce5567fd7b8a0f0044a369c176aa44556", size = 906380, upload-time = "2025-09-08T23:07:29.78Z" }, + { url = "https://files.pythonhosted.org/packages/73/78/7d713284dbe022f6440e391bd1f3c48d9185673878034cfb3939cdf333b2/pyzmq-27.1.0-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf7b38f9fd7b81cb6d9391b2946382c8237fd814075c6aa9c3b746d53076023b", size = 666421, upload-time = "2025-09-08T23:07:31.263Z" }, + { url = "https://files.pythonhosted.org/packages/30/76/8f099f9d6482450428b17c4d6b241281af7ce6a9de8149ca8c1c649f6792/pyzmq-27.1.0-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03ff0b279b40d687691a6217c12242ee71f0fba28bf8626ff50e3ef0f4410e1e", size = 854149, upload-time = "2025-09-08T23:07:33.17Z" }, + { url = "https://files.pythonhosted.org/packages/59/f0/37fbfff06c68016019043897e4c969ceab18bde46cd2aca89821fcf4fb2e/pyzmq-27.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:677e744fee605753eac48198b15a2124016c009a11056f93807000ab11ce6526", size = 1655070, upload-time = "2025-09-08T23:07:35.205Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/14/7254be73f7a8edc3587609554fcaa7bfd30649bf89cd260e4487ca70fdaa/pyzmq-27.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:dd2fec2b13137416a1c5648b7009499bcc8fea78154cd888855fa32514f3dad1", size = 2033441, upload-time = "2025-09-08T23:07:37.432Z" }, + { url = "https://files.pythonhosted.org/packages/22/dc/49f2be26c6f86f347e796a4d99b19167fc94503f0af3fd010ad262158822/pyzmq-27.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:08e90bb4b57603b84eab1d0ca05b3bbb10f60c1839dc471fc1c9e1507bef3386", size = 1891529, upload-time = "2025-09-08T23:07:39.047Z" }, + { url = "https://files.pythonhosted.org/packages/a3/3e/154fb963ae25be70c0064ce97776c937ecc7d8b0259f22858154a9999769/pyzmq-27.1.0-cp310-cp310-win32.whl", hash = "sha256:a5b42d7a0658b515319148875fcb782bbf118dd41c671b62dae33666c2213bda", size = 567276, upload-time = "2025-09-08T23:07:40.695Z" }, + { url = "https://files.pythonhosted.org/packages/62/b2/f4ab56c8c595abcb26b2be5fd9fa9e6899c1e5ad54964e93ae8bb35482be/pyzmq-27.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:c0bb87227430ee3aefcc0ade2088100e528d5d3298a0a715a64f3d04c60ba02f", size = 632208, upload-time = "2025-09-08T23:07:42.298Z" }, + { url = "https://files.pythonhosted.org/packages/3b/e3/be2cc7ab8332bdac0522fdb64c17b1b6241a795bee02e0196636ec5beb79/pyzmq-27.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:9a916f76c2ab8d045b19f2286851a38e9ac94ea91faf65bd64735924522a8b32", size = 559766, upload-time = "2025-09-08T23:07:43.869Z" }, + { url = "https://files.pythonhosted.org/packages/06/5d/305323ba86b284e6fcb0d842d6adaa2999035f70f8c38a9b6d21ad28c3d4/pyzmq-27.1.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:226b091818d461a3bef763805e75685e478ac17e9008f49fce2d3e52b3d58b86", size = 1333328, upload-time = "2025-09-08T23:07:45.946Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/a0/fc7e78a23748ad5443ac3275943457e8452da67fda347e05260261108cbc/pyzmq-27.1.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0790a0161c281ca9723f804871b4027f2e8b5a528d357c8952d08cd1a9c15581", size = 908803, upload-time = "2025-09-08T23:07:47.551Z" }, + { url = "https://files.pythonhosted.org/packages/7e/22/37d15eb05f3bdfa4abea6f6d96eb3bb58585fbd3e4e0ded4e743bc650c97/pyzmq-27.1.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c895a6f35476b0c3a54e3eb6ccf41bf3018de937016e6e18748317f25d4e925f", size = 668836, upload-time = "2025-09-08T23:07:49.436Z" }, + { url = "https://files.pythonhosted.org/packages/b1/c4/2a6fe5111a01005fc7af3878259ce17684fabb8852815eda6225620f3c59/pyzmq-27.1.0-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bbf8d3630bf96550b3be8e1fc0fea5cbdc8d5466c1192887bd94869da17a63e", size = 857038, upload-time = "2025-09-08T23:07:51.234Z" }, + { url = "https://files.pythonhosted.org/packages/cb/eb/bfdcb41d0db9cd233d6fb22dc131583774135505ada800ebf14dfb0a7c40/pyzmq-27.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:15c8bd0fe0dabf808e2d7a681398c4e5ded70a551ab47482067a572c054c8e2e", size = 1657531, upload-time = "2025-09-08T23:07:52.795Z" }, + { url = "https://files.pythonhosted.org/packages/ab/21/e3180ca269ed4a0de5c34417dfe71a8ae80421198be83ee619a8a485b0c7/pyzmq-27.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:bafcb3dd171b4ae9f19ee6380dfc71ce0390fefaf26b504c0e5f628d7c8c54f2", size = 2034786, upload-time = "2025-09-08T23:07:55.047Z" }, + { url = "https://files.pythonhosted.org/packages/3b/b1/5e21d0b517434b7f33588ff76c177c5a167858cc38ef740608898cd329f2/pyzmq-27.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e829529fcaa09937189178115c49c504e69289abd39967cd8a4c215761373394", size = 1894220, upload-time = "2025-09-08T23:07:57.172Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/f2/44913a6ff6941905efc24a1acf3d3cb6146b636c546c7406c38c49c403d4/pyzmq-27.1.0-cp311-cp311-win32.whl", hash = "sha256:6df079c47d5902af6db298ec92151db82ecb557af663098b92f2508c398bb54f", size = 567155, upload-time = "2025-09-08T23:07:59.05Z" }, + { url = "https://files.pythonhosted.org/packages/23/6d/d8d92a0eb270a925c9b4dd039c0b4dc10abc2fcbc48331788824ef113935/pyzmq-27.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:190cbf120fbc0fc4957b56866830def56628934a9d112aec0e2507aa6a032b97", size = 633428, upload-time = "2025-09-08T23:08:00.663Z" }, + { url = "https://files.pythonhosted.org/packages/ae/14/01afebc96c5abbbd713ecfc7469cfb1bc801c819a74ed5c9fad9a48801cb/pyzmq-27.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:eca6b47df11a132d1745eb3b5b5e557a7dae2c303277aa0e69c6ba91b8736e07", size = 559497, upload-time = "2025-09-08T23:08:02.15Z" }, + { url = "https://files.pythonhosted.org/packages/92/e7/038aab64a946d535901103da16b953c8c9cc9c961dadcbf3609ed6428d23/pyzmq-27.1.0-cp312-abi3-macosx_10_15_universal2.whl", hash = "sha256:452631b640340c928fa343801b0d07eb0c3789a5ffa843f6e1a9cee0ba4eb4fc", size = 1306279, upload-time = "2025-09-08T23:08:03.807Z" }, + { url = "https://files.pythonhosted.org/packages/e8/5e/c3c49fdd0f535ef45eefcc16934648e9e59dace4a37ee88fc53f6cd8e641/pyzmq-27.1.0-cp312-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:1c179799b118e554b66da67d88ed66cd37a169f1f23b5d9f0a231b4e8d44a113", size = 895645, upload-time = "2025-09-08T23:08:05.301Z" }, + { url = "https://files.pythonhosted.org/packages/f8/e5/b0b2504cb4e903a74dcf1ebae157f9e20ebb6ea76095f6cfffea28c42ecd/pyzmq-27.1.0-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3837439b7f99e60312f0c926a6ad437b067356dc2bc2ec96eb395fd0fe804233", size = 652574, upload-time = "2025-09-08T23:08:06.828Z" }, + { url = 
"https://files.pythonhosted.org/packages/f8/9b/c108cdb55560eaf253f0cbdb61b29971e9fb34d9c3499b0e96e4e60ed8a5/pyzmq-27.1.0-cp312-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43ad9a73e3da1fab5b0e7e13402f0b2fb934ae1c876c51d0afff0e7c052eca31", size = 840995, upload-time = "2025-09-08T23:08:08.396Z" }, + { url = "https://files.pythonhosted.org/packages/c2/bb/b79798ca177b9eb0825b4c9998c6af8cd2a7f15a6a1a4272c1d1a21d382f/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0de3028d69d4cdc475bfe47a6128eb38d8bc0e8f4d69646adfbcd840facbac28", size = 1642070, upload-time = "2025-09-08T23:08:09.989Z" }, + { url = "https://files.pythonhosted.org/packages/9c/80/2df2e7977c4ede24c79ae39dcef3899bfc5f34d1ca7a5b24f182c9b7a9ca/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_i686.whl", hash = "sha256:cf44a7763aea9298c0aa7dbf859f87ed7012de8bda0f3977b6fb1d96745df856", size = 2021121, upload-time = "2025-09-08T23:08:11.907Z" }, + { url = "https://files.pythonhosted.org/packages/46/bd/2d45ad24f5f5ae7e8d01525eb76786fa7557136555cac7d929880519e33a/pyzmq-27.1.0-cp312-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f30f395a9e6fbca195400ce833c731e7b64c3919aa481af4d88c3759e0cb7496", size = 1878550, upload-time = "2025-09-08T23:08:13.513Z" }, + { url = "https://files.pythonhosted.org/packages/e6/2f/104c0a3c778d7c2ab8190e9db4f62f0b6957b53c9d87db77c284b69f33ea/pyzmq-27.1.0-cp312-abi3-win32.whl", hash = "sha256:250e5436a4ba13885494412b3da5d518cd0d3a278a1ae640e113c073a5f88edd", size = 559184, upload-time = "2025-09-08T23:08:15.163Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7f/a21b20d577e4100c6a41795842028235998a643b1ad406a6d4163ea8f53e/pyzmq-27.1.0-cp312-abi3-win_amd64.whl", hash = "sha256:9ce490cf1d2ca2ad84733aa1d69ce6855372cb5ce9223802450c9b2a7cba0ccf", size = 619480, upload-time = "2025-09-08T23:08:17.192Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/c2/c012beae5f76b72f007a9e91ee9401cb88c51d0f83c6257a03e785c81cc2/pyzmq-27.1.0-cp312-abi3-win_arm64.whl", hash = "sha256:75a2f36223f0d535a0c919e23615fc85a1e23b71f40c7eb43d7b1dedb4d8f15f", size = 552993, upload-time = "2025-09-08T23:08:18.926Z" }, + { url = "https://files.pythonhosted.org/packages/60/cb/84a13459c51da6cec1b7b1dc1a47e6db6da50b77ad7fd9c145842750a011/pyzmq-27.1.0-cp313-cp313-android_24_arm64_v8a.whl", hash = "sha256:93ad4b0855a664229559e45c8d23797ceac03183c7b6f5b4428152a6b06684a5", size = 1122436, upload-time = "2025-09-08T23:08:20.801Z" }, + { url = "https://files.pythonhosted.org/packages/dc/b6/94414759a69a26c3dd674570a81813c46a078767d931a6c70ad29fc585cb/pyzmq-27.1.0-cp313-cp313-android_24_x86_64.whl", hash = "sha256:fbb4f2400bfda24f12f009cba62ad5734148569ff4949b1b6ec3b519444342e6", size = 1156301, upload-time = "2025-09-08T23:08:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/a5/ad/15906493fd40c316377fd8a8f6b1f93104f97a752667763c9b9c1b71d42d/pyzmq-27.1.0-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:e343d067f7b151cfe4eb3bb796a7752c9d369eed007b91231e817071d2c2fec7", size = 1341197, upload-time = "2025-09-08T23:08:24.286Z" }, + { url = "https://files.pythonhosted.org/packages/14/1d/d343f3ce13db53a54cb8946594e567410b2125394dafcc0268d8dda027e0/pyzmq-27.1.0-cp313-cp313t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:08363b2011dec81c354d694bdecaef4770e0ae96b9afea70b3f47b973655cc05", size = 897275, upload-time = "2025-09-08T23:08:26.063Z" }, + { url = "https://files.pythonhosted.org/packages/69/2d/d83dd6d7ca929a2fc67d2c3005415cdf322af7751d773524809f9e585129/pyzmq-27.1.0-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d54530c8c8b5b8ddb3318f481297441af102517602b569146185fa10b63f4fa9", size = 660469, upload-time = "2025-09-08T23:08:27.623Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/cd/9822a7af117f4bc0f1952dbe9ef8358eb50a24928efd5edf54210b850259/pyzmq-27.1.0-cp313-cp313t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f3afa12c392f0a44a2414056d730eebc33ec0926aae92b5ad5cf26ebb6cc128", size = 847961, upload-time = "2025-09-08T23:08:29.672Z" }, + { url = "https://files.pythonhosted.org/packages/9a/12/f003e824a19ed73be15542f172fd0ec4ad0b60cf37436652c93b9df7c585/pyzmq-27.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c65047adafe573ff023b3187bb93faa583151627bc9c51fc4fb2c561ed689d39", size = 1650282, upload-time = "2025-09-08T23:08:31.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/4a/e82d788ed58e9a23995cee70dbc20c9aded3d13a92d30d57ec2291f1e8a3/pyzmq-27.1.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:90e6e9441c946a8b0a667356f7078d96411391a3b8f80980315455574177ec97", size = 2024468, upload-time = "2025-09-08T23:08:33.543Z" }, + { url = "https://files.pythonhosted.org/packages/d9/94/2da0a60841f757481e402b34bf4c8bf57fa54a5466b965de791b1e6f747d/pyzmq-27.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:add071b2d25f84e8189aaf0882d39a285b42fa3853016ebab234a5e78c7a43db", size = 1885394, upload-time = "2025-09-08T23:08:35.51Z" }, + { url = "https://files.pythonhosted.org/packages/4f/6f/55c10e2e49ad52d080dc24e37adb215e5b0d64990b57598abc2e3f01725b/pyzmq-27.1.0-cp313-cp313t-win32.whl", hash = "sha256:7ccc0700cfdf7bd487bea8d850ec38f204478681ea02a582a8da8171b7f90a1c", size = 574964, upload-time = "2025-09-08T23:08:37.178Z" }, + { url = "https://files.pythonhosted.org/packages/87/4d/2534970ba63dd7c522d8ca80fb92777f362c0f321900667c615e2067cb29/pyzmq-27.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:8085a9fba668216b9b4323be338ee5437a235fe275b9d1610e422ccc279733e2", size = 641029, upload-time = "2025-09-08T23:08:40.595Z" }, + { url = 
"https://files.pythonhosted.org/packages/f6/fa/f8aea7a28b0641f31d40dea42d7ef003fded31e184ef47db696bc74cd610/pyzmq-27.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:6bb54ca21bcfe361e445256c15eedf083f153811c37be87e0514934d6913061e", size = 561541, upload-time = "2025-09-08T23:08:42.668Z" }, + { url = "https://files.pythonhosted.org/packages/87/45/19efbb3000956e82d0331bafca5d9ac19ea2857722fa2caacefb6042f39d/pyzmq-27.1.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:ce980af330231615756acd5154f29813d553ea555485ae712c491cd483df6b7a", size = 1341197, upload-time = "2025-09-08T23:08:44.973Z" }, + { url = "https://files.pythonhosted.org/packages/48/43/d72ccdbf0d73d1343936296665826350cb1e825f92f2db9db3e61c2162a2/pyzmq-27.1.0-cp314-cp314t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:1779be8c549e54a1c38f805e56d2a2e5c009d26de10921d7d51cfd1c8d4632ea", size = 897175, upload-time = "2025-09-08T23:08:46.601Z" }, + { url = "https://files.pythonhosted.org/packages/2f/2e/a483f73a10b65a9ef0161e817321d39a770b2acf8bcf3004a28d90d14a94/pyzmq-27.1.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7200bb0f03345515df50d99d3db206a0a6bee1955fbb8c453c76f5bf0e08fb96", size = 660427, upload-time = "2025-09-08T23:08:48.187Z" }, + { url = "https://files.pythonhosted.org/packages/f5/d2/5f36552c2d3e5685abe60dfa56f91169f7a2d99bbaf67c5271022ab40863/pyzmq-27.1.0-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01c0e07d558b06a60773744ea6251f769cd79a41a97d11b8bf4ab8f034b0424d", size = 847929, upload-time = "2025-09-08T23:08:49.76Z" }, + { url = "https://files.pythonhosted.org/packages/c4/2a/404b331f2b7bf3198e9945f75c4c521f0c6a3a23b51f7a4a401b94a13833/pyzmq-27.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:80d834abee71f65253c91540445d37c4c561e293ba6e741b992f20a105d69146", size = 1650193, upload-time = "2025-09-08T23:08:51.7Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/0b/f4107e33f62a5acf60e3ded67ed33d79b4ce18de432625ce2fc5093d6388/pyzmq-27.1.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:544b4e3b7198dde4a62b8ff6685e9802a9a1ebf47e77478a5eb88eca2a82f2fd", size = 2024388, upload-time = "2025-09-08T23:08:53.393Z" }, + { url = "https://files.pythonhosted.org/packages/0d/01/add31fe76512642fd6e40e3a3bd21f4b47e242c8ba33efb6809e37076d9b/pyzmq-27.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cedc4c68178e59a4046f97eca31b148ddcf51e88677de1ef4e78cf06c5376c9a", size = 1885316, upload-time = "2025-09-08T23:08:55.702Z" }, + { url = "https://files.pythonhosted.org/packages/c4/59/a5f38970f9bf07cee96128de79590bb354917914a9be11272cfc7ff26af0/pyzmq-27.1.0-cp314-cp314t-win32.whl", hash = "sha256:1f0b2a577fd770aa6f053211a55d1c47901f4d537389a034c690291485e5fe92", size = 587472, upload-time = "2025-09-08T23:08:58.18Z" }, + { url = "https://files.pythonhosted.org/packages/70/d8/78b1bad170f93fcf5e3536e70e8fadac55030002275c9a29e8f5719185de/pyzmq-27.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:19c9468ae0437f8074af379e986c5d3d7d7bfe033506af442e8c879732bedbe0", size = 661401, upload-time = "2025-09-08T23:08:59.802Z" }, + { url = "https://files.pythonhosted.org/packages/81/d6/4bfbb40c9a0b42fc53c7cf442f6385db70b40f74a783130c5d0a5aa62228/pyzmq-27.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dc5dbf68a7857b59473f7df42650c621d7e8923fb03fa74a526890f4d33cc4d7", size = 575170, upload-time = "2025-09-08T23:09:01.418Z" }, + { url = "https://files.pythonhosted.org/packages/f3/81/a65e71c1552f74dec9dff91d95bafb6e0d33338a8dfefbc88aa562a20c92/pyzmq-27.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c17e03cbc9312bee223864f1a2b13a99522e0dc9f7c5df0177cd45210ac286e6", size = 836266, upload-time = "2025-09-08T23:09:40.048Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/ed/0202ca350f4f2b69faa95c6d931e3c05c3a397c184cacb84cb4f8f42f287/pyzmq-27.1.0-pp310-pypy310_pp73-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f328d01128373cb6763823b2b4e7f73bdf767834268c565151eacb3b7a392f90", size = 800206, upload-time = "2025-09-08T23:09:41.902Z" }, + { url = "https://files.pythonhosted.org/packages/47/42/1ff831fa87fe8f0a840ddb399054ca0009605d820e2b44ea43114f5459f4/pyzmq-27.1.0-pp310-pypy310_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c1790386614232e1b3a40a958454bdd42c6d1811837b15ddbb052a032a43f62", size = 567747, upload-time = "2025-09-08T23:09:43.741Z" }, + { url = "https://files.pythonhosted.org/packages/d1/db/5c4d6807434751e3f21231bee98109aa57b9b9b55e058e450d0aef59b70f/pyzmq-27.1.0-pp310-pypy310_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:448f9cb54eb0cee4732b46584f2710c8bc178b0e5371d9e4fc8125201e413a74", size = 747371, upload-time = "2025-09-08T23:09:45.575Z" }, + { url = "https://files.pythonhosted.org/packages/26/af/78ce193dbf03567eb8c0dc30e3df2b9e56f12a670bf7eb20f9fb532c7e8a/pyzmq-27.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:05b12f2d32112bf8c95ef2e74ec4f1d4beb01f8b5e703b38537f8849f92cb9ba", size = 544862, upload-time = "2025-09-08T23:09:47.448Z" }, + { url = "https://files.pythonhosted.org/packages/4c/c6/c4dcdecdbaa70969ee1fdced6d7b8f60cfabe64d25361f27ac4665a70620/pyzmq-27.1.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:18770c8d3563715387139060d37859c02ce40718d1faf299abddcdcc6a649066", size = 836265, upload-time = "2025-09-08T23:09:49.376Z" }, + { url = "https://files.pythonhosted.org/packages/3e/79/f38c92eeaeb03a2ccc2ba9866f0439593bb08c5e3b714ac1d553e5c96e25/pyzmq-27.1.0-pp311-pypy311_pp73-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:ac25465d42f92e990f8d8b0546b01c391ad431c3bf447683fdc40565941d0604", size = 800208, upload-time = "2025-09-08T23:09:51.073Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/0e/3f0d0d335c6b3abb9b7b723776d0b21fa7f3a6c819a0db6097059aada160/pyzmq-27.1.0-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53b40f8ae006f2734ee7608d59ed661419f087521edbfc2149c3932e9c14808c", size = 567747, upload-time = "2025-09-08T23:09:52.698Z" }, + { url = "https://files.pythonhosted.org/packages/a1/cf/f2b3784d536250ffd4be70e049f3b60981235d70c6e8ce7e3ef21e1adb25/pyzmq-27.1.0-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f605d884e7c8be8fe1aa94e0a783bf3f591b84c24e4bc4f3e7564c82ac25e271", size = 747371, upload-time = "2025-09-08T23:09:54.563Z" }, + { url = "https://files.pythonhosted.org/packages/01/1b/5dbe84eefc86f48473947e2f41711aded97eecef1231f4558f1f02713c12/pyzmq-27.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c9f7f6e13dff2e44a6afeaf2cf54cee5929ad64afaf4d40b50f93c58fc687355", size = 544862, upload-time = "2025-09-08T23:09:56.509Z" }, +] + [[package]] name = "readme-renderer" version = "44.0" @@ -1475,6 +2037,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/67/921ec3024056483db83953ae8e48079ad62b92db7880013ca77632921dd0/readme_renderer-44.0-py3-none-any.whl", hash = "sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151", size = 13310, upload-time = "2024-07-08T15:00:56.577Z" }, ] +[[package]] +name = "redis" +version = "7.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7b/7f/3759b1d0d72b7c92f0d70ffd9dc962b7b7b5ee74e135f9d7d8ab06b8a318/redis-7.4.0.tar.gz", hash = "sha256:64a6ea7bf567ad43c964d2c30d82853f8df927c5c9017766c55a1d1ed95d18ad", size = 4943913, upload-time = "2026-03-24T09:14:37.53Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/74/3a/95deec7db1eb53979973ebd156f3369a72732208d1391cd2e5d127062a32/redis-7.4.0-py3-none-any.whl", hash = "sha256:a9c74a5c893a5ef8455a5adb793a31bb70feb821c86eccb62eebef5a19c429ec", size = 409772, upload-time = "2026-03-24T09:14:35.968Z" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -1698,6 +2272,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, ] +[[package]] +name = "setuptools" +version = "82.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = 
"sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "soupsieve" +version = "2.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, +] + [[package]] name = "sse-starlette" version = "3.3.4" @@ -1724,6 +2334,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, ] +[[package]] +name = "stdlibs" +version = "2026.2.26" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/cd/2710eaacaefc8be2f520b55c313498a50a295a8378e932c70d4ea34250aa/stdlibs-2026.2.26.tar.gz", hash = "sha256:10f911bdd8d3e45b452cc187b3527e6f9d288c8a943c5f973da94c71b2757d5b", size = 20203, upload-time = "2026-02-26T23:30:04.775Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/ec/b6a5a568d584659e037c8f53fc25acc79950ac32796b8861b2015446b7b2/stdlibs-2026.2.26-py3-none-any.whl", hash = "sha256:3257486216eac5ac627a3a4c5665802aca72fe7fc9e4ab1f232b1fb47bfd3db6", size = 59288, upload-time = "2026-02-26T23:30:03.597Z" }, +] + +[[package]] +name = "tinycss2" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/fd/7a5ee21fd08ff70d3d33a5781c255cbe779659bd03278feb98b19ee550f4/tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7", size = 87085, upload-time = "2024-10-24T14:58:29.895Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610, upload-time = "2024-10-24T14:58:28.029Z" }, +] + [[package]] name = "tomli" version = "2.4.1" @@ -1778,6 +2409,105 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl", hash = "sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe", size = 14583, upload-time = "2026-03-25T20:22:03.012Z" }, ] +[[package]] +name = "tornado" +version = "6.5.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/f1/3173dfa4a18db4a9b03e5d55325559dab51ee653763bb8745a75af491286/tornado-6.5.5.tar.gz", 
hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9", size = 516006, upload-time = "2026-03-10T21:31:02.067Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/59/8c/77f5097695f4dd8255ecbd08b2a1ed8ba8b953d337804dd7080f199e12bf/tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa", size = 445983, upload-time = "2026-03-10T21:30:44.28Z" }, + { url = "https://files.pythonhosted.org/packages/ab/5e/7625b76cd10f98f1516c36ce0346de62061156352353ef2da44e5c21523c/tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521", size = 444246, upload-time = "2026-03-10T21:30:46.571Z" }, + { url = "https://files.pythonhosted.org/packages/b2/04/7b5705d5b3c0fab088f434f9c83edac1573830ca49ccf29fb83bf7178eec/tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5", size = 447229, upload-time = "2026-03-10T21:30:48.273Z" }, + { url = "https://files.pythonhosted.org/packages/34/01/74e034a30ef59afb4097ef8659515e96a39d910b712a89af76f5e4e1f93c/tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07", size = 448192, upload-time = "2026-03-10T21:30:51.22Z" }, + { url = "https://files.pythonhosted.org/packages/be/00/fe9e02c5a96429fce1a1d15a517f5d8444f9c412e0bb9eadfbe3b0fc55bf/tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e", size = 448039, upload-time = "2026-03-10T21:30:53.52Z" }, + { url = "https://files.pythonhosted.org/packages/82/9e/656ee4cec0398b1d18d0f1eb6372c41c6b889722641d84948351ae19556d/tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca", size = 447445, upload-time = "2026-03-10T21:30:55.541Z" }, + { url = "https://files.pythonhosted.org/packages/5a/76/4921c00511f88af86a33de770d64141170f1cfd9c00311aea689949e274e/tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7", size = 448582, upload-time = "2026-03-10T21:30:57.142Z" }, + { url = "https://files.pythonhosted.org/packages/2c/23/f6c6112a04d28eed765e374435fb1a9198f73e1ec4b4024184f21faeb1ad/tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b", size = 448990, upload-time = "2026-03-10T21:30:58.857Z" }, + { url = "https://files.pythonhosted.org/packages/b7/c8/876602cbc96469911f0939f703453c1157b0c826ecb05bdd32e023397d4e/tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6", size = 448016, upload-time = "2026-03-10T21:31:00.43Z" }, +] + +[[package]] +name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621, upload-time = "2024-04-19T11:11:49.746Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, +] + +[[package]] +name = "tree-sitter" +version = "0.25.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz", 
hash = "sha256:fe43c158555da46723b28b52e058ad444195afd1db3ca7720c59a254544e9c20", size = 177961, upload-time = "2025-09-25T17:37:59.751Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/d4/f7ffb855cb039b7568aba4911fbe42e4c39c0e4398387c8e0d8251489992/tree_sitter-0.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72a510931c3c25f134aac2daf4eb4feca99ffe37a35896d7150e50ac3eee06c7", size = 146749, upload-time = "2025-09-25T17:37:16.475Z" }, + { url = "https://files.pythonhosted.org/packages/9a/58/f8a107f9f89700c0ab2930f1315e63bdedccbb5fd1b10fcbc5ebadd54ac8/tree_sitter-0.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:44488e0e78146f87baaa009736886516779253d6d6bac3ef636ede72bc6a8234", size = 137766, upload-time = "2025-09-25T17:37:18.138Z" }, + { url = "https://files.pythonhosted.org/packages/19/fb/357158d39f01699faea466e8fd5a849f5a30252c68414bddc20357a9ac79/tree_sitter-0.25.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2f8e7d6b2f8489d4a9885e3adcaef4bc5ff0a275acd990f120e29c4ab3395c5", size = 599809, upload-time = "2025-09-25T17:37:19.169Z" }, + { url = "https://files.pythonhosted.org/packages/c5/a4/68ae301626f2393a62119481cb660eb93504a524fc741a6f1528a4568cf6/tree_sitter-0.25.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20b570690f87f1da424cd690e51cc56728d21d63f4abd4b326d382a30353acc7", size = 627676, upload-time = "2025-09-25T17:37:20.715Z" }, + { url = "https://files.pythonhosted.org/packages/69/fe/4c1bef37db5ca8b17ca0b3070f2dff509468a50b3af18f17665adcab42b9/tree_sitter-0.25.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a0ec41b895da717bc218a42a3a7a0bfcfe9a213d7afaa4255353901e0e21f696", size = 624281, upload-time = "2025-09-25T17:37:21.823Z" }, + { url = "https://files.pythonhosted.org/packages/d4/30/3283cb7fa251cae2a0bf8661658021a789810db3ab1b0569482d4a3671fd/tree_sitter-0.25.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:7712335855b2307a21ae86efe949c76be36c6068d76df34faa27ce9ee40ff444", size = 127295, upload-time = "2025-09-25T17:37:22.977Z" }, + { url = "https://files.pythonhosted.org/packages/88/90/ceb05e6de281aebe82b68662890619580d4ffe09283ebd2ceabcf5df7b4a/tree_sitter-0.25.2-cp310-cp310-win_arm64.whl", hash = "sha256:a925364eb7fbb9cdce55a9868f7525a1905af512a559303bd54ef468fd88cb37", size = 113991, upload-time = "2025-09-25T17:37:23.854Z" }, + { url = "https://files.pythonhosted.org/packages/7c/22/88a1e00b906d26fa8a075dd19c6c3116997cb884bf1b3c023deb065a344d/tree_sitter-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ca72d841215b6573ed0655b3a5cd1133f9b69a6fa561aecad40dca9029d75b", size = 146752, upload-time = "2025-09-25T17:37:24.775Z" }, + { url = "https://files.pythonhosted.org/packages/57/1c/22cc14f3910017b7a76d7358df5cd315a84fe0c7f6f7b443b49db2e2790d/tree_sitter-0.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc0351cfe5022cec5a77645f647f92a936b38850346ed3f6d6babfbeeeca4d26", size = 137765, upload-time = "2025-09-25T17:37:26.103Z" }, + { url = "https://files.pythonhosted.org/packages/1c/0c/d0de46ded7d5b34631e0f630d9866dab22d3183195bf0f3b81de406d6622/tree_sitter-0.25.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1799609636c0193e16c38f366bda5af15b1ce476df79ddaae7dd274df9e44266", size = 604643, upload-time = "2025-09-25T17:37:27.398Z" }, + { url = "https://files.pythonhosted.org/packages/34/38/b735a58c1c2f60a168a678ca27b4c1a9df725d0bf2d1a8a1c571c033111e/tree_sitter-0.25.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e65ae456ad0d210ee71a89ee112ac7e72e6c2e5aac1b95846ecc7afa68a194c", size = 632229, upload-time = "2025-09-25T17:37:28.463Z" }, + { url = "https://files.pythonhosted.org/packages/32/f6/cda1e1e6cbff5e28d8433578e2556d7ba0b0209d95a796128155b97e7693/tree_sitter-0.25.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:49ee3c348caa459244ec437ccc7ff3831f35977d143f65311572b8ba0a5f265f", size = 629861, upload-time = "2025-09-25T17:37:29.593Z" }, + { url = "https://files.pythonhosted.org/packages/f9/19/427e5943b276a0dd74c2a1f1d7a7393443f13d1ee47dedb3f8127903c080/tree_sitter-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:56ac6602c7d09c2c507c55e58dc7026b8988e0475bd0002f8a386cce5e8e8adc", size = 127304, upload-time = "2025-09-25T17:37:30.549Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d9/eef856dc15f784d85d1397a17f3ee0f82df7778efce9e1961203abfe376a/tree_sitter-0.25.2-cp311-cp311-win_arm64.whl", hash = "sha256:b3d11a3a3ac89bb8a2543d75597f905a9926f9c806f40fcca8242922d1cc6ad5", size = 113990, upload-time = "2025-09-25T17:37:31.852Z" }, + { url = "https://files.pythonhosted.org/packages/3c/9e/20c2a00a862f1c2897a436b17edb774e831b22218083b459d0d081c9db33/tree_sitter-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ddabfff809ffc983fc9963455ba1cecc90295803e06e140a4c83e94c1fa3d960", size = 146941, upload-time = "2025-09-25T17:37:34.813Z" }, + { url = "https://files.pythonhosted.org/packages/ef/04/8512e2062e652a1016e840ce36ba1cc33258b0dcc4e500d8089b4054afec/tree_sitter-0.25.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c0c0ab5f94938a23fe81928a21cc0fac44143133ccc4eb7eeb1b92f84748331c", size = 137699, upload-time = "2025-09-25T17:37:36.349Z" }, + { url = "https://files.pythonhosted.org/packages/47/8a/d48c0414db19307b0fb3bb10d76a3a0cbe275bb293f145ee7fba2abd668e/tree_sitter-0.25.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd12d80d91d4114ca097626eb82714618dcdfacd6a5e0955216c6485c350ef99", size = 607125, upload-time = "2025-09-25T17:37:37.725Z" }, + { url = "https://files.pythonhosted.org/packages/39/d1/b95f545e9fc5001b8a78636ef942a4e4e536580caa6a99e73dd0a02e87aa/tree_sitter-0.25.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:b43a9e4c89d4d0839de27cd4d6902d33396de700e9ff4c5ab7631f277a85ead9", size = 635418, upload-time = "2025-09-25T17:37:38.922Z" }, + { url = "https://files.pythonhosted.org/packages/de/4d/b734bde3fb6f3513a010fa91f1f2875442cdc0382d6a949005cd84563d8f/tree_sitter-0.25.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbb1706407c0e451c4f8cc016fec27d72d4b211fdd3173320b1ada7a6c74c3ac", size = 631250, upload-time = "2025-09-25T17:37:40.039Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/5f654994f36d10c64d50a192239599fcae46677491c8dd53e7579c35a3e3/tree_sitter-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:6d0302550bbe4620a5dc7649517c4409d74ef18558276ce758419cf09e578897", size = 127156, upload-time = "2025-09-25T17:37:41.132Z" }, + { url = "https://files.pythonhosted.org/packages/67/23/148c468d410efcf0a9535272d81c258d840c27b34781d625f1f627e2e27d/tree_sitter-0.25.2-cp312-cp312-win_arm64.whl", hash = "sha256:0c8b6682cac77e37cfe5cf7ec388844957f48b7bd8d6321d0ca2d852994e10d5", size = 113984, upload-time = "2025-09-25T17:37:42.074Z" }, + { url = "https://files.pythonhosted.org/packages/8c/67/67492014ce32729b63d7ef318a19f9cfedd855d677de5773476caf771e96/tree_sitter-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0628671f0de69bb279558ef6b640bcfc97864fe0026d840f872728a86cd6b6cd", size = 146926, upload-time = "2025-09-25T17:37:43.041Z" }, + { url = "https://files.pythonhosted.org/packages/4e/9c/a278b15e6b263e86c5e301c82a60923fa7c59d44f78d7a110a89a413e640/tree_sitter-0.25.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f5ddcd3e291a749b62521f71fc953f66f5fd9743973fd6dd962b092773569601", size = 137712, upload-time = "2025-09-25T17:37:44.039Z" }, + { url = "https://files.pythonhosted.org/packages/54/9a/423bba15d2bf6473ba67846ba5244b988cd97a4b1ea2b146822162256794/tree_sitter-0.25.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd88fbb0f6c3a0f28f0a68d72df88e9755cf5215bae146f5a1bdc8362b772053", 
size = 607873, upload-time = "2025-09-25T17:37:45.477Z" }, + { url = "https://files.pythonhosted.org/packages/ed/4c/b430d2cb43f8badfb3a3fa9d6cd7c8247698187b5674008c9d67b2a90c8e/tree_sitter-0.25.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b878e296e63661c8e124177cc3084b041ba3f5936b43076d57c487822426f614", size = 636313, upload-time = "2025-09-25T17:37:46.68Z" }, + { url = "https://files.pythonhosted.org/packages/9d/27/5f97098dbba807331d666a0997662e82d066e84b17d92efab575d283822f/tree_sitter-0.25.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d77605e0d353ba3fe5627e5490f0fbfe44141bafa4478d88ef7954a61a848dae", size = 631370, upload-time = "2025-09-25T17:37:47.993Z" }, + { url = "https://files.pythonhosted.org/packages/d4/3c/87caaed663fabc35e18dc704cd0e9800a0ee2f22bd18b9cbe7c10799895d/tree_sitter-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:463c032bd02052d934daa5f45d183e0521ceb783c2548501cf034b0beba92c9b", size = 127157, upload-time = "2025-09-25T17:37:48.967Z" }, + { url = "https://files.pythonhosted.org/packages/d5/23/f8467b408b7988aff4ea40946a4bd1a2c1a73d17156a9d039bbaff1e2ceb/tree_sitter-0.25.2-cp313-cp313-win_arm64.whl", hash = "sha256:b3f63a1796886249bd22c559a5944d64d05d43f2be72961624278eff0dcc5cb8", size = 113975, upload-time = "2025-09-25T17:37:49.922Z" }, + { url = "https://files.pythonhosted.org/packages/07/e3/d9526ba71dfbbe4eba5e51d89432b4b333a49a1e70712aa5590cd22fc74f/tree_sitter-0.25.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:65d3c931013ea798b502782acab986bbf47ba2c452610ab0776cf4a8ef150fc0", size = 146776, upload-time = "2025-09-25T17:37:50.898Z" }, + { url = "https://files.pythonhosted.org/packages/42/97/4bd4ad97f85a23011dd8a535534bb1035c4e0bac1234d58f438e15cff51f/tree_sitter-0.25.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:bda059af9d621918efb813b22fb06b3fe00c3e94079c6143fcb2c565eb44cb87", size = 137732, upload-time = "2025-09-25T17:37:51.877Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/19/1e968aa0b1b567988ed522f836498a6a9529a74aab15f09dd9ac1e41f505/tree_sitter-0.25.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eac4e8e4c7060c75f395feec46421eb61212cb73998dbe004b7384724f3682ab", size = 609456, upload-time = "2025-09-25T17:37:52.925Z" }, + { url = "https://files.pythonhosted.org/packages/48/b6/cf08f4f20f4c9094006ef8828555484e842fc468827ad6e56011ab668dbd/tree_sitter-0.25.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:260586381b23be33b6191a07cea3d44ecbd6c01aa4c6b027a0439145fcbc3358", size = 636772, upload-time = "2025-09-25T17:37:54.647Z" }, + { url = "https://files.pythonhosted.org/packages/57/e2/d42d55bf56360987c32bc7b16adb06744e425670b823fb8a5786a1cea991/tree_sitter-0.25.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7d2ee1acbacebe50ba0f85fff1bc05e65d877958f00880f49f9b2af38dce1af0", size = 631522, upload-time = "2025-09-25T17:37:55.833Z" }, + { url = "https://files.pythonhosted.org/packages/03/87/af9604ebe275a9345d88c3ace0cf2a1341aa3f8ef49dd9fc11662132df8a/tree_sitter-0.25.2-cp314-cp314-win_amd64.whl", hash = "sha256:4973b718fcadfb04e59e746abfbb0288694159c6aeecd2add59320c03368c721", size = 130864, upload-time = "2025-09-25T17:37:57.453Z" }, + { url = "https://files.pythonhosted.org/packages/a6/6e/e64621037357acb83d912276ffd30a859ef117f9c680f2e3cb955f47c680/tree_sitter-0.25.2-cp314-cp314-win_arm64.whl", hash = "sha256:b8d4429954a3beb3e844e2872610d2a4800ba4eb42bb1990c6a4b1949b18459f", size = 117470, upload-time = "2025-09-25T17:37:58.431Z" }, +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9f/fb/7e2962bc1901daf264e7ce263b168e0139304a5f8f66c9b2baf20e550f87/tree_sitter_c_sharp-0.23.5.tar.gz", hash = 
"sha256:2635c7d5ec93e59f2e831b571bed99c4cc68a5d183a0994020aa769e1b990a71", size = 1147914, upload-time = "2026-04-14T16:11:22.441Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/c4/86d8d469400a856757a464a6ac01af97d8cdacbb595e62bdb98bf1e9db90/tree_sitter_c_sharp-0.23.5-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:61e1981cf21b09ee547b9c4c68e64fb4394325f8fc8d5f6d50d41471eba923ea", size = 333658, upload-time = "2026-04-14T16:11:11.288Z" }, + { url = "https://files.pythonhosted.org/packages/c8/13/593c8603f834eaf15082b81e079289fc9f062b4c0ab5b9489134084eec06/tree_sitter_c_sharp-0.23.5-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:a75994a11f6fed3f5b8c36ad6a00e5dc43205bd912c43af3a2a54fdf649664eb", size = 376296, upload-time = "2026-04-14T16:11:12.972Z" }, + { url = "https://files.pythonhosted.org/packages/41/5a/a8855cbb5bbab28adb29c2c7f0e7be5a9f1d21450c13b3c3e613190d9b8c/tree_sitter_c_sharp-0.23.5-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:aa88a780204cd153c4c1ae2d59c654cee1402212fa0d069823d6d34301587438", size = 358333, upload-time = "2026-04-14T16:11:14.214Z" }, + { url = "https://files.pythonhosted.org/packages/0a/c8/e0f391e343f5424d0627e3b6886c77baeb1249a3f10986be00b0b64ecdab/tree_sitter_c_sharp-0.23.5-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea38fb095d85d360dc5a0bec2fa605e496228876f798c9e089d5f0e72bcef46", size = 359448, upload-time = "2026-04-14T16:11:15.419Z" }, + { url = "https://files.pythonhosted.org/packages/6f/fc/10f807ac79f928241c5e0d827fdaf91e97dfba662fc7e07d7bd664140ec1/tree_sitter_c_sharp-0.23.5-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:05a9256415e7f24d4f133133794a9c224c60d19f677a04e2f6a94c25090b6d65", size = 358144, upload-time = "2026-04-14T16:11:17.087Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/2a/6c3e12ef0cf09138717fcc02e1de8b76a3928d1bed65c7e3c2bd3172bcef/tree_sitter_c_sharp-0.23.5-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8636dc70b5a373c35c1036ed5de98e801f2e4d105ae41e2e20b6804c36e3bf33", size = 357525, upload-time = "2026-04-14T16:11:18.214Z" }, + { url = "https://files.pythonhosted.org/packages/2b/e0/bd287b092d611df95a9149117fd27b5947ce75527113d6898a4b4e2c8858/tree_sitter_c_sharp-0.23.5-cp310-abi3-win_amd64.whl", hash = "sha256:41a28cfa3d9ea50f5629e44550a03188c8fbd5079803dfc03554b6fd594b33fa", size = 338756, upload-time = "2026-04-14T16:11:19.661Z" }, + { url = "https://files.pythonhosted.org/packages/7f/fb/114ff43fdd256d0befed32f77c1dadee9517867181c70794571f718ed05c/tree_sitter_c_sharp-0.23.5-cp310-abi3-win_arm64.whl", hash = "sha256:2de4ebf95ddc2e92cd3105c8a8e0e7ec646bc82f52bfaf2f3acec0fa2401ec09", size = 337260, upload-time = "2026-04-14T16:11:20.849Z" }, +] + +[[package]] +name = "tree-sitter-language-pack" +version = "1.6.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tree-sitter" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/09/bd/ac34ab0ee92b2d27802754c575965e921490ce11b5357bf89f74a78e8309/tree_sitter_language_pack-1.6.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:f5998cfee5735a8e7e691f577062ff7eb3a7ea405ae5654c9cecaa4a1e6c81b0", size = 2241997, upload-time = "2026-04-18T07:04:36.042Z" }, + { url = "https://files.pythonhosted.org/packages/a1/e0/b997b8c3e0886288a47890e6313c3a7e74ea8192e2d141b3eab64d59a276/tree_sitter_language_pack-1.6.2-cp310-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:8ce814ede4e295f3419ba179b523889c52cc3a998ac085356a470e776596c026", size = 2419565, upload-time = "2026-04-18T07:04:37.67Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a4/629e6983a93fbb52dc50af495ec0431565c6477eea4680d4298238e9831e/tree_sitter_language_pack-1.6.2-cp310-abi3-manylinux_2_34_x86_64.whl", hash = 
"sha256:2305df7835c1cb3d34b71450b79d135878bc25ea5d02d9984cee864607a4ad60", size = 2555465, upload-time = "2026-04-18T07:04:39.57Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/0f486ca7344f6f3345441e8516b464214c7c5a0f3775d11fda1368901c38/tree_sitter_language_pack-1.6.2-cp310-abi3-win_amd64.whl", hash = "sha256:08351222b43c3a73665571eaa440366add2093a2492bb35f032fb7a31945e720", size = 2351156, upload-time = "2026-04-18T07:04:41.377Z" }, +] + [[package]] name = "twine" version = "6.2.0" @@ -1798,6 +2528,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/7a/882d99539b19b1490cac5d77c67338d126e4122c8276bf640e411650c830/twine-6.2.0-py3-none-any.whl", hash = "sha256:418ebf08ccda9a8caaebe414433b0ba5e25eb5e4a927667122fbe8f829f985d8", size = 42727, upload-time = "2025-09-04T15:43:15.994Z" }, ] +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -1858,6 +2603,56 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/8d/edd0bd910ff803c308ee9a6b7778621af0d10252219ad9f19ef4d4982a61/virtualenv-21.2.4-py3-none-any.whl", hash = "sha256:29d21e941795206138d0f22f4e45ff7050e5da6c6472299fb7103318763861ac", size = 5831232, upload-time = 
"2026-04-14T22:15:29.342Z" }, ] +[[package]] +name = "watchdog" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220, upload-time = "2024-11-01T14:07:13.037Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/56/90994d789c61df619bfc5ce2ecdabd5eeff564e1eb47512bd01b5e019569/watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26", size = 96390, upload-time = "2024-11-01T14:06:24.793Z" }, + { url = "https://files.pythonhosted.org/packages/55/46/9a67ee697342ddf3c6daa97e3a587a56d6c4052f881ed926a849fcf7371c/watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112", size = 88389, upload-time = "2024-11-01T14:06:27.112Z" }, + { url = "https://files.pythonhosted.org/packages/44/65/91b0985747c52064d8701e1075eb96f8c40a79df889e59a399453adfb882/watchdog-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3", size = 89020, upload-time = "2024-11-01T14:06:29.876Z" }, + { url = "https://files.pythonhosted.org/packages/e0/24/d9be5cd6642a6aa68352ded4b4b10fb0d7889cb7f45814fb92cecd35f101/watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c", size = 96393, upload-time = "2024-11-01T14:06:31.756Z" }, + { url = "https://files.pythonhosted.org/packages/63/7a/6013b0d8dbc56adca7fdd4f0beed381c59f6752341b12fa0886fa7afc78b/watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2", size = 88392, upload-time = "2024-11-01T14:06:32.99Z" }, + { url 
= "https://files.pythonhosted.org/packages/d1/40/b75381494851556de56281e053700e46bff5b37bf4c7267e858640af5a7f/watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c", size = 89019, upload-time = "2024-11-01T14:06:34.963Z" }, + { url = "https://files.pythonhosted.org/packages/39/ea/3930d07dafc9e286ed356a679aa02d777c06e9bfd1164fa7c19c288a5483/watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948", size = 96471, upload-time = "2024-11-01T14:06:37.745Z" }, + { url = "https://files.pythonhosted.org/packages/12/87/48361531f70b1f87928b045df868a9fd4e253d9ae087fa4cf3f7113be363/watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860", size = 88449, upload-time = "2024-11-01T14:06:39.748Z" }, + { url = "https://files.pythonhosted.org/packages/5b/7e/8f322f5e600812e6f9a31b75d242631068ca8f4ef0582dd3ae6e72daecc8/watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0", size = 89054, upload-time = "2024-11-01T14:06:41.009Z" }, + { url = "https://files.pythonhosted.org/packages/68/98/b0345cabdce2041a01293ba483333582891a3bd5769b08eceb0d406056ef/watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c", size = 96480, upload-time = "2024-11-01T14:06:42.952Z" }, + { url = "https://files.pythonhosted.org/packages/85/83/cdf13902c626b28eedef7ec4f10745c52aad8a8fe7eb04ed7b1f111ca20e/watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134", size = 88451, upload-time = "2024-11-01T14:06:45.084Z" }, + { url = 
"https://files.pythonhosted.org/packages/fe/c4/225c87bae08c8b9ec99030cd48ae9c4eca050a59bf5c2255853e18c87b50/watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b", size = 89057, upload-time = "2024-11-01T14:06:47.324Z" }, + { url = "https://files.pythonhosted.org/packages/30/ad/d17b5d42e28a8b91f8ed01cb949da092827afb9995d4559fd448d0472763/watchdog-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881", size = 87902, upload-time = "2024-11-01T14:06:53.119Z" }, + { url = "https://files.pythonhosted.org/packages/5c/ca/c3649991d140ff6ab67bfc85ab42b165ead119c9e12211e08089d763ece5/watchdog-6.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11", size = 88380, upload-time = "2024-11-01T14:06:55.19Z" }, + { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" }, + { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload-time = "2024-11-01T14:07:03.893Z" }, + { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload-time = "2024-11-01T14:07:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload-time = "2024-11-01T14:07:06.376Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload-time = "2024-11-01T14:07:07.547Z" }, + { url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload-time = "2024-11-01T14:07:09.525Z" }, + { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload-time = "2024-11-01T14:07:10.686Z" }, + { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = 
"sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" }, +] + +[[package]] +name = "wcwidth" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/a2/8e3becb46433538a38726c948d3399905a4c7cabd0df578ede5dc51f0ec2/wcwidth-0.6.0.tar.gz", hash = "sha256:cdc4e4262d6ef9a1a57e018384cbeb1208d8abbc64176027e2c2455c81313159", size = 159684, upload-time = "2026-02-06T19:19:40.919Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/5a/199c59e0a824a3db2b89c5d2dade7ab5f9624dbf6448dc291b46d5ec94d3/wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad", size = 94189, upload-time = "2026-02-06T19:19:39.646Z" }, +] + +[[package]] +name = "webencodings" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/02/ae6ceac1baeda530866a85075641cec12989bd8d31af6d5ab4a3e8c92f47/webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", size = 9721, upload-time = "2017-04-05T20:21:34.189Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774, upload-time = "2017-04-05T20:21:32.581Z" }, +] + [[package]] name = "zipp" version = "3.23.1" From 29edffc330503bf4ac8ce11f4689fc2900764154 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 21 Apr 2026 21:46:09 +0500 Subject: [PATCH 03/32] refactor(core): global cleanup of Any --- codeclone/analysis/cfg.py | 50 +-- codeclone/analysis/normalizer.py | 15 +- .../baseline/_metrics_baseline_payload.py | 32 +- .../baseline/_metrics_baseline_validation.py | 143 +++++++-- 
codeclone/baseline/clone_baseline.py | 23 +- codeclone/baseline/metrics_baseline.py | 24 +- codeclone/baseline/trust.py | 18 +- codeclone/cache/_canonicalize.py | 47 ++- codeclone/cache/_validators.py | 53 +++- codeclone/config/argparse_builder.py | 6 +- codeclone/config/spec.py | 6 +- codeclone/core/_types.py | 64 +++- codeclone/core/discovery.py | 16 +- codeclone/core/discovery_cache.py | 284 +++++++++++++----- codeclone/core/metrics_payload.py | 17 +- codeclone/core/pipeline.py | 22 +- codeclone/core/reporting.py | 11 +- codeclone/core/worker.py | 3 +- codeclone/main.py | 2 +- codeclone/metrics/coverage_join.py | 8 +- codeclone/metrics/registry.py | 183 +++++++---- codeclone/report/derived.py | 4 +- codeclone/report/gates/evaluator.py | 28 +- codeclone/report/html/widgets/snippets.py | 10 +- codeclone/report/overview.py | 106 +++---- codeclone/report/renderers/sarif.py | 10 +- codeclone/surfaces/cli/attrs.py | 42 +++ codeclone/surfaces/cli/baseline_state.py | 28 +- codeclone/surfaces/cli/changed_scope.py | 27 +- codeclone/surfaces/cli/console.py | 30 +- codeclone/surfaces/cli/execution.py | 113 ++++--- codeclone/surfaces/cli/main.py | 14 - codeclone/surfaces/cli/post_run.py | 49 ++- codeclone/surfaces/cli/report_meta.py | 20 +- codeclone/surfaces/cli/reports_output.py | 50 +-- codeclone/surfaces/cli/runtime.py | 239 +++++++-------- codeclone/surfaces/cli/startup.py | 74 ++--- codeclone/surfaces/cli/summary.py | 14 +- codeclone/surfaces/cli/types.py | 115 ++++++- codeclone/surfaces/cli/workflow.py | 28 +- codeclone/surfaces/mcp/server.py | 70 +++-- codeclone/surfaces/mcp/service.py | 137 ++++----- codeclone/surfaces/mcp/session.py | 197 +++++++----- codeclone/surfaces/mcp/tools/_base.py | 6 +- codeclone/surfaces/mcp/tools/analyze.py | 16 +- codeclone/surfaces/mcp/tools/gates.py | 16 +- codeclone/surfaces/mcp/tools/runs.py | 12 +- codeclone/utils/json_io.py | 3 +- 48 files changed, 1536 insertions(+), 949 deletions(-) create mode 100644 
codeclone/surfaces/cli/attrs.py delete mode 100644 codeclone/surfaces/cli/main.py diff --git a/codeclone/analysis/cfg.py b/codeclone/analysis/cfg.py index 67838da..5da1933 100644 --- a/codeclone/analysis/cfg.py +++ b/codeclone/analysis/cfg.py @@ -8,7 +8,7 @@ import ast from dataclasses import dataclass -from typing import TYPE_CHECKING, Protocol, cast +from typing import TYPE_CHECKING from ..meta_markers import CFG_META_PREFIX from .cfg_model import CFG, Block @@ -21,13 +21,6 @@ TryStar = getattr(ast, "TryStar", ast.Try) -class _TryLike(Protocol): - body: list[ast.stmt] - handlers: list[ast.ExceptHandler] - orelse: list[ast.stmt] - finalbody: list[ast.stmt] - - @dataclass(slots=True) class _LoopContext: continue_target: Block @@ -105,9 +98,19 @@ def _visit(self, stmt: ast.stmt) -> None: self._visit_for(stmt) # Structure is identical to For case ast.Try(): - self._visit_try(cast("_TryLike", stmt)) + self._visit_try( + body=stmt.body, + handlers=stmt.handlers, + orelse=stmt.orelse, + finalbody=stmt.finalbody, + ) case _ if TryStar is not None and isinstance(stmt, TryStar): - self._visit_try(cast("_TryLike", cast("object", stmt))) + self._visit_try( + body=stmt.body, + handlers=stmt.handlers, + orelse=stmt.orelse, + finalbody=stmt.finalbody, + ) case ast.With() | ast.AsyncWith(): self._visit_with(stmt) @@ -261,18 +264,25 @@ def _visit_with(self, stmt: ast.With | ast.AsyncWith) -> None: self.current = after_block - def _visit_try(self, stmt: _TryLike) -> None: + def _visit_try( + self, + *, + body: list[ast.stmt], + handlers: list[ast.ExceptHandler], + orelse: list[ast.stmt], + finalbody: list[ast.stmt], + ) -> None: try_entry = self.cfg.create_block() self.current.add_successor(try_entry) self.current = try_entry - handler_test_blocks = [self.cfg.create_block() for _ in stmt.handlers] - handler_body_blocks = [self.cfg.create_block() for _ in stmt.handlers] - else_block = self.cfg.create_block() if stmt.orelse else None + handler_test_blocks = [self.cfg.create_block() 
for _ in handlers] + handler_body_blocks = [self.cfg.create_block() for _ in handlers] + else_block = self.cfg.create_block() if orelse else None final_block = self.cfg.create_block() for idx, (handler, test_block, body_block) in enumerate( - zip(stmt.handlers, handler_test_blocks, handler_body_blocks, strict=True) + zip(handlers, handler_test_blocks, handler_body_blocks, strict=True) ): test_block.statements.append(_meta_expr(f"TRY_HANDLER_INDEX:{idx}")) if handler.type is not None: @@ -290,7 +300,7 @@ def _visit_try(self, stmt: _TryLike) -> None: # Process each statement in try body # Link only statements that can raise to exception handlers - for stmt_node in stmt.body: + for stmt_node in body: if self.current.is_terminated: break @@ -307,7 +317,7 @@ def _visit_try(self, stmt: _TryLike) -> None: self.current.add_successor(final_block) # Process handlers - for handler, body_block in zip(stmt.handlers, handler_body_blocks, strict=True): + for handler, body_block in zip(handlers, handler_body_blocks, strict=True): self.current = body_block self._visit_statements(handler.body) if not self.current.is_terminated: @@ -316,14 +326,14 @@ def _visit_try(self, stmt: _TryLike) -> None: # Process else if else_block: self.current = else_block - self._visit_statements(stmt.orelse) + self._visit_statements(orelse) if not self.current.is_terminated: self.current.add_successor(final_block) # Process finally self.current = final_block - if stmt.finalbody: - self._visit_statements(stmt.finalbody) + if finalbody: + self._visit_statements(finalbody) def _visit_match(self, stmt: ast.Match) -> None: self.current.statements.append(ast.Expr(value=stmt.subject)) diff --git a/codeclone/analysis/normalizer.py b/codeclone/analysis/normalizer.py index 732915e..19e44b1 100644 --- a/codeclone/analysis/normalizer.py +++ b/codeclone/analysis/normalizer.py @@ -11,7 +11,7 @@ import hashlib from ast import AST from dataclasses import dataclass -from typing import TYPE_CHECKING, cast +from typing 
import TYPE_CHECKING from ..meta_markers import CFG_META_PREFIX @@ -92,11 +92,16 @@ def visit_Constant(self, node: ast.Constant) -> ast.Constant: node.value = "_CONST_" return node + def _visit_expr(self, node: ast.expr) -> ast.expr: + visited = self.visit(node) + assert isinstance(visited, ast.expr) + return visited + def visit_Call(self, node: ast.Call) -> ast.Call: node.func = self._visit_call_target(node.func) - node.args = [cast("ast.expr", self.visit(arg)) for arg in node.args] + node.args = [self._visit_expr(arg) for arg in node.args] for kw in node.keywords: - kw.value = cast("ast.expr", self.visit(kw.value)) + kw.value = self._visit_expr(kw.value) return node def _visit_call_target(self, node: ast.expr) -> ast.expr: @@ -108,9 +113,9 @@ def _visit_call_target(self, node: ast.expr) -> ast.expr: if isinstance(value, (ast.Name, ast.Attribute)): node.value = self._visit_call_target(value) else: - node.value = cast("ast.expr", self.visit(value)) + node.value = self._visit_expr(value) return node - return cast("ast.expr", self.visit(node)) + return self._visit_expr(node) def visit_AugAssign(self, node: ast.AugAssign) -> AST: # Normalize x += 1 to x = x + 1 diff --git a/codeclone/baseline/_metrics_baseline_payload.py b/codeclone/baseline/_metrics_baseline_payload.py index 0e5071e..4f24864 100644 --- a/codeclone/baseline/_metrics_baseline_payload.py +++ b/codeclone/baseline/_metrics_baseline_payload.py @@ -8,7 +8,6 @@ import hashlib from pathlib import Path -from typing import Any import orjson @@ -205,33 +204,32 @@ def _build_payload( include_adoption: bool = True, api_surface_snapshot: ApiSurfaceSnapshot | None = None, api_surface_root: Path | None = None, -) -> dict[str, Any]: +) -> dict[str, object]: payload_sha256 = _compute_payload_sha256( snapshot, include_adoption=include_adoption, ) - payload: dict[str, Any] = { - "meta": { - "generator": { - "name": generator_name, - "version": generator_version, - }, - "schema_version": schema_version, - "python_tag": 
python_tag, - "created_at": created_at, - "payload_sha256": payload_sha256, + meta: dict[str, object] = { + "generator": { + "name": generator_name, + "version": generator_version, }, + "schema_version": schema_version, + "python_tag": python_tag, + "created_at": created_at, + "payload_sha256": payload_sha256, + } + payload: dict[str, object] = { + "meta": meta, "metrics": _snapshot_payload( snapshot, include_adoption=include_adoption, ), } if api_surface_snapshot is not None: - payload["meta"][_API_SURFACE_PAYLOAD_SHA256_KEY] = ( - _compute_api_surface_payload_sha256( - api_surface_snapshot, - root=api_surface_root, - ) + meta[_API_SURFACE_PAYLOAD_SHA256_KEY] = _compute_api_surface_payload_sha256( + api_surface_snapshot, + root=api_surface_root, ) payload["api_surface"] = _api_surface_snapshot_payload( api_surface_snapshot, diff --git a/codeclone/baseline/_metrics_baseline_validation.py b/codeclone/baseline/_metrics_baseline_validation.py index 831991d..21f15a9 100644 --- a/codeclone/baseline/_metrics_baseline_validation.py +++ b/codeclone/baseline/_metrics_baseline_validation.py @@ -8,7 +8,7 @@ from json import JSONDecodeError from pathlib import Path -from typing import Any, Literal, cast +from typing import Literal from ..cache.projection import runtime_filepath_from_wire from ..contracts import BASELINE_SCHEMA_VERSION @@ -33,6 +33,11 @@ ) from ._metrics_baseline_payload import _compose_api_surface_qualname +_HEALTH_GRADES = {"A", "B", "C", "D", "F"} +_API_PARAM_KINDS = {"pos_only", "pos_or_kw", "vararg", "kw_only", "kwarg"} +_PUBLIC_SYMBOL_KINDS = {"function", "class", "method", "constant"} +_EXPORTED_VIA_KINDS = {"all", "name"} + def _is_compatible_metrics_schema( *, @@ -66,7 +71,7 @@ def _atomic_write_json(path: Path, payload: dict[str, object]) -> None: ) -def _load_json_object(path: Path) -> dict[str, Any]: +def _load_json_object(path: Path) -> dict[str, object]: try: return _read_json_object(path) except OSError as e: @@ -86,7 +91,7 @@ def 
_load_json_object(path: Path) -> dict[str, Any]: ) from None -def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> None: +def _validate_top_level_structure(payload: dict[str, object], *, path: Path) -> None: validate_top_level_structure( payload, path=path, @@ -99,7 +104,7 @@ def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> Non def _validate_required_keys( - payload: dict[str, Any], + payload: dict[str, object], required: frozenset[str], *, path: Path, @@ -114,7 +119,7 @@ def _validate_required_keys( def _validate_exact_keys( - payload: dict[str, Any], + payload: dict[str, object], required: frozenset[str], *, path: Path, @@ -128,7 +133,7 @@ def _validate_exact_keys( ) -def _require_str(payload: dict[str, Any], key: str, *, path: Path) -> str: +def _require_str(payload: dict[str, object], key: str, *, path: Path) -> str: value = payload.get(key) if isinstance(value, str): return value @@ -139,7 +144,7 @@ def _require_str(payload: dict[str, Any], key: str, *, path: Path) -> str: def _extract_metrics_payload_sha256( - payload: dict[str, Any], + payload: dict[str, object], *, path: Path, ) -> str: @@ -150,7 +155,7 @@ def _extract_metrics_payload_sha256( def _extract_optional_payload_sha256( - payload: dict[str, Any], + payload: dict[str, object], *, key: str, ) -> str | None: @@ -158,7 +163,7 @@ def _extract_optional_payload_sha256( return value if isinstance(value, str) else None -def _require_int(payload: dict[str, Any], key: str, *, path: Path) -> int: +def _require_int(payload: dict[str, object], key: str, *, path: Path) -> int: value = payload.get(key) if isinstance(value, bool): raise BaselineValidationError( @@ -174,7 +179,7 @@ def _require_int(payload: dict[str, Any], key: str, *, path: Path) -> int: def _optional_require_str( - payload: dict[str, Any], + payload: dict[str, object], key: str, *, path: Path, @@ -190,7 +195,12 @@ def _optional_require_str( ) -def _require_str_list(payload: dict[str, Any], key: 
str, *, path: Path) -> list[str]: +def _require_str_list( + payload: dict[str, object], + key: str, + *, + path: Path, +) -> list[str]: value = payload.get(key) if not isinstance(value, list): raise BaselineValidationError( @@ -206,7 +216,7 @@ def _require_str_list(payload: dict[str, Any], key: str, *, path: Path) -> list[ def _parse_cycles( - payload: dict[str, Any], + payload: dict[str, object], *, key: str, path: Path, @@ -237,7 +247,7 @@ def _parse_cycles( def _parse_generator( - meta: dict[str, Any], + meta: dict[str, object], *, path: Path, ) -> tuple[str, str | None]: @@ -288,10 +298,10 @@ def _parse_generator( def _require_embedded_clone_baseline_payload( - payload: dict[str, Any], + payload: dict[str, object], *, path: Path, -) -> tuple[dict[str, Any], dict[str, Any]]: +) -> tuple[dict[str, object], dict[str, object]]: meta_obj = payload.get("meta") clones_obj = payload.get("clones") if not isinstance(meta_obj, dict): @@ -326,7 +336,7 @@ def _require_embedded_clone_baseline_payload( return meta_obj, clones_obj -def _resolve_embedded_schema_version(meta: dict[str, Any], *, path: Path) -> str: +def _resolve_embedded_schema_version(meta: dict[str, object], *, path: Path) -> str: raw_version = _require_str(meta, "schema_version", path=path) parts = raw_version.split(".") if len(parts) not in {2, 3} or not all(part.isdigit() for part in parts): @@ -342,7 +352,7 @@ def _resolve_embedded_schema_version(meta: dict[str, Any], *, path: Path) -> str def _parse_snapshot( - payload: dict[str, Any], + payload: dict[str, object], *, path: Path, ) -> MetricsSnapshot: @@ -373,7 +383,7 @@ def _parse_snapshot( sorted(set(_require_str_list(payload, "dead_code_items", path=path))) ), health_score=_require_int(payload, "health_score", path=path), - health_grade=cast("Literal['A', 'B', 'C', 'D', 'F']", grade), + health_grade=_require_health_grade(grade, path=path), typing_param_permille=_optional_int( payload, "typing_param_permille", @@ -389,13 +399,91 @@ def _parse_snapshot( ) 
-def _optional_int(payload: dict[str, Any], key: str, *, path: Path) -> int: +def _optional_int(payload: dict[str, object], key: str, *, path: Path) -> int: value = payload.get(key) if value is None: return 0 return _require_int(payload, key, path=path) +def _require_health_grade( + value: str, + *, + path: Path, +) -> Literal["A", "B", "C", "D", "F"]: + if value == "A": + return "A" + if value == "B": + return "B" + if value == "C": + return "C" + if value == "D": + return "D" + if value == "F": + return "F" + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + "'health_grade' must be one of A/B/C/D/F", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _require_api_param_kind( + value: str, + *, + path: Path, +) -> Literal["pos_only", "pos_or_kw", "vararg", "kw_only", "kwarg"]: + if value == "pos_only": + return "pos_only" + if value == "pos_or_kw": + return "pos_or_kw" + if value == "vararg": + return "vararg" + if value == "kw_only": + return "kw_only" + if value == "kwarg": + return "kwarg" + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: api param 'kind' is invalid", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _require_public_symbol_kind( + value: str, + *, + path: Path, +) -> Literal["function", "class", "method", "constant"]: + if value == "function": + return "function" + if value == "class": + return "class" + if value == "method": + return "method" + if value == "constant": + return "constant" + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: public symbol 'kind' is invalid", + status=MetricsBaselineStatus.INVALID_TYPE, + ) + + +def _require_exported_via( + value: str, + *, + path: Path, +) -> Literal["all", "name"]: + if value == "all": + return "all" + if value == "name": + return "name" + raise BaselineValidationError( + f"Invalid metrics baseline schema at {path}: " + "public symbol 'exported_via' is invalid", + 
status=MetricsBaselineStatus.INVALID_TYPE, + ) + + def _parse_api_surface_snapshot( payload: object, *, @@ -493,13 +581,7 @@ def _parse_api_surface_snapshot( params.append( ApiParamSpec( name=name, - kind=cast( - ( - "Literal['pos_only', 'pos_or_kw', " - "'vararg', 'kw_only', 'kwarg']" - ), - param_kind, - ), + kind=_require_api_param_kind(param_kind, path=path), has_default=has_default, annotation_hash=annotation_hash or "", ) @@ -507,10 +589,7 @@ def _parse_api_surface_snapshot( symbols.append( PublicSymbol( qualname=qualname or "", - kind=cast( - "Literal['function', 'class', 'method', 'constant']", - kind, - ), + kind=_require_public_symbol_kind(kind, path=path), start_line=_require_int(raw_symbol, "start_line", path=path), end_line=_require_int(raw_symbol, "end_line", path=path), params=tuple(params), @@ -520,7 +599,7 @@ def _parse_api_surface_snapshot( path=path, ) or "", - exported_via=cast("Literal['all', 'name']", exported_via), + exported_via=_require_exported_via(exported_via, path=path), ) ) modules.append( @@ -537,7 +616,7 @@ def _parse_api_surface_snapshot( def _require_str_list_or_none( - payload: dict[str, Any], + payload: dict[str, object], key: str, *, path: Path, diff --git a/codeclone/baseline/clone_baseline.py b/codeclone/baseline/clone_baseline.py index 4fc8ee5..7422232 100644 --- a/codeclone/baseline/clone_baseline.py +++ b/codeclone/baseline/clone_baseline.py @@ -9,7 +9,7 @@ import hmac import re from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING from .. 
import __version__ from ..contracts import ( @@ -73,7 +73,7 @@ def load( self, *, max_size_bytes: int | None = None, - preloaded_payload: dict[str, Any] | None = None, + preloaded_payload: dict[str, object] | None = None, ) -> None: try: exists = self.path.exists() @@ -391,7 +391,7 @@ def diff( ) -def _atomic_write_json(path: Path, payload: dict[str, Any]) -> None: +def _atomic_write_json(path: Path, payload: dict[str, object]) -> None: _write_json_document_atomically( path, payload, @@ -400,7 +400,7 @@ def _atomic_write_json(path: Path, payload: dict[str, Any]) -> None: ) -def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> None: +def _validate_top_level_structure(payload: dict[str, object], *, path: Path) -> None: validate_top_level_structure( payload, path=path, @@ -413,7 +413,7 @@ def _validate_top_level_structure(payload: dict[str, Any], *, path: Path) -> Non def _validate_required_keys( - obj: dict[str, Any], required: set[str], *, path: Path + obj: dict[str, object], required: set[str], *, path: Path ) -> None: missing = required - set(obj.keys()) if missing: @@ -424,7 +424,7 @@ def _validate_required_keys( ) -def _validate_exact_clone_keys(clones: dict[str, Any], *, path: Path) -> None: +def _validate_exact_clone_keys(clones: dict[str, object], *, path: Path) -> None: keys = set(clones.keys()) extra = keys - _CLONES_REQUIRED_KEYS if extra: @@ -435,13 +435,18 @@ def _validate_exact_clone_keys(clones: dict[str, Any], *, path: Path) -> None: ) -def _is_legacy_baseline_payload(payload: dict[str, Any]) -> bool: +def _is_legacy_baseline_payload(payload: dict[str, object]) -> bool: return "functions" in payload and "blocks" in payload def _preserve_embedded_metrics( path: Path, -) -> tuple[dict[str, Any] | None, str | None, dict[str, Any] | None, str | None]: +) -> tuple[ + dict[str, object] | None, + str | None, + dict[str, object] | None, + str | None, +]: try: payload = _trust._load_json_object(path) except BaselineValidationError: @@ 
-486,7 +491,7 @@ def _baseline_payload( python_tag: str | None, generator_version: str | None, created_at: str | None, -) -> dict[str, Any]: +) -> dict[str, object]: resolved_generator = generator or _trust.BASELINE_GENERATOR resolved_schema = schema_version or BASELINE_SCHEMA_VERSION resolved_fingerprint = fingerprint_version or BASELINE_FINGERPRINT_VERSION diff --git a/codeclone/baseline/metrics_baseline.py b/codeclone/baseline/metrics_baseline.py index e2afa0e..4653cfd 100644 --- a/codeclone/baseline/metrics_baseline.py +++ b/codeclone/baseline/metrics_baseline.py @@ -9,7 +9,6 @@ import hmac from datetime import datetime, timezone from pathlib import Path -from typing import TYPE_CHECKING, Any, cast from .. import __version__ from ..contracts import BASELINE_SCHEMA_VERSION, METRICS_BASELINE_SCHEMA_VERSION @@ -55,9 +54,6 @@ from .diff import diff_metrics from .trust import current_python_tag -if TYPE_CHECKING: - from collections.abc import Mapping - def _now_utc_z() -> str: return ( @@ -211,19 +207,25 @@ def save(self) -> None: api_surface_snapshot=self.api_surface_snapshot, api_surface_root=self.path.parent, ) - payload_meta = cast("Mapping[str, Any]", payload["meta"]) + payload_meta = payload.get("meta") + if not isinstance(payload_meta, dict): + raise BaselineValidationError( + f"Invalid metrics baseline schema at {self.path}: " + "'meta' must be object", + status=MetricsBaselineStatus.INVALID_TYPE, + ) payload_metrics_hash = _require_str( - cast("dict[str, Any]", payload_meta), + payload_meta, "payload_sha256", path=self.path, ) payload_api_surface_hash = _optional_require_str( - cast("dict[str, Any]", payload_meta), + payload_meta, _API_SURFACE_PAYLOAD_SHA256_KEY, path=self.path, ) - existing: dict[str, Any] | None = None + existing: dict[str, object] | None = None try: if self.path.exists(): loaded = _load_json_object(self.path) @@ -285,17 +287,17 @@ def save(self) -> None: _atomic_write_json(self.path, payload) self.is_embedded_in_clone_baseline = False 
self.schema_version = _require_str( - cast("dict[str, Any]", payload_meta), + payload_meta, "schema_version", path=self.path, ) self.python_tag = _require_str( - cast("dict[str, Any]", payload_meta), + payload_meta, "python_tag", path=self.path, ) self.created_at = _require_str( - cast("dict[str, Any]", payload_meta), + payload_meta, "created_at", path=self.path, ) diff --git a/codeclone/baseline/trust.py b/codeclone/baseline/trust.py index df54168..a47f7ca 100644 --- a/codeclone/baseline/trust.py +++ b/codeclone/baseline/trust.py @@ -13,7 +13,7 @@ from enum import Enum from json import JSONDecodeError from pathlib import Path -from typing import TYPE_CHECKING, Any, Final +from typing import TYPE_CHECKING, Final import orjson @@ -84,7 +84,7 @@ def _safe_stat_size(path: Path) -> int: ) from e -def _load_json_object(path: Path) -> dict[str, Any]: +def _load_json_object(path: Path) -> dict[str, object]: try: return _read_json_object(path) except OSError as e: @@ -105,7 +105,7 @@ def _load_json_object(path: Path) -> dict[str, Any]: def _parse_generator_meta( - meta_obj: dict[str, Any], *, path: Path + meta_obj: dict[str, object], *, path: Path ) -> tuple[str, str | None]: raw_generator = meta_obj.get("generator") @@ -173,7 +173,7 @@ def _utc_now_z() -> str: ) -def _require_str(obj: dict[str, Any], key: str, *, path: Path) -> str: +def _require_str(obj: dict[str, object], key: str, *, path: Path) -> str: value = obj.get(key) if not isinstance(value, str): raise BaselineValidationError( @@ -183,7 +183,7 @@ def _require_str(obj: dict[str, Any], key: str, *, path: Path) -> str: return value -def _optional_str(obj: dict[str, Any], key: str, *, path: Path) -> str | None: +def _optional_str(obj: dict[str, object], key: str, *, path: Path) -> str | None: value = obj.get(key) if value is None: return None @@ -195,7 +195,7 @@ def _optional_str(obj: dict[str, Any], key: str, *, path: Path) -> str | None: return value -def _require_semver_str(obj: dict[str, Any], key: str, *, 
path: Path) -> str: +def _require_semver_str(obj: dict[str, object], key: str, *, path: Path) -> str: value = _require_str(obj, key, path=path) _parse_semver(value, key=key, path=path) return value @@ -216,7 +216,7 @@ def _parse_semver(value: str, *, key: str, path: Path) -> tuple[int, int, int]: return major, minor, patch -def _require_python_tag(obj: dict[str, Any], key: str, *, path: Path) -> str: +def _require_python_tag(obj: dict[str, object], key: str, *, path: Path) -> str: value = _require_str(obj, key, path=path) if not re.fullmatch(r"[a-z]{2}\d{2,3}", value): raise BaselineValidationError( @@ -226,7 +226,7 @@ def _require_python_tag(obj: dict[str, Any], key: str, *, path: Path) -> str: return value -def _require_utc_iso8601_z(obj: dict[str, Any], key: str, *, path: Path) -> str: +def _require_utc_iso8601_z(obj: dict[str, object], key: str, *, path: Path) -> str: value = _require_str(obj, key, path=path) if not _UTC_ISO8601_Z_RE.fullmatch(value): raise BaselineValidationError( @@ -252,7 +252,7 @@ def _require_utc_iso8601_z(obj: dict[str, Any], key: str, *, path: Path) -> str: def _require_sorted_unique_ids( - obj: dict[str, Any], key: str, *, pattern: re.Pattern[str], path: Path + obj: dict[str, object], key: str, *, pattern: re.Pattern[str], path: Path ) -> list[str]: value = obj.get(key) if not isinstance(value, list): diff --git a/codeclone/cache/_canonicalize.py b/codeclone/cache/_canonicalize.py index e6d8048..528cb53 100644 --- a/codeclone/cache/_canonicalize.py +++ b/codeclone/cache/_canonicalize.py @@ -7,7 +7,7 @@ from __future__ import annotations from collections.abc import Callable, Mapping -from typing import TypeGuard, TypeVar, cast +from typing import TypeGuard, TypeVar from ._validators import ( _is_block_dict, @@ -44,12 +44,15 @@ _ValidatedItemT = TypeVar("_ValidatedItemT") +def _is_str_item(value: object) -> TypeGuard[str]: + return isinstance(value, str) + + def _as_file_stat_dict(value: object) -> FileStat | None: if not 
_is_file_stat_dict(value): return None - obj = cast("Mapping[str, object]", value) - mtime_ns = obj.get("mtime_ns") - size = obj.get("size") + mtime_ns = value.get("mtime_ns") + size = value.get("size") if not isinstance(mtime_ns, int) or not isinstance(size, int): return None return FileStat(mtime_ns=mtime_ns, size=size) @@ -58,33 +61,27 @@ def _as_file_stat_dict(value: object) -> FileStat | None: def _as_source_stats_dict(value: object) -> SourceStatsDict | None: if not _is_source_stats_dict(value): return None - obj = cast("Mapping[str, object]", value) - lines = obj.get("lines") - functions = obj.get("functions") - methods = obj.get("methods") - classes = obj.get("classes") - assert isinstance(lines, int) - assert isinstance(functions, int) - assert isinstance(methods, int) - assert isinstance(classes, int) return SourceStatsDict( - lines=lines, - functions=functions, - methods=methods, - classes=classes, + lines=value["lines"], + functions=value["functions"], + methods=value["methods"], + classes=value["classes"], ) def _as_typed_list( value: object, *, - predicate: Callable[[object], bool], + predicate: Callable[[object], TypeGuard[_ValidatedItemT]], ) -> list[_ValidatedItemT] | None: if not isinstance(value, list): return None - if not all(predicate(item) for item in value): - return None - return cast("list[_ValidatedItemT]", value) + items: list[_ValidatedItemT] = [] + for item in value: + if not predicate(item): + return None + items.append(item) + return items def _as_typed_unit_list(value: object) -> list[UnitDict] | None: @@ -114,7 +111,7 @@ def _as_typed_module_deps_list(value: object) -> list[ModuleDepDict] | None: def _as_typed_string_list(value: object) -> list[str] | None: - return _as_typed_list(value, predicate=lambda item: isinstance(item, str)) + return _as_typed_list(value, predicate=_is_str_item) def _as_module_typing_coverage_dict( @@ -122,7 +119,7 @@ def _as_module_typing_coverage_dict( ) -> ModuleTypingCoverageDict | None: if not 
_is_module_typing_coverage_dict(value): return None - return cast("ModuleTypingCoverageDict", value) + return value def _as_module_docstring_coverage_dict( @@ -130,13 +127,13 @@ def _as_module_docstring_coverage_dict( ) -> ModuleDocstringCoverageDict | None: if not _is_module_docstring_coverage_dict(value): return None - return cast("ModuleDocstringCoverageDict", value) + return value def _as_module_api_surface_dict(value: object) -> ModuleApiSurfaceDict | None: if not _is_module_api_surface_dict(value): return None - return cast("ModuleApiSurfaceDict", value) + return value def _normalized_optional_string_list(value: object) -> list[str] | None: diff --git a/codeclone/cache/_validators.py b/codeclone/cache/_validators.py index 7cfc433..4ea25c2 100644 --- a/codeclone/cache/_validators.py +++ b/codeclone/cache/_validators.py @@ -7,15 +7,32 @@ from __future__ import annotations from collections.abc import Mapping, Sequence - - -def _is_file_stat_dict(value: object) -> bool: +from typing import TypeGuard + +from .entries import ( + ApiParamSpecDict, + BlockDict, + ClassMetricsDict, + DeadCandidateDict, + FileStat, + ModuleApiSurfaceDict, + ModuleDepDict, + ModuleDocstringCoverageDict, + ModuleTypingCoverageDict, + PublicSymbolDict, + SegmentDict, + SourceStatsDict, + UnitDict, +) + + +def _is_file_stat_dict(value: object) -> TypeGuard[FileStat]: if not isinstance(value, dict): return False return isinstance(value.get("mtime_ns"), int) and isinstance(value.get("size"), int) -def _is_source_stats_dict(value: object) -> bool: +def _is_source_stats_dict(value: object) -> TypeGuard[SourceStatsDict]: if not isinstance(value, dict): return False lines = value.get("lines") @@ -34,7 +51,7 @@ def _is_source_stats_dict(value: object) -> bool: ) -def _is_unit_dict(value: object) -> bool: +def _is_unit_dict(value: object) -> TypeGuard[UnitDict]: if not isinstance(value, dict): return False string_keys = ("qualname", "filepath", "fingerprint", "loc_bucket") @@ -60,7 +77,7 @@ def 
_is_unit_dict(value: object) -> bool: ) -def _is_block_dict(value: object) -> bool: +def _is_block_dict(value: object) -> TypeGuard[BlockDict]: if not isinstance(value, dict): return False string_keys = ("block_hash", "filepath", "qualname") @@ -68,7 +85,7 @@ def _is_block_dict(value: object) -> bool: return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) -def _is_segment_dict(value: object) -> bool: +def _is_segment_dict(value: object) -> TypeGuard[SegmentDict]: if not isinstance(value, dict): return False string_keys = ("segment_hash", "segment_sig", "filepath", "qualname") @@ -76,7 +93,9 @@ def _is_segment_dict(value: object) -> bool: return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) -def _is_module_typing_coverage_dict(value: object) -> bool: +def _is_module_typing_coverage_dict( + value: object, +) -> TypeGuard[ModuleTypingCoverageDict]: if not isinstance(value, dict): return False string_keys = ("module", "filepath") @@ -91,7 +110,9 @@ def _is_module_typing_coverage_dict(value: object) -> bool: return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) -def _is_module_docstring_coverage_dict(value: object) -> bool: +def _is_module_docstring_coverage_dict( + value: object, +) -> TypeGuard[ModuleDocstringCoverageDict]: if not isinstance(value, dict): return False string_keys = ("module", "filepath") @@ -99,7 +120,7 @@ def _is_module_docstring_coverage_dict(value: object) -> bool: return _has_typed_fields(value, string_keys=string_keys, int_keys=int_keys) -def _is_api_param_spec_dict(value: object) -> bool: +def _is_api_param_spec_dict(value: object) -> TypeGuard[ApiParamSpecDict]: if not isinstance(value, dict): return False return ( @@ -110,7 +131,7 @@ def _is_api_param_spec_dict(value: object) -> bool: ) -def _is_public_symbol_dict(value: object) -> bool: +def _is_public_symbol_dict(value: object) -> TypeGuard[PublicSymbolDict]: if not isinstance(value, dict): return False if not 
_has_typed_fields( @@ -127,7 +148,7 @@ def _is_public_symbol_dict(value: object) -> bool: ) -def _is_module_api_surface_dict(value: object) -> bool: +def _is_module_api_surface_dict(value: object) -> TypeGuard[ModuleApiSurfaceDict]: if not isinstance(value, dict): return False all_declared = value.get("all_declared", []) @@ -141,7 +162,7 @@ def _is_module_api_surface_dict(value: object) -> bool: ) -def _is_class_metrics_dict(value: object) -> bool: +def _is_class_metrics_dict(value: object) -> TypeGuard[ClassMetricsDict]: if not isinstance(value, dict): return False if not _has_typed_fields( @@ -169,7 +190,7 @@ def _is_class_metrics_dict(value: object) -> bool: return _is_string_list(coupled_classes) -def _is_module_dep_dict(value: object) -> bool: +def _is_module_dep_dict(value: object) -> TypeGuard[ModuleDepDict]: if not isinstance(value, dict): return False return _has_typed_fields( @@ -179,7 +200,7 @@ def _is_module_dep_dict(value: object) -> bool: ) -def _is_dead_candidate_dict(value: object) -> bool: +def _is_dead_candidate_dict(value: object) -> TypeGuard[DeadCandidateDict]: if not isinstance(value, dict): return False if not _has_typed_fields( @@ -194,7 +215,7 @@ def _is_dead_candidate_dict(value: object) -> bool: return _is_string_list(suppressed_rules) -def _is_string_list(value: object) -> bool: +def _is_string_list(value: object) -> TypeGuard[list[str]]: return isinstance(value, list) and all(isinstance(item, str) for item in value) diff --git a/codeclone/config/argparse_builder.py b/codeclone/config/argparse_builder.py index 79f4956..aec5a63 100644 --- a/codeclone/config/argparse_builder.py +++ b/codeclone/config/argparse_builder.py @@ -2,7 +2,7 @@ import argparse import sys -from typing import Any, NoReturn +from typing import NoReturn from .. 
import ui_messages as ui from ..contracts import ExitCode, cli_help_epilog @@ -37,7 +37,7 @@ def _add_option( ) return - argument_kwargs: dict[str, Any] = {"help": option.help_text} + argument_kwargs: dict[str, object] = {"help": option.help_text} if option.cli_kind == "value": argument_kwargs.update( @@ -76,7 +76,7 @@ def _add_option( else: raise RuntimeError(f"Unsupported CLI option kind: {option.cli_kind}") - group.add_argument(*option.flags, **argument_kwargs) + group.add_argument(*option.flags, **argument_kwargs) # type: ignore[arg-type] def build_parser(version: str) -> _ArgumentParser: diff --git a/codeclone/config/spec.py b/codeclone/config/spec.py index 095a3fc..4ea9a7c 100644 --- a/codeclone/config/spec.py +++ b/codeclone/config/spec.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Final, Literal, cast +from typing import Final, Literal from .. import ui_messages as ui from ..contracts import ( @@ -104,8 +104,10 @@ def _option( resolved_pyproject_key = None elif pyproject_key is _INFER_PYPROJECT_KEY: resolved_pyproject_key = dest + elif pyproject_key is None or isinstance(pyproject_key, str): + resolved_pyproject_key = pyproject_key else: - resolved_pyproject_key = cast("str | None", pyproject_key) + raise TypeError("pyproject_key must be str | None when pyproject_type is set") return OptionSpec( dest=dest, group=group, diff --git a/codeclone/core/_types.py b/codeclone/core/_types.py index ecedcfe..4bcf448 100644 --- a/codeclone/core/_types.py +++ b/codeclone/core/_types.py @@ -11,7 +11,6 @@ from dataclasses import dataclass from hashlib import sha256 from pathlib import Path -from typing import cast import orjson @@ -31,13 +30,14 @@ ModuleDocstringCoverage, ModuleTypingCoverage, ProjectMetrics, + SegmentGroupItem, SegmentUnit, StructuralFindingGroup, Suggestion, SuppressedCloneGroup, Unit, ) -from ..utils.coerce import as_int, as_str +from ..utils.coerce import as_int, as_mapping, as_str 
MAX_FILE_SIZE = 10 * 1024 * 1024 DEFAULT_BATCH_SIZE = 100 @@ -219,23 +219,61 @@ def _segment_groups_digest(segment_groups: Mapping[str, list[GroupItem]]) -> str def _coerce_segment_report_projection( value: object, ) -> SegmentReportProjection | None: - if not isinstance(value, dict): - return None - digest = value.get("digest") - suppressed = value.get("suppressed") - groups = value.get("groups") - if ( - not isinstance(digest, str) - or not isinstance(suppressed, int) - or not isinstance(groups, dict) - ): + row = as_mapping(value) + if not row: return None + match row.get("digest"), row.get("suppressed"), row.get("groups"): + case str() as digest, int() as suppressed, dict() as groups: + pass + case _: + return None if not all( isinstance(group_key, str) and isinstance(items, list) for group_key, items in groups.items() ): return None - return cast("SegmentReportProjection", value) + normalized_groups: dict[str, list[SegmentGroupItem]] = {} + for group_key, items in groups.items(): + if not isinstance(group_key, str) or not isinstance(items, list): + return None + normalized_items: list[SegmentGroupItem] = [] + for item in items: + if not isinstance(item, dict): + return None + segment_hash = item.get("segment_hash") + segment_sig = item.get("segment_sig") + filepath = item.get("filepath") + qualname = item.get("qualname") + start_line = item.get("start_line") + end_line = item.get("end_line") + size = item.get("size") + if not ( + isinstance(segment_hash, str) + and isinstance(segment_sig, str) + and isinstance(filepath, str) + and isinstance(qualname, str) + and isinstance(start_line, int) + and isinstance(end_line, int) + and isinstance(size, int) + ): + return None + normalized_items.append( + SegmentGroupItem( + segment_hash=segment_hash, + segment_sig=segment_sig, + filepath=filepath, + qualname=qualname, + start_line=start_line, + end_line=end_line, + size=size, + ) + ) + normalized_groups[group_key] = normalized_items + return { + "digest": digest, + 
"suppressed": suppressed, + "groups": normalized_groups, + } def _module_dep_sort_key(dep: ModuleDep) -> tuple[str, str, str, int]: diff --git a/codeclone/core/discovery.py b/codeclone/core/discovery.py index 4296fc5..244663a 100644 --- a/codeclone/core/discovery.py +++ b/codeclone/core/discovery.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import cast +from collections.abc import Mapping, Sequence from ..cache.store import Cache, file_stat_signature from ..models import ( @@ -55,6 +55,10 @@ ] +def _group_items_from_cache(rows: Sequence[Mapping[str, object]]) -> list[GroupItem]: + return [dict(row) for row in rows] + + def _new_discovery_buffers() -> DiscoveryBuffers: return [], [], [], [], [], [], set(), set(), [], [], [], [], [] @@ -118,13 +122,9 @@ def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult: cached_source_stats_by_file.append( (filepath, lines, functions, methods, classes) ) - cached_units.extend(cast("list[GroupItem]", cast(object, cached["units"]))) - cached_blocks.extend( - cast("list[GroupItem]", cast(object, cached["blocks"])) - ) - cached_segments.extend( - cast("list[GroupItem]", cast(object, cached["segments"])) - ) + cached_units.extend(_group_items_from_cache(cached["units"])) + cached_blocks.extend(_group_items_from_cache(cached["blocks"])) + cached_segments.extend(_group_items_from_cache(cached["segments"])) if not boot.args.skip_metrics: ( class_metrics, diff --git a/codeclone/core/discovery_cache.py b/codeclone/core/discovery_cache.py index dbec1ac..0b43749 100644 --- a/codeclone/core/discovery_cache.py +++ b/codeclone/core/discovery_cache.py @@ -7,15 +7,13 @@ from __future__ import annotations from collections.abc import Mapping -from typing import Literal, cast +from typing import Literal from ..cache.entries import ( - ApiParamSpecDict, CacheEntry, ClassMetricsDict, DeadCandidateDict, ModuleDepDict, - PublicSymbolDict, StructuralFindingGroupDict, ) from ..models import ( @@ -31,8 +29,92 @@ 
StructuralFindingOccurrence, ) from ..paths import is_test_filepath +from ..utils.coerce import as_mapping from ._types import _as_sorted_str_tuple +_ApiParamKind = Literal["pos_only", "pos_or_kw", "vararg", "kw_only", "kwarg"] +_PublicSymbolKind = Literal["function", "class", "method", "constant"] +_ExportedViaKind = Literal["all", "name"] +_RiskLevel = Literal["low", "medium", "high"] +_ImportType = Literal["import", "from_import"] +_DeadCandidateKind = Literal["function", "class", "method", "import"] + + +def _api_param_kind(value: object) -> _ApiParamKind | None: + match value: + case "pos_only": + return "pos_only" + case "pos_or_kw": + return "pos_or_kw" + case "vararg": + return "vararg" + case "kw_only": + return "kw_only" + case "kwarg": + return "kwarg" + case _: + return None + + +def _public_symbol_kind(value: object) -> _PublicSymbolKind | None: + match value: + case "function": + return "function" + case "class": + return "class" + case "method": + return "method" + case "constant": + return "constant" + case _: + return None + + +def _exported_via_kind(value: object) -> _ExportedViaKind | None: + match value: + case "all": + return "all" + case "name": + return "name" + case _: + return None + + +def _risk_level(value: object) -> _RiskLevel | None: + match value: + case "low": + return "low" + case "medium": + return "medium" + case "high": + return "high" + case _: + return None + + +def _import_type(value: object) -> _ImportType | None: + match value: + case "import": + return "import" + case "from_import": + return "from_import" + case _: + return None + + +def _dead_candidate_kind(value: object) -> _DeadCandidateKind | None: + match value: + case "function": + return "function" + case "class": + return "class" + case "method": + return "method" + case "import": + return "import" + case _: + return None + def decode_cached_structural_finding_group( group_dict: StructuralFindingGroupDict, @@ -116,9 +198,9 @@ def usable_cached_source_stats( def 
_cache_dict_module_fields( value: object, ) -> tuple[Mapping[str, object], str, str] | None: - if not isinstance(value, dict): + if not isinstance(value, Mapping): return None - row = cast("Mapping[str, object]", value) + row = as_mapping(value) module = row.get("module") filepath = row.get("filepath") if not isinstance(module, str) or not isinstance(filepath, str): @@ -139,6 +221,23 @@ def _cache_dict_int_fields( return tuple(values) +def _api_param_fields( + row: Mapping[str, object], +) -> tuple[str, _ApiParamKind, bool, str] | None: + name = row.get("name") + validated_kind = _api_param_kind(row.get("kind")) + has_default = row.get("has_default") + annotation_hash = row.get("annotation_hash", "") + if ( + not isinstance(name, str) + or validated_kind is None + or not isinstance(has_default, bool) + or not isinstance(annotation_hash, str) + ): + return None + return name, validated_kind, has_default, annotation_hash + + def _typing_coverage_from_cache_dict(value: object) -> ModuleTypingCoverage | None: row_info = _cache_dict_module_fields(value) if row_info is None: @@ -189,43 +288,39 @@ def _docstring_coverage_from_cache_dict( ) -def _api_param_spec_from_cache_dict(value: ApiParamSpecDict) -> ApiParamSpec | None: - name = value.get("name") - kind = value.get("kind") - has_default = value.get("has_default") - annotation_hash = value.get("annotation_hash", "") - if ( - not isinstance(name, str) - or not isinstance(kind, str) - or not isinstance(has_default, bool) - or not isinstance(annotation_hash, str) - ): +def _api_param_spec_from_cache_dict(value: object) -> ApiParamSpec | None: + row = as_mapping(value) + if not row: return None + fields = _api_param_fields(row) + if fields is None: + return None + name, validated_kind, has_default, annotation_hash = fields return ApiParamSpec( name=name, - kind=cast( - "Literal['pos_only', 'pos_or_kw', 'vararg', 'kw_only', 'kwarg']", - kind, - ), + kind=validated_kind, has_default=has_default, 
annotation_hash=annotation_hash, ) -def _public_symbol_from_cache_dict(value: PublicSymbolDict) -> PublicSymbol | None: - qualname = value.get("qualname") - kind = value.get("kind") - start_line = value.get("start_line") - end_line = value.get("end_line") - exported_via = value.get("exported_via", "name") - returns_hash = value.get("returns_hash", "") - params_raw = value.get("params", []) +def _public_symbol_from_cache_dict(value: object) -> PublicSymbol | None: + row = as_mapping(value) + if not row: + return None + qualname = row.get("qualname") + start_line = row.get("start_line") + end_line = row.get("end_line") + returns_hash = row.get("returns_hash", "") + params_raw = row.get("params", []) + validated_kind = _public_symbol_kind(row.get("kind")) + validated_exported_via = _exported_via_kind(row.get("exported_via", "name")) if ( not isinstance(qualname, str) - or not isinstance(kind, str) + or validated_kind is None or not isinstance(start_line, int) or not isinstance(end_line, int) - or not isinstance(exported_via, str) + or validated_exported_via is None or not isinstance(returns_hash, str) or not isinstance(params_raw, list) ): @@ -240,12 +335,12 @@ def _public_symbol_from_cache_dict(value: PublicSymbolDict) -> PublicSymbol | No params.append(parsed) return PublicSymbol( qualname=qualname, - kind=cast("Literal['function', 'class', 'method', 'constant']", kind), + kind=validated_kind, start_line=start_line, end_line=end_line, params=tuple(params), returns_hash=returns_hash, - exported_via=cast("Literal['all', 'name']", exported_via), + exported_via=validated_exported_via, ) @@ -264,9 +359,7 @@ def _api_surface_from_cache_dict(value: object) -> ModuleApiSurface | None: return None symbols: list[PublicSymbol] = [] for item in symbols_raw: - if not isinstance(item, dict): - return None - parsed = _public_symbol_from_cache_dict(cast("PublicSymbolDict", item)) + parsed = _public_symbol_from_cache_dict(item) if parsed is None: return None symbols.append(parsed) 
@@ -278,6 +371,63 @@ def _api_surface_from_cache_dict(value: object) -> ModuleApiSurface | None: ) +def _class_metric_from_cache_row(metric_row: ClassMetricsDict) -> ClassMetrics | None: + risk_coupling = _risk_level(metric_row["risk_coupling"]) + risk_cohesion = _risk_level(metric_row["risk_cohesion"]) + if ( + not metric_row.get("qualname") + or not metric_row.get("filepath") + or risk_coupling is None + or risk_cohesion is None + ): + return None + return ClassMetrics( + qualname=metric_row["qualname"], + filepath=metric_row["filepath"], + start_line=metric_row["start_line"], + end_line=metric_row["end_line"], + cbo=metric_row["cbo"], + lcom4=metric_row["lcom4"], + method_count=metric_row["method_count"], + instance_var_count=metric_row["instance_var_count"], + risk_coupling=risk_coupling, + risk_cohesion=risk_cohesion, + coupled_classes=_as_sorted_str_tuple(metric_row.get("coupled_classes", [])), + ) + + +def _module_dep_from_cache_row(dep_row: ModuleDepDict) -> ModuleDep | None: + import_type = _import_type(dep_row["import_type"]) + if not dep_row.get("source") or not dep_row.get("target") or import_type is None: + return None + return ModuleDep( + source=dep_row["source"], + target=dep_row["target"], + import_type=import_type, + line=dep_row["line"], + ) + + +def _dead_candidate_from_cache_row(dead_row: DeadCandidateDict) -> DeadCandidate | None: + kind = _dead_candidate_kind(dead_row["kind"]) + if ( + not dead_row.get("qualname") + or not dead_row.get("local_name") + or not dead_row.get("filepath") + or kind is None + ): + return None + return DeadCandidate( + qualname=dead_row["qualname"], + local_name=dead_row["local_name"], + filepath=dead_row["filepath"], + start_line=dead_row["start_line"], + end_line=dead_row["end_line"], + kind=kind, + suppressed_rules=_as_sorted_str_tuple(dead_row.get("suppressed_rules", [])), + ) + + def load_cached_metrics_extended( entry: CacheEntry, *, @@ -293,54 +443,26 @@ def load_cached_metrics_extended( ModuleApiSurface | 
None, ]: class_metrics_rows: list[ClassMetricsDict] = entry.get("class_metrics", []) - class_metrics = tuple( - ClassMetrics( - qualname=row["qualname"], - filepath=row["filepath"], - start_line=row["start_line"], - end_line=row["end_line"], - cbo=row["cbo"], - lcom4=row["lcom4"], - method_count=row["method_count"], - instance_var_count=row["instance_var_count"], - risk_coupling=cast( - "Literal['low', 'medium', 'high']", - row["risk_coupling"], - ), - risk_cohesion=cast( - "Literal['low', 'medium', 'high']", - row["risk_cohesion"], - ), - coupled_classes=_as_sorted_str_tuple(row.get("coupled_classes", [])), - ) - for row in class_metrics_rows - if row.get("qualname") and row.get("filepath") - ) + class_metrics_items: list[ClassMetrics] = [] + for metric_row in class_metrics_rows: + parsed_metric = _class_metric_from_cache_row(metric_row) + if parsed_metric is not None: + class_metrics_items.append(parsed_metric) + class_metrics = tuple(class_metrics_items) module_dep_rows: list[ModuleDepDict] = entry.get("module_deps", []) - module_deps = tuple( - ModuleDep( - source=row["source"], - target=row["target"], - import_type=cast("Literal['import', 'from_import']", row["import_type"]), - line=row["line"], - ) - for row in module_dep_rows - if row.get("source") and row.get("target") - ) + module_dep_items: list[ModuleDep] = [] + for dep_row in module_dep_rows: + parsed_dep = _module_dep_from_cache_row(dep_row) + if parsed_dep is not None: + module_dep_items.append(parsed_dep) + module_deps = tuple(module_dep_items) dead_rows: list[DeadCandidateDict] = entry.get("dead_candidates", []) - dead_candidates = tuple( - DeadCandidate( - qualname=row["qualname"], - local_name=row["local_name"], - filepath=row["filepath"], - start_line=row["start_line"], - end_line=row["end_line"], - kind=cast("Literal['function', 'class', 'method', 'import']", row["kind"]), - suppressed_rules=tuple(sorted(set(row.get("suppressed_rules", [])))), - ) - for row in dead_rows - if row.get("qualname") 
and row.get("local_name") and row.get("filepath") - ) + dead_candidate_items: list[DeadCandidate] = [] + for dead_row in dead_rows: + parsed_dead = _dead_candidate_from_cache_row(dead_row) + if parsed_dead is not None: + dead_candidate_items.append(parsed_dead) + dead_candidates = tuple(dead_candidate_items) referenced_names = ( frozenset() if is_test_filepath(filepath) diff --git a/codeclone/core/metrics_payload.py b/codeclone/core/metrics_payload.py index 5776d96..6f76cb2 100644 --- a/codeclone/core/metrics_payload.py +++ b/codeclone/core/metrics_payload.py @@ -7,7 +7,6 @@ from __future__ import annotations from collections.abc import Mapping, Sequence -from typing import cast from ..domain.findings import CATEGORY_COHESION, CATEGORY_COMPLEXITY, CATEGORY_COUPLING from ..domain.quality import CONFIDENCE_HIGH, RISK_LOW @@ -22,7 +21,7 @@ ProjectMetrics, ) from ..suppressions import DEAD_CODE_RULE_ID, INLINE_CODECLONE_SUPPRESSION_SOURCE -from ..utils.coerce import as_int, as_str +from ..utils.coerce import as_int, as_mapping, as_sequence, as_str from .api_surface_payload import ( _api_surface_rows, _api_surface_summary, @@ -47,12 +46,8 @@ def _enrich_metrics_report_payload( key: (dict(value) if isinstance(value, Mapping) else value) for key, value in metrics_payload.items() } - coverage_adoption = dict( - cast("Mapping[str, object]", enriched.get("coverage_adoption", {})) - ) - coverage_summary = dict( - cast("Mapping[str, object]", coverage_adoption.get("summary", {})) - ) + coverage_adoption = dict(as_mapping(enriched.get("coverage_adoption"))) + coverage_summary = dict(as_mapping(coverage_adoption.get("summary"))) if coverage_summary: coverage_summary["baseline_diff_available"] = coverage_adoption_diff_available coverage_summary["param_delta"] = ( @@ -73,9 +68,9 @@ def _enrich_metrics_report_payload( coverage_adoption["summary"] = coverage_summary enriched["coverage_adoption"] = coverage_adoption - api_surface = dict(cast("Mapping[str, object]", 
enriched.get("api_surface", {}))) - api_summary = dict(cast("Mapping[str, object]", api_surface.get("summary", {}))) - api_items = list(cast("Sequence[object]", api_surface.get("items", ()))) + api_surface = dict(as_mapping(enriched.get("api_surface"))) + api_summary = dict(as_mapping(api_surface.get("summary"))) + api_items = list(as_sequence(api_surface.get("items"))) if api_summary: api_summary["baseline_diff_available"] = api_surface_diff_available api_summary["added"] = ( diff --git a/codeclone/core/pipeline.py b/codeclone/core/pipeline.py index d6c5cd8..e3c0b20 100644 --- a/codeclone/core/pipeline.py +++ b/codeclone/core/pipeline.py @@ -7,7 +7,6 @@ from __future__ import annotations from collections.abc import Mapping, Sequence -from typing import cast from ..findings.clones.grouping import ( build_block_groups, @@ -60,6 +59,19 @@ from .metrics_payload import build_metrics_report_payload +def _artifact_dep_graph(value: object, default: DepGraph) -> DepGraph: + return value if isinstance(value, DepGraph) else default + + +def _artifact_dead_items( + value: object, + default: tuple[DeadItem, ...], +) -> tuple[DeadItem, ...]: + if isinstance(value, tuple) and all(isinstance(item, DeadItem) for item in value): + return value + return default + + def compute_project_metrics( *, units: Sequence[GroupItemLike], @@ -107,10 +119,10 @@ def compute_project_metrics( for family in METRIC_FAMILIES.values(): aggregate = family.aggregate([family.compute(context)]) project_fields.update(aggregate.project_fields) - dep_graph = cast("DepGraph", aggregate.artifacts.get("dep_graph", dep_graph)) - dead_items = cast( - "tuple[DeadItem, ...]", - aggregate.artifacts.get("dead_items", dead_items), + dep_graph = _artifact_dep_graph(aggregate.artifacts.get("dep_graph"), dep_graph) + dead_items = _artifact_dead_items( + aggregate.artifacts.get("dead_items"), + dead_items, ) return build_project_metrics(project_fields), dep_graph, dead_items diff --git a/codeclone/core/reporting.py 
b/codeclone/core/reporting.py index 05cc83b..e17a334 100644 --- a/codeclone/core/reporting.py +++ b/codeclone/core/reporting.py @@ -7,7 +7,6 @@ from __future__ import annotations from collections.abc import Callable, Collection, Mapping -from typing import cast from ..models import MetricsDiff from ..report.document.builder import build_report_document @@ -32,6 +31,10 @@ GatingResult = GateResult +def _coerce_metrics_diff(value: object | None) -> MetricsDiff | None: + return value if isinstance(value, MetricsDiff) else None + + def _load_markdown_report_renderer() -> Callable[..., str]: from ..report.renderers.markdown import to_markdown_report @@ -100,10 +103,11 @@ def report( ) ) if needs_report_document: + validated_metrics_diff = _coerce_metrics_diff(metrics_diff) metrics_for_report = ( _enrich_metrics_report_payload( metrics_payload=analysis.metrics_payload, - metrics_diff=cast("MetricsDiff | None", metrics_diff), + metrics_diff=validated_metrics_diff, coverage_adoption_diff_available=coverage_adoption_diff_available, api_surface_diff_available=api_surface_diff_available, ) @@ -127,10 +131,11 @@ def report( ) if boot.output_paths.html and html_builder is not None: + validated_metrics_diff = _coerce_metrics_diff(metrics_diff) metrics_for_html = ( _enrich_metrics_report_payload( metrics_payload=analysis.metrics_payload, - metrics_diff=cast("MetricsDiff | None", metrics_diff), + metrics_diff=validated_metrics_diff, coverage_adoption_diff_available=coverage_adoption_diff_available, api_surface_diff_available=api_surface_diff_available, ) diff --git a/codeclone/core/worker.py b/codeclone/core/worker.py index 079e083..d88206c 100644 --- a/codeclone/core/worker.py +++ b/codeclone/core/worker.py @@ -10,7 +10,6 @@ import os from collections.abc import Callable from pathlib import Path -from typing import cast from ..analysis.normalizer import NormalizationConfig from ..analysis.units import extract_units_and_stats_from_source @@ -155,7 +154,7 @@ def 
_invoke_process_file( for key, value in optional_kwargs.items() if key in supported_names } - process_callable = cast("Callable[..., FileProcessResult]", process_file) + process_callable: Callable[..., FileProcessResult] = process_file return process_callable( filepath, root, diff --git a/codeclone/main.py b/codeclone/main.py index 1f97418..7e17b85 100644 --- a/codeclone/main.py +++ b/codeclone/main.py @@ -6,7 +6,7 @@ from __future__ import annotations -from .surfaces.cli.main import main +from .surfaces.cli.workflow import main __all__ = ["main"] diff --git a/codeclone/metrics/coverage_join.py b/codeclone/metrics/coverage_join.py index 8109acd..386b16b 100644 --- a/codeclone/metrics/coverage_join.py +++ b/codeclone/metrics/coverage_join.py @@ -10,7 +10,7 @@ from collections.abc import Sequence from dataclasses import dataclass from pathlib import Path -from typing import Literal, cast +from typing import Literal from xml.etree import ElementTree from ..models import CoverageJoinResult, GroupItemLike, UnitCoverageFact @@ -221,8 +221,10 @@ def _resolve_unit_path(filepath: str) -> str: def _risk_level(value: object) -> _Risk: risk = as_str(value, "low") - if risk in {"low", "medium", "high"}: - return cast(_Risk, risk) + if risk == "medium": + return "medium" + if risk == "high": + return "high" return "low" diff --git a/codeclone/metrics/registry.py b/codeclone/metrics/registry.py index 35f7f84..93d6391 100644 --- a/codeclone/metrics/registry.py +++ b/codeclone/metrics/registry.py @@ -7,7 +7,7 @@ from __future__ import annotations from collections.abc import Callable -from typing import cast +from typing import TypeGuard from ..domain.findings import CATEGORY_COHESION, CATEGORY_COMPLEXITY, CATEGORY_COUPLING from ..domain.quality import RISK_HIGH @@ -70,6 +70,63 @@ def _empty_dep_graph() -> DepGraph: ) +_EMPTY_HEALTH_SCORE = compute_health( + HealthInputs( + files_found=0, + files_analyzed_or_cached=0, + function_clone_groups=0, + block_clone_groups=0, + 
complexity_avg=0.0, + complexity_max=0, + high_risk_functions=0, + coupling_avg=0.0, + coupling_max=0, + high_risk_classes=0, + cohesion_avg=0.0, + low_cohesion_classes=0, + dependency_cycles=0, + dependency_max_depth=0, + dead_code_items=0, + ) +) + + +def _is_tuple_of_str(value: object) -> TypeGuard[tuple[str, ...]]: + return isinstance(value, tuple) and all(isinstance(item, str) for item in value) + + +def _is_tuple_of_tuple_str(value: object) -> TypeGuard[tuple[tuple[str, ...], ...]]: + return isinstance(value, tuple) and all(_is_tuple_of_str(item) for item in value) + + +def _is_tuple_of_dead_items(value: object) -> TypeGuard[tuple[DeadItem, ...]]: + return isinstance(value, tuple) and all( + isinstance(item, DeadItem) for item in value + ) + + +def _is_tuple_of_module_deps(value: object) -> TypeGuard[tuple[ModuleDep, ...]]: + return isinstance(value, tuple) and all( + isinstance(item, ModuleDep) for item in value + ) + + +def _is_tuple_of_typing_modules( + value: object, +) -> TypeGuard[tuple[ModuleTypingCoverage, ...]]: + return isinstance(value, tuple) and all( + isinstance(item, ModuleTypingCoverage) for item in value + ) + + +def _is_tuple_of_docstring_modules( + value: object, +) -> TypeGuard[tuple[ModuleDocstringCoverage, ...]]: + return isinstance(value, tuple) and all( + isinstance(item, ModuleDocstringCoverage) for item in value + ) + + def project_metrics_defaults() -> dict[str, object]: return { "complexity_avg": 0.0, @@ -88,25 +145,7 @@ def project_metrics_defaults() -> dict[str, object]: "dependency_max_depth": 0, "dependency_longest_chains": (), "dead_code": (), - "health": compute_health( - HealthInputs( - files_found=0, - files_analyzed_or_cached=0, - function_clone_groups=0, - block_clone_groups=0, - complexity_avg=0.0, - complexity_max=0, - high_risk_functions=0, - coupling_avg=0.0, - coupling_max=0, - high_risk_classes=0, - cohesion_avg=0.0, - low_cohesion_classes=0, - dependency_cycles=0, - dependency_max_depth=0, - dead_code_items=0, - ) 
- ), + "health": _EMPTY_HEALTH_SCORE, "typing_param_total": 0, "typing_param_annotated": 0, "typing_return_total": 0, @@ -133,21 +172,21 @@ def build_project_metrics(project_fields: dict[str, object]) -> ProjectMetrics: low_cohesion_classes=_result_tuple_str(project_fields, "low_cohesion_classes"), dependency_modules=_result_int(project_fields, "dependency_modules"), dependency_edges=_result_int(project_fields, "dependency_edges"), - dependency_edge_list=cast( - "tuple[ModuleDep, ...]", - project_fields.get("dependency_edge_list", ()), + dependency_edge_list=_result_module_deps( + project_fields, + "dependency_edge_list", ), - dependency_cycles=cast( - "tuple[tuple[str, ...], ...]", - project_fields.get("dependency_cycles", ()), + dependency_cycles=_result_nested_tuple_str( + project_fields, + "dependency_cycles", ), dependency_max_depth=_result_int(project_fields, "dependency_max_depth"), - dependency_longest_chains=cast( - "tuple[tuple[str, ...], ...]", - project_fields.get("dependency_longest_chains", ()), + dependency_longest_chains=_result_nested_tuple_str( + project_fields, + "dependency_longest_chains", ), - dead_code=cast("tuple[DeadItem, ...]", project_fields.get("dead_code", ())), - health=cast("HealthScore", project_fields["health"]), + dead_code=_result_dead_items(project_fields, "dead_code"), + health=_result_health(project_fields, "health"), typing_param_total=_result_int(project_fields, "typing_param_total"), typing_param_annotated=_result_int(project_fields, "typing_param_annotated"), typing_return_total=_result_int(project_fields, "typing_return_total"), @@ -161,18 +200,12 @@ def build_project_metrics(project_fields: dict[str, object]) -> ProjectMetrics: project_fields, "docstring_public_documented", ), - typing_modules=cast( - "tuple[ModuleTypingCoverage, ...]", - project_fields.get("typing_modules", ()), - ), - docstring_modules=cast( - "tuple[ModuleDocstringCoverage, ...]", - project_fields.get("docstring_modules", ()), - ), - api_surface=cast( 
- "ApiSurfaceSnapshot | None", - project_fields.get("api_surface"), + typing_modules=_result_typing_modules(project_fields, "typing_modules"), + docstring_modules=_result_docstring_modules( + project_fields, + "docstring_modules", ), + api_surface=_result_api_surface(project_fields, "api_surface"), ) @@ -186,21 +219,61 @@ def _result_int(result: dict[str, object], key: str) -> int: def _result_tuple_str(result: dict[str, object], key: str) -> tuple[str, ...]: - return cast("tuple[str, ...]", result.get(key, ())) + value = result.get(key, ()) + return value if _is_tuple_of_str(value) else () def _result_nested_tuple_str( result: dict[str, object], key: str, ) -> tuple[tuple[str, ...], ...]: - return cast("tuple[tuple[str, ...], ...]", result.get(key, ())) + value = result.get(key, ()) + return value if _is_tuple_of_tuple_str(value) else () def _result_dead_items( result: dict[str, object], key: str, ) -> tuple[DeadItem, ...]: - return cast("tuple[DeadItem, ...]", result.get(key, ())) + value = result.get(key, ()) + return value if _is_tuple_of_dead_items(value) else () + + +def _result_module_deps( + result: dict[str, object], + key: str, +) -> tuple[ModuleDep, ...]: + value = result.get(key, ()) + return value if _is_tuple_of_module_deps(value) else () + + +def _result_health(result: dict[str, object], key: str) -> HealthScore: + value = result.get(key) + return value if isinstance(value, HealthScore) else _EMPTY_HEALTH_SCORE + + +def _result_typing_modules( + result: dict[str, object], + key: str, +) -> tuple[ModuleTypingCoverage, ...]: + value = result.get(key, ()) + return value if _is_tuple_of_typing_modules(value) else () + + +def _result_docstring_modules( + result: dict[str, object], + key: str, +) -> tuple[ModuleDocstringCoverage, ...]: + value = result.get(key, ()) + return value if _is_tuple_of_docstring_modules(value) else () + + +def _result_api_surface( + result: dict[str, object], + key: str, +) -> ApiSurfaceSnapshot | None: + value = result.get(key) 
+ return value if isinstance(value, ApiSurfaceSnapshot) else None def _memoized_result( @@ -386,10 +459,7 @@ def _aggregate_dependencies_family(results: list[MetricResult]) -> MetricAggrega project_fields={ "dependency_modules": _result_int(result, "dependency_modules"), "dependency_edges": _result_int(result, "dependency_edges"), - "dependency_edge_list": cast( - "tuple[ModuleDep, ...]", - result.get("dependency_edge_list", ()), - ), + "dependency_edge_list": _result_module_deps(result, "dependency_edge_list"), "dependency_cycles": _result_nested_tuple_str(result, "dependency_cycles"), "dependency_max_depth": _result_int(result, "dependency_max_depth"), "dependency_longest_chains": _result_nested_tuple_str( @@ -478,9 +548,7 @@ def _compute_health_family(context: MetricProjectContext) -> MetricResult: def _aggregate_health_family(results: list[MetricResult]) -> MetricAggregate: result = _first_result(results) - return MetricAggregate( - project_fields={"health": cast("HealthScore", result.get("health"))} - ) + return MetricAggregate(project_fields={"health": _result_health(result, "health")}) def _build_coverage_adoption_result(context: MetricProjectContext) -> MetricResult: @@ -532,13 +600,10 @@ def _aggregate_coverage_adoption_family(results: list[MetricResult]) -> MetricAg result, "docstring_public_documented", ), - "typing_modules": cast( - "tuple[ModuleTypingCoverage, ...]", - result.get("typing_modules", ()), - ), - "docstring_modules": cast( - "tuple[ModuleDocstringCoverage, ...]", - result.get("docstring_modules", ()), + "typing_modules": _result_typing_modules(result, "typing_modules"), + "docstring_modules": _result_docstring_modules( + result, + "docstring_modules", ), } ) diff --git a/codeclone/report/derived.py b/codeclone/report/derived.py index 362e491..7b07e30 100644 --- a/codeclone/report/derived.py +++ b/codeclone/report/derived.py @@ -7,7 +7,7 @@ from __future__ import annotations from collections import Counter -from typing import TYPE_CHECKING, 
cast +from typing import TYPE_CHECKING from ..domain.source_scope import ( IMPACT_SCOPE_MIXED, @@ -125,7 +125,7 @@ def normalized_source_kind(value: object) -> SourceKind: def source_scope_from_counts( counts: Mapping[SourceKind, int] | Mapping[str, int], ) -> dict[str, object]: - normalized_counts = cast("Mapping[str, int]", counts) + normalized_counts = {str(key): int(value) for key, value in counts.items()} def _count(kind: str) -> int: value = normalized_counts.get(kind, 0) diff --git a/codeclone/report/gates/evaluator.py b/codeclone/report/gates/evaluator.py index 4e8c203..a55b12d 100644 --- a/codeclone/report/gates/evaluator.py +++ b/codeclone/report/gates/evaluator.py @@ -8,7 +8,7 @@ from collections.abc import Callable, Mapping from dataclasses import dataclass -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING from ...contracts import ExitCode from ...metrics.registry import METRIC_FAMILIES @@ -109,28 +109,14 @@ def summarize_metrics_diff(metrics_diff: object | None) -> dict[str, object] | N } new_high_risk_functions = tuple( - cast( # pragma: no branch - tuple normalization is deterministic - "tuple[str, ...]", - tuple( - str(item) - for item in _as_sequence( - getattr(metrics_diff, "new_high_risk_functions", ()) - ) - if str(item).strip() - ), - ) + str(item) + for item in _as_sequence(getattr(metrics_diff, "new_high_risk_functions", ())) + if str(item).strip() ) new_high_coupling_classes = tuple( - cast( - "tuple[str, ...]", - tuple( - str(item) - for item in _as_sequence( - getattr(metrics_diff, "new_high_coupling_classes", ()) - ) - if str(item).strip() - ), - ) + str(item) + for item in _as_sequence(getattr(metrics_diff, "new_high_coupling_classes", ())) + if str(item).strip() ) new_cycles = tuple( tuple(str(part) for part in _as_sequence(item) if str(part).strip()) diff --git a/codeclone/report/html/widgets/snippets.py b/codeclone/report/html/widgets/snippets.py index 49afecc..eeac1d7 100644 --- 
a/codeclone/report/html/widgets/snippets.py +++ b/codeclone/report/html/widgets/snippets.py @@ -10,7 +10,7 @@ import importlib from dataclasses import dataclass from functools import lru_cache -from typing import TYPE_CHECKING, NamedTuple, cast +from typing import TYPE_CHECKING, NamedTuple from ....contracts.errors import FileProcessingError @@ -69,7 +69,13 @@ class _CacheInfo(NamedTuple): currsize: int def cache_info(self) -> _CacheInfo: - return cast("_FileCache._CacheInfo", self._get_file_lines_impl.cache_info()) + info = self._get_file_lines_impl.cache_info() + return self._CacheInfo( + hits=info.hits, + misses=info.misses, + maxsize=info.maxsize, + currsize=info.currsize, + ) _PYGMENTS_IMPORTER_ID: int | None = None diff --git a/codeclone/report/overview.py b/codeclone/report/overview.py index 9e71fc3..b0fe214 100644 --- a/codeclone/report/overview.py +++ b/codeclone/report/overview.py @@ -8,8 +8,9 @@ from collections import Counter from collections.abc import Mapping, Sequence +from dataclasses import dataclass, field from pathlib import PurePosixPath -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING from ..domain.findings import ( CATEGORY_COHESION, @@ -125,6 +126,25 @@ def _flatten_findings(findings: Mapping[str, object]) -> list[Mapping[str, objec ) +@dataclass(slots=True) +class _DirectoryContribution: + affected_items: int = 0 + files: set[str] = field(default_factory=set) + locations: list[dict[str, object]] = field(default_factory=list) + + +@dataclass(slots=True) +class _DirectoryBucketRow: + path: str + finding_ids: set[str] = field(default_factory=set) + affected_items: int = 0 + files: set[str] = field(default_factory=set) + locations: list[dict[str, object]] = field(default_factory=list) + kind_breakdown_ids: dict[str, set[str]] = field( + default_factory=lambda: {key: set() for key in _DIRECTORY_KIND_BREAKDOWN_KEYS} + ) + + def _directory_bucket_keys(group: Mapping[str, object]) -> tuple[str, ...]: family = 
str(group.get("family", "")).strip() category = str(group.get("category", "")).strip() @@ -221,8 +241,8 @@ def _overview_directory_label( def _directory_contributions( group: Mapping[str, object], -) -> dict[str, dict[str, object]]: - contributions: dict[str, dict[str, object]] = {} +) -> dict[str, _DirectoryContribution]: + contributions: dict[str, _DirectoryContribution] = {} for item in map(_as_mapping, _as_sequence(group.get("items"))): relative_path = _directory_relative_path(item) if relative_path is None: @@ -231,25 +251,16 @@ def _directory_contributions( relative_path ) directory = _overview_directory_label(relative_path, source_kind=source_kind) - entry = contributions.setdefault( - directory, - { - "affected_items": 0, - "files": set(), - "locations": [], - }, - ) - entry["affected_items"] = _as_int(entry.get("affected_items")) + 1 - cast(set[str], entry["files"]).add(relative_path) - cast(list[dict[str, object]], entry["locations"]).append( - {"source_kind": source_kind} - ) + entry = contributions.setdefault(directory, _DirectoryContribution()) + entry.affected_items += 1 + entry.files.add(relative_path) + entry.locations.append({"source_kind": source_kind}) return contributions def _directory_group_data( group: Mapping[str, object], -) -> tuple[str, dict[str, dict[str, object]]] | None: +) -> tuple[str, dict[str, _DirectoryContribution]] | None: group_id = str(group.get("id", "")).strip() if not group_id: return None @@ -265,7 +276,7 @@ def build_directory_hotspots( limit: int = 5, ) -> dict[str, object]: normalized_limit = max(1, _as_int(limit, 5)) - bucket_rows: dict[str, dict[str, dict[str, object]]] = { + bucket_rows: dict[str, dict[str, _DirectoryBucketRow]] = { bucket: {} for bucket in _DIRECTORY_HOTSPOT_BUCKETS } bucket_totals: Counter[str] = Counter() @@ -282,41 +293,22 @@ def build_directory_hotspots( for directory, contribution in contributions.items(): row = rows.setdefault( directory, - { - "path": directory, - "finding_ids": set(), - 
"affected_items": 0, - "files": set(), - "locations": [], - "kind_breakdown_ids": { - key: set() for key in _DIRECTORY_KIND_BREAKDOWN_KEYS - }, - }, - ) - cast(set[str], row["finding_ids"]).add(group_id) - row["affected_items"] = _as_int(row.get("affected_items")) + _as_int( - contribution.get("affected_items") - ) - cast(set[str], row["files"]).update( - cast(set[str], contribution["files"]) - ) - cast(list[dict[str, object]], row["locations"]).extend( - cast(list[dict[str, object]], contribution["locations"]) + _DirectoryBucketRow(path=directory), ) + row.finding_ids.add(group_id) + row.affected_items += contribution.affected_items + row.files.update(contribution.files) + row.locations.extend(contribution.locations) if bucket == "all" and kind_key is not None: - kind_rows = cast( - dict[str, set[str]], - row["kind_breakdown_ids"], - ) - kind_rows[kind_key].add(group_id) - bucket_totals[bucket] += _as_int(contribution.get("affected_items")) - - def _row_sort_key(row: Mapping[str, object]) -> tuple[int, int, int, str]: + row.kind_breakdown_ids[kind_key].add(group_id) + bucket_totals[bucket] += contribution.affected_items + + def _row_sort_key(row: _DirectoryBucketRow) -> tuple[int, int, int, str]: return ( - -len(cast(set[str], row["finding_ids"])), - -_as_int(row.get("affected_items")), - -len(cast(set[str], row["files"])), - str(row.get("path", "")), + -len(row.finding_ids), + -row.affected_items, + -len(row.files), + row.path, ) hotspots: dict[str, object] = {} @@ -326,11 +318,11 @@ def _row_sort_key(row: Mapping[str, object]) -> tuple[int, int, int, str]: total_affected_items = bucket_totals[bucket] items: list[dict[str, object]] = [] for row in bucket_items[:normalized_limit]: - finding_groups = len(cast(set[str], row["finding_ids"])) - affected_items = _as_int(row.get("affected_items")) - files = len(cast(set[str], row["files"])) + finding_groups = len(row.finding_ids) + affected_items = row.affected_items + files = len(row.files) item = { - "path": 
str(row.get("path", ".")), + "path": row.path, "finding_groups": finding_groups, "affected_items": affected_items, "files": files, @@ -340,13 +332,11 @@ def _row_sort_key(row: Mapping[str, object]) -> tuple[int, int, int, str]: ) if total_affected_items > 0 else 0.0, - "source_scope": source_scope_from_locations( - cast(list[dict[str, object]], row["locations"]) - ), + "source_scope": source_scope_from_locations(row.locations), } if bucket == "all": item["kind_breakdown"] = { - key: len(cast(dict[str, set[str]], row["kind_breakdown_ids"])[key]) + key: len(row.kind_breakdown_ids[key]) for key in _DIRECTORY_KIND_BREAKDOWN_KEYS } items.append(item) diff --git a/codeclone/report/renderers/sarif.py b/codeclone/report/renderers/sarif.py index bfa0bbc..17c8528 100644 --- a/codeclone/report/renderers/sarif.py +++ b/codeclone/report/renderers/sarif.py @@ -10,7 +10,7 @@ from collections.abc import Collection, Mapping, Sequence from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING import orjson @@ -194,7 +194,7 @@ def _artifact_catalog( } ) artifact_index_map = {path: index for index, path in enumerate(artifact_paths)} - artifacts = [ + artifacts: list[dict[str, object]] = [ { "location": { "uri": path, @@ -203,7 +203,7 @@ def _artifact_catalog( } for path in artifact_paths ] - return cast(list[dict[str, object]], artifacts), artifact_index_map + return artifacts, artifact_index_map def _clone_rule_spec(category: str) -> _RuleSpec: @@ -807,10 +807,10 @@ def _result_entry( group=group, primary_item=primary_item, ), - "properties": _result_properties(group), } + properties = _result_properties(group) + result["properties"] = properties if primary_item: - properties = cast(dict[str, object], result["properties"]) properties.update(_primary_location_properties(primary_item)) baseline_state = _baseline_state(group) if baseline_state: diff --git a/codeclone/surfaces/cli/attrs.py 
b/codeclone/surfaces/cli/attrs.py new file mode 100644 index 0000000..b450862 --- /dev/null +++ b/codeclone/surfaces/cli/attrs.py @@ -0,0 +1,42 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + + +def bool_attr(obj: object, name: str) -> bool: + return bool(getattr(obj, name, False)) + + +def int_attr(obj: object, name: str, default: int = 0) -> int: + value = getattr(obj, name, default) + if isinstance(value, bool): + return default + if isinstance(value, int): + return value + return default + + +def optional_text_attr(obj: object, name: str) -> str | None: + value = getattr(obj, name, None) + if value is None: + return None + if isinstance(value, Path): + return str(value) + if isinstance(value, str): + return value + return None + + +def text_attr(obj: object, name: str, default: str = "") -> str: + value = optional_text_attr(obj, name) + return default if value is None else value + + +def set_bool_attr(obj: object, name: str, value: bool) -> None: + setattr(obj, name, value) diff --git a/codeclone/surfaces/cli/baseline_state.py b/codeclone/surfaces/cli/baseline_state.py index a7bc631..2d6f5e3 100644 --- a/codeclone/surfaces/cli/baseline_state.py +++ b/codeclone/surfaces/cli/baseline_state.py @@ -10,7 +10,7 @@ from dataclasses import dataclass from json import JSONDecodeError from pathlib import Path -from typing import TYPE_CHECKING, Any, Protocol, cast +from typing import TYPE_CHECKING, Protocol import orjson @@ -36,8 +36,10 @@ ) from ...contracts.errors import BaselineValidationError from . 
import state as cli_state +from .types import CLIArgsLike, require_status_console if TYPE_CHECKING: + from ...core._types import AnalysisResult from ...models import GroupMapLike, ProjectMetrics __all__ = [ @@ -478,41 +480,39 @@ def _probe_metrics_baseline_section(path: Path) -> _MetricsBaselineSectionProbe: def _resolve_clone_baseline_state( *, - args: object, + args: CLIArgsLike, baseline_path: Path, baseline_exists: bool, - analysis: object, + analysis: AnalysisResult, shared_baseline_payload: dict[str, object] | None = None, ) -> _CloneBaselineState: - analysis_obj = cast("Any", analysis) return resolve_clone_baseline_state( - args=cast("Any", args), + args=args, baseline_path=baseline_path, baseline_exists=baseline_exists, - func_groups=analysis_obj.func_groups, - block_groups=analysis_obj.block_groups, + func_groups=analysis.func_groups, + block_groups=analysis.block_groups, codeclone_version=__version__, - console=cast("_PrinterLike", cli_state.get_console()), + console=require_status_console(cli_state.get_console()), shared_baseline_payload=shared_baseline_payload, ) def _resolve_metrics_baseline_state( *, - args: object, + args: CLIArgsLike, metrics_baseline_path: Path, metrics_baseline_exists: bool, baseline_updated_path: Path | None, - analysis: object, + analysis: AnalysisResult, shared_baseline_payload: dict[str, object] | None = None, ) -> _MetricsBaselineState: - analysis_obj = cast("Any", analysis) return resolve_metrics_baseline_state( - args=cast("Any", args), + args=args, metrics_baseline_path=metrics_baseline_path, metrics_baseline_exists=metrics_baseline_exists, baseline_updated_path=baseline_updated_path, - project_metrics=analysis_obj.project_metrics, - console=cast("_PrinterLike", cli_state.get_console()), + project_metrics=analysis.project_metrics, + console=require_status_console(cli_state.get_console()), shared_baseline_payload=shared_baseline_payload, ) diff --git a/codeclone/surfaces/cli/changed_scope.py 
b/codeclone/surfaces/cli/changed_scope.py index 5088744..4a47d23 100644 --- a/codeclone/surfaces/cli/changed_scope.py +++ b/codeclone/surfaces/cli/changed_scope.py @@ -10,14 +10,14 @@ import sys from collections.abc import Mapping, Sequence from pathlib import Path -from typing import Any, cast from ... import ui_messages as ui from ...contracts import ExitCode from ...utils import coerce as _coerce from ...utils.git_diff import validate_git_diff_ref from . import state as cli_state -from .types import ChangedCloneGate +from .attrs import bool_attr, optional_text_attr, set_bool_attr +from .types import ChangedCloneGate, require_status_console _as_mapping = _coerce.as_mapping _as_sequence = _coerce.as_sequence @@ -26,29 +26,30 @@ def _validate_changed_scope_args(*, args: object) -> str | None: - args_obj = cast("Any", args) - console = cast("Any", cli_state.get_console()) - if args_obj.diff_against and args_obj.paths_from_git_diff: + console = require_status_console(cli_state.get_console()) + diff_against = optional_text_attr(args, "diff_against") + paths_from_git_diff = optional_text_attr(args, "paths_from_git_diff") + if diff_against and paths_from_git_diff: console.print( ui.fmt_contract_error( "Use --diff-against or --paths-from-git-diff, not both." ) ) sys.exit(ExitCode.CONTRACT_ERROR) - if args_obj.paths_from_git_diff: - args_obj.changed_only = True - return str(args_obj.paths_from_git_diff) - if args_obj.diff_against and not args_obj.changed_only: + if paths_from_git_diff: + set_bool_attr(args, "changed_only", True) + return paths_from_git_diff + if diff_against and not bool_attr(args, "changed_only"): console.print(ui.fmt_contract_error("--diff-against requires --changed-only.")) sys.exit(ExitCode.CONTRACT_ERROR) - if args_obj.changed_only and not args_obj.diff_against: + if bool_attr(args, "changed_only") and not diff_against: console.print( ui.fmt_contract_error( "--changed-only requires --diff-against or --paths-from-git-diff." 
) ) sys.exit(ExitCode.CONTRACT_ERROR) - return str(args_obj.diff_against) if args_obj.diff_against else None + return diff_against def _normalize_changed_paths( @@ -56,7 +57,7 @@ def _normalize_changed_paths( root_path: Path, paths: Sequence[str], ) -> tuple[str, ...]: - console = cast("Any", cli_state.get_console()) + console = require_status_console(cli_state.get_console()) normalized: set[str] = set() for raw_path in paths: candidate = raw_path.strip() @@ -92,7 +93,7 @@ def _normalize_changed_paths( def _git_diff_changed_paths(*, root_path: Path, git_diff_ref: str) -> tuple[str, ...]: - console = cast("Any", cli_state.get_console()) + console = require_status_console(cli_state.get_console()) try: validated_ref = validate_git_diff_ref(git_diff_ref) except ValueError as exc: diff --git a/codeclone/surfaces/cli/console.py b/codeclone/surfaces/cli/console.py index 422e7f2..9a7ffb5 100644 --- a/codeclone/surfaces/cli/console.py +++ b/codeclone/surfaces/cli/console.py @@ -13,12 +13,13 @@ from contextlib import AbstractContextManager, nullcontext from functools import lru_cache from pathlib import Path -from typing import TYPE_CHECKING, Any, Protocol, cast +from typing import TYPE_CHECKING from ... import __version__ from ... import ui_messages as ui from ...report.gates import reasons as gate_reasons from . import state as cli_state +from .types import CLIArgsLike, PrinterLike, StatusConsole, require_status_console if TYPE_CHECKING: from rich.console import Console as RichConsole @@ -40,10 +41,6 @@ _RICH_MARKUP_TAG_RE = re.compile(r"\[/?[a-zA-Z][a-zA-Z0-9_ .#:-]*]") -class _PrinterLike(Protocol): - def print(self, *objects: object, **kwargs: object) -> None: ... 
- - class PlainConsole: """Lightweight console for quiet/no-progress mode.""" @@ -112,7 +109,7 @@ def make_plain_console() -> PlainConsole: def _render_banner( *, - console: _PrinterLike, + console: PrinterLike, banner_title: str, project_name: str | None = None, root_display: str | None = None, @@ -131,12 +128,19 @@ def _render_banner( console.print(f" [dim]Root:[/dim] [dim]{root_display}[/dim]") -def _console() -> _PrinterLike: - return cast("_PrinterLike", cli_state.get_console()) +def _console() -> StatusConsole: + return require_status_console(cli_state.get_console()) -def _rich_progress_symbols() -> tuple[type[object], ...]: - return cast("tuple[type[object], ...]", rich_progress_symbols()) +def _rich_progress_symbols() -> tuple[ + type[RichProgress], + type[RichSpinnerColumn], + type[RichTextColumn], + type[RichBarColumn], + type[RichTimeElapsedColumn], +]: + progress, spinner, text, bar, elapsed = rich_progress_symbols() + return (progress, spinner, text, bar, elapsed) def _make_console(*, no_color: bool) -> object: @@ -155,18 +159,18 @@ def _print_gating_failure_block( *, code: str, entries: Sequence[tuple[str, object]], - args: object, + args: CLIArgsLike, ) -> None: gate_reasons.print_gating_failure_block( console=_console(), code=code, entries=list(entries), - args=cast("Any", args), + args=args, ) def _print_verbose_clone_hashes( - console: _PrinterLike, + console: PrinterLike, *, label: str, clone_hashes: set[str], diff --git a/codeclone/surfaces/cli/execution.py b/codeclone/surfaces/cli/execution.py index d243392..08ec6e4 100644 --- a/codeclone/surfaces/cli/execution.py +++ b/codeclone/surfaces/cli/execution.py @@ -8,9 +8,18 @@ import sys import time +from collections.abc import Callable from dataclasses import replace from pathlib import Path -from typing import Any, Protocol, cast + +from rich.console import Console as RichConsole +from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, +) from ... 
import ui_messages as ui from ...cache.store import Cache @@ -18,12 +27,12 @@ from ...contracts.errors import CacheError from ...core._types import AnalysisResult, BootstrapResult, DiscoveryResult from ...core._types import ProcessingResult as PipelineProcessingResult +from ...core.reporting import GatingResult +from ...models import MetricsDiff from . import state as cli_state +from .attrs import bool_attr from .console import PlainConsole - - -class _PrinterLike(Protocol): - def print(self, *objects: object, **kwargs: object) -> None: ... +from .types import require_status_console def run_analysis_stages( @@ -31,25 +40,35 @@ def run_analysis_stages( args: object, boot: BootstrapResult, cache: Cache, - discover_fn: Any, - process_fn: Any, - analyze_fn: Any, - print_failed_files_fn: Any, - cache_update_segment_projection_fn: Any, - rich_progress_symbols_fn: Any, + discover_fn: Callable[..., DiscoveryResult], + process_fn: Callable[..., PipelineProcessingResult], + analyze_fn: Callable[..., AnalysisResult], + print_failed_files_fn: Callable[[tuple[str, ...]], None], + cache_update_segment_projection_fn: Callable[[Cache, AnalysisResult], None], + rich_progress_symbols_fn: Callable[ + [], + tuple[ + type[Progress], + type[SpinnerColumn], + type[TextColumn], + type[BarColumn], + type[TimeElapsedColumn], + ], + ], ) -> tuple[DiscoveryResult, PipelineProcessingResult, AnalysisResult]: - def _require_rich_console(value: object) -> object: + def _require_rich_console(value: object) -> RichConsole: if isinstance(value, PlainConsole): raise RuntimeError("Rich console is required when progress UI is enabled.") + if not isinstance(value, RichConsole): + raise RuntimeError("Rich console is required when progress UI is enabled.") return value - args_obj = cast("Any", args) - printer = cast("_PrinterLike", cli_state.get_console()) - use_status = not args_obj.quiet and not args_obj.no_progress + printer = require_status_console(cli_state.get_console()) + use_status = not 
bool_attr(args, "quiet") and not bool_attr(args, "no_progress") try: if use_status: - with cast("Any", printer).status(ui.STATUS_DISCOVERING, spinner="dots"): + with printer.status(ui.STATUS_DISCOVERING, spinner="dots"): discovery_result = discover_fn(boot=boot, cache=cache) else: discovery_result = discover_fn(boot=boot, cache=cache) @@ -61,10 +80,14 @@ def _require_rich_console(value: object) -> object: printer.print(f"[warning]{warning}[/warning]") total_files = len(discovery_result.files_to_process) - if total_files > 0 and not args_obj.quiet and args_obj.no_progress: + if ( + total_files > 0 + and not bool_attr(args, "quiet") + and bool_attr(args, "no_progress") + ): printer.print(ui.fmt_processing_changed(total_files)) - if total_files > 0 and not args_obj.no_progress: + if total_files > 0 and not bool_attr(args, "no_progress"): ( progress_cls, spinner_column_cls, @@ -73,19 +96,15 @@ def _require_rich_console(value: object) -> object: time_elapsed_column_cls, ) = rich_progress_symbols_fn() - progress_factory = cast("Any", progress_cls) - with progress_factory( - cast("Any", spinner_column_cls)(), - cast("Any", text_column_cls)("[progress.description]{task.description}"), - cast("Any", bar_column_cls)(), - cast("Any", text_column_cls)( - "[progress.percentage]{task.percentage:>3.0f}%" - ), - cast("Any", time_elapsed_column_cls)(), + with progress_cls( + spinner_column_cls(), + text_column_cls("[progress.description]{task.description}"), + bar_column_cls(), + text_column_cls("[progress.percentage]{task.percentage:>3.0f}%"), + time_elapsed_column_cls(), console=_require_rich_console(cli_state.get_console()), ) as progress_ui: - progress_ui_any = cast("Any", progress_ui) - task_id = progress_ui_any.add_task( + task_id = progress_ui.add_task( f"Analyzing {total_files} files...", total=total_files, ) @@ -93,7 +112,7 @@ def _require_rich_console(value: object) -> object: boot=boot, discovery=discovery_result, cache=cache, - on_advance=lambda: 
progress_ui_any.advance(task_id), + on_advance=lambda: progress_ui.advance(task_id), on_worker_error=lambda reason: printer.print( ui.fmt_worker_failed(reason) ), @@ -108,7 +127,7 @@ def _require_rich_console(value: object) -> object: cache=cache, on_worker_error=( (lambda reason: printer.print(ui.fmt_batch_item_failed(reason))) - if args_obj.no_progress + if bool_attr(args, "no_progress") else (lambda reason: printer.print(ui.fmt_worker_failed(reason))) ), on_parallel_fallback=lambda exc: printer.print( @@ -121,7 +140,7 @@ def _require_rich_console(value: object) -> object: print_failed_files_fn(tuple(processing_result.source_read_failures)) if use_status: - with cast("Any", printer).status(ui.STATUS_GROUPING, spinner="dots"): + with printer.status(ui.STATUS_GROUPING, spinner="dots"): analysis_result = analyze_fn( boot=boot, discovery=discovery_result, @@ -166,16 +185,15 @@ def enforce_gating( metrics_baseline_failure_code: ExitCode | None, new_func: set[str], new_block: set[str], - metrics_diff: object | None, + metrics_diff: MetricsDiff | None, html_report_path: str | None, - gate_fn: Any, - parse_metric_reason_entry_fn: Any, - print_gating_failure_block_fn: Any, - print_verbose_clone_hashes_fn: Any, + gate_fn: Callable[..., GatingResult], + parse_metric_reason_entry_fn: Callable[[str], tuple[str, str]], + print_gating_failure_block_fn: Callable[..., None], + print_verbose_clone_hashes_fn: Callable[..., None], clone_threshold_total: int | None = None, ) -> None: - args_obj = cast("Any", args) - printer = cast("_PrinterLike", cli_state.get_console()) + printer = require_status_console(cli_state.get_console()) if source_read_contract_failure: printer.print( @@ -203,7 +221,7 @@ def enforce_gating( ) sys.exit(metrics_baseline_failure_code) - if bool(getattr(args_obj, "fail_on_untested_hotspots", False)): + if bool_attr(args, "fail_on_untested_hotspots"): if analysis.coverage_join is None: printer.print( ui.fmt_contract_error( @@ -238,7 +256,7 @@ def enforce_gating( 
analysis=gating_analysis, new_func=new_func, new_block=new_block, - metrics_diff=cast("Any", metrics_diff), + metrics_diff=metrics_diff, ) metric_reasons = [ @@ -250,7 +268,7 @@ def enforce_gating( print_gating_failure_block_fn( code="metrics", entries=[parse_metric_reason_entry_fn(reason) for reason in metric_reasons], - args=args_obj, + args=args, ) sys.exit(ExitCode.GATING_FAILURE) @@ -270,10 +288,10 @@ def enforce_gating( print_gating_failure_block_fn( code="new-clones", entries=clone_entries, - args=args_obj, + args=args, ) - if args_obj.verbose: + if bool_attr(args, "verbose"): print_verbose_clone_hashes_fn( printer, label="Function clone hashes", @@ -303,16 +321,15 @@ def enforce_gating( ("clone_groups_total", int(total_raw)), ("clone_groups_limit", int(threshold_raw)), ), - args=args_obj, + args=args, ) sys.exit(ExitCode.GATING_FAILURE) def print_pipeline_done_if_needed(*, args: object, run_started_at: float) -> None: - args_obj = cast("Any", args) - if args_obj.quiet: + if bool_attr(args, "quiet"): return elapsed = time.monotonic() - run_started_at - printer = cast("_PrinterLike", cli_state.get_console()) + printer = require_status_console(cli_state.get_console()) printer.print() printer.print(ui.fmt_pipeline_done(elapsed)) diff --git a/codeclone/surfaces/cli/main.py b/codeclone/surfaces/cli/main.py deleted file mode 100644 index ce049c3..0000000 --- a/codeclone/surfaces/cli/main.py +++ /dev/null @@ -1,14 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -from .workflow import main - -__all__ = ["main"] - -if __name__ == "__main__": - main() diff --git a/codeclone/surfaces/cli/post_run.py b/codeclone/surfaces/cli/post_run.py index 27ae1ee..1662ce3 100644 --- a/codeclone/surfaces/cli/post_run.py +++ b/codeclone/surfaces/cli/post_run.py @@ -6,17 +6,18 @@ from __future__ import annotations -from collections.abc import Collection, Mapping +from collections.abc import Callable, Collection, Mapping from dataclasses import dataclass from pathlib import Path -from typing import Any, cast from ... import ui_messages as ui from ...baseline import Baseline from ...core._types import AnalysisResult +from ...models import MetricsDiff from .baseline_state import CloneBaselineState, MetricsBaselineState from .changed_scope import ChangedCloneGate from .summary import ChangedScopeSnapshot +from .types import CLIArgsLike, PrinterLike @dataclass(frozen=True, slots=True) @@ -24,7 +25,7 @@ class DiffContext: new_func: set[str] new_block: set[str] new_clones_count: int - metrics_diff: object | None + metrics_diff: MetricsDiff | None coverage_adoption_diff_available: bool api_surface_diff_available: bool @@ -71,19 +72,19 @@ def build_diff_context( def print_metrics_if_available( *, - args: object, + args: CLIArgsLike, analysis: AnalysisResult, - metrics_diff: object | None, + metrics_diff: MetricsDiff | None, api_surface_diff_available: bool, - console: Any, - build_metrics_snapshot_fn: Any, - print_metrics_fn: Any, + console: PrinterLike, + build_metrics_snapshot_fn: Callable[..., object], + print_metrics_fn: Callable[..., None], ) -> None: if analysis.project_metrics is None: return print_metrics_fn( console=console, - quiet=bool(cast("Any", args).quiet), + quiet=args.quiet, metrics=build_metrics_snapshot_fn( analysis_result=analysis, metrics_diff=metrics_diff, @@ -94,34 +95,31 @@ def print_metrics_if_available( def 
resolve_changed_clone_gate( *, - args: object, + args: CLIArgsLike, report_document: Mapping[str, object] | None, changed_paths: Collection[str], - changed_clone_gate_from_report_fn: Any, + changed_clone_gate_from_report_fn: Callable[..., ChangedCloneGate], ) -> ChangedCloneGate | None: - if not cast("Any", args).changed_only or report_document is None: + if not args.changed_only or report_document is None: return None - return cast( - "ChangedCloneGate", - changed_clone_gate_from_report_fn( - report_document, - changed_paths=tuple(changed_paths), - ), + return changed_clone_gate_from_report_fn( + report_document, + changed_paths=tuple(changed_paths), ) def maybe_print_changed_scope_snapshot( *, - args: object, + args: CLIArgsLike, changed_clone_gate: ChangedCloneGate | None, - console: Any, - print_changed_scope_fn: Any, + console: PrinterLike, + print_changed_scope_fn: Callable[..., None], ) -> None: if changed_clone_gate is None: return print_changed_scope_fn( console=console, - quiet=bool(cast("Any", args).quiet), + quiet=args.quiet, changed_scope=ChangedScopeSnapshot( paths_count=len(changed_clone_gate.changed_paths), findings_total=changed_clone_gate.findings_total, @@ -133,11 +131,10 @@ def maybe_print_changed_scope_snapshot( def warn_new_clones_without_fail( *, - args: object, + args: CLIArgsLike, notice_new_clones_count: int, - console: Any, + console: PrinterLike, ) -> None: - args_obj = cast("Any", args) - if args_obj.update_baseline or args_obj.fail_on_new or notice_new_clones_count <= 0: + if args.update_baseline or args.fail_on_new or notice_new_clones_count <= 0: return console.print(ui.WARN_NEW_CLONES_WITHOUT_FAIL) diff --git a/codeclone/surfaces/cli/report_meta.py b/codeclone/surfaces/cli/report_meta.py index 43b7c98..ae195e9 100644 --- a/codeclone/surfaces/cli/report_meta.py +++ b/codeclone/surfaces/cli/report_meta.py @@ -8,7 +8,7 @@ import sys from datetime import datetime, timezone -from typing import TYPE_CHECKING, Any, cast +from typing import 
TYPE_CHECKING from ...baseline.clone_baseline import Baseline from ...baseline.trust import current_python_tag @@ -19,6 +19,7 @@ DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, ) from ...contracts.schemas import ReportMeta +from .types import CLIArgsLike if TYPE_CHECKING: from pathlib import Path @@ -142,12 +143,11 @@ def build_cli_report_meta( metrics_baseline_path: Path, metrics_baseline_state: MetricsBaselineState, analysis_result: AnalysisResult, - args: object, + args: CLIArgsLike, metrics_computed: tuple[str, ...], analysis_started_at_utc: str | None, report_generated_at_utc: str, ) -> ReportMeta: - args_obj = cast("Any", args) project_metrics = analysis_result.project_metrics return _build_report_meta( codeclone_version=codeclone_version, @@ -167,14 +167,14 @@ def build_cli_report_meta( metrics_baseline_status=metrics_baseline_state.status.value, health_score=(project_metrics.health.total if project_metrics else None), health_grade=(project_metrics.health.grade if project_metrics else None), - analysis_mode=("clones_only" if args_obj.skip_metrics else "full"), + analysis_mode=("clones_only" if args.skip_metrics else "full"), metrics_computed=metrics_computed, - min_loc=args_obj.min_loc, - min_stmt=args_obj.min_stmt, - block_min_loc=args_obj.block_min_loc, - block_min_stmt=args_obj.block_min_stmt, - segment_min_loc=args_obj.segment_min_loc, - segment_min_stmt=args_obj.segment_min_stmt, + min_loc=args.min_loc, + min_stmt=args.min_stmt, + block_min_loc=args.block_min_loc, + block_min_stmt=args.block_min_stmt, + segment_min_loc=args.segment_min_loc, + segment_min_stmt=args.segment_min_stmt, analysis_started_at_utc=analysis_started_at_utc, report_generated_at_utc=report_generated_at_utc, ) diff --git a/codeclone/surfaces/cli/reports_output.py b/codeclone/surfaces/cli/reports_output.py index 350893b..0cc417b 100644 --- a/codeclone/surfaces/cli/reports_output.py +++ b/codeclone/surfaces/cli/reports_output.py @@ -10,16 +10,20 @@ import webbrowser from collections.abc 
import Callable, Mapping, Sequence from pathlib import Path -from typing import Any, Protocol, cast +from typing import Protocol from ... import ui_messages as ui from ...contracts import ExitCode from . import state as cli_state -from .types import OutputPaths, ReportPathOrigin - - -class _PrinterLike(Protocol): - def print(self, *objects: object, **kwargs: object) -> None: ... +from .attrs import bool_attr, optional_text_attr +from .types import ( + CLIArgsLike, + OutputPaths, + PrinterLike, + ReportArtifacts, + ReportPathOrigin, + require_status_console, +) class _QuietArgs(Protocol): @@ -41,7 +45,7 @@ def _write_report_output( out: Path, content: str, label: str, - console: _PrinterLike, + console: PrinterLike, ) -> None: try: out.parent.mkdir(parents=True, exist_ok=True) @@ -63,9 +67,9 @@ def _open_html_report_in_browser(*, path: Path) -> None: def write_report_outputs( *, args: _QuietArgs, - output_paths: object, - report_artifacts: object, - console: _PrinterLike, + output_paths: OutputPaths, + report_artifacts: ReportArtifacts, + console: PrinterLike, open_html_report: bool = False, ) -> str | None: html_report_path: str | None = None @@ -156,7 +160,7 @@ def _validate_output_path( *, expected_suffix: str, label: str, - console: _PrinterLike, + console: PrinterLike, invalid_message: Callable[..., str], invalid_path_message: Callable[..., str], ) -> Path: @@ -233,13 +237,12 @@ def _timestamped_report_path(path: Path, *, report_generated_at_utc: str) -> Pat def _resolve_output_paths( - args: object, + args: CLIArgsLike, *, report_path_origins: Mapping[str, ReportPathOrigin | None], report_generated_at_utc: str, ) -> OutputPaths: - args_obj = cast("Any", args) - printer = cast("_PrinterLike", cli_state.get_console()) + printer = require_status_console(cli_state.get_console()) resolved: dict[str, Path | None] = { "html": None, "json": None, @@ -256,7 +259,7 @@ def _resolve_output_paths( ) for field_name, arg_name, expected_suffix, label in output_specs: - 
raw_value = getattr(args_obj, arg_name, None) + raw_value = optional_text_attr(args, arg_name) if not raw_value: continue path = _validate_output_path( @@ -268,7 +271,7 @@ def _resolve_output_paths( invalid_path_message=ui.fmt_invalid_output_path, ) if ( - args_obj.timestamped_report_paths + args.timestamped_report_paths and report_path_origins.get(field_name) == "default" ): path = _timestamped_report_path( @@ -287,13 +290,12 @@ def _resolve_output_paths( def _validate_report_ui_flags(*, args: object, output_paths: OutputPaths) -> None: - args_obj = cast("Any", args) - console = cast("_PrinterLike", cli_state.get_console()) - if args_obj.open_html_report and output_paths.html is None: + console = require_status_console(cli_state.get_console()) + if bool_attr(args, "open_html_report") and output_paths.html is None: console.print(ui.fmt_contract_error(ui.ERR_OPEN_HTML_REPORT_REQUIRES_HTML)) sys.exit(ExitCode.CONTRACT_ERROR) - if args_obj.timestamped_report_paths and not any( + if bool_attr(args, "timestamped_report_paths") and not any( ( output_paths.html, output_paths.json, @@ -310,15 +312,15 @@ def _validate_report_ui_flags(*, args: object, output_paths: OutputPaths) -> Non def _write_report_outputs( *, - args: object, + args: CLIArgsLike, output_paths: OutputPaths, - report_artifacts: object, + report_artifacts: ReportArtifacts, open_html_report: bool = False, ) -> str | None: return write_report_outputs( - args=cast("Any", args), + args=args, output_paths=output_paths, report_artifacts=report_artifacts, - console=cast("_PrinterLike", cli_state.get_console()), + console=require_status_console(cli_state.get_console()), open_html_report=open_html_report, ) diff --git a/codeclone/surfaces/cli/runtime.py b/codeclone/surfaces/cli/runtime.py index 7c76594..ac346d3 100644 --- a/codeclone/surfaces/cli/runtime.py +++ b/codeclone/surfaces/cli/runtime.py @@ -8,44 +8,15 @@ import sys from pathlib import Path -from typing import Any, Protocol, cast +from typing import 
Protocol from ... import ui_messages as ui from ...cache.store import Cache from ...cache.versioning import CacheStatus from ...contracts import ExitCode from . import state as cli_state - - -class _RuntimeArgs(Protocol): - cache_path: str | None - coverage_xml: str | None - max_baseline_size_mb: int - max_cache_size_mb: int - fail_threshold: int - fail_complexity: int - fail_coupling: int - fail_cohesion: int - fail_health: int - fail_on_new_metrics: bool - fail_on_typing_regression: bool - fail_on_docstring_regression: bool - fail_on_api_break: bool - fail_on_untested_hotspots: bool - min_typing_coverage: int - min_docstring_coverage: int - coverage_min: int - api_surface: bool - update_metrics_baseline: bool - skip_metrics: bool - fail_cycles: bool - fail_dead_code: bool - skip_dead_code: bool - skip_dependencies: bool - - -class _PrinterLike(Protocol): - def print(self, *objects: object, **kwargs: object) -> None: ... +from .attrs import bool_attr, int_attr, optional_text_attr, set_bool_attr +from .types import PrinterLike, require_status_console class _CacheLike(Protocol): @@ -59,56 +30,56 @@ def load_warning(self) -> str | None: ... def cache_schema_version(self) -> str | None: ... 
-def validate_numeric_args(args: _RuntimeArgs) -> bool: +def validate_numeric_args(args: object) -> bool: return bool( not ( - args.max_baseline_size_mb < 0 - or args.max_cache_size_mb < 0 - or args.fail_threshold < -1 - or args.fail_complexity < -1 - or args.fail_coupling < -1 - or args.fail_cohesion < -1 - or args.fail_health < -1 - or args.min_typing_coverage < -1 - or args.min_typing_coverage > 100 - or args.min_docstring_coverage < -1 - or args.min_docstring_coverage > 100 - or args.coverage_min < 0 - or args.coverage_min > 100 + int_attr(args, "max_baseline_size_mb") < 0 + or int_attr(args, "max_cache_size_mb") < 0 + or int_attr(args, "fail_threshold", -1) < -1 + or int_attr(args, "fail_complexity", -1) < -1 + or int_attr(args, "fail_coupling", -1) < -1 + or int_attr(args, "fail_cohesion", -1) < -1 + or int_attr(args, "fail_health", -1) < -1 + or int_attr(args, "min_typing_coverage", -1) < -1 + or int_attr(args, "min_typing_coverage", -1) > 100 + or int_attr(args, "min_docstring_coverage", -1) < -1 + or int_attr(args, "min_docstring_coverage", -1) > 100 + or int_attr(args, "coverage_min") < 0 + or int_attr(args, "coverage_min") > 100 ) ) -def _metrics_flags_requested(args: _RuntimeArgs) -> bool: +def _metrics_flags_requested(args: object) -> bool: return bool( - args.fail_complexity >= 0 - or args.fail_coupling >= 0 - or args.fail_cohesion >= 0 - or args.fail_cycles - or args.fail_dead_code - or args.fail_health >= 0 - or args.fail_on_new_metrics - or args.fail_on_typing_regression - or args.fail_on_docstring_regression - or args.fail_on_api_break - or args.fail_on_untested_hotspots - or args.min_typing_coverage >= 0 - or args.min_docstring_coverage >= 0 - or args.api_surface - or args.update_metrics_baseline - or bool(getattr(args, "coverage_xml", None)) + int_attr(args, "fail_complexity", -1) >= 0 + or int_attr(args, "fail_coupling", -1) >= 0 + or int_attr(args, "fail_cohesion", -1) >= 0 + or bool_attr(args, "fail_cycles") + or bool_attr(args, 
"fail_dead_code") + or int_attr(args, "fail_health", -1) >= 0 + or bool_attr(args, "fail_on_new_metrics") + or bool_attr(args, "fail_on_typing_regression") + or bool_attr(args, "fail_on_docstring_regression") + or bool_attr(args, "fail_on_api_break") + or bool_attr(args, "fail_on_untested_hotspots") + or int_attr(args, "min_typing_coverage", -1) >= 0 + or int_attr(args, "min_docstring_coverage", -1) >= 0 + or bool_attr(args, "api_surface") + or bool_attr(args, "update_metrics_baseline") + or bool(optional_text_attr(args, "coverage_xml")) ) def configure_metrics_mode( *, - args: _RuntimeArgs, + args: object, metrics_baseline_exists: bool, - console: _PrinterLike, + console: PrinterLike, ) -> None: metrics_flags_requested = _metrics_flags_requested(args) - if args.skip_metrics and metrics_flags_requested: + if bool_attr(args, "skip_metrics") and metrics_flags_requested: console.print( ui.fmt_contract_error( "--skip-metrics cannot be used together with metrics gating/update " @@ -118,35 +89,36 @@ def configure_metrics_mode( sys.exit(ExitCode.CONTRACT_ERROR) if ( - not args.skip_metrics + not bool_attr(args, "skip_metrics") and not metrics_flags_requested and not metrics_baseline_exists ): - args.skip_metrics = True + set_bool_attr(args, "skip_metrics", True) - if args.skip_metrics: - args.skip_dead_code = True - args.skip_dependencies = True + if bool_attr(args, "skip_metrics"): + set_bool_attr(args, "skip_dead_code", True) + set_bool_attr(args, "skip_dependencies", True) return - if args.fail_dead_code: - args.skip_dead_code = False - if args.fail_cycles: - args.skip_dependencies = False - if bool(getattr(args, "fail_on_api_break", False)): - args.api_surface = True + if bool_attr(args, "fail_dead_code"): + set_bool_attr(args, "skip_dead_code", False) + if bool_attr(args, "fail_cycles"): + set_bool_attr(args, "skip_dependencies", False) + if bool_attr(args, "fail_on_api_break"): + set_bool_attr(args, "api_surface", True) def resolve_cache_path( *, root_path: Path, - 
args: _RuntimeArgs, + args: object, from_args: bool, legacy_cache_path: Path, - console: _PrinterLike, + console: PrinterLike, ) -> Path: - if from_args and args.cache_path: - return Path(args.cache_path).expanduser() + cache_path_arg = optional_text_attr(args, "cache_path") + if from_args and cache_path_arg: + return Path(cache_path_arg).expanduser() cache_path = root_path / ".cache" / "codeclone" / "cache.json" if legacy_cache_path.exists(): @@ -164,19 +136,19 @@ def resolve_cache_path( return cache_path -def metrics_computed(args: _RuntimeArgs) -> tuple[str, ...]: - if args.skip_metrics: +def metrics_computed(args: object) -> tuple[str, ...]: + if bool_attr(args, "skip_metrics"): return () computed = ["complexity", "coupling", "cohesion", "health"] - if not args.skip_dependencies: + if not bool_attr(args, "skip_dependencies"): computed.append("dependencies") - if not args.skip_dead_code: + if not bool_attr(args, "skip_dead_code"): computed.append("dead_code") computed.append("coverage_adoption") - if bool(getattr(args, "api_surface", False)): + if bool_attr(args, "api_surface"): computed.append("api_surface") - if bool(getattr(args, "coverage_xml", None)): + if bool(optional_text_attr(args, "coverage_xml")): computed.append("coverage_join") return tuple(computed) @@ -220,54 +192,54 @@ def prepare_metrics_mode_and_ui( baseline_exists: bool, metrics_baseline_path: Path, metrics_baseline_exists: bool, - configure_metrics_mode: Any, - print_banner: Any, + configure_metrics_mode: object, + print_banner: object, ) -> None: - args_obj = cast("Any", args) if ( - args_obj.update_baseline - and not args_obj.skip_metrics - and not args_obj.update_metrics_baseline + bool_attr(args, "update_baseline") + and not bool_attr(args, "skip_metrics") + and not bool_attr(args, "update_metrics_baseline") ): - args_obj.update_metrics_baseline = True - configure_metrics_mode( - args=args_obj, - metrics_baseline_exists=metrics_baseline_exists, - ) + set_bool_attr(args, 
"update_metrics_baseline", True) + if callable(configure_metrics_mode): + configure_metrics_mode( + args=args, + metrics_baseline_exists=metrics_baseline_exists, + ) if ( - args_obj.update_metrics_baseline + bool_attr(args, "update_metrics_baseline") and metrics_baseline_path == baseline_path and not baseline_exists - and not args_obj.update_baseline + and not bool_attr(args, "update_baseline") ): - args_obj.update_baseline = True - if args_obj.quiet: - args_obj.no_progress = True + set_bool_attr(args, "update_baseline", True) + if bool_attr(args, "quiet"): + set_bool_attr(args, "no_progress", True) return - print_banner(root=root_path) + if callable(print_banner): + print_banner(root=root_path) def gating_mode_enabled(args: object) -> bool: - args_obj = cast("Any", args) return bool( - args_obj.fail_on_new - or args_obj.fail_threshold >= 0 - or args_obj.fail_complexity >= 0 - or args_obj.fail_coupling >= 0 - or args_obj.fail_cohesion >= 0 - or args_obj.fail_cycles - or args_obj.fail_dead_code - or args_obj.fail_health >= 0 - or args_obj.fail_on_new_metrics - or args_obj.fail_on_typing_regression - or args_obj.fail_on_docstring_regression - or args_obj.fail_on_api_break - or args_obj.min_typing_coverage >= 0 - or args_obj.min_docstring_coverage >= 0 + bool_attr(args, "fail_on_new") + or int_attr(args, "fail_threshold", -1) >= 0 + or int_attr(args, "fail_complexity", -1) >= 0 + or int_attr(args, "fail_coupling", -1) >= 0 + or int_attr(args, "fail_cohesion", -1) >= 0 + or bool_attr(args, "fail_cycles") + or bool_attr(args, "fail_dead_code") + or int_attr(args, "fail_health", -1) >= 0 + or bool_attr(args, "fail_on_new_metrics") + or bool_attr(args, "fail_on_typing_regression") + or bool_attr(args, "fail_on_docstring_regression") + or bool_attr(args, "fail_on_api_break") + or int_attr(args, "min_typing_coverage", -1) >= 0 + or int_attr(args, "min_docstring_coverage", -1) >= 0 ) -def print_failed_files(*, failed_files: tuple[str, ...], console: _PrinterLike) -> None: 
+def print_failed_files(*, failed_files: tuple[str, ...], console: PrinterLike) -> None: if not failed_files: return console.print(ui.fmt_failed_files_header(len(failed_files))) @@ -277,37 +249,46 @@ def print_failed_files(*, failed_files: tuple[str, ...], console: _PrinterLike) console.print(f" ... and {len(failed_files) - 10} more") -def _resolve_cache_path(*, root_path: Path, args: object, from_args: bool) -> Path: +def _resolve_cache_path( + *, + root_path: Path, + args: object, + from_args: bool, +) -> Path: return resolve_cache_path( root_path=root_path, - args=cast("Any", args), + args=args, from_args=from_args, legacy_cache_path=cli_state.LEGACY_CACHE_PATH, - console=cast("_PrinterLike", cli_state.get_console()), + console=require_status_console(cli_state.get_console()), ) def _validate_numeric_args(args: object) -> bool: - return validate_numeric_args(cast("Any", args)) + return validate_numeric_args(args) -def _configure_metrics_mode(*, args: object, metrics_baseline_exists: bool) -> None: +def _configure_metrics_mode( + *, + args: object, + metrics_baseline_exists: bool, +) -> None: configure_metrics_mode( - args=cast("Any", args), + args=args, metrics_baseline_exists=metrics_baseline_exists, - console=cast("_PrinterLike", cli_state.get_console()), + console=require_status_console(cli_state.get_console()), ) def _print_failed_files(failed_files: tuple[str, ...] 
| list[str]) -> None: print_failed_files( failed_files=tuple(failed_files), - console=cast("_PrinterLike", cli_state.get_console()), + console=require_status_console(cli_state.get_console()), ) def _metrics_computed(args: object) -> tuple[str, ...]: - return metrics_computed(cast("Any", args)) + return metrics_computed(args) def _resolve_cache_status(cache: Cache) -> tuple[CacheStatus, str | None]: diff --git a/codeclone/surfaces/cli/startup.py b/codeclone/surfaces/cli/startup.py index 51062ca..8cd145c 100644 --- a/codeclone/surfaces/cli/startup.py +++ b/codeclone/surfaces/cli/startup.py @@ -7,13 +7,17 @@ from __future__ import annotations import os +from collections.abc import Callable from dataclasses import dataclass from pathlib import Path -from typing import Any, NoReturn, cast +from typing import NoReturn from ... import ui_messages as ui from ...config.pyproject_loader import ConfigValidationError from ...contracts import ExitCode +from .attrs import text_attr +from .baseline_state import MetricsBaselineSectionProbe +from .types import CLIArgsLike, ParserWithDefaults, StatusConsole @dataclass(frozen=True, slots=True) @@ -40,7 +44,7 @@ def resolve_runtime_path_arg( def exit_contract_error( message: str, *, - printer: Any, + printer: StatusConsole, cause: BaseException | None = None, ) -> NoReturn: printer.print(ui.fmt_contract_error(message)) @@ -49,9 +53,9 @@ def exit_contract_error( raise SystemExit(ExitCode.CONTRACT_ERROR) from cause -def resolve_existing_root_path(*, args: object, printer: Any) -> Path: +def resolve_existing_root_path(*, args: object, printer: StatusConsole) -> Path: try: - root_path = Path(cast("Any", args).root).resolve() + root_path = Path(text_attr(args, "root", ".")).resolve() except OSError as exc: exit_contract_error( ui.ERR_INVALID_ROOT_PATH.format(error=exc), @@ -69,37 +73,33 @@ def resolve_existing_root_path(*, args: object, printer: Any) -> Path: def load_pyproject_config_or_exit( *, root_path: Path, - 
load_pyproject_config_fn: Any, - printer: Any, + load_pyproject_config_fn: Callable[[Path], dict[str, object]], + printer: StatusConsole, ) -> dict[str, object]: try: - return cast("dict[str, object]", load_pyproject_config_fn(root_path)) + return load_pyproject_config_fn(root_path) except ConfigValidationError as exc: exit_contract_error(str(exc), printer=printer, cause=exc) -def configure_runtime_flags(args: object) -> None: - args_obj = cast("Any", args) - if args_obj.debug: +def configure_runtime_flags(args: CLIArgsLike) -> None: + if args.debug: os.environ["CODECLONE_DEBUG"] = "1" - if args_obj.ci: - args_obj.fail_on_new = True - args_obj.no_color = True - args_obj.quiet = True + if args.ci: + args.fail_on_new = True + args.no_color = True + args.quiet = True def configure_runtime_console( *, - args: object, - make_plain_console: Any, - make_console: Any, - set_console: Any, + args: CLIArgsLike, + make_plain_console: Callable[[], object], + make_console: Callable[..., object], + set_console: Callable[[object], None], ) -> object: - args_obj = cast("Any", args) console = ( - make_plain_console() - if args_obj.quiet - else make_console(no_color=args_obj.no_color) + make_plain_console() if args.quiet else make_console(no_color=args.no_color) ) set_console(console) return console @@ -107,9 +107,9 @@ def configure_runtime_console( def validate_numeric_args_or_exit( *, - args: object, - validate_numeric_args_fn: Any, - printer: Any, + args: CLIArgsLike, + validate_numeric_args_fn: Callable[[CLIArgsLike], bool], + printer: StatusConsole, ) -> None: if validate_numeric_args_fn(args): return @@ -123,22 +123,19 @@ def validate_numeric_args_or_exit( def resolve_baseline_inputs( *, - ap: object, - args: object, + ap: ParserWithDefaults, + args: CLIArgsLike, root_path: Path, baseline_path_from_args: bool, metrics_path_from_args: bool, - probe_metrics_baseline_section_fn: Any, - printer: Any, + probe_metrics_baseline_section_fn: Callable[[Path], 
MetricsBaselineSectionProbe], + printer: StatusConsole, ) -> ResolvedBaselineInputs: - args_obj = cast("Any", args) - ap_obj = cast("Any", ap) - - baseline_arg_path = Path(args_obj.baseline).expanduser() + baseline_arg_path = Path(text_attr(args, "baseline")).expanduser() try: baseline_path = resolve_runtime_path_arg( root_path=root_path, - raw_path=args_obj.baseline, + raw_path=text_attr(args, "baseline"), from_cli=baseline_path_from_args, ) baseline_exists = baseline_path.exists() @@ -150,12 +147,15 @@ def resolve_baseline_inputs( ) shared_baseline_payload: dict[str, object] | None = None - default_metrics_baseline = ap_obj.get_default("metrics_baseline") + default_metrics_baseline = ap.get_default("metrics_baseline") + metrics_baseline_value = text_attr(args, "metrics_baseline") metrics_path_overridden = metrics_path_from_args or ( - args_obj.metrics_baseline != default_metrics_baseline + metrics_baseline_value != str(default_metrics_baseline) ) metrics_baseline_raw_path = ( - args_obj.metrics_baseline if metrics_path_overridden else args_obj.baseline + metrics_baseline_value + if metrics_path_overridden + else text_attr(args, "baseline") ) metrics_baseline_arg_path = Path(metrics_baseline_raw_path).expanduser() try: diff --git a/codeclone/surfaces/cli/summary.py b/codeclone/surfaces/cli/summary.py index f94be07..f0c14aa 100644 --- a/codeclone/surfaces/cli/summary.py +++ b/codeclone/surfaces/cli/summary.py @@ -7,9 +7,11 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, Protocol +from typing import Protocol from ... import ui_messages as ui +from ...core._types import AnalysisResult, DiscoveryResult, ProcessingResult +from ...models import MetricsDiff from ...utils import coerce as _coerce _as_int = _coerce.as_int @@ -65,8 +67,8 @@ def print(self, *objects: object, **kwargs: object) -> None: ... 
def build_summary_counts( *, - discovery_result: Any, - processing_result: Any, + discovery_result: DiscoveryResult, + processing_result: ProcessingResult, ) -> dict[str, int]: return { "analyzed_lines": processing_result.analyzed_lines @@ -82,11 +84,13 @@ def build_summary_counts( def build_metrics_snapshot( *, - analysis_result: Any, - metrics_diff: Any | None, + analysis_result: AnalysisResult, + metrics_diff: MetricsDiff | None, api_surface_diff_available: bool, ) -> MetricsSnapshot: project_metrics = analysis_result.project_metrics + if project_metrics is None: + raise ValueError("Metrics snapshot requires computed project metrics.") metrics_payload_map = _as_mapping(analysis_result.metrics_payload) overloaded_modules_summary = _as_mapping( _as_mapping(metrics_payload_map.get("overloaded_modules")).get("summary") diff --git a/codeclone/surfaces/cli/types.py b/codeclone/surfaces/cli/types.py index 8d4e640..3e464b2 100644 --- a/codeclone/surfaces/cli/types.py +++ b/codeclone/surfaces/cli/types.py @@ -6,17 +6,29 @@ from __future__ import annotations +from contextlib import AbstractContextManager from dataclasses import dataclass -from typing import Literal +from pathlib import Path +from typing import Literal, Protocol, runtime_checkable -from ...core._types import FileProcessResult as ProcessingResult -from ...core._types import OutputPaths +from ...core._types import ( + AnalysisResult, + BootstrapResult, + DiscoveryResult, + OutputPaths, + ReportArtifacts, +) +from ...core._types import ( + FileProcessResult as ProcessingResult, +) ReportPathOrigin = Literal["default", "explicit"] @dataclass(frozen=True, slots=True) class ChangedCloneGate: + """Changed-scope clone summary used by CLI post-run gating.""" + changed_paths: tuple[str, ...] 
new_func: frozenset[str] new_block: frozenset[str] @@ -26,9 +38,106 @@ class ChangedCloneGate: findings_known: int +@runtime_checkable +class PrinterLike(Protocol): + """Minimal console surface that supports plain text output.""" + + def print(self, *objects: object, **kwargs: object) -> None: ... + + +@runtime_checkable +class StatusConsole(PrinterLike, Protocol): + """Console surface that can open rich status contexts.""" + + def status( + self, + *objects: object, + **kwargs: object, + ) -> AbstractContextManager[object]: ... + + +class CLIArgsLike(Protocol): + """Typed attribute view over the CLI namespace used by the workflow.""" + + root: str | Path + baseline: str | Path + metrics_baseline: str | Path + cache_path: str | Path | None + html_out: str | None + json_out: str | None + md_out: str | None + sarif_out: str | None + text_out: str | None + debug: bool + ci: bool + quiet: bool + no_color: bool + no_progress: bool + open_html_report: bool + timestamped_report_paths: bool + changed_only: bool + diff_against: str | None + paths_from_git_diff: str | None + skip_metrics: bool + skip_dead_code: bool + skip_dependencies: bool + update_baseline: bool + update_metrics_baseline: bool + fail_on_new: bool + fail_threshold: int + fail_complexity: int + fail_coupling: int + fail_cohesion: int + fail_cycles: bool + fail_dead_code: bool + fail_health: int + fail_on_new_metrics: bool + fail_on_typing_regression: bool + fail_on_docstring_regression: bool + fail_on_api_break: bool + fail_on_untested_hotspots: bool + min_typing_coverage: int + min_docstring_coverage: int + coverage_min: int + coverage_xml: str | None + api_surface: bool + verbose: bool + max_baseline_size_mb: int + max_cache_size_mb: int + min_loc: int + min_stmt: int + block_min_loc: int + block_min_stmt: int + segment_min_loc: int + segment_min_stmt: int + + +class ParserWithDefaults(Protocol): + """Argparse-compatible parser surface for default lookups.""" + + def get_default(self, dest: str) -> 
object: ... + + +def require_status_console(value: object) -> StatusConsole: + """Return a status-capable console or raise a precise type error.""" + + if not isinstance(value, StatusConsole): + raise TypeError("CLI console does not provide print/status methods.") + return value + + __all__ = [ + "AnalysisResult", + "BootstrapResult", + "CLIArgsLike", "ChangedCloneGate", + "DiscoveryResult", "OutputPaths", + "ParserWithDefaults", + "PrinterLike", "ProcessingResult", + "ReportArtifacts", "ReportPathOrigin", + "StatusConsole", + "require_status_console", ] diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index 532d27e..5518dde 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -9,7 +9,6 @@ import sys import time from pathlib import Path -from typing import Any, Protocol, cast from ... import __version__ from ... import ui_messages as ui @@ -33,6 +32,7 @@ from ...core.parallelism import process from ...core.pipeline import analyze from ...core.reporting import gate, report +from ...models import MetricsDiff from ...report.html import build_html_report from . import report_meta as cli_meta_mod from . import state as cli_state @@ -98,6 +98,7 @@ build_metrics_snapshot, build_summary_counts, ) +from .types import CLIArgsLike, StatusConsole, require_status_console __all__ = [ "LEGACY_CACHE_PATH", @@ -137,17 +138,13 @@ ] -class _PrinterLike(Protocol): - def print(self, *objects: object, **kwargs: object) -> None: ... 
- - def _set_console(value: object) -> object: cli_state.set_console(value) return value -def _console() -> _PrinterLike: - return cast("_PrinterLike", _set_console(console)) +def _console() -> StatusConsole: + return require_status_console(_set_console(console)) def _make_console(*, no_color: bool) -> object: @@ -164,21 +161,26 @@ def print_banner(*, root: Path | None = None) -> None: _print_banner_impl(root=root) -def _configure_runtime_console(args: object) -> None: +def _configure_runtime_console(args: CLIArgsLike) -> None: global console console = _configure_runtime_console_impl( args=args, make_plain_console=_make_plain_console, make_console=_make_console, - set_console=_set_console, + set_console=lambda value: cli_state.set_console(value), ) -def _resolve_cache_path(*, root_path: Path, args: object, from_args: bool) -> Path: +def _resolve_cache_path( + *, + root_path: Path, + args: CLIArgsLike, + from_args: bool, +) -> Path: cli_state.LEGACY_CACHE_PATH = LEGACY_CACHE_PATH _set_console(console) return _resolve_cache_path_impl( - root_path=cast("Any", root_path), + root_path=root_path, args=args, from_args=from_args, ) @@ -199,7 +201,7 @@ def _cache_update_segment_projection(cache: Cache, analysis: AnalysisResult) -> def _run_analysis_stages( *, - args: object, + args: CLIArgsLike, boot: BootstrapResult, cache: Cache, ) -> tuple[DiscoveryResult, PipelineProcessingResult, AnalysisResult]: @@ -228,7 +230,7 @@ def _enforce_gating( metrics_baseline_failure_code: ExitCode | None, new_func: set[str], new_block: set[str], - metrics_diff: object | None, + metrics_diff: MetricsDiff | None, html_report_path: str | None, clone_threshold_total: int | None = None, ) -> None: diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 7f4d718..81f264a 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -10,7 +10,7 @@ import ipaddress import sys from collections.abc import Callable -from typing import 
TYPE_CHECKING, Any, Literal, TypeVar, cast +from typing import TYPE_CHECKING, Literal, TypeVar from ... import __version__ from ...contracts import DOCS_URL @@ -22,6 +22,7 @@ CachePolicy, MCPAnalysisRequest, MCPGateRequest, + MCPServiceContractError, _validated_history_limit, ) @@ -66,12 +67,13 @@ def _load_mcp_runtime() -> tuple[ ToolAnnotations, ]: try: - from mcp.server.fastmcp import FastMCP as runtime_fastmcp + from mcp.server.fastmcp import FastMCP as imported_fastmcp from mcp.types import ToolAnnotations as runtime_tool_annotations except ImportError as exc: raise MCPDependencyError(_MCP_INSTALL_HINT) from exc + runtime_fastmcp: type[FastMCP] = imported_fastmcp return ( - cast("type[FastMCP]", runtime_fastmcp), + runtime_fastmcp, runtime_tool_annotations( readOnlyHint=True, destructiveHint=False, @@ -93,6 +95,30 @@ def _load_mcp_runtime() -> tuple[ ) +def _validated_analysis_mode(value: str) -> AnalysisMode: + if value == "full": + return "full" + if value == "clones_only": + return "clones_only" + raise MCPServiceContractError( + f"Invalid value for analysis_mode: {value!r}. " + "Expected one of: clones_only, full." + ) + + +def _validated_cache_policy(value: str) -> CachePolicy: + if value == "reuse": + return "reuse" + if value == "refresh": + return "refresh" + if value == "off": + return "off" + raise MCPServiceContractError( + f"Invalid value for cache_policy: {value!r}. " + "Expected one of: off, refresh, reuse." 
+ ) + + def build_mcp_server( *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT, @@ -103,6 +129,8 @@ def build_mcp_server( debug: bool = False, log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO", ) -> FastMCP: + """Build and register the local read-only CodeClone FastMCP server.""" + runtime_fastmcp, read_only_tool, analysis_tool, session_tool = _load_mcp_runtime() service = CodeCloneMCPService(history_limit=_validated_history_limit(history_limit)) mcp = runtime_fastmcp( @@ -120,20 +148,26 @@ def build_mcp_server( # FastMCP otherwise reports the `mcp` package version in initialize/serverInfo. mcp._mcp_server.version = __version__ - def tool(*args: Any, **kwargs: Any) -> Callable[[MCPCallable], MCPCallable]: - return cast( - "Callable[[MCPCallable], MCPCallable]", - mcp.tool(*args, **kwargs), - ) + def tool(*args: object, **kwargs: object) -> Callable[[MCPCallable], MCPCallable]: + decorator = mcp.tool(*args, **kwargs) # type: ignore[arg-type] + + def register(func: MCPCallable) -> MCPCallable: + decorator(func) + return func + + return register def resource( - *args: Any, - **kwargs: Any, + *args: object, + **kwargs: object, ) -> Callable[[MCPCallable], MCPCallable]: - return cast( - "Callable[[MCPCallable], MCPCallable]", - mcp.resource(*args, **kwargs), - ) + decorator = mcp.resource(*args, **kwargs) # type: ignore[arg-type] + + def register(func: MCPCallable) -> MCPCallable: + decorator(func) + return func + + return register @tool( title="Analyze Repository", @@ -177,7 +211,7 @@ def analyze_repository( return service.analyze_repository( MCPAnalysisRequest( root=root, - analysis_mode=cast("AnalysisMode", analysis_mode), + analysis_mode=_validated_analysis_mode(analysis_mode), respect_pyproject=respect_pyproject, changed_paths=tuple(changed_paths or ()), git_diff_ref=git_diff_ref, @@ -197,7 +231,7 @@ def analyze_repository( baseline_path=baseline_path, metrics_baseline_path=metrics_baseline_path, 
max_baseline_size_mb=max_baseline_size_mb, - cache_policy=cast("CachePolicy", cache_policy), + cache_policy=_validated_cache_policy(cache_policy), cache_path=cache_path, max_cache_size_mb=max_cache_size_mb, ) @@ -249,7 +283,7 @@ def analyze_changed_paths( root=root, changed_paths=tuple(changed_paths or ()), git_diff_ref=git_diff_ref, - analysis_mode=cast("AnalysisMode", analysis_mode), + analysis_mode=_validated_analysis_mode(analysis_mode), respect_pyproject=respect_pyproject, processes=processes, min_loc=min_loc, @@ -267,7 +301,7 @@ def analyze_changed_paths( baseline_path=baseline_path, metrics_baseline_path=metrics_baseline_path, max_baseline_size_mb=max_baseline_size_mb, - cache_policy=cast("CachePolicy", cache_policy), + cache_policy=_validated_cache_policy(cache_policy), cache_path=cache_path, max_cache_size_mb=max_cache_size_mb, ) diff --git a/codeclone/surfaces/mcp/service.py b/codeclone/surfaces/mcp/service.py index 4a9c0da..adca1d4 100644 --- a/codeclone/surfaces/mcp/service.py +++ b/codeclone/surfaces/mcp/service.py @@ -6,7 +6,7 @@ from __future__ import annotations import inspect -from typing import Any, cast +from typing import Protocol from .session import ( DEFAULT_MCP_HISTORY_LIMIT, @@ -17,107 +17,110 @@ from .tools._base import run_kw -class CodeCloneMCPService(MCPSession): - def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: - super().__init__(history_limit=history_limit) - # Keep a stable seam for tests and monkeypatch-based callers while the - # service itself now owns the real MCP session state. 
- self.session = self - - def _run_session_method( - self, - name: str, - /, - *args: object, - **kwargs: object, - ) -> object: - method = cast("Any", getattr(MCPSession, name)) - return method(self, *args, **kwargs) - - def _session_bound_method(self, name: str) -> object: - return cast("Any", getattr(MCPSession, name)).__get__(self, MCPSession) - - def _run_dict(self, name: str, **params: object) -> dict[str, object]: - bound = self._session_bound_method(name) - return cast("dict[str, object]", run_kw(bound, params)) - - def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: - return cast( - "dict[str, object]", - self._run_session_method("analyze_repository", request), - ) +class _RunDictService(Protocol): + def _run_dict(self, method_name: str, **params: object) -> dict[str, object]: ... - def analyze_changed_paths(self, request: MCPAnalysisRequest) -> dict[str, object]: - return cast( - "dict[str, object]", - self._run_session_method("analyze_changed_paths", request), - ) - - def get_run_summary(self, run_id: str | None = None) -> dict[str, object]: - return cast( - "dict[str, object]", - self._run_session_method("get_run_summary", run_id), - ) - def compare_runs(self, **params: object) -> dict[str, object]: +class _QueryServiceMixin: + def compare_runs(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("compare_runs", **params) - def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: - return cast( - "dict[str, object]", - self._run_session_method("evaluate_gates", request), - ) - - def get_report_section(self, **params: object) -> dict[str, object]: + def get_report_section( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: return self._run_dict("get_report_section", **params) - def list_findings(self, **params: object) -> dict[str, object]: + def list_findings(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("list_findings", 
**params) - def get_finding(self, **params: object) -> dict[str, object]: + def get_finding(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("get_finding", **params) - def get_remediation(self, **params: object) -> dict[str, object]: + def get_remediation(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("get_remediation", **params) - def list_hotspots(self, **params: object) -> dict[str, object]: + def list_hotspots(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("list_hotspots", **params) - def get_production_triage(self, **params: object) -> dict[str, object]: + def get_production_triage( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: return self._run_dict("get_production_triage", **params) - def get_help(self, **params: object) -> dict[str, object]: + def get_help(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("get_help", **params) - def generate_pr_summary(self, **params: object) -> dict[str, object]: + def generate_pr_summary( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: return self._run_dict("generate_pr_summary", **params) - def mark_finding_reviewed(self, **params: object) -> dict[str, object]: + def mark_finding_reviewed( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: return self._run_dict("mark_finding_reviewed", **params) - def list_reviewed_findings(self, **params: object) -> dict[str, object]: + def list_reviewed_findings( + self: _RunDictService, + **params: object, + ) -> dict[str, object]: return self._run_dict("list_reviewed_findings", **params) - def clear_session_runs(self) -> dict[str, object]: - return cast("dict[str, object]", self._run_session_method("clear_session_runs")) - - def check_complexity(self, **params: object) -> dict[str, object]: + def check_complexity( + self: _RunDictService, + **params: object, + ) -> 
dict[str, object]: return self._run_dict("check_complexity", **params) - def check_clones(self, **params: object) -> dict[str, object]: + def check_clones(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("check_clones", **params) - def check_coupling(self, **params: object) -> dict[str, object]: + def check_coupling(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("check_coupling", **params) - def check_cohesion(self, **params: object) -> dict[str, object]: + def check_cohesion(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("check_cohesion", **params) - def check_dead_code(self, **params: object) -> dict[str, object]: + def check_dead_code(self: _RunDictService, **params: object) -> dict[str, object]: return self._run_dict("check_dead_code", **params) + +class CodeCloneMCPService(_QueryServiceMixin, MCPSession): + def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: + super().__init__(history_limit=history_limit) + self._session_cls = MCPSession + # Keep a stable seam for tests and monkeypatch-based callers while the + # service itself now owns the real MCP session state. 
+ self.session = self + + def _run_dict(self, method_name: str, **params: object) -> dict[str, object]: + bound = getattr(self._session_cls, method_name).__get__(self, type(self)) + result = run_kw(bound, params) + if not isinstance(result, dict): + raise TypeError(f"MCP session method '{method_name}' must return a dict.") + return result + + def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: + return self._session_cls.analyze_repository(self, request) + + def analyze_changed_paths(self, request: MCPAnalysisRequest) -> dict[str, object]: + return self._session_cls.analyze_changed_paths(self, request) + + def get_run_summary(self, run_id: str | None = None) -> dict[str, object]: + return self._session_cls.get_run_summary(self, run_id) + + def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: + return self._session_cls.evaluate_gates(self, request) + + def clear_session_runs(self) -> dict[str, object]: + return self._session_cls.clear_session_runs(self) + def read_resource(self, uri: str) -> str: - return cast("str", self._run_session_method("read_resource", uri)) + return self._session_cls.read_resource(self, uri) _EMPTY = inspect.Signature.empty diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 55af87b..33fcc98 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -15,7 +15,7 @@ from json import JSONDecodeError from pathlib import Path from threading import RLock -from typing import Any, Final, Literal, cast +from typing import Final, Literal, TypeVar import orjson @@ -338,6 +338,7 @@ } _SHORT_RUN_ID_LENGTH = 8 _SHORT_HASH_ID_LENGTH = 6 +ChoiceT = TypeVar("ChoiceT", bound=str) @dataclass(frozen=True) @@ -1232,18 +1233,15 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: analysis_result.project_metrics ) - report_artifacts = cast( - "Any", - report( - boot=boot, - discovery=discovery_result, - 
processing=processing_result, - analysis=analysis_result, - report_meta=report_meta, - new_func=new_func, - new_block=new_block, - metrics_diff=metrics_diff, - ), + report_artifacts = report( + boot=boot, + discovery=discovery_result, + processing=processing_result, + analysis=analysis_result, + report_meta=report_meta, + new_func=new_func, + new_block=new_block, + metrics_diff=metrics_diff, ) report_json = report_artifacts.json if report_json is None: @@ -1349,9 +1347,10 @@ def compare_runs( run_id_after: str | None = None, focus: ComparisonFocus = "all", ) -> dict[str, object]: - validated_focus = cast( - "ComparisonFocus", - self._validate_choice("focus", focus, _VALID_COMPARISON_FOCUS), + validated_focus = self._validate_choice( + "focus", + focus, + _VALID_COMPARISON_FOCUS, ) before = self._runs.get(run_id_before) after = self._runs.get(run_id_after) @@ -1517,9 +1516,10 @@ def get_report_section( offset: int = 0, limit: int = 50, ) -> dict[str, object]: - validated_section = cast( - "ReportSection", - self._validate_choice("section", section, _VALID_REPORT_SECTIONS), + validated_section = self._validate_choice( + "section", + section, + _VALID_REPORT_SECTIONS, ) record = self._runs.get(run_id) report_document = record.report_document @@ -1553,7 +1553,7 @@ def get_report_section( if validated_family_input is not None else None ) - validated_family = cast("MetricsDetailFamily | None", normalized_family) + validated_family = self._metrics_detail_family(normalized_family) return self._metrics_detail_payload( metrics=metrics, family=validated_family, @@ -1588,21 +1588,25 @@ def list_findings( limit: int = 50, max_results: int | None = None, ) -> dict[str, object]: - validated_family = cast( - "FindingFamilyFilter", - self._validate_choice("family", family, _VALID_FINDING_FAMILIES), + validated_family = self._validate_choice( + "family", + family, + _VALID_FINDING_FAMILIES, ) - validated_novelty = cast( - "FindingNoveltyFilter", - self._validate_choice("novelty", 
novelty, _VALID_FINDING_NOVELTY), + validated_novelty = self._validate_choice( + "novelty", + novelty, + _VALID_FINDING_NOVELTY, ) - validated_sort = cast( - "FindingSort", - self._validate_choice("sort_by", sort_by, _VALID_FINDING_SORT), + validated_sort = self._validate_choice( + "sort_by", + sort_by, + _VALID_FINDING_SORT, ) - validated_detail = cast( - "DetailLevel", - self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + validated_detail = self._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, ) validated_severity = self._validate_optional_choice( "severity", @@ -1658,9 +1662,10 @@ def get_finding( detail_level: DetailLevel = "normal", ) -> dict[str, object]: record = self._runs.get(run_id) - validated_detail = cast( - "DetailLevel", - self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + validated_detail = self._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, ) canonical_id = self._resolve_canonical_finding_id(record, finding_id) for finding in self._base_findings(record): @@ -1695,9 +1700,10 @@ def get_remediation( run_id: str | None = None, detail_level: DetailLevel = "normal", ) -> dict[str, object]: - validated_detail = cast( - "DetailLevel", - self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + validated_detail = self._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, ) record = self._runs.get(run_id) canonical_id = self._resolve_canonical_finding_id(record, finding_id) @@ -1733,13 +1739,11 @@ def list_hotspots( limit: int = 10, max_results: int | None = None, ) -> dict[str, object]: - validated_kind = cast( - "HotlistKind", - self._validate_choice("kind", kind, _VALID_HOTLIST_KINDS), - ) - validated_detail = cast( - "DetailLevel", - self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + validated_kind = self._validate_choice("kind", kind, _VALID_HOTLIST_KINDS) + validated_detail = 
self._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, ) record = self._runs.get(run_id) paths_filter = self._resolve_query_changed_paths( @@ -1854,13 +1858,11 @@ def get_help( topic: HelpTopic, detail: HelpDetail = "compact", ) -> dict[str, object]: - validated_topic = cast( - "HelpTopic", - self._validate_choice("topic", topic, _VALID_HELP_TOPICS), - ) - validated_detail = cast( - "HelpDetail", - self._validate_choice("detail", detail, _VALID_HELP_DETAILS), + validated_topic = self._validate_choice("topic", topic, _VALID_HELP_TOPICS) + validated_detail = self._validate_choice( + "detail", + detail, + _VALID_HELP_DETAILS, ) spec = _HELP_TOPIC_SPECS[validated_topic] payload: dict[str, object] = { @@ -1888,9 +1890,10 @@ def generate_pr_summary( git_diff_ref: str | None = None, format: PRSummaryFormat = "markdown", ) -> dict[str, object]: - output_format = cast( - "PRSummaryFormat", - self._validate_choice("format", format, _VALID_PR_SUMMARY_FORMATS), + output_format = self._validate_choice( + "format", + format, + _VALID_PR_SUMMARY_FORMATS, ) record = self._runs.get(run_id) paths_filter = self._resolve_query_changed_paths( @@ -1912,7 +1915,7 @@ def generate_pr_summary( run_id_after=record.run_id, focus="all", ) - resolved = cast("list[dict[str, object]]", compare_payload["improvements"]) + resolved = self._dict_rows(compare_payload.get("improvements")) with self._state_lock: gate_result = dict( self._last_gate_results.get( @@ -1937,7 +1940,7 @@ def generate_pr_summary( "verdict": verdict, "new_findings_in_changed_files": changed_items, "resolved": resolved, - "blocking_gates": list(cast(Sequence[str], gate_result.get("reasons", []))), + "blocking_gates": self._string_rows(gate_result.get("reasons")), } if output_format == "json": return payload @@ -2042,9 +2045,10 @@ def check_complexity( max_results: int = 10, detail_level: DetailLevel = "summary", ) -> dict[str, object]: - validated_detail = cast( - "DetailLevel", - 
self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + validated_detail = self._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, ) record = self._resolve_granular_record( run_id=run_id, @@ -2098,9 +2102,10 @@ def check_clones( max_results: int = 10, detail_level: DetailLevel = "summary", ) -> dict[str, object]: - validated_detail = cast( - "DetailLevel", - self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + validated_detail = self._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, ) record = self._resolve_granular_record( run_id=run_id, @@ -2179,9 +2184,10 @@ def _check_design_metric( category: str, check: str, ) -> dict[str, object]: - validated_detail = cast( - "DetailLevel", - self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + validated_detail = self._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, ) record = self._resolve_granular_record( run_id=run_id, @@ -2221,9 +2227,10 @@ def check_dead_code( max_results: int = 10, detail_level: DetailLevel = "summary", ) -> dict[str, object]: - validated_detail = cast( - "DetailLevel", - self._validate_choice("detail_level", detail_level, _VALID_DETAIL_LEVELS), + validated_detail = self._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, ) validated_min_severity = self._validate_optional_choice( "min_severity", @@ -3927,9 +3934,9 @@ def _validate_analysis_request(self, request: MCPAnalysisRequest) -> None: @staticmethod def _validate_choice( name: str, - value: str, + value: ChoiceT, allowed: Sequence[str] | frozenset[str], - ) -> str: + ) -> ChoiceT: if value not in allowed: allowed_list = ", ".join(sorted(allowed)) raise MCPServiceContractError( @@ -3940,13 +3947,57 @@ def _validate_choice( def _validate_optional_choice( self, name: str, - value: str | None, + value: ChoiceT | None, allowed: Sequence[str] | frozenset[str], - ) -> str | None: + ) -> 
ChoiceT | None: if value is None: return None return self._validate_choice(name, value, allowed) + @staticmethod + def _metrics_detail_family(value: str | None) -> MetricsDetailFamily | None: + match value: + case "complexity": + return "complexity" + case "coupling": + return "coupling" + case "cohesion": + return "cohesion" + case "coverage_adoption": + return "coverage_adoption" + case "coverage_join": + return "coverage_join" + case "dependencies": + return "dependencies" + case "dead_code": + return "dead_code" + case "api_surface": + return "api_surface" + case "god_modules" | "overloaded_modules": + return "overloaded_modules" + case "health": + return "health" + case _: + return None + + @staticmethod + def _dict_rows(value: object) -> list[dict[str, object]]: + if not isinstance(value, Sequence) or isinstance( + value, + (str, bytes, bytearray), + ): + return [] + return [dict(item) for item in value if isinstance(item, Mapping)] + + @staticmethod + def _string_rows(value: object) -> list[str]: + if not isinstance(value, Sequence) or isinstance( + value, + (str, bytes, bytearray), + ): + return [] + return [str(item) for item in value if isinstance(item, str)] + @staticmethod def _resolve_root(root: str | None) -> Path: cleaned_root = "" if root is None else str(root).strip() diff --git a/codeclone/surfaces/mcp/tools/_base.py b/codeclone/surfaces/mcp/tools/_base.py index 64c8a51..41c0969 100644 --- a/codeclone/surfaces/mcp/tools/_base.py +++ b/codeclone/surfaces/mcp/tools/_base.py @@ -8,7 +8,7 @@ from collections.abc import Callable, Mapping from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Protocol, cast +from typing import TYPE_CHECKING, Protocol if TYPE_CHECKING: from ..session import MCPSession @@ -40,5 +40,5 @@ def run(self, session: MCPSession, params: Mapping[str, object]) -> object: return self.runner(session, params) -def run_kw(bound: object, params: Mapping[str, object]) -> object: - return cast("Any", 
bound)(**dict(params)) +def run_kw(bound: Callable[..., object], params: Mapping[str, object]) -> object: + return bound(**dict(params)) diff --git a/codeclone/surfaces/mcp/tools/analyze.py b/codeclone/surfaces/mcp/tools/analyze.py index 4ebe7aa..2c7548c 100644 --- a/codeclone/surfaces/mcp/tools/analyze.py +++ b/codeclone/surfaces/mcp/tools/analyze.py @@ -5,24 +5,32 @@ from __future__ import annotations -from typing import cast +from collections.abc import Mapping -from ..session import MCPAnalysisRequest +from ..session import MCPAnalysisRequest, MCPServiceContractError from ._base import MCPToolSchema, SimpleMCPTool + +def _analysis_request(params: Mapping[str, object]) -> MCPAnalysisRequest: + request = params.get("request") + if not isinstance(request, MCPAnalysisRequest): + raise MCPServiceContractError("Tool requires a valid MCPAnalysisRequest.") + return request + + TOOLS = ( SimpleMCPTool( name="analyze_repository", schema=MCPToolSchema(title="Analyze Repository"), runner=lambda session, params: session.analyze_repository( - cast("MCPAnalysisRequest", params["request"]) + _analysis_request(params) ), ), SimpleMCPTool( name="analyze_changed_paths", schema=MCPToolSchema(title="Analyze Changed Paths"), runner=lambda session, params: session.analyze_changed_paths( - cast("MCPAnalysisRequest", params["request"]) + _analysis_request(params) ), ), ) diff --git a/codeclone/surfaces/mcp/tools/gates.py b/codeclone/surfaces/mcp/tools/gates.py index 78c6255..16b44f4 100644 --- a/codeclone/surfaces/mcp/tools/gates.py +++ b/codeclone/surfaces/mcp/tools/gates.py @@ -5,17 +5,23 @@ from __future__ import annotations -from typing import cast +from collections.abc import Mapping -from ..session import MCPGateRequest +from ..session import MCPGateRequest, MCPServiceContractError from ._base import MCPToolSchema, SimpleMCPTool + +def _gate_request(params: Mapping[str, object]) -> MCPGateRequest: + request = params.get("request") + if not isinstance(request, MCPGateRequest): + 
raise MCPServiceContractError("Tool requires a valid MCPGateRequest.") + return request + + TOOLS = ( SimpleMCPTool( name="evaluate_gates", schema=MCPToolSchema(title="Evaluate Gates"), - runner=lambda session, params: session.evaluate_gates( - cast("MCPGateRequest", params["request"]) - ), + runner=lambda session, params: session.evaluate_gates(_gate_request(params)), ), ) diff --git a/codeclone/surfaces/mcp/tools/runs.py b/codeclone/surfaces/mcp/tools/runs.py index ae6caa5..58189b8 100644 --- a/codeclone/surfaces/mcp/tools/runs.py +++ b/codeclone/surfaces/mcp/tools/runs.py @@ -5,17 +5,19 @@ from __future__ import annotations -from typing import cast - from ._base import MCPToolSchema, SimpleMCPTool + +def _run_id(params: dict[str, object]) -> str | None: + value = params.get("run_id") + return value if isinstance(value, str) else None + + TOOLS = ( SimpleMCPTool( name="get_run_summary", schema=MCPToolSchema(title="Get Run Summary"), - runner=lambda session, params: session.get_run_summary( - cast("str | None", params.get("run_id")) - ), + runner=lambda session, params: session.get_run_summary(_run_id(dict(params))), ), SimpleMCPTool( name="clear_session_runs", diff --git a/codeclone/utils/json_io.py b/codeclone/utils/json_io.py index c7be355..69dd93c 100644 --- a/codeclone/utils/json_io.py +++ b/codeclone/utils/json_io.py @@ -10,7 +10,6 @@ import tempfile from json import JSONDecodeError from pathlib import Path -from typing import Any import orjson @@ -40,7 +39,7 @@ def read_json_document(path: Path) -> object: return orjson.loads(path.read_bytes()) -def read_json_object(path: Path) -> dict[str, Any]: +def read_json_object(path: Path) -> dict[str, object]: payload = read_json_document(path) if not isinstance(payload, dict): raise TypeError("JSON payload must be an object") From 3eb8bceb7a994714bfa0f8a0d7cd1a4b32765f73 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 21 Apr 2026 21:51:32 +0500 Subject: [PATCH 04/32] refactor(analysis,report): remove 
empty TYPE_CHECKING guards --- codeclone/analysis/fingerprint.py | 4 ---- codeclone/report/document/_design_groups.py | 7 +------ codeclone/report/document/integrity.py | 4 ---- codeclone/report/document/inventory.py | 5 ----- codeclone/report/document/metrics.py | 5 ----- 5 files changed, 1 insertion(+), 24 deletions(-) diff --git a/codeclone/analysis/fingerprint.py b/codeclone/analysis/fingerprint.py index a33ebe4..dff7dbc 100644 --- a/codeclone/analysis/fingerprint.py +++ b/codeclone/analysis/fingerprint.py @@ -7,7 +7,6 @@ from __future__ import annotations import hashlib -from typing import TYPE_CHECKING from .. import qualnames as _qualnames from ..metrics.complexity import cyclomatic_complexity @@ -18,9 +17,6 @@ normalized_ast_dump_from_list, ) -if TYPE_CHECKING: - pass - def sha1(s: str) -> str: return hashlib.sha1(s.encode("utf-8")).hexdigest() diff --git a/codeclone/report/document/_design_groups.py b/codeclone/report/document/_design_groups.py index 5ef9e59..01f0a77 100644 --- a/codeclone/report/document/_design_groups.py +++ b/codeclone/report/document/_design_groups.py @@ -7,7 +7,6 @@ from __future__ import annotations from collections.abc import Mapping -from typing import TYPE_CHECKING from ...contracts import ( DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, @@ -32,6 +31,7 @@ SEVERITY_CRITICAL, SEVERITY_WARNING, ) +from ...findings.ids import design_group_id from ...utils.coerce import as_float as _as_float from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping @@ -39,11 +39,6 @@ from ..derived import ( report_location_from_group_item, ) - -if TYPE_CHECKING: - pass - -from ...findings.ids import design_group_id from ._common import ( _COVERAGE_JOIN_FAMILY, _coerced_nonnegative_threshold, diff --git a/codeclone/report/document/integrity.py b/codeclone/report/document/integrity.py index 8863370..5360ef8 100644 --- a/codeclone/report/document/integrity.py +++ b/codeclone/report/document/integrity.py @@ -8,13 +8,9 
@@ from collections.abc import Mapping, Sequence from hashlib import sha256 -from typing import TYPE_CHECKING import orjson -if TYPE_CHECKING: - pass - def _canonical_integrity_payload( *, diff --git a/codeclone/report/document/inventory.py b/codeclone/report/document/inventory.py index a51fce3..17ed577 100644 --- a/codeclone/report/document/inventory.py +++ b/codeclone/report/document/inventory.py @@ -7,7 +7,6 @@ from __future__ import annotations from collections.abc import Mapping, Sequence -from typing import TYPE_CHECKING from ...domain.findings import ( CATEGORY_COHESION, @@ -16,10 +15,6 @@ from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping from ...utils.coerce import as_sequence as _as_sequence - -if TYPE_CHECKING: - pass - from ._common import ( _analysis_profile_payload, _contract_path, diff --git a/codeclone/report/document/metrics.py b/codeclone/report/document/metrics.py index e1472a5..3c98024 100644 --- a/codeclone/report/document/metrics.py +++ b/codeclone/report/document/metrics.py @@ -7,7 +7,6 @@ from __future__ import annotations from collections.abc import Mapping -from typing import TYPE_CHECKING from ...domain.findings import ( CATEGORY_COHESION, @@ -29,10 +28,6 @@ from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping from ...utils.coerce import as_sequence as _as_sequence - -if TYPE_CHECKING: - pass - from ._common import ( _contract_path, _normalize_nested_string_rows, From 7008ddcd783d078419f2dd2a73c1826a0407513e Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 21 Apr 2026 22:09:28 +0500 Subject: [PATCH 05/32] fix(cache): prune stale deleted file entries --- codeclone/cache/store.py | 22 ++++++++++++ codeclone/core/discovery.py | 2 ++ tests/test_cache.py | 41 ++++++++++++++++++++++ tests/test_cli_inprocess.py | 12 +++++++ tests/test_core_branch_coverage.py | 56 ++++++++++++++++++++++++++++-- 5 files changed, 131 insertions(+), 2 
deletions(-) diff --git a/codeclone/cache/store.py b/codeclone/cache/store.py index ce85709..3cfb551 100644 --- a/codeclone/cache/store.py +++ b/codeclone/cache/store.py @@ -7,6 +7,7 @@ from __future__ import annotations import os +from collections.abc import Collection from json import JSONDecodeError from pathlib import Path @@ -592,6 +593,27 @@ def put_file_entry( canonical_entry=canonical_entry, ) + def prune_file_entries(self, existing_filepaths: Collection[str]) -> int: + keep_runtime_paths = { + runtime_filepath_from_wire( + wire_filepath_from_runtime(filepath, root=self.root), + root=self.root, + ) + for filepath in existing_filepaths + } + stale_runtime_paths = sorted( + runtime_path + for runtime_path in self.data["files"] + if runtime_path not in keep_runtime_paths + ) + if not stale_runtime_paths: + return 0 + for runtime_path in stale_runtime_paths: + self.data["files"].pop(runtime_path, None) + self._canonical_runtime_paths.discard(runtime_path) + self._dirty = True + return len(stale_runtime_paths) + def file_stat_signature(path: str) -> FileStat: stat_result = os.stat(path) diff --git a/codeclone/core/discovery.py b/codeclone/core/discovery.py index 244663a..f0d951c 100644 --- a/codeclone/core/discovery.py +++ b/codeclone/core/discovery.py @@ -155,6 +155,8 @@ def discover(*, boot: BootstrapResult, cache: Cache) -> DiscoveryResult: continue files_to_process.append(filepath) + cache.prune_file_entries(all_file_paths) + return DiscoveryResult( files_found=files_found, cache_hits=cache_hits, diff --git a/tests/test_cache.py b/tests/test_cache.py index eafd041..e381096 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -143,6 +143,47 @@ def test_cache_roundtrip(tmp_path: Path) -> None: assert loaded.cache_schema_version == Cache._CACHE_VERSION +def test_cache_prune_file_entries_removes_stale_paths(tmp_path: Path) -> None: + root = tmp_path.resolve() + cache_path = root / "cache.json" + live = root / "live.py" + stale = root / "stale.py" + 
live.write_text("def live():\n return 1\n", "utf-8") + + cache = Cache(cache_path, root=root) + cache.put_file_entry( + str(live), + file_stat_signature(str(live)), + [], + [], + [], + ) + cache.put_file_entry( + str(stale), + {"mtime_ns": 1, "size": 1}, + [], + [], + [], + ) + cache.save() + + loaded = Cache(cache_path, root=root) + loaded.load() + + removed = loaded.prune_file_entries((str(live),)) + + assert removed == 1 + assert str(live) in loaded.data["files"] + assert str(stale) not in loaded.data["files"] + + loaded.save() + + reloaded = Cache(cache_path, root=root) + reloaded.load() + assert reloaded.get_file_entry(str(live)) is not None + assert reloaded.get_file_entry(str(stale)) is None + + def test_cache_roundtrip_preserves_empty_structural_findings(tmp_path: Path) -> None: cache_path = tmp_path / "cache.json" cache = Cache(cache_path) diff --git a/tests/test_cli_inprocess.py b/tests/test_cli_inprocess.py index c43795b..606598d 100644 --- a/tests/test_cli_inprocess.py +++ b/tests/test_cli_inprocess.py @@ -526,6 +526,9 @@ def put_file_entry( def save(self) -> None: return None + def prune_file_entries(self, existing_filepaths: object) -> int: + return 0 + monkeypatch.setattr(cli, "Cache", _CacheStub) _write_default_source(tmp_path) _run_parallel_main(monkeypatch, [str(tmp_path), *extra_args, "--no-progress"]) @@ -808,6 +811,9 @@ def put_file_entry( def save(self) -> None: return None + def prune_file_entries(self, existing_filepaths: object) -> int: + return 0 + monkeypatch.setattr(cli, "Cache", _CacheStub) _patch_parallel(monkeypatch) _run_main(monkeypatch, [str(root1), "--no-progress"]) @@ -973,6 +979,9 @@ def put_file_entry( def save(self) -> None: return None + def prune_file_entries(self, existing_filepaths: object) -> int: + return 0 + monkeypatch.setattr(cli, "LEGACY_CACHE_PATH", _LegacyPathSame(cache_path)) monkeypatch.setattr(cli, "Cache", _CacheStub) _patch_parallel(monkeypatch) @@ -1022,6 +1031,9 @@ def put_file_entry( def save(self) -> None: 
return None + def prune_file_entries(self, existing_filepaths: object) -> int: + return 0 + monkeypatch.setattr(cli, "Cache", _CacheStub) _patch_parallel(monkeypatch) _run_main( diff --git a/tests/test_core_branch_coverage.py b/tests/test_core_branch_coverage.py index 1a058bf..9022a5f 100644 --- a/tests/test_core_branch_coverage.py +++ b/tests/test_core_branch_coverage.py @@ -33,13 +33,13 @@ _decode_wire_unit, ) from codeclone.cache._wire_encode import _encode_wire_file_entry -from codeclone.cache.entries import CacheEntry, _as_risk_literal +from codeclone.cache.entries import CacheEntry, SourceStatsDict, _as_risk_literal from codeclone.cache.projection import ( SegmentReportProjection, build_segment_report_projection, decode_segment_report_projection, ) -from codeclone.cache.store import Cache +from codeclone.cache.store import Cache, file_stat_signature from codeclone.contracts.errors import CacheError from codeclone.core._types import ( AnalysisResult, @@ -719,6 +719,9 @@ class _FakeCache: def get_file_entry(self, _path: str) -> dict[str, object]: return cache_entry + def prune_file_entries(self, existing_filepaths: object) -> int: + return 0 + boot = BootstrapResult( root=tmp_path, config=NormalizationConfig(), @@ -731,6 +734,55 @@ def get_file_entry(self, _path: str) -> dict[str, object]: return discover(boot=boot, cache=cast(Cache, _FakeCache())) +def test_discover_prunes_deleted_cache_entries(tmp_path: Path) -> None: + live = tmp_path / "a.py" + stale = tmp_path / "stale.py" + live.write_text("def f():\n return 1\n", "utf-8") + + cache_path = tmp_path / "cache.json" + cache = Cache(cache_path, root=tmp_path) + cache.put_file_entry( + str(live), + file_stat_signature(str(live)), + [], + [], + [], + source_stats=SourceStatsDict(lines=2, functions=1, methods=0, classes=0), + ) + cache.put_file_entry( + str(stale), + {"mtime_ns": 1, "size": 1}, + [], + [], + [], + source_stats=SourceStatsDict(lines=0, functions=0, methods=0, classes=0), + ) + cache.save() + + 
loaded = Cache(cache_path, root=tmp_path) + loaded.load() + boot = BootstrapResult( + root=tmp_path, + config=NormalizationConfig(), + args=Namespace(skip_metrics=False, min_loc=1, min_stmt=1, processes=1), + output_paths=OutputPaths(), + cache_path=cache_path, + ) + + result = discover(boot=boot, cache=loaded) + + assert result.files_found == 1 + assert result.cache_hits == 1 + assert result.files_to_process == () + assert str(stale) not in loaded.data["files"] + + loaded.save() + + reloaded = Cache(cache_path, root=tmp_path) + reloaded.load() + assert str(stale) not in reloaded.data["files"] + + @pytest.mark.parametrize( ("cached_entry", "expected_cache_hits", "expected_files_to_process"), [ From 39430ab2860b972f847658a75e02f1eff51a17b8 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 21 Apr 2026 22:09:31 +0500 Subject: [PATCH 06/32] refactor(report,mcp): reduce import dependency chains --- codeclone/core/reporting.py | 8 +++++- codeclone/report/renderers/markdown.py | 35 ++++++++++++++------------ codeclone/report/renderers/sarif.py | 33 +++++++++++++----------- codeclone/surfaces/mcp/tools/_base.py | 15 +++++------ 4 files changed, 52 insertions(+), 39 deletions(-) diff --git a/codeclone/core/reporting.py b/codeclone/core/reporting.py index e17a334..5faf4ce 100644 --- a/codeclone/core/reporting.py +++ b/codeclone/core/reporting.py @@ -9,7 +9,6 @@ from collections.abc import Callable, Collection, Mapping from ..models import MetricsDiff -from ..report.document.builder import build_report_document from ..report.gates.evaluator import GateResult, GateState from ..report.gates.evaluator import MetricGateConfig as _MetricGateConfig from ..report.gates.evaluator import evaluate_gate_state as _evaluate_gate_state @@ -47,6 +46,12 @@ def _load_sarif_report_renderer() -> Callable[..., str]: return to_sarif_report +def _load_report_document_builder() -> Callable[..., dict[str, object]]: + from ..report.document.builder import build_report_document + + return 
build_report_document + + def report( *, boot: BootstrapResult, @@ -103,6 +108,7 @@ def report( ) ) if needs_report_document: + build_report_document = _load_report_document_builder() validated_metrics_diff = _coerce_metrics_diff(metrics_diff) metrics_for_report = ( _enrich_metrics_report_payload( diff --git a/codeclone/report/renderers/markdown.py b/codeclone/report/renderers/markdown.py index 6cd5e62..18a79f1 100644 --- a/codeclone/report/renderers/markdown.py +++ b/codeclone/report/renderers/markdown.py @@ -12,7 +12,6 @@ from ...domain.findings import FAMILY_CLONE, FAMILY_DEAD_CODE, FAMILY_STRUCTURAL from ...utils.coerce import as_float, as_int, as_mapping, as_sequence from .._formatting import format_spread_text -from ..document.builder import build_report_document if TYPE_CHECKING: from ...models import StructuralFindingGroup, Suggestion, SuppressedCloneGroup @@ -641,21 +640,25 @@ def to_markdown_report( suggestions: Collection[Suggestion] | None = None, structural_findings: Sequence[StructuralFindingGroup] | None = None, ) -> str: - payload = report_document or build_report_document( - func_groups=func_groups, - block_groups=block_groups, - segment_groups=segment_groups, - meta=meta, - inventory=inventory, - block_facts=block_facts or {}, - new_function_group_keys=new_function_group_keys, - new_block_group_keys=new_block_group_keys, - new_segment_group_keys=new_segment_group_keys, - suppressed_clone_groups=suppressed_clone_groups, - metrics=metrics, - suggestions=tuple(suggestions or ()), - structural_findings=tuple(structural_findings or ()), - ) + payload = report_document + if payload is None: + from ..document.builder import build_report_document + + payload = build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + meta=meta, + inventory=inventory, + block_facts=block_facts or {}, + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + 
new_segment_group_keys=new_segment_group_keys, + suppressed_clone_groups=suppressed_clone_groups, + metrics=metrics, + suggestions=tuple(suggestions or ()), + structural_findings=tuple(structural_findings or ()), + ) return render_markdown_report_document(payload) diff --git a/codeclone/report/renderers/sarif.py b/codeclone/report/renderers/sarif.py index 17c8528..ba443c8 100644 --- a/codeclone/report/renderers/sarif.py +++ b/codeclone/report/renderers/sarif.py @@ -52,7 +52,6 @@ from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping from ...utils.coerce import as_sequence as _as_sequence -from ..document.builder import build_report_document if TYPE_CHECKING: from ...models import StructuralFindingGroup, Suggestion @@ -973,20 +972,24 @@ def to_sarif_report( suggestions: Collection[Suggestion] | None = None, structural_findings: Sequence[StructuralFindingGroup] | None = None, ) -> str: - payload = report_document or build_report_document( - func_groups=func_groups, - block_groups=block_groups, - segment_groups=segment_groups, - meta=meta, - inventory=inventory, - block_facts=block_facts or {}, - new_function_group_keys=new_function_group_keys, - new_block_group_keys=new_block_group_keys, - new_segment_group_keys=new_segment_group_keys, - metrics=metrics, - suggestions=tuple(suggestions or ()), - structural_findings=tuple(structural_findings or ()), - ) + payload = report_document + if payload is None: + from ..document.builder import build_report_document + + payload = build_report_document( + func_groups=func_groups, + block_groups=block_groups, + segment_groups=segment_groups, + meta=meta, + inventory=inventory, + block_facts=block_facts or {}, + new_function_group_keys=new_function_group_keys, + new_block_group_keys=new_block_group_keys, + new_segment_group_keys=new_segment_group_keys, + metrics=metrics, + suggestions=tuple(suggestions or ()), + structural_findings=tuple(structural_findings or ()), + ) return 
render_sarif_report_document(payload) diff --git a/codeclone/surfaces/mcp/tools/_base.py b/codeclone/surfaces/mcp/tools/_base.py index 41c0969..313a67b 100644 --- a/codeclone/surfaces/mcp/tools/_base.py +++ b/codeclone/surfaces/mcp/tools/_base.py @@ -8,10 +8,7 @@ from collections.abc import Callable, Mapping from dataclasses import dataclass -from typing import TYPE_CHECKING, Protocol - -if TYPE_CHECKING: - from ..session import MCPSession +from typing import Protocol @dataclass(frozen=True, slots=True) @@ -20,6 +17,10 @@ class MCPToolSchema: description: str = "" +class MCPToolSession(Protocol): + def __getattr__(self, name: str) -> Callable[..., object]: ... + + class MCPTool(Protocol): @property def name(self) -> str: ... @@ -27,16 +28,16 @@ def name(self) -> str: ... @property def schema(self) -> MCPToolSchema: ... - def run(self, session: MCPSession, params: Mapping[str, object]) -> object: ... + def run(self, session: MCPToolSession, params: Mapping[str, object]) -> object: ... @dataclass(frozen=True, slots=True) class SimpleMCPTool: name: str schema: MCPToolSchema - runner: Callable[[MCPSession, Mapping[str, object]], object] + runner: Callable[[MCPToolSession, Mapping[str, object]], object] - def run(self, session: MCPSession, params: Mapping[str, object]) -> object: + def run(self, session: MCPToolSession, params: Mapping[str, object]) -> object: return self.runner(session, params) From 0ed1b038135fc904db632cf14decac70a8525387 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 21 Apr 2026 22:45:16 +0500 Subject: [PATCH 07/32] test(coverage): cover validation and renderer edges --- codeclone/_html_report/_assets/css.py | 1457 +++++++++++++++++ codeclone/_html_report/_assets/js.py | 843 ++++++++++ .../_html_report/_primitives/data_attrs.py | 30 + codeclone/_html_report/_primitives/escape.py | 25 + codeclone/_html_report/_primitives/filters.py | 59 + codeclone/_html_report/_widgets/badges.py | 272 +++ codeclone/_html_report/_widgets/components.py | 106 
++ codeclone/_html_report/_widgets/glossary.py | 100 ++ codeclone/_html_report/_widgets/icons.py | 214 +++ codeclone/_html_report/_widgets/snippets.py | 207 +++ codeclone/_html_report/_widgets/tables.py | 127 ++ codeclone/_html_report/_widgets/tabs.py | 60 + tests/test_cli_unit.py | 19 + tests/test_core_branch_coverage.py | 65 +- tests/test_coverage_edges.py | 5 + tests/test_mcp_tools.py | 62 + tests/test_metrics_baseline.py | 206 +++ tests/test_pipeline_metrics.py | 184 +++ tests/test_report_contract_coverage.py | 75 + 19 files changed, 4114 insertions(+), 2 deletions(-) create mode 100644 codeclone/_html_report/_assets/css.py create mode 100644 codeclone/_html_report/_assets/js.py create mode 100644 codeclone/_html_report/_primitives/data_attrs.py create mode 100644 codeclone/_html_report/_primitives/escape.py create mode 100644 codeclone/_html_report/_primitives/filters.py create mode 100644 codeclone/_html_report/_widgets/badges.py create mode 100644 codeclone/_html_report/_widgets/components.py create mode 100644 codeclone/_html_report/_widgets/glossary.py create mode 100644 codeclone/_html_report/_widgets/icons.py create mode 100644 codeclone/_html_report/_widgets/snippets.py create mode 100644 codeclone/_html_report/_widgets/tables.py create mode 100644 codeclone/_html_report/_widgets/tabs.py create mode 100644 tests/test_mcp_tools.py diff --git a/codeclone/_html_report/_assets/css.py b/codeclone/_html_report/_assets/css.py new file mode 100644 index 0000000..66a4609 --- /dev/null +++ b/codeclone/_html_report/_assets/css.py @@ -0,0 +1,1457 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""CSS design system for the HTML report — tokens, components, layout.""" + +from __future__ import annotations + +# --------------------------------------------------------------------------- +# Design tokens +# --------------------------------------------------------------------------- + +_TOKENS_DARK = """\ +:root{ + --font-sans:"Inter","Inter Variable",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Oxygen,Ubuntu,sans-serif; + --font-display:"Inter","Inter Variable",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif; + --font-mono:"JetBrains Mono",ui-monospace,SFMono-Regular,"SF Mono",Menlo,Consolas,monospace; + --font-numeric:"JetBrains Mono",ui-monospace,SFMono-Regular,"SF Mono",Menlo,Consolas,monospace; + + /* Surface — chromatic grays tinted toward the indigo accent (hue 275). + Every surface shares the brand hue at tiny chroma so the UI feels like + one palette, not gray slate + a purple sticker. 
*/ + --bg-body:oklch(16% 0.018 275); + --bg-surface:oklch(20% 0.022 275); + --bg-raised:oklch(24% 0.028 275); + --bg-overlay:oklch(29% 0.033 275); + --bg-subtle:oklch(34% 0.038 275); + + /* Border — same hue, higher chroma for legibility */ + --border:oklch(32% 0.035 275); + --border-strong:oklch(44% 0.045 275); + + /* Text — muted greys keep a trace of indigo so they feel alive */ + --text-primary:oklch(95% 0.010 275); + --text-secondary:oklch(74% 0.028 275); + --text-muted:oklch(58% 0.038 275); + + /* Accent — codeclone indigo (brand, unchanged) */ + --accent-primary:#6366f1; + --accent-hover:#818cf8; + --accent-muted:color-mix(in oklch,#6366f1 25%,transparent); + --accent-soft:oklch(30% 0.12 275); + + /* Semantic — brand-adjacent, hue-rotated so they read as siblings + of the indigo instead of raw Tailwind defaults */ + --success:oklch(74% 0.15 162); + --success-muted:color-mix(in oklch,oklch(74% 0.15 162) 18%,transparent); + --warning:oklch(80% 0.15 82); + --warning-muted:color-mix(in oklch,oklch(80% 0.15 82) 18%,transparent); + --error:oklch(70% 0.18 18); + --error-muted:color-mix(in oklch,oklch(70% 0.18 18) 18%,transparent); + --danger:oklch(70% 0.18 18); + --info:oklch(72% 0.13 238); + --info-muted:color-mix(in oklch,oklch(72% 0.13 238) 18%,transparent); + + /* elevation */ + --shadow-sm:0 1px 2px rgba(0,0,0,.25); + --shadow-md:0 2px 8px rgba(0,0,0,.3); + --shadow-lg:0 4px 16px rgba(0,0,0,.35); + --shadow-xl:0 8px 32px rgba(0,0,0,.4); + + /* radii */ + --radius-sm:4px; + --radius-md:6px; + --radius-lg:8px; + --radius-xl:12px; + + /* spacing */ + --sp-1:4px;--sp-2:8px;--sp-3:12px;--sp-4:16px;--sp-5:20px;--sp-6:24px;--sp-8:32px;--sp-10:40px; + + /* transitions */ + --ease:cubic-bezier(.4,0,.2,1); + --dur-fast:120ms; + --dur-normal:200ms; + --dur-slow:300ms; + + /* sizes */ + --topbar-h:72px; + --container-max:1360px; + + color-scheme:dark; +} +""" + +_TOKENS_LIGHT = """\ +/* Light palette — mirror of the dark one at higher lightness + lower chroma. 
+ Every surface/border/text token still carries a trace of indigo hue 275 + so the whole theme feels like one family in both modes. */ +@media(prefers-color-scheme:light){ + :root:not([data-theme]){ + --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff; + --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); + --border:oklch(88% 0.020 275);--border-strong:oklch(78% 0.028 275); + --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); + --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); + --accent-soft:oklch(94% 0.045 275); + --success:oklch(52% 0.16 162);--success-muted:color-mix(in oklch,oklch(52% 0.16 162) 12%,transparent); + --warning:oklch(60% 0.15 65);--warning-muted:color-mix(in oklch,oklch(60% 0.15 65) 12%,transparent); + --error:oklch(55% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); + --danger:oklch(55% 0.22 20);--info:oklch(52% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); + --shadow-sm:0 1px 2px rgba(0,0,0,.06);--shadow-md:0 2px 8px rgba(0,0,0,.08); + --shadow-lg:0 4px 16px rgba(0,0,0,.1);--shadow-xl:0 8px 32px rgba(0,0,0,.12); + color-scheme:light; + } +} +[data-theme="light"]{ + --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff; + --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); + --border:oklch(88% 0.020 275);--border-strong:oklch(78% 0.028 275); + --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); + --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); + --accent-soft:oklch(94% 0.045 275); + --success:oklch(52% 0.16 162);--success-muted:color-mix(in oklch,oklch(52% 0.16 162) 12%,transparent); + --warning:oklch(60% 0.15 65);--warning-muted:color-mix(in 
oklch,oklch(60% 0.15 65) 12%,transparent); + --error:oklch(55% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); + --danger:oklch(55% 0.22 20);--info:oklch(52% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); + --shadow-sm:0 1px 2px rgba(0,0,0,.06);--shadow-md:0 2px 8px rgba(0,0,0,.08); + --shadow-lg:0 4px 16px rgba(0,0,0,.1);--shadow-xl:0 8px 32px rgba(0,0,0,.12); + color-scheme:light; +} +""" + +# --------------------------------------------------------------------------- +# Reset + base +# --------------------------------------------------------------------------- + +_RESET = """\ +*,*::before,*::after{box-sizing:border-box;margin:0;padding:0} +html{-webkit-text-size-adjust:100%;text-size-adjust:100%;-webkit-font-smoothing:antialiased; + -moz-osx-font-smoothing:grayscale;scroll-behavior:smooth;scrollbar-gutter:stable} +body{font-family:var(--font-sans);font-size:14px;line-height:1.6;color:var(--text-primary); + background:var(--bg-body);overflow-x:hidden; + /* Inter stylistic alternates: + zero — slashed zero (disambiguates 0 from O in metric values) + ss02 — disambiguation set (I/l/1/0 clear apart) + ss01 — open digits (open 4, 6, 9) + cv11 — single-story a (the "designer" look) + cv02/03/04 — open alternates for 4/6/3 + Together these give Inter its Vercel / Linear / Radix feel. 
*/ + font-feature-settings:"zero","ss02","ss01","cv11","cv02","cv03","cv04"; + font-optical-sizing:auto} +code,pre,kbd{font-family:var(--font-mono);font-size:13px;font-feature-settings:normal} +a{color:var(--accent-primary);text-decoration:none} +a:hover{color:var(--accent-hover);text-decoration:underline} +h1,h2,h3,h4{font-family:var(--font-display);font-weight:600;line-height:1.3;color:var(--text-primary); + letter-spacing:-0.01em} +h1{font-size:1.5rem}h2{font-size:1.25rem}h3{font-size:1.1rem} +ul,ol{list-style:none} +button,input,select{font:inherit;color:inherit} +summary{cursor:pointer} +.muted{color:var(--text-muted);font-size:.85em} +""" + +# --------------------------------------------------------------------------- +# Layout +# --------------------------------------------------------------------------- + +_LAYOUT = """\ +.container{max-width:var(--container-max);margin:0 auto;padding:0 var(--sp-6)} + +/* Topbar */ +.topbar{position:sticky;top:0;z-index:100;background:var(--bg-surface);border-bottom:1px solid var(--border); + box-shadow:var(--shadow-sm)} +.topbar-inner{display:flex;align-items:center;justify-content:space-between; + height:72px;padding:0 var(--sp-6);max-width:var(--container-max);margin:0 auto} +.brand{display:flex;align-items:center;gap:var(--sp-3);min-width:0;flex:1} +.brand-logo{flex-shrink:0} +.brand-text{display:flex;flex-direction:column;gap:2px;min-width:0;flex:1} +.brand h1{display:flex;flex-wrap:wrap;align-items:baseline;gap:var(--sp-1);font-size:1.15rem; + font-weight:700;color:var(--text-primary);line-height:1.3;min-width:0} +.brand-meta{font-size:.78rem;color:var(--text-muted);overflow-wrap:anywhere} +.brand-project{display:inline-flex;flex-wrap:wrap;align-items:baseline;gap:4px; + font-weight:500;color:var(--text-secondary);min-width:0} +.brand-project-name{font-family:var(--font-mono);font-size:.85em;font-weight:500;padding:1px 5px; + border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--accent-primary); + 
max-width:100%;overflow-wrap:anywhere} +.topbar-actions{display:flex;align-items:center;gap:var(--sp-2);flex-shrink:0;flex-wrap:wrap} + +/* Theme toggle */ +.theme-toggle{display:inline-flex;align-items:center;gap:var(--sp-1); + padding:var(--sp-1) var(--sp-3);background:none;border:1px solid var(--border); + border-radius:var(--radius-md);cursor:pointer;color:var(--text-muted);font-size:.85rem; + font-weight:500;font-family:inherit;transition:all var(--dur-fast) var(--ease)} +.theme-toggle:hover{color:var(--text-primary);background:var(--bg-raised);border-color:var(--border-strong)} +.theme-toggle svg{width:16px;height:16px} +/* Adaptive icon: show sun in light mode, moon in dark mode. */ +.theme-icon{display:none} +:root[data-theme="light"] .theme-icon-sun{display:inline-flex} +:root[data-theme="dark"] .theme-icon-moon{display:inline-flex} + +/* Main tabs — full-width pill bar */ +.main-tabs-wrap{position:sticky;top:var(--topbar-h);z-index:90;padding:var(--sp-3) 0 0; + background:var(--bg-body)} +.main-tabs{display:flex;gap:var(--sp-1);padding:var(--sp-1); + background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-lg); + overflow-x:auto;scrollbar-width:none;-webkit-overflow-scrolling:touch} +.main-tabs::-webkit-scrollbar{display:none} +.main-tab{position:relative;flex:1;display:inline-flex;align-items:center;justify-content:center; + gap:var(--sp-1);text-align:center;padding:var(--sp-2) var(--sp-3);background:none; + border:none;cursor:pointer;font-size:.85rem;font-weight:500;color:var(--text-muted); + white-space:nowrap;border-radius:var(--radius-md);transition:all var(--dur-fast) var(--ease)} +.main-tab:hover{color:var(--text-primary);background:var(--bg-raised)} +.main-tab[aria-selected="true"]{color:var(--accent-primary);background:var(--accent-muted)} +.main-tab-icon{flex-shrink:0;opacity:.72} +.main-tab-label{display:inline-flex;align-items:center} 
+.tab-count{display:inline-flex;align-items:center;justify-content:center;min-width:18px; + height:18px;padding:0 5px;font-size:.68rem;font-weight:700;border-radius:var(--radius-sm); + background:var(--bg-overlay);color:var(--text-muted);margin-left:var(--sp-1)} +.main-tab[aria-selected="true"] .tab-count{background:var(--accent-primary); + color:#fff} + +/* Tab panels */ +.tab-panel{display:none;padding:var(--sp-6) 0;contain:layout style} +.tab-panel.active{display:block} +""" + +# --------------------------------------------------------------------------- +# Components: buttons, inputs, selects +# --------------------------------------------------------------------------- + +_CONTROLS = """\ +/* Buttons */ +.btn{display:inline-flex;align-items:center;gap:var(--sp-1);padding:var(--sp-1) var(--sp-3); + font-size:.8rem;font-weight:500;border:1px solid var(--border);border-radius:var(--radius-md); + background:var(--bg-raised);color:var(--text-secondary);cursor:pointer;white-space:nowrap; + transition:all var(--dur-fast) var(--ease)} +.btn:hover{border-color:var(--border-strong);color:var(--text-primary);background:var(--bg-overlay)} +.btn-prov{position:relative} +.btn-prov .prov-dot{width:8px;height:8px;border-radius:50%;flex-shrink:0} +.btn-prov .prov-dot.dot-green{background:var(--success)} +.btn-prov .prov-dot.dot-amber{background:var(--warning)} +.btn-prov .prov-dot.dot-red{background:var(--error)} +.btn-prov .prov-dot.dot-neutral{background:var(--text-muted)} + +/* Provenance pill — single compact status chip in topbar */ +.prov-pill{display:inline-flex;align-items:center;gap:var(--sp-1); + padding:var(--sp-1) var(--sp-3);background:none; + border:1px solid var(--border);border-radius:var(--radius-md);cursor:pointer; + color:var(--text-muted);transition:all var(--dur-fast) var(--ease); + font-size:.85rem;font-weight:500;font-family:inherit} +.prov-pill:hover{background:var(--bg-raised);border-color:var(--border-strong); + color:var(--text-primary)} 
+.prov-pill:focus-visible{outline:2px solid var(--accent-primary);outline-offset:1px} +.prov-pill-icon{flex-shrink:0;opacity:.75} +.prov-pill:hover .prov-pill-icon{opacity:1} +.prov-pill-label{font-size:.85rem;font-weight:500;white-space:nowrap} +.prov-pill--green .prov-pill-icon{color:var(--success)} +.prov-pill--green:hover{border-color:color-mix(in srgb,var(--success) 55%,var(--border))} +.prov-pill--amber .prov-pill-icon{color:var(--warning)} +.prov-pill--amber:hover{border-color:color-mix(in srgb,var(--warning) 55%,var(--border))} +.prov-pill--red .prov-pill-icon{color:var(--error)} +.prov-pill--red:hover{border-color:color-mix(in srgb,var(--error) 60%,var(--border))} +.prov-pill--neutral .prov-pill-icon{color:var(--text-muted)} +.btn.ghost{background:none;border-color:transparent} +.btn.ghost:hover{background:var(--bg-raised);border-color:var(--border)} +.btn.btn-icon{padding:var(--sp-1);min-width:28px;justify-content:center} +.btn svg{width:14px;height:14px} + +/* Inputs */ +input[type="text"]{padding:var(--sp-1) var(--sp-3);font-size:.85rem;border:1px solid var(--border); + border-radius:var(--radius-md);background:var(--bg-body);color:var(--text-primary);outline:none; + transition:border-color var(--dur-fast) var(--ease)} +input[type="text"]:focus{border-color:var(--accent-primary);box-shadow:0 0 0 2px var(--accent-muted)} +input[type="text"]::placeholder{color:var(--text-muted)} + +/* Selects */ +.select{padding:var(--sp-1) var(--sp-3);padding-right:var(--sp-6);font-size:.8rem; + border:1px solid var(--border);border-radius:var(--radius-md);background:var(--bg-raised); + color:var(--text-secondary);cursor:pointer;appearance:none; + background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' fill='none' stroke='%236b6f88' stroke-width='2'%3E%3Cpath d='M3 4.5l3 3 3-3'/%3E%3C/svg%3E"); + background-repeat:no-repeat;background-position:right 8px center} 
+.select:focus{border-color:var(--accent-primary);outline:none} + +/* Checkbox labels */ +.inline-check{display:inline-flex;align-items:center;gap:var(--sp-1);font-size:.8rem; + color:var(--text-muted);cursor:pointer;white-space:nowrap} +.inline-check input[type="checkbox"]{accent-color:var(--accent-primary);width:14px;height:14px} +""" + +# --------------------------------------------------------------------------- +# Search box +# --------------------------------------------------------------------------- + +_SEARCH = """\ +.search-box{position:relative;display:flex;align-items:center} +.search-ico{position:absolute;left:var(--sp-2);color:var(--text-muted);pointer-events:none; + display:flex;align-items:center} +.search-ico svg{width:14px;height:14px} +.search-box input[type="text"]{padding-left:28px;width:200px} +.clear-btn{position:absolute;right:var(--sp-1);background:none;border:none;cursor:pointer; + color:var(--text-muted);padding:2px;display:flex;align-items:center;opacity:0; + transition:opacity var(--dur-fast) var(--ease)} +.search-box input:not(:placeholder-shown)~.clear-btn{opacity:1} +.clear-btn:hover{color:var(--text-primary)} +.clear-btn svg{width:14px;height:14px} +""" + +# --------------------------------------------------------------------------- +# Toolbar + pagination +# --------------------------------------------------------------------------- + +_TOOLBAR = """\ +.toolbar{display:flex;flex-wrap:wrap;align-items:center;gap:var(--sp-2); + padding:var(--sp-3) var(--sp-4);background:var(--bg-raised);border:1px solid var(--border); + border-radius:var(--radius-lg);margin-bottom:var(--sp-4)} +.toolbar-left{display:flex;flex-wrap:wrap;align-items:center;gap:var(--sp-2);flex:1} +.toolbar-right{display:flex;align-items:center;gap:var(--sp-2)} + +.pagination{display:flex;align-items:center;gap:var(--sp-1)} +.page-meta{font-size:.8rem;color:var(--text-muted);white-space:nowrap;min-width:100px;text-align:center} + +/* Filters popover: one button 
collapses Context/Type/Spread/min-occ into a menu */ +.filters-popover{position:relative} +.filters-btn{display:inline-flex;align-items:center;gap:var(--sp-1);white-space:nowrap} +.filters-btn-ico{flex:none} +.filters-count{display:inline-flex;align-items:center;justify-content:center; + min-width:18px;height:18px;padding:0 5px;border-radius:999px; + background:var(--accent-primary);color:#fff;font-size:.68rem;font-weight:600; + line-height:1} +.filters-btn[aria-expanded="true"]{border-color:var(--accent-primary); + color:var(--accent-primary)} +.filters-menu{position:absolute;top:calc(100% + var(--sp-1));left:0;z-index:20; + min-width:240px;display:flex;flex-direction:column;gap:var(--sp-2); + padding:var(--sp-3);background:var(--bg-surface); + border:1px solid var(--border);border-radius:var(--radius-md); + box-shadow:var(--shadow-lg)} +.filters-menu[hidden]{display:none} +.filters-row{display:flex;align-items:center;gap:var(--sp-2)} +.filters-row .select{flex:1;min-width:0} +.filters-label{font-size:.75rem;color:var(--text-muted);min-width:60px; + font-weight:500} +.filters-row.inline-check{gap:var(--sp-2);font-size:.82rem;color:var(--text-secondary); + cursor:pointer} +.filters-row.inline-check input[type="checkbox"]{margin:0} + +/* Expand/collapse toggle — single button that flips state */ +.expand-toggle{white-space:nowrap} +.expand-toggle[data-expanded="true"]{background:var(--bg-overlay); + border-color:var(--border-strong)} + +/* Suggestions count pill (right side of the shared toolbar). 
*/ +.suggestions-count-label{font-size:.8rem;color:var(--text-muted);font-weight:500; + font-variant-numeric:tabular-nums;white-space:nowrap} +""" + +# --------------------------------------------------------------------------- +# Insight banners +# --------------------------------------------------------------------------- + +_INSIGHT = """\ +.insight-banner{padding:var(--sp-3) var(--sp-4);border-radius:var(--radius-md); + margin-bottom:var(--sp-4);border-left:3px solid var(--border);background:none} +.insight-question{font-size:.78rem;font-weight:500;color:var(--text-muted); + text-transform:uppercase;letter-spacing:.03em;margin-bottom:2px} +.insight-answer{font-size:.82rem;color:var(--text-secondary);line-height:1.5} + +.insight-ok{border-left-color:var(--success);background:var(--success-muted)} +.insight-warn{border-left-color:var(--warning);background:var(--warning-muted)} +.insight-risk{border-left-color:var(--error);background:var(--error-muted)} +.insight-info{border-left-color:var(--info);background:var(--info-muted)} +""" + +# --------------------------------------------------------------------------- +# Tables +# --------------------------------------------------------------------------- + +_TABLES = """\ +.table-wrap{display:block;inline-size:100%;max-inline-size:100%;min-inline-size:0;overflow-x:auto; + overflow-y:hidden;border:1px solid var(--border);border-radius:var(--radius-lg);margin-bottom:var(--sp-4); + background: + linear-gradient(to right,var(--bg-surface) 30%,transparent) left center / 40px 100% no-repeat local, + linear-gradient(to left,var(--bg-surface) 30%,transparent) right center / 40px 100% no-repeat local, + linear-gradient(to right,rgba(0,0,0,.15),transparent) left center / 14px 100% no-repeat scroll, + linear-gradient(to left,rgba(0,0,0,.15),transparent) right center / 14px 100% no-repeat scroll} +.table{inline-size:max-content;min-inline-size:100%;border-collapse:collapse;font-size:.82rem; + font-family:var(--font-mono)} +.table 
th{position:sticky;top:0;z-index:2;padding:var(--sp-2) var(--sp-3);text-align:left;font-family:var(--font-sans); + font-weight:600;font-size:.75rem;text-transform:uppercase;letter-spacing:.05em; + color:var(--text-muted);background:var(--bg-overlay);border-bottom:1px solid var(--border); + white-space:nowrap;cursor:default;user-select:none} +.table th[data-sortable]{cursor:pointer} +.table th[data-sortable]:hover{color:var(--text-primary)} +.table th .sort-icon{display:inline-flex;margin-left:var(--sp-1);opacity:.4} +.table th[aria-sort] .sort-icon{opacity:1;color:var(--accent-primary)} +.table td{padding:var(--sp-2) var(--sp-3);border-bottom:1px solid var(--border);color:var(--text-secondary); + vertical-align:top} +.table tr:last-child td{border-bottom:none} +.table tr:hover td{background:var(--bg-raised)} +.table .col-name{font-weight:500;color:var(--text-primary);max-width:360px;overflow:hidden; + text-overflow:ellipsis;white-space:nowrap} +.table .col-file,.table .col-path{color:var(--text-muted);max-width:240px;overflow:hidden; + text-overflow:ellipsis;white-space:nowrap} +.table .col-number,.table .col-num{font-variant-numeric:tabular-nums;text-align:right;white-space:nowrap} +.table .col-risk,.table .col-badge,.table .col-cat{white-space:nowrap} +.table .col-steps{max-width:120px;word-break:break-word} +.table .col-wide{max-width:320px;word-break:break-all} +.table-empty{padding:var(--sp-8);text-align:center;color:var(--text-muted);font-size:.9rem} +""" + +# --------------------------------------------------------------------------- +# Sub-tabs (clone-nav / split-tabs) +# --------------------------------------------------------------------------- + +_SUB_TABS = """\ +.clone-nav{display:flex;gap:2px;border-bottom:1px solid var(--border);margin-bottom:var(--sp-4); + overflow-x:auto;scrollbar-width:none} +.clone-nav::-webkit-scrollbar{display:none} +.clone-nav-btn{position:relative;padding:var(--sp-2) var(--sp-4);background:none;border:none; + 
cursor:pointer;font-size:.85rem;font-weight:500;color:var(--text-muted);white-space:nowrap; + transition:color var(--dur-fast) var(--ease)} +.clone-nav-btn:hover{color:var(--text-primary)} +.clone-nav-btn.active{color:var(--accent-primary)} +.clone-nav-btn.active::after{content:"";position:absolute;bottom:-1px;left:0;right:0; + height:2px;background:var(--accent-primary);border-radius:1px 1px 0 0} +.clone-panel{display:none} +.clone-panel.active{display:block} +""" + +# --------------------------------------------------------------------------- +# Sections + groups +# --------------------------------------------------------------------------- + +_SECTIONS = """\ +.section{margin-bottom:var(--sp-6)} +.subsection-title{font-size:1rem;font-weight:600;color:var(--text-primary); + margin-bottom:var(--sp-3);padding-bottom:var(--sp-2);border-bottom:1px solid var(--border)} +.section-body{display:flex;flex-direction:column;gap:var(--sp-3)} + +/* Clone groups */ +.group{border:1px solid var(--border);border-radius:var(--radius-lg);background:var(--bg-surface); + overflow:hidden;transition:box-shadow var(--dur-fast) var(--ease)} +.group:hover{box-shadow:var(--shadow-sm)} +.group-head{display:flex;align-items:center;justify-content:space-between;padding:var(--sp-3) var(--sp-4); + gap:var(--sp-3);cursor:pointer} +.group-head-left{display:flex;align-items:center;gap:var(--sp-3);min-width:0;flex:1} +.group-head-right{display:flex;align-items:center;gap:var(--sp-2);flex-shrink:0} +.group-toggle{background:none;border:none;cursor:pointer;color:var(--text-muted);padding:var(--sp-1); + display:flex;align-items:center;transition:transform var(--dur-normal) var(--ease);flex-shrink:0} +.group-toggle svg{width:16px;height:16px} +.group-toggle.expanded{transform:rotate(180deg)} +.group-info{min-width:0;flex:1} +.group-name{font-weight:600;font-size:.9rem;color:var(--text-primary);white-space:nowrap; + overflow:hidden;text-overflow:ellipsis;font-family:var(--font-mono)} 
+.group-summary{font-size:.8rem;color:var(--text-muted)} + +/* Badges */ +.clone-type-badge{font-size:.68rem;font-weight:500;padding:2px var(--sp-2); + border-radius:var(--radius-sm);background:var(--accent-muted);color:var(--accent-primary)} +.clone-count-badge{font-size:.68rem;font-weight:600;padding:2px var(--sp-2); + border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-secondary)} + +/* Group body */ +.group-body{border-top:1px solid var(--border);display:none} +.group-body.expanded{display:block} +.group-body.items.expanded{display:grid} +.group-compare-note{padding:var(--sp-2) var(--sp-4);font-size:.8rem;color:var(--text-muted); + background:var(--bg-raised);border-bottom:1px solid var(--border);font-style:italic} + +/* Group explain */ +.group-explain{padding:var(--sp-2) var(--sp-4);display:flex;flex-wrap:wrap;gap:var(--sp-1); + background:var(--bg-raised);border-bottom:1px solid var(--border)} +.group-explain-item{font-size:.68rem;padding:2px var(--sp-2);border-radius:var(--radius-sm); + background:var(--bg-overlay);color:var(--text-muted);font-family:var(--font-mono);white-space:nowrap} +.group-explain-warn{color:var(--warning);background:var(--warning-muted)} +.group-explain-muted{opacity:.7} +.group-explain-note{font-size:.75rem;color:var(--text-muted);font-style:italic;width:100%; + padding-top:var(--sp-1)} +""" + +# --------------------------------------------------------------------------- +# Items (clone instances) +# --------------------------------------------------------------------------- + +_ITEMS = """\ +.items{grid-template-columns:repeat(2,1fr);gap:0} +.items .item{border-right:1px solid var(--border);border-bottom:1px solid var(--border)} +.items .item:nth-child(2n){border-right:none} +.items .item:nth-last-child(-n+2){border-bottom:none} +.items .item:last-child{border-bottom:none} +.item{padding:0;min-width:0;overflow:hidden} +.item-header{display:flex;align-items:center;justify-content:space-between; + 
padding:var(--sp-2) var(--sp-3);background:var(--bg-raised);gap:var(--sp-2)} +.item-title{font-weight:500;font-size:.8rem;color:var(--text-primary);font-family:var(--font-mono); + white-space:nowrap;overflow:hidden;text-overflow:ellipsis;min-width:0;flex:1} +.item-loc{font-size:.72rem;color:var(--text-muted);font-family:var(--font-mono);white-space:nowrap;flex-shrink:0} +.item-compare-meta{padding:var(--sp-1) var(--sp-3);font-size:.72rem;color:var(--text-muted); + background:var(--bg-body);border-bottom:1px solid var(--border)} +""" + +# --------------------------------------------------------------------------- +# Code blocks +# --------------------------------------------------------------------------- + +_CODE = """\ +/* _html_snippets renders .codebox>.hitline/.line */ +.codebox{overflow-x:auto;font-size:12px;line-height:1.7;background:var(--bg-body);padding:var(--sp-2) 0;margin:0} +.codebox pre{margin:0;padding:0} +.codebox .line,.codebox .hitline{padding:0 var(--sp-4) 0 var(--sp-2);white-space:pre;display:block} +.codebox .line:hover{background:var(--bg-raised)} +.codebox .hitline{background:color-mix(in oklch,var(--accent-primary) 12%,transparent); + border-left:3px solid var(--accent-primary);padding-left:calc(var(--sp-2) - 3px)} +.codebox .hitline:hover{background:color-mix(in oklch,var(--accent-primary) 20%,transparent)} +""" + +# --------------------------------------------------------------------------- +# Risk / severity / source-kind badges +# --------------------------------------------------------------------------- + +_BADGES = """\ +.risk-badge,.severity-badge{display:inline-flex;align-items:center;font-size:.68rem;font-weight:600; + padding:2px var(--sp-2);border-radius:var(--radius-sm);text-transform:uppercase;letter-spacing:.02em} +.risk-critical,.severity-critical{background:var(--error-muted);color:var(--error)} +.risk-high,.severity-high{background:var(--error-muted);color:var(--error)} 
+.risk-warning,.severity-warning{background:var(--warning-muted);color:var(--warning)} +.risk-medium,.severity-medium{background:var(--warning-muted);color:var(--warning)} +.risk-low,.severity-low{background:var(--success-muted);color:var(--success)} +.risk-info,.severity-info{background:var(--info-muted);color:var(--info)} + +.source-kind-badge{display:inline-flex;align-items:center;font-size:.68rem;font-weight:500; + padding:2px var(--sp-2);border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-muted)} +.source-kind-production{background:var(--error-muted);color:var(--error)} +.source-kind-test,.source-kind-test_util{background:var(--info-muted);color:var(--info)} +.source-kind-fixture,.source-kind-conftest{background:var(--warning-muted);color:var(--warning)} +.source-kind-import,.source-kind-cross_kind{background:var(--accent-muted);color:var(--accent-primary)} +.category-badge{display:inline-flex;align-items:center;gap:3px;font-size:.68rem; + font-family:var(--font-mono);padding:2px var(--sp-2);border-radius:var(--radius-sm); + background:var(--bg-overlay);color:var(--text-muted);white-space:nowrap} +.category-badge-key{font-weight:400;color:var(--text-muted)} +.category-badge-val{font-weight:600;color:var(--text-secondary)} +.finding-why-chips{display:flex;flex-wrap:wrap;gap:var(--sp-1);margin:var(--sp-1) 0} +.finding-why-chips .category-badge{font-size:.68rem} +""" + +# --------------------------------------------------------------------------- +# Overview +# --------------------------------------------------------------------------- + +_OVERVIEW = """\ +/* Dashboard */ +/* KPI grid: health card on the left, KPI cards in two rows on the right */ +.overview-kpi-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr)); + gap:var(--sp-3);margin-bottom:var(--sp-6)} +.overview-kpi-grid--with-health{grid-template-columns:minmax(190px,210px) minmax(0,1fr); + gap:var(--sp-3);align-items:stretch} 
+.overview-kpi-cards{display:grid;grid-template-columns:repeat(4,minmax(0,1fr)); + gap:var(--sp-3);min-width:0} +.overview-kpi-grid--with-health .meta-item{min-width:0} +.overview-kpi-grid--with-health .meta-item{min-height:0} +.overview-kpi-cards .meta-item{display:grid;grid-template-rows:auto 1fr auto; + align-items:start;padding:var(--sp-3) var(--sp-4);gap:var(--sp-2);min-height:0} +.overview-kpi-cards .meta-item .meta-label{font-size:.75rem;min-height:18px} +.overview-kpi-cards .meta-item .meta-value{display:flex;align-items:center; + font-size:1.55rem;line-height:1;padding:var(--sp-1) 0} +.overview-kpi-cards .kpi-detail{margin-top:0;gap:4px;align-self:end} +.overview-kpi-cards .kpi-micro{padding:2px 6px;font-size:.65rem} +.overview-kpi-grid--with-health .overview-health-card{padding:var(--sp-2)} +.overview-kpi-grid--with-health .overview-health-inner{width:100%;height:100%} +.overview-kpi-grid--with-health .health-ring{width:140px;height:140px;margin:auto} +.overview-kpi-grid--with-health .overview-health-card .meta-value{font-size:1.2rem} +.overview-kpi-grid--with-health .overview-health-card .meta-label{font-size:.66rem} +@media(max-width:1380px){ + .overview-kpi-cards{grid-template-columns:repeat(3,minmax(0,1fr))} +} +@media(max-width:980px){ + .overview-kpi-grid--with-health{grid-template-columns:1fr} + .overview-kpi-cards{grid-template-columns:repeat(2,minmax(0,1fr))} +} +@media(max-width:520px){ + .overview-kpi-cards{grid-template-columns:1fr} + .overview-kpi-cards .meta-item{grid-template-rows:auto auto auto;align-content:start; + min-height:0} + .overview-kpi-cards .meta-item .meta-label{min-height:0} + .overview-kpi-cards .meta-item .meta-value{padding-top:0} + .overview-kpi-cards .kpi-detail{align-self:start} + .overview-kpi-cards .kpi-micro{max-width:100%;white-space:normal;overflow-wrap:anywhere} +} + +/* Health gauge */ +.overview-health-card{display:flex;align-items:center;justify-content:center; + 
padding:var(--sp-3);background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius-lg)} +.overview-health-inner{display:flex;flex-direction:column;align-items:center;justify-content:center; + gap:var(--sp-1)} +.health-ring{position:relative;width:140px;height:140px} +.health-ring svg{width:100%;height:100%;transform:rotate(-90deg)} +.health-ring-bg{fill:none;stroke:var(--border);stroke-width:6} +.health-ring-baseline{fill:none;stroke-width:6;stroke-linecap:round} +.health-ring-fg{fill:none;stroke-width:6;stroke-linecap:round; + transition:stroke-dashoffset 1s var(--ease)} +.health-ring-label{position:absolute;inset:0;display:flex;flex-direction:column; + align-items:center;justify-content:center} +.health-ring-score{font-family:var(--font-numeric);font-size:1.85rem;font-weight:680; + color:var(--text-primary);font-variant-numeric:tabular-nums;line-height:1; + letter-spacing:-0.018em} +.health-ring-grade{font-size:.72rem;font-weight:500;color:var(--text-muted);margin-top:3px} +.health-ring-delta{font-size:.65rem;font-weight:600;margin-top:3px} +.health-ring-delta--up{color:var(--success)} +.health-ring-delta--down{color:var(--error)} + +/* Get Badge button (under health ring) */ +.badge-btn{display:inline-flex;align-items:center;gap:4px;margin-top:var(--sp-2); + padding:4px 10px;font-size:.65rem;font-weight:500;color:var(--text-muted); + background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-sm); + cursor:pointer;transition:all var(--dur-fast) var(--ease);white-space:nowrap} +.badge-btn:hover{color:var(--text-primary);border-color:var(--border-strong); + background:var(--bg-alt)} + +/* Badge modal */ +.badge-modal{max-width:680px;width:92vw;max-height:85vh} +.badge-modal .modal-head{display:flex;align-items:center;justify-content:space-between; + padding:var(--sp-3) var(--sp-4);border-bottom:1px solid var(--border)} +.badge-modal .modal-head h2{font-size:1rem;font-weight:700;margin:0} +.badge-modal 
.modal-body{padding:var(--sp-3) var(--sp-4) var(--sp-4);overflow-y:auto;flex:1 1 auto} + +/* Badge tabs */ +.badge-tabs{display:flex;gap:var(--sp-1);margin-bottom:var(--sp-3)} +.badge-tab{padding:5px 12px;font-size:.72rem;font-weight:500;color:var(--text-muted); + background:transparent;border:1px solid var(--border);border-radius:var(--radius-sm); + cursor:pointer;transition:all var(--dur-fast) var(--ease)} +.badge-tab:hover{color:var(--text-primary);border-color:var(--border-strong)} +.badge-tab--active{color:var(--text-primary);background:var(--bg-alt); + border-color:var(--border-strong);font-weight:600} + +/* Badge preview & disclaimer */ +.badge-preview{text-align:center;padding:var(--sp-3) 0;margin-bottom:var(--sp-1); + border-bottom:1px solid var(--border)} +.badge-preview img{height:24px} +.badge-disclaimer{font-size:.65rem;color:var(--text-muted);text-align:center; + margin:var(--sp-1) 0 var(--sp-2);line-height:1.4} + +/* Badge embed fields */ +.badge-field-label{display:block;font-size:.68rem;font-weight:600;color:var(--text-muted); + margin-bottom:var(--sp-1);margin-top:var(--sp-3);text-transform:uppercase;letter-spacing:.04em} +.badge-code-wrap{display:flex;align-items:stretch;border:1px solid var(--border); + border-radius:var(--radius-sm);overflow:hidden;background:var(--bg-alt)} +.badge-code{flex:1;padding:var(--sp-2) var(--sp-3);font-size:.72rem;font-family:var(--font-mono); + color:var(--text-primary);word-break:break-all;white-space:pre-wrap;line-height:1.5; + user-select:all;-webkit-user-select:all} +.badge-copy-btn{min-width:64px;padding:var(--sp-2) var(--sp-3);font-size:.68rem;font-weight:500; + color:var(--text-muted);background:transparent;border:none;border-left:1px solid var(--border); + cursor:pointer;transition:all var(--dur-fast) var(--ease);white-space:nowrap} +.badge-copy-btn:hover{color:var(--text-primary)} +.badge-copy-btn--ok{color:var(--success)} + +/* KPI stat card */ +.meta-item{padding:var(--sp-3) 
var(--sp-4);background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius-lg);display:flex;flex-direction:column;gap:var(--sp-1); + transition:border-color var(--dur-fast) var(--ease);min-width:0; + font-family:var(--font-mono)} +.meta-item:hover{border-color:var(--border-strong)} +.meta-item .meta-label{font-size:.75rem;font-weight:500;color:var(--text-muted); + display:flex;align-items:center;gap:var(--sp-1);letter-spacing:.01em;line-height:1.35} +.meta-item .meta-value{font-family:var(--font-numeric);font-size:1.4rem;font-weight:680; + color:var(--text-primary);font-variant-numeric:tabular-nums;line-height:1.15; + letter-spacing:-0.01em} +.meta-item .meta-value--good{color:var(--success)} +.meta-item .meta-value--bad{color:var(--error)} +.meta-item .meta-value--warn{color:var(--warning)} +.meta-item .meta-value--muted{color:var(--text-muted)} +.kpi-detail{display:flex;flex-wrap:wrap;gap:3px;margin-top:2px} +.kpi-detail code{font-size:.78rem} +.kpi-micro{display:inline-flex;align-items:center;gap:3px;font-size:.62rem; + padding:1px 5px;border-radius:var(--radius-sm);background:var(--bg-raised); + white-space:nowrap;line-height:1.3;font-family:inherit} +.kpi-micro-val{font-family:inherit;font-weight:500;font-variant-numeric:tabular-nums; + color:var(--text-muted)} +.kpi-micro-lbl{font-weight:400;color:var(--text-muted);text-transform:lowercase} +.kpi-micro--baselined{color:var(--success);font-weight:500;font-size:.6rem} +.kpi-delta{font-size:.62rem;font-weight:700;margin-left:auto; + padding:1px 5px;border-radius:var(--radius-sm);white-space:nowrap} +.kpi-delta--good{color:var(--success);background:var(--success-muted)} +.kpi-delta--bad{color:var(--error);background:var(--error-muted)} +.kpi-delta--neutral{color:var(--text-muted);background:var(--bg-raised)} +.kpi-help{display:inline-flex;align-items:center;justify-content:center;width:15px;height:15px; + font-size:.6rem;font-weight:600;border-radius:50%;background:none; + 
color:var(--text-muted);cursor:help;position:relative;border:1.5px solid var(--border); + opacity:.5;transition:opacity var(--dur-fast) var(--ease)} +.kpi-help:hover{opacity:1} +.kpi-tooltip{position:fixed;z-index:9999;pointer-events:none; + background:var(--bg-overlay);color:var(--text-primary); + padding:var(--sp-2) var(--sp-3);border-radius:var(--radius-md);font-size:.75rem;font-weight:400; + white-space:normal;width:max-content;max-width:240px;line-height:1.4; + box-shadow:var(--shadow-md);border:1px solid var(--border)} + +/* Tone variants */ +.meta-item.tone-ok{border-left:3px solid var(--success)} +.meta-item.tone-warn{border-left:3px solid var(--warning)} +.meta-item.tone-risk{border-left:3px solid var(--error)} + +/* Clusters */ +.overview-cluster{margin-bottom:var(--sp-5)} +.overview-cluster-header{margin-bottom:var(--sp-3)} +.overview-cluster-copy{font-size:.82rem;color:var(--text-muted);margin-top:2px} +.overview-cluster-empty{display:flex;flex-direction:column;align-items:center;gap:var(--sp-2); + padding:var(--sp-5);text-align:center;color:var(--text-muted);font-size:.85rem} +.empty-icon{color:var(--success);opacity:.35;width:32px;height:32px;flex-shrink:0} +.overview-list{display:grid;grid-template-columns:repeat(2,1fr);gap:var(--sp-2)} + +/* Overview rows */ +.overview-row{display:flex;flex-direction:column;gap:var(--sp-1); + padding:var(--sp-3) var(--sp-4);background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius-lg);transition:border-color var(--dur-fast) var(--ease)} +.overview-row:hover{border-color:var(--border-strong)} +.overview-row[data-severity="critical"]{border-left:3px solid var(--error)} +.overview-row[data-severity="warning"]{border-left:3px solid var(--warning)} +.overview-row[data-severity="info"]{border-left:3px solid var(--info)} +.overview-row-head{display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap} 
+.overview-row-spread{font-size:.72rem;font-family:var(--font-mono);color:var(--text-muted); + margin-left:auto;white-space:nowrap} +.overview-row-title{font-weight:600;font-size:.85rem;color:var(--text-primary)} +.overview-row-summary{font-size:.8rem;color:var(--text-secondary);line-height:1.5} + +/* Summary grid */ +.overview-summary-grid{display:grid;gap:var(--sp-3);margin-bottom:var(--sp-3)} +.overview-summary-grid--2col{grid-template-columns:repeat(auto-fit,minmax(280px,1fr))} +.overview-summary-grid--3col{grid-template-columns:repeat(auto-fit,minmax(240px,1fr))} +.overview-summary-item{background:var(--bg-surface); + border:1px solid color-mix(in srgb,var(--border) 78%,transparent); + border-radius:var(--radius-lg);padding:var(--sp-4)} +.overview-summary-label{display:flex;align-items:center;gap:var(--sp-2); + font-size:.82rem;font-weight:700;text-transform:none; + letter-spacing:normal;color:var(--text-secondary);margin-bottom:var(--sp-3); + padding-bottom:var(--sp-2); + border-bottom:1px solid color-mix(in srgb,var(--border) 58%,transparent); + font-family:var(--font-display)} +.overview-summary-item > :not(.overview-summary-label){font-family:var(--font-mono)} +.summary-icon{flex-shrink:0;opacity:.6} +.summary-icon--risk{color:var(--warning)} +.summary-icon--info{color:var(--accent-primary)} +.overview-summary-list{display:flex;flex-direction:column;gap:var(--sp-2)} +.overview-summary-list li{font-size:.82rem;color:var(--text-secondary); + padding-left:var(--sp-3);position:relative;line-height:1.5} +.overview-summary-list li::before{content:"\\2022";position:absolute;left:0;color:var(--text-muted)} +.overview-summary-value{font-size:.85rem;color:var(--text-muted)} +/* Compact stat grid used inside overview-summary-item cards (Coverage Join). 
*/ +.overview-stat-row{display:grid;grid-template-columns:repeat(auto-fit,minmax(84px,1fr)); + gap:var(--sp-3);align-items:end} +.overview-stat{display:flex;flex-direction:column;gap:2px;min-width:0} +.overview-stat-value{font-family:var(--font-numeric);font-size:1.4rem;font-weight:680; + color:var(--text-primary);font-variant-numeric:tabular-nums;line-height:1.12; + letter-spacing:-0.01em} +.overview-stat-label{font-size:.68rem;font-weight:500;color:var(--text-muted); + text-transform:uppercase;letter-spacing:.04em} +.overview-stat-caption{margin-top:var(--sp-3);font-size:.72rem;color:var(--text-muted); + line-height:1.4} +.overview-stat-caption code{font-family:var(--font-mono);font-size:.68rem; + padding:1px 4px;border-radius:var(--radius-sm);background:var(--bg-raised); + color:var(--text-secondary)} +.overview-stat-row + .kpi-detail{margin-top:var(--sp-2)} +/* Fact-list: compact label ··· value rows inside overview-summary-item cards. */ +.overview-fact-list{display:flex;flex-direction:column;gap:var(--sp-2)} +.overview-fact-row{display:flex;align-items:baseline;justify-content:space-between;gap:var(--sp-3); + font-size:.8rem;padding-bottom:6px;border-bottom:1px solid var(--border)} +.overview-fact-row:last-child{padding-bottom:0;border-bottom:none} +.overview-fact-label{color:var(--text-muted)} +.overview-fact-value{display:inline-flex;align-items:baseline;gap:6px; + color:var(--text-primary);font-weight:600;font-variant-numeric:tabular-nums;text-align:right} +.overview-fact-delta{font-size:.68rem;font-weight:400;color:var(--text-muted)} +.overview-fact-value--warn{color:var(--warning)} +.overview-fact-value--good{color:var(--success)} +/* Source breakdown bars */ +.breakdown-list{display:flex;flex-direction:column;gap:var(--sp-2)} +.breakdown-row{display:grid;grid-template-columns:6.5rem 2rem 1fr;align-items:center;gap:var(--sp-2)} +.breakdown-row .source-kind-badge{justify-content:center;min-width:0;width:100%;text-align:center} 
+.breakdown-count{font-size:.8rem;font-weight:600;font-variant-numeric:tabular-nums; + color:var(--text-primary);text-align:right} +.breakdown-bar-track{height:6px;border-radius:3px;background:var(--bg-raised);overflow:hidden} +.breakdown-bar-fill{display:block;height:100%;border-radius:3px; + background:var(--accent-primary);transition:width .6s var(--ease)} +/* Directory hotspot entries */ +.dir-hotspot-list{display:flex;flex-direction:column;gap:0} +.dir-hotspot-entry{padding:var(--sp-2) 0;border-bottom:1px solid color-mix(in srgb,var(--border) 50%,transparent)} +.dir-hotspot-entry:last-child{border-bottom:none;padding-bottom:0} +.dir-hotspot-entry:first-child{padding-top:0} +/* Row 1: path + badge */ +.dir-hotspot-head{display:flex;align-items:center;gap:var(--sp-2);min-width:0} +.dir-hotspot-path{font-size:.78rem;font-weight:600;color:var(--text-primary);line-height:1.3; + overflow:hidden;text-overflow:ellipsis;white-space:nowrap;min-width:0;flex:1} +/* Row 2: bar + pct + meta */ +.dir-hotspot-detail{display:flex;align-items:center;gap:var(--sp-2);margin-top:3px} +.dir-hotspot-bar-track{width:30%;flex-shrink:0;height:4px;border-radius:2px; + background:var(--bg-raised);overflow:hidden;display:flex} +.dir-hotspot-bar-prev{height:100%;background:var(--text-muted);opacity:.18} +.dir-hotspot-bar-cur{height:100%;background:var(--accent-primary);opacity:.7} +.dir-hotspot-pct{font-size:.72rem;font-weight:600;font-variant-numeric:tabular-nums; + color:var(--text-secondary);white-space:nowrap;flex-shrink:0} +.dir-hotspot-meta{display:flex;flex-wrap:wrap;gap:4px 6px;font-size:.68rem;color:var(--text-muted)} +.dir-hotspot-meta span{font-variant-numeric:tabular-nums} +.dir-hotspot-meta-sep{opacity:.3} +.overloaded-module-list{display:flex;flex-direction:column;gap:0} +.overloaded-module-entry{padding:var(--sp-2) 0;border-bottom:1px solid color-mix(in srgb,var(--border) 50%,transparent)} +.overloaded-module-entry:last-child{border-bottom:none;padding-bottom:0} 
+.overloaded-module-entry:first-child{padding-top:0} +.overloaded-module-head{display:flex;align-items:flex-start;justify-content:space-between;gap:var(--sp-2);margin-bottom:4px} +.overloaded-module-title{display:flex;align-items:center;flex-wrap:wrap;gap:var(--sp-2);min-width:0} +.overloaded-module-title code{font-size:.78rem;font-weight:600;color:var(--text-primary);line-height:1.35} +.overloaded-module-score{flex-shrink:0;font-size:.68rem;font-weight:700;font-variant-numeric:tabular-nums; + color:var(--accent-primary);background:var(--accent-muted);border-radius:var(--radius-sm);padding:2px var(--sp-2)} +.overloaded-module-metrics{display:flex;flex-wrap:wrap;gap:6px;font-size:.68rem;color:var(--text-muted)} +.overloaded-module-metrics span{font-variant-numeric:tabular-nums} +.overloaded-module-reasons,.overloaded-module-signal-list{display:flex;flex-wrap:wrap;gap:var(--sp-1);margin-top:var(--sp-2)} +.overloaded-module-reason-chip,.overloaded-module-signal-pill{display:inline-flex;align-items:center;gap:5px; + font-size:.68rem;font-weight:500;color:var(--text-secondary);background:var(--bg-raised); + border:1px solid color-mix(in srgb,var(--border) 60%,transparent);border-radius:var(--radius-sm); + padding:2px var(--sp-2)} +.overloaded-module-signal-count{font-variant-numeric:tabular-nums;color:var(--text-muted)} +/* Health radar chart */ +.health-radar{display:flex;justify-content:center;padding:var(--sp-3) 0} +.health-radar svg{width:100%;max-width:520px;height:auto;overflow:visible} +.health-radar text{font-size:10.5px;font-family:var(--font-mono);fill:var(--text-secondary); + font-weight:500} +.health-radar .radar-score{font-weight:600;font-variant-numeric:tabular-nums;fill:var(--text-secondary)} +.health-radar .radar-label--weak{fill:var(--error)} +.health-radar .radar-label--weak .radar-score{fill:var(--error)} +.health-radar-legend{font-size:.78rem;color:var(--text-secondary);text-align:center; + 
margin-top:var(--sp-2);max-width:520px;margin-left:auto;margin-right:auto} +/* Findings by family bars */ +.families-list{display:flex;flex-direction:column;gap:var(--sp-2)} +.families-row{display:grid;grid-template-columns:5.5rem 2rem 1fr auto;align-items:center;gap:var(--sp-2)} +.families-row--muted{opacity:.65} +.families-label{font-size:.75rem;font-weight:500;color:var(--text-secondary);text-align:right} +.families-count{font-size:.8rem;font-weight:600;font-variant-numeric:tabular-nums; + color:var(--text-primary);text-align:right} +.breakdown-bar-track{display:flex} +.breakdown-bar-fill--baselined{opacity:.5} +.breakdown-bar-fill--new{border-radius:0 3px 3px 0} +.families-delta{font-size:.65rem;font-weight:600;font-variant-numeric:tabular-nums;white-space:nowrap} +.families-delta--ok{color:var(--success)} +.families-delta--new{color:var(--error)} +""" + +# --------------------------------------------------------------------------- +# Dependencies (SVG graph) +# --------------------------------------------------------------------------- + +_DEPENDENCIES = """\ +.stat-cards,.dep-stats{display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr)); + gap:var(--sp-2);margin-bottom:var(--sp-4)} +.stat-cards .meta-item,.dep-stats .meta-item{display:grid;grid-template-rows:auto 1fr auto;min-height:100px} +.stat-cards .meta-item .meta-label,.dep-stats .meta-item .meta-label{font-size:.72rem;min-height:18px} +.stat-cards .meta-item .meta-value,.dep-stats .meta-item .meta-value{display:flex;align-items:center} +.stat-cards .kpi-detail,.dep-stats .kpi-detail{margin-top:0;align-self:end} +.dep-graph-wrap{overflow:hidden;margin-bottom:var(--sp-4);border:1px solid var(--border); + border-radius:var(--radius-lg);background:var(--bg-surface);padding:var(--sp-4)} +.dep-graph-svg{width:100%;height:auto;max-height:520px} +.dep-graph-svg text{fill:var(--text-secondary);font-family:var(--font-mono)} +.dep-node{transition:fill-opacity var(--dur-fast) var(--ease)} 
+.dep-edge{transition:stroke-opacity var(--dur-fast) var(--ease)} +.dep-label{transition:fill var(--dur-fast) var(--ease)} + +/* Hub bar */ +.dep-hub-bar{display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap; + margin-bottom:var(--sp-4);padding:var(--sp-2) var(--sp-4);background:var(--bg-raised); + border-radius:var(--radius-lg);border:1px solid var(--border)} +.dep-hub-label{font-size:.75rem;font-weight:600;text-transform:uppercase;letter-spacing:.05em; + color:var(--text-muted)} +.dep-hub-pill{display:inline-flex;align-items:center;gap:var(--sp-1);padding:var(--sp-1) var(--sp-2); + border-radius:var(--radius-sm);background:var(--bg-overlay);font-size:.8rem} +.dep-hub-name{color:var(--text-primary);font-family:var(--font-mono);font-size:.8rem} +.dep-hub-deg{font-size:.68rem;font-weight:600;color:var(--accent-primary); + background:var(--accent-muted);padding:2px var(--sp-2);border-radius:var(--radius-sm)} + +/* Legend */ +.dep-legend{display:flex;gap:var(--sp-4);align-items:center;margin-bottom:var(--sp-4); + padding:var(--sp-2) var(--sp-4);font-size:.8rem;color:var(--text-muted)} +.dep-legend-item{display:inline-flex;align-items:center;gap:var(--sp-1)} +.dep-legend-item svg{flex-shrink:0} + +/* Chain flow */ +.chain-flow{display:inline-flex;align-items:center;gap:var(--sp-1);flex-wrap:wrap} +.chain-node{font-family:var(--font-mono);font-size:.8rem;color:var(--text-primary); + padding:0 var(--sp-1);background:var(--bg-overlay);border-radius:var(--radius-sm)} +.chain-arrow{color:var(--text-muted);font-size:.75rem} +""" + +# --------------------------------------------------------------------------- +# Novelty controls +# --------------------------------------------------------------------------- + +_NOVELTY = """\ +/* Slim inline baseline-split bar (replaces the old boxed section chrome). 
*/ +.novelty-bar{display:flex;align-items:center;gap:var(--sp-3);flex-wrap:wrap; + margin-bottom:var(--sp-3);padding:var(--sp-2) 0; + border-bottom:1px solid var(--border)} +.novelty-bar-tabs{display:inline-flex;gap:var(--sp-1)} +.novelty-bar-note{font-size:.78rem;color:var(--text-muted);line-height:1.4} +.novelty-tab{transition:all var(--dur-fast) var(--ease)} +.novelty-tab.active{background:var(--accent-primary);color:white;border-color:var(--accent-primary)} +.novelty-tab[data-novelty-state="good"]{color:var(--success);border-color:var(--success);background:var(--success-muted)} +.novelty-tab[data-novelty-state="good"].active{background:var(--success);color:white;border-color:var(--success)} +.novelty-tab[data-novelty-state="bad"]{color:var(--error);border-color:var(--error);background:var(--error-muted)} +.novelty-tab[data-novelty-state="bad"].active{background:var(--error);color:white;border-color:var(--error)} +.novelty-count{font-size:.72rem;font-weight:600;background:rgba(255,255,255,.15);padding:0 var(--sp-1); + border-radius:var(--radius-sm);margin-left:var(--sp-1)} + +/* Hidden by novelty filter */ +.group[data-novelty-hidden="true"]{display:none} +""" + +# --------------------------------------------------------------------------- +# Dead-code +# --------------------------------------------------------------------------- + +_DEAD_CODE = """\ +/* No custom overrides — uses shared table + tabs */ +""" + +# --------------------------------------------------------------------------- +# Suggestions +# --------------------------------------------------------------------------- + +_SUGGESTIONS = """\ +/* List layout */ +.suggestions-list{display:flex;flex-direction:column;gap:var(--sp-2)} + +/* Card — full-width row */ +.suggestion-card{background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-lg); + overflow:hidden;transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease)} 
+.suggestion-card:hover{border-color:var(--border-strong);box-shadow:var(--shadow-sm)} +.suggestion-card[data-severity="critical"]{border-left:3px solid var(--error)} +.suggestion-card[data-severity="warning"]{border-left:3px solid var(--warning)} +.suggestion-card[data-severity="info"]{border-left:3px solid var(--info)} + +/* Header row: severity pill · title · meta badges */ +.suggestion-head{padding:var(--sp-3) var(--sp-4);display:flex;align-items:center; + gap:var(--sp-2);flex-wrap:wrap} +.suggestion-sev{font-size:.68rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; + padding:2px var(--sp-2);border-radius:var(--radius-sm);white-space:nowrap} +.suggestion-sev--critical{background:var(--error-muted);color:var(--error)} +.suggestion-sev--warning{background:var(--warning-muted);color:var(--warning)} +.suggestion-sev--info{background:var(--info-muted);color:var(--info)} +.suggestion-sev-inline{font-size:.68rem;font-weight:600;padding:2px var(--sp-2); + border-radius:var(--radius-sm)} +.suggestion-title{font-weight:600;font-size:.85rem;color:var(--text-primary);flex:1;min-width:0} +.suggestion-meta{display:flex;align-items:center;gap:var(--sp-2);flex-shrink:0;flex-wrap:wrap} +.suggestion-meta-badge{font-size:.68rem;font-weight:600;padding:2px var(--sp-2); + border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-muted); + white-space:nowrap;line-height:1.2;font-variant-numeric:tabular-nums} +.suggestion-effort--easy{color:var(--success);background:var(--success-muted, rgba(34,197,94,.1))} +.suggestion-effort--moderate{color:var(--warning);background:var(--warning-muted)} +.suggestion-effort--hard{color:var(--error);background:var(--error-muted)} + +/* Body — context + summary */ +.suggestion-body{padding:0 var(--sp-4) var(--sp-3);display:flex;flex-direction:column;gap:var(--sp-1)} +.suggestion-context{display:flex;gap:var(--sp-1);flex-wrap:wrap} +.suggestion-chip{font-size:.68rem;font-weight:500;padding:2px 
var(--sp-2);border-radius:var(--radius-sm); + background:var(--bg-overlay);color:var(--text-muted);white-space:nowrap} +.suggestion-summary{font-size:.8rem;font-family:var(--font-mono);color:var(--text-secondary);line-height:1.5} +.suggestion-action{display:flex;align-items:center;gap:var(--sp-1); + font-size:.8rem;font-weight:500;color:var(--accent-primary);margin-top:var(--sp-1)} +.suggestion-action-icon{flex-shrink:0;color:var(--accent-primary)} + +/* Expandable details */ +.suggestion-details{border-top:1px solid var(--border)} +.suggestion-details summary{padding:var(--sp-2) var(--sp-4);font-size:.75rem;font-weight:500; + color:var(--text-muted);cursor:pointer;display:flex;align-items:center;gap:var(--sp-2); + background:none;user-select:none} +.suggestion-details summary:hover{color:var(--text-primary);background:var(--bg-raised)} +.suggestion-details[open] summary{border-bottom:1px solid var(--border)} +.suggestion-details-body{padding:var(--sp-3) var(--sp-4);display:flex;flex-direction:column;gap:var(--sp-3)} + +/* Facts grid inside details */ +.suggestion-facts{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:var(--sp-3)} +.suggestion-fact-group{display:flex;flex-direction:column;gap:var(--sp-1)} +.suggestion-fact-group-title{font-size:.68rem;font-weight:600;text-transform:uppercase; + letter-spacing:.05em;color:var(--text-muted);padding-bottom:var(--sp-1);border-bottom:1px solid var(--border)} +.suggestion-dl{display:flex;flex-direction:column;gap:2px} +.suggestion-dl div{display:flex;gap:var(--sp-2);align-items:baseline} +.suggestion-dl dt{font-size:.72rem;color:var(--text-muted);white-space:nowrap;min-width:60px} +.suggestion-dl dd{font-size:.78rem;font-family:var(--font-mono);color:var(--text-primary);word-break:break-word} + +/* Locations & steps inside details */ +.suggestion-locations{display:flex;flex-direction:column;gap:var(--sp-1)} +.suggestion-locations li{display:flex;gap:var(--sp-2);align-items:baseline; + padding:2px 
0;border-bottom:1px solid var(--border);line-height:1.4} +.suggestion-locations li:last-child{border-bottom:none} +.suggestion-loc-path{font-family:var(--font-mono);font-size:.75rem;color:var(--text-secondary)} +.suggestion-loc-lines{color:var(--text-muted)} +.suggestion-loc-name{font-family:var(--font-mono);font-size:.72rem;color:var(--text-muted); + margin-left:auto} +.suggestion-steps{padding-left:var(--sp-4);display:flex;flex-direction:column;gap:var(--sp-1);list-style:decimal} +.suggestion-steps li{font-size:.78rem;color:var(--text-secondary)} +.suggestion-sub-title{font-size:.72rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; + color:var(--text-muted);margin-bottom:var(--sp-1)} + +.suggestion-empty{padding:var(--sp-4);text-align:center;color:var(--text-muted);font-size:.85rem} + +/* Hidden cards */ +.suggestion-card[data-filter-hidden="true"]{display:none} +""" + +# --------------------------------------------------------------------------- +# Structural findings +# --------------------------------------------------------------------------- + +_STRUCTURAL = """\ +/* Structural findings — list layout */ +.sf-list{display:flex;flex-direction:column;gap:var(--sp-2)} +.sf-card{background:var(--bg-surface);border:1px solid var(--border);border-left:3px solid var(--info); + border-radius:var(--radius-lg); + overflow:hidden;transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease)} +.sf-card:hover{border-color:var(--border-strong);box-shadow:var(--shadow-sm)} + +/* Header row */ +.sf-head{padding:var(--sp-3) var(--sp-4);display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap} +.sf-kind-badge{font-size:.68rem;font-weight:600;text-transform:uppercase;letter-spacing:.03em; + padding:2px var(--sp-2);border-radius:var(--radius-sm);white-space:nowrap; + background:var(--info-muted);color:var(--info)} +.sf-title{font-weight:600;font-size:.85rem;color:var(--text-primary);flex:1;min-width:0} 
+.sf-meta{display:flex;align-items:center;gap:var(--sp-1);flex-shrink:0;flex-wrap:wrap} +.sf-why-btn{font-size:.72rem;color:var(--accent-primary);font-weight:500} + +/* Body */ +.sf-body{padding:0 var(--sp-4) var(--sp-3);display:flex;flex-direction:column;gap:var(--sp-2)} +.sf-chips{display:flex;flex-wrap:wrap;gap:var(--sp-1)} +.sf-scope-text{font-size:.8rem;font-family:var(--font-mono);color:var(--text-secondary)} +.sf-inline-action{display:flex;align-items:flex-start;gap:var(--sp-2);padding:var(--sp-2) var(--sp-3); + border:1px solid var(--border);border-radius:var(--radius-md);background:var(--bg-raised)} +.sf-inline-action-label{font-size:.72rem;font-weight:600;letter-spacing:.02em;text-transform:uppercase; + color:var(--accent-primary);white-space:nowrap} +.sf-inline-action-text{font-size:.8rem;color:var(--text-secondary);line-height:1.45} + +/* Expandable occurrences */ +.sf-details{border-top:1px solid var(--border)} +.sf-details summary{padding:var(--sp-2) var(--sp-4);font-size:.75rem;font-weight:500; + color:var(--text-muted);cursor:pointer;display:flex;align-items:center;gap:var(--sp-2); + background:none;user-select:none} +.sf-details summary:hover{color:var(--text-primary);background:var(--bg-raised)} +.sf-details[open] summary{border-bottom:1px solid var(--border)} +.sf-details-body{padding:0} +.sf-details-body .table-wrap{border:none;border-radius:0} +.sf-table .col-num{white-space:nowrap} +.sf-table{table-layout:fixed} + +.sf-kind-meta{font-weight:normal;font-size:.8rem;color:var(--text-muted)} +.subsection-title{font-size:.95rem;margin:var(--sp-4) 0 var(--sp-2)} +.finding-occurrences-more summary{font-size:.8rem;color:var(--accent-primary);cursor:pointer; + padding:var(--sp-1) var(--sp-3)} +.sf-card[data-filter-hidden="true"]{display:none} +/* Finding Why modal */ +.finding-why-modal{max-width:720px;width:92vw;max-height:85vh} +.finding-why-modal .modal-head{display:flex;align-items:center;justify-content:space-between; + padding:var(--sp-3) 
var(--sp-4);border-bottom:1px solid var(--border);flex-shrink:0} +.finding-why-modal .modal-head h2{font-size:1rem;font-weight:600} +.finding-why-modal .modal-body{padding:var(--sp-3) var(--sp-4);overflow-y:auto;flex:1 1 auto;min-height:0} +.metrics-section{margin-bottom:var(--sp-3)} +.metrics-section-title{font-size:.75rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; + color:var(--text-muted);margin-bottom:var(--sp-1);padding-bottom:3px;border-bottom:1px solid var(--border)} +.finding-why-text{font-size:.85rem;color:var(--text-secondary);line-height:1.5;margin:var(--sp-1) 0} +.finding-why-list{font-size:.82rem;color:var(--text-secondary);line-height:1.5; + list-style:disc;padding-left:var(--sp-5);margin:var(--sp-1) 0} +.finding-why-list li{margin-bottom:2px} +.finding-why-note{font-size:.78rem;color:var(--text-muted);margin-bottom:var(--sp-2)} +.finding-why-examples{display:flex;flex-direction:column;gap:var(--sp-2)} +.finding-why-example{border:1px solid var(--border);border-radius:var(--radius-md);overflow:hidden} +.finding-why-example-head{display:flex;align-items:center;gap:var(--sp-2);padding:var(--sp-1) var(--sp-3); + background:var(--bg-raised);font-size:.78rem;border-bottom:1px solid var(--border)} +.finding-why-example-label{font-weight:600;color:var(--text-primary)} +.finding-why-example-meta{color:var(--text-muted);font-family:var(--font-mono);font-size:.72rem} +.finding-why-example-loc{margin-left:auto;color:var(--text-muted);font-family:var(--font-mono);font-size:.72rem} +""" + +# --------------------------------------------------------------------------- +# Report provenance / meta panel +# --------------------------------------------------------------------------- + +_META_PANEL = """\ +/* Provenance section cards */ +.prov-section{margin-bottom:var(--sp-3);background:var(--bg-surface); + border-radius:var(--radius-md);padding:var(--sp-3) var(--sp-4) var(--sp-2); + border:1px solid var(--border); + box-shadow:0 1px 2px color-mix(in 
srgb,var(--text-primary) 3%,transparent)} +.prov-section:last-child{margin-bottom:0} +.prov-section-title{font-size:.66rem;font-weight:700;text-transform:uppercase;letter-spacing:.09em; + color:var(--text-secondary);margin:0 calc(-1*var(--sp-4)) var(--sp-2); + padding:0 var(--sp-4) var(--sp-2);border:none; + border-bottom:1px solid color-mix(in srgb,var(--border) 60%,transparent); + display:flex;align-items:center;gap:6px} +.prov-section-title svg{width:13px;height:13px;opacity:.7;flex-shrink:0; + color:var(--accent-primary)} +.prov-table{width:100%;border-collapse:collapse;font-size:.8rem} +.prov-table tr:not(:last-child){border-bottom:1px solid color-mix(in srgb,var(--border) 25%,transparent)} +.prov-table tr:hover{background:color-mix(in srgb,var(--accent-primary) 3%,transparent)} +.prov-td-label{padding:6px 0;color:var(--text-muted);white-space:nowrap;width:40%; + vertical-align:top;font-weight:500;font-size:.76rem;letter-spacing:.002em} +.prov-td-value{padding:6px 0 6px var(--sp-2);color:var(--text-primary);word-break:break-all; + font-family:var(--font-mono);font-size:.72rem;vertical-align:top} + +/* Provenance summary badges */ +.prov-summary{display:flex;flex-wrap:wrap;align-items:center;gap:6px; + padding:var(--sp-2) var(--sp-4);border-top:1px solid var(--border)} +.prov-badge{display:inline-flex;align-items:center;gap:4px;font-size:.68rem; + padding:2px var(--sp-2);border-radius:var(--radius-sm);background:var(--bg-raised); + white-space:nowrap;line-height:1.3;border:1px solid color-mix(in srgb,var(--border) 55%,transparent); + font-family:var(--font-mono);letter-spacing:.005em} +.prov-badge-val{font-weight:600;font-variant-numeric:tabular-nums;color:var(--text-primary)} +.prov-badge-lbl{font-weight:400;color:var(--text-muted);text-transform:lowercase} +.prov-badge--inline{padding:2px 8px} +.prov-badge--inline .prov-badge-val{font-weight:500} +.prov-badge--green{background:var(--success-muted);border-color:color-mix(in srgb,var(--success) 
20%,transparent)} +.prov-badge--green .prov-badge-val{color:var(--success)} +.prov-badge--red{background:var(--error-muted);border-color:color-mix(in srgb,var(--error) 20%,transparent)} +.prov-badge--red .prov-badge-val{color:var(--error)} +.prov-badge--amber{background:var(--warning-muted);border-color:color-mix(in srgb,var(--warning) 20%,transparent)} +.prov-badge--amber .prov-badge-val{color:var(--warning)} +.prov-badge--neutral{background:var(--bg-overlay);border-color:color-mix(in srgb,var(--border) 75%,transparent)} +.prov-badge--neutral .prov-badge-val{color:var(--text-secondary)} +.prov-explain{font-size:.62rem;color:var(--text-muted);margin-left:auto;font-style:italic} + +/* Truncated long values (paths, sha256) in provenance table */ +.prov-mono-trunc{font-family:var(--font-mono);font-size:.72rem;color:var(--text-primary); + background:var(--bg-body);padding:2px 6px;border-radius:var(--radius-sm); + border:1px solid color-mix(in srgb,var(--border) 45%,transparent); + white-space:nowrap;overflow:hidden;text-overflow:ellipsis; + max-width:100%;vertical-align:middle;letter-spacing:.01em} +.prov-td-value:has(.prov-mono-trunc){display:flex;align-items:center;gap:var(--sp-1);flex-wrap:nowrap; + min-width:0} +.prov-copy-btn{display:inline-flex;align-items:center;justify-content:center; + width:22px;height:22px;padding:0;background:none;border:1px solid transparent; + border-radius:var(--radius-sm);color:var(--text-muted);cursor:pointer; + transition:all var(--dur-fast) var(--ease);flex-shrink:0} +.prov-copy-btn:hover{color:var(--text-primary);background:var(--bg-overlay); + border-color:color-mix(in srgb,var(--border) 70%,transparent)} +.prov-copy-btn:focus-visible{outline:2px solid var(--accent-primary);outline-offset:1px} +.prov-copy-btn--ok{color:var(--success);background:var(--success-muted); + border-color:color-mix(in srgb,var(--success) 30%,transparent)} +.prov-copy-btn svg{width:12px;height:12px} +""" + + +# 
--------------------------------------------------------------------------- +# Shared micro-interactions +# --------------------------------------------------------------------------- + +_MICRO_INTERACTIONS = """\ +/* Shared card micro-interactions */ +.meta-item,.overview-row,.overview-summary-item,.group,.suggestion-card,.sf-card,.prov-section{ + --card-hover-accent:var(--accent-primary); + --card-outline:color-mix(in oklch,var(--card-hover-accent) 24%,transparent); + --card-hover-shadow: + 0 10px 24px color-mix(in srgb,var(--card-hover-accent) 8%,transparent), + var(--shadow-md); + transform:translateY(0); + transition:transform var(--dur-fast) var(--ease), + border-color var(--dur-fast) var(--ease), + box-shadow var(--dur-fast) var(--ease)} +@media (hover:hover) and (pointer:fine){ + .meta-item:hover,.overview-row:hover,.overview-summary-item:hover,.group:hover,.suggestion-card:hover,.sf-card:hover,.prov-section:hover{ + transform:translateY(-2px); + border-color:color-mix(in oklch,var(--card-hover-accent) 22%,var(--border-strong)); + box-shadow:0 0 0 1px var(--card-outline),var(--card-hover-shadow)} +} +@media (prefers-reduced-motion:reduce){ + .meta-item,.overview-row,.overview-summary-item,.group,.suggestion-card,.sf-card,.prov-section{ + transition:border-color var(--dur-fast) var(--ease), + box-shadow var(--dur-fast) var(--ease)} + .meta-item:hover,.overview-row:hover,.overview-summary-item:hover,.group:hover,.suggestion-card:hover,.sf-card:hover,.prov-section:hover{ + transform:none} +} +""" + +# --------------------------------------------------------------------------- +# Empty states +# --------------------------------------------------------------------------- + +_EMPTY = """\ +.empty{display:flex;align-items:center;justify-content:center;padding:var(--sp-10)} +.empty-card{text-align:center;max-width:400px} +.empty-icon{margin-bottom:var(--sp-3);color:var(--success)} +.empty-icon svg{width:40px;height:40px} +.empty-card h2{margin-bottom:var(--sp-2)} 
+.empty-card p{color:var(--text-secondary);font-size:.9rem} +.tab-empty{display:flex;flex-direction:column;align-items:center;justify-content:center; + padding:var(--sp-10);text-align:center;font-family:var(--font-sans)} +.tab-empty-icon{color:var(--text-muted);opacity:.4;margin-bottom:var(--sp-3);width:48px;height:48px} +.tab-empty-title{font-size:1rem;font-weight:600;color:var(--text-primary);margin-bottom:var(--sp-1); + font-family:var(--font-display)} +.tab-empty-desc{font-size:.85rem;color:var(--text-muted);max-width:320px;font-family:var(--font-sans)} +.tab-empty-desc-detail{text-align:left;max-width:520px;font-size:.8rem;word-break:break-word; + font-family:var(--font-sans)} +.tab-empty-reason{display:block;margin-top:var(--sp-1);font-size:.75rem;color:var(--text-muted); + opacity:.7;word-break:break-all;font-family:var(--font-mono, monospace)} + +/* Inline empty state — compact stacked variant for cards/summary items. + No background/border — sits inside its parent card. Icon color carries tone. 
*/ +.inline-empty{display:flex;flex-direction:column;align-items:center;justify-content:center; + gap:var(--sp-2);padding:var(--sp-4) var(--sp-3);min-height:72px; + color:var(--text-muted);font-size:.82rem;font-weight:500; + text-align:center;letter-spacing:.005em;line-height:1.4;font-family:var(--font-sans)} +.inline-empty-icon{flex-shrink:0;opacity:.5;color:var(--text-muted)} +.inline-empty-text{max-width:260px} +.inline-empty--good .inline-empty-icon{color:var(--success);opacity:.7} +.inline-empty--neutral .inline-empty-icon{color:var(--text-muted);opacity:.5} +""" + +# --------------------------------------------------------------------------- +# Coupled details +# --------------------------------------------------------------------------- + +_COUPLED = """\ +.coupled-details{display:inline} +.coupled-summary{display:inline;cursor:pointer} +.coupled-summary:hover{color:var(--text-primary)} +.coupled-more{font-size:.75rem;color:var(--text-muted);margin-left:var(--sp-1)} +.coupled-expanded{margin-top:var(--sp-1)} +""" + +# --------------------------------------------------------------------------- +# Modal (dialog) +# --------------------------------------------------------------------------- + +_MODAL = """\ +/* Generic dialog modal — Safari-compatible centering */ +dialog{background:var(--bg-surface);color:var(--text-primary);border:1px solid var(--border); + border-radius:var(--radius-xl);box-shadow:var(--shadow-xl);padding:0;max-width:600px;width:90vw; + max-height:80vh;overflow:hidden} +dialog:not([open]){display:none} +dialog[open]{display:flex;flex-direction:column; + position:fixed;inset:0;margin:auto;z-index:9999} +dialog::backdrop{background:rgba(0,0,0,.5);backdrop-filter:blur(4px);-webkit-backdrop-filter:blur(4px)} +.modal-close{background:none;border:none;cursor:pointer;color:var(--text-muted);padding:var(--sp-1); + font-size:1.25rem;line-height:1} +.modal-close:hover{color:var(--text-primary)} + +/* Info modal (block metrics) */ 
+#clone-info-modal{max-width:640px;width:92vw;max-height:85vh} +#clone-info-modal .modal-head{display:flex;align-items:center;justify-content:space-between; + padding:var(--sp-3) var(--sp-4);border-bottom:1px solid var(--border)} +#clone-info-modal .modal-head h2{font-size:1rem} +#clone-info-modal .modal-body{padding:var(--sp-3) var(--sp-4);overflow-y:auto;flex:1 1 auto;min-height:0} +.info-dl{display:grid;grid-template-columns:1fr 1fr;gap:0;margin:0} +.info-dl>div{display:flex;justify-content:space-between;gap:var(--sp-2); + padding:var(--sp-2) var(--sp-3);border-bottom:1px solid var(--border)} +.info-dl>div:nth-last-child(-n+2){border-bottom:none} +.info-dl dt{font-size:.8rem;color:var(--text-muted);white-space:nowrap} +.info-dl dd{font-size:.8rem;font-weight:500;color:var(--text-primary);margin:0;text-align:right; + font-family:var(--font-mono)} + +/* Provenance modal */ +dialog.prov-modal{max-width:720px;width:92vw;max-height:86vh;padding:0;overflow:hidden; + border-radius:var(--radius-lg)} +.prov-modal-body{padding:var(--sp-4) var(--sp-5) var(--sp-5);overflow-y:auto;flex:1 1 auto} +.prov-modal .prov-summary{padding:var(--sp-2) var(--sp-5) var(--sp-3); + border-top:none;border-bottom:1px solid var(--border);flex-shrink:0; + background:color-mix(in srgb,var(--bg-raised) 50%,transparent)} + +/* Provenance hero — status header at top of modal */ +.prov-hero{position:relative;display:flex;align-items:center;gap:var(--sp-4); + padding:var(--sp-4) var(--sp-5);flex-shrink:0; + border-bottom:1px solid var(--border); + background:linear-gradient(180deg, + color-mix(in srgb,var(--bg-raised) 55%,transparent) 0%, + var(--bg-surface) 100%)} +.prov-hero-badge{display:inline-flex;align-items:center;gap:7px; + padding:6px 12px 6px 10px;border-radius:999px;font-weight:700;font-size:.78rem; + letter-spacing:.005em;white-space:nowrap;flex-shrink:0; + border:1px solid var(--border);background:var(--bg-surface)} +.prov-hero-icon{flex-shrink:0} +.prov-hero-label{line-height:1} 
+.prov-hero--green .prov-hero-badge{color:var(--success); + background:color-mix(in srgb,var(--success) 10%,var(--bg-surface)); + border-color:color-mix(in srgb,var(--success) 45%,var(--border))} +.prov-hero--amber .prov-hero-badge{color:var(--warning); + background:color-mix(in srgb,var(--warning) 10%,var(--bg-surface)); + border-color:color-mix(in srgb,var(--warning) 45%,var(--border))} +.prov-hero--red .prov-hero-badge{color:var(--error); + background:color-mix(in srgb,var(--error) 10%,var(--bg-surface)); + border-color:color-mix(in srgb,var(--error) 50%,var(--border))} +.prov-hero--neutral .prov-hero-badge{color:var(--text-secondary)} +.prov-hero-text{display:flex;flex-direction:column;gap:2px;min-width:0;flex:1} +.prov-hero-title{font-size:1.02rem;font-weight:700;letter-spacing:-.01em; + color:var(--text-primary);margin:0;line-height:1.25} +.prov-hero-sub{font-size:.8rem;color:var(--text-secondary);margin:0;line-height:1.35; + overflow:hidden;text-overflow:ellipsis} +.prov-hero-close{flex-shrink:0;align-self:flex-start} + +""" + +# --------------------------------------------------------------------------- +# Command palette +# --------------------------------------------------------------------------- + +_CMD_PALETTE = "" # removed: command palette eliminated + +# --------------------------------------------------------------------------- +# Toast notifications +# --------------------------------------------------------------------------- + +_TOAST = """\ +.toast-container{position:fixed;bottom:var(--sp-6);right:var(--sp-6);z-index:2000; + display:flex;flex-direction:column;gap:var(--sp-2)} +.toast{padding:var(--sp-3) var(--sp-5);background:var(--bg-overlay);border:1px solid var(--border); + border-radius:var(--radius-lg);box-shadow:var(--shadow-lg);font-size:.85rem;color:var(--text-primary); + animation:toast-in var(--dur-slow) var(--ease)} +@keyframes toast-in{from{opacity:0;transform:translateY(8px)}to{opacity:1;transform:none}} +""" + +# 
--------------------------------------------------------------------------- +# Utility +# --------------------------------------------------------------------------- + +_UTILITY = """\ +/* Responsive */ +@media(max-width:768px){ + .overview-kpi-grid{grid-template-columns:repeat(2,1fr)} + .toolbar{flex-direction:column;align-items:stretch} + .toolbar-left,.toolbar-right{justify-content:flex-start} + .overview-list{grid-template-columns:1fr} + .items{grid-template-columns:1fr} + .items .item{border-right:none} + .overview-row-head{flex-wrap:wrap} + .overview-row-spread{margin-left:0;width:100%} + .suggestion-head{flex-direction:column;align-items:flex-start} + .suggestion-facts{grid-template-columns:1fr} + .sf-head{flex-direction:column;align-items:flex-start} + .sf-meta{width:100%} + .dir-hotspot-head{flex-wrap:wrap;align-items:flex-start} + .dir-hotspot-detail{flex-wrap:wrap;align-items:flex-start} + .dir-hotspot-bar-track{width:min(148px,42%);min-width:96px} + .dir-hotspot-meta{width:100%} + .container{padding:0 var(--sp-3)} + .topbar{position:static} + .topbar-inner{height:auto;padding:var(--sp-2) var(--sp-3);flex-direction:row; + align-items:center;gap:var(--sp-2)} + .brand{flex:1;min-width:0;align-items:center;gap:var(--sp-2)} + .brand-logo{width:24px;height:24px} + .brand-text{gap:0} + .brand h1{font-size:.85rem;line-height:1.25;white-space:nowrap;overflow:hidden;text-overflow:ellipsis} + .brand-project-name{font-size:.78em;padding:0 3px} + .brand-meta{display:none} + .topbar-actions{flex-shrink:0;gap:var(--sp-1)} + .topbar-actions .prov-pill{font-size:0;gap:0;width:32px;height:32px; + padding:0;align-items:center;justify-content:center} + .topbar-actions .prov-pill-label{display:none} + .topbar-actions .prov-pill-icon{opacity:1} + .theme-toggle{font-size:0;gap:0;width:32px;height:32px; + padding:0;align-items:center;justify-content:center} + .theme-toggle svg{width:16px;height:16px} + .ide-picker-btn{font-size:0;gap:0;width:32px;height:32px; + 
padding:0;align-items:center;justify-content:center} + .ide-picker-btn svg{width:16px;height:16px} + .ide-picker-label{display:none} + .ide-menu{right:0;min-width:140px} + .main-tabs-wrap{position:sticky;top:0;z-index:90;padding:var(--sp-2) 0 0} + .main-tabs{padding:var(--sp-1);gap:2px; + background: + linear-gradient(to right,var(--bg-surface) 30%,transparent) left center / 28px 100% no-repeat local, + linear-gradient(to left,var(--bg-surface) 30%,transparent) right center / 28px 100% no-repeat local, + linear-gradient(to right,rgba(0,0,0,.12),transparent) left center / 10px 100% no-repeat scroll, + linear-gradient(to left,rgba(0,0,0,.12),transparent) right center / 10px 100% no-repeat scroll, + var(--bg-surface)} + .main-tab{flex:none;padding:var(--sp-1) var(--sp-2);font-size:.78rem} + .main-tab-icon{width:13px;height:13px} +} +@media(max-width:480px){ + .overview-kpi-grid{grid-template-columns:1fr} + .search-box input[type="text"]{width:140px} + .brand-logo{width:28px;height:28px} +} + +/* IDE link */ +.ide-link{color:inherit;text-decoration:none;cursor:default} +[data-ide]:not([data-ide=""]) .ide-link{cursor:pointer;color:var(--accent-primary); + text-decoration-line:underline;text-decoration-style:dotted;text-underline-offset:2px} +[data-ide]:not([data-ide=""]) .ide-link:hover{text-decoration-style:solid} + +/* IDE picker dropdown */ +.ide-picker{position:relative;display:inline-flex} +.ide-picker-btn{display:inline-flex;align-items:center;gap:var(--sp-1); + padding:var(--sp-1) var(--sp-3);background:none;border:1px solid var(--border); + border-radius:var(--radius-md);cursor:pointer;color:var(--text-muted);font-size:.85rem; + font-weight:500;font-family:inherit;transition:all var(--dur-fast) var(--ease); + white-space:nowrap} +.ide-picker-btn:hover{color:var(--text-primary);background:var(--bg-raised);border-color:var(--border-strong)} +.ide-picker-btn svg{width:16px;height:16px;flex-shrink:0} 
+.ide-picker-btn[aria-expanded="true"]{color:var(--accent-primary);border-color:var(--accent-primary)} +.ide-menu{display:none;position:absolute;top:100%;right:0;margin-top:var(--sp-1); + min-width:160px;background:var(--bg-surface);border:1px solid var(--border); + border-radius:var(--radius);box-shadow:0 4px 12px rgba(0,0,0,.15); + z-index:100;padding:var(--sp-1) 0;list-style:none} +.ide-menu[data-open]{display:block} +.ide-menu li{padding:0} +.ide-menu button{display:flex;align-items:center;gap:var(--sp-2);width:100%; + padding:var(--sp-1) var(--sp-3);background:none;border:none;color:var(--text-primary); + font-size:.8rem;font-family:var(--font-sans);cursor:pointer;text-align:left} +.ide-menu button:hover{background:var(--bg-alt)} +.ide-menu button[aria-checked="true"]{color:var(--accent-primary);font-weight:600} +.ide-menu button[aria-checked="true"]::before{content:'\\2713';font-size:.7rem; + width:14px;text-align:center;flex-shrink:0} +.ide-menu button[aria-checked="false"]::before{content:'';width:14px;flex-shrink:0} + +/* Print */ +@media print{ + .topbar,.toolbar,.pagination,.theme-toggle,.toast-container, + .novelty-tabs,.clear-btn,.btn,.ide-picker{display:none!important} + .tab-panel{display:block!important;break-inside:avoid} + .group-body{display:block!important} + body{background:#fff;color:#000} +} +""" + +# --------------------------------------------------------------------------- +# Footer +# --------------------------------------------------------------------------- + +_FOOTER = """\ +.report-footer{margin-top:var(--sp-8);padding:var(--sp-4) 0;border-top:1px solid var(--border); + text-align:center;font-size:.78rem;color:var(--text-muted)} +.report-footer a{color:var(--accent-primary)} +.report-footer-main{display:block} +.report-footer-schemas{margin-top:var(--sp-1);font-size:.72rem;letter-spacing:.01em; + font-variant-numeric:tabular-nums;opacity:.85} +""" + + +# --------------------------------------------------------------------------- +# 
Public API +# --------------------------------------------------------------------------- + +_ALL_SECTIONS = ( + _TOKENS_DARK, + _TOKENS_LIGHT, + _RESET, + _LAYOUT, + _CONTROLS, + _SEARCH, + _TOOLBAR, + _INSIGHT, + _TABLES, + _SUB_TABS, + _SECTIONS, + _ITEMS, + _CODE, + _BADGES, + _OVERVIEW, + _DEPENDENCIES, + _NOVELTY, + _DEAD_CODE, + _SUGGESTIONS, + _STRUCTURAL, + _META_PANEL, + _MICRO_INTERACTIONS, + _EMPTY, + _COUPLED, + _MODAL, + _CMD_PALETTE, + _TOAST, + _UTILITY, + _FOOTER, +) + + +def build_css() -> str: + """Return the complete CSS string for the HTML report.""" + return "\n".join(_ALL_SECTIONS) diff --git a/codeclone/_html_report/_assets/js.py b/codeclone/_html_report/_assets/js.py new file mode 100644 index 0000000..0a59b38 --- /dev/null +++ b/codeclone/_html_report/_assets/js.py @@ -0,0 +1,843 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""JavaScript for the HTML report — modular IIFE with feature blocks.""" + +from __future__ import annotations + +# --------------------------------------------------------------------------- +# Core helpers +# --------------------------------------------------------------------------- + +_CORE = """\ +const $=s=>document.querySelector(s); +const $$=s=>[...document.querySelectorAll(s)]; + +/* Shared Filters popover wiring: one button opens a menu, outside-click + + Escape dismiss it. Reused by Clones (per-section) and Suggestions (global). 
*/ +function wireFiltersPopover(toggleEl){ + if(!toggleEl)return; + const popover=toggleEl.parentElement; + if(!popover)return; + const menu=popover.querySelector('.filters-menu'); + if(!menu)return; + function setOpen(open){ + toggleEl.setAttribute('aria-expanded',open?'true':'false'); + if(open)menu.removeAttribute('hidden'); + else menu.setAttribute('hidden',''); + } + toggleEl.addEventListener('click',e=>{ + e.stopPropagation(); + setOpen(toggleEl.getAttribute('aria-expanded')!=='true'); + }); + document.addEventListener('click',e=>{ + if(toggleEl.getAttribute('aria-expanded')!=='true')return; + if(popover.contains(e.target))return; + setOpen(false); + }); + document.addEventListener('keydown',e=>{ + if(e.key!=='Escape')return; + if(toggleEl.getAttribute('aria-expanded')!=='true')return; + setOpen(false); + toggleEl.focus(); + }); +} +window.wireFiltersPopover=wireFiltersPopover; +""" + +# --------------------------------------------------------------------------- +# Theme +# --------------------------------------------------------------------------- + +_THEME = """\ +(function initTheme(){ + const key='codeclone-theme'; + const root=document.documentElement; + const saved=localStorage.getItem(key); + // Always resolve + set data-theme so icon CSS selectors always match. 
+ const initial=saved==='light'||saved==='dark' + ?saved + :(matchMedia('(prefers-color-scheme:dark)').matches?'dark':'light'); + root.setAttribute('data-theme',initial); + + const btn=$('.theme-toggle'); + if(!btn)return; + btn.addEventListener('click',()=>{ + const next=root.getAttribute('data-theme')==='dark'?'light':'dark'; + root.setAttribute('data-theme',next); + localStorage.setItem(key,next); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Main tabs +# --------------------------------------------------------------------------- + +_TABS = """\ +(function initTabs(){ + const tabs=$$('.main-tab'); + const panels=$$('.tab-panel'); + if(!tabs.length)return; + + function activate(id){ + tabs.forEach(t=>{t.setAttribute('aria-selected',t.dataset.tab===id?'true':'false')}); + panels.forEach(p=>{p.classList.toggle('active',p.id==='panel-'+id)}); + history.replaceState(null,'','#'+id); + } + + tabs.forEach(t=>t.addEventListener('click',()=>activate(t.dataset.tab))); + + // Keyboard: arrow left/right + const tabList=$('[role="tablist"].main-tabs'); + if(tabList){ + tabList.addEventListener('keydown',e=>{ + const idx=tabs.indexOf(document.activeElement); + if(idx<0)return; + let next=-1; + if(e.key==='ArrowRight')next=(idx+1)%tabs.length; + else if(e.key==='ArrowLeft')next=(idx-1+tabs.length)%tabs.length; + if(next>=0){e.preventDefault();tabs[next].focus();activate(tabs[next].dataset.tab)} + }); + } + + // Hash deep-link + const hash=location.hash.slice(1); + const valid=tabs.map(t=>t.dataset.tab); + activate(valid.includes(hash)?hash:valid[0]||''); +})(); +""" + +# --------------------------------------------------------------------------- +# Sub-tabs (clone-nav / split-tabs) +# --------------------------------------------------------------------------- + +_SUB_TABS = """\ +(function initSubTabs(){ + $$('.clone-nav-btn').forEach(btn=>{ + btn.addEventListener('click',()=>{ + const group=btn.dataset.subtabGroup; + 
if(!group)return; + $$('.clone-nav-btn[data-subtab-group="'+group+'"]').forEach(b=>b.classList.remove('active')); + btn.classList.add('active'); + $$('.clone-panel[data-subtab-group="'+group+'"]').forEach(p=>{ + p.classList.toggle('active',p.dataset.clonePanel===btn.dataset.cloneTab); + }); + }); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Sections: search, filter, pagination, collapse/expand +# --------------------------------------------------------------------------- + +_SECTIONS = """\ +(function initSections(){ + // Registry so novelty filter can call applyFilters directly (no debounce) + window.__sectionFilters=window.__sectionFilters||{}; + + $$('[data-section]').forEach(sec=>{ + const id=sec.dataset.section; + const groups=[...sec.querySelectorAll('.group[data-group="'+id+'"]')]; + const searchInput=$('#search-'+id); + const pageMeta=sec.querySelector('[data-page-meta="'+id+'"]'); + const pageSizeSelect=sec.querySelector('[data-pagesize="'+id+'"]'); + const sourceKindFilter=sec.querySelector('[data-source-kind-filter="'+id+'"]'); + const cloneTypeFilter=sec.querySelector('[data-clone-type-filter="'+id+'"]'); + const spreadFilter=sec.querySelector('[data-spread-filter="'+id+'"]'); + const minOccCheck=sec.querySelector('[data-min-occurrences-filter="'+id+'"]'); + + let page=1; + let pageSize=parseInt(pageSizeSelect?.value||'10',10); + + function isAll(v){return !v||v==='all'} + + function activeFilterCount(){ + let n=0; + if(!isAll(sourceKindFilter?.value))n++; + if(!isAll(cloneTypeFilter?.value))n++; + if(!isAll(spreadFilter?.value))n++; + if(minOccCheck?.checked)n++; + return n; + } + + function updateFiltersBadge(){ + const badge=sec.querySelector('[data-filters-count="'+id+'"]'); + if(!badge)return; + const n=activeFilterCount(); + if(n>0){badge.hidden=false;badge.textContent=String(n)} + else{badge.hidden=true;badge.textContent='0'} + } + + function applyFilters(){ + const 
q=(searchInput?.value||'').toLowerCase().trim(); + const sk=sourceKindFilter?.value||''; + const ct=cloneTypeFilter?.value||''; + const sp=spreadFilter?.value||''; + const minOcc=minOccCheck?.checked||false; + + groups.forEach(g=>{ + // Novelty-hidden groups are always hidden + if(g.getAttribute('data-novelty-hidden')==='true'){g.style.display='none';return} + let show=true; + if(q&&!(g.dataset.search||'').includes(q))show=false; + if(!isAll(sk)&&g.dataset.sourceKind!==sk)show=false; + if(!isAll(ct)&&g.dataset.cloneType!==ct)show=false; + if(!isAll(sp)&&g.dataset.spreadBucket!==sp)show=false; + if(minOcc&&parseInt(g.dataset.groupArity||'0',10)<4)show=false; + g.style.display=show?'':'none'; + }); + updateFiltersBadge(); + page=1; + paginate(); + } + + function paginate(){ + // Collect groups that passed both novelty + search/filter + const vis=groups.filter(g=>g.style.display!=='none'); + const totalPages=Math.max(1,Math.ceil(vis.length/pageSize)); + if(page>totalPages)page=totalPages; + const start=(page-1)*pageSize; + const end=start+pageSize; + vis.forEach((g,i)=>{g.style.display=i>=start&&i{clearTimeout(timer);timer=setTimeout(applyFilters,200)}); + } + [sourceKindFilter,cloneTypeFilter,spreadFilter].forEach(el=>{ + if(el)el.addEventListener('change',applyFilters); + }); + if(minOccCheck)minOccCheck.addEventListener('change',applyFilters); + if(pageSizeSelect)pageSizeSelect.addEventListener('change',()=>{ + pageSize=parseInt(pageSizeSelect.value,10);page=1;paginate()}); + + // Clear search + const clearBtn=sec.querySelector('[data-clear="'+id+'"]'); + if(clearBtn&&searchInput)clearBtn.addEventListener('click',()=>{searchInput.value='';applyFilters()}); + + // Prev/Next + const prevBtn=sec.querySelector('[data-prev="'+id+'"]'); + const nextBtn=sec.querySelector('[data-next="'+id+'"]'); + if(prevBtn)prevBtn.addEventListener('click',()=>{if(page>1){page--;paginate()}}); + if(nextBtn)nextBtn.addEventListener('click',()=>{ + const vis=visible();const 
tp=Math.max(1,Math.ceil(vis.length/pageSize)); + if(page{ + const expanded=expandToggle.dataset.expanded==='true'; + const target=!expanded; + const scope=target + ? groups.filter(g=>g.style.display!=='none') + : groups; + scope.forEach(g=>{ + const body=g.querySelector('.group-body'); + const toggle=g.querySelector('.group-toggle'); + if(target){ + if(body)body.classList.add('expanded'); + if(toggle)toggle.classList.add('expanded'); + }else{ + if(body)body.classList.remove('expanded'); + if(toggle)toggle.classList.remove('expanded'); + } + }); + expandToggle.dataset.expanded=target?'true':'false'; + expandToggle.textContent=target?'Collapse all':'Expand all'; + }); + } + + // Filters popover (shared helper handles open/close + dismiss) + wireFiltersPopover(sec.querySelector('[data-filters-toggle="'+id+'"]')); + + // Initial + applyFilters(); + }); + + // Toggle individual groups + document.addEventListener('click',e=>{ + const btn=e.target.closest('[data-toggle-group]'); + if(!btn)return; + const groupId=btn.dataset.toggleGroup; + const body=$('#group-body-'+groupId); + if(!body)return; + body.classList.toggle('expanded'); + btn.classList.toggle('expanded'); + }); + + // Also toggle on group-head click (except buttons) + document.addEventListener('click',e=>{ + const head=e.target.closest('.group-head'); + if(!head)return; + if(e.target.closest('button'))return; + const toggle=head.querySelector('.group-toggle'); + if(toggle)toggle.click(); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Novelty filter (global new/known) +# --------------------------------------------------------------------------- + +_NOVELTY = """\ +(function initNovelty(){ + const ctrl=$('#global-novelty-controls'); + if(!ctrl)return; + const defaultNovelty=ctrl.dataset.defaultNovelty||'new'; + const btns=$$('[data-global-novelty]'); + let activeNovelty=''; + + function applyNovelty(val){ + activeNovelty=val; + 
btns.forEach(b=>b.classList.toggle('active',b.dataset.globalNovelty===val)); + $$('.group[data-novelty]').forEach(g=>{ + const nov=g.dataset.novelty; + if(nov==='all')g.setAttribute('data-novelty-hidden','false'); + else g.setAttribute('data-novelty-hidden',nov!==val?'true':'false'); + }); + // Re-run section filters directly (no debounce) + const reg=window.__sectionFilters||{}; + Object.values(reg).forEach(fn=>fn()); + } + + btns.forEach(b=>b.addEventListener('click',()=>applyNovelty(b.dataset.globalNovelty))); + applyNovelty(defaultNovelty); +})(); +""" + +# --------------------------------------------------------------------------- +# Modals (dialog-based for block metrics info) +# --------------------------------------------------------------------------- + +_MODALS = """\ +(function initModals(){ + let dlg=$('#clone-info-modal'); + if(!dlg){ + dlg=document.createElement('dialog'); + dlg.id='clone-info-modal'; + dlg.innerHTML='' + +''; + document.body.appendChild(dlg); + dlg.querySelector('.modal-close').addEventListener('click',()=>dlg.close()); + dlg.addEventListener('click',e=>{if(e.target===dlg)dlg.close()}); + } + + document.addEventListener('click',e=>{ + const btn=e.target.closest('[data-metrics-btn]'); + if(!btn)return; + const groupId=btn.dataset.metricsBtn; + const group=btn.closest('.group'); + if(!group)return; + const d=group.dataset; + const items=[]; + function add(label,val){if(val)items.push('
'+label+'
'+val+'
')} + add('Match rule',d.matchRule); + add('Block size',d.blockSize); + add('Signature',d.signatureKind); + add('Merged regions',d.mergedRegions); + add('Pattern',d.patternLabel); + add('Hint',d.hintLabel); + add('Hint confidence',d.hintConfidence); + add('Assert ratio',d.assertRatio); + add('Consecutive asserts',d.consecutiveAsserts); + add('Boilerplate asserts',d.boilerplateAsserts); + add('Group arity',d.groupArity); + add('Clone type',d.cloneType); + add('Source kind',d.sourceKind); + if(d.spreadFiles)add('Spread',d.spreadFunctions+' fn / '+d.spreadFiles+' files'); + dlg.querySelector('#modal-title').textContent='Group: '+groupId; + dlg.querySelector('#modal-body').innerHTML=items.length + ?'
'+items.join('')+'
' + :'

No metadata available.

'; + dlg.showModal(); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Suggestions filter +# --------------------------------------------------------------------------- + +_SUGGESTIONS = """\ +(function initSuggestions(){ + const body=$('[data-suggestions-body]'); + if(!body)return; + const cards=[...body.querySelectorAll('[data-suggestion-card]')]; + const sevSel=$('[data-suggestions-severity]'); + const catSel=$('[data-suggestions-category]'); + const famSel=$('[data-suggestions-family]'); + const skSel=$('[data-suggestions-source-kind]'); + const spSel=$('[data-suggestions-spread]'); + const actCheck=$('[data-suggestions-actionable]'); + const countLabel=$('[data-suggestions-count]'); + const filtersBadge=$('[data-filters-count="suggestions"]'); + + function activeFilterCount(){ + let n=0; + [sevSel,catSel,famSel,skSel,spSel].forEach(el=>{ + if(el&&el.value)n++; + }); + if(actCheck?.checked)n++; + return n; + } + + function updateFiltersBadge(){ + if(!filtersBadge)return; + const n=activeFilterCount(); + if(n>0){filtersBadge.hidden=false;filtersBadge.textContent=String(n)} + else{filtersBadge.hidden=true;filtersBadge.textContent='0'} + } + + function apply(){ + const sev=sevSel?.value||''; + const cat=catSel?.value||''; + const fam=famSel?.value||''; + const sk=skSel?.value||''; + const sp=spSel?.value||''; + const act=actCheck?.checked||false; + let shown=0; + cards.forEach(c=>{ + let hide=false; + if(sev&&c.dataset.severity!==sev)hide=true; + if(cat&&c.dataset.category!==cat)hide=true; + if(fam&&c.dataset.family!==fam)hide=true; + if(sk&&c.dataset.sourceKind!==sk)hide=true; + if(sp&&c.dataset.spreadBucket!==sp)hide=true; + if(act&&c.dataset.actionable!=='true')hide=true; + c.setAttribute('data-filter-hidden',hide?'true':'false'); + if(!hide)shown++; + }); + if(countLabel)countLabel.textContent=shown+' shown'; + updateFiltersBadge(); + } + + 
[sevSel,catSel,famSel,skSel,spSel].forEach(el=>{if(el)el.addEventListener('change',apply)}); + if(actCheck)actCheck.addEventListener('change',apply); + + // Popover wiring (shared helper) + wireFiltersPopover($('[data-filters-toggle="suggestions"]')); + + // Initial + apply(); +})(); +""" + +# --------------------------------------------------------------------------- +# Dependency graph hover +# --------------------------------------------------------------------------- + +_DEP_GRAPH = """\ +(function initDepGraph(){ + const svg=$('.dep-graph-svg'); + if(!svg)return; + const nodes=$$('.dep-node'); + const labels=$$('.dep-label'); + const edges=$$('.dep-edge'); + + function highlight(name){ + nodes.forEach(n=>{n.style.fillOpacity=n.dataset.node===name?'1':'0.15'}); + labels.forEach(l=>{l.style.fill=l.dataset.node===name?'var(--text-primary)':'var(--text-muted)'; + l.style.fillOpacity=l.dataset.node===name?'1':'0.3'}); + edges.forEach(e=>{ + const connected=e.dataset.source===name||e.dataset.target===name; + e.style.strokeOpacity=connected?'0.8':'0.05'; + e.style.strokeWidth=connected?'2':'1'; + }); + } + + function reset(){ + nodes.forEach(n=>{n.style.fillOpacity=''}); + labels.forEach(l=>{l.style.fill='';l.style.fillOpacity=''}); + edges.forEach(e=>{e.style.strokeOpacity='';e.style.strokeWidth=''}); + } + + [...nodes,...labels].forEach(el=>{ + el.addEventListener('mouseenter',()=>highlight(el.dataset.node)); + el.addEventListener('mouseleave',reset); + el.style.cursor='pointer'; + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Meta panel toggle +# --------------------------------------------------------------------------- + +_META_PANEL = """\ +(function initBadgeModal(){ + const dlg=$('#badge-modal'); + if(!dlg)return; + + /* --- state --- */ + var _grade='',_score=0,_variant='grade'; + + /* --- grade→shields color (canonical bands) --- */ + function badgeColor(g){ + return 
g==='A'?'brightgreen':g==='B'?'green':g==='C'?'yellow':g==='D'?'orange':'red'} + + /* --- build shield URLs & embed codes for current variant --- */ + function render(){ + var label,alt,url; + if(_variant==='full'){ + label=_score+' ('+_grade+')';alt='codeclone '+_score+' ('+_grade+')'; + }else{ + label='grade '+_grade;alt='codeclone grade '+_grade;} + url='https://img.shields.io/badge/codeclone-' + +encodeURIComponent(label).replace(/-/g,'--')+'-'+badgeColor(_grade); + var prev=dlg.querySelector('#badge-preview'); + if(prev)prev.innerHTML=''+alt+''; + var md=dlg.querySelector('#badge-code-md'); + if(md)md.textContent='!['+alt+']('+url+')'; + var ht=dlg.querySelector('#badge-code-html'); + if(ht)ht.textContent=''+alt+'';} + + /* --- tabs --- */ + dlg.querySelectorAll('[data-badge-tab]').forEach(function(tab){ + tab.addEventListener('click',function(){ + dlg.querySelectorAll('[data-badge-tab]').forEach(function(t){ + t.classList.remove('badge-tab--active');t.setAttribute('aria-selected','false')}); + tab.classList.add('badge-tab--active');tab.setAttribute('aria-selected','true'); + _variant=tab.dataset.badgeTab;render();});}); + + /* --- open --- */ + document.addEventListener('click',function(e){ + var btn=e.target.closest('[data-badge-open]'); + if(!btn)return; + _grade=btn.dataset.badgeGrade||''; + _score=parseInt(btn.dataset.badgeScore||'0',10); + _variant='grade'; + dlg.querySelectorAll('[data-badge-tab]').forEach(function(t){ + var active=t.dataset.badgeTab==='grade'; + t.classList.toggle('badge-tab--active',active); + t.setAttribute('aria-selected',active?'true':'false');}); + render();dlg.showModal(); + var fc=dlg.querySelector('[data-badge-close]');if(fc)fc.focus();}); + + /* --- close --- */ + var closeBtn=dlg.querySelector('[data-badge-close]'); + if(closeBtn)closeBtn.addEventListener('click',function(){dlg.close()}); + dlg.addEventListener('click',function(e){if(e.target===dlg)dlg.close()}); + + /* --- copy with feedback --- */ + 
dlg.addEventListener('click',function(e){ + var copyBtn=e.target.closest('[data-badge-copy]'); + if(!copyBtn)return; + var which=copyBtn.dataset.badgeCopy; + var code=dlg.querySelector('#badge-code-'+which); + if(!code)return; + navigator.clipboard.writeText(code.textContent).then(function(){ + copyBtn.textContent='\u2713 Copied';copyBtn.classList.add('badge-copy-btn--ok'); + setTimeout(function(){copyBtn.textContent='Copy'; + copyBtn.classList.remove('badge-copy-btn--ok')},1500);});}); +})(); +(function initProvModal(){ + const dlg=$('#prov-modal'); + if(!dlg)return; + const openBtn=$('[data-prov-open]'); + const closeBtn=dlg.querySelector('[data-prov-close]'); + if(openBtn)openBtn.addEventListener('click',()=>dlg.showModal()); + if(closeBtn)closeBtn.addEventListener('click',()=>dlg.close()); + dlg.addEventListener('click',function(e){ + if(e.target===dlg){dlg.close();return} + var copyBtn=e.target.closest('[data-prov-copy]'); + if(!copyBtn)return; + e.stopPropagation(); + var payload=copyBtn.getAttribute('data-prov-copy')||''; + if(!payload||!navigator.clipboard)return; + navigator.clipboard.writeText(payload).then(function(){ + copyBtn.classList.add('prov-copy-btn--ok'); + var original=copyBtn.innerHTML; + copyBtn.innerHTML=''; + setTimeout(function(){ + copyBtn.classList.remove('prov-copy-btn--ok'); + copyBtn.innerHTML=original; + },1400); + }); + }); +})(); +(function initFindingWhy(){ + var dlg=$('#finding-why-modal'); + if(!dlg)return; + var body=dlg.querySelector('.modal-body'); + var closeBtn=dlg.querySelector('[data-finding-why-close]'); + closeBtn.addEventListener('click',function(){dlg.close()}); + dlg.addEventListener('click',function(e){if(e.target===dlg)dlg.close()}); + document.addEventListener('click',function(e){ + var btn=e.target.closest('[data-finding-why-btn]'); + if(!btn)return; + var tplId=btn.getAttribute('data-finding-why-btn'); + var tpl=document.getElementById(tplId); + if(!tpl)return; + body.innerHTML=tpl.innerHTML; + dlg.showModal(); + 
}); +})(); +""" + +# --------------------------------------------------------------------------- +# JSON export +# --------------------------------------------------------------------------- + +_EXPORT = "" # removed: Export JSON button eliminated from topbar + +# --------------------------------------------------------------------------- +# Command Palette (Cmd/Ctrl+K) +# --------------------------------------------------------------------------- + +_CMD_PALETTE = "" # removed: command palette eliminated + +# --------------------------------------------------------------------------- +# Table sort +# --------------------------------------------------------------------------- + +_TABLE_SORT = """\ +(function initTableSort(){ + $$('.table th[data-sortable]').forEach(th=>{ + th.addEventListener('click',()=>{ + const table=th.closest('.table'); + if(!table)return; + const idx=[...th.parentElement.children].indexOf(th); + const tbody=table.querySelector('tbody')||table; + const rows=[...tbody.querySelectorAll('tr')].filter(r=>r.querySelector('td')); + const cur=th.getAttribute('aria-sort'); + const dir=cur==='ascending'?'descending':'ascending'; + // Reset siblings + [...th.parentElement.children].forEach(s=>{s.removeAttribute('aria-sort')}); + th.setAttribute('aria-sort',dir); + + rows.sort((a,b)=>{ + const at=(a.children[idx]?.textContent||'').trim(); + const bt=(b.children[idx]?.textContent||'').trim(); + const an=parseFloat(at),bn=parseFloat(bt); + const cmp=(!isNaN(an)&&!isNaN(bn))?an-bn:at.localeCompare(bt); + return dir==='ascending'?cmp:-cmp; + }); + rows.forEach(r=>tbody.appendChild(r)); + }); + }); +})(); +""" + +# --------------------------------------------------------------------------- +# Toast +# --------------------------------------------------------------------------- + +_TOAST = """\ +function toast(msg){ + let c=$('.toast-container'); + if(!c){c=document.createElement('div');c.className='toast-container';document.body.appendChild(c)} + const 
t=document.createElement('div');t.className='toast';t.textContent=msg; + c.appendChild(t); + setTimeout(()=>{t.style.opacity='0';t.style.transform='translateY(8px)'; + setTimeout(()=>t.remove(),300)},3000); +} +""" + +# --------------------------------------------------------------------------- +# Lazy highlight (IntersectionObserver for code snippets) +# --------------------------------------------------------------------------- + +_SCOPE_COUNTERS = """\ +function updateCloneScopeCounters(){ + const sections=['functions','blocks','segments']; + let total=0; + sections.forEach(id=>{ + const sec=document.querySelector('[data-section="'+id+'"]'); + if(!sec)return; + const vis=[...sec.querySelectorAll('.group[data-group="'+id+'"]')] + .filter(g=>g.style.display!=='none'&&g.getAttribute('data-novelty-hidden')!=='true'); + total+=vis.length; + const tabCount=document.querySelector('[data-clone-tab-count="'+id+'"]'); + if(tabCount){tabCount.textContent=vis.length;tabCount.dataset.totalGroups=vis.length} + }); + const mainBtn=document.querySelector('[data-main-clones-count]'); + if(mainBtn)mainBtn.setAttribute('data-main-clones-count',total); +} +""" + +_LAZY_HIGHLIGHT = "" + +# --------------------------------------------------------------------------- +# IDE links +# --------------------------------------------------------------------------- + +_IDE_LINKS = r""" +(function initIdeLinks(){ + const KEY='codeclone-ide'; + const root=document.documentElement; + var scanRoot=root.getAttribute('data-scan-root')||''; + var projectName=scanRoot.replace(/\/$/,'').split('/').pop()||''; + + function relPath(abs){ + var r=scanRoot.replace(/\/$/,'')+'/'; + if(abs.indexOf(r)===0)return abs.substring(r.length); + return abs; + } + + const SCHEMES={ + pycharm:{label:'PyCharm', + url:function(f,l){return 'jetbrains://pycharm/navigate/reference?project='+encodeURIComponent(projectName)+'&path='+encodeURIComponent(relPath(f))+':'+l}}, + idea:{label:'IntelliJ IDEA', + 
url:function(f,l){return 'jetbrains://idea/navigate/reference?project='+encodeURIComponent(projectName)+'&path='+encodeURIComponent(relPath(f))+':'+l}}, + vscode:{label:'VS Code', + url:function(f,l){return 'vscode://file'+f+':'+l}}, + cursor:{label:'Cursor', + url:function(f,l){return 'cursor://file'+f+':'+l}}, + fleet:{label:'Fleet', + url:function(f,l){return 'fleet://open?file='+encodeURIComponent(f)+'&line='+l}}, + zed:{label:'Zed', + url:function(f,l){return 'zed://file'+f+':'+l}}, + '': {label:'None',url:null} + }; + + var current=localStorage.getItem(KEY)||''; + root.setAttribute('data-ide',current); + + const btn=$('.ide-picker-btn'); + const menu=$('.ide-menu'); + const label=$('.ide-picker-label'); + if(!btn||!menu)return; + + function updateLabel(){ + if(!label)return; + var s=SCHEMES[current]; + label.textContent=s&¤t?s.label:'IDE'; + } + + function setChecked(){ + menu.querySelectorAll('button').forEach(function(b){ + b.setAttribute('aria-checked',b.dataset.ide===current?'true':'false'); + }); + } + + function applyHrefs(){ + var s=SCHEMES[current]; + $$('.ide-link[data-file]').forEach(function(a){ + if(!current||!s||!s.url){a.removeAttribute('href');return} + var f=a.getAttribute('data-file'),l=a.getAttribute('data-line')||'1'; + if(!f)return; + a.setAttribute('href',s.url(f,l)); + }); + } + + setChecked(); + updateLabel(); + applyHrefs(); + + // Reapply hrefs when new content becomes visible (tab switch) + var mo=new MutationObserver(function(){applyHrefs()}); + document.querySelectorAll('.tab-panel').forEach(function(p){ + mo.observe(p,{attributes:true,attributeFilter:['class']}); + }); + + btn.addEventListener('click',function(e){ + e.stopPropagation(); + var open=menu.hasAttribute('data-open'); + if(open){menu.removeAttribute('data-open');btn.setAttribute('aria-expanded','false')} + else{menu.setAttribute('data-open','');btn.setAttribute('aria-expanded','true')} + }); + + document.addEventListener('click',function(){ + 
menu.removeAttribute('data-open');btn.setAttribute('aria-expanded','false'); + }); + + menu.addEventListener('click',function(e){ + e.stopPropagation(); + var b=e.target.closest('button[data-ide]'); + if(!b)return; + current=b.dataset.ide; + localStorage.setItem(KEY,current); + root.setAttribute('data-ide',current); + setChecked(); + updateLabel(); + applyHrefs(); + menu.removeAttribute('data-open');btn.setAttribute('aria-expanded','false'); + }); + +})(); +""" + +# --------------------------------------------------------------------------- +# Tooltips (fixed-position, escapes overflow containers) +# --------------------------------------------------------------------------- + +_TOOLTIPS = """\ +(function initTooltips(){ + let tip=null; + function show(e){ + const el=e.target; + const text=el.getAttribute('data-tip'); + if(!text)return; + tip=document.createElement('div'); + tip.className='kpi-tooltip'; + tip.textContent=text; + document.body.appendChild(tip); + const r=el.getBoundingClientRect(); + const tw=tip.offsetWidth; + const th=tip.offsetHeight; + let left=r.left+r.width/2-tw/2; + let top=r.bottom+6; + if(left<4)left=4; + if(left+tw>window.innerWidth-4)left=window.innerWidth-tw-4; + if(top+th>window.innerHeight-4){top=r.top-th-6} + tip.style.left=left+'px'; + tip.style.top=top+'px'; + } + function hide(){if(tip){tip.remove();tip=null}} + document.addEventListener('mouseenter',function(e){ + if(e.target.matches('.kpi-help[data-tip]'))show(e); + },true); + document.addEventListener('mouseleave',function(e){ + if(e.target.matches('.kpi-help[data-tip]'))hide(); + },true); +})(); +""" + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +_ALL_MODULES = ( + _CORE, + _TOAST, + _THEME, + _TABS, + _SUB_TABS, + _SECTIONS, + _NOVELTY, + _MODALS, + _SUGGESTIONS, + _DEP_GRAPH, + _META_PANEL, + _EXPORT, + _CMD_PALETTE, + _TABLE_SORT, + 
_SCOPE_COUNTERS, + _LAZY_HIGHLIGHT, + _IDE_LINKS, + _TOOLTIPS, +) + + +def build_js() -> str: + """Return the complete JS string for the HTML report, wrapped in an IIFE.""" + body = "\n".join(_ALL_MODULES) + return f"(function(){{\n'use strict';\n{body}\n}})();\n" diff --git a/codeclone/_html_report/_primitives/data_attrs.py b/codeclone/_html_report/_primitives/data_attrs.py new file mode 100644 index 0000000..d4e94f3 --- /dev/null +++ b/codeclone/_html_report/_primitives/data_attrs.py @@ -0,0 +1,30 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Unified data-attribute builder for HTML elements.""" + +from __future__ import annotations + +from ._html_escape import _escape_html + +__all__ = ["_build_data_attrs"] + + +def _build_data_attrs(attrs: dict[str, object | None]) -> str: + """Build a space-prefixed HTML data-attribute string from a dict. + + None values are omitted; empty strings are preserved as ``attr=""``. + All values are escaped. + Returns ``''`` when no attrs survive, or ``' data-foo="bar" ...'`` + (leading space) otherwise. + """ + parts: list[str] = [] + for key, val in attrs.items(): + if val is None: + continue + s = str(val) + parts.append(f'{key}="{_escape_html(s)}"') + return f" {' '.join(parts)}" if parts else "" diff --git a/codeclone/_html_report/_primitives/escape.py b/codeclone/_html_report/_primitives/escape.py new file mode 100644 index 0000000..381b033 --- /dev/null +++ b/codeclone/_html_report/_primitives/escape.py @@ -0,0 +1,25 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import html + + +def _escape_html(v: object) -> str: + text = html.escape("" if v is None else str(v), quote=True) + text = text.replace("`", "`") + text = text.replace("\u2028", "
").replace("\u2029", "
") + return text + + +def _meta_display(v: object) -> str: + if isinstance(v, bool): + return "true" if v else "false" + if v is None: + return "n/a" + text = str(v).strip() + return text if text else "n/a" diff --git a/codeclone/_html_report/_primitives/filters.py b/codeclone/_html_report/_primitives/filters.py new file mode 100644 index 0000000..e700fad --- /dev/null +++ b/codeclone/_html_report/_primitives/filters.py @@ -0,0 +1,59 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Data-driven filter dropdown renderer for report toolbars.""" + +from __future__ import annotations + +from collections.abc import Sequence + +from ._html_escape import _escape_html + +__all__ = [ + "CLONE_TYPE_OPTIONS", + "SPREAD_OPTIONS", + "_render_select", +] + +CLONE_TYPE_OPTIONS: tuple[tuple[str, str], ...] = ( + ("Type-1", "Type-1"), + ("Type-2", "Type-2"), + ("Type-3", "Type-3"), + ("Type-4", "Type-4"), +) + +SPREAD_OPTIONS: tuple[tuple[str, str], ...] = ( + ("high", "high"), + ("low", "low"), +) + + +def _render_select( + *, + element_id: str, + data_attr: str, + options: Sequence[tuple[str, str]], + all_label: str = "all", + selected: str | None = None, +) -> str: + """Render a ``" + f'', + ] + for value, display in options: + sel = " selected" if selected == value else "" + parts.append( + f'" + ) + parts.append("") + return "".join(parts) diff --git a/codeclone/_html_report/_widgets/badges.py b/codeclone/_html_report/_widgets/badges.py new file mode 100644 index 0000000..716d1ad --- /dev/null +++ b/codeclone/_html_report/_widgets/badges.py @@ -0,0 +1,272 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Shared HTML badge, label, and visual helpers for the report UI layer. + +Naming conventions: + - ``{domain}-badge`` for inline taxonomy labels (risk-badge, severity-badge, + source-kind-badge, clone-type-badge) + - ``meta-item`` is the **single** card pattern for all stat/KPI/meta cards + - ``meta-label`` + ``meta-value`` are the **single** label+value pair + - ``suggestion-card`` for suggestion grid items +""" + +from __future__ import annotations + +from collections.abc import Callable, Sequence + +from ._html_escape import _escape_html +from .domain.quality import ( + EFFORT_EASY, + EFFORT_HARD, + EFFORT_MODERATE, + RISK_HIGH, + RISK_LOW, + RISK_MEDIUM, + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_WARNING, +) +from .report._source_kinds import normalize_source_kind, source_kind_label + +__all__ = [ + "CHECK_CIRCLE_SVG", + "INFO_CIRCLE_SVG", + "_inline_empty", + "_micro_badges", + "_quality_badge_html", + "_render_chain_flow", + "_short_label", + "_source_kind_badge_html", + "_stat_card", + "_tab_empty", + "_tab_empty_info", +] + +_EFFORT_CSS: dict[str, str] = { + EFFORT_EASY: "success", + EFFORT_MODERATE: "warning", + EFFORT_HARD: "error", +} + +CHECK_CIRCLE_SVG = ( + '' + '' + '' + "" +) + +INFO_CIRCLE_SVG = ( + '' + '' + '' + '' + "" +) + + +def _micro_badges(*pairs: tuple[str, object]) -> str: + """Render compact label:value micro-badge pairs for stat card details.""" + return "".join( + f'' + f'{_escape_html(str(value))}' + f'{_escape_html(label)}' + for label, value in pairs + if value is not None and str(value) != "n/a" + ) + + +def _quality_badge_html(text: str) -> str: + """Render a risk / severity / effort value as a styled badge.""" + r = text.strip().lower() + if r in (RISK_LOW, RISK_HIGH, RISK_MEDIUM): + return ( + f'{_escape_html(r)}' + ) + if r in 
(SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO): + return ( + f'' + f"{_escape_html(r)}" + ) + if r in _EFFORT_CSS: + return ( + f'{_escape_html(r)}' + ) + return _escape_html(text) + + +def _source_kind_badge_html(source_kind: str) -> str: + normalized = normalize_source_kind(source_kind) + return ( + f'' + f"{_escape_html(source_kind_label(normalized))}" + ) + + +_INLINE_EMPTY_ICONS: dict[str, str] = { + "good": ( + '' + ), + "neutral": ( + '' + ), +} + + +def _inline_empty(message: str, *, tone: str = "neutral") -> str: + """Compact single-row empty-state for inline/card contexts. + + Use for summary items, breakdown panels, and other small cards where a + full ``.tab-empty`` would be too heavy. + + *tone*: + - ``"good"`` — green check (positive: "nothing to report"). + - ``"neutral"`` — muted info dot (missing or unavailable data). + """ + tone_key = tone if tone in _INLINE_EMPTY_ICONS else "neutral" + icon = _INLINE_EMPTY_ICONS[tone_key] + return ( + f'
' + f"{icon}" + f'{_escape_html(message)}' + "
" + ) + + +def _tab_empty( + message: str, + *, + description: str | None = "Nothing to report - keep up the good work.", +) -> str: + desc_html = ( + f'
{_escape_html(description)}
' + if description + else "" + ) + return ( + '
' + f"{CHECK_CIRCLE_SVG}" + f'
{_escape_html(message)}
' + f"{desc_html}" + "
" + ) + + +def _tab_empty_info( + message: str, + *, + description: str | None = None, + detail_html: str | None = None, +) -> str: + if detail_html: + desc_block = ( + f'
{detail_html}
' + ) + elif description: + desc_block = ( + f'
' + f"{_escape_html(description)}
" + ) + else: + desc_block = "" + return ( + '
' + f"{INFO_CIRCLE_SVG}" + f'
{_escape_html(message)}
' + f"{desc_block}" + "
" + ) + + +def _short_label(name: str, max_len: int = 18) -> str: + """Shorten a dotted name keeping the last segment, truncated if needed.""" + parts = name.rsplit(".", maxsplit=1) + label = parts[-1] if len(parts) > 1 else name + if len(label) > max_len: + half = max_len // 2 - 1 + return f"{label[:half]}..{label[-half:]}" + return label + + +def _render_chain_flow( + parts: Sequence[str], + *, + arrows: bool = False, +) -> str: + """Render a sequence of names as chain-node spans, optionally with arrows.""" + nodes: list[str] = [] + for i, mod in enumerate(parts): + short = _short_label(str(mod)) + nodes.append( + f'' + f"{_escape_html(short)}" + ) + if arrows and i < len(parts) - 1: + nodes.append('\u2192') + return f'{"".join(nodes)}' + + +def _stat_card( + label: str, + value: object, + *, + detail: str = "", + tip: str = "", + value_tone: str = "", + css_class: str = "meta-item", + glossary_tip_fn: Callable[[str], str] | None = None, + delta_new: int | None = None, +) -> str: + """Unified stat-card renderer. + + Always emits the same HTML structure using ``.meta-item`` / + ``.meta-label`` / ``.meta-value`` so every stat card shares the + exact same design code. + + *value_tone* — semantic color for the main value: + ``"good"`` → green (metric is clean), ``"bad"`` → red (metric has issues), + ``"warn"`` → yellow, ``"muted"`` → dimmed, ``""`` → default text-primary. + + *delta_new* — if provided and > 0, renders a ``+N new`` badge + inline with the label (top-right). For "bad" metrics (complexity, + coupling, etc.) positive delta means regression → red. + """ + tip_html = "" + if glossary_tip_fn is not None: + tip_html = glossary_tip_fn(label) + elif tip: + tip_html = f'?' + + detail_html = "" + if detail: + detail_html = f'
{detail}
' + + delta_html = "" + if delta_new is not None and delta_new > 0: + delta_html = f'+{delta_new}' + + value_cls = f" meta-value--{value_tone}" if value_tone else "" + + return ( + f'
' + f'
{_escape_html(label)}{tip_html}{delta_html}
' + f'
{_escape_html(str(value))}
' + f"{detail_html}" + "
" + ) diff --git a/codeclone/_html_report/_widgets/components.py b/codeclone/_html_report/_widgets/components.py new file mode 100644 index 0000000..7a9fcae --- /dev/null +++ b/codeclone/_html_report/_widgets/components.py @@ -0,0 +1,106 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Shared UI components: insight banners, summary helpers, chip rows.""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Literal + +from .._coerce import as_int as _as_int +from .._html_badges import _inline_empty, _source_kind_badge_html +from .._html_escape import _escape_html +from ._icons import section_icon_html + +Tone = Literal["ok", "warn", "risk", "info"] + +_EMPTY_ICON = ( + '' + '' + '' +) + + +def insight_block(*, question: str, answer: str, tone: Tone = "info") -> str: + return ( + f'
' + f'
{_escape_html(question)}
' + f'
{_escape_html(answer)}
' + "
" + ) + + +def overview_cluster_header(title: str, subtitle: str | None = None) -> str: + sub = ( + f'

{_escape_html(subtitle)}

' + if subtitle + else "" + ) + return ( + '
' + f'

{_escape_html(title)}

' + f"{sub}" + "
" + ) + + +_SUMMARY_ICON_KEYS: dict[str, tuple[str, str]] = { + "top risks": ("top-risks", "summary-icon summary-icon--risk"), + "issue breakdown": ("issue-breakdown", "summary-icon summary-icon--info"), + "source breakdown": ("source-breakdown", "summary-icon summary-icon--info"), + "all findings": ("all-findings", "summary-icon summary-icon--info"), + "clone groups": ("clone-groups", "summary-icon summary-icon--info"), + "low cohesion": ("low-cohesion", "summary-icon summary-icon--info"), + "top candidates": ("quality", "summary-icon summary-icon--info"), + "more candidates": ("quality", "summary-icon summary-icon--info"), + "health profile": ("health-profile", "summary-icon summary-icon--info"), + "adoption coverage": ("coverage-adoption", "summary-icon summary-icon--info"), + "public api surface": ("api-surface", "summary-icon summary-icon--info"), + "coverage join": ("quality", "summary-icon summary-icon--info"), +} + + +def overview_summary_item_html(*, label: str, body_html: str) -> str: + icon_key, icon_class = _SUMMARY_ICON_KEYS.get(label.lower(), ("", "")) + icon = ( + section_icon_html(icon_key, class_name=icon_class) + if icon_key and icon_class + else "" + ) + return ( + '
' + '
' + f"{icon}{_escape_html(label)}
" + f"{body_html}" + "
" + ) + + +def overview_source_breakdown_html(breakdown: Mapping[str, object]) -> str: + sorted_items = sorted( + ((str(k), _as_int(v)) for k, v in breakdown.items()), + key=lambda item: -item[1], + ) + rows = [(kind, count) for kind, count in sorted_items if count > 0] + if not rows: + return _inline_empty("No source data available", tone="neutral") + + total = sum(c for _, c in rows) + parts: list[str] = [] + for kind, count in rows: + pct = round(count / total * 100) if total else 0 + parts.append( + '
' + f"{_source_kind_badge_html(kind)}" + f'{count}' + f'' + f'' + "
" + ) + return '
' + "".join(parts) + "
" diff --git a/codeclone/_html_report/_widgets/glossary.py b/codeclone/_html_report/_widgets/glossary.py new file mode 100644 index 0000000..e48d4f0 --- /dev/null +++ b/codeclone/_html_report/_widgets/glossary.py @@ -0,0 +1,100 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Tooltip glossary for report table headers and stat cards.""" + +from __future__ import annotations + +from .._html_escape import _escape_html + +GLOSSARY: dict[str, str] = { + # Complexity + "function": "Fully-qualified function or method name", + "class": "Fully-qualified class name", + "name": "Symbol name (function, class, or variable)", + "file": "Source file path relative to scan root", + "location": "File and line range where the symbol is defined", + "cc": "Cyclomatic complexity — number of independent execution paths", + "nesting": "Maximum nesting depth of control-flow statements", + "risk": "Risk level based on metric thresholds (low / medium / high)", + # Coupling / cohesion + "cbo": "Coupling Between Objects — number of classes this class depends on", + "coupled classes": "Resolved class dependencies used to compute CBO for this class", + "lcom4": "Lack of Cohesion of Methods — connected components in method/field graph", + "methods": "Number of methods defined in the class", + "fields": "Number of instance variables (attributes) in the class", + # Dead code + "line": "Source line number where the symbol starts", + "kind": "Symbol type: function, class, import, or variable", + "confidence": "Detection confidence (low / medium / high / critical)", + # Dependencies + "longest chain": "Longest transitive import chain between modules", + "length": "Number of modules in the dependency chain", + "cycle": "Circular import dependency between modules", + # 
Suggestions + "priority": "Computed priority score (higher = more urgent)", + "severity": "Issue severity: critical, warning, or info", + "category": ( + "Metric category: clone, complexity, coupling, cohesion, dead_code, dependency" + ), + "title": "Brief description of the suggested improvement", + "effort": "Estimated effort to fix: easy, moderate, or hard", + "steps": "Actionable steps to resolve the issue", + # Dependency stat cards + "modules": "Total number of Python modules analyzed", + "edges": "Total number of import relationships between modules", + "max depth": "Longest chain of transitive imports", + "cycles": "Number of circular import dependencies detected", + # Complexity stat cards + "high-risk functions": ( + "Functions with cyclomatic complexity above the high-risk threshold" + ), + "max cc": "Highest cyclomatic complexity value among all analyzed functions", + "avg cc": "Average cyclomatic complexity across all analyzed functions", + "deep nesting": ( + "Functions with nesting depth exceeding recommended threshold (> 4)" + ), + # Coupling stat cards + "high-coupling classes": "Classes with CBO above the high-risk threshold", + "max cbo": "Highest Coupling Between Objects value among all classes", + "avg cbo": "Average CBO across all analyzed classes", + "medium risk": "Items at medium risk level — worth reviewing but not critical", + # Cohesion stat cards + "low-cohesion classes": ( + "Classes with LCOM4 > 1, indicating multiple responsibilities" + ), + "max lcom4": "Highest Lack of Cohesion value among all classes", + "high risk": "Items at high risk level requiring attention", + # Overloaded module stat cards + "overloaded": ( + "Modules exceeding acceptable thresholds for size, complexity, or coupling" + ), + "critical": "Items with critical status requiring immediate attention", + "max score": "Highest overload score among all modules", + "avg loc": "Average lines of code per module", + # Dead code stat cards + "candidates": "Total dead code 
candidates detected by static analysis", + "high confidence": "Dead code items detected with high or critical confidence", + "suppressed": "Dead code candidates excluded by suppression rules", + "hit rate": "Percentage of high-confidence items among all candidates", + # Clone stat cards + "clone groups": "Distinct duplication patterns, each containing 2+ code fragments", + "instances": "Total duplicated code fragments across all groups", + "new groups": "Clone groups not present in the previous baseline", + "high spread": "Clone groups spanning multiple files", + # Suggestion stat cards + "total suggestions": "Total actionable improvement suggestions generated", + "warning": "Suggestions with warning severity worth reviewing", + "easy wins": "Actionable suggestions with low estimated effort", +} + + +def glossary_tip(label: str) -> str: + """Return a tooltip ```` for *label*, or ``''`` if unknown.""" + tip = GLOSSARY.get(label.lower(), "") + if not tip: + return "" + return f' ?' diff --git a/codeclone/_html_report/_widgets/icons.py b/codeclone/_html_report/_widgets/icons.py new file mode 100644 index 0000000..87b68c2 --- /dev/null +++ b/codeclone/_html_report/_widgets/icons.py @@ -0,0 +1,214 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""SVG icon constants for the HTML report (Lucide-style).""" + +from __future__ import annotations + + +def _svg(size: int, sw: str, body: str) -> str: + return ( + f'{body}' + ) + + +def _svg_with_class(size: int, sw: str, body: str, *, class_name: str = "") -> str: + class_attr = f' class="{class_name}"' if class_name else "" + return ( + f'{body}' + ) + + +BRAND_LOGO = ( + '" +) + +ICONS: dict[str, str] = { + "search": _svg( + 16, + "2.5", + '', + ), + "clear": _svg( + 16, + "2.5", + '', + ), + "chev_down": _svg( + 16, + "2.5", + '', + ), + "theme_moon": _svg_with_class( + 16, + "2", + '', + class_name="theme-icon theme-icon-moon", + ), + "theme_sun": _svg_with_class( + 16, + "2", + '' + '' + '' + '' + '' + '' + '' + '' + '', + class_name="theme-icon theme-icon-sun", + ), + "check": _svg( + 48, + "2", + '', + ), + "prev": _svg( + 16, + "2", + '', + ), + "next": _svg( + 16, + "2", + '', + ), + "sort_asc": _svg( + 12, + "2", + '', + ), + "sort_desc": _svg( + 12, + "2", + '', + ), + "ide": _svg( + 16, + "2", + '', + ), +} + +_SECTION_ICON_BODIES: dict[str, tuple[str, str]] = { + "overview": ( + "1.8", + '' + '' + '' + '', + ), + "clones": ( + "2", + '' + '', + ), + "quality": ( + "2", + '' + '' + '', + ), + "dependencies": ( + "2", + '' + '' + '', + ), + "dead-code": ( + "2", + '' + '', + ), + "suggestions": ( + "2", + '' + '', + ), + "structural-findings": ( + "2", + '' + '' + '', + ), + "top-risks": ( + "2", + '' + '', + ), + "issue-breakdown": ( + "2", + '' + '' + '', + ), + "source-breakdown": ( + "2", + '' + '', + ), + "health-profile": ( + "2", + '' + '' + '', + ), + "all-findings": ( + "2", + '' + '' + '', + ), + "clone-groups": ( + "2", + '' + '', + ), + "low-cohesion": ( + "2", + '' + '' + '', + ), + "coverage-adoption": ( + "2", + '' + '', + ), + "api-surface": ( + "2", + '' + '', + ), +} + + +def section_icon_html( + key: str, + *, + class_name: str = "", + size: int = 16, +) -> 
str: + spec = _SECTION_ICON_BODIES.get(key.strip().lower()) + if spec is None: + return "" + stroke_width, body = spec + return _svg_with_class(size, stroke_width, body, class_name=class_name) diff --git a/codeclone/_html_report/_widgets/snippets.py b/codeclone/_html_report/_widgets/snippets.py new file mode 100644 index 0000000..dac7eec --- /dev/null +++ b/codeclone/_html_report/_widgets/snippets.py @@ -0,0 +1,207 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import html +import importlib +from dataclasses import dataclass +from functools import lru_cache +from typing import TYPE_CHECKING, NamedTuple, cast + +from .errors import FileProcessingError + +if TYPE_CHECKING: + from types import ModuleType + + +@dataclass(slots=True) +class _Snippet: + filepath: str + start_line: int + end_line: int + code_html: str + + +class _FileCache: + __slots__ = ("_get_file_lines_impl", "maxsize") + + def __init__(self, maxsize: int = 128) -> None: + self.maxsize = maxsize + self._get_file_lines_impl = lru_cache(maxsize=maxsize)(self._read_file_lines) + + @staticmethod + def _read_file_lines(filepath: str) -> tuple[str, ...]: + try: + + def _read_with_errors(errors: str) -> tuple[str, ...]: + with open(filepath, encoding="utf-8", errors=errors) as f: + return tuple(line.rstrip("\n") for line in f) + + try: + return _read_with_errors("strict") + except UnicodeDecodeError: + return _read_with_errors("replace") + except OSError as e: + raise FileProcessingError(f"Cannot read {filepath}: {e}") from e + + def get_lines_range( + self, filepath: str, start_line: int, end_line: int + ) -> tuple[str, ...]: + if start_line < 1: + start_line = 1 + if end_line < start_line: + return () + lines = 
self._get_file_lines_impl(filepath) + start_index = start_line - 1 + if start_index >= len(lines): + return () + end_index = min(len(lines), end_line) + return lines[start_index:end_index] + + class _CacheInfo(NamedTuple): + hits: int + misses: int + maxsize: int | None + currsize: int + + def cache_info(self) -> _CacheInfo: + return cast("_FileCache._CacheInfo", self._get_file_lines_impl.cache_info()) + + +_PYGMENTS_IMPORTER_ID: int | None = None +_PYGMENTS_API: tuple[ModuleType, ModuleType, ModuleType] | None = None + + +def _load_pygments_api() -> tuple[ModuleType, ModuleType, ModuleType] | None: + """ + Load pygments modules once per import-function identity. + + Tests monkeypatch `importlib.import_module`; tracking importer identity keeps + behavior deterministic and allows import-error branches to stay testable. + """ + global _PYGMENTS_IMPORTER_ID + global _PYGMENTS_API + + importer_id = id(importlib.import_module) + if importer_id != _PYGMENTS_IMPORTER_ID: + _PYGMENTS_IMPORTER_ID = importer_id + _PYGMENTS_API = None + if _PYGMENTS_API is not None: + return _PYGMENTS_API + + try: + pygments = importlib.import_module("pygments") + formatters = importlib.import_module("pygments.formatters") + lexers = importlib.import_module("pygments.lexers") + except ImportError: + return None + + _PYGMENTS_API = (pygments, formatters, lexers) + return _PYGMENTS_API + + +def _try_pygments(code: str) -> str | None: + pygments_api = _load_pygments_api() + if pygments_api is None: + return None + pygments, formatters, lexers = pygments_api + + highlight = pygments.highlight + formatter_cls = formatters.HtmlFormatter + lexer_cls = lexers.PythonLexer + result = highlight(code, lexer_cls(), formatter_cls(nowrap=True)) + return result if isinstance(result, str) else None + + +def _pygments_css(style_name: str) -> str: + """ + Returns CSS for pygments tokens. Scoped to `.codebox` to avoid leaking styles. + If Pygments is not available or style missing, returns "". 
+ """ + pygments_api = _load_pygments_api() + if pygments_api is None: + return "" + _, formatters, _ = pygments_api + + try: + formatter_cls = formatters.HtmlFormatter + fmt = formatter_cls(style=style_name) + except Exception: + try: + fmt = formatter_cls() + except Exception: + return "" + + try: + css = fmt.get_style_defs(".codebox") + return css if isinstance(css, str) else "" + except Exception: + return "" + + +def _render_code_block( + *, + filepath: str, + start_line: int, + end_line: int, + file_cache: _FileCache, + context: int, + max_lines: int, +) -> _Snippet: + s = max(1, start_line - context) + e = end_line + context + + if e - s + 1 > max_lines: + e = s + max_lines - 1 + + try: + lines = file_cache.get_lines_range(filepath, s, e) + except FileProcessingError: + missing = ( + '
'
+            '
Source file unavailable
' + "
" + ) + return _Snippet( + filepath=filepath, + start_line=start_line, + end_line=end_line, + code_html=missing, + ) + + numbered: list[tuple[bool, str]] = [] + for lineno, line in enumerate(lines, start=s): + hit = start_line <= lineno <= end_line + numbered.append((hit, f"{lineno:>5} | {line.rstrip()}")) + + raw = "\n".join(text for _, text in numbered) + highlighted = _try_pygments(raw) + + if highlighted is None: + rendered: list[str] = [] + for hit, text in numbered: + cls = "hitline" if hit else "line" + rendered.append( + f'
{html.escape(text, quote=False)}
' + ) + body = "".join(rendered) + else: + hit_flags = [hit for hit, _ in numbered] + pyg_lines = highlighted.split("\n") + rendered_pyg: list[str] = [] + for i, pyg_line in enumerate(pyg_lines): + hit = hit_flags[i] if i < len(hit_flags) else False + cls = "hitline" if hit else "line" + rendered_pyg.append(f'
{pyg_line}
') + body = "".join(rendered_pyg) + + return _Snippet( + filepath=filepath, + start_line=start_line, + end_line=end_line, + code_html=f'
{body}
', + ) diff --git a/codeclone/_html_report/_widgets/tables.py b/codeclone/_html_report/_widgets/tables.py new file mode 100644 index 0000000..7f633f2 --- /dev/null +++ b/codeclone/_html_report/_widgets/tables.py @@ -0,0 +1,127 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Generic table renderer for metric/finding tables.""" + +from __future__ import annotations + +from collections.abc import Collection, Sequence +from typing import TYPE_CHECKING + +from .._html_badges import _quality_badge_html, _tab_empty +from .._html_escape import _escape_html +from ._glossary import glossary_tip + +if TYPE_CHECKING: + from ._context import ReportContext + +_RISK_HEADERS = {"risk", "confidence", "severity", "effort"} +_PATH_HEADERS = {"file", "location"} + +_COL_WIDTHS: dict[str, str] = { + "cc": "62px", + "cbo": "62px", + "lcom4": "70px", + "nesting": "76px", + "line": "60px", + "length": "68px", + "methods": "80px", + "fields": "68px", + "priority": "74px", + "risk": "78px", + "confidence": "94px", + "severity": "82px", + "effort": "78px", + "category": "100px", + "kind": "76px", + "steps": "120px", + "coupled classes": "360px", +} + +_COL_CLS: dict[str, str] = {} +for _h in ("function", "class", "name"): + _COL_CLS[_h] = "col-name" +for _h in ("file", "location"): + _COL_CLS[_h] = "col-path" +for _h in ( + "cc", + "cbo", + "lcom4", + "nesting", + "line", + "length", + "methods", + "fields", + "priority", +): + _COL_CLS[_h] = "col-num" +for _h in ("risk", "confidence", "severity", "effort"): + _COL_CLS[_h] = "col-badge" +for _h in ("category", "kind"): + _COL_CLS[_h] = "col-cat" +for _h in ("cycle", "longest chain", "title", "coupled classes"): + _COL_CLS[_h] = "col-wide" +_COL_CLS["steps"] = "col-steps" + + +def render_rows_table( + *, + 
headers: Sequence[str], + rows: Sequence[Sequence[str]], + empty_message: str, + empty_description: str | None = "Nothing to report - keep up the good work.", + raw_html_headers: Collection[str] = (), + ctx: ReportContext | None = None, +) -> str: + """Render a data table with badges, tooltips, and col sizing.""" + if not rows: + return _tab_empty(empty_message, description=empty_description) + + lower_headers = [h.lower() for h in headers] + raw_html_set = {h.lower() for h in raw_html_headers} + + # colgroup + cg = [""] + for h in lower_headers: + w = _COL_WIDTHS.get(h) + cg.append(f'' if w else "") + cg.append("") + + # thead + th_parts = [ + f"{_escape_html(header)}{glossary_tip(header)}" for header in headers + ] + + # tbody + def _td(col_idx: int, cell: str) -> str: + h = lower_headers[col_idx] if col_idx < len(lower_headers) else "" + cls = _COL_CLS.get(h, "") + cls_attr = f' class="{cls}"' if cls else "" + if h in raw_html_set: + return f"{cell}" + if h in _RISK_HEADERS: + return f"{_quality_badge_html(cell)}" + if h in _PATH_HEADERS and ctx is not None: + short = ctx.relative_path(cell) + return ( + f'' + f'' + f"{_escape_html(short)}" + ) + return f"{_escape_html(cell)}" + + body_html = "".join( + "" + "".join(_td(i, cell) for i, cell in enumerate(row)) + "" + for row in rows + ) + + return ( + '
' + f"{''.join(cg)}" + f"{''.join(th_parts)}" + f"{body_html}" + "
" + ) diff --git a/codeclone/_html_report/_widgets/tabs.py b/codeclone/_html_report/_widgets/tabs.py new file mode 100644 index 0000000..8ce1e43 --- /dev/null +++ b/codeclone/_html_report/_widgets/tabs.py @@ -0,0 +1,60 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +"""Tab/subtab rendering helpers.""" + +from __future__ import annotations + +from collections.abc import Sequence + +from .._html_escape import _escape_html + + +def render_split_tabs( + *, + group_id: str, + tabs: Sequence[tuple[str, str, int, str]], + emit_clone_counters: bool = False, +) -> str: + """Render sub-tab navigation + panels. + + Each tab tuple: ``(tab_id, label, count, panel_html)``. + """ + if not tabs: + return "" + + nav: list[str] = [ + '") + + panels: list[str] = [] + for idx, (tab_id, _, _, panel_html) in enumerate(tabs): + active = " active" if idx == 0 else "" + panels.append( + f'
' + f"{panel_html}
" + ) + + return f"{''.join(nav)}{''.join(panels)}" diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index 6bb25dc..3dd6aa0 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ -20,6 +20,7 @@ import codeclone.baseline as baseline_mod import codeclone.baseline.metrics_baseline as metrics_baseline_mod import codeclone.core.worker as core_worker +import codeclone.surfaces.cli.attrs as cli_attrs import codeclone.surfaces.cli.baseline_state as cli_baselines_mod import codeclone.surfaces.cli.changed_scope as cli_changed_scope import codeclone.surfaces.cli.console as cli_console @@ -159,6 +160,24 @@ def test_process_file_success(tmp_path: Path) -> None: assert result.stat is not None +def test_cli_attr_helpers_handle_bool_int_and_path_edges(tmp_path: Path) -> None: + args = SimpleNamespace( + flag="yes", + numeric=True, + broken=3.14, + path_value=tmp_path / "report.json", + invalid_text=123, + ) + + assert cli_attrs.bool_attr(args, "flag") is True + assert cli_attrs.int_attr(args, "numeric", default=7) == 7 + assert cli_attrs.int_attr(args, "broken", default=9) == 9 + assert cli_attrs.optional_text_attr(args, "path_value") == str( + tmp_path / "report.json" + ) + assert cli_attrs.optional_text_attr(args, "invalid_text") is None + + def test_cli_module_main_guard(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr(sys, "argv", ["codeclone", "--help"]) with pytest.raises(SystemExit) as exc: diff --git a/tests/test_core_branch_coverage.py b/tests/test_core_branch_coverage.py index 9022a5f..65143e3 100644 --- a/tests/test_core_branch_coverage.py +++ b/tests/test_core_branch_coverage.py @@ -573,8 +573,7 @@ def _must_not_run( files_to_process=(), skipped_warnings=(), cached_segment_report_projection=cast( - "SegmentReportProjection", - cached_projection, + SegmentReportProjection, cached_projection ), ) processing = ProcessingResult( @@ -614,6 +613,68 @@ def test_pipeline_coerce_segment_projection_invalid_shapes() -> None: is None ) + 
assert ( + _coerce_segment_report_projection( + { + "digest": "d", + "suppressed": 0, + "groups": {"k": [{"segment_hash": "h", "segment_sig": "s"}]}, + } + ) + is None + ) + + assert ( + _coerce_segment_report_projection( + { + "digest": "d", + "suppressed": 0, + "groups": {"k": ["bad-item"]}, + } + ) + is None + ) + + +def test_pipeline_coerce_segment_projection_valid_group_items() -> None: + projection = _coerce_segment_report_projection( + { + "digest": "digest", + "suppressed": 2, + "groups": { + "sig-1": [ + { + "segment_hash": "hash-1", + "segment_sig": "sig-1", + "filepath": "pkg/mod.py", + "qualname": "pkg.mod:run", + "start_line": 10, + "end_line": 16, + "size": 6, + } + ] + }, + } + ) + + assert projection == { + "digest": "digest", + "suppressed": 2, + "groups": { + "sig-1": [ + { + "segment_hash": "hash-1", + "segment_sig": "sig-1", + "filepath": "pkg/mod.py", + "qualname": "pkg.mod:run", + "start_line": 10, + "end_line": 16, + "size": 6, + } + ] + }, + } + def test_pipeline_analyze_tracks_suppressed_dead_code_candidates() -> None: boot = BootstrapResult( diff --git a/tests/test_coverage_edges.py b/tests/test_coverage_edges.py index 84a100e..16b3d44 100644 --- a/tests/test_coverage_edges.py +++ b/tests/test_coverage_edges.py @@ -105,6 +105,11 @@ def test_validate_git_diff_ref_rejects_control_whitespace_characters() -> None: validate_git_diff_ref("main\tHEAD") +def test_validate_git_diff_ref_rejects_empty_value() -> None: + with pytest.raises(ValueError, match="must not be empty"): + validate_git_diff_ref("") + + def test_add_option_rejects_unsupported_cli_kind() -> None: parser = argparse.ArgumentParser() group = parser.add_argument_group("Example") diff --git a/tests/test_mcp_tools.py b/tests/test_mcp_tools.py new file mode 100644 index 0000000..6700091 --- /dev/null +++ b/tests/test_mcp_tools.py @@ -0,0 +1,62 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from collections.abc import Callable + +import pytest + +from codeclone.surfaces.mcp.session import ( + MCPAnalysisRequest, + MCPGateRequest, + MCPServiceContractError, +) +from codeclone.surfaces.mcp.tools._base import MCPToolSchema, SimpleMCPTool +from codeclone.surfaces.mcp.tools.analyze import _analysis_request +from codeclone.surfaces.mcp.tools.gates import _gate_request +from codeclone.surfaces.mcp.tools.runs import _run_id + + +class _Session: + def __init__(self) -> None: + self.value = "ok" + + def __getattr__(self, name: str) -> Callable[..., object]: + raise AttributeError(name) + + +def test_analysis_request_requires_typed_request() -> None: + request = MCPAnalysisRequest(root="/repo") + + assert _analysis_request({"request": request}) is request + + with pytest.raises(MCPServiceContractError, match="valid MCPAnalysisRequest"): + _analysis_request({"request": object()}) + + +def test_gate_request_requires_typed_request() -> None: + request = MCPGateRequest(fail_on_new=True) + + assert _gate_request({"request": request}) is request + + with pytest.raises(MCPServiceContractError, match="valid MCPGateRequest"): + _gate_request({"request": "broken"}) + + +def test_run_id_accepts_only_strings() -> None: + assert _run_id({"run_id": "abc123"}) == "abc123" + assert _run_id({"run_id": 123}) is None + + +def test_simple_mcp_tool_runs_bound_runner() -> None: + tool = SimpleMCPTool( + name="demo", + schema=MCPToolSchema(title="Demo"), + runner=lambda session, params: (session.value, dict(params)), + ) + + assert tool.run(_Session(), {"alpha": 1}) == ("ok", {"alpha": 1}) diff --git a/tests/test_metrics_baseline.py b/tests/test_metrics_baseline.py index a71544e..74a8cc0 100644 --- a/tests/test_metrics_baseline.py +++ 
b/tests/test_metrics_baseline.py @@ -1043,6 +1043,212 @@ def test_metrics_baseline_parse_snapshot_grade_validation(tmp_path: Path) -> Non mb_validate._parse_snapshot(payload, path=path) +def test_metrics_baseline_version_and_optional_string_helpers(tmp_path: Path) -> None: + path = tmp_path / "metrics-baseline.json" + + assert ( + mb_validate._is_compatible_metrics_schema( + baseline_version="1.1", + expected_version="1.2", + ) + is True + ) + assert ( + mb_validate._is_compatible_metrics_schema( + baseline_version=None, + expected_version="1.2", + ) + is False + ) + assert ( + mb_validate._is_compatible_metrics_schema( + baseline_version="broken", + expected_version="1.2", + ) + is False + ) + assert mb_validate._parse_major_minor("1") is None + assert mb_validate._parse_major_minor("1.x") is None + assert mb_validate._require_str_list_or_none({}, "missing", path=path) is None + + with pytest.raises(BaselineValidationError, match="'qualname' must be str"): + mb_validate._optional_require_str({"qualname": 1}, "qualname", path=path) + + +def test_metrics_baseline_enum_validation_helpers_cover_all_supported_values( + tmp_path: Path, +) -> None: + path = tmp_path / "metrics-baseline.json" + + for grade in ("A", "B", "C", "D", "F"): + assert mb_validate._require_health_grade(grade, path=path) == grade + with pytest.raises(BaselineValidationError, match="must be one of A/B/C/D/F"): + mb_validate._require_health_grade("Z", path=path) + + for kind in ("pos_only", "pos_or_kw", "vararg", "kw_only", "kwarg"): + assert mb_validate._require_api_param_kind(kind, path=path) == kind + with pytest.raises(BaselineValidationError, match="api param 'kind' is invalid"): + mb_validate._require_api_param_kind("bad", path=path) + + for kind in ("function", "class", "method", "constant"): + assert mb_validate._require_public_symbol_kind(kind, path=path) == kind + with pytest.raises( + BaselineValidationError, + match="public symbol 'kind' is invalid", + ): + 
mb_validate._require_public_symbol_kind("bad", path=path) + + assert mb_validate._require_exported_via("all", path=path) == "all" + assert mb_validate._require_exported_via("name", path=path) == "name" + with pytest.raises( + BaselineValidationError, + match="public symbol 'exported_via' is invalid", + ): + mb_validate._require_exported_via("bad", path=path) + + +def test_metrics_baseline_parse_api_surface_snapshot_validation_edges( + tmp_path: Path, +) -> None: + path = tmp_path / "metrics-baseline.json" + + with pytest.raises(BaselineValidationError, match="'api_surface' must be object"): + mb_validate._parse_api_surface_snapshot([], path=path) + + with pytest.raises( + BaselineValidationError, + match=r"'api_surface\.modules' must be list", + ): + mb_validate._parse_api_surface_snapshot({"modules": "bad"}, path=path) + + with pytest.raises( + BaselineValidationError, match="api surface module must be object" + ): + mb_validate._parse_api_surface_snapshot({"modules": ["bad"]}, path=path) + + with pytest.raises( + BaselineValidationError, match="api surface symbols must be list" + ): + mb_validate._parse_api_surface_snapshot( + { + "modules": [ + { + "module": "pkg.mod", + "filepath": "pkg/mod.py", + "symbols": "bad", + } + ] + }, + path=path, + ) + + with pytest.raises( + BaselineValidationError, match="api surface symbol must be object" + ): + mb_validate._parse_api_surface_snapshot( + { + "modules": [ + { + "module": "pkg.mod", + "filepath": "pkg/mod.py", + "symbols": ["bad"], + } + ] + }, + path=path, + ) + + with pytest.raises( + BaselineValidationError, + match="api surface symbol requires 'local_name' or 'qualname'", + ): + mb_validate._parse_api_surface_snapshot( + { + "modules": [ + { + "module": "pkg.mod", + "filepath": "pkg/mod.py", + "symbols": [{"kind": "function", "exported_via": "name"}], + } + ] + }, + path=path, + ) + + with pytest.raises( + BaselineValidationError, match="api surface params must be list" + ): + 
mb_validate._parse_api_surface_snapshot( + { + "modules": [ + { + "module": "pkg.mod", + "filepath": "pkg/mod.py", + "symbols": [ + { + "local_name": "run", + "kind": "function", + "exported_via": "name", + "params": "bad", + } + ], + } + ] + }, + path=path, + ) + + with pytest.raises(BaselineValidationError, match="api param must be object"): + mb_validate._parse_api_surface_snapshot( + { + "modules": [ + { + "module": "pkg.mod", + "filepath": "pkg/mod.py", + "symbols": [ + { + "local_name": "run", + "kind": "function", + "exported_via": "name", + "params": ["bad"], + } + ], + } + ] + }, + path=path, + ) + + with pytest.raises( + BaselineValidationError, match="api param 'has_default' must be bool" + ): + mb_validate._parse_api_surface_snapshot( + { + "modules": [ + { + "module": "pkg.mod", + "filepath": "pkg/mod.py", + "symbols": [ + { + "local_name": "run", + "kind": "function", + "exported_via": "name", + "params": [ + { + "name": "value", + "kind": "pos_or_kw", + "has_default": "bad", + } + ], + } + ], + } + ] + }, + path=path, + ) + + def test_metrics_baseline_load_json_read_oserror_status( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: diff --git a/tests/test_pipeline_metrics.py b/tests/test_pipeline_metrics.py index ca1ab38..97d7b38 100644 --- a/tests/test_pipeline_metrics.py +++ b/tests/test_pipeline_metrics.py @@ -6,6 +6,7 @@ from __future__ import annotations +from collections.abc import Callable from dataclasses import replace from pathlib import Path from typing import cast @@ -27,12 +28,21 @@ from codeclone.core.bootstrap import _resolve_optional_runtime_path from codeclone.core.coverage_payload import _coverage_join_rows, _coverage_join_summary from codeclone.core.discovery_cache import ( + _api_param_kind, _api_param_spec_from_cache_dict, _api_surface_from_cache_dict, _cache_dict_int_fields, _cache_dict_module_fields, + _class_metric_from_cache_row, + _dead_candidate_from_cache_row, + _dead_candidate_kind, 
_docstring_coverage_from_cache_dict, + _exported_via_kind, + _import_type, + _module_dep_from_cache_row, _public_symbol_from_cache_dict, + _public_symbol_kind, + _risk_level, _typing_coverage_from_cache_dict, ) from codeclone.core.discovery_cache import ( @@ -744,6 +754,180 @@ def test_load_cached_metrics_extended_decodes_adoption_and_api_surface() -> None assert api_surface.symbols[0].qualname == "pkg.mod:run" +@pytest.mark.parametrize( + ("helper", "accepted"), + ( + (_api_param_kind, ("pos_only", "pos_or_kw", "vararg", "kw_only", "kwarg")), + (_public_symbol_kind, ("function", "class", "method", "constant")), + (_exported_via_kind, ("all", "name")), + (_risk_level, ("low", "medium", "high")), + (_import_type, ("import", "from_import")), + (_dead_candidate_kind, ("function", "class", "method", "import")), + ), +) +def test_discovery_cache_literal_helpers_accept_known_values_and_reject_unknowns( + helper: Callable[[object], object | None], + accepted: tuple[str, ...], +) -> None: + for value in accepted: + assert helper(value) == value + assert helper("broken") is None + + +def test_discovery_cache_parsers_reject_invalid_rows_and_skip_invalid_entries() -> None: + assert _api_param_spec_from_cache_dict([]) is None + assert ( + _public_symbol_from_cache_dict( + { + "qualname": "pkg.mod:run", + "kind": "broken", + "start_line": 1, + "end_line": 2, + "exported_via": "name", + "returns_hash": "", + "params": [], + } + ) + is None + ) + assert ( + _api_surface_from_cache_dict( + { + "module": "pkg.mod", + "filepath": "pkg/mod.py", + "all_declared": [1], + "symbols": [], + } + ) + is None + ) + assert ( + _class_metric_from_cache_row( + { + "qualname": "pkg.mod:Service", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 10, + "cbo": 3, + "lcom4": 2, + "method_count": 2, + "instance_var_count": 1, + "risk_coupling": "broken", + "risk_cohesion": "high", + } + ) + is None + ) + assert ( + _module_dep_from_cache_row( + { + "source": "pkg.mod", + "target": 
"pkg.dep", + "import_type": "broken", + "line": 3, + } + ) + is None + ) + assert ( + _dead_candidate_from_cache_row( + { + "qualname": "pkg.mod:unused", + "local_name": "unused", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 2, + "kind": "broken", + } + ) + is None + ) + + entry: CacheEntry = { + "stat": {"mtime_ns": 1, "size": 1}, + "units": [], + "blocks": [], + "segments": [], + "class_metrics": [ + { + "qualname": "pkg.mod:Service", + "filepath": "pkg/mod.py", + "start_line": 1, + "end_line": 10, + "cbo": 3, + "lcom4": 2, + "method_count": 2, + "instance_var_count": 1, + "risk_coupling": "high", + "risk_cohesion": "medium", + "coupled_classes": ["pkg.dep"], + }, + { + "qualname": "pkg.mod:Broken", + "filepath": "pkg/mod.py", + "start_line": 11, + "end_line": 20, + "cbo": 1, + "lcom4": 1, + "method_count": 1, + "instance_var_count": 0, + "risk_coupling": "broken", + "risk_cohesion": "low", + }, + ], + "module_deps": [ + { + "source": "pkg.mod", + "target": "pkg.dep", + "import_type": "import", + "line": 3, + }, + { + "source": "pkg.mod", + "target": "pkg.bad", + "import_type": "broken", + "line": 4, + }, + ], + "dead_candidates": [ + { + "qualname": "pkg.mod:unused", + "local_name": "unused", + "filepath": "pkg/mod.py", + "start_line": 30, + "end_line": 32, + "kind": "function", + "suppressed_rules": ["dead-code"], + }, + { + "qualname": "pkg.mod:broken", + "local_name": "broken", + "filepath": "pkg/mod.py", + "start_line": 40, + "end_line": 42, + "kind": "broken", + }, + ], + "referenced_names": ["run"], + "referenced_qualnames": ["pkg.mod:run"], + } + + ( + class_metrics, + module_deps, + dead_candidates, + referenced_names, + referenced_qualnames, + *_rest, + ) = _load_cached_metrics_extended(entry, filepath="tests/test_mod.py") + + assert len(class_metrics) == 1 + assert len(module_deps) == 1 + assert len(dead_candidates) == 1 + assert referenced_names == frozenset() + assert referenced_qualnames == frozenset() + + def 
test_metric_gate_reasons_collects_all_enabled_reasons() -> None: reasons = _metric_gate_reasons_from_metrics( project_metrics=_project_metrics(dead_confidence="high"), diff --git a/tests/test_report_contract_coverage.py b/tests/test_report_contract_coverage.py index fc536b1..5c86414 100644 --- a/tests/test_report_contract_coverage.py +++ b/tests/test_report_contract_coverage.py @@ -1345,6 +1345,72 @@ def test_report_contract_renderers_include_coverage_join_section_when_present() ) +def test_report_contract_markdown_renders_empty_suppressed_clone_section() -> None: + payload = _rich_report_document() + findings = cast(dict[str, object], payload["findings"]) + groups = cast(dict[str, object], findings["groups"]) + clone_groups = cast(dict[str, object], groups["clones"]) + clone_groups["suppressed"] = {"functions": [], "blocks": [], "segments": []} + + markdown = render_markdown_report_document(payload) + + assert "Suppressed Golden Fixture Clone Groups" in markdown + assert "_None._" in markdown + + +def test_report_contract_markdown_truncates_suppressed_clone_locations() -> None: + payload = _rich_report_document() + findings = cast(dict[str, object], payload["findings"]) + groups = cast(dict[str, object], findings["groups"]) + clone_groups = cast(dict[str, object], groups["clones"]) + clone_groups["suppressed"] = { + "functions": [ + { + "id": "clone:function:golden", + "category": "function", + "clone_type": "Type-2", + "severity": "warning", + "source_scope": { + "impact_scope": "runtime", + "dominant_kind": "production", + }, + "spread": {"files": 7, "functions": 7}, + "count": 7, + "suppression_rule": "golden_fixture", + "suppression_source": "project_config", + "matched_patterns": ["tests/fixtures/golden_*"], + "items": [ + { + "relative_path": f"tests/golden_{idx}.py", + "qualname": f"tests.golden_{idx}:run", + "start_line": 10 + idx, + "end_line": 11 + idx, + } + for idx in range(7) + ], + } + ], + "blocks": [], + "segments": [], + } + + markdown = 
render_markdown_report_document(payload) + + assert "... and 2 more occurrence(s)" in markdown + + +def test_report_contract_markdown_supports_legacy_god_modules_metrics_key() -> None: + payload = _rich_report_document() + metrics = cast(dict[str, object], payload["metrics"]) + families = cast(dict[str, object], metrics["families"]) + families["god_modules"] = families.pop("overloaded_modules") + + markdown = render_markdown_report_document(payload) + + assert "### Overloaded Modules" in markdown + assert "candidate_status=candidate" in markdown + + def test_report_contract_includes_canonical_overloaded_modules_family() -> None: payload = _rich_report_document() @@ -2274,6 +2340,15 @@ def test_render_sarif_report_document_without_srcroot_keeps_relative_payload() - assert cast(str, cast(dict[str, object], result["message"])["text"]).endswith(".") +def test_sarif_rule_spec_covers_coverage_scope_gap_design_findings() -> None: + spec = _sarif_rule_spec( + {"family": "design", "category": "coverage", "kind": "coverage_scope_gap"} + ) + + assert spec.rule_id == "CDESIGN006" + assert spec.short_description == "Coverage scope gap" + + def test_collect_paths_from_metrics_covers_all_metric_families_and_skips_missing() -> ( None ): From f648d1fcd24059600dfc583c33e0ad7931ffbf97 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Tue, 21 Apr 2026 22:45:52 +0500 Subject: [PATCH 08/32] refactor(app): global and comprehensive refactoring (Stage #1). 
--- .gitignore | 5 + codeclone/_html_report/_assets/css.py | 1457 ----------------- codeclone/_html_report/_assets/js.py | 843 ---------- .../_html_report/_primitives/data_attrs.py | 30 - codeclone/_html_report/_primitives/escape.py | 25 - codeclone/_html_report/_primitives/filters.py | 59 - codeclone/_html_report/_widgets/badges.py | 272 --- codeclone/_html_report/_widgets/components.py | 106 -- codeclone/_html_report/_widgets/glossary.py | 100 -- codeclone/_html_report/_widgets/icons.py | 214 --- codeclone/_html_report/_widgets/snippets.py | 207 --- codeclone/_html_report/_widgets/tables.py | 127 -- codeclone/_html_report/_widgets/tabs.py | 60 - 13 files changed, 5 insertions(+), 3500 deletions(-) delete mode 100644 codeclone/_html_report/_assets/css.py delete mode 100644 codeclone/_html_report/_assets/js.py delete mode 100644 codeclone/_html_report/_primitives/data_attrs.py delete mode 100644 codeclone/_html_report/_primitives/escape.py delete mode 100644 codeclone/_html_report/_primitives/filters.py delete mode 100644 codeclone/_html_report/_widgets/badges.py delete mode 100644 codeclone/_html_report/_widgets/components.py delete mode 100644 codeclone/_html_report/_widgets/glossary.py delete mode 100644 codeclone/_html_report/_widgets/icons.py delete mode 100644 codeclone/_html_report/_widgets/snippets.py delete mode 100644 codeclone/_html_report/_widgets/tables.py delete mode 100644 codeclone/_html_report/_widgets/tabs.py diff --git a/.gitignore b/.gitignore index e3ad2eb..71bd32f 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,8 @@ site/ /package-lock.json extensions/vscode-codeclone/node_modules /coverage.xml +/.cgcignore +/mcp.json +/scripts/refactor_guard.sh +/docs/refactoring-spec.md +/smoke_cli.sh diff --git a/codeclone/_html_report/_assets/css.py b/codeclone/_html_report/_assets/css.py deleted file mode 100644 index 66a4609..0000000 --- a/codeclone/_html_report/_assets/css.py +++ /dev/null @@ -1,1457 +0,0 @@ -# This Source Code Form is subject to 
the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""CSS design system for the HTML report — tokens, components, layout.""" - -from __future__ import annotations - -# --------------------------------------------------------------------------- -# Design tokens -# --------------------------------------------------------------------------- - -_TOKENS_DARK = """\ -:root{ - --font-sans:"Inter","Inter Variable",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Oxygen,Ubuntu,sans-serif; - --font-display:"Inter","Inter Variable",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif; - --font-mono:"JetBrains Mono",ui-monospace,SFMono-Regular,"SF Mono",Menlo,Consolas,monospace; - --font-numeric:"JetBrains Mono",ui-monospace,SFMono-Regular,"SF Mono",Menlo,Consolas,monospace; - - /* Surface — chromatic grays tinted toward the indigo accent (hue 275). - Every surface shares the brand hue at tiny chroma so the UI feels like - one palette, not gray slate + a purple sticker. 
*/ - --bg-body:oklch(16% 0.018 275); - --bg-surface:oklch(20% 0.022 275); - --bg-raised:oklch(24% 0.028 275); - --bg-overlay:oklch(29% 0.033 275); - --bg-subtle:oklch(34% 0.038 275); - - /* Border — same hue, higher chroma for legibility */ - --border:oklch(32% 0.035 275); - --border-strong:oklch(44% 0.045 275); - - /* Text — muted greys keep a trace of indigo so they feel alive */ - --text-primary:oklch(95% 0.010 275); - --text-secondary:oklch(74% 0.028 275); - --text-muted:oklch(58% 0.038 275); - - /* Accent — codeclone indigo (brand, unchanged) */ - --accent-primary:#6366f1; - --accent-hover:#818cf8; - --accent-muted:color-mix(in oklch,#6366f1 25%,transparent); - --accent-soft:oklch(30% 0.12 275); - - /* Semantic — brand-adjacent, hue-rotated so they read as siblings - of the indigo instead of raw Tailwind defaults */ - --success:oklch(74% 0.15 162); - --success-muted:color-mix(in oklch,oklch(74% 0.15 162) 18%,transparent); - --warning:oklch(80% 0.15 82); - --warning-muted:color-mix(in oklch,oklch(80% 0.15 82) 18%,transparent); - --error:oklch(70% 0.18 18); - --error-muted:color-mix(in oklch,oklch(70% 0.18 18) 18%,transparent); - --danger:oklch(70% 0.18 18); - --info:oklch(72% 0.13 238); - --info-muted:color-mix(in oklch,oklch(72% 0.13 238) 18%,transparent); - - /* elevation */ - --shadow-sm:0 1px 2px rgba(0,0,0,.25); - --shadow-md:0 2px 8px rgba(0,0,0,.3); - --shadow-lg:0 4px 16px rgba(0,0,0,.35); - --shadow-xl:0 8px 32px rgba(0,0,0,.4); - - /* radii */ - --radius-sm:4px; - --radius-md:6px; - --radius-lg:8px; - --radius-xl:12px; - - /* spacing */ - --sp-1:4px;--sp-2:8px;--sp-3:12px;--sp-4:16px;--sp-5:20px;--sp-6:24px;--sp-8:32px;--sp-10:40px; - - /* transitions */ - --ease:cubic-bezier(.4,0,.2,1); - --dur-fast:120ms; - --dur-normal:200ms; - --dur-slow:300ms; - - /* sizes */ - --topbar-h:72px; - --container-max:1360px; - - color-scheme:dark; -} -""" - -_TOKENS_LIGHT = """\ -/* Light palette — mirror of the dark one at higher lightness + lower chroma. 
- Every surface/border/text token still carries a trace of indigo hue 275 - so the whole theme feels like one family in both modes. */ -@media(prefers-color-scheme:light){ - :root:not([data-theme]){ - --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff; - --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); - --border:oklch(88% 0.020 275);--border-strong:oklch(78% 0.028 275); - --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); - --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); - --accent-soft:oklch(94% 0.045 275); - --success:oklch(52% 0.16 162);--success-muted:color-mix(in oklch,oklch(52% 0.16 162) 12%,transparent); - --warning:oklch(60% 0.15 65);--warning-muted:color-mix(in oklch,oklch(60% 0.15 65) 12%,transparent); - --error:oklch(55% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); - --danger:oklch(55% 0.22 20);--info:oklch(52% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); - --shadow-sm:0 1px 2px rgba(0,0,0,.06);--shadow-md:0 2px 8px rgba(0,0,0,.08); - --shadow-lg:0 4px 16px rgba(0,0,0,.1);--shadow-xl:0 8px 32px rgba(0,0,0,.12); - color-scheme:light; - } -} -[data-theme="light"]{ - --bg-body:oklch(98.5% 0.006 275);--bg-surface:#ffffff; - --bg-raised:oklch(97% 0.010 275);--bg-overlay:oklch(93% 0.015 275);--bg-subtle:oklch(88% 0.020 275); - --border:oklch(88% 0.020 275);--border-strong:oklch(78% 0.028 275); - --text-primary:oklch(22% 0.040 275);--text-secondary:oklch(42% 0.048 275);--text-muted:oklch(58% 0.040 275); - --accent-primary:#4f46e5;--accent-hover:#6366f1;--accent-muted:color-mix(in oklch,#4f46e5 12%,transparent); - --accent-soft:oklch(94% 0.045 275); - --success:oklch(52% 0.16 162);--success-muted:color-mix(in oklch,oklch(52% 0.16 162) 12%,transparent); - --warning:oklch(60% 0.15 65);--warning-muted:color-mix(in 
oklch,oklch(60% 0.15 65) 12%,transparent); - --error:oklch(55% 0.22 20);--error-muted:color-mix(in oklch,oklch(55% 0.22 20) 12%,transparent); - --danger:oklch(55% 0.22 20);--info:oklch(52% 0.18 238);--info-muted:color-mix(in oklch,oklch(52% 0.18 238) 12%,transparent); - --shadow-sm:0 1px 2px rgba(0,0,0,.06);--shadow-md:0 2px 8px rgba(0,0,0,.08); - --shadow-lg:0 4px 16px rgba(0,0,0,.1);--shadow-xl:0 8px 32px rgba(0,0,0,.12); - color-scheme:light; -} -""" - -# --------------------------------------------------------------------------- -# Reset + base -# --------------------------------------------------------------------------- - -_RESET = """\ -*,*::before,*::after{box-sizing:border-box;margin:0;padding:0} -html{-webkit-text-size-adjust:100%;text-size-adjust:100%;-webkit-font-smoothing:antialiased; - -moz-osx-font-smoothing:grayscale;scroll-behavior:smooth;scrollbar-gutter:stable} -body{font-family:var(--font-sans);font-size:14px;line-height:1.6;color:var(--text-primary); - background:var(--bg-body);overflow-x:hidden; - /* Inter stylistic alternates: - zero — slashed zero (disambiguates 0 from O in metric values) - ss02 — disambiguation set (I/l/1/0 clear apart) - ss01 — open digits (open 4, 6, 9) - cv11 — single-story a (the "designer" look) - cv02/03/04 — open alternates for 4/6/3 - Together these give Inter its Vercel / Linear / Radix feel. 
*/ - font-feature-settings:"zero","ss02","ss01","cv11","cv02","cv03","cv04"; - font-optical-sizing:auto} -code,pre,kbd{font-family:var(--font-mono);font-size:13px;font-feature-settings:normal} -a{color:var(--accent-primary);text-decoration:none} -a:hover{color:var(--accent-hover);text-decoration:underline} -h1,h2,h3,h4{font-family:var(--font-display);font-weight:600;line-height:1.3;color:var(--text-primary); - letter-spacing:-0.01em} -h1{font-size:1.5rem}h2{font-size:1.25rem}h3{font-size:1.1rem} -ul,ol{list-style:none} -button,input,select{font:inherit;color:inherit} -summary{cursor:pointer} -.muted{color:var(--text-muted);font-size:.85em} -""" - -# --------------------------------------------------------------------------- -# Layout -# --------------------------------------------------------------------------- - -_LAYOUT = """\ -.container{max-width:var(--container-max);margin:0 auto;padding:0 var(--sp-6)} - -/* Topbar */ -.topbar{position:sticky;top:0;z-index:100;background:var(--bg-surface);border-bottom:1px solid var(--border); - box-shadow:var(--shadow-sm)} -.topbar-inner{display:flex;align-items:center;justify-content:space-between; - height:72px;padding:0 var(--sp-6);max-width:var(--container-max);margin:0 auto} -.brand{display:flex;align-items:center;gap:var(--sp-3);min-width:0;flex:1} -.brand-logo{flex-shrink:0} -.brand-text{display:flex;flex-direction:column;gap:2px;min-width:0;flex:1} -.brand h1{display:flex;flex-wrap:wrap;align-items:baseline;gap:var(--sp-1);font-size:1.15rem; - font-weight:700;color:var(--text-primary);line-height:1.3;min-width:0} -.brand-meta{font-size:.78rem;color:var(--text-muted);overflow-wrap:anywhere} -.brand-project{display:inline-flex;flex-wrap:wrap;align-items:baseline;gap:4px; - font-weight:500;color:var(--text-secondary);min-width:0} -.brand-project-name{font-family:var(--font-mono);font-size:.85em;font-weight:500;padding:1px 5px; - border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--accent-primary); - 
max-width:100%;overflow-wrap:anywhere} -.topbar-actions{display:flex;align-items:center;gap:var(--sp-2);flex-shrink:0;flex-wrap:wrap} - -/* Theme toggle */ -.theme-toggle{display:inline-flex;align-items:center;gap:var(--sp-1); - padding:var(--sp-1) var(--sp-3);background:none;border:1px solid var(--border); - border-radius:var(--radius-md);cursor:pointer;color:var(--text-muted);font-size:.85rem; - font-weight:500;font-family:inherit;transition:all var(--dur-fast) var(--ease)} -.theme-toggle:hover{color:var(--text-primary);background:var(--bg-raised);border-color:var(--border-strong)} -.theme-toggle svg{width:16px;height:16px} -/* Adaptive icon: show sun in light mode, moon in dark mode. */ -.theme-icon{display:none} -:root[data-theme="light"] .theme-icon-sun{display:inline-flex} -:root[data-theme="dark"] .theme-icon-moon{display:inline-flex} - -/* Main tabs — full-width pill bar */ -.main-tabs-wrap{position:sticky;top:var(--topbar-h);z-index:90;padding:var(--sp-3) 0 0; - background:var(--bg-body)} -.main-tabs{display:flex;gap:var(--sp-1);padding:var(--sp-1); - background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-lg); - overflow-x:auto;scrollbar-width:none;-webkit-overflow-scrolling:touch} -.main-tabs::-webkit-scrollbar{display:none} -.main-tab{position:relative;flex:1;display:inline-flex;align-items:center;justify-content:center; - gap:var(--sp-1);text-align:center;padding:var(--sp-2) var(--sp-3);background:none; - border:none;cursor:pointer;font-size:.85rem;font-weight:500;color:var(--text-muted); - white-space:nowrap;border-radius:var(--radius-md);transition:all var(--dur-fast) var(--ease)} -.main-tab:hover{color:var(--text-primary);background:var(--bg-raised)} -.main-tab[aria-selected="true"]{color:var(--accent-primary);background:var(--accent-muted)} -.main-tab-icon{flex-shrink:0;opacity:.72} -.main-tab-label{display:inline-flex;align-items:center} 
-.tab-count{display:inline-flex;align-items:center;justify-content:center;min-width:18px; - height:18px;padding:0 5px;font-size:.68rem;font-weight:700;border-radius:var(--radius-sm); - background:var(--bg-overlay);color:var(--text-muted);margin-left:var(--sp-1)} -.main-tab[aria-selected="true"] .tab-count{background:var(--accent-primary); - color:#fff} - -/* Tab panels */ -.tab-panel{display:none;padding:var(--sp-6) 0;contain:layout style} -.tab-panel.active{display:block} -""" - -# --------------------------------------------------------------------------- -# Components: buttons, inputs, selects -# --------------------------------------------------------------------------- - -_CONTROLS = """\ -/* Buttons */ -.btn{display:inline-flex;align-items:center;gap:var(--sp-1);padding:var(--sp-1) var(--sp-3); - font-size:.8rem;font-weight:500;border:1px solid var(--border);border-radius:var(--radius-md); - background:var(--bg-raised);color:var(--text-secondary);cursor:pointer;white-space:nowrap; - transition:all var(--dur-fast) var(--ease)} -.btn:hover{border-color:var(--border-strong);color:var(--text-primary);background:var(--bg-overlay)} -.btn-prov{position:relative} -.btn-prov .prov-dot{width:8px;height:8px;border-radius:50%;flex-shrink:0} -.btn-prov .prov-dot.dot-green{background:var(--success)} -.btn-prov .prov-dot.dot-amber{background:var(--warning)} -.btn-prov .prov-dot.dot-red{background:var(--error)} -.btn-prov .prov-dot.dot-neutral{background:var(--text-muted)} - -/* Provenance pill — single compact status chip in topbar */ -.prov-pill{display:inline-flex;align-items:center;gap:var(--sp-1); - padding:var(--sp-1) var(--sp-3);background:none; - border:1px solid var(--border);border-radius:var(--radius-md);cursor:pointer; - color:var(--text-muted);transition:all var(--dur-fast) var(--ease); - font-size:.85rem;font-weight:500;font-family:inherit} -.prov-pill:hover{background:var(--bg-raised);border-color:var(--border-strong); - color:var(--text-primary)} 
-.prov-pill:focus-visible{outline:2px solid var(--accent-primary);outline-offset:1px} -.prov-pill-icon{flex-shrink:0;opacity:.75} -.prov-pill:hover .prov-pill-icon{opacity:1} -.prov-pill-label{font-size:.85rem;font-weight:500;white-space:nowrap} -.prov-pill--green .prov-pill-icon{color:var(--success)} -.prov-pill--green:hover{border-color:color-mix(in srgb,var(--success) 55%,var(--border))} -.prov-pill--amber .prov-pill-icon{color:var(--warning)} -.prov-pill--amber:hover{border-color:color-mix(in srgb,var(--warning) 55%,var(--border))} -.prov-pill--red .prov-pill-icon{color:var(--error)} -.prov-pill--red:hover{border-color:color-mix(in srgb,var(--error) 60%,var(--border))} -.prov-pill--neutral .prov-pill-icon{color:var(--text-muted)} -.btn.ghost{background:none;border-color:transparent} -.btn.ghost:hover{background:var(--bg-raised);border-color:var(--border)} -.btn.btn-icon{padding:var(--sp-1);min-width:28px;justify-content:center} -.btn svg{width:14px;height:14px} - -/* Inputs */ -input[type="text"]{padding:var(--sp-1) var(--sp-3);font-size:.85rem;border:1px solid var(--border); - border-radius:var(--radius-md);background:var(--bg-body);color:var(--text-primary);outline:none; - transition:border-color var(--dur-fast) var(--ease)} -input[type="text"]:focus{border-color:var(--accent-primary);box-shadow:0 0 0 2px var(--accent-muted)} -input[type="text"]::placeholder{color:var(--text-muted)} - -/* Selects */ -.select{padding:var(--sp-1) var(--sp-3);padding-right:var(--sp-6);font-size:.8rem; - border:1px solid var(--border);border-radius:var(--radius-md);background:var(--bg-raised); - color:var(--text-secondary);cursor:pointer;appearance:none; - background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' fill='none' stroke='%236b6f88' stroke-width='2'%3E%3Cpath d='M3 4.5l3 3 3-3'/%3E%3C/svg%3E"); - background-repeat:no-repeat;background-position:right 8px center} 
-.select:focus{border-color:var(--accent-primary);outline:none} - -/* Checkbox labels */ -.inline-check{display:inline-flex;align-items:center;gap:var(--sp-1);font-size:.8rem; - color:var(--text-muted);cursor:pointer;white-space:nowrap} -.inline-check input[type="checkbox"]{accent-color:var(--accent-primary);width:14px;height:14px} -""" - -# --------------------------------------------------------------------------- -# Search box -# --------------------------------------------------------------------------- - -_SEARCH = """\ -.search-box{position:relative;display:flex;align-items:center} -.search-ico{position:absolute;left:var(--sp-2);color:var(--text-muted);pointer-events:none; - display:flex;align-items:center} -.search-ico svg{width:14px;height:14px} -.search-box input[type="text"]{padding-left:28px;width:200px} -.clear-btn{position:absolute;right:var(--sp-1);background:none;border:none;cursor:pointer; - color:var(--text-muted);padding:2px;display:flex;align-items:center;opacity:0; - transition:opacity var(--dur-fast) var(--ease)} -.search-box input:not(:placeholder-shown)~.clear-btn{opacity:1} -.clear-btn:hover{color:var(--text-primary)} -.clear-btn svg{width:14px;height:14px} -""" - -# --------------------------------------------------------------------------- -# Toolbar + pagination -# --------------------------------------------------------------------------- - -_TOOLBAR = """\ -.toolbar{display:flex;flex-wrap:wrap;align-items:center;gap:var(--sp-2); - padding:var(--sp-3) var(--sp-4);background:var(--bg-raised);border:1px solid var(--border); - border-radius:var(--radius-lg);margin-bottom:var(--sp-4)} -.toolbar-left{display:flex;flex-wrap:wrap;align-items:center;gap:var(--sp-2);flex:1} -.toolbar-right{display:flex;align-items:center;gap:var(--sp-2)} - -.pagination{display:flex;align-items:center;gap:var(--sp-1)} -.page-meta{font-size:.8rem;color:var(--text-muted);white-space:nowrap;min-width:100px;text-align:center} - -/* Filters popover: one button 
collapses Context/Type/Spread/min-occ into a menu */ -.filters-popover{position:relative} -.filters-btn{display:inline-flex;align-items:center;gap:var(--sp-1);white-space:nowrap} -.filters-btn-ico{flex:none} -.filters-count{display:inline-flex;align-items:center;justify-content:center; - min-width:18px;height:18px;padding:0 5px;border-radius:999px; - background:var(--accent-primary);color:#fff;font-size:.68rem;font-weight:600; - line-height:1} -.filters-btn[aria-expanded="true"]{border-color:var(--accent-primary); - color:var(--accent-primary)} -.filters-menu{position:absolute;top:calc(100% + var(--sp-1));left:0;z-index:20; - min-width:240px;display:flex;flex-direction:column;gap:var(--sp-2); - padding:var(--sp-3);background:var(--bg-surface); - border:1px solid var(--border);border-radius:var(--radius-md); - box-shadow:var(--shadow-lg)} -.filters-menu[hidden]{display:none} -.filters-row{display:flex;align-items:center;gap:var(--sp-2)} -.filters-row .select{flex:1;min-width:0} -.filters-label{font-size:.75rem;color:var(--text-muted);min-width:60px; - font-weight:500} -.filters-row.inline-check{gap:var(--sp-2);font-size:.82rem;color:var(--text-secondary); - cursor:pointer} -.filters-row.inline-check input[type="checkbox"]{margin:0} - -/* Expand/collapse toggle — single button that flips state */ -.expand-toggle{white-space:nowrap} -.expand-toggle[data-expanded="true"]{background:var(--bg-overlay); - border-color:var(--border-strong)} - -/* Suggestions count pill (right side of the shared toolbar). 
*/ -.suggestions-count-label{font-size:.8rem;color:var(--text-muted);font-weight:500; - font-variant-numeric:tabular-nums;white-space:nowrap} -""" - -# --------------------------------------------------------------------------- -# Insight banners -# --------------------------------------------------------------------------- - -_INSIGHT = """\ -.insight-banner{padding:var(--sp-3) var(--sp-4);border-radius:var(--radius-md); - margin-bottom:var(--sp-4);border-left:3px solid var(--border);background:none} -.insight-question{font-size:.78rem;font-weight:500;color:var(--text-muted); - text-transform:uppercase;letter-spacing:.03em;margin-bottom:2px} -.insight-answer{font-size:.82rem;color:var(--text-secondary);line-height:1.5} - -.insight-ok{border-left-color:var(--success);background:var(--success-muted)} -.insight-warn{border-left-color:var(--warning);background:var(--warning-muted)} -.insight-risk{border-left-color:var(--error);background:var(--error-muted)} -.insight-info{border-left-color:var(--info);background:var(--info-muted)} -""" - -# --------------------------------------------------------------------------- -# Tables -# --------------------------------------------------------------------------- - -_TABLES = """\ -.table-wrap{display:block;inline-size:100%;max-inline-size:100%;min-inline-size:0;overflow-x:auto; - overflow-y:hidden;border:1px solid var(--border);border-radius:var(--radius-lg);margin-bottom:var(--sp-4); - background: - linear-gradient(to right,var(--bg-surface) 30%,transparent) left center / 40px 100% no-repeat local, - linear-gradient(to left,var(--bg-surface) 30%,transparent) right center / 40px 100% no-repeat local, - linear-gradient(to right,rgba(0,0,0,.15),transparent) left center / 14px 100% no-repeat scroll, - linear-gradient(to left,rgba(0,0,0,.15),transparent) right center / 14px 100% no-repeat scroll} -.table{inline-size:max-content;min-inline-size:100%;border-collapse:collapse;font-size:.82rem; - font-family:var(--font-mono)} -.table 
th{position:sticky;top:0;z-index:2;padding:var(--sp-2) var(--sp-3);text-align:left;font-family:var(--font-sans); - font-weight:600;font-size:.75rem;text-transform:uppercase;letter-spacing:.05em; - color:var(--text-muted);background:var(--bg-overlay);border-bottom:1px solid var(--border); - white-space:nowrap;cursor:default;user-select:none} -.table th[data-sortable]{cursor:pointer} -.table th[data-sortable]:hover{color:var(--text-primary)} -.table th .sort-icon{display:inline-flex;margin-left:var(--sp-1);opacity:.4} -.table th[aria-sort] .sort-icon{opacity:1;color:var(--accent-primary)} -.table td{padding:var(--sp-2) var(--sp-3);border-bottom:1px solid var(--border);color:var(--text-secondary); - vertical-align:top} -.table tr:last-child td{border-bottom:none} -.table tr:hover td{background:var(--bg-raised)} -.table .col-name{font-weight:500;color:var(--text-primary);max-width:360px;overflow:hidden; - text-overflow:ellipsis;white-space:nowrap} -.table .col-file,.table .col-path{color:var(--text-muted);max-width:240px;overflow:hidden; - text-overflow:ellipsis;white-space:nowrap} -.table .col-number,.table .col-num{font-variant-numeric:tabular-nums;text-align:right;white-space:nowrap} -.table .col-risk,.table .col-badge,.table .col-cat{white-space:nowrap} -.table .col-steps{max-width:120px;word-break:break-word} -.table .col-wide{max-width:320px;word-break:break-all} -.table-empty{padding:var(--sp-8);text-align:center;color:var(--text-muted);font-size:.9rem} -""" - -# --------------------------------------------------------------------------- -# Sub-tabs (clone-nav / split-tabs) -# --------------------------------------------------------------------------- - -_SUB_TABS = """\ -.clone-nav{display:flex;gap:2px;border-bottom:1px solid var(--border);margin-bottom:var(--sp-4); - overflow-x:auto;scrollbar-width:none} -.clone-nav::-webkit-scrollbar{display:none} -.clone-nav-btn{position:relative;padding:var(--sp-2) var(--sp-4);background:none;border:none; - 
cursor:pointer;font-size:.85rem;font-weight:500;color:var(--text-muted);white-space:nowrap; - transition:color var(--dur-fast) var(--ease)} -.clone-nav-btn:hover{color:var(--text-primary)} -.clone-nav-btn.active{color:var(--accent-primary)} -.clone-nav-btn.active::after{content:"";position:absolute;bottom:-1px;left:0;right:0; - height:2px;background:var(--accent-primary);border-radius:1px 1px 0 0} -.clone-panel{display:none} -.clone-panel.active{display:block} -""" - -# --------------------------------------------------------------------------- -# Sections + groups -# --------------------------------------------------------------------------- - -_SECTIONS = """\ -.section{margin-bottom:var(--sp-6)} -.subsection-title{font-size:1rem;font-weight:600;color:var(--text-primary); - margin-bottom:var(--sp-3);padding-bottom:var(--sp-2);border-bottom:1px solid var(--border)} -.section-body{display:flex;flex-direction:column;gap:var(--sp-3)} - -/* Clone groups */ -.group{border:1px solid var(--border);border-radius:var(--radius-lg);background:var(--bg-surface); - overflow:hidden;transition:box-shadow var(--dur-fast) var(--ease)} -.group:hover{box-shadow:var(--shadow-sm)} -.group-head{display:flex;align-items:center;justify-content:space-between;padding:var(--sp-3) var(--sp-4); - gap:var(--sp-3);cursor:pointer} -.group-head-left{display:flex;align-items:center;gap:var(--sp-3);min-width:0;flex:1} -.group-head-right{display:flex;align-items:center;gap:var(--sp-2);flex-shrink:0} -.group-toggle{background:none;border:none;cursor:pointer;color:var(--text-muted);padding:var(--sp-1); - display:flex;align-items:center;transition:transform var(--dur-normal) var(--ease);flex-shrink:0} -.group-toggle svg{width:16px;height:16px} -.group-toggle.expanded{transform:rotate(180deg)} -.group-info{min-width:0;flex:1} -.group-name{font-weight:600;font-size:.9rem;color:var(--text-primary);white-space:nowrap; - overflow:hidden;text-overflow:ellipsis;font-family:var(--font-mono)} 
-.group-summary{font-size:.8rem;color:var(--text-muted)} - -/* Badges */ -.clone-type-badge{font-size:.68rem;font-weight:500;padding:2px var(--sp-2); - border-radius:var(--radius-sm);background:var(--accent-muted);color:var(--accent-primary)} -.clone-count-badge{font-size:.68rem;font-weight:600;padding:2px var(--sp-2); - border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-secondary)} - -/* Group body */ -.group-body{border-top:1px solid var(--border);display:none} -.group-body.expanded{display:block} -.group-body.items.expanded{display:grid} -.group-compare-note{padding:var(--sp-2) var(--sp-4);font-size:.8rem;color:var(--text-muted); - background:var(--bg-raised);border-bottom:1px solid var(--border);font-style:italic} - -/* Group explain */ -.group-explain{padding:var(--sp-2) var(--sp-4);display:flex;flex-wrap:wrap;gap:var(--sp-1); - background:var(--bg-raised);border-bottom:1px solid var(--border)} -.group-explain-item{font-size:.68rem;padding:2px var(--sp-2);border-radius:var(--radius-sm); - background:var(--bg-overlay);color:var(--text-muted);font-family:var(--font-mono);white-space:nowrap} -.group-explain-warn{color:var(--warning);background:var(--warning-muted)} -.group-explain-muted{opacity:.7} -.group-explain-note{font-size:.75rem;color:var(--text-muted);font-style:italic;width:100%; - padding-top:var(--sp-1)} -""" - -# --------------------------------------------------------------------------- -# Items (clone instances) -# --------------------------------------------------------------------------- - -_ITEMS = """\ -.items{grid-template-columns:repeat(2,1fr);gap:0} -.items .item{border-right:1px solid var(--border);border-bottom:1px solid var(--border)} -.items .item:nth-child(2n){border-right:none} -.items .item:nth-last-child(-n+2){border-bottom:none} -.items .item:last-child{border-bottom:none} -.item{padding:0;min-width:0;overflow:hidden} -.item-header{display:flex;align-items:center;justify-content:space-between; - 
padding:var(--sp-2) var(--sp-3);background:var(--bg-raised);gap:var(--sp-2)} -.item-title{font-weight:500;font-size:.8rem;color:var(--text-primary);font-family:var(--font-mono); - white-space:nowrap;overflow:hidden;text-overflow:ellipsis;min-width:0;flex:1} -.item-loc{font-size:.72rem;color:var(--text-muted);font-family:var(--font-mono);white-space:nowrap;flex-shrink:0} -.item-compare-meta{padding:var(--sp-1) var(--sp-3);font-size:.72rem;color:var(--text-muted); - background:var(--bg-body);border-bottom:1px solid var(--border)} -""" - -# --------------------------------------------------------------------------- -# Code blocks -# --------------------------------------------------------------------------- - -_CODE = """\ -/* _html_snippets renders .codebox>.hitline/.line */ -.codebox{overflow-x:auto;font-size:12px;line-height:1.7;background:var(--bg-body);padding:var(--sp-2) 0;margin:0} -.codebox pre{margin:0;padding:0} -.codebox .line,.codebox .hitline{padding:0 var(--sp-4) 0 var(--sp-2);white-space:pre;display:block} -.codebox .line:hover{background:var(--bg-raised)} -.codebox .hitline{background:color-mix(in oklch,var(--accent-primary) 12%,transparent); - border-left:3px solid var(--accent-primary);padding-left:calc(var(--sp-2) - 3px)} -.codebox .hitline:hover{background:color-mix(in oklch,var(--accent-primary) 20%,transparent)} -""" - -# --------------------------------------------------------------------------- -# Risk / severity / source-kind badges -# --------------------------------------------------------------------------- - -_BADGES = """\ -.risk-badge,.severity-badge{display:inline-flex;align-items:center;font-size:.68rem;font-weight:600; - padding:2px var(--sp-2);border-radius:var(--radius-sm);text-transform:uppercase;letter-spacing:.02em} -.risk-critical,.severity-critical{background:var(--error-muted);color:var(--error)} -.risk-high,.severity-high{background:var(--error-muted);color:var(--error)} 
-.risk-warning,.severity-warning{background:var(--warning-muted);color:var(--warning)} -.risk-medium,.severity-medium{background:var(--warning-muted);color:var(--warning)} -.risk-low,.severity-low{background:var(--success-muted);color:var(--success)} -.risk-info,.severity-info{background:var(--info-muted);color:var(--info)} - -.source-kind-badge{display:inline-flex;align-items:center;font-size:.68rem;font-weight:500; - padding:2px var(--sp-2);border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-muted)} -.source-kind-production{background:var(--error-muted);color:var(--error)} -.source-kind-test,.source-kind-test_util{background:var(--info-muted);color:var(--info)} -.source-kind-fixture,.source-kind-conftest{background:var(--warning-muted);color:var(--warning)} -.source-kind-import,.source-kind-cross_kind{background:var(--accent-muted);color:var(--accent-primary)} -.category-badge{display:inline-flex;align-items:center;gap:3px;font-size:.68rem; - font-family:var(--font-mono);padding:2px var(--sp-2);border-radius:var(--radius-sm); - background:var(--bg-overlay);color:var(--text-muted);white-space:nowrap} -.category-badge-key{font-weight:400;color:var(--text-muted)} -.category-badge-val{font-weight:600;color:var(--text-secondary)} -.finding-why-chips{display:flex;flex-wrap:wrap;gap:var(--sp-1);margin:var(--sp-1) 0} -.finding-why-chips .category-badge{font-size:.68rem} -""" - -# --------------------------------------------------------------------------- -# Overview -# --------------------------------------------------------------------------- - -_OVERVIEW = """\ -/* Dashboard */ -/* KPI grid: health card on the left, KPI cards in two rows on the right */ -.overview-kpi-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr)); - gap:var(--sp-3);margin-bottom:var(--sp-6)} -.overview-kpi-grid--with-health{grid-template-columns:minmax(190px,210px) minmax(0,1fr); - gap:var(--sp-3);align-items:stretch} 
-.overview-kpi-cards{display:grid;grid-template-columns:repeat(4,minmax(0,1fr)); - gap:var(--sp-3);min-width:0} -.overview-kpi-grid--with-health .meta-item{min-width:0} -.overview-kpi-grid--with-health .meta-item{min-height:0} -.overview-kpi-cards .meta-item{display:grid;grid-template-rows:auto 1fr auto; - align-items:start;padding:var(--sp-3) var(--sp-4);gap:var(--sp-2);min-height:0} -.overview-kpi-cards .meta-item .meta-label{font-size:.75rem;min-height:18px} -.overview-kpi-cards .meta-item .meta-value{display:flex;align-items:center; - font-size:1.55rem;line-height:1;padding:var(--sp-1) 0} -.overview-kpi-cards .kpi-detail{margin-top:0;gap:4px;align-self:end} -.overview-kpi-cards .kpi-micro{padding:2px 6px;font-size:.65rem} -.overview-kpi-grid--with-health .overview-health-card{padding:var(--sp-2)} -.overview-kpi-grid--with-health .overview-health-inner{width:100%;height:100%} -.overview-kpi-grid--with-health .health-ring{width:140px;height:140px;margin:auto} -.overview-kpi-grid--with-health .overview-health-card .meta-value{font-size:1.2rem} -.overview-kpi-grid--with-health .overview-health-card .meta-label{font-size:.66rem} -@media(max-width:1380px){ - .overview-kpi-cards{grid-template-columns:repeat(3,minmax(0,1fr))} -} -@media(max-width:980px){ - .overview-kpi-grid--with-health{grid-template-columns:1fr} - .overview-kpi-cards{grid-template-columns:repeat(2,minmax(0,1fr))} -} -@media(max-width:520px){ - .overview-kpi-cards{grid-template-columns:1fr} - .overview-kpi-cards .meta-item{grid-template-rows:auto auto auto;align-content:start; - min-height:0} - .overview-kpi-cards .meta-item .meta-label{min-height:0} - .overview-kpi-cards .meta-item .meta-value{padding-top:0} - .overview-kpi-cards .kpi-detail{align-self:start} - .overview-kpi-cards .kpi-micro{max-width:100%;white-space:normal;overflow-wrap:anywhere} -} - -/* Health gauge */ -.overview-health-card{display:flex;align-items:center;justify-content:center; - 
padding:var(--sp-3);background:var(--bg-surface);border:1px solid var(--border); - border-radius:var(--radius-lg)} -.overview-health-inner{display:flex;flex-direction:column;align-items:center;justify-content:center; - gap:var(--sp-1)} -.health-ring{position:relative;width:140px;height:140px} -.health-ring svg{width:100%;height:100%;transform:rotate(-90deg)} -.health-ring-bg{fill:none;stroke:var(--border);stroke-width:6} -.health-ring-baseline{fill:none;stroke-width:6;stroke-linecap:round} -.health-ring-fg{fill:none;stroke-width:6;stroke-linecap:round; - transition:stroke-dashoffset 1s var(--ease)} -.health-ring-label{position:absolute;inset:0;display:flex;flex-direction:column; - align-items:center;justify-content:center} -.health-ring-score{font-family:var(--font-numeric);font-size:1.85rem;font-weight:680; - color:var(--text-primary);font-variant-numeric:tabular-nums;line-height:1; - letter-spacing:-0.018em} -.health-ring-grade{font-size:.72rem;font-weight:500;color:var(--text-muted);margin-top:3px} -.health-ring-delta{font-size:.65rem;font-weight:600;margin-top:3px} -.health-ring-delta--up{color:var(--success)} -.health-ring-delta--down{color:var(--error)} - -/* Get Badge button (under health ring) */ -.badge-btn{display:inline-flex;align-items:center;gap:4px;margin-top:var(--sp-2); - padding:4px 10px;font-size:.65rem;font-weight:500;color:var(--text-muted); - background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-sm); - cursor:pointer;transition:all var(--dur-fast) var(--ease);white-space:nowrap} -.badge-btn:hover{color:var(--text-primary);border-color:var(--border-strong); - background:var(--bg-alt)} - -/* Badge modal */ -.badge-modal{max-width:680px;width:92vw;max-height:85vh} -.badge-modal .modal-head{display:flex;align-items:center;justify-content:space-between; - padding:var(--sp-3) var(--sp-4);border-bottom:1px solid var(--border)} -.badge-modal .modal-head h2{font-size:1rem;font-weight:700;margin:0} -.badge-modal 
.modal-body{padding:var(--sp-3) var(--sp-4) var(--sp-4);overflow-y:auto;flex:1 1 auto} - -/* Badge tabs */ -.badge-tabs{display:flex;gap:var(--sp-1);margin-bottom:var(--sp-3)} -.badge-tab{padding:5px 12px;font-size:.72rem;font-weight:500;color:var(--text-muted); - background:transparent;border:1px solid var(--border);border-radius:var(--radius-sm); - cursor:pointer;transition:all var(--dur-fast) var(--ease)} -.badge-tab:hover{color:var(--text-primary);border-color:var(--border-strong)} -.badge-tab--active{color:var(--text-primary);background:var(--bg-alt); - border-color:var(--border-strong);font-weight:600} - -/* Badge preview & disclaimer */ -.badge-preview{text-align:center;padding:var(--sp-3) 0;margin-bottom:var(--sp-1); - border-bottom:1px solid var(--border)} -.badge-preview img{height:24px} -.badge-disclaimer{font-size:.65rem;color:var(--text-muted);text-align:center; - margin:var(--sp-1) 0 var(--sp-2);line-height:1.4} - -/* Badge embed fields */ -.badge-field-label{display:block;font-size:.68rem;font-weight:600;color:var(--text-muted); - margin-bottom:var(--sp-1);margin-top:var(--sp-3);text-transform:uppercase;letter-spacing:.04em} -.badge-code-wrap{display:flex;align-items:stretch;border:1px solid var(--border); - border-radius:var(--radius-sm);overflow:hidden;background:var(--bg-alt)} -.badge-code{flex:1;padding:var(--sp-2) var(--sp-3);font-size:.72rem;font-family:var(--font-mono); - color:var(--text-primary);word-break:break-all;white-space:pre-wrap;line-height:1.5; - user-select:all;-webkit-user-select:all} -.badge-copy-btn{min-width:64px;padding:var(--sp-2) var(--sp-3);font-size:.68rem;font-weight:500; - color:var(--text-muted);background:transparent;border:none;border-left:1px solid var(--border); - cursor:pointer;transition:all var(--dur-fast) var(--ease);white-space:nowrap} -.badge-copy-btn:hover{color:var(--text-primary)} -.badge-copy-btn--ok{color:var(--success)} - -/* KPI stat card */ -.meta-item{padding:var(--sp-3) 
var(--sp-4);background:var(--bg-surface);border:1px solid var(--border); - border-radius:var(--radius-lg);display:flex;flex-direction:column;gap:var(--sp-1); - transition:border-color var(--dur-fast) var(--ease);min-width:0; - font-family:var(--font-mono)} -.meta-item:hover{border-color:var(--border-strong)} -.meta-item .meta-label{font-size:.75rem;font-weight:500;color:var(--text-muted); - display:flex;align-items:center;gap:var(--sp-1);letter-spacing:.01em;line-height:1.35} -.meta-item .meta-value{font-family:var(--font-numeric);font-size:1.4rem;font-weight:680; - color:var(--text-primary);font-variant-numeric:tabular-nums;line-height:1.15; - letter-spacing:-0.01em} -.meta-item .meta-value--good{color:var(--success)} -.meta-item .meta-value--bad{color:var(--error)} -.meta-item .meta-value--warn{color:var(--warning)} -.meta-item .meta-value--muted{color:var(--text-muted)} -.kpi-detail{display:flex;flex-wrap:wrap;gap:3px;margin-top:2px} -.kpi-detail code{font-size:.78rem} -.kpi-micro{display:inline-flex;align-items:center;gap:3px;font-size:.62rem; - padding:1px 5px;border-radius:var(--radius-sm);background:var(--bg-raised); - white-space:nowrap;line-height:1.3;font-family:inherit} -.kpi-micro-val{font-family:inherit;font-weight:500;font-variant-numeric:tabular-nums; - color:var(--text-muted)} -.kpi-micro-lbl{font-weight:400;color:var(--text-muted);text-transform:lowercase} -.kpi-micro--baselined{color:var(--success);font-weight:500;font-size:.6rem} -.kpi-delta{font-size:.62rem;font-weight:700;margin-left:auto; - padding:1px 5px;border-radius:var(--radius-sm);white-space:nowrap} -.kpi-delta--good{color:var(--success);background:var(--success-muted)} -.kpi-delta--bad{color:var(--error);background:var(--error-muted)} -.kpi-delta--neutral{color:var(--text-muted);background:var(--bg-raised)} -.kpi-help{display:inline-flex;align-items:center;justify-content:center;width:15px;height:15px; - font-size:.6rem;font-weight:600;border-radius:50%;background:none; - 
color:var(--text-muted);cursor:help;position:relative;border:1.5px solid var(--border); - opacity:.5;transition:opacity var(--dur-fast) var(--ease)} -.kpi-help:hover{opacity:1} -.kpi-tooltip{position:fixed;z-index:9999;pointer-events:none; - background:var(--bg-overlay);color:var(--text-primary); - padding:var(--sp-2) var(--sp-3);border-radius:var(--radius-md);font-size:.75rem;font-weight:400; - white-space:normal;width:max-content;max-width:240px;line-height:1.4; - box-shadow:var(--shadow-md);border:1px solid var(--border)} - -/* Tone variants */ -.meta-item.tone-ok{border-left:3px solid var(--success)} -.meta-item.tone-warn{border-left:3px solid var(--warning)} -.meta-item.tone-risk{border-left:3px solid var(--error)} - -/* Clusters */ -.overview-cluster{margin-bottom:var(--sp-5)} -.overview-cluster-header{margin-bottom:var(--sp-3)} -.overview-cluster-copy{font-size:.82rem;color:var(--text-muted);margin-top:2px} -.overview-cluster-empty{display:flex;flex-direction:column;align-items:center;gap:var(--sp-2); - padding:var(--sp-5);text-align:center;color:var(--text-muted);font-size:.85rem} -.empty-icon{color:var(--success);opacity:.35;width:32px;height:32px;flex-shrink:0} -.overview-list{display:grid;grid-template-columns:repeat(2,1fr);gap:var(--sp-2)} - -/* Overview rows */ -.overview-row{display:flex;flex-direction:column;gap:var(--sp-1); - padding:var(--sp-3) var(--sp-4);background:var(--bg-surface);border:1px solid var(--border); - border-radius:var(--radius-lg);transition:border-color var(--dur-fast) var(--ease)} -.overview-row:hover{border-color:var(--border-strong)} -.overview-row[data-severity="critical"]{border-left:3px solid var(--error)} -.overview-row[data-severity="warning"]{border-left:3px solid var(--warning)} -.overview-row[data-severity="info"]{border-left:3px solid var(--info)} -.overview-row-head{display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap} 
-.overview-row-spread{font-size:.72rem;font-family:var(--font-mono);color:var(--text-muted); - margin-left:auto;white-space:nowrap} -.overview-row-title{font-weight:600;font-size:.85rem;color:var(--text-primary)} -.overview-row-summary{font-size:.8rem;color:var(--text-secondary);line-height:1.5} - -/* Summary grid */ -.overview-summary-grid{display:grid;gap:var(--sp-3);margin-bottom:var(--sp-3)} -.overview-summary-grid--2col{grid-template-columns:repeat(auto-fit,minmax(280px,1fr))} -.overview-summary-grid--3col{grid-template-columns:repeat(auto-fit,minmax(240px,1fr))} -.overview-summary-item{background:var(--bg-surface); - border:1px solid color-mix(in srgb,var(--border) 78%,transparent); - border-radius:var(--radius-lg);padding:var(--sp-4)} -.overview-summary-label{display:flex;align-items:center;gap:var(--sp-2); - font-size:.82rem;font-weight:700;text-transform:none; - letter-spacing:normal;color:var(--text-secondary);margin-bottom:var(--sp-3); - padding-bottom:var(--sp-2); - border-bottom:1px solid color-mix(in srgb,var(--border) 58%,transparent); - font-family:var(--font-display)} -.overview-summary-item > :not(.overview-summary-label){font-family:var(--font-mono)} -.summary-icon{flex-shrink:0;opacity:.6} -.summary-icon--risk{color:var(--warning)} -.summary-icon--info{color:var(--accent-primary)} -.overview-summary-list{display:flex;flex-direction:column;gap:var(--sp-2)} -.overview-summary-list li{font-size:.82rem;color:var(--text-secondary); - padding-left:var(--sp-3);position:relative;line-height:1.5} -.overview-summary-list li::before{content:"\\2022";position:absolute;left:0;color:var(--text-muted)} -.overview-summary-value{font-size:.85rem;color:var(--text-muted)} -/* Compact stat grid used inside overview-summary-item cards (Coverage Join). 
*/ -.overview-stat-row{display:grid;grid-template-columns:repeat(auto-fit,minmax(84px,1fr)); - gap:var(--sp-3);align-items:end} -.overview-stat{display:flex;flex-direction:column;gap:2px;min-width:0} -.overview-stat-value{font-family:var(--font-numeric);font-size:1.4rem;font-weight:680; - color:var(--text-primary);font-variant-numeric:tabular-nums;line-height:1.12; - letter-spacing:-0.01em} -.overview-stat-label{font-size:.68rem;font-weight:500;color:var(--text-muted); - text-transform:uppercase;letter-spacing:.04em} -.overview-stat-caption{margin-top:var(--sp-3);font-size:.72rem;color:var(--text-muted); - line-height:1.4} -.overview-stat-caption code{font-family:var(--font-mono);font-size:.68rem; - padding:1px 4px;border-radius:var(--radius-sm);background:var(--bg-raised); - color:var(--text-secondary)} -.overview-stat-row + .kpi-detail{margin-top:var(--sp-2)} -/* Fact-list: compact label ··· value rows inside overview-summary-item cards. */ -.overview-fact-list{display:flex;flex-direction:column;gap:var(--sp-2)} -.overview-fact-row{display:flex;align-items:baseline;justify-content:space-between;gap:var(--sp-3); - font-size:.8rem;padding-bottom:6px;border-bottom:1px solid var(--border)} -.overview-fact-row:last-child{padding-bottom:0;border-bottom:none} -.overview-fact-label{color:var(--text-muted)} -.overview-fact-value{display:inline-flex;align-items:baseline;gap:6px; - color:var(--text-primary);font-weight:600;font-variant-numeric:tabular-nums;text-align:right} -.overview-fact-delta{font-size:.68rem;font-weight:400;color:var(--text-muted)} -.overview-fact-value--warn{color:var(--warning)} -.overview-fact-value--good{color:var(--success)} -/* Source breakdown bars */ -.breakdown-list{display:flex;flex-direction:column;gap:var(--sp-2)} -.breakdown-row{display:grid;grid-template-columns:6.5rem 2rem 1fr;align-items:center;gap:var(--sp-2)} -.breakdown-row .source-kind-badge{justify-content:center;min-width:0;width:100%;text-align:center} 
-.breakdown-count{font-size:.8rem;font-weight:600;font-variant-numeric:tabular-nums; - color:var(--text-primary);text-align:right} -.breakdown-bar-track{height:6px;border-radius:3px;background:var(--bg-raised);overflow:hidden} -.breakdown-bar-fill{display:block;height:100%;border-radius:3px; - background:var(--accent-primary);transition:width .6s var(--ease)} -/* Directory hotspot entries */ -.dir-hotspot-list{display:flex;flex-direction:column;gap:0} -.dir-hotspot-entry{padding:var(--sp-2) 0;border-bottom:1px solid color-mix(in srgb,var(--border) 50%,transparent)} -.dir-hotspot-entry:last-child{border-bottom:none;padding-bottom:0} -.dir-hotspot-entry:first-child{padding-top:0} -/* Row 1: path + badge */ -.dir-hotspot-head{display:flex;align-items:center;gap:var(--sp-2);min-width:0} -.dir-hotspot-path{font-size:.78rem;font-weight:600;color:var(--text-primary);line-height:1.3; - overflow:hidden;text-overflow:ellipsis;white-space:nowrap;min-width:0;flex:1} -/* Row 2: bar + pct + meta */ -.dir-hotspot-detail{display:flex;align-items:center;gap:var(--sp-2);margin-top:3px} -.dir-hotspot-bar-track{width:30%;flex-shrink:0;height:4px;border-radius:2px; - background:var(--bg-raised);overflow:hidden;display:flex} -.dir-hotspot-bar-prev{height:100%;background:var(--text-muted);opacity:.18} -.dir-hotspot-bar-cur{height:100%;background:var(--accent-primary);opacity:.7} -.dir-hotspot-pct{font-size:.72rem;font-weight:600;font-variant-numeric:tabular-nums; - color:var(--text-secondary);white-space:nowrap;flex-shrink:0} -.dir-hotspot-meta{display:flex;flex-wrap:wrap;gap:4px 6px;font-size:.68rem;color:var(--text-muted)} -.dir-hotspot-meta span{font-variant-numeric:tabular-nums} -.dir-hotspot-meta-sep{opacity:.3} -.overloaded-module-list{display:flex;flex-direction:column;gap:0} -.overloaded-module-entry{padding:var(--sp-2) 0;border-bottom:1px solid color-mix(in srgb,var(--border) 50%,transparent)} -.overloaded-module-entry:last-child{border-bottom:none;padding-bottom:0} 
-.overloaded-module-entry:first-child{padding-top:0} -.overloaded-module-head{display:flex;align-items:flex-start;justify-content:space-between;gap:var(--sp-2);margin-bottom:4px} -.overloaded-module-title{display:flex;align-items:center;flex-wrap:wrap;gap:var(--sp-2);min-width:0} -.overloaded-module-title code{font-size:.78rem;font-weight:600;color:var(--text-primary);line-height:1.35} -.overloaded-module-score{flex-shrink:0;font-size:.68rem;font-weight:700;font-variant-numeric:tabular-nums; - color:var(--accent-primary);background:var(--accent-muted);border-radius:var(--radius-sm);padding:2px var(--sp-2)} -.overloaded-module-metrics{display:flex;flex-wrap:wrap;gap:6px;font-size:.68rem;color:var(--text-muted)} -.overloaded-module-metrics span{font-variant-numeric:tabular-nums} -.overloaded-module-reasons,.overloaded-module-signal-list{display:flex;flex-wrap:wrap;gap:var(--sp-1);margin-top:var(--sp-2)} -.overloaded-module-reason-chip,.overloaded-module-signal-pill{display:inline-flex;align-items:center;gap:5px; - font-size:.68rem;font-weight:500;color:var(--text-secondary);background:var(--bg-raised); - border:1px solid color-mix(in srgb,var(--border) 60%,transparent);border-radius:var(--radius-sm); - padding:2px var(--sp-2)} -.overloaded-module-signal-count{font-variant-numeric:tabular-nums;color:var(--text-muted)} -/* Health radar chart */ -.health-radar{display:flex;justify-content:center;padding:var(--sp-3) 0} -.health-radar svg{width:100%;max-width:520px;height:auto;overflow:visible} -.health-radar text{font-size:10.5px;font-family:var(--font-mono);fill:var(--text-secondary); - font-weight:500} -.health-radar .radar-score{font-weight:600;font-variant-numeric:tabular-nums;fill:var(--text-secondary)} -.health-radar .radar-label--weak{fill:var(--error)} -.health-radar .radar-label--weak .radar-score{fill:var(--error)} -.health-radar-legend{font-size:.78rem;color:var(--text-secondary);text-align:center; - 
margin-top:var(--sp-2);max-width:520px;margin-left:auto;margin-right:auto} -/* Findings by family bars */ -.families-list{display:flex;flex-direction:column;gap:var(--sp-2)} -.families-row{display:grid;grid-template-columns:5.5rem 2rem 1fr auto;align-items:center;gap:var(--sp-2)} -.families-row--muted{opacity:.65} -.families-label{font-size:.75rem;font-weight:500;color:var(--text-secondary);text-align:right} -.families-count{font-size:.8rem;font-weight:600;font-variant-numeric:tabular-nums; - color:var(--text-primary);text-align:right} -.breakdown-bar-track{display:flex} -.breakdown-bar-fill--baselined{opacity:.5} -.breakdown-bar-fill--new{border-radius:0 3px 3px 0} -.families-delta{font-size:.65rem;font-weight:600;font-variant-numeric:tabular-nums;white-space:nowrap} -.families-delta--ok{color:var(--success)} -.families-delta--new{color:var(--error)} -""" - -# --------------------------------------------------------------------------- -# Dependencies (SVG graph) -# --------------------------------------------------------------------------- - -_DEPENDENCIES = """\ -.stat-cards,.dep-stats{display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr)); - gap:var(--sp-2);margin-bottom:var(--sp-4)} -.stat-cards .meta-item,.dep-stats .meta-item{display:grid;grid-template-rows:auto 1fr auto;min-height:100px} -.stat-cards .meta-item .meta-label,.dep-stats .meta-item .meta-label{font-size:.72rem;min-height:18px} -.stat-cards .meta-item .meta-value,.dep-stats .meta-item .meta-value{display:flex;align-items:center} -.stat-cards .kpi-detail,.dep-stats .kpi-detail{margin-top:0;align-self:end} -.dep-graph-wrap{overflow:hidden;margin-bottom:var(--sp-4);border:1px solid var(--border); - border-radius:var(--radius-lg);background:var(--bg-surface);padding:var(--sp-4)} -.dep-graph-svg{width:100%;height:auto;max-height:520px} -.dep-graph-svg text{fill:var(--text-secondary);font-family:var(--font-mono)} -.dep-node{transition:fill-opacity var(--dur-fast) var(--ease)} 
-.dep-edge{transition:stroke-opacity var(--dur-fast) var(--ease)} -.dep-label{transition:fill var(--dur-fast) var(--ease)} - -/* Hub bar */ -.dep-hub-bar{display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap; - margin-bottom:var(--sp-4);padding:var(--sp-2) var(--sp-4);background:var(--bg-raised); - border-radius:var(--radius-lg);border:1px solid var(--border)} -.dep-hub-label{font-size:.75rem;font-weight:600;text-transform:uppercase;letter-spacing:.05em; - color:var(--text-muted)} -.dep-hub-pill{display:inline-flex;align-items:center;gap:var(--sp-1);padding:var(--sp-1) var(--sp-2); - border-radius:var(--radius-sm);background:var(--bg-overlay);font-size:.8rem} -.dep-hub-name{color:var(--text-primary);font-family:var(--font-mono);font-size:.8rem} -.dep-hub-deg{font-size:.68rem;font-weight:600;color:var(--accent-primary); - background:var(--accent-muted);padding:2px var(--sp-2);border-radius:var(--radius-sm)} - -/* Legend */ -.dep-legend{display:flex;gap:var(--sp-4);align-items:center;margin-bottom:var(--sp-4); - padding:var(--sp-2) var(--sp-4);font-size:.8rem;color:var(--text-muted)} -.dep-legend-item{display:inline-flex;align-items:center;gap:var(--sp-1)} -.dep-legend-item svg{flex-shrink:0} - -/* Chain flow */ -.chain-flow{display:inline-flex;align-items:center;gap:var(--sp-1);flex-wrap:wrap} -.chain-node{font-family:var(--font-mono);font-size:.8rem;color:var(--text-primary); - padding:0 var(--sp-1);background:var(--bg-overlay);border-radius:var(--radius-sm)} -.chain-arrow{color:var(--text-muted);font-size:.75rem} -""" - -# --------------------------------------------------------------------------- -# Novelty controls -# --------------------------------------------------------------------------- - -_NOVELTY = """\ -/* Slim inline baseline-split bar (replaces the old boxed section chrome). 
*/ -.novelty-bar{display:flex;align-items:center;gap:var(--sp-3);flex-wrap:wrap; - margin-bottom:var(--sp-3);padding:var(--sp-2) 0; - border-bottom:1px solid var(--border)} -.novelty-bar-tabs{display:inline-flex;gap:var(--sp-1)} -.novelty-bar-note{font-size:.78rem;color:var(--text-muted);line-height:1.4} -.novelty-tab{transition:all var(--dur-fast) var(--ease)} -.novelty-tab.active{background:var(--accent-primary);color:white;border-color:var(--accent-primary)} -.novelty-tab[data-novelty-state="good"]{color:var(--success);border-color:var(--success);background:var(--success-muted)} -.novelty-tab[data-novelty-state="good"].active{background:var(--success);color:white;border-color:var(--success)} -.novelty-tab[data-novelty-state="bad"]{color:var(--error);border-color:var(--error);background:var(--error-muted)} -.novelty-tab[data-novelty-state="bad"].active{background:var(--error);color:white;border-color:var(--error)} -.novelty-count{font-size:.72rem;font-weight:600;background:rgba(255,255,255,.15);padding:0 var(--sp-1); - border-radius:var(--radius-sm);margin-left:var(--sp-1)} - -/* Hidden by novelty filter */ -.group[data-novelty-hidden="true"]{display:none} -""" - -# --------------------------------------------------------------------------- -# Dead-code -# --------------------------------------------------------------------------- - -_DEAD_CODE = """\ -/* No custom overrides — uses shared table + tabs */ -""" - -# --------------------------------------------------------------------------- -# Suggestions -# --------------------------------------------------------------------------- - -_SUGGESTIONS = """\ -/* List layout */ -.suggestions-list{display:flex;flex-direction:column;gap:var(--sp-2)} - -/* Card — full-width row */ -.suggestion-card{background:var(--bg-surface);border:1px solid var(--border);border-radius:var(--radius-lg); - overflow:hidden;transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease)} 
-.suggestion-card:hover{border-color:var(--border-strong);box-shadow:var(--shadow-sm)} -.suggestion-card[data-severity="critical"]{border-left:3px solid var(--error)} -.suggestion-card[data-severity="warning"]{border-left:3px solid var(--warning)} -.suggestion-card[data-severity="info"]{border-left:3px solid var(--info)} - -/* Header row: severity pill · title · meta badges */ -.suggestion-head{padding:var(--sp-3) var(--sp-4);display:flex;align-items:center; - gap:var(--sp-2);flex-wrap:wrap} -.suggestion-sev{font-size:.68rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; - padding:2px var(--sp-2);border-radius:var(--radius-sm);white-space:nowrap} -.suggestion-sev--critical{background:var(--error-muted);color:var(--error)} -.suggestion-sev--warning{background:var(--warning-muted);color:var(--warning)} -.suggestion-sev--info{background:var(--info-muted);color:var(--info)} -.suggestion-sev-inline{font-size:.68rem;font-weight:600;padding:2px var(--sp-2); - border-radius:var(--radius-sm)} -.suggestion-title{font-weight:600;font-size:.85rem;color:var(--text-primary);flex:1;min-width:0} -.suggestion-meta{display:flex;align-items:center;gap:var(--sp-2);flex-shrink:0;flex-wrap:wrap} -.suggestion-meta-badge{font-size:.68rem;font-weight:600;padding:2px var(--sp-2); - border-radius:var(--radius-sm);background:var(--bg-overlay);color:var(--text-muted); - white-space:nowrap;line-height:1.2;font-variant-numeric:tabular-nums} -.suggestion-effort--easy{color:var(--success);background:var(--success-muted, rgba(34,197,94,.1))} -.suggestion-effort--moderate{color:var(--warning);background:var(--warning-muted)} -.suggestion-effort--hard{color:var(--error);background:var(--error-muted)} - -/* Body — context + summary */ -.suggestion-body{padding:0 var(--sp-4) var(--sp-3);display:flex;flex-direction:column;gap:var(--sp-1)} -.suggestion-context{display:flex;gap:var(--sp-1);flex-wrap:wrap} -.suggestion-chip{font-size:.68rem;font-weight:500;padding:2px 
var(--sp-2);border-radius:var(--radius-sm); - background:var(--bg-overlay);color:var(--text-muted);white-space:nowrap} -.suggestion-summary{font-size:.8rem;font-family:var(--font-mono);color:var(--text-secondary);line-height:1.5} -.suggestion-action{display:flex;align-items:center;gap:var(--sp-1); - font-size:.8rem;font-weight:500;color:var(--accent-primary);margin-top:var(--sp-1)} -.suggestion-action-icon{flex-shrink:0;color:var(--accent-primary)} - -/* Expandable details */ -.suggestion-details{border-top:1px solid var(--border)} -.suggestion-details summary{padding:var(--sp-2) var(--sp-4);font-size:.75rem;font-weight:500; - color:var(--text-muted);cursor:pointer;display:flex;align-items:center;gap:var(--sp-2); - background:none;user-select:none} -.suggestion-details summary:hover{color:var(--text-primary);background:var(--bg-raised)} -.suggestion-details[open] summary{border-bottom:1px solid var(--border)} -.suggestion-details-body{padding:var(--sp-3) var(--sp-4);display:flex;flex-direction:column;gap:var(--sp-3)} - -/* Facts grid inside details */ -.suggestion-facts{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:var(--sp-3)} -.suggestion-fact-group{display:flex;flex-direction:column;gap:var(--sp-1)} -.suggestion-fact-group-title{font-size:.68rem;font-weight:600;text-transform:uppercase; - letter-spacing:.05em;color:var(--text-muted);padding-bottom:var(--sp-1);border-bottom:1px solid var(--border)} -.suggestion-dl{display:flex;flex-direction:column;gap:2px} -.suggestion-dl div{display:flex;gap:var(--sp-2);align-items:baseline} -.suggestion-dl dt{font-size:.72rem;color:var(--text-muted);white-space:nowrap;min-width:60px} -.suggestion-dl dd{font-size:.78rem;font-family:var(--font-mono);color:var(--text-primary);word-break:break-word} - -/* Locations & steps inside details */ -.suggestion-locations{display:flex;flex-direction:column;gap:var(--sp-1)} -.suggestion-locations li{display:flex;gap:var(--sp-2);align-items:baseline; - padding:2px 
0;border-bottom:1px solid var(--border);line-height:1.4} -.suggestion-locations li:last-child{border-bottom:none} -.suggestion-loc-path{font-family:var(--font-mono);font-size:.75rem;color:var(--text-secondary)} -.suggestion-loc-lines{color:var(--text-muted)} -.suggestion-loc-name{font-family:var(--font-mono);font-size:.72rem;color:var(--text-muted); - margin-left:auto} -.suggestion-steps{padding-left:var(--sp-4);display:flex;flex-direction:column;gap:var(--sp-1);list-style:decimal} -.suggestion-steps li{font-size:.78rem;color:var(--text-secondary)} -.suggestion-sub-title{font-size:.72rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; - color:var(--text-muted);margin-bottom:var(--sp-1)} - -.suggestion-empty{padding:var(--sp-4);text-align:center;color:var(--text-muted);font-size:.85rem} - -/* Hidden cards */ -.suggestion-card[data-filter-hidden="true"]{display:none} -""" - -# --------------------------------------------------------------------------- -# Structural findings -# --------------------------------------------------------------------------- - -_STRUCTURAL = """\ -/* Structural findings — list layout */ -.sf-list{display:flex;flex-direction:column;gap:var(--sp-2)} -.sf-card{background:var(--bg-surface);border:1px solid var(--border);border-left:3px solid var(--info); - border-radius:var(--radius-lg); - overflow:hidden;transition:border-color var(--dur-fast) var(--ease),box-shadow var(--dur-fast) var(--ease)} -.sf-card:hover{border-color:var(--border-strong);box-shadow:var(--shadow-sm)} - -/* Header row */ -.sf-head{padding:var(--sp-3) var(--sp-4);display:flex;align-items:center;gap:var(--sp-2);flex-wrap:wrap} -.sf-kind-badge{font-size:.68rem;font-weight:600;text-transform:uppercase;letter-spacing:.03em; - padding:2px var(--sp-2);border-radius:var(--radius-sm);white-space:nowrap; - background:var(--info-muted);color:var(--info)} -.sf-title{font-weight:600;font-size:.85rem;color:var(--text-primary);flex:1;min-width:0} 
-.sf-meta{display:flex;align-items:center;gap:var(--sp-1);flex-shrink:0;flex-wrap:wrap} -.sf-why-btn{font-size:.72rem;color:var(--accent-primary);font-weight:500} - -/* Body */ -.sf-body{padding:0 var(--sp-4) var(--sp-3);display:flex;flex-direction:column;gap:var(--sp-2)} -.sf-chips{display:flex;flex-wrap:wrap;gap:var(--sp-1)} -.sf-scope-text{font-size:.8rem;font-family:var(--font-mono);color:var(--text-secondary)} -.sf-inline-action{display:flex;align-items:flex-start;gap:var(--sp-2);padding:var(--sp-2) var(--sp-3); - border:1px solid var(--border);border-radius:var(--radius-md);background:var(--bg-raised)} -.sf-inline-action-label{font-size:.72rem;font-weight:600;letter-spacing:.02em;text-transform:uppercase; - color:var(--accent-primary);white-space:nowrap} -.sf-inline-action-text{font-size:.8rem;color:var(--text-secondary);line-height:1.45} - -/* Expandable occurrences */ -.sf-details{border-top:1px solid var(--border)} -.sf-details summary{padding:var(--sp-2) var(--sp-4);font-size:.75rem;font-weight:500; - color:var(--text-muted);cursor:pointer;display:flex;align-items:center;gap:var(--sp-2); - background:none;user-select:none} -.sf-details summary:hover{color:var(--text-primary);background:var(--bg-raised)} -.sf-details[open] summary{border-bottom:1px solid var(--border)} -.sf-details-body{padding:0} -.sf-details-body .table-wrap{border:none;border-radius:0} -.sf-table .col-num{white-space:nowrap} -.sf-table{table-layout:fixed} - -.sf-kind-meta{font-weight:normal;font-size:.8rem;color:var(--text-muted)} -.subsection-title{font-size:.95rem;margin:var(--sp-4) 0 var(--sp-2)} -.finding-occurrences-more summary{font-size:.8rem;color:var(--accent-primary);cursor:pointer; - padding:var(--sp-1) var(--sp-3)} -.sf-card[data-filter-hidden="true"]{display:none} -/* Finding Why modal */ -.finding-why-modal{max-width:720px;width:92vw;max-height:85vh} -.finding-why-modal .modal-head{display:flex;align-items:center;justify-content:space-between; - padding:var(--sp-3) 
var(--sp-4);border-bottom:1px solid var(--border);flex-shrink:0} -.finding-why-modal .modal-head h2{font-size:1rem;font-weight:600} -.finding-why-modal .modal-body{padding:var(--sp-3) var(--sp-4);overflow-y:auto;flex:1 1 auto;min-height:0} -.metrics-section{margin-bottom:var(--sp-3)} -.metrics-section-title{font-size:.75rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em; - color:var(--text-muted);margin-bottom:var(--sp-1);padding-bottom:3px;border-bottom:1px solid var(--border)} -.finding-why-text{font-size:.85rem;color:var(--text-secondary);line-height:1.5;margin:var(--sp-1) 0} -.finding-why-list{font-size:.82rem;color:var(--text-secondary);line-height:1.5; - list-style:disc;padding-left:var(--sp-5);margin:var(--sp-1) 0} -.finding-why-list li{margin-bottom:2px} -.finding-why-note{font-size:.78rem;color:var(--text-muted);margin-bottom:var(--sp-2)} -.finding-why-examples{display:flex;flex-direction:column;gap:var(--sp-2)} -.finding-why-example{border:1px solid var(--border);border-radius:var(--radius-md);overflow:hidden} -.finding-why-example-head{display:flex;align-items:center;gap:var(--sp-2);padding:var(--sp-1) var(--sp-3); - background:var(--bg-raised);font-size:.78rem;border-bottom:1px solid var(--border)} -.finding-why-example-label{font-weight:600;color:var(--text-primary)} -.finding-why-example-meta{color:var(--text-muted);font-family:var(--font-mono);font-size:.72rem} -.finding-why-example-loc{margin-left:auto;color:var(--text-muted);font-family:var(--font-mono);font-size:.72rem} -""" - -# --------------------------------------------------------------------------- -# Report provenance / meta panel -# --------------------------------------------------------------------------- - -_META_PANEL = """\ -/* Provenance section cards */ -.prov-section{margin-bottom:var(--sp-3);background:var(--bg-surface); - border-radius:var(--radius-md);padding:var(--sp-3) var(--sp-4) var(--sp-2); - border:1px solid var(--border); - box-shadow:0 1px 2px color-mix(in 
srgb,var(--text-primary) 3%,transparent)} -.prov-section:last-child{margin-bottom:0} -.prov-section-title{font-size:.66rem;font-weight:700;text-transform:uppercase;letter-spacing:.09em; - color:var(--text-secondary);margin:0 calc(-1*var(--sp-4)) var(--sp-2); - padding:0 var(--sp-4) var(--sp-2);border:none; - border-bottom:1px solid color-mix(in srgb,var(--border) 60%,transparent); - display:flex;align-items:center;gap:6px} -.prov-section-title svg{width:13px;height:13px;opacity:.7;flex-shrink:0; - color:var(--accent-primary)} -.prov-table{width:100%;border-collapse:collapse;font-size:.8rem} -.prov-table tr:not(:last-child){border-bottom:1px solid color-mix(in srgb,var(--border) 25%,transparent)} -.prov-table tr:hover{background:color-mix(in srgb,var(--accent-primary) 3%,transparent)} -.prov-td-label{padding:6px 0;color:var(--text-muted);white-space:nowrap;width:40%; - vertical-align:top;font-weight:500;font-size:.76rem;letter-spacing:.002em} -.prov-td-value{padding:6px 0 6px var(--sp-2);color:var(--text-primary);word-break:break-all; - font-family:var(--font-mono);font-size:.72rem;vertical-align:top} - -/* Provenance summary badges */ -.prov-summary{display:flex;flex-wrap:wrap;align-items:center;gap:6px; - padding:var(--sp-2) var(--sp-4);border-top:1px solid var(--border)} -.prov-badge{display:inline-flex;align-items:center;gap:4px;font-size:.68rem; - padding:2px var(--sp-2);border-radius:var(--radius-sm);background:var(--bg-raised); - white-space:nowrap;line-height:1.3;border:1px solid color-mix(in srgb,var(--border) 55%,transparent); - font-family:var(--font-mono);letter-spacing:.005em} -.prov-badge-val{font-weight:600;font-variant-numeric:tabular-nums;color:var(--text-primary)} -.prov-badge-lbl{font-weight:400;color:var(--text-muted);text-transform:lowercase} -.prov-badge--inline{padding:2px 8px} -.prov-badge--inline .prov-badge-val{font-weight:500} -.prov-badge--green{background:var(--success-muted);border-color:color-mix(in srgb,var(--success) 
20%,transparent)} -.prov-badge--green .prov-badge-val{color:var(--success)} -.prov-badge--red{background:var(--error-muted);border-color:color-mix(in srgb,var(--error) 20%,transparent)} -.prov-badge--red .prov-badge-val{color:var(--error)} -.prov-badge--amber{background:var(--warning-muted);border-color:color-mix(in srgb,var(--warning) 20%,transparent)} -.prov-badge--amber .prov-badge-val{color:var(--warning)} -.prov-badge--neutral{background:var(--bg-overlay);border-color:color-mix(in srgb,var(--border) 75%,transparent)} -.prov-badge--neutral .prov-badge-val{color:var(--text-secondary)} -.prov-explain{font-size:.62rem;color:var(--text-muted);margin-left:auto;font-style:italic} - -/* Truncated long values (paths, sha256) in provenance table */ -.prov-mono-trunc{font-family:var(--font-mono);font-size:.72rem;color:var(--text-primary); - background:var(--bg-body);padding:2px 6px;border-radius:var(--radius-sm); - border:1px solid color-mix(in srgb,var(--border) 45%,transparent); - white-space:nowrap;overflow:hidden;text-overflow:ellipsis; - max-width:100%;vertical-align:middle;letter-spacing:.01em} -.prov-td-value:has(.prov-mono-trunc){display:flex;align-items:center;gap:var(--sp-1);flex-wrap:nowrap; - min-width:0} -.prov-copy-btn{display:inline-flex;align-items:center;justify-content:center; - width:22px;height:22px;padding:0;background:none;border:1px solid transparent; - border-radius:var(--radius-sm);color:var(--text-muted);cursor:pointer; - transition:all var(--dur-fast) var(--ease);flex-shrink:0} -.prov-copy-btn:hover{color:var(--text-primary);background:var(--bg-overlay); - border-color:color-mix(in srgb,var(--border) 70%,transparent)} -.prov-copy-btn:focus-visible{outline:2px solid var(--accent-primary);outline-offset:1px} -.prov-copy-btn--ok{color:var(--success);background:var(--success-muted); - border-color:color-mix(in srgb,var(--success) 30%,transparent)} -.prov-copy-btn svg{width:12px;height:12px} -""" - - -# 
--------------------------------------------------------------------------- -# Shared micro-interactions -# --------------------------------------------------------------------------- - -_MICRO_INTERACTIONS = """\ -/* Shared card micro-interactions */ -.meta-item,.overview-row,.overview-summary-item,.group,.suggestion-card,.sf-card,.prov-section{ - --card-hover-accent:var(--accent-primary); - --card-outline:color-mix(in oklch,var(--card-hover-accent) 24%,transparent); - --card-hover-shadow: - 0 10px 24px color-mix(in srgb,var(--card-hover-accent) 8%,transparent), - var(--shadow-md); - transform:translateY(0); - transition:transform var(--dur-fast) var(--ease), - border-color var(--dur-fast) var(--ease), - box-shadow var(--dur-fast) var(--ease)} -@media (hover:hover) and (pointer:fine){ - .meta-item:hover,.overview-row:hover,.overview-summary-item:hover,.group:hover,.suggestion-card:hover,.sf-card:hover,.prov-section:hover{ - transform:translateY(-2px); - border-color:color-mix(in oklch,var(--card-hover-accent) 22%,var(--border-strong)); - box-shadow:0 0 0 1px var(--card-outline),var(--card-hover-shadow)} -} -@media (prefers-reduced-motion:reduce){ - .meta-item,.overview-row,.overview-summary-item,.group,.suggestion-card,.sf-card,.prov-section{ - transition:border-color var(--dur-fast) var(--ease), - box-shadow var(--dur-fast) var(--ease)} - .meta-item:hover,.overview-row:hover,.overview-summary-item:hover,.group:hover,.suggestion-card:hover,.sf-card:hover,.prov-section:hover{ - transform:none} -} -""" - -# --------------------------------------------------------------------------- -# Empty states -# --------------------------------------------------------------------------- - -_EMPTY = """\ -.empty{display:flex;align-items:center;justify-content:center;padding:var(--sp-10)} -.empty-card{text-align:center;max-width:400px} -.empty-icon{margin-bottom:var(--sp-3);color:var(--success)} -.empty-icon svg{width:40px;height:40px} -.empty-card h2{margin-bottom:var(--sp-2)} 
-.empty-card p{color:var(--text-secondary);font-size:.9rem} -.tab-empty{display:flex;flex-direction:column;align-items:center;justify-content:center; - padding:var(--sp-10);text-align:center;font-family:var(--font-sans)} -.tab-empty-icon{color:var(--text-muted);opacity:.4;margin-bottom:var(--sp-3);width:48px;height:48px} -.tab-empty-title{font-size:1rem;font-weight:600;color:var(--text-primary);margin-bottom:var(--sp-1); - font-family:var(--font-display)} -.tab-empty-desc{font-size:.85rem;color:var(--text-muted);max-width:320px;font-family:var(--font-sans)} -.tab-empty-desc-detail{text-align:left;max-width:520px;font-size:.8rem;word-break:break-word; - font-family:var(--font-sans)} -.tab-empty-reason{display:block;margin-top:var(--sp-1);font-size:.75rem;color:var(--text-muted); - opacity:.7;word-break:break-all;font-family:var(--font-mono, monospace)} - -/* Inline empty state — compact stacked variant for cards/summary items. - No background/border — sits inside its parent card. Icon color carries tone. 
*/ -.inline-empty{display:flex;flex-direction:column;align-items:center;justify-content:center; - gap:var(--sp-2);padding:var(--sp-4) var(--sp-3);min-height:72px; - color:var(--text-muted);font-size:.82rem;font-weight:500; - text-align:center;letter-spacing:.005em;line-height:1.4;font-family:var(--font-sans)} -.inline-empty-icon{flex-shrink:0;opacity:.5;color:var(--text-muted)} -.inline-empty-text{max-width:260px} -.inline-empty--good .inline-empty-icon{color:var(--success);opacity:.7} -.inline-empty--neutral .inline-empty-icon{color:var(--text-muted);opacity:.5} -""" - -# --------------------------------------------------------------------------- -# Coupled details -# --------------------------------------------------------------------------- - -_COUPLED = """\ -.coupled-details{display:inline} -.coupled-summary{display:inline;cursor:pointer} -.coupled-summary:hover{color:var(--text-primary)} -.coupled-more{font-size:.75rem;color:var(--text-muted);margin-left:var(--sp-1)} -.coupled-expanded{margin-top:var(--sp-1)} -""" - -# --------------------------------------------------------------------------- -# Modal (dialog) -# --------------------------------------------------------------------------- - -_MODAL = """\ -/* Generic dialog modal — Safari-compatible centering */ -dialog{background:var(--bg-surface);color:var(--text-primary);border:1px solid var(--border); - border-radius:var(--radius-xl);box-shadow:var(--shadow-xl);padding:0;max-width:600px;width:90vw; - max-height:80vh;overflow:hidden} -dialog:not([open]){display:none} -dialog[open]{display:flex;flex-direction:column; - position:fixed;inset:0;margin:auto;z-index:9999} -dialog::backdrop{background:rgba(0,0,0,.5);backdrop-filter:blur(4px);-webkit-backdrop-filter:blur(4px)} -.modal-close{background:none;border:none;cursor:pointer;color:var(--text-muted);padding:var(--sp-1); - font-size:1.25rem;line-height:1} -.modal-close:hover{color:var(--text-primary)} - -/* Info modal (block metrics) */ 
-#clone-info-modal{max-width:640px;width:92vw;max-height:85vh} -#clone-info-modal .modal-head{display:flex;align-items:center;justify-content:space-between; - padding:var(--sp-3) var(--sp-4);border-bottom:1px solid var(--border)} -#clone-info-modal .modal-head h2{font-size:1rem} -#clone-info-modal .modal-body{padding:var(--sp-3) var(--sp-4);overflow-y:auto;flex:1 1 auto;min-height:0} -.info-dl{display:grid;grid-template-columns:1fr 1fr;gap:0;margin:0} -.info-dl>div{display:flex;justify-content:space-between;gap:var(--sp-2); - padding:var(--sp-2) var(--sp-3);border-bottom:1px solid var(--border)} -.info-dl>div:nth-last-child(-n+2){border-bottom:none} -.info-dl dt{font-size:.8rem;color:var(--text-muted);white-space:nowrap} -.info-dl dd{font-size:.8rem;font-weight:500;color:var(--text-primary);margin:0;text-align:right; - font-family:var(--font-mono)} - -/* Provenance modal */ -dialog.prov-modal{max-width:720px;width:92vw;max-height:86vh;padding:0;overflow:hidden; - border-radius:var(--radius-lg)} -.prov-modal-body{padding:var(--sp-4) var(--sp-5) var(--sp-5);overflow-y:auto;flex:1 1 auto} -.prov-modal .prov-summary{padding:var(--sp-2) var(--sp-5) var(--sp-3); - border-top:none;border-bottom:1px solid var(--border);flex-shrink:0; - background:color-mix(in srgb,var(--bg-raised) 50%,transparent)} - -/* Provenance hero — status header at top of modal */ -.prov-hero{position:relative;display:flex;align-items:center;gap:var(--sp-4); - padding:var(--sp-4) var(--sp-5);flex-shrink:0; - border-bottom:1px solid var(--border); - background:linear-gradient(180deg, - color-mix(in srgb,var(--bg-raised) 55%,transparent) 0%, - var(--bg-surface) 100%)} -.prov-hero-badge{display:inline-flex;align-items:center;gap:7px; - padding:6px 12px 6px 10px;border-radius:999px;font-weight:700;font-size:.78rem; - letter-spacing:.005em;white-space:nowrap;flex-shrink:0; - border:1px solid var(--border);background:var(--bg-surface)} -.prov-hero-icon{flex-shrink:0} -.prov-hero-label{line-height:1} 
-.prov-hero--green .prov-hero-badge{color:var(--success); - background:color-mix(in srgb,var(--success) 10%,var(--bg-surface)); - border-color:color-mix(in srgb,var(--success) 45%,var(--border))} -.prov-hero--amber .prov-hero-badge{color:var(--warning); - background:color-mix(in srgb,var(--warning) 10%,var(--bg-surface)); - border-color:color-mix(in srgb,var(--warning) 45%,var(--border))} -.prov-hero--red .prov-hero-badge{color:var(--error); - background:color-mix(in srgb,var(--error) 10%,var(--bg-surface)); - border-color:color-mix(in srgb,var(--error) 50%,var(--border))} -.prov-hero--neutral .prov-hero-badge{color:var(--text-secondary)} -.prov-hero-text{display:flex;flex-direction:column;gap:2px;min-width:0;flex:1} -.prov-hero-title{font-size:1.02rem;font-weight:700;letter-spacing:-.01em; - color:var(--text-primary);margin:0;line-height:1.25} -.prov-hero-sub{font-size:.8rem;color:var(--text-secondary);margin:0;line-height:1.35; - overflow:hidden;text-overflow:ellipsis} -.prov-hero-close{flex-shrink:0;align-self:flex-start} - -""" - -# --------------------------------------------------------------------------- -# Command palette -# --------------------------------------------------------------------------- - -_CMD_PALETTE = "" # removed: command palette eliminated - -# --------------------------------------------------------------------------- -# Toast notifications -# --------------------------------------------------------------------------- - -_TOAST = """\ -.toast-container{position:fixed;bottom:var(--sp-6);right:var(--sp-6);z-index:2000; - display:flex;flex-direction:column;gap:var(--sp-2)} -.toast{padding:var(--sp-3) var(--sp-5);background:var(--bg-overlay);border:1px solid var(--border); - border-radius:var(--radius-lg);box-shadow:var(--shadow-lg);font-size:.85rem;color:var(--text-primary); - animation:toast-in var(--dur-slow) var(--ease)} -@keyframes toast-in{from{opacity:0;transform:translateY(8px)}to{opacity:1;transform:none}} -""" - -# 
--------------------------------------------------------------------------- -# Utility -# --------------------------------------------------------------------------- - -_UTILITY = """\ -/* Responsive */ -@media(max-width:768px){ - .overview-kpi-grid{grid-template-columns:repeat(2,1fr)} - .toolbar{flex-direction:column;align-items:stretch} - .toolbar-left,.toolbar-right{justify-content:flex-start} - .overview-list{grid-template-columns:1fr} - .items{grid-template-columns:1fr} - .items .item{border-right:none} - .overview-row-head{flex-wrap:wrap} - .overview-row-spread{margin-left:0;width:100%} - .suggestion-head{flex-direction:column;align-items:flex-start} - .suggestion-facts{grid-template-columns:1fr} - .sf-head{flex-direction:column;align-items:flex-start} - .sf-meta{width:100%} - .dir-hotspot-head{flex-wrap:wrap;align-items:flex-start} - .dir-hotspot-detail{flex-wrap:wrap;align-items:flex-start} - .dir-hotspot-bar-track{width:min(148px,42%);min-width:96px} - .dir-hotspot-meta{width:100%} - .container{padding:0 var(--sp-3)} - .topbar{position:static} - .topbar-inner{height:auto;padding:var(--sp-2) var(--sp-3);flex-direction:row; - align-items:center;gap:var(--sp-2)} - .brand{flex:1;min-width:0;align-items:center;gap:var(--sp-2)} - .brand-logo{width:24px;height:24px} - .brand-text{gap:0} - .brand h1{font-size:.85rem;line-height:1.25;white-space:nowrap;overflow:hidden;text-overflow:ellipsis} - .brand-project-name{font-size:.78em;padding:0 3px} - .brand-meta{display:none} - .topbar-actions{flex-shrink:0;gap:var(--sp-1)} - .topbar-actions .prov-pill{font-size:0;gap:0;width:32px;height:32px; - padding:0;align-items:center;justify-content:center} - .topbar-actions .prov-pill-label{display:none} - .topbar-actions .prov-pill-icon{opacity:1} - .theme-toggle{font-size:0;gap:0;width:32px;height:32px; - padding:0;align-items:center;justify-content:center} - .theme-toggle svg{width:16px;height:16px} - .ide-picker-btn{font-size:0;gap:0;width:32px;height:32px; - 
padding:0;align-items:center;justify-content:center} - .ide-picker-btn svg{width:16px;height:16px} - .ide-picker-label{display:none} - .ide-menu{right:0;min-width:140px} - .main-tabs-wrap{position:sticky;top:0;z-index:90;padding:var(--sp-2) 0 0} - .main-tabs{padding:var(--sp-1);gap:2px; - background: - linear-gradient(to right,var(--bg-surface) 30%,transparent) left center / 28px 100% no-repeat local, - linear-gradient(to left,var(--bg-surface) 30%,transparent) right center / 28px 100% no-repeat local, - linear-gradient(to right,rgba(0,0,0,.12),transparent) left center / 10px 100% no-repeat scroll, - linear-gradient(to left,rgba(0,0,0,.12),transparent) right center / 10px 100% no-repeat scroll, - var(--bg-surface)} - .main-tab{flex:none;padding:var(--sp-1) var(--sp-2);font-size:.78rem} - .main-tab-icon{width:13px;height:13px} -} -@media(max-width:480px){ - .overview-kpi-grid{grid-template-columns:1fr} - .search-box input[type="text"]{width:140px} - .brand-logo{width:28px;height:28px} -} - -/* IDE link */ -.ide-link{color:inherit;text-decoration:none;cursor:default} -[data-ide]:not([data-ide=""]) .ide-link{cursor:pointer;color:var(--accent-primary); - text-decoration-line:underline;text-decoration-style:dotted;text-underline-offset:2px} -[data-ide]:not([data-ide=""]) .ide-link:hover{text-decoration-style:solid} - -/* IDE picker dropdown */ -.ide-picker{position:relative;display:inline-flex} -.ide-picker-btn{display:inline-flex;align-items:center;gap:var(--sp-1); - padding:var(--sp-1) var(--sp-3);background:none;border:1px solid var(--border); - border-radius:var(--radius-md);cursor:pointer;color:var(--text-muted);font-size:.85rem; - font-weight:500;font-family:inherit;transition:all var(--dur-fast) var(--ease); - white-space:nowrap} -.ide-picker-btn:hover{color:var(--text-primary);background:var(--bg-raised);border-color:var(--border-strong)} -.ide-picker-btn svg{width:16px;height:16px;flex-shrink:0} 
-.ide-picker-btn[aria-expanded="true"]{color:var(--accent-primary);border-color:var(--accent-primary)} -.ide-menu{display:none;position:absolute;top:100%;right:0;margin-top:var(--sp-1); - min-width:160px;background:var(--bg-surface);border:1px solid var(--border); - border-radius:var(--radius);box-shadow:0 4px 12px rgba(0,0,0,.15); - z-index:100;padding:var(--sp-1) 0;list-style:none} -.ide-menu[data-open]{display:block} -.ide-menu li{padding:0} -.ide-menu button{display:flex;align-items:center;gap:var(--sp-2);width:100%; - padding:var(--sp-1) var(--sp-3);background:none;border:none;color:var(--text-primary); - font-size:.8rem;font-family:var(--font-sans);cursor:pointer;text-align:left} -.ide-menu button:hover{background:var(--bg-alt)} -.ide-menu button[aria-checked="true"]{color:var(--accent-primary);font-weight:600} -.ide-menu button[aria-checked="true"]::before{content:'\\2713';font-size:.7rem; - width:14px;text-align:center;flex-shrink:0} -.ide-menu button[aria-checked="false"]::before{content:'';width:14px;flex-shrink:0} - -/* Print */ -@media print{ - .topbar,.toolbar,.pagination,.theme-toggle,.toast-container, - .novelty-tabs,.clear-btn,.btn,.ide-picker{display:none!important} - .tab-panel{display:block!important;break-inside:avoid} - .group-body{display:block!important} - body{background:#fff;color:#000} -} -""" - -# --------------------------------------------------------------------------- -# Footer -# --------------------------------------------------------------------------- - -_FOOTER = """\ -.report-footer{margin-top:var(--sp-8);padding:var(--sp-4) 0;border-top:1px solid var(--border); - text-align:center;font-size:.78rem;color:var(--text-muted)} -.report-footer a{color:var(--accent-primary)} -.report-footer-main{display:block} -.report-footer-schemas{margin-top:var(--sp-1);font-size:.72rem;letter-spacing:.01em; - font-variant-numeric:tabular-nums;opacity:.85} -""" - - -# --------------------------------------------------------------------------- -# 
Public API -# --------------------------------------------------------------------------- - -_ALL_SECTIONS = ( - _TOKENS_DARK, - _TOKENS_LIGHT, - _RESET, - _LAYOUT, - _CONTROLS, - _SEARCH, - _TOOLBAR, - _INSIGHT, - _TABLES, - _SUB_TABS, - _SECTIONS, - _ITEMS, - _CODE, - _BADGES, - _OVERVIEW, - _DEPENDENCIES, - _NOVELTY, - _DEAD_CODE, - _SUGGESTIONS, - _STRUCTURAL, - _META_PANEL, - _MICRO_INTERACTIONS, - _EMPTY, - _COUPLED, - _MODAL, - _CMD_PALETTE, - _TOAST, - _UTILITY, - _FOOTER, -) - - -def build_css() -> str: - """Return the complete CSS string for the HTML report.""" - return "\n".join(_ALL_SECTIONS) diff --git a/codeclone/_html_report/_assets/js.py b/codeclone/_html_report/_assets/js.py deleted file mode 100644 index 0a59b38..0000000 --- a/codeclone/_html_report/_assets/js.py +++ /dev/null @@ -1,843 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""JavaScript for the HTML report — modular IIFE with feature blocks.""" - -from __future__ import annotations - -# --------------------------------------------------------------------------- -# Core helpers -# --------------------------------------------------------------------------- - -_CORE = """\ -const $=s=>document.querySelector(s); -const $$=s=>[...document.querySelectorAll(s)]; - -/* Shared Filters popover wiring: one button opens a menu, outside-click + - Escape dismiss it. Reused by Clones (per-section) and Suggestions (global). 
*/ -function wireFiltersPopover(toggleEl){ - if(!toggleEl)return; - const popover=toggleEl.parentElement; - if(!popover)return; - const menu=popover.querySelector('.filters-menu'); - if(!menu)return; - function setOpen(open){ - toggleEl.setAttribute('aria-expanded',open?'true':'false'); - if(open)menu.removeAttribute('hidden'); - else menu.setAttribute('hidden',''); - } - toggleEl.addEventListener('click',e=>{ - e.stopPropagation(); - setOpen(toggleEl.getAttribute('aria-expanded')!=='true'); - }); - document.addEventListener('click',e=>{ - if(toggleEl.getAttribute('aria-expanded')!=='true')return; - if(popover.contains(e.target))return; - setOpen(false); - }); - document.addEventListener('keydown',e=>{ - if(e.key!=='Escape')return; - if(toggleEl.getAttribute('aria-expanded')!=='true')return; - setOpen(false); - toggleEl.focus(); - }); -} -window.wireFiltersPopover=wireFiltersPopover; -""" - -# --------------------------------------------------------------------------- -# Theme -# --------------------------------------------------------------------------- - -_THEME = """\ -(function initTheme(){ - const key='codeclone-theme'; - const root=document.documentElement; - const saved=localStorage.getItem(key); - // Always resolve + set data-theme so icon CSS selectors always match. 
- const initial=saved==='light'||saved==='dark' - ?saved - :(matchMedia('(prefers-color-scheme:dark)').matches?'dark':'light'); - root.setAttribute('data-theme',initial); - - const btn=$('.theme-toggle'); - if(!btn)return; - btn.addEventListener('click',()=>{ - const next=root.getAttribute('data-theme')==='dark'?'light':'dark'; - root.setAttribute('data-theme',next); - localStorage.setItem(key,next); - }); -})(); -""" - -# --------------------------------------------------------------------------- -# Main tabs -# --------------------------------------------------------------------------- - -_TABS = """\ -(function initTabs(){ - const tabs=$$('.main-tab'); - const panels=$$('.tab-panel'); - if(!tabs.length)return; - - function activate(id){ - tabs.forEach(t=>{t.setAttribute('aria-selected',t.dataset.tab===id?'true':'false')}); - panels.forEach(p=>{p.classList.toggle('active',p.id==='panel-'+id)}); - history.replaceState(null,'','#'+id); - } - - tabs.forEach(t=>t.addEventListener('click',()=>activate(t.dataset.tab))); - - // Keyboard: arrow left/right - const tabList=$('[role="tablist"].main-tabs'); - if(tabList){ - tabList.addEventListener('keydown',e=>{ - const idx=tabs.indexOf(document.activeElement); - if(idx<0)return; - let next=-1; - if(e.key==='ArrowRight')next=(idx+1)%tabs.length; - else if(e.key==='ArrowLeft')next=(idx-1+tabs.length)%tabs.length; - if(next>=0){e.preventDefault();tabs[next].focus();activate(tabs[next].dataset.tab)} - }); - } - - // Hash deep-link - const hash=location.hash.slice(1); - const valid=tabs.map(t=>t.dataset.tab); - activate(valid.includes(hash)?hash:valid[0]||''); -})(); -""" - -# --------------------------------------------------------------------------- -# Sub-tabs (clone-nav / split-tabs) -# --------------------------------------------------------------------------- - -_SUB_TABS = """\ -(function initSubTabs(){ - $$('.clone-nav-btn').forEach(btn=>{ - btn.addEventListener('click',()=>{ - const group=btn.dataset.subtabGroup; - 
if(!group)return; - $$('.clone-nav-btn[data-subtab-group="'+group+'"]').forEach(b=>b.classList.remove('active')); - btn.classList.add('active'); - $$('.clone-panel[data-subtab-group="'+group+'"]').forEach(p=>{ - p.classList.toggle('active',p.dataset.clonePanel===btn.dataset.cloneTab); - }); - }); - }); -})(); -""" - -# --------------------------------------------------------------------------- -# Sections: search, filter, pagination, collapse/expand -# --------------------------------------------------------------------------- - -_SECTIONS = """\ -(function initSections(){ - // Registry so novelty filter can call applyFilters directly (no debounce) - window.__sectionFilters=window.__sectionFilters||{}; - - $$('[data-section]').forEach(sec=>{ - const id=sec.dataset.section; - const groups=[...sec.querySelectorAll('.group[data-group="'+id+'"]')]; - const searchInput=$('#search-'+id); - const pageMeta=sec.querySelector('[data-page-meta="'+id+'"]'); - const pageSizeSelect=sec.querySelector('[data-pagesize="'+id+'"]'); - const sourceKindFilter=sec.querySelector('[data-source-kind-filter="'+id+'"]'); - const cloneTypeFilter=sec.querySelector('[data-clone-type-filter="'+id+'"]'); - const spreadFilter=sec.querySelector('[data-spread-filter="'+id+'"]'); - const minOccCheck=sec.querySelector('[data-min-occurrences-filter="'+id+'"]'); - - let page=1; - let pageSize=parseInt(pageSizeSelect?.value||'10',10); - - function isAll(v){return !v||v==='all'} - - function activeFilterCount(){ - let n=0; - if(!isAll(sourceKindFilter?.value))n++; - if(!isAll(cloneTypeFilter?.value))n++; - if(!isAll(spreadFilter?.value))n++; - if(minOccCheck?.checked)n++; - return n; - } - - function updateFiltersBadge(){ - const badge=sec.querySelector('[data-filters-count="'+id+'"]'); - if(!badge)return; - const n=activeFilterCount(); - if(n>0){badge.hidden=false;badge.textContent=String(n)} - else{badge.hidden=true;badge.textContent='0'} - } - - function applyFilters(){ - const 
q=(searchInput?.value||'').toLowerCase().trim(); - const sk=sourceKindFilter?.value||''; - const ct=cloneTypeFilter?.value||''; - const sp=spreadFilter?.value||''; - const minOcc=minOccCheck?.checked||false; - - groups.forEach(g=>{ - // Novelty-hidden groups are always hidden - if(g.getAttribute('data-novelty-hidden')==='true'){g.style.display='none';return} - let show=true; - if(q&&!(g.dataset.search||'').includes(q))show=false; - if(!isAll(sk)&&g.dataset.sourceKind!==sk)show=false; - if(!isAll(ct)&&g.dataset.cloneType!==ct)show=false; - if(!isAll(sp)&&g.dataset.spreadBucket!==sp)show=false; - if(minOcc&&parseInt(g.dataset.groupArity||'0',10)<4)show=false; - g.style.display=show?'':'none'; - }); - updateFiltersBadge(); - page=1; - paginate(); - } - - function paginate(){ - // Collect groups that passed both novelty + search/filter - const vis=groups.filter(g=>g.style.display!=='none'); - const totalPages=Math.max(1,Math.ceil(vis.length/pageSize)); - if(page>totalPages)page=totalPages; - const start=(page-1)*pageSize; - const end=start+pageSize; - vis.forEach((g,i)=>{g.style.display=i>=start&&i{clearTimeout(timer);timer=setTimeout(applyFilters,200)}); - } - [sourceKindFilter,cloneTypeFilter,spreadFilter].forEach(el=>{ - if(el)el.addEventListener('change',applyFilters); - }); - if(minOccCheck)minOccCheck.addEventListener('change',applyFilters); - if(pageSizeSelect)pageSizeSelect.addEventListener('change',()=>{ - pageSize=parseInt(pageSizeSelect.value,10);page=1;paginate()}); - - // Clear search - const clearBtn=sec.querySelector('[data-clear="'+id+'"]'); - if(clearBtn&&searchInput)clearBtn.addEventListener('click',()=>{searchInput.value='';applyFilters()}); - - // Prev/Next - const prevBtn=sec.querySelector('[data-prev="'+id+'"]'); - const nextBtn=sec.querySelector('[data-next="'+id+'"]'); - if(prevBtn)prevBtn.addEventListener('click',()=>{if(page>1){page--;paginate()}}); - if(nextBtn)nextBtn.addEventListener('click',()=>{ - const vis=visible();const 
tp=Math.max(1,Math.ceil(vis.length/pageSize)); - if(page{ - const expanded=expandToggle.dataset.expanded==='true'; - const target=!expanded; - const scope=target - ? groups.filter(g=>g.style.display!=='none') - : groups; - scope.forEach(g=>{ - const body=g.querySelector('.group-body'); - const toggle=g.querySelector('.group-toggle'); - if(target){ - if(body)body.classList.add('expanded'); - if(toggle)toggle.classList.add('expanded'); - }else{ - if(body)body.classList.remove('expanded'); - if(toggle)toggle.classList.remove('expanded'); - } - }); - expandToggle.dataset.expanded=target?'true':'false'; - expandToggle.textContent=target?'Collapse all':'Expand all'; - }); - } - - // Filters popover (shared helper handles open/close + dismiss) - wireFiltersPopover(sec.querySelector('[data-filters-toggle="'+id+'"]')); - - // Initial - applyFilters(); - }); - - // Toggle individual groups - document.addEventListener('click',e=>{ - const btn=e.target.closest('[data-toggle-group]'); - if(!btn)return; - const groupId=btn.dataset.toggleGroup; - const body=$('#group-body-'+groupId); - if(!body)return; - body.classList.toggle('expanded'); - btn.classList.toggle('expanded'); - }); - - // Also toggle on group-head click (except buttons) - document.addEventListener('click',e=>{ - const head=e.target.closest('.group-head'); - if(!head)return; - if(e.target.closest('button'))return; - const toggle=head.querySelector('.group-toggle'); - if(toggle)toggle.click(); - }); -})(); -""" - -# --------------------------------------------------------------------------- -# Novelty filter (global new/known) -# --------------------------------------------------------------------------- - -_NOVELTY = """\ -(function initNovelty(){ - const ctrl=$('#global-novelty-controls'); - if(!ctrl)return; - const defaultNovelty=ctrl.dataset.defaultNovelty||'new'; - const btns=$$('[data-global-novelty]'); - let activeNovelty=''; - - function applyNovelty(val){ - activeNovelty=val; - 
btns.forEach(b=>b.classList.toggle('active',b.dataset.globalNovelty===val)); - $$('.group[data-novelty]').forEach(g=>{ - const nov=g.dataset.novelty; - if(nov==='all')g.setAttribute('data-novelty-hidden','false'); - else g.setAttribute('data-novelty-hidden',nov!==val?'true':'false'); - }); - // Re-run section filters directly (no debounce) - const reg=window.__sectionFilters||{}; - Object.values(reg).forEach(fn=>fn()); - } - - btns.forEach(b=>b.addEventListener('click',()=>applyNovelty(b.dataset.globalNovelty))); - applyNovelty(defaultNovelty); -})(); -""" - -# --------------------------------------------------------------------------- -# Modals (dialog-based for block metrics info) -# --------------------------------------------------------------------------- - -_MODALS = """\ -(function initModals(){ - let dlg=$('#clone-info-modal'); - if(!dlg){ - dlg=document.createElement('dialog'); - dlg.id='clone-info-modal'; - dlg.innerHTML='' - +''; - document.body.appendChild(dlg); - dlg.querySelector('.modal-close').addEventListener('click',()=>dlg.close()); - dlg.addEventListener('click',e=>{if(e.target===dlg)dlg.close()}); - } - - document.addEventListener('click',e=>{ - const btn=e.target.closest('[data-metrics-btn]'); - if(!btn)return; - const groupId=btn.dataset.metricsBtn; - const group=btn.closest('.group'); - if(!group)return; - const d=group.dataset; - const items=[]; - function add(label,val){if(val)items.push('
'+label+'
'+val+'
')} - add('Match rule',d.matchRule); - add('Block size',d.blockSize); - add('Signature',d.signatureKind); - add('Merged regions',d.mergedRegions); - add('Pattern',d.patternLabel); - add('Hint',d.hintLabel); - add('Hint confidence',d.hintConfidence); - add('Assert ratio',d.assertRatio); - add('Consecutive asserts',d.consecutiveAsserts); - add('Boilerplate asserts',d.boilerplateAsserts); - add('Group arity',d.groupArity); - add('Clone type',d.cloneType); - add('Source kind',d.sourceKind); - if(d.spreadFiles)add('Spread',d.spreadFunctions+' fn / '+d.spreadFiles+' files'); - dlg.querySelector('#modal-title').textContent='Group: '+groupId; - dlg.querySelector('#modal-body').innerHTML=items.length - ?'
'+items.join('')+'
' - :'

No metadata available.

'; - dlg.showModal(); - }); -})(); -""" - -# --------------------------------------------------------------------------- -# Suggestions filter -# --------------------------------------------------------------------------- - -_SUGGESTIONS = """\ -(function initSuggestions(){ - const body=$('[data-suggestions-body]'); - if(!body)return; - const cards=[...body.querySelectorAll('[data-suggestion-card]')]; - const sevSel=$('[data-suggestions-severity]'); - const catSel=$('[data-suggestions-category]'); - const famSel=$('[data-suggestions-family]'); - const skSel=$('[data-suggestions-source-kind]'); - const spSel=$('[data-suggestions-spread]'); - const actCheck=$('[data-suggestions-actionable]'); - const countLabel=$('[data-suggestions-count]'); - const filtersBadge=$('[data-filters-count="suggestions"]'); - - function activeFilterCount(){ - let n=0; - [sevSel,catSel,famSel,skSel,spSel].forEach(el=>{ - if(el&&el.value)n++; - }); - if(actCheck?.checked)n++; - return n; - } - - function updateFiltersBadge(){ - if(!filtersBadge)return; - const n=activeFilterCount(); - if(n>0){filtersBadge.hidden=false;filtersBadge.textContent=String(n)} - else{filtersBadge.hidden=true;filtersBadge.textContent='0'} - } - - function apply(){ - const sev=sevSel?.value||''; - const cat=catSel?.value||''; - const fam=famSel?.value||''; - const sk=skSel?.value||''; - const sp=spSel?.value||''; - const act=actCheck?.checked||false; - let shown=0; - cards.forEach(c=>{ - let hide=false; - if(sev&&c.dataset.severity!==sev)hide=true; - if(cat&&c.dataset.category!==cat)hide=true; - if(fam&&c.dataset.family!==fam)hide=true; - if(sk&&c.dataset.sourceKind!==sk)hide=true; - if(sp&&c.dataset.spreadBucket!==sp)hide=true; - if(act&&c.dataset.actionable!=='true')hide=true; - c.setAttribute('data-filter-hidden',hide?'true':'false'); - if(!hide)shown++; - }); - if(countLabel)countLabel.textContent=shown+' shown'; - updateFiltersBadge(); - } - - 
[sevSel,catSel,famSel,skSel,spSel].forEach(el=>{if(el)el.addEventListener('change',apply)}); - if(actCheck)actCheck.addEventListener('change',apply); - - // Popover wiring (shared helper) - wireFiltersPopover($('[data-filters-toggle="suggestions"]')); - - // Initial - apply(); -})(); -""" - -# --------------------------------------------------------------------------- -# Dependency graph hover -# --------------------------------------------------------------------------- - -_DEP_GRAPH = """\ -(function initDepGraph(){ - const svg=$('.dep-graph-svg'); - if(!svg)return; - const nodes=$$('.dep-node'); - const labels=$$('.dep-label'); - const edges=$$('.dep-edge'); - - function highlight(name){ - nodes.forEach(n=>{n.style.fillOpacity=n.dataset.node===name?'1':'0.15'}); - labels.forEach(l=>{l.style.fill=l.dataset.node===name?'var(--text-primary)':'var(--text-muted)'; - l.style.fillOpacity=l.dataset.node===name?'1':'0.3'}); - edges.forEach(e=>{ - const connected=e.dataset.source===name||e.dataset.target===name; - e.style.strokeOpacity=connected?'0.8':'0.05'; - e.style.strokeWidth=connected?'2':'1'; - }); - } - - function reset(){ - nodes.forEach(n=>{n.style.fillOpacity=''}); - labels.forEach(l=>{l.style.fill='';l.style.fillOpacity=''}); - edges.forEach(e=>{e.style.strokeOpacity='';e.style.strokeWidth=''}); - } - - [...nodes,...labels].forEach(el=>{ - el.addEventListener('mouseenter',()=>highlight(el.dataset.node)); - el.addEventListener('mouseleave',reset); - el.style.cursor='pointer'; - }); -})(); -""" - -# --------------------------------------------------------------------------- -# Meta panel toggle -# --------------------------------------------------------------------------- - -_META_PANEL = """\ -(function initBadgeModal(){ - const dlg=$('#badge-modal'); - if(!dlg)return; - - /* --- state --- */ - var _grade='',_score=0,_variant='grade'; - - /* --- grade→shields color (canonical bands) --- */ - function badgeColor(g){ - return 
g==='A'?'brightgreen':g==='B'?'green':g==='C'?'yellow':g==='D'?'orange':'red'} - - /* --- build shield URLs & embed codes for current variant --- */ - function render(){ - var label,alt,url; - if(_variant==='full'){ - label=_score+' ('+_grade+')';alt='codeclone '+_score+' ('+_grade+')'; - }else{ - label='grade '+_grade;alt='codeclone grade '+_grade;} - url='https://img.shields.io/badge/codeclone-' - +encodeURIComponent(label).replace(/-/g,'--')+'-'+badgeColor(_grade); - var prev=dlg.querySelector('#badge-preview'); - if(prev)prev.innerHTML=''+alt+''; - var md=dlg.querySelector('#badge-code-md'); - if(md)md.textContent='!['+alt+']('+url+')'; - var ht=dlg.querySelector('#badge-code-html'); - if(ht)ht.textContent=''+alt+'';} - - /* --- tabs --- */ - dlg.querySelectorAll('[data-badge-tab]').forEach(function(tab){ - tab.addEventListener('click',function(){ - dlg.querySelectorAll('[data-badge-tab]').forEach(function(t){ - t.classList.remove('badge-tab--active');t.setAttribute('aria-selected','false')}); - tab.classList.add('badge-tab--active');tab.setAttribute('aria-selected','true'); - _variant=tab.dataset.badgeTab;render();});}); - - /* --- open --- */ - document.addEventListener('click',function(e){ - var btn=e.target.closest('[data-badge-open]'); - if(!btn)return; - _grade=btn.dataset.badgeGrade||''; - _score=parseInt(btn.dataset.badgeScore||'0',10); - _variant='grade'; - dlg.querySelectorAll('[data-badge-tab]').forEach(function(t){ - var active=t.dataset.badgeTab==='grade'; - t.classList.toggle('badge-tab--active',active); - t.setAttribute('aria-selected',active?'true':'false');}); - render();dlg.showModal(); - var fc=dlg.querySelector('[data-badge-close]');if(fc)fc.focus();}); - - /* --- close --- */ - var closeBtn=dlg.querySelector('[data-badge-close]'); - if(closeBtn)closeBtn.addEventListener('click',function(){dlg.close()}); - dlg.addEventListener('click',function(e){if(e.target===dlg)dlg.close()}); - - /* --- copy with feedback --- */ - 
dlg.addEventListener('click',function(e){ - var copyBtn=e.target.closest('[data-badge-copy]'); - if(!copyBtn)return; - var which=copyBtn.dataset.badgeCopy; - var code=dlg.querySelector('#badge-code-'+which); - if(!code)return; - navigator.clipboard.writeText(code.textContent).then(function(){ - copyBtn.textContent='\u2713 Copied';copyBtn.classList.add('badge-copy-btn--ok'); - setTimeout(function(){copyBtn.textContent='Copy'; - copyBtn.classList.remove('badge-copy-btn--ok')},1500);});}); -})(); -(function initProvModal(){ - const dlg=$('#prov-modal'); - if(!dlg)return; - const openBtn=$('[data-prov-open]'); - const closeBtn=dlg.querySelector('[data-prov-close]'); - if(openBtn)openBtn.addEventListener('click',()=>dlg.showModal()); - if(closeBtn)closeBtn.addEventListener('click',()=>dlg.close()); - dlg.addEventListener('click',function(e){ - if(e.target===dlg){dlg.close();return} - var copyBtn=e.target.closest('[data-prov-copy]'); - if(!copyBtn)return; - e.stopPropagation(); - var payload=copyBtn.getAttribute('data-prov-copy')||''; - if(!payload||!navigator.clipboard)return; - navigator.clipboard.writeText(payload).then(function(){ - copyBtn.classList.add('prov-copy-btn--ok'); - var original=copyBtn.innerHTML; - copyBtn.innerHTML=''; - setTimeout(function(){ - copyBtn.classList.remove('prov-copy-btn--ok'); - copyBtn.innerHTML=original; - },1400); - }); - }); -})(); -(function initFindingWhy(){ - var dlg=$('#finding-why-modal'); - if(!dlg)return; - var body=dlg.querySelector('.modal-body'); - var closeBtn=dlg.querySelector('[data-finding-why-close]'); - closeBtn.addEventListener('click',function(){dlg.close()}); - dlg.addEventListener('click',function(e){if(e.target===dlg)dlg.close()}); - document.addEventListener('click',function(e){ - var btn=e.target.closest('[data-finding-why-btn]'); - if(!btn)return; - var tplId=btn.getAttribute('data-finding-why-btn'); - var tpl=document.getElementById(tplId); - if(!tpl)return; - body.innerHTML=tpl.innerHTML; - dlg.showModal(); - 
}); -})(); -""" - -# --------------------------------------------------------------------------- -# JSON export -# --------------------------------------------------------------------------- - -_EXPORT = "" # removed: Export JSON button eliminated from topbar - -# --------------------------------------------------------------------------- -# Command Palette (Cmd/Ctrl+K) -# --------------------------------------------------------------------------- - -_CMD_PALETTE = "" # removed: command palette eliminated - -# --------------------------------------------------------------------------- -# Table sort -# --------------------------------------------------------------------------- - -_TABLE_SORT = """\ -(function initTableSort(){ - $$('.table th[data-sortable]').forEach(th=>{ - th.addEventListener('click',()=>{ - const table=th.closest('.table'); - if(!table)return; - const idx=[...th.parentElement.children].indexOf(th); - const tbody=table.querySelector('tbody')||table; - const rows=[...tbody.querySelectorAll('tr')].filter(r=>r.querySelector('td')); - const cur=th.getAttribute('aria-sort'); - const dir=cur==='ascending'?'descending':'ascending'; - // Reset siblings - [...th.parentElement.children].forEach(s=>{s.removeAttribute('aria-sort')}); - th.setAttribute('aria-sort',dir); - - rows.sort((a,b)=>{ - const at=(a.children[idx]?.textContent||'').trim(); - const bt=(b.children[idx]?.textContent||'').trim(); - const an=parseFloat(at),bn=parseFloat(bt); - const cmp=(!isNaN(an)&&!isNaN(bn))?an-bn:at.localeCompare(bt); - return dir==='ascending'?cmp:-cmp; - }); - rows.forEach(r=>tbody.appendChild(r)); - }); - }); -})(); -""" - -# --------------------------------------------------------------------------- -# Toast -# --------------------------------------------------------------------------- - -_TOAST = """\ -function toast(msg){ - let c=$('.toast-container'); - if(!c){c=document.createElement('div');c.className='toast-container';document.body.appendChild(c)} - const 
t=document.createElement('div');t.className='toast';t.textContent=msg; - c.appendChild(t); - setTimeout(()=>{t.style.opacity='0';t.style.transform='translateY(8px)'; - setTimeout(()=>t.remove(),300)},3000); -} -""" - -# --------------------------------------------------------------------------- -# Lazy highlight (IntersectionObserver for code snippets) -# --------------------------------------------------------------------------- - -_SCOPE_COUNTERS = """\ -function updateCloneScopeCounters(){ - const sections=['functions','blocks','segments']; - let total=0; - sections.forEach(id=>{ - const sec=document.querySelector('[data-section="'+id+'"]'); - if(!sec)return; - const vis=[...sec.querySelectorAll('.group[data-group="'+id+'"]')] - .filter(g=>g.style.display!=='none'&&g.getAttribute('data-novelty-hidden')!=='true'); - total+=vis.length; - const tabCount=document.querySelector('[data-clone-tab-count="'+id+'"]'); - if(tabCount){tabCount.textContent=vis.length;tabCount.dataset.totalGroups=vis.length} - }); - const mainBtn=document.querySelector('[data-main-clones-count]'); - if(mainBtn)mainBtn.setAttribute('data-main-clones-count',total); -} -""" - -_LAZY_HIGHLIGHT = "" - -# --------------------------------------------------------------------------- -# IDE links -# --------------------------------------------------------------------------- - -_IDE_LINKS = r""" -(function initIdeLinks(){ - const KEY='codeclone-ide'; - const root=document.documentElement; - var scanRoot=root.getAttribute('data-scan-root')||''; - var projectName=scanRoot.replace(/\/$/,'').split('/').pop()||''; - - function relPath(abs){ - var r=scanRoot.replace(/\/$/,'')+'/'; - if(abs.indexOf(r)===0)return abs.substring(r.length); - return abs; - } - - const SCHEMES={ - pycharm:{label:'PyCharm', - url:function(f,l){return 'jetbrains://pycharm/navigate/reference?project='+encodeURIComponent(projectName)+'&path='+encodeURIComponent(relPath(f))+':'+l}}, - idea:{label:'IntelliJ IDEA', - 
url:function(f,l){return 'jetbrains://idea/navigate/reference?project='+encodeURIComponent(projectName)+'&path='+encodeURIComponent(relPath(f))+':'+l}}, - vscode:{label:'VS Code', - url:function(f,l){return 'vscode://file'+f+':'+l}}, - cursor:{label:'Cursor', - url:function(f,l){return 'cursor://file'+f+':'+l}}, - fleet:{label:'Fleet', - url:function(f,l){return 'fleet://open?file='+encodeURIComponent(f)+'&line='+l}}, - zed:{label:'Zed', - url:function(f,l){return 'zed://file'+f+':'+l}}, - '': {label:'None',url:null} - }; - - var current=localStorage.getItem(KEY)||''; - root.setAttribute('data-ide',current); - - const btn=$('.ide-picker-btn'); - const menu=$('.ide-menu'); - const label=$('.ide-picker-label'); - if(!btn||!menu)return; - - function updateLabel(){ - if(!label)return; - var s=SCHEMES[current]; - label.textContent=s&¤t?s.label:'IDE'; - } - - function setChecked(){ - menu.querySelectorAll('button').forEach(function(b){ - b.setAttribute('aria-checked',b.dataset.ide===current?'true':'false'); - }); - } - - function applyHrefs(){ - var s=SCHEMES[current]; - $$('.ide-link[data-file]').forEach(function(a){ - if(!current||!s||!s.url){a.removeAttribute('href');return} - var f=a.getAttribute('data-file'),l=a.getAttribute('data-line')||'1'; - if(!f)return; - a.setAttribute('href',s.url(f,l)); - }); - } - - setChecked(); - updateLabel(); - applyHrefs(); - - // Reapply hrefs when new content becomes visible (tab switch) - var mo=new MutationObserver(function(){applyHrefs()}); - document.querySelectorAll('.tab-panel').forEach(function(p){ - mo.observe(p,{attributes:true,attributeFilter:['class']}); - }); - - btn.addEventListener('click',function(e){ - e.stopPropagation(); - var open=menu.hasAttribute('data-open'); - if(open){menu.removeAttribute('data-open');btn.setAttribute('aria-expanded','false')} - else{menu.setAttribute('data-open','');btn.setAttribute('aria-expanded','true')} - }); - - document.addEventListener('click',function(){ - 
menu.removeAttribute('data-open');btn.setAttribute('aria-expanded','false'); - }); - - menu.addEventListener('click',function(e){ - e.stopPropagation(); - var b=e.target.closest('button[data-ide]'); - if(!b)return; - current=b.dataset.ide; - localStorage.setItem(KEY,current); - root.setAttribute('data-ide',current); - setChecked(); - updateLabel(); - applyHrefs(); - menu.removeAttribute('data-open');btn.setAttribute('aria-expanded','false'); - }); - -})(); -""" - -# --------------------------------------------------------------------------- -# Tooltips (fixed-position, escapes overflow containers) -# --------------------------------------------------------------------------- - -_TOOLTIPS = """\ -(function initTooltips(){ - let tip=null; - function show(e){ - const el=e.target; - const text=el.getAttribute('data-tip'); - if(!text)return; - tip=document.createElement('div'); - tip.className='kpi-tooltip'; - tip.textContent=text; - document.body.appendChild(tip); - const r=el.getBoundingClientRect(); - const tw=tip.offsetWidth; - const th=tip.offsetHeight; - let left=r.left+r.width/2-tw/2; - let top=r.bottom+6; - if(left<4)left=4; - if(left+tw>window.innerWidth-4)left=window.innerWidth-tw-4; - if(top+th>window.innerHeight-4){top=r.top-th-6} - tip.style.left=left+'px'; - tip.style.top=top+'px'; - } - function hide(){if(tip){tip.remove();tip=null}} - document.addEventListener('mouseenter',function(e){ - if(e.target.matches('.kpi-help[data-tip]'))show(e); - },true); - document.addEventListener('mouseleave',function(e){ - if(e.target.matches('.kpi-help[data-tip]'))hide(); - },true); -})(); -""" - -# --------------------------------------------------------------------------- -# Public API -# --------------------------------------------------------------------------- - -_ALL_MODULES = ( - _CORE, - _TOAST, - _THEME, - _TABS, - _SUB_TABS, - _SECTIONS, - _NOVELTY, - _MODALS, - _SUGGESTIONS, - _DEP_GRAPH, - _META_PANEL, - _EXPORT, - _CMD_PALETTE, - _TABLE_SORT, - 
_SCOPE_COUNTERS, - _LAZY_HIGHLIGHT, - _IDE_LINKS, - _TOOLTIPS, -) - - -def build_js() -> str: - """Return the complete JS string for the HTML report, wrapped in an IIFE.""" - body = "\n".join(_ALL_MODULES) - return f"(function(){{\n'use strict';\n{body}\n}})();\n" diff --git a/codeclone/_html_report/_primitives/data_attrs.py b/codeclone/_html_report/_primitives/data_attrs.py deleted file mode 100644 index d4e94f3..0000000 --- a/codeclone/_html_report/_primitives/data_attrs.py +++ /dev/null @@ -1,30 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""Unified data-attribute builder for HTML elements.""" - -from __future__ import annotations - -from ._html_escape import _escape_html - -__all__ = ["_build_data_attrs"] - - -def _build_data_attrs(attrs: dict[str, object | None]) -> str: - """Build a space-prefixed HTML data-attribute string from a dict. - - None values are omitted; empty strings are preserved as ``attr=""``. - All values are escaped. - Returns ``''`` when no attrs survive, or ``' data-foo="bar" ...'`` - (leading space) otherwise. - """ - parts: list[str] = [] - for key, val in attrs.items(): - if val is None: - continue - s = str(val) - parts.append(f'{key}="{_escape_html(s)}"') - return f" {' '.join(parts)}" if parts else "" diff --git a/codeclone/_html_report/_primitives/escape.py b/codeclone/_html_report/_primitives/escape.py deleted file mode 100644 index 381b033..0000000 --- a/codeclone/_html_report/_primitives/escape.py +++ /dev/null @@ -1,25 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import html - - -def _escape_html(v: object) -> str: - text = html.escape("" if v is None else str(v), quote=True) - text = text.replace("`", "`") - text = text.replace("\u2028", "
").replace("\u2029", "
") - return text - - -def _meta_display(v: object) -> str: - if isinstance(v, bool): - return "true" if v else "false" - if v is None: - return "n/a" - text = str(v).strip() - return text if text else "n/a" diff --git a/codeclone/_html_report/_primitives/filters.py b/codeclone/_html_report/_primitives/filters.py deleted file mode 100644 index e700fad..0000000 --- a/codeclone/_html_report/_primitives/filters.py +++ /dev/null @@ -1,59 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""Data-driven filter dropdown renderer for report toolbars.""" - -from __future__ import annotations - -from collections.abc import Sequence - -from ._html_escape import _escape_html - -__all__ = [ - "CLONE_TYPE_OPTIONS", - "SPREAD_OPTIONS", - "_render_select", -] - -CLONE_TYPE_OPTIONS: tuple[tuple[str, str], ...] = ( - ("Type-1", "Type-1"), - ("Type-2", "Type-2"), - ("Type-3", "Type-3"), - ("Type-4", "Type-4"), -) - -SPREAD_OPTIONS: tuple[tuple[str, str], ...] = ( - ("high", "high"), - ("low", "low"), -) - - -def _render_select( - *, - element_id: str, - data_attr: str, - options: Sequence[tuple[str, str]], - all_label: str = "all", - selected: str | None = None, -) -> str: - """Render a ``" - f'', - ] - for value, display in options: - sel = " selected" if selected == value else "" - parts.append( - f'" - ) - parts.append("") - return "".join(parts) diff --git a/codeclone/_html_report/_widgets/badges.py b/codeclone/_html_report/_widgets/badges.py deleted file mode 100644 index 716d1ad..0000000 --- a/codeclone/_html_report/_widgets/badges.py +++ /dev/null @@ -1,272 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. 
If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""Shared HTML badge, label, and visual helpers for the report UI layer. - -Naming conventions: - - ``{domain}-badge`` for inline taxonomy labels (risk-badge, severity-badge, - source-kind-badge, clone-type-badge) - - ``meta-item`` is the **single** card pattern for all stat/KPI/meta cards - - ``meta-label`` + ``meta-value`` are the **single** label+value pair - - ``suggestion-card`` for suggestion grid items -""" - -from __future__ import annotations - -from collections.abc import Callable, Sequence - -from ._html_escape import _escape_html -from .domain.quality import ( - EFFORT_EASY, - EFFORT_HARD, - EFFORT_MODERATE, - RISK_HIGH, - RISK_LOW, - RISK_MEDIUM, - SEVERITY_CRITICAL, - SEVERITY_INFO, - SEVERITY_WARNING, -) -from .report._source_kinds import normalize_source_kind, source_kind_label - -__all__ = [ - "CHECK_CIRCLE_SVG", - "INFO_CIRCLE_SVG", - "_inline_empty", - "_micro_badges", - "_quality_badge_html", - "_render_chain_flow", - "_short_label", - "_source_kind_badge_html", - "_stat_card", - "_tab_empty", - "_tab_empty_info", -] - -_EFFORT_CSS: dict[str, str] = { - EFFORT_EASY: "success", - EFFORT_MODERATE: "warning", - EFFORT_HARD: "error", -} - -CHECK_CIRCLE_SVG = ( - '' - '' - '' - "" -) - -INFO_CIRCLE_SVG = ( - '' - '' - '' - '' - "" -) - - -def _micro_badges(*pairs: tuple[str, object]) -> str: - """Render compact label:value micro-badge pairs for stat card details.""" - return "".join( - f'' - f'{_escape_html(str(value))}' - f'{_escape_html(label)}' - for label, value in pairs - if value is not None and str(value) != "n/a" - ) - - -def _quality_badge_html(text: str) -> str: - """Render a risk / severity / effort value as a styled badge.""" - r = text.strip().lower() - if r in (RISK_LOW, RISK_HIGH, RISK_MEDIUM): - return ( - f'{_escape_html(r)}' - ) - if r in 
(SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO): - return ( - f'' - f"{_escape_html(r)}" - ) - if r in _EFFORT_CSS: - return ( - f'{_escape_html(r)}' - ) - return _escape_html(text) - - -def _source_kind_badge_html(source_kind: str) -> str: - normalized = normalize_source_kind(source_kind) - return ( - f'' - f"{_escape_html(source_kind_label(normalized))}" - ) - - -_INLINE_EMPTY_ICONS: dict[str, str] = { - "good": ( - '' - ), - "neutral": ( - '' - ), -} - - -def _inline_empty(message: str, *, tone: str = "neutral") -> str: - """Compact single-row empty-state for inline/card contexts. - - Use for summary items, breakdown panels, and other small cards where a - full ``.tab-empty`` would be too heavy. - - *tone*: - - ``"good"`` — green check (positive: "nothing to report"). - - ``"neutral"`` — muted info dot (missing or unavailable data). - """ - tone_key = tone if tone in _INLINE_EMPTY_ICONS else "neutral" - icon = _INLINE_EMPTY_ICONS[tone_key] - return ( - f'
' - f"{icon}" - f'{_escape_html(message)}' - "
" - ) - - -def _tab_empty( - message: str, - *, - description: str | None = "Nothing to report - keep up the good work.", -) -> str: - desc_html = ( - f'
{_escape_html(description)}
' - if description - else "" - ) - return ( - '
' - f"{CHECK_CIRCLE_SVG}" - f'
{_escape_html(message)}
' - f"{desc_html}" - "
" - ) - - -def _tab_empty_info( - message: str, - *, - description: str | None = None, - detail_html: str | None = None, -) -> str: - if detail_html: - desc_block = ( - f'
{detail_html}
' - ) - elif description: - desc_block = ( - f'
' - f"{_escape_html(description)}
" - ) - else: - desc_block = "" - return ( - '
' - f"{INFO_CIRCLE_SVG}" - f'
{_escape_html(message)}
' - f"{desc_block}" - "
" - ) - - -def _short_label(name: str, max_len: int = 18) -> str: - """Shorten a dotted name keeping the last segment, truncated if needed.""" - parts = name.rsplit(".", maxsplit=1) - label = parts[-1] if len(parts) > 1 else name - if len(label) > max_len: - half = max_len // 2 - 1 - return f"{label[:half]}..{label[-half:]}" - return label - - -def _render_chain_flow( - parts: Sequence[str], - *, - arrows: bool = False, -) -> str: - """Render a sequence of names as chain-node spans, optionally with arrows.""" - nodes: list[str] = [] - for i, mod in enumerate(parts): - short = _short_label(str(mod)) - nodes.append( - f'' - f"{_escape_html(short)}" - ) - if arrows and i < len(parts) - 1: - nodes.append('\u2192') - return f'{"".join(nodes)}' - - -def _stat_card( - label: str, - value: object, - *, - detail: str = "", - tip: str = "", - value_tone: str = "", - css_class: str = "meta-item", - glossary_tip_fn: Callable[[str], str] | None = None, - delta_new: int | None = None, -) -> str: - """Unified stat-card renderer. - - Always emits the same HTML structure using ``.meta-item`` / - ``.meta-label`` / ``.meta-value`` so every stat card shares the - exact same design code. - - *value_tone* — semantic color for the main value: - ``"good"`` → green (metric is clean), ``"bad"`` → red (metric has issues), - ``"warn"`` → yellow, ``"muted"`` → dimmed, ``""`` → default text-primary. - - *delta_new* — if provided and > 0, renders a ``+N new`` badge - inline with the label (top-right). For "bad" metrics (complexity, - coupling, etc.) positive delta means regression → red. - """ - tip_html = "" - if glossary_tip_fn is not None: - tip_html = glossary_tip_fn(label) - elif tip: - tip_html = f'?' - - detail_html = "" - if detail: - detail_html = f'
{detail}
' - - delta_html = "" - if delta_new is not None and delta_new > 0: - delta_html = f'+{delta_new}' - - value_cls = f" meta-value--{value_tone}" if value_tone else "" - - return ( - f'
' - f'
{_escape_html(label)}{tip_html}{delta_html}
' - f'
{_escape_html(str(value))}
' - f"{detail_html}" - "
" - ) diff --git a/codeclone/_html_report/_widgets/components.py b/codeclone/_html_report/_widgets/components.py deleted file mode 100644 index 7a9fcae..0000000 --- a/codeclone/_html_report/_widgets/components.py +++ /dev/null @@ -1,106 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""Shared UI components: insight banners, summary helpers, chip rows.""" - -from __future__ import annotations - -from collections.abc import Mapping -from typing import Literal - -from .._coerce import as_int as _as_int -from .._html_badges import _inline_empty, _source_kind_badge_html -from .._html_escape import _escape_html -from ._icons import section_icon_html - -Tone = Literal["ok", "warn", "risk", "info"] - -_EMPTY_ICON = ( - '' - '' - '' -) - - -def insight_block(*, question: str, answer: str, tone: Tone = "info") -> str: - return ( - f'
' - f'
{_escape_html(question)}
' - f'
{_escape_html(answer)}
' - "
" - ) - - -def overview_cluster_header(title: str, subtitle: str | None = None) -> str: - sub = ( - f'

{_escape_html(subtitle)}

' - if subtitle - else "" - ) - return ( - '
' - f'

{_escape_html(title)}

' - f"{sub}" - "
" - ) - - -_SUMMARY_ICON_KEYS: dict[str, tuple[str, str]] = { - "top risks": ("top-risks", "summary-icon summary-icon--risk"), - "issue breakdown": ("issue-breakdown", "summary-icon summary-icon--info"), - "source breakdown": ("source-breakdown", "summary-icon summary-icon--info"), - "all findings": ("all-findings", "summary-icon summary-icon--info"), - "clone groups": ("clone-groups", "summary-icon summary-icon--info"), - "low cohesion": ("low-cohesion", "summary-icon summary-icon--info"), - "top candidates": ("quality", "summary-icon summary-icon--info"), - "more candidates": ("quality", "summary-icon summary-icon--info"), - "health profile": ("health-profile", "summary-icon summary-icon--info"), - "adoption coverage": ("coverage-adoption", "summary-icon summary-icon--info"), - "public api surface": ("api-surface", "summary-icon summary-icon--info"), - "coverage join": ("quality", "summary-icon summary-icon--info"), -} - - -def overview_summary_item_html(*, label: str, body_html: str) -> str: - icon_key, icon_class = _SUMMARY_ICON_KEYS.get(label.lower(), ("", "")) - icon = ( - section_icon_html(icon_key, class_name=icon_class) - if icon_key and icon_class - else "" - ) - return ( - '
' - '
' - f"{icon}{_escape_html(label)}
" - f"{body_html}" - "
" - ) - - -def overview_source_breakdown_html(breakdown: Mapping[str, object]) -> str: - sorted_items = sorted( - ((str(k), _as_int(v)) for k, v in breakdown.items()), - key=lambda item: -item[1], - ) - rows = [(kind, count) for kind, count in sorted_items if count > 0] - if not rows: - return _inline_empty("No source data available", tone="neutral") - - total = sum(c for _, c in rows) - parts: list[str] = [] - for kind, count in rows: - pct = round(count / total * 100) if total else 0 - parts.append( - '
' - f"{_source_kind_badge_html(kind)}" - f'{count}' - f'' - f'' - "
" - ) - return '
' + "".join(parts) + "
" diff --git a/codeclone/_html_report/_widgets/glossary.py b/codeclone/_html_report/_widgets/glossary.py deleted file mode 100644 index e48d4f0..0000000 --- a/codeclone/_html_report/_widgets/glossary.py +++ /dev/null @@ -1,100 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""Tooltip glossary for report table headers and stat cards.""" - -from __future__ import annotations - -from .._html_escape import _escape_html - -GLOSSARY: dict[str, str] = { - # Complexity - "function": "Fully-qualified function or method name", - "class": "Fully-qualified class name", - "name": "Symbol name (function, class, or variable)", - "file": "Source file path relative to scan root", - "location": "File and line range where the symbol is defined", - "cc": "Cyclomatic complexity — number of independent execution paths", - "nesting": "Maximum nesting depth of control-flow statements", - "risk": "Risk level based on metric thresholds (low / medium / high)", - # Coupling / cohesion - "cbo": "Coupling Between Objects — number of classes this class depends on", - "coupled classes": "Resolved class dependencies used to compute CBO for this class", - "lcom4": "Lack of Cohesion of Methods — connected components in method/field graph", - "methods": "Number of methods defined in the class", - "fields": "Number of instance variables (attributes) in the class", - # Dead code - "line": "Source line number where the symbol starts", - "kind": "Symbol type: function, class, import, or variable", - "confidence": "Detection confidence (low / medium / high / critical)", - # Dependencies - "longest chain": "Longest transitive import chain between modules", - "length": "Number of modules in the dependency chain", - "cycle": "Circular import dependency between modules", - 
# Suggestions - "priority": "Computed priority score (higher = more urgent)", - "severity": "Issue severity: critical, warning, or info", - "category": ( - "Metric category: clone, complexity, coupling, cohesion, dead_code, dependency" - ), - "title": "Brief description of the suggested improvement", - "effort": "Estimated effort to fix: easy, moderate, or hard", - "steps": "Actionable steps to resolve the issue", - # Dependency stat cards - "modules": "Total number of Python modules analyzed", - "edges": "Total number of import relationships between modules", - "max depth": "Longest chain of transitive imports", - "cycles": "Number of circular import dependencies detected", - # Complexity stat cards - "high-risk functions": ( - "Functions with cyclomatic complexity above the high-risk threshold" - ), - "max cc": "Highest cyclomatic complexity value among all analyzed functions", - "avg cc": "Average cyclomatic complexity across all analyzed functions", - "deep nesting": ( - "Functions with nesting depth exceeding recommended threshold (> 4)" - ), - # Coupling stat cards - "high-coupling classes": "Classes with CBO above the high-risk threshold", - "max cbo": "Highest Coupling Between Objects value among all classes", - "avg cbo": "Average CBO across all analyzed classes", - "medium risk": "Items at medium risk level — worth reviewing but not critical", - # Cohesion stat cards - "low-cohesion classes": ( - "Classes with LCOM4 > 1, indicating multiple responsibilities" - ), - "max lcom4": "Highest Lack of Cohesion value among all classes", - "high risk": "Items at high risk level requiring attention", - # Overloaded module stat cards - "overloaded": ( - "Modules exceeding acceptable thresholds for size, complexity, or coupling" - ), - "critical": "Items with critical status requiring immediate attention", - "max score": "Highest overload score among all modules", - "avg loc": "Average lines of code per module", - # Dead code stat cards - "candidates": "Total dead 
code candidates detected by static analysis", - "high confidence": "Dead code items detected with high or critical confidence", - "suppressed": "Dead code candidates excluded by suppression rules", - "hit rate": "Percentage of high-confidence items among all candidates", - # Clone stat cards - "clone groups": "Distinct duplication patterns, each containing 2+ code fragments", - "instances": "Total duplicated code fragments across all groups", - "new groups": "Clone groups not present in the previous baseline", - "high spread": "Clone groups spanning multiple files", - # Suggestion stat cards - "total suggestions": "Total actionable improvement suggestions generated", - "warning": "Suggestions with warning severity worth reviewing", - "easy wins": "Actionable suggestions with low estimated effort", -} - - -def glossary_tip(label: str) -> str: - """Return a tooltip ```` for *label*, or ``''`` if unknown.""" - tip = GLOSSARY.get(label.lower(), "") - if not tip: - return "" - return f' ?' diff --git a/codeclone/_html_report/_widgets/icons.py b/codeclone/_html_report/_widgets/icons.py deleted file mode 100644 index 87b68c2..0000000 --- a/codeclone/_html_report/_widgets/icons.py +++ /dev/null @@ -1,214 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""SVG icon constants for the HTML report (Lucide-style).""" - -from __future__ import annotations - - -def _svg(size: int, sw: str, body: str) -> str: - return ( - f'{body}' - ) - - -def _svg_with_class(size: int, sw: str, body: str, *, class_name: str = "") -> str: - class_attr = f' class="{class_name}"' if class_name else "" - return ( - f'{body}' - ) - - -BRAND_LOGO = ( - '" -) - -ICONS: dict[str, str] = { - "search": _svg( - 16, - "2.5", - '', - ), - "clear": _svg( - 16, - "2.5", - '', - ), - "chev_down": _svg( - 16, - "2.5", - '', - ), - "theme_moon": _svg_with_class( - 16, - "2", - '', - class_name="theme-icon theme-icon-moon", - ), - "theme_sun": _svg_with_class( - 16, - "2", - '' - '' - '' - '' - '' - '' - '' - '' - '', - class_name="theme-icon theme-icon-sun", - ), - "check": _svg( - 48, - "2", - '', - ), - "prev": _svg( - 16, - "2", - '', - ), - "next": _svg( - 16, - "2", - '', - ), - "sort_asc": _svg( - 12, - "2", - '', - ), - "sort_desc": _svg( - 12, - "2", - '', - ), - "ide": _svg( - 16, - "2", - '', - ), -} - -_SECTION_ICON_BODIES: dict[str, tuple[str, str]] = { - "overview": ( - "1.8", - '' - '' - '' - '', - ), - "clones": ( - "2", - '' - '', - ), - "quality": ( - "2", - '' - '' - '', - ), - "dependencies": ( - "2", - '' - '' - '', - ), - "dead-code": ( - "2", - '' - '', - ), - "suggestions": ( - "2", - '' - '', - ), - "structural-findings": ( - "2", - '' - '' - '', - ), - "top-risks": ( - "2", - '' - '', - ), - "issue-breakdown": ( - "2", - '' - '' - '', - ), - "source-breakdown": ( - "2", - '' - '', - ), - "health-profile": ( - "2", - '' - '' - '', - ), - "all-findings": ( - "2", - '' - '' - '', - ), - "clone-groups": ( - "2", - '' - '', - ), - "low-cohesion": ( - "2", - '' - '' - '', - ), - "coverage-adoption": ( - "2", - '' - '', - ), - "api-surface": ( - "2", - '' - '', - ), -} - - -def section_icon_html( - key: str, - *, - class_name: str = "", - size: int = 16, -) -> 
str: - spec = _SECTION_ICON_BODIES.get(key.strip().lower()) - if spec is None: - return "" - stroke_width, body = spec - return _svg_with_class(size, stroke_width, body, class_name=class_name) diff --git a/codeclone/_html_report/_widgets/snippets.py b/codeclone/_html_report/_widgets/snippets.py deleted file mode 100644 index dac7eec..0000000 --- a/codeclone/_html_report/_widgets/snippets.py +++ /dev/null @@ -1,207 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -from __future__ import annotations - -import html -import importlib -from dataclasses import dataclass -from functools import lru_cache -from typing import TYPE_CHECKING, NamedTuple, cast - -from .errors import FileProcessingError - -if TYPE_CHECKING: - from types import ModuleType - - -@dataclass(slots=True) -class _Snippet: - filepath: str - start_line: int - end_line: int - code_html: str - - -class _FileCache: - __slots__ = ("_get_file_lines_impl", "maxsize") - - def __init__(self, maxsize: int = 128) -> None: - self.maxsize = maxsize - self._get_file_lines_impl = lru_cache(maxsize=maxsize)(self._read_file_lines) - - @staticmethod - def _read_file_lines(filepath: str) -> tuple[str, ...]: - try: - - def _read_with_errors(errors: str) -> tuple[str, ...]: - with open(filepath, encoding="utf-8", errors=errors) as f: - return tuple(line.rstrip("\n") for line in f) - - try: - return _read_with_errors("strict") - except UnicodeDecodeError: - return _read_with_errors("replace") - except OSError as e: - raise FileProcessingError(f"Cannot read {filepath}: {e}") from e - - def get_lines_range( - self, filepath: str, start_line: int, end_line: int - ) -> tuple[str, ...]: - if start_line < 1: - start_line = 1 - if end_line < start_line: - return () - lines = 
self._get_file_lines_impl(filepath) - start_index = start_line - 1 - if start_index >= len(lines): - return () - end_index = min(len(lines), end_line) - return lines[start_index:end_index] - - class _CacheInfo(NamedTuple): - hits: int - misses: int - maxsize: int | None - currsize: int - - def cache_info(self) -> _CacheInfo: - return cast("_FileCache._CacheInfo", self._get_file_lines_impl.cache_info()) - - -_PYGMENTS_IMPORTER_ID: int | None = None -_PYGMENTS_API: tuple[ModuleType, ModuleType, ModuleType] | None = None - - -def _load_pygments_api() -> tuple[ModuleType, ModuleType, ModuleType] | None: - """ - Load pygments modules once per import-function identity. - - Tests monkeypatch `importlib.import_module`; tracking importer identity keeps - behavior deterministic and allows import-error branches to stay testable. - """ - global _PYGMENTS_IMPORTER_ID - global _PYGMENTS_API - - importer_id = id(importlib.import_module) - if importer_id != _PYGMENTS_IMPORTER_ID: - _PYGMENTS_IMPORTER_ID = importer_id - _PYGMENTS_API = None - if _PYGMENTS_API is not None: - return _PYGMENTS_API - - try: - pygments = importlib.import_module("pygments") - formatters = importlib.import_module("pygments.formatters") - lexers = importlib.import_module("pygments.lexers") - except ImportError: - return None - - _PYGMENTS_API = (pygments, formatters, lexers) - return _PYGMENTS_API - - -def _try_pygments(code: str) -> str | None: - pygments_api = _load_pygments_api() - if pygments_api is None: - return None - pygments, formatters, lexers = pygments_api - - highlight = pygments.highlight - formatter_cls = formatters.HtmlFormatter - lexer_cls = lexers.PythonLexer - result = highlight(code, lexer_cls(), formatter_cls(nowrap=True)) - return result if isinstance(result, str) else None - - -def _pygments_css(style_name: str) -> str: - """ - Returns CSS for pygments tokens. Scoped to `.codebox` to avoid leaking styles. - If Pygments is not available or style missing, returns "". 
- """ - pygments_api = _load_pygments_api() - if pygments_api is None: - return "" - _, formatters, _ = pygments_api - - try: - formatter_cls = formatters.HtmlFormatter - fmt = formatter_cls(style=style_name) - except Exception: - try: - fmt = formatter_cls() - except Exception: - return "" - - try: - css = fmt.get_style_defs(".codebox") - return css if isinstance(css, str) else "" - except Exception: - return "" - - -def _render_code_block( - *, - filepath: str, - start_line: int, - end_line: int, - file_cache: _FileCache, - context: int, - max_lines: int, -) -> _Snippet: - s = max(1, start_line - context) - e = end_line + context - - if e - s + 1 > max_lines: - e = s + max_lines - 1 - - try: - lines = file_cache.get_lines_range(filepath, s, e) - except FileProcessingError: - missing = ( - '
'
-            '
Source file unavailable
' - "
" - ) - return _Snippet( - filepath=filepath, - start_line=start_line, - end_line=end_line, - code_html=missing, - ) - - numbered: list[tuple[bool, str]] = [] - for lineno, line in enumerate(lines, start=s): - hit = start_line <= lineno <= end_line - numbered.append((hit, f"{lineno:>5} | {line.rstrip()}")) - - raw = "\n".join(text for _, text in numbered) - highlighted = _try_pygments(raw) - - if highlighted is None: - rendered: list[str] = [] - for hit, text in numbered: - cls = "hitline" if hit else "line" - rendered.append( - f'
{html.escape(text, quote=False)}
' - ) - body = "".join(rendered) - else: - hit_flags = [hit for hit, _ in numbered] - pyg_lines = highlighted.split("\n") - rendered_pyg: list[str] = [] - for i, pyg_line in enumerate(pyg_lines): - hit = hit_flags[i] if i < len(hit_flags) else False - cls = "hitline" if hit else "line" - rendered_pyg.append(f'
{pyg_line}
') - body = "".join(rendered_pyg) - - return _Snippet( - filepath=filepath, - start_line=start_line, - end_line=end_line, - code_html=f'
{body}
', - ) diff --git a/codeclone/_html_report/_widgets/tables.py b/codeclone/_html_report/_widgets/tables.py deleted file mode 100644 index 7f633f2..0000000 --- a/codeclone/_html_report/_widgets/tables.py +++ /dev/null @@ -1,127 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""Generic table renderer for metric/finding tables.""" - -from __future__ import annotations - -from collections.abc import Collection, Sequence -from typing import TYPE_CHECKING - -from .._html_badges import _quality_badge_html, _tab_empty -from .._html_escape import _escape_html -from ._glossary import glossary_tip - -if TYPE_CHECKING: - from ._context import ReportContext - -_RISK_HEADERS = {"risk", "confidence", "severity", "effort"} -_PATH_HEADERS = {"file", "location"} - -_COL_WIDTHS: dict[str, str] = { - "cc": "62px", - "cbo": "62px", - "lcom4": "70px", - "nesting": "76px", - "line": "60px", - "length": "68px", - "methods": "80px", - "fields": "68px", - "priority": "74px", - "risk": "78px", - "confidence": "94px", - "severity": "82px", - "effort": "78px", - "category": "100px", - "kind": "76px", - "steps": "120px", - "coupled classes": "360px", -} - -_COL_CLS: dict[str, str] = {} -for _h in ("function", "class", "name"): - _COL_CLS[_h] = "col-name" -for _h in ("file", "location"): - _COL_CLS[_h] = "col-path" -for _h in ( - "cc", - "cbo", - "lcom4", - "nesting", - "line", - "length", - "methods", - "fields", - "priority", -): - _COL_CLS[_h] = "col-num" -for _h in ("risk", "confidence", "severity", "effort"): - _COL_CLS[_h] = "col-badge" -for _h in ("category", "kind"): - _COL_CLS[_h] = "col-cat" -for _h in ("cycle", "longest chain", "title", "coupled classes"): - _COL_CLS[_h] = "col-wide" -_COL_CLS["steps"] = "col-steps" - - -def render_rows_table( - *, 
- headers: Sequence[str], - rows: Sequence[Sequence[str]], - empty_message: str, - empty_description: str | None = "Nothing to report - keep up the good work.", - raw_html_headers: Collection[str] = (), - ctx: ReportContext | None = None, -) -> str: - """Render a data table with badges, tooltips, and col sizing.""" - if not rows: - return _tab_empty(empty_message, description=empty_description) - - lower_headers = [h.lower() for h in headers] - raw_html_set = {h.lower() for h in raw_html_headers} - - # colgroup - cg = [""] - for h in lower_headers: - w = _COL_WIDTHS.get(h) - cg.append(f'' if w else "") - cg.append("") - - # thead - th_parts = [ - f"{_escape_html(header)}{glossary_tip(header)}" for header in headers - ] - - # tbody - def _td(col_idx: int, cell: str) -> str: - h = lower_headers[col_idx] if col_idx < len(lower_headers) else "" - cls = _COL_CLS.get(h, "") - cls_attr = f' class="{cls}"' if cls else "" - if h in raw_html_set: - return f"{cell}" - if h in _RISK_HEADERS: - return f"{_quality_badge_html(cell)}" - if h in _PATH_HEADERS and ctx is not None: - short = ctx.relative_path(cell) - return ( - f'' - f'' - f"{_escape_html(short)}" - ) - return f"{_escape_html(cell)}" - - body_html = "".join( - "" + "".join(_td(i, cell) for i, cell in enumerate(row)) + "" - for row in rows - ) - - return ( - '
' - f"{''.join(cg)}" - f"{''.join(th_parts)}" - f"{body_html}" - "
" - ) diff --git a/codeclone/_html_report/_widgets/tabs.py b/codeclone/_html_report/_widgets/tabs.py deleted file mode 100644 index 8ce1e43..0000000 --- a/codeclone/_html_report/_widgets/tabs.py +++ /dev/null @@ -1,60 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# Copyright (c) 2026 Den Rozhnovskiy - -"""Tab/subtab rendering helpers.""" - -from __future__ import annotations - -from collections.abc import Sequence - -from .._html_escape import _escape_html - - -def render_split_tabs( - *, - group_id: str, - tabs: Sequence[tuple[str, str, int, str]], - emit_clone_counters: bool = False, -) -> str: - """Render sub-tab navigation + panels. - - Each tab tuple: ``(tab_id, label, count, panel_html)``. - """ - if not tabs: - return "" - - nav: list[str] = [ - '") - - panels: list[str] = [] - for idx, (tab_id, _, _, panel_html) in enumerate(tabs): - active = " active" if idx == 0 else "" - panels.append( - f'
' - f"{panel_html}
" - ) - - return f"{''.join(nav)}{''.join(panels)}" From 2235c50996f947ed654e48425e5271ccddae9f6a Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 22 Apr 2026 14:57:51 +0500 Subject: [PATCH 09/32] docs(architecture): refresh docs for b6 layout --- AGENTS.md | 178 ++++---- CHANGELOG.md | 18 + docs/architecture.md | 4 +- docs/book/00-intro.md | 28 +- docs/book/01-architecture-map.md | 166 ++++--- docs/book/02-terminology.md | 97 ++-- docs/book/03-contracts-exit-codes.md | 77 ++-- docs/book/04-config-and-defaults.md | 43 +- docs/book/05-core-pipeline.md | 161 +++---- docs/book/06-baseline.md | 108 ++--- docs/book/07-cache.md | 117 ++--- docs/book/08-report.md | 264 +++-------- docs/book/09-cli.md | 212 +++------ docs/book/10-html-render.md | 136 ++---- docs/book/11-security-model.md | 61 ++- docs/book/12-determinism.md | 56 +-- docs/book/14-compatibility-and-versioning.md | 177 +++----- docs/book/15-health-score.md | 136 ++---- docs/book/15-metrics-and-quality-gates.md | 128 ++---- docs/book/16-dead-code-contract.md | 18 +- docs/book/17-suggestions-and-clone-typing.md | 82 ++-- docs/book/18-benchmarking.md | 2 +- docs/book/19-inline-suppressions.md | 16 +- docs/book/20-mcp-interface.md | 441 +++++-------------- docs/book/appendix/a-status-enums.md | 16 +- docs/book/appendix/b-schema-layouts.md | 20 +- docs/book/appendix/c-error-catalog.md | 71 +-- docs/sarif.md | 67 +-- 28 files changed, 1046 insertions(+), 1854 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 16e579d..5d45c03 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -137,18 +137,18 @@ uv run pytest -q tests/test_codex_plugin.py ### Versioned constants (single source of truth) -All schema/version constants live in `codeclone/contracts.py`. **Always read them from code, never copy +All schema/version constants live in `codeclone/contracts/__init__.py`. 
**Always read them from code, never copy from another doc.** Current values (verified at write time): -| Constant | Source | Current value | -|-----------------------------------|------------------------------|---------------| -| `BASELINE_SCHEMA_VERSION` | `codeclone/contracts.py` | `2.1` | -| `BASELINE_FINGERPRINT_VERSION` | `codeclone/contracts.py` | `1` | -| `CACHE_VERSION` | `codeclone/contracts.py` | `2.5` | -| `REPORT_SCHEMA_VERSION` | `codeclone/contracts.py` | `2.8` | -| `METRICS_BASELINE_SCHEMA_VERSION` | `codeclone/contracts.py` | `1.2` | +| Constant | Source | Current value | +|-----------------------------------|-----------------------------------|---------------| +| `BASELINE_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `2.1` | +| `BASELINE_FINGERPRINT_VERSION` | `codeclone/contracts/__init__.py` | `1` | +| `CACHE_VERSION` | `codeclone/contracts/__init__.py` | `2.5` | +| `REPORT_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `2.8` | +| `METRICS_BASELINE_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `1.2` | -When updating any doc that mentions a version, re-read `codeclone/contracts.py` first. Do not derive +When updating any doc that mentions a version, re-read `codeclone/contracts/__init__.py` first. Do not derive versions from another document. ### Baseline file structure (canonical) @@ -181,7 +181,7 @@ versions from another document. - `schema_version` is **baseline schema**, not package version. - Runtime writes baseline schema `2.1`. - Runtime accepts baseline schema `1.0` and `2.0`–`2.1` (governed by - `_BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR` in `codeclone/baseline.py`). + `_BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR` in `codeclone/baseline/trust.py`). 
- Compatibility is tied to: - `fingerprint_version` - `python_tag` @@ -367,24 +367,29 @@ Before cutting a release: Architecture is layered, but grounded in current code (not aspirational diagrams): -- **CLI / orchestration surface** (`codeclone/cli.py`, `codeclone/_cli_*.py`) parses args, resolves runtime mode, - coordinates pipeline calls, and prints UX. -- **Pipeline orchestrator** (`codeclone/pipeline.py`) owns end-to-end flow: bootstrap → discovery → processing → - analysis → report artifacts → gating. -- **Core analysis** (`codeclone/extractor.py`, `codeclone/cfg.py`, `codeclone/normalize.py`, `codeclone/blocks.py`, - `codeclone/grouping.py`, `codeclone/scanner.py`) produces normalized structural facts and clone candidates. -- **Domain/contracts layer** (`codeclone/models.py`, `codeclone/contracts.py`, `codeclone/errors.py`, - `codeclone/domain/*.py`) defines typed entities and stable enums/constants used across layers. -- **Persistence contracts** (`codeclone/baseline.py`, `codeclone/cache.py`, `codeclone/cache_io.py`, - `codeclone/metrics_baseline.py`) store trusted comparison state and optimization state. -- **Canonical report + projections** (`codeclone/report/json_contract.py`, `codeclone/report/*.py`) converts analysis - facts to deterministic, contract-shaped outputs. -- **HTML/UI rendering** (`codeclone/html_report.py`, `codeclone/_html_report/*`, `codeclone/_html_*.py`, - `codeclone/templates.py`) renders views from report/meta facts. +- **CLI entry + orchestration surface** (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/ui_messages/*`) + owns argument parsing, runtime/config resolution, summaries, report writes, and exit routing. +- **Config layer** (`codeclone/config/*`) is the single source of truth for option specs, parser construction, + `pyproject.toml` loading, and CLI > pyproject > defaults resolution. 
+- **Core orchestration** (`codeclone/core/*`) owns bootstrap → discovery → worker processing → project metrics → + report/gate integration. It does not own shell UX. +- **Analysis layer** (`codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*`) + parses source, normalizes AST/CFG facts, extracts units, and prepares deterministic analysis inputs. +- **Clone/finding derivation layer** (`codeclone/findings/*`, `codeclone/metrics/*`) groups clones and computes + structural and quality signals from already-extracted facts. +- **Domain/contracts layer** (`codeclone/models.py`, `codeclone/contracts/*`, `codeclone/domain/*`) defines typed + entities, enums, schema/version constants, and typed exceptions used across layers. +- **Persistence contracts** (`codeclone/baseline/*`, `codeclone/cache/*`) store trusted comparison state and + optimization state. They are contracts, not analysis truth. +- **Canonical report + projections** (`codeclone/report/document/*`, `codeclone/report/gates/*`, + `codeclone/report/renderers/*`, `codeclone/report/*.py`) converts analysis facts into deterministic report payloads + and deterministic projections. +- **HTML/UI rendering** (`codeclone/report/html/*`, `codeclone/templates.py`) renders views from canonical report/meta + facts. HTML is render-only. +- **MCP agent interface** (`codeclone/surfaces/mcp/*`) exposes the same pipeline/report contracts as a deterministic, + read-only MCP surface for AI agents and MCP-capable clients. - **Documentation/publishing surface** (`docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py`) publishes contract docs and the live sample report. -- **MCP agent interface** (`codeclone/mcp_service.py`, `codeclone/mcp_server.py`) exposes the current pipeline as a - deterministic, read-only MCP server for AI agents and MCP-capable clients. 
- **VS Code extension surface** (`extensions/vscode-codeclone/*`) is a native, workspace-only IDE client over `codeclone-mcp`, with baseline-aware, triage-first, source-first review UX. - **Claude Desktop bundle surface** (`extensions/claude-desktop-codeclone/*`) is a native `.mcpb` install wrapper for @@ -409,51 +414,51 @@ Non-negotiable interpretation: Use this map to route changes to the right owner module. -- `codeclone/cli.py` — public CLI entry and control-flow coordinator; add orchestration and top-level UX here; do not - move core analysis logic here. -- `codeclone/_cli_*.py` — CLI support slices (args, config, runtime, summary, reports, baselines, gating); keep them - thin and reusable; do not encode domain semantics that belong to pipeline/core/contracts. -- `codeclone/pipeline.py` — canonical orchestration and data plumbing between scanner/extractor/metrics/report/gating; - change integration flow here; do not move HTML-only presentation logic here. -- `codeclone/extractor.py` — AST extraction, CFG fingerprint input preparation, symbol/declaration collection, and - per-file metrics inputs; change parsing/extraction semantics here; do not couple this module to CLI/report - rendering/baseline logic. -- `codeclone/grouping.py` / `codeclone/blocks.py` — clone grouping and block/segment mechanics; normalization-adjacent - statement hashing lives with `codeclone/normalize.py`; do not mix grouping behavior with CLI/report UX concerns. -- `codeclone/metrics/` — metric computations and dead-code/dependency/health logic; change metric math and thresholds +- `codeclone/main.py` — public CLI entrypoint only. Keep it tiny. +- `codeclone/surfaces/cli/workflow.py` — top-level CLI orchestration and exit routing. Add CLI control flow here, not + in `main.py`. +- `codeclone/surfaces/cli/*` — CLI support slices (startup, runtime, execution, post-run handling, summaries, + reports, changed-scope logic, baseline state, console helpers). Keep them orchestration/UX-focused. 
+- `codeclone/config/*` — parser construction, option specs/defaults, pyproject loading, config resolution. Do not + duplicate option semantics elsewhere. +- `codeclone/core/*` — canonical runtime pipeline and payload plumbing. Change integration flow here; do not move shell + UX or HTML-only logic here. +- `codeclone/analysis/*` — AST parsing, CFG/fingerprint preparation, declaration/reference collection, and unit + extraction. Change parsing/extraction semantics here; keep it independent from CLI/report/baseline UX. +- `codeclone/findings/clones/grouping.py` + `codeclone/blocks/*` — clone grouping and block/segment mechanics. +- `codeclone/findings/structural/detectors.py` — structural finding extraction/normalization policy; keep it factual + and deterministic. +- `codeclone/metrics/*` — metric computations and dead-code/dependency/health logic; change metric math and thresholds here; do not make metrics depend on renderer/UI concerns. -- `codeclone/structural_findings.py` — structural finding extraction/normalization policy; keep it report-layer factual +- `codeclone/suppressions.py` — inline `# codeclone: ignore[...]` parse/bind/index logic; keep it declaration-scoped and deterministic. -- `codeclone/suppressions.py` — inline `# codeclone: ignore[...]` parse/bind/index logic; keep it declaration-scoped and - deterministic. -- `codeclone/baseline.py` — baseline schema/trust/integrity/compatibility contract; all baseline format changes go here - with explicit contract process. -- `codeclone/cache.py` — cache schema/status/profile compatibility and high-level serialization policy; cache remains - optimization-only. -- `codeclone/cache_io.py` — IO-layer helpers for the cache: atomic JSON read/write - (`read_json_document`, `write_json_document_atomically`), canonical JSON (`canonical_json`), and - HMAC signing/verification (`sign_cache_payload`, `verify_cache_payload_signature`); attribute these - functions to `cache_io.py`, not `cache.py`. 
-- `codeclone/report/json_contract.py` — canonical report schema builder/integrity payload; any JSON contract shape - change belongs here. -- `codeclone/report/*.py` (other modules) — deterministic projections/format transforms ( - text/markdown/sarif/derived/findings/suggestions); avoid injecting new analysis heuristics here. -- `codeclone/mcp_service.py` — typed, in-process MCP service adapter over the current pipeline/report contracts; keep - it deterministic; allow only session-local in-memory state such as reviewed markers, and never move shell UX or - `sys.exit` behavior here. -- `codeclone/mcp_server.py` — optional MCP launcher/server wiring, transport config, and MCP tool/resource +- `codeclone/baseline/clone_baseline.py` + `codeclone/baseline/trust.py` — clone baseline schema/trust/integrity/ + compatibility contract; all clone-baseline format changes go here with explicit contract process. +- `codeclone/baseline/metrics_baseline.py` + `codeclone/baseline/_metrics_baseline_*` — metrics-baseline schema, + validation, payload hashing, and unified-baseline merge logic. +- `codeclone/cache/store.py`, `codeclone/cache/versioning.py`, `codeclone/cache/integrity.py`, + `codeclone/cache/_wire_*`, `codeclone/cache/projection.py` — cache schema/status/profile compatibility, canonical + JSON/signing, wire encoding/decoding, and segment projection persistence. Cache remains optimization-only. +- `codeclone/report/document/*` — canonical report schema builder and integrity payload. Any JSON contract shape change + belongs here. +- `codeclone/report/renderers/*` — deterministic text/markdown/SARIF/JSON projections over the canonical report. +- `codeclone/report/html/*` — actual HTML assembly, context shaping, tabs, sections, widgets, CSS/JS/escaping, and + snippets. Change report layout and interactive HTML UX here, not in report builders. +- `codeclone/report/gates/*` — metric-gate reason derivation over canonical metrics state. 
+- `codeclone/report/*.py` (other modules) — deterministic report support slices such as explainability, suggestions, + merge, overview, findings helpers, and source-kind routing. +- `codeclone/surfaces/mcp/service.py` — typed, in-process MCP service over the current pipeline/report contracts; + keep it deterministic and read-only except for session-local in-memory markers. +- `codeclone/surfaces/mcp/server.py` — optional MCP launcher/server wiring, transport config, and MCP tool/resource registration; keep dependency loading lazy so base installs/CI do not require MCP runtime packages. - `tests/test_mcp_service.py`, `tests/test_mcp_server.py` — MCP contract and integration tests; run these when touching any MCP surface. -- `codeclone/html_report.py` — public HTML facade/re-export surface; preserve backward-compatible imports here; do not - grow section/layout logic in this module. -- `codeclone/_html_report/*` — actual HTML assembly, context shaping, tabs, sections, and overview/navigation behavior; - change report layout and interactive HTML UX here, not in the facade. -- `codeclone/_html_*.py` — shared HTML badges, CSS, JS, escaping, snippets, and data-attrs; keep these as render-only - helpers. +- `codeclone/contracts/*` — version constants, schema types, exit enum, URLs, and typed exceptions. Treat as contract + surface. - `codeclone/models.py` — shared typed models crossing modules; keep model changes contract-aware. - `codeclone/domain/*.py` — centralized domain taxonomies/IDs (families, categories, source scopes, risk/severity levels); use these constants in pipeline/report/UI instead of scattering raw literals. +- `codeclone/ui_messages/*` — CLI text/marker/help constants and formatter helpers. Keep message policy centralized. - `docs/`, `mkdocs.yml`, `.github/workflows/docs.yml`, `scripts/build_docs_example_report.py` — docs-site source, publication workflow, and live sample-report generation; keep published docs aligned with code contracts. 
- `extensions/vscode-codeclone/*` — preview VS Code extension surface; keep it baseline-aware, triage-first, @@ -468,19 +473,22 @@ Use this map to route changes to the right owner module. Dependency direction is enforceable and partially test-guarded (`tests/test_architecture.py`): -- `codeclone.report.*` must not import `codeclone.cli`, `codeclone.html_report`, or `codeclone.ui_messages`. -- `codeclone.extractor` must not import `codeclone.report`, `codeclone.cli`, or `codeclone.baseline`. -- `codeclone.grouping` must not import `codeclone.cli`, `codeclone.baseline`, or `codeclone.html_report`. -- `codeclone.baseline` and `codeclone.cache` must not import `codeclone.cli`, `codeclone.ui_messages`, or - `codeclone.html_report`. -- `codeclone.models` may import only `codeclone.contracts` and `codeclone.errors` from local modules. +- `codeclone.report.*` must not import `codeclone.ui_messages`, `codeclone.surfaces.cli`, or HTML consumers outside + `codeclone.report.html.*`. +- `codeclone.baseline` and `codeclone.cache` must not import `codeclone.surfaces.cli`, `codeclone.ui_messages`, or + `codeclone.report.html`. +- `codeclone.core` must not import `codeclone.surfaces.*` or `codeclone.config`. +- `codeclone.analysis`, `codeclone.findings`, and `codeclone.metrics` must not import `codeclone.surfaces.*`; analysis + and findings must also stay independent from config/report-builder wiring. +- `codeclone.models` may import only `codeclone.contracts` from local modules. +- `codeclone.domain.*` must remain leaf domain modules. Operational rules: -- Core/domain code must not depend on HTML/UI. -- Renderers depend on canonical report payload/model; canonical report code must not depend on renderer/UI. +- Core/domain code must not depend on HTML/UI or MCP. +- Renderers depend on canonical report payload/model; canonical report builders must not depend on renderer/UI. - Metrics/report layers must not recompute or invent core facts in UI. 
-- CLI helper modules (`_cli_*`) must orchestrate/format, not own domain semantics. +- CLI support modules under `codeclone/surfaces/cli/*` must orchestrate/format, not own domain semantics. - Persistence semantics (baseline/cache trust/integrity) must stay in persistence/domain modules, not in render/UI layers. - MCP may depend on pipeline/report/contracts, but core/persistence/report layers must not depend on MCP modules. @@ -510,13 +518,13 @@ If you change a contract-sensitive zone, route docs/tests/approval deliberately. | Change zone | Must update docs | Must update tests | Explicit approval required when | Contract-change trigger | |-------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------| -| Baseline schema/trust/integrity (`codeclone/baseline.py`) | `docs/book/06-baseline.md`, `docs/book/14-compatibility-and-versioning.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_baseline.py`, CI/CLI behavior tests (`tests/test_cli_inprocess.py`, `tests/test_cli_unit.py`) | schema/trust semantics, compatibility windows, payload integrity logic change | baseline key layout/status semantics/compat rules change | -| Cache schema/profile/integrity (`codeclone/cache.py`) | 
`docs/book/07-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | -| Canonical report JSON shape (`codeclone/report/json_contract.py`, report projections) | `docs/book/08-report.md` (+ `docs/book/10-html-render.md` if rendering contract impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | -| CLI flags/help/exit behavior (`codeclone/cli.py`, `_cli_*`, `contracts.py`) | `docs/book/09-cli.md`, `docs/book/03-contracts-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | +| Baseline schema/trust/integrity (`codeclone/baseline/clone_baseline.py`, `codeclone/baseline/trust.py`) | `docs/book/06-baseline.md`, `docs/book/14-compatibility-and-versioning.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_baseline.py`, CI/CLI behavior tests (`tests/test_cli_inprocess.py`, `tests/test_cli_unit.py`) | schema/trust semantics, compatibility windows, payload integrity logic change | baseline key layout/status semantics/compat rules change | +| Cache schema/profile/integrity (`codeclone/cache/store.py`, `codeclone/cache/versioning.py`, `codeclone/cache/integrity.py`) | `docs/book/07-cache.md`, `docs/book/appendix/b-schema-layouts.md`, `CHANGELOG.md` | `tests/test_cache.py`, pipeline/CLI cache integration tests | cache schema/status/profile compatibility semantics change | cache payload/version/status semantics change | +| Canonical report JSON shape 
(`codeclone/report/document/*`, report projections) | `docs/book/08-report.md` (+ `docs/book/10-html-render.md` if rendering contract impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | +| CLI flags/help/exit behavior (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/config/*`, `codeclone/contracts/*`) | `docs/book/09-cli.md`, `docs/book/03-contracts-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | | Fingerprint-adjacent analysis (`extractor/cfg/normalize/grouping`) | `docs/book/05-core-pipeline.md`, `docs/cfg.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / fingerprint inputs change | | Suppression semantics/reporting (`suppressions`, extractor dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/16-dead-code-contract.md`, `docs/book/08-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | -| MCP interface (`codeclone/mcp_service.py`, `codeclone/mcp_server.py`, packaging extra/launcher) | `README.md`, `docs/book/20-mcp-interface.md`, 
`docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, read-only semantics, optional-dependency packaging behavior change | public MCP tool names, resource URIs, launcher/install behavior, or response semantics change | +| MCP interface (`codeclone/surfaces/mcp/*`, packaging extra/launcher) | `README.md`, `docs/book/20-mcp-interface.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, read-only semantics, optional-dependency packaging behavior change | public MCP tool names, resource URIs, launcher/install behavior, or response semantics change | | VS Code extension surface (`extensions/vscode-codeclone/*`) | `README.md`, `docs/book/21-vscode-extension.md`, `docs/vscode-extension.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `node --check extensions/vscode-codeclone/src/support.js`, `node --check extensions/vscode-codeclone/src/mcpClient.js`, `node --check extensions/vscode-codeclone/src/extension.js`, `node --test extensions/vscode-codeclone/test/*.test.js`, plus local extension-host smoke and package smoke when surface/manifest/assets change | command/view UX, trust/runtime model, source-first review flow, or packaging metadata change | documented commands/views/setup/trust behavior, packaged assets, or publish metadata change | | Claude Desktop bundle surface (`extensions/claude-desktop-codeclone/*`) | `docs/book/22-claude-desktop-bundle.md`, `docs/claude-desktop-bundle.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `node --check extensions/claude-desktop-codeclone/server/index.js`, 
`node --check extensions/claude-desktop-codeclone/src/launcher.js`, `node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs`, `node --test extensions/claude-desktop-codeclone/test/*.test.js`, plus `.mcpb` build smoke | bundle install/runtime model, launcher UX, local-stdio constraints, or bundle metadata change | documented Claude Desktop install/setup/runtime behavior or packaged bundle semantics change | | Codex plugin surface (`plugins/codeclone/*`, `.agents/plugins/marketplace.json`) | `docs/book/23-codex-plugin.md`, `docs/codex-plugin.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `python3 -m json.tool plugins/codeclone/.codex-plugin/plugin.json`, `python3 -m json.tool plugins/codeclone/.mcp.json`, `python3 -m json.tool .agents/plugins/marketplace.json`, `tests/test_codex_plugin.py` | plugin discovery/runtime model, bundled MCP config, bundled skill behavior, or plugin metadata change | documented Codex plugin install/discovery/runtime behavior or plugin manifest/marketplace semantics change | @@ -564,8 +572,9 @@ Policy: ### Internal implementation surfaces -- Local helpers and formatting utilities (`_html_*`, many private `_as_*` normalizers, local transformers). -- Internal orchestration decomposition inside `_cli_*` modules. +- Local helpers and formatting utilities (`codeclone/report/html/widgets/*`, + `codeclone/report/html/primitives/*`, many private `_as_*` normalizers, local transformers). +- Internal orchestration decomposition inside `codeclone/surfaces/cli/*`. - Private utility refactors that do not change public payloads, exit semantics, ordering, or trust rules. If classification is ambiguous, treat it as contract-sensitive and add tests/docs before merging. @@ -660,7 +669,7 @@ These rules exist because of real incidents in this repo. 
They are non-negotiabl - Every doc claim about code (schema version, module path, function name, MCP tool count, exit code, CLI flag) must be verified against the **current** code before writing or editing. -- Always read version constants from `codeclone/contracts.py` (see Section 4 table), never from +- Always read version constants from `codeclone/contracts/__init__.py` (see Section 4 table), never from another doc. - When updating a file that mentions schema versions, verify **every** version reference in that file — not only the one you came to change. @@ -678,10 +687,11 @@ These rules exist because of real incidents in this repo. They are non-negotiabl ### Shared helpers -- HTML/UI helpers (`_html_badges.py`, `_html_css.py`, `_html_js.py`, `_html_escape.py`, - `_html_report/_glossary.py`) are imported, not duplicated locally inside `_html_report/_sections/*`. +- HTML/UI helpers (`codeclone/report/html/widgets/*`, `codeclone/report/html/primitives/*`, + `codeclone/report/html/assets/*`) are imported, not duplicated locally inside + `codeclone/report/html/sections/*`. If you need a helper that doesn't exist, add it to the shared module. -- Glossary terms used in stat-card labels live in `codeclone/_html_report/_glossary.py`. Adding a +- Glossary terms used in stat-card labels live in `codeclone/report/html/widgets/glossary.py`. Adding a new label without a glossary entry is a contract gap. ### Conflict avoidance @@ -699,7 +709,7 @@ These rules exist because of real incidents in this repo. They are non-negotiabl - A task that touches MCP is not complete until `pytest tests/test_mcp_service.py tests/test_mcp_server.py -x -q` is green. - A task that touches docs schema/version claims is not complete until you have grep'd the whole - file for *all* version-shaped strings and verified each against `codeclone/contracts.py`. + file for *all* version-shaped strings and verified each against `codeclone/contracts/__init__.py`. 
--- diff --git a/CHANGELOG.md b/CHANGELOG.md index db54f6e..d6ea5ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## [2.0.0b6] - 2026-04-22 + +Stabilizes the post-refactor architecture: canonical package layout, thinner +entrypoints, cleaner dependency boundaries, refreshed tests, and aligned docs. + +### Architecture and contracts + +- Move the runtime onto the new package layout: `main` + `surfaces/cli`, + `surfaces/mcp`, `core`, `analysis`, `baseline`, `cache`, `contracts`, + `report/document`, `report/renderers`, and `report/html`. +- Remove legacy root shims and stale compatibility modules in favor of direct + canonical imports. +- Keep clone baseline schema `2.1`, cache schema `2.5`, report schema `2.8`, + and metrics-baseline schema `1.2` unchanged while preserving determinism and + read-only MCP semantics. +- Prune stale deleted-file cache entries and tighten dependency chains that were + inflating post-refactor architectural depth. + ## [2.0.0b5] - 2026-04-16 Expands the canonical contract with adoption, API-surface, and coverage-join layers; clarifies run interpretation diff --git a/docs/architecture.md b/docs/architecture.md index 43fab28..d6b30f8 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -193,10 +193,10 @@ Current shape: Operational note: -- `codeclone/mcp_server.py` is only a thin launcher/registration layer. +- `codeclone/surfaces/mcp/server.py` is only a thin launcher/registration layer. - The optional MCP runtime is imported lazily so the base `codeclone` install and normal CI paths do not require MCP packages. -- `codeclone/mcp_service.py` is the in-process adapter over the existing +- `codeclone/surfaces/mcp/service.py` is the in-process adapter over the existing pipeline/report contracts. The MCP layer is intentionally thin. 
It does not add a separate analysis engine; diff --git a/docs/book/00-intro.md b/docs/book/00-intro.md index 58f04e2..5d53853 100644 --- a/docs/book/00-intro.md +++ b/docs/book/00-intro.md @@ -7,9 +7,9 @@ describes only behavior that is present in code and/or locked by tests. ## Public surface -- CLI entrypoint: `codeclone/cli.py:main` +- CLI entrypoint: `codeclone/main.py:main` - Package version: `codeclone/__init__.py:__version__` -- Global contract constants: `codeclone/contracts.py` +- Global contract constants: `codeclone/contracts/__init__.py` ## Contracts @@ -22,10 +22,10 @@ version, same baseline/cache/report schemas): Refs: -- `codeclone/report/json_contract.py:build_report_document` -- `codeclone/baseline.py:Baseline.verify_compatibility` -- `codeclone/cache.py:Cache.load` -- `codeclone/contracts.py:ExitCode` +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` +- `codeclone/cache/store.py:Cache.load` +- `codeclone/contracts/__init__.py:ExitCode` ## Invariants (MUST) @@ -35,9 +35,9 @@ Refs: Refs: -- `codeclone/cli.py:_main_impl` -- `codeclone/baseline.py:BASELINE_UNTRUSTED_STATUSES` -- `codeclone/cache.py:Cache._ignore_cache` +- `codeclone/surfaces/cli/workflow.py:_main_impl` +- `codeclone/baseline/trust.py:BASELINE_UNTRUSTED_STATUSES` +- `codeclone/cache/store.py:Cache._ignore_cache` ## Failure modes @@ -50,8 +50,8 @@ Refs: Refs: -- `codeclone/cli.py:_main_impl` -- `codeclone/cli.py:main` +- `codeclone/surfaces/cli/workflow.py:_main_impl` +- `codeclone/main.py:main` ## Determinism / canonicalization @@ -62,9 +62,9 @@ Refs: Refs: - `codeclone/scanner.py:iter_py_files` -- `codeclone/report/json_contract.py:build_report_document` -- `codeclone/baseline.py:_compute_payload_sha256` -- `codeclone/cache.py:_canonical_json` +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/baseline/trust.py:_compute_payload_sha256` +- 
`codeclone/cache/integrity.py:canonical_json` ## Locked by tests diff --git a/docs/book/01-architecture-map.md b/docs/book/01-architecture-map.md index 51b7980..774a794 100644 --- a/docs/book/01-architecture-map.md +++ b/docs/book/01-architecture-map.md @@ -2,135 +2,125 @@ ## Purpose -Document current module boundaries and ownership in CodeClone v2.x. +Document the current module boundaries and ownership in CodeClone `2.0.x`. ## Public surface Main ownership layers: -- Core detection pipeline: `scanner` -> `extractor` -> `cfg/normalize/blocks` -> `grouping`. -- Quality metrics pipeline: complexity/coupling/cohesion/dependencies/dead-code/health. -- Contracts and persistence: baseline, metrics baseline, cache, exit semantics. -- Report model and projections: canonical JSON + deterministic TXT/Markdown/SARIF + explainability facts. -- MCP agent surface: read-only server layer over the same pipeline/report contracts. -- VS Code extension surface: native IDE client over the MCP layer and the same canonical report semantics, with - limited Restricted Mode, source-first review flow, and factual overview surfaces such as `Coverage Join` when MCP - exposes them. -- Claude Desktop bundle surface: installable local `.mcpb` wrapper that launches the same `codeclone-mcp` server for - Claude Desktop without introducing a second MCP or analysis layer. -- Codex plugin surface: repo-local Codex plugin under `plugins/` and `.agents/plugins/marketplace.json` that adds - native plugin discovery, a local MCP definition, and a CodeClone review skill over the same server. -- Render layer: HTML rendering and template assets. 
+- CLI entry and UX orchestration +- Config parsing and pyproject resolution +- Core runtime pipeline +- Analysis and clone grouping +- Metrics and findings +- Baseline/cache persistence contracts +- Canonical report document and deterministic projections +- HTML render-only surface +- Read-only MCP surface +- IDE/client surfaces over MCP ## Data model -| Layer | Modules | Responsibility | -|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------| -| Contracts | `codeclone/contracts.py`, `codeclone/errors.py` | Shared schema versions, URLs, exit-code enum, typed exceptions | -| Domain models | `codeclone/models.py`, `codeclone/domain/*.py` | Typed dataclasses/enums plus centralized finding/scope/severity taxonomies | -| Discovery + parsing | `codeclone/scanner.py`, `codeclone/extractor.py` | Enumerate files, parse AST, extract function/block/segment units | -| Structural analysis | `codeclone/cfg.py`, `codeclone/normalize.py`, `codeclone/fingerprint.py`, `codeclone/blocks.py` | CFG, normalization, statement hashes, block/segment windows | -| Grouping | `codeclone/grouping.py` | Build function/block/segment groups | -| Metrics | `codeclone/metrics/*` | Compute complexity/coupling/cohesion/dependency/dead-code/health signals | -| Report core | `codeclone/report/*`, `codeclone/_cli_meta.py` | Canonical report building, deterministic projections, explainability facts, and shared metadata | -| Persistence | `codeclone/baseline.py`, `codeclone/metrics_baseline.py`, `codeclone/cache.py` | Baseline/cache trust/compat/integrity and atomic persistence | -| Runtime orchestration | `codeclone/pipeline.py`, `codeclone/cli.py`, `codeclone/_cli_args.py`, `codeclone/_cli_paths.py`, 
`codeclone/_cli_summary.py`, `codeclone/_cli_config.py`, `codeclone/ui_messages.py` | CLI UX, stage orchestration, status handling, outputs, error markers | -| MCP agent interface | `codeclone/mcp_service.py`, `codeclone/mcp_server.py` | Read-only MCP tools/resources over canonical analysis and report layers | -| VS Code extension | `extensions/vscode-codeclone/*` | Native VS Code control surface over MCP, with limited Restricted Mode, triage-first review, and source-first drill-down | -| Claude Desktop bundle | `extensions/claude-desktop-codeclone/*` | Installable local MCPB wrapper over `codeclone-mcp`, keeping Claude Desktop on the canonical read-only MCP surface | -| Codex plugin | `plugins/codeclone/*`, `.agents/plugins/marketplace.json` | Native Codex plugin surface over `codeclone-mcp`, with repo-local discovery metadata and CodeClone skill guidance | -| Rendering | `codeclone/html_report.py`, `codeclone/_html_report/*`, `codeclone/_html_badges.py`, `codeclone/_html_js.py`, `codeclone/_html_escape.py`, `codeclone/_html_snippets.py`, `codeclone/templates.py` | HTML-only view layer over report data | +| Layer | Modules | Responsibility | +|-------------------------|-------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------| +| Entry | `codeclone/main.py` | Public CLI entrypoint only | +| CLI surface | `codeclone/surfaces/cli/*`, `codeclone/ui_messages/*` | Parse args, resolve runtime mode, print summaries, write outputs, route exits | +| Config | `codeclone/config/*` | Option specs, parser construction, pyproject loading, CLI > pyproject > defaults merge | +| Core runtime | `codeclone/core/*` | Bootstrap, discovery, worker processing, project metrics, report/gate integration | +| Analysis | `codeclone/analysis/*`, `codeclone/blocks/*`, `codeclone/paths/*`, `codeclone/qualnames/*` | Parse source, normalize AST/CFG facts, 
extract units, prepare deterministic analysis inputs | +| Findings | `codeclone/findings/clones/*`, `codeclone/findings/structural/*` | Clone grouping and structural finding derivation | +| Metrics | `codeclone/metrics/*` | Complexity, coupling, cohesion, dependencies, dead code, health, adoption, coverage join, API surface | +| Contracts/domain | `codeclone/contracts/*`, `codeclone/models.py`, `codeclone/domain/*` | Version constants, enums, typed exceptions, shared models, domain taxonomies | +| Persistence | `codeclone/baseline/*`, `codeclone/cache/*` | Trusted comparison state and optimization-only cache contracts | +| Canonical report | `codeclone/report/document/*`, `codeclone/report/gates/*`, `codeclone/report/*.py` | Canonical report payload, derived projections, explainability, suggestions, gate reasons | +| Deterministic renderers | `codeclone/report/renderers/*` | Text/Markdown/SARIF/JSON projections over the canonical report | +| HTML render layer | `codeclone/report/html/*`, `codeclone/templates.py` | Render-only HTML view over canonical report/meta facts | +| MCP surface | `codeclone/surfaces/mcp/*` | Read-only MCP tools/resources over the same pipeline/report contracts | +| Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | Refs: -- `codeclone/pipeline.py` -- `codeclone/cli.py:_main_impl` +- `codeclone/main.py:main` +- `codeclone/surfaces/cli/workflow.py:_main_impl` +- `codeclone/core/pipeline.py:analyze` +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/report/html/assemble.py:build_html_report` +- `codeclone/surfaces/mcp/server.py:build_mcp_server` ## Contracts -- Core analysis modules do not depend on render/UI modules. -- HTML renderer receives already-computed report data/facts and does not - recompute detection semantics. 
-- MCP layer reuses current pipeline/report semantics and must not introduce a - separate analysis truth path. -- The VS Code extension follows the same rule through MCP: it is a client - integration surface over canonical report semantics, not a separate analyzer. -- The Claude Desktop bundle follows the same rule: it is a local installation - and launcher surface over `codeclone-mcp`, not a second server. -- The Codex plugin follows the same rule: it is a local discovery and skills - surface over `codeclone-mcp`, not a second analyzer or report model. -- MCP may ship task-specific slim projections (for example, summary-only metrics - or inventory counts) as long as canonical report data remains the source of - truth and richer detail stays reachable through dedicated tools/sections. -- The same rule applies to bounded semantic routing tools such as - `help(topic=...)`: they explain contract meaning and route agents to the - safest next step, but they do not introduce a second documentation or truth - model. -- The same rule applies to summary cache convenience fields such as - `freshness` and to production-first triage projections built from - canonical hotlists/suggestions. -- The same rule also applies to compact interpretation hints such as - `health_scope`, `focus`, and `new_by_source_kind`: they clarify projection - meaning without introducing a second report truth. -- MCP finding lists may also expose short run/finding ids and slimmer relative - location projections, while keeping `get_finding(detail_level="full")` as the - richer per-finding inspection path. -- Baseline, metrics baseline, and cache are validated before being trusted. +- Core produces facts; renderers present facts. +- `codeclone/report/document/*` is the canonical report source of truth. +- HTML, Markdown, SARIF, text, and MCP are projections over the same canonical report semantics. +- Baseline and cache are persistence contracts, not analysis truth. 
+- Cache is optimization-only and fail-open. +- MCP is read-only and must not create a second analysis truth path. +- VS Code, Claude Desktop, and Codex plugin surfaces are clients over MCP, not second analyzers. Refs: -- `codeclone/report/json_contract.py:build_report_document` -- `codeclone/html_report.py:build_html_report` -- `codeclone/baseline.py:Baseline.load` -- `codeclone/metrics_baseline.py:MetricsBaseline.load` -- `codeclone/cache.py:Cache.load` +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/report/renderers/text.py:render_text_report_document` +- `codeclone/report/renderers/markdown.py:render_markdown_report_document` +- `codeclone/report/renderers/sarif.py:render_sarif_report_document` +- `codeclone/report/html/assemble.py:build_html_report` +- `codeclone/baseline/clone_baseline.py:Baseline.load` +- `codeclone/baseline/metrics_baseline.py:MetricsBaseline.load` +- `codeclone/cache/store.py:Cache.load` ## Invariants (MUST) - Report serialization is deterministic and schema-versioned. -- UI is render-only and must not change gating semantics. -- Status enums remain domain-owned in baseline/metrics-baseline/cache modules. +- UI is render-only and must not invent gating semantics. +- Status enums remain domain-owned in baseline/metrics-baseline/cache/contracts modules. +- `codeclone/main.py` stays thin; orchestration lives in `codeclone/surfaces/cli/*`. 
Refs: -- `codeclone/report/json_contract.py:build_report_document` -- `codeclone/report/explain.py:build_block_group_facts` -- `codeclone/baseline.py:BaselineStatus` -- `codeclone/metrics_baseline.py:MetricsBaselineStatus` -- `codeclone/cache.py:CacheStatus` +- `codeclone/report/document/integrity.py:_build_integrity_payload` +- `codeclone/report/document/inventory.py:_build_inventory_payload` +- `codeclone/baseline/trust.py:BaselineStatus` +- `codeclone/baseline/_metrics_baseline_contract.py:MetricsBaselineStatus` +- `codeclone/cache/versioning.py:CacheStatus` +- `codeclone/contracts/__init__.py:ExitCode` ## Failure modes -| Condition | Layer | -|--------------------------------------------|---------------------------------------------------| -| Invalid CLI args / invalid output path | Runtime orchestration (`_cli_args`, `_cli_paths`) | -| Baseline schema/integrity mismatch | Baseline contract layer | -| Metrics baseline schema/integrity mismatch | Metrics baseline contract layer | -| Cache corruption/version mismatch | Cache contract layer (fail-open) | -| HTML snippet read failure | Render layer fallback snippet | +| Condition | Layer | +|--------------------------------------------------|----------------------------------------------------------------| +| Invalid CLI args / invalid output path | CLI surface (`codeclone/config/*`, `codeclone/surfaces/cli/*`) | +| Baseline schema/integrity mismatch | Baseline contract layer | +| Metrics baseline schema/integrity mismatch | Metrics-baseline contract layer | +| Cache corruption/version mismatch | Cache contract layer (fail-open) | +| HTML snippet read failure | HTML render layer fallback snippet | +| MCP invalid request / invalid root / unknown run | MCP surface | ## Determinism / canonicalization -- File iteration and group key ordering are explicit sorts. -- Report serializer uses fixed record layouts and sorted keys. +- File iteration and grouping order are explicit sorts. 
+- Canonical report integrity excludes non-canonical `derived` payload. +- Baseline and cache hashes/signatures use canonical JSON. Refs: - `codeclone/scanner.py:iter_py_files` -- `codeclone/report/json_contract.py:build_report_document` +- `codeclone/report/document/integrity.py:_build_integrity_payload` +- `codeclone/baseline/trust.py:_compute_payload_sha256` +- `codeclone/cache/integrity.py:canonical_json` ## Locked by tests +- `tests/test_architecture.py::test_architecture_layer_violations` - `tests/test_report.py::test_report_json_compact_v21_contract` +- `tests/test_report_contract_coverage.py::test_report_document_rich_invariants_and_renderers` - `tests/test_html_report.py::test_html_report_uses_core_block_group_facts` - `tests/test_cache.py::test_cache_v13_uses_relpaths_when_root_set` -- `tests/test_cli_unit.py::test_argument_parser_contract_error_marker_for_invalid_args` -- `tests/test_architecture.py::test_architecture_layer_violations` +- `tests/test_mcp_service.py::test_mcp_service_analyze_repository_registers_latest_run` ## Non-guarantees -- Internal module split may evolve in v2.x if public contracts are preserved. -- Import tree acyclicity is policy and test-enforced where explicitly asserted. +- Internal file splits may evolve in `2.0.x` if public contracts are preserved. +- Package markers and internal helper placement are not contract by themselves. 
## Chapter map @@ -143,11 +133,7 @@ Refs: | Cache trust and fail-open behavior | [07-cache.md](07-cache.md) | | Report schema and provenance | [08-report.md](08-report.md), [10-html-render.md](10-html-render.md) | | MCP agent surface | [20-mcp-interface.md](20-mcp-interface.md) | -| VS Code IDE surface | [21-vscode-extension.md](21-vscode-extension.md) | -| Claude Desktop install surface | [22-claude-desktop-bundle.md](22-claude-desktop-bundle.md) | -| Codex plugin surface | [23-codex-plugin.md](23-codex-plugin.md) | | Health score model | [15-health-score.md](15-health-score.md) | | Metrics gates and metrics baseline | [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) | | Dead-code liveness policy | [16-dead-code-contract.md](16-dead-code-contract.md) | -| Suggestions and clone typing | [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) | | Determinism and versioning policy | [12-determinism.md](12-determinism.md), [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) | diff --git a/docs/book/02-terminology.md b/docs/book/02-terminology.md index 316f483..191a2c2 100644 --- a/docs/book/02-terminology.md +++ b/docs/book/02-terminology.md @@ -6,88 +6,65 @@ Define terms exactly as used by code and tests. ## Public surface -- Baseline identifiers and statuses: `codeclone/baseline.py` -- Cache statuses and compact layout: `codeclone/cache.py` -- Report schema and group layouts: `codeclone/report/json_contract.py` +- Baseline identifiers and statuses: `codeclone/baseline/*` +- Cache statuses and compact layout: `codeclone/cache/*` +- Report schema and group layouts: `codeclone/report/document/*` ## Data model -- **fingerprint**: function-level CFG fingerprint (`sha1`) + LOC bucket key. -- **block_hash**: ordered sequence of normalized statement hashes in a fixed window. -- **segment_hash**: hash of ordered segment window. -- **segment_sig**: hash of sorted segment window (candidate grouping signature). 
-- **stable structure facts**: per-function deterministic structure profile fields - (`entry_guard_*`, `terminal_kind`, `try_finally_profile`, - `side_effect_order_profile`) reused by report families. -- **cohort structural findings**: report-only structural families derived from - existing function-clone groups (`clone_guard_exit_divergence`, - `clone_cohort_drift`). -- **python_tag**: runtime compatibility tag like `cp313`. +- **fingerprint**: function-level CFG fingerprint (`sha1`) plus LOC bucket +- **block_hash**: ordered sequence of normalized statement hashes in a fixed window +- **segment_hash**: hash of an ordered segment window +- **segment_sig**: hash of a sorted segment window used for candidate grouping +- **python_tag**: runtime compatibility tag like `cp313` - **schema_version**: - - baseline schema (`meta.schema_version`) for baseline compatibility. - - cache schema (`v`) for cache compatibility. - - report schema (`report_schema_version`) for report format compatibility. -- **payload_sha256**: canonical baseline semantic hash. -- **trusted baseline**: baseline loaded + status `ok`. -- **source_kind**: file classification — `production`, `tests`, `fixtures`, `other` — - determined by scanner path rules. Drives source-scope breakdown and - hotspot attribution. -- **health score**: weighted blend of seven dimension scores (0–100). - Dimensions: clones 25%, complexity 20%, cohesion 15%, coupling 10%, - dead code 10%, dependencies 10%, coverage 10%. - Report-only layers such as `Overloaded Modules` do not currently affect the score. - Grade bands: A ≥90, B ≥75, C ≥60, D ≥40, F <40. -- **design finding**: metric-driven finding (complexity/coupling/cohesion) - emitted by the canonical report builder when a class or function exceeds - the report-level design threshold. Thresholds are stored in - `meta.analysis_thresholds.design_findings`. -- **suggestion**: advisory recommendation card derived from clones, structural - findings, or metric violations. 
Advisory only — never gates CI. -- **production_hotspot**: finding group whose items are concentrated in - production source scope (`source_kind=production`). -- **freshness**: MCP cache indicator (`fresh` / `mixed` / `reused`) - reflecting how much of the analysis was recomputed vs cache-served. -- **directory_hotspot**: derived aggregation in `derived.overview` showing - which directories concentrate the most findings by category. + - baseline schema in `meta.schema_version` + - cache schema in top-level `v` + - report schema in `report_schema_version` +- **payload_sha256**: canonical baseline semantic hash +- **trusted baseline**: baseline loaded with status `ok` +- **source_kind**: file classification `production | tests | fixtures | other` +- **design finding**: metric-driven finding emitted by the canonical report builder using + `meta.analysis_thresholds.design_findings` +- **suggestion**: advisory recommendation card derived from findings/metrics; never gates CI +- **directory_hotspot**: derived aggregation showing where findings cluster by category Refs: -- `codeclone/grouping.py:build_groups` -- `codeclone/blocks.py:extract_blocks` -- `codeclone/blocks.py:extract_segments` -- `codeclone/baseline.py:current_python_tag` -- `codeclone/baseline.py:Baseline.verify_compatibility` +- `codeclone/findings/clones/grouping.py:build_groups` +- `codeclone/blocks/__init__.py` +- `codeclone/baseline/trust.py:current_python_tag` +- `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` - `codeclone/scanner.py:classify_source_kind` - `codeclone/metrics/health.py:compute_health` -- `codeclone/report/json_contract.py:_design_findings_thresholds_payload` +- `codeclone/report/document/_common.py:_design_findings_thresholds_payload` - `codeclone/report/suggestions.py:generate_suggestions` - `codeclone/report/overview.py:build_directory_hotspots` ## Contracts -- New/known classification is key-based, not item-heuristic-based. 
+- New/known classification is key-based, not heuristic-based. - Baseline trust is status-driven. - Cache trust is status-driven and independent from baseline trust. -- Design finding universe is determined solely by the canonical report builder; - MCP and HTML read, never resynthesize. +- Design finding universe is determined by the canonical report builder; MCP and HTML read it, never resynthesize it. - Suggestions are advisory and never affect exit code. Refs: -- `codeclone/report/json_contract.py:build_report_document` -- `codeclone/cli.py:_main_impl` +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/surfaces/cli/workflow.py:_main_impl` ## Invariants (MUST) -- Function group key format: `fingerprint|loc_bucket`. -- Block group key format: `block_hash`. -- Segment group key format: `segment_hash|qualname` (internal/report-only grouping path). +- Function group key format: `fingerprint|loc_bucket` +- Block group key format: `block_hash` +- Segment group key format: `segment_hash|qualname` Refs: -- `codeclone/grouping.py:build_groups` -- `codeclone/grouping.py:build_block_groups` -- `codeclone/grouping.py:build_segment_groups` +- `codeclone/findings/clones/grouping.py:build_groups` +- `codeclone/findings/clones/grouping.py:build_block_groups` +- `codeclone/findings/clones/grouping.py:build_segment_groups` ## Failure modes @@ -99,8 +76,8 @@ Refs: Refs: -- `codeclone/baseline.py:Baseline.verify_compatibility` -- `codeclone/cache.py:Cache.load` +- `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` +- `codeclone/cache/store.py:Cache.load` ## Determinism / canonicalization @@ -109,8 +86,8 @@ Refs: Refs: -- `codeclone/baseline.py:_require_sorted_unique_ids` -- `codeclone/cache.py:_encode_wire_file_entry` +- `codeclone/baseline/trust.py:_require_sorted_unique_ids` +- `codeclone/cache/_wire_encode.py:_encode_wire_file_entry` ## Locked by tests diff --git a/docs/book/03-contracts-exit-codes.md 
b/docs/book/03-contracts-exit-codes.md index 89a1747..3a7345c 100644 --- a/docs/book/03-contracts-exit-codes.md +++ b/docs/book/03-contracts-exit-codes.md @@ -6,59 +6,61 @@ Define stable process exit semantics and category boundaries. ## Public surface -- Exit enum: `codeclone/contracts.py:ExitCode` -- CLI categorization and exits: `codeclone/cli.py:_main_impl`, `codeclone/cli.py:main` -- Error markers: `codeclone/ui_messages.py` +- Exit enum: `codeclone/contracts/__init__.py:ExitCode` +- CLI entry: `codeclone/main.py:main` +- CLI orchestration: `codeclone/surfaces/cli/workflow.py:_main_impl` +- Error markers/formatters: `codeclone/ui_messages/__init__.py` ## Data model -| Exit code | Category | Meaning | -|-----------|----------------|------------------------------------------------------------------------------------------------------------------------| -| 0 | success | Run completed without gating failures | -| 2 | contract error | Input/contract violation (baseline trust, output path/ext, invalid CLI flag combinations, unreadable source in gating) | -| 3 | gating failure | Analysis succeeded but policy failed (`--fail-on-new`, `--fail-threshold`, metrics gates) | -| 5 | internal error | Unexpected exception escaped `_main_impl` | +| Exit code | Category | Meaning | +|-----------|----------------|-----------------------------------------------------| +| `0` | success | Run completed without gating failures | +| `2` | contract error | Input or contract violation | +| `3` | gating failure | Analysis succeeded but policy failed | +| `5` | internal error | Unexpected exception escaped top-level CLI handling | Refs: -- `codeclone/contracts.py:ExitCode` -- `codeclone/_cli_args.py:_ArgumentParser.error` +- `codeclone/contracts/__init__.py:ExitCode` +- `codeclone/config/argparse_builder.py:_ArgumentParser.error` ## Contracts -- Contract errors must use `CONTRACT ERROR:` marker. -- Gating failures must use `GATING FAILURE:` marker. 
-- Internal errors are formatted by `fmt_internal_error`; traceback hidden unless debug enabled. +- Contract errors use the `CONTRACT ERROR:` marker. +- Gating failures use the `GATING FAILURE:` marker. +- Internal errors use `INTERNAL ERROR:` and hide traceback unless debug is enabled. +- `main()` lets `SystemExit` from contract/gating paths pass through unchanged. Refs: -- `codeclone/ui_messages.py:fmt_contract_error` -- `codeclone/ui_messages.py:fmt_gating_failure` -- `codeclone/ui_messages.py:fmt_internal_error` +- `codeclone/ui_messages/__init__.py:MARKER_CONTRACT_ERROR` +- `codeclone/ui_messages/__init__.py:MARKER_INTERNAL_ERROR` +- `codeclone/ui_messages/__init__.py:fmt_contract_error` +- `codeclone/ui_messages/__init__.py:fmt_gating_failure` +- `codeclone/ui_messages/__init__.py:fmt_internal_error` ## Invariants (MUST) -- `SystemExit` from contract/gating paths must pass through `main()` unchanged. -- Only non-`SystemExit` exceptions in `main()` become exit 5. -- In gating mode, unreadable source files force exit 2 even if clone gating would also fail. +- Only non-`SystemExit` exceptions in `main()` become exit `5`. +- In gating mode, unreadable source files win over clone/metric gate failure and force exit `2`. 
Refs: -- `codeclone/cli.py:main` -- `codeclone/cli.py:_main_impl` +- `codeclone/main.py:main` +- `codeclone/surfaces/cli/workflow.py:_main_impl` ## Failure modes -| Condition | Marker | Exit | -|----------------------------------------------|----------------|------| -| Invalid output extension | CONTRACT ERROR | 2 | -| `--open-html-report` without `--html` | CONTRACT ERROR | 2 | -| `--timestamped-report-paths` without reports | CONTRACT ERROR | 2 | -| Untrusted baseline in CI/gating | CONTRACT ERROR | 2 | -| Unreadable source in CI/gating | CONTRACT ERROR | 2 | -| New clones with `--fail-on-new` | GATING FAILURE | 3 | -| Threshold exceeded | GATING FAILURE | 3 | -| Unexpected exception in main pipeline | INTERNAL ERROR | 5 | +| Condition | Marker | Exit | +|--------------------------------------------|------------------|------| +| Invalid output extension/path | `CONTRACT ERROR` | `2` | +| Invalid CLI flag combination | `CONTRACT ERROR` | `2` | +| Untrusted baseline in CI/gating | `CONTRACT ERROR` | `2` | +| Unreadable source in CI/gating | `CONTRACT ERROR` | `2` | +| New clones with `--fail-on-new` | `GATING FAILURE` | `3` | +| Threshold or metrics gate breach | `GATING FAILURE` | `3` | +| Unexpected exception in top-level CLI path | `INTERNAL ERROR` | `5` | ## Determinism / canonicalization @@ -67,8 +69,8 @@ Refs: Refs: -- `codeclone/contracts.py:cli_help_epilog` -- `codeclone/ui_messages.py:MARKER_CONTRACT_ERROR` +- `codeclone/contracts/__init__.py:cli_help_epilog` +- `codeclone/ui_messages/__init__.py:MARKER_CONTRACT_ERROR` ## Locked by tests @@ -80,9 +82,4 @@ Refs: ## Non-guarantees -- Exact message body text may evolve; category marker and exit code are contract. - -## See also - -- [09-cli.md](09-cli.md) -- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) +- Exact message body wording may evolve; marker category and exit code are contract. 
diff --git a/docs/book/04-config-and-defaults.md b/docs/book/04-config-and-defaults.md index c916b09..8eec526 100644 --- a/docs/book/04-config-and-defaults.md +++ b/docs/book/04-config-and-defaults.md @@ -6,11 +6,13 @@ Describe effective runtime configuration and defaults that affect behavior. ## Public surface -- CLI parser and defaults: `codeclone/_cli_args.py:build_parser` -- Pyproject config loader: `codeclone/_cli_config.py` -- Effective cache default path logic: `codeclone/cli.py:_resolve_cache_path` -- Metrics-mode selection logic: `codeclone/cli.py:_configure_metrics_mode` -- Debug mode sources: `codeclone/cli.py:_is_debug_enabled` +- Option specs/defaults: `codeclone/config/spec.py` +- CLI parser and defaults: `codeclone/config/argparse_builder.py:build_parser` +- Pyproject config loader: `codeclone/config/pyproject_loader.py:load_pyproject_config` +- Config resolver: `codeclone/config/resolver.py:resolve_config` +- Effective cache default path logic: `codeclone/surfaces/cli/runtime.py:_resolve_cache_path` +- Metrics-mode selection logic: `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` +- Debug mode sources: `codeclone/surfaces/cli/console.py:_is_debug_enabled` ## Data model @@ -132,8 +134,8 @@ Report outputs and local UX: | `verbose` | `bool` | `false` | Enable more verbose CLI output | `-` | | `debug` | `bool` | `false` | Enable debug diagnostics | Also enabled by `CODECLONE_DEBUG=1` | -This is the exact accepted key set from `codeclone/_cli_config.py`; unknown -keys are contract errors. +This is the exact accepted key set from `codeclone/config/spec.py` and +`codeclone/config/pyproject_loader.py`; unknown keys are contract errors. 
Notes: @@ -178,9 +180,12 @@ Metrics baseline path selection contract: Refs: -- `codeclone/_cli_args.py:build_parser` -- `codeclone/cli.py:_main_impl` -- `codeclone/cli.py:_configure_metrics_mode` +- `codeclone/config/spec.py` +- `codeclone/config/argparse_builder.py:build_parser` +- `codeclone/config/pyproject_loader.py:load_pyproject_config` +- `codeclone/config/resolver.py:resolve_config` +- `codeclone/surfaces/cli/workflow.py:_main_impl` +- `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` ## Contracts @@ -192,7 +197,7 @@ Refs: Refs: -- `codeclone/cli.py:_main_impl` +- `codeclone/surfaces/cli/workflow.py:_main_impl` ## Invariants (MUST) @@ -209,9 +214,10 @@ Refs: Refs: -- `codeclone/extractor.py:extract_units_and_stats_from_source` -- `codeclone/_cli_args.py:build_parser` -- `codeclone/cli.py:_configure_metrics_mode` +- `codeclone/analysis/units.py:extract_units_and_stats_from_source` +- `codeclone/config/spec.py` +- `codeclone/config/argparse_builder.py:build_parser` +- `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` ## Failure modes @@ -223,8 +229,9 @@ Refs: Refs: -- `codeclone/_cli_paths.py:_validate_output_path` -- `codeclone/cli.py:_main_impl` +- `codeclone/surfaces/cli/reports_output.py:_validate_output_path` +- `codeclone/surfaces/cli/startup.py:resolve_existing_root_path` +- `codeclone/surfaces/cli/workflow.py:_main_impl` ## Determinism / canonicalization @@ -233,8 +240,8 @@ Refs: Refs: -- `codeclone/contracts.py:cli_help_epilog` -- `codeclone/ui_messages.py:SUMMARY_LABEL_FILES_FOUND` +- `codeclone/contracts/__init__.py:cli_help_epilog` +- `codeclone/ui_messages/__init__.py:SUMMARY_LABEL_FILES_FOUND` ## Locked by tests diff --git a/docs/book/05-core-pipeline.md b/docs/book/05-core-pipeline.md index 57c3a3c..903b505 100644 --- a/docs/book/05-core-pipeline.md +++ b/docs/book/05-core-pipeline.md @@ -2,139 +2,106 @@ ## Purpose -Describe the detection pipeline from file discovery to grouped clones. 
+Describe the runtime pipeline from file discovery to grouped clones, metrics, +report assembly, and gating. ## Public surface -Pipeline entrypoints: - -- Discovery stage: `codeclone/pipeline.py:discover` -- Per-file processing: `codeclone/pipeline.py:process_file` -- Extraction: `codeclone/extractor.py:extract_units_and_stats_from_source` -- Grouping: `codeclone/grouping.py` +- Discovery: `codeclone/core/discovery.py:discover` +- Per-file processing: `codeclone/core/worker.py:process_file` +- Extraction: `codeclone/analysis/units.py:extract_units_and_stats_from_source` +- Clone grouping: `codeclone/findings/clones/grouping.py` +- Project metrics and suggestions: `codeclone/core/pipeline.py` +- Report/gating integration: `codeclone/core/reporting.py:report`, + `codeclone/core/reporting.py:gate` ## Data model Stages: -1. Discover Python files (`iter_py_files`, sorted traversal) -2. Load from cache if `stat` signature matches -3. Process changed files: +1. Bootstrap runtime paths and config. +2. Discover Python files with deterministic traversal. +3. Load usable cache entries by stat signature and compatible analysis profile. +4. Process changed/missed files: - read source - - AST parse with limits - - extract units/blocks/segments -4. Build groups: + - parse AST with limits + - extract function, block, and segment units + - collect referenced names/qualnames and dead-code candidates +5. Build groups: - function groups by `fingerprint|loc_bucket` - block groups by `block_hash` - segment groups by `segment_sig` then `segment_hash|qualname` -5. Report-layer post-processing: - - merge block windows to maximal regions - - merge/suppress segment report groups - - optionally split out clone groups fully contained in configured - `golden_fixture_paths` -6. Structural report findings: - - duplicated branch families from per-function AST structure facts - - clone cohort drift families built from existing function groups (no rescan) -7. 
Metrics computation (full mode only): - - per-function cyclomatic complexity - - per-class coupling (CBO) and cohesion (LCOM4) - - dead-code analysis: declaration-only, qualname-based liveness - - dependency graph and cycle detection -8. Health scoring: - - seven dimension scores: clones, complexity, coupling, cohesion, - dead code, dependencies, coverage - - weighted blend → composite score (0–100) and grade (A–F) -9. Suggestion generation: - - advisory cards from clone groups, structural findings, metric violations - - deterministic priority sort, never gates CI -10. Current-run coverage join (optional): - - when `--coverage` is present, join external Cobertura XML to discovered - function spans - - invalid XML becomes `coverage_join.status="invalid"` for that run rather - than mutating baseline state -11. Design finding extraction: - - threshold-aware findings for complexity, coupling, cohesion - - coverage `coverage_hotspot` / `coverage_scope_gap` findings from valid - coverage-join rows only - - thresholds recorded in `meta.analysis_thresholds.design_findings` -12. Derived overview and hotlists: - - overview families, top risks, source breakdown, health snapshot - - directory hotspots by category (`derived.overview.directory_hotspots`) - - hotlists: most actionable, highest spread, production/test-fixture hotspots -13. Gate evaluation: - - clone-baseline diff (NEW vs KNOWN) - - metric threshold gates (`--fail-complexity`, `--fail-coupling`, etc.) - - metric regression gates (`--fail-on-new-metrics`) - - coverage hotspot gate (`--fail-on-untested-hotspots`) - - gate reasons emitted in deterministic order +6. Compute project metrics in full mode: + - complexity, coupling, cohesion + - dead code + - dependency graph and cycles + - health score + - adoption, API surface, optional coverage join +7. Build canonical report document and deterministic projections. +8. Evaluate clone diff and metric gates. 
Refs: -- `codeclone/pipeline.py` -- `codeclone/extractor.py:extract_units_and_stats_from_source` -- `codeclone/report/blocks.py:prepare_block_report_groups` -- `codeclone/report/segments.py:prepare_segment_report_groups` -- `codeclone/metrics/health.py:compute_health` -- `codeclone/metrics/coverage_join.py:build_coverage_join` -- `codeclone/report/json_contract.py:_build_design_groups` -- `codeclone/report/suggestions.py:generate_suggestions` -- `codeclone/report/overview.py:build_directory_hotspots` -- `codeclone/pipeline.py:metric_gate_reasons` +- `codeclone/core/bootstrap.py:bootstrap` +- `codeclone/core/discovery.py:discover` +- `codeclone/core/worker.py:process_file` +- `codeclone/analysis/units.py:extract_units_and_stats_from_source` +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/report/gates/evaluator.py:metric_gate_reasons` +- `codeclone/core/reporting.py:gate` ## Contracts -- Detection core (`extractor`, `normalize`, `cfg`, `blocks`) computes clone candidates. +- Detection core computes facts; report layer materializes canonical findings from those facts. - Report-layer transformations do not change function/block grouping keys used for baseline diff. - Segment groups are report-only and do not participate in baseline diff/gating. - Structural findings are report-only and do not participate in baseline diff/gating. -- `golden_fixture_paths` is a project-level clone exclusion policy, not a - fingerprint/baseline rule: - - it applies only to clone groups fully contained in matching - `tests/` / `tests/fixtures/` paths - - excluded groups do not affect health, clone gates, or suggestions - - excluded groups remain observable as suppressed canonical report facts -- Dead-code liveness references from test paths are excluded at extraction/cache-load boundaries for both - local-name references and canonical qualname references. 
+- `golden_fixture_paths` is a clone-policy exclusion layer: + excluded groups remain visible as suppressed canonical report facts, but do + not affect health, gates, or suggestions. +- Test-path liveness references are filtered both on fresh extraction and on + cache decode. Refs: -- `codeclone/cli.py:_main_impl` (diff uses only function/block groups) -- `codeclone/baseline.py:Baseline.diff` -- `codeclone/extractor.py:extract_units_and_stats_from_source` -- `codeclone/pipeline.py:_load_cached_metrics` +- `codeclone/findings/clones/grouping.py:build_groups` +- `codeclone/report/document/_findings_groups.py:_build_clone_groups` +- `codeclone/findings/structural/detectors.py:normalize_structural_findings` +- `codeclone/core/discovery_cache.py:load_cached_metrics_extended` +- `codeclone/baseline/clone_baseline.py:Baseline.diff` ## Invariants (MUST) -- `Files found = Files analyzed + Cache hits + Files skipped` warning if broken. -- In gating mode, unreadable source IO (`source_read_error`) is a contract failure. -- Parser time/resource protections are applied in POSIX mode via `_parse_limits`. +- `files_found = files_analyzed + cache_hits + files_skipped`, or CLI warns explicitly. +- In gating mode, unreadable source IO is a contract failure. +- Parser time/resource protections are applied before AST extraction. 
Refs: -- `codeclone/_cli_summary.py:_print_summary` -- `codeclone/cli.py:_main_impl` -- `codeclone/extractor.py:_parse_limits` +- `codeclone/surfaces/cli/summary.py:_print_summary` +- `codeclone/surfaces/cli/workflow.py:_main_impl` +- `codeclone/analysis/parser.py:_parse_limits` ## Failure modes -| Condition | Behavior | -|----------------------------------|-----------------------------------------------------------------------------| -| File stat/read/encoding error | File skipped; tracked as failed file; source-read subset tracked separately | -| Source read error in gating mode | Contract error exit 2 | -| Parser timeout | `ParseError` returned through processing failure path | -| Unexpected per-file exception | Captured as `ProcessingResult(error_kind="unexpected_error")` | +| Condition | Behavior | +|----------------------------------|--------------------------------------------------| +| File stat/read/encoding error | File skipped; tracked as failed file | +| Source read error in gating mode | Contract error, exit `2` | +| Parser timeout | `ParseError` through processing failure path | +| Unexpected per-file exception | Captured as `unexpected_error` processing result | ## Determinism / canonicalization - File list is sorted. -- Group sorting in reports is deterministic by key and stable item sort. +- Group sorting is deterministic by stable tuple keys. +- Canonical report integrity is computed only from canonical sections. 
Refs: - `codeclone/scanner.py:iter_py_files` -- `codeclone/report/json_contract.py:_build_clone_groups` -- `codeclone/report/json_contract.py:_build_structural_groups` -- `codeclone/report/json_contract.py:_build_integrity_payload` +- `codeclone/findings/clones/grouping.py:build_groups` +- `codeclone/report/document/integrity.py:_build_integrity_payload` ## Locked by tests @@ -143,16 +110,8 @@ Refs: - `tests/test_cli_inprocess.py::test_cli_unreadable_source_fails_in_ci_with_contract_error` - `tests/test_extractor.py::test_parse_limits_triggers_timeout` - `tests/test_extractor.py::test_dead_code_marks_symbol_dead_when_referenced_only_by_tests` -- `tests/test_extractor.py::test_extract_collects_referenced_qualnames_for_import_aliases` - `tests/test_pipeline_metrics.py::test_load_cached_metrics_ignores_referenced_names_from_test_files` ## Non-guarantees -- Parallel scheduling order is not guaranteed; only final grouped output determinism is guaranteed. - -## See also - -- [08-report.md](08-report.md) -- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) -- [16-dead-code-contract.md](16-dead-code-contract.md) -- [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) +- Parallel worker scheduling order is not guaranteed; only final output determinism is guaranteed. diff --git a/docs/book/06-baseline.md b/docs/book/06-baseline.md index b21c51e..d582a77 100644 --- a/docs/book/06-baseline.md +++ b/docs/book/06-baseline.md @@ -2,67 +2,58 @@ ## Purpose -Specify baseline schema v2.1, trust/compatibility checks, integrity hashing, and -runtime behavior. +Specify clone-baseline schema `2.1`, trust/compatibility checks, integrity +hashing, and runtime behavior. 
## Public surface -- Baseline object lifecycle: `codeclone/baseline.py:Baseline` -- Baseline statuses: `codeclone/baseline.py:BaselineStatus` -- Baseline status coercion: `codeclone/baseline.py:coerce_baseline_status` -- CLI integration: `codeclone/cli.py:_main_impl` +- Baseline object lifecycle: `codeclone/baseline/clone_baseline.py:Baseline` +- Baseline statuses: `codeclone/baseline/trust.py:BaselineStatus` +- Baseline status coercion: `codeclone/baseline/trust.py:coerce_baseline_status` +- CLI integration: `codeclone/surfaces/cli/baseline_state.py` ## Data model Canonical baseline shape: -- Required top-level keys: `meta`, `clones` -- Optional top-level keys: `metrics`, `api_surface` (unified baseline flow) +- required top-level keys: `meta`, `clones` +- optional top-level keys: `metrics`, `api_surface` (unified baseline flow) - `meta` required keys: `generator`, `schema_version`, `fingerprint_version`, `python_tag`, `created_at`, `payload_sha256` - `clones` required keys: `functions`, `blocks` -- `functions` and `blocks` are sorted/unique `list[str]` +- `functions` and `blocks` are sorted, unique `list[str]` Refs: -- `codeclone/baseline.py:_TOP_LEVEL_REQUIRED_KEYS` -- `codeclone/baseline.py:_TOP_LEVEL_OPTIONAL_KEYS` -- `codeclone/baseline.py:_META_REQUIRED_KEYS` -- `codeclone/baseline.py:_CLONES_REQUIRED_KEYS` -- `codeclone/baseline.py:_require_sorted_unique_ids` +- `codeclone/baseline/clone_baseline.py:_TOP_LEVEL_REQUIRED_KEYS` +- `codeclone/baseline/clone_baseline.py:_TOP_LEVEL_OPTIONAL_KEYS` +- `codeclone/baseline/clone_baseline.py:_META_REQUIRED_KEYS` +- `codeclone/baseline/clone_baseline.py:_CLONES_REQUIRED_KEYS` +- `codeclone/baseline/trust.py:_require_sorted_unique_ids` ## Contracts -Compatibility gates (`verify_compatibility`): +Compatibility gates: -- `generator == "codeclone"` -- `schema_version` major/minor must be supported by runtime +- `generator.name == "codeclone"` +- supported `schema_version` - `fingerprint_version == 
BASELINE_FINGERPRINT_VERSION` - `python_tag == current_python_tag()` - integrity verified via `payload_sha256` Current runtime policy: -- New clone baseline saves write schema `2.1`. -- Runtime still accepts `2.0` and `2.1` within baseline major `2`. - -Embedded metrics contract: - -- Top-level `metrics` is allowed only for baseline schema `>= 2.0`. -- Clone baseline save preserves existing embedded `metrics` payload, - optional `api_surface` payload, and the corresponding - `meta.metrics_payload_sha256` / `meta.api_surface_payload_sha256` values. -- Embedded `api_surface` snapshots use a compact wire format: each symbol stores - `local_name` relative to its containing `module`, and each module row stores - `filepath` relative to the baseline directory when possible. Runtime - reconstructs canonical full qualnames and runtime filepaths in memory before - diffing. -- The default runtime flow is unified: clone baseline and metrics baseline - usually share the same `codeclone.baseline.json` file unless the metrics path - is explicitly overridden. -- In unified rewrite mode, disabled optional metric surfaces are omitted from - the rewritten embedded payload instead of being preserved as stale baggage. 
+- new clone baseline saves write schema `2.1` +- runtime accepts `1.0`, `2.0`, and `2.1` + +Unified-baseline contract: + +- top-level `metrics` is allowed only for baseline schema `>= 2.0` +- the default runtime flow is unified: clone and metrics comparison state both + live in `codeclone.baseline.json` unless `--metrics-baseline` is redirected +- unified rewrites preserve current embedded metric sections that remain enabled + and drop disabled optional sections instead of keeping stale baggage Integrity payload includes only: @@ -71,30 +62,23 @@ Integrity payload includes only: - `meta.fingerprint_version` - `meta.python_tag` -Integrity payload excludes: - -- `meta.schema_version` -- `meta.generator.*` -- `meta.created_at` - Refs: -- `codeclone/baseline.py:Baseline.verify_compatibility` -- `codeclone/baseline.py:_compute_payload_sha256` -- `codeclone/baseline.py:_preserve_embedded_metrics` +- `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` +- `codeclone/baseline/trust.py:_compute_payload_sha256` +- `codeclone/baseline/metrics_baseline.py:MetricsBaseline.save` ## Invariants (MUST) -- Legacy top-level baselines (`functions`/`blocks` at root) are untrusted and - require regeneration. -- Baseline writes are atomic (`*.tmp` + `os.replace`, same filesystem). +- Legacy top-level baselines (`functions`/`blocks` at root) are untrusted and require regeneration. +- Baseline writes are atomic (`*.tmp` + `os.replace`). - Baseline diff is set-based and deterministic. Refs: -- `codeclone/baseline.py:_is_legacy_baseline_payload` -- `codeclone/baseline.py:_atomic_write_json` -- `codeclone/baseline.py:Baseline.diff` +- `codeclone/baseline/clone_baseline.py:_is_legacy_baseline_payload` +- `codeclone/baseline/clone_baseline.py:_atomic_write_json` +- `codeclone/baseline/clone_baseline.py:Baseline.diff` ## Failure modes @@ -113,26 +97,25 @@ Refs: CLI behavior: -- Normal mode: untrusted baseline is ignored, diff runs against empty baseline. 
-- Gating mode (`--ci` / `--fail-on-new`): untrusted baseline is contract error - (exit 2). +- normal mode: untrusted baseline is ignored and diff runs against empty baseline +- gating mode (`--ci` / `--fail-on-new`): untrusted baseline is a contract error Refs: -- `codeclone/baseline.py:BaselineStatus` -- `codeclone/cli.py:_main_impl` +- `codeclone/baseline/trust.py:BaselineStatus` +- `codeclone/surfaces/cli/baseline_state.py:resolve_clone_baseline_state` ## Determinism / canonicalization - Clone IDs are serialized sorted. -- Hash serialization uses canonical JSON (`sort_keys=True`, compact separators). -- `payload_sha256` uses `hmac.compare_digest` during verification. +- Hash serialization uses canonical JSON. +- Integrity verification uses constant-time comparison. Refs: -- `codeclone/baseline.py:_baseline_payload` -- `codeclone/baseline.py:_compute_payload_sha256` -- `codeclone/baseline.py:Baseline.verify_integrity` +- `codeclone/baseline/clone_baseline.py:_baseline_payload` +- `codeclone/baseline/trust.py:_compute_payload_sha256` +- `codeclone/baseline/clone_baseline.py:Baseline.verify_integrity` ## Locked by tests @@ -144,6 +127,5 @@ Refs: ## Non-guarantees -- Baseline generator version (`meta.generator.version`) is informational and not - a compatibility gate. +- `meta.generator.version` is informational and not a compatibility gate. - Baseline file indentation/style is not part of compatibility contract. diff --git a/docs/book/07-cache.md b/docs/book/07-cache.md index 1e3e268..b10a2bc 100644 --- a/docs/book/07-cache.md +++ b/docs/book/07-cache.md @@ -2,90 +2,73 @@ ## Purpose -Define cache schema v2.5, integrity verification, and fail-open behavior. +Define cache schema `2.5`, integrity verification, stale-entry pruning, and +fail-open behavior. 
## Public surface -- Cache object lifecycle: `codeclone/cache.py:Cache` -- Cache statuses: `codeclone/cache.py:CacheStatus` -- Stat signature source: `codeclone/cache.py:file_stat_signature` -- CLI cache integration: `codeclone/cli.py:_main_impl` +- Cache object lifecycle: `codeclone/cache/store.py:Cache` +- Cache statuses: `codeclone/cache/versioning.py:CacheStatus` +- Stat signature source: `codeclone/cache/store.py:file_stat_signature` +- Wire encode/decode: `codeclone/cache/_wire_encode.py`, + `codeclone/cache/_wire_decode.py` +- CLI/runtime integration: `codeclone/surfaces/cli/runtime.py`, + `codeclone/core/discovery.py` ## Data model On-disk schema (`v == "2.5"`): -- Top-level: `v`, `payload`, `sig` +- top-level: `v`, `payload`, `sig` - `payload` keys: `py`, `fp`, `ap`, `files`, optional `sr` - `ap` (`analysis_profile`) keys: - - `min_loc`, `min_stmt` - - `block_min_loc`, `block_min_stmt` - - `segment_min_loc`, `segment_min_stmt` - - `collect_api_surface` -- `files` map stores compact per-file entries: - - `st`: `[mtime_ns, size]` - - `ss`: `[lines, functions, methods, classes]` (source stats snapshot) - - `u` (function units): compact row layout with structural facts: - `[qualname,start,end,loc,stmt_count,fingerprint,loc_bucket,cc,nesting,risk,raw_hash,entry_guard_count,entry_guard_terminal_profile,entry_guard_has_side_effect_before,terminal_kind,try_finally_profile,side_effect_order_profile]` - - optional analysis sections (`b`/`s` and metrics-related sections) - - `rn`: referenced local names (non-test files only) - - `rq`: referenced canonical qualnames (non-test files only) -- file keys are wire relpaths when `root` is configured -- optional `sr` (`segment report projection`) stores precomputed segment-report - merge/suppression output: - - `d`: digest of raw segment groups - - `s`: suppressed segment groups count - - `g`: grouped merged segment items (wire rows) -- per-file `dc` (`dead_candidates`) rows do not repeat filepath; path is implied by - the 
containing file entry + `min_loc`, `min_stmt`, `block_min_loc`, `block_min_stmt`, + `segment_min_loc`, `segment_min_stmt`, `collect_api_surface` +- `files` stores compact per-file entries with stat signature, extracted units, + optional metrics sections, referenced names/qualnames, and cached source stats +- `sr` stores optional segment-report projection payload Refs: -- `codeclone/cache.py:Cache.load` -- `codeclone/cache.py:_encode_wire_file_entry` -- `codeclone/cache.py:_decode_wire_file_entry` +- `codeclone/cache/store.py:Cache.load` +- `codeclone/cache/_wire_encode.py:_encode_wire_file_entry` +- `codeclone/cache/_wire_decode.py:_decode_wire_file_entry` ## Contracts - Cache is optimization-only; invalid cache never blocks analysis. -- Any cache trust failure triggers warning + empty cache fallback. -- Cached file entry without valid `ss` (`source_stats`) is treated as cache-miss for - processing counters and reprocessed. -- Cache compatibility gates: - - version `v == CACHE_VERSION` +- Any cache trust failure triggers warning + empty-cache fallback. +- Compatibility gates: + - `v == CACHE_VERSION` - `payload.py == current_python_tag()` - `payload.fp == BASELINE_FINGERPRINT_VERSION` - `payload.ap` matches the current analysis profile - (`min_loc`, `min_stmt`, `block_min_loc`, `block_min_stmt`, - `segment_min_loc`, `segment_min_stmt`, `collect_api_surface`) - - `sig` equals deterministic hash of canonical payload -- Cache schema must also be bumped when cached analysis semantics change in a - way that could leave syntactically valid but semantically stale per-file - entries accepted by runtime compatibility checks. + - `sig` matches deterministic hash of canonical payload +- Stale deleted-file entries are pruned on save/update; cache must reflect the + current worktree, not historical deleted modules. +- Cached entries without valid source stats are treated as cache-miss for + processing counters and reprocessed. 
Refs: -- `codeclone/cache.py:Cache.load` -- `codeclone/cache.py:Cache._ignore_cache` -- `codeclone/cache.py:Cache._sign_data` +- `codeclone/cache/store.py:Cache.load` +- `codeclone/cache/store.py:Cache._ignore_cache` +- `codeclone/cache/integrity.py:sign_cache_payload` +- `codeclone/core/discovery.py:discover` ## Invariants (MUST) -- Cache save writes canonical JSON and atomically replaces target file. -- Empty sections (`u`, `b`, `s`) are omitted from written wire entries. -- `rn`/`rq` are serialized as sorted unique arrays and omitted when empty. -- Cached public-API symbol payloads preserve declared parameter order; cache - canonicalization must not reorder callable signatures. -- `ss` is written when source stats are available and is required for full cache-hit - accounting in discovery stage. -- Legacy secret file `.cache_secret` is never used for trust; warning only. +- Cache save writes canonical JSON and atomically replaces the target file. +- Empty sections are omitted from wire entries. +- Referenced names/qualnames are serialized as sorted unique arrays and omitted when empty. +- Cached public-API symbol payloads preserve declared parameter order. +- Legacy `.cache_secret` is warning-only and never used for trust. Refs: -- `codeclone/cache.py:Cache.save` -- `codeclone/cache.py:_encode_wire_file_entry` -- `codeclone/pipeline.py:discover` -- `codeclone/cache.py:LEGACY_CACHE_SECRET_FILENAME` +- `codeclone/cache/store.py:Cache.save` +- `codeclone/cache/_wire_encode.py:_encode_wire_file_entry` +- `codeclone/cache/versioning.py:LEGACY_CACHE_SECRET_FILENAME` ## Failure modes @@ -106,25 +89,21 @@ CLI behavior: cache failures do not change exit code; analysis continues without Refs: -- `codeclone/cache.py:CacheStatus` -- `codeclone/cli.py:_main_impl` +- `codeclone/cache/versioning.py:CacheStatus` +- `codeclone/surfaces/cli/runtime.py:resolve_cache_status` ## Determinism / canonicalization - Cache signatures are computed over canonical JSON payload. 
-- Wire file paths and row arrays are sorted before write. -- `rn`/`rq` are deterministically normalized to sorted unique arrays. -- Current schema decodes only the canonical row shapes that current runtime writes; - for `u` rows, decoder accepts legacy 11-column layout and canonical 17-column - layout (missing structural columns default to neutral values). -- `sr` is additive and optional; invalid/missing projection never invalidates the - cache and simply falls back to runtime recomputation. +- Wire file paths and compact row arrays are sorted before write. +- Optional segment-report projection is additive; invalid/missing projection + falls back to runtime recomputation. Refs: -- `codeclone/cache.py:_canonical_json` -- `codeclone/cache.py:_wire_filepath_from_runtime` -- `codeclone/cache.py:_encode_wire_file_entry` +- `codeclone/cache/integrity.py:canonical_json` +- `codeclone/cache/projection.py:wire_filepath_from_runtime` +- `codeclone/cache/_wire_encode.py:_encode_wire_file_entry` ## Locked by tests @@ -135,9 +114,9 @@ Refs: - `tests/test_cache.py::test_cache_too_large_warns` - `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag` - `tests/test_cli_inprocess.py::test_cli_cache_analysis_profile_compatibility` -- `tests/test_pipeline_metrics.py::test_load_cached_metrics_ignores_referenced_names_from_test_files` +- `tests/test_core_branch_coverage.py::test_discover_prunes_deleted_cache_entries` ## Non-guarantees - Cache file content stability across schema bumps is not guaranteed. -- Cache payload is tamper-evident only; it is not secret-authenticated. +- Cache is tamper-evident only; it is not an authenticated secret store. 
diff --git a/docs/book/08-report.md b/docs/book/08-report.md index 2dbc6a0..d140747 100644 --- a/docs/book/08-report.md +++ b/docs/book/08-report.md @@ -2,21 +2,27 @@ ## Purpose -Define report contracts in `2.0.0b5`: canonical JSON (`report_schema_version=2.8`) -plus deterministic TXT/Markdown/SARIF projections. +Define the canonical report contract in `2.0.0b6`: report schema `2.8` plus +deterministic text/Markdown/SARIF/HTML projections. ## Public surface -- Canonical report builder: `codeclone/report/json_contract.py:build_report_document` -- JSON/TXT renderers: `codeclone/report/serialize.py` -- Markdown renderer: `codeclone/report/markdown.py` -- SARIF renderer: `codeclone/report/sarif.py` -- HTML renderer: `codeclone/html_report.py:build_html_report` -- Shared metadata source: `codeclone/_cli_meta.py:_build_report_meta` +- Canonical report builder: `codeclone/report/document/builder.py:build_report_document` +- Canonical inventory/integrity helpers: + `codeclone/report/document/inventory.py`, + `codeclone/report/document/integrity.py` +- Text renderer: `codeclone/report/renderers/text.py:render_text_report_document` +- Markdown renderer: + `codeclone/report/renderers/markdown.py:render_markdown_report_document` +- SARIF renderer: + `codeclone/report/renderers/sarif.py:render_sarif_report_document` +- HTML renderer: `codeclone/report/html/assemble.py:build_html_report` +- Shared CLI report meta: + `codeclone/surfaces/cli/report_meta.py:_build_report_meta` ## Data model -JSON report top-level (v2.8): +Canonical top-level sections: - `report_schema_version` - `meta` @@ -26,190 +32,76 @@ JSON report top-level (v2.8): - `derived` - `integrity` -Canonical provenance additions: - -- `meta.analysis_profile` records the effective runtime clone, block, and - segment thresholds for that run (`min_loc`, `min_stmt`, `block_*`, - `segment_*`). 
-- `meta.analysis_thresholds.design_findings` records the effective report-level - thresholds used to materialize canonical design findings for that run - (`complexity > N`, `coupling > N`, `cohesion >= N`). - -Canonical report-only metrics additions: - -- `metrics.families.overloaded_modules` records project-relative module hotspot - profiles and candidate classification for `Overloaded Modules` -- `metrics.families.coverage_adoption` records parameter coverage, return - coverage, public docstring coverage, and `Any` usage counts, plus compact - baseline deltas when a trusted metrics baseline is available -- `metrics.families.api_surface` records the current public symbol inventory - and compact baseline diff facts (`added`, `breaking`) when - `--api-surface` is enabled -- `metrics.families.coverage_join` records an optional current-run join between - external Cobertura line coverage and CodeClone function spans. Its summary - carries `status`, `source`, unit/line counts, `overall_permille`, - `missing_from_report_units`, `coverage_hotspots`, `scope_gap_hotspots`, - `hotspot_threshold_percent`, and optional `invalid_reason`; the same compact - summary is mirrored in `metrics.summary.coverage_join`; its items carry - per-function joined coverage facts, including `coverage_status`, - `coverage_hotspot`, and `scope_gap_hotspot`. 
-- coverage join facts are canonical report truth for that run, but they are - **not** baseline truth and do not update `codeclone.baseline.json` -- adoption/API/coverage-join metrics do **not** participate in clone baseline - NEW/KNOWN semantics; coverage join also does not participate in health scoring - and gates only when explicitly requested -- `Overloaded Modules` is a report-only experimental layer rather than a second - complexity metric: - - complexity reports local control-flow hotspots in functions and methods - - `Overloaded Modules` reports module-level responsibility overload and dependency - pressure - - the layer may later become scoring only after validation and explicit - health-model documentation updates - -Coverage/API role split: - -- `coverage_adoption` is a canonical metrics family, not a style linter. It - reports observable adoption facts only. -- `coverage_join` is a canonical current-run signal over an external Cobertura - XML file. It reports joined line facts and may materialize - `design` findings with `category="coverage"` and kinds - `coverage_hotspot` (measured below threshold) or `coverage_scope_gap` - (outside the supplied coverage scope); it does not infer branch coverage or - execute tests. -- `api_surface` is a canonical metrics/gating family, not a second finding - engine. It reports public API inventory plus baseline-diff facts when the - run opted into API collection. 
- -Canonical vs non-canonical split: - -- Canonical: `report_schema_version`, `meta`, `inventory`, `findings`, `metrics` -- Non-canonical projection layer: `derived` -- Integrity metadata: `integrity` (`canonicalization` + `digest`) - -Derived projection layer: - -- `derived.suggestions[*]` — action-surplus projection cards keyed back to - canonical findings via `finding_id` -- `derived.overview` — summary-only overview facts: - - `families` - - `top_risks` - - `source_scope_breakdown` - - `health_snapshot` - - `directory_hotspots` -- `derived.hotlists` — deterministic lists of canonical finding IDs: - - `most_actionable_ids` - - `highest_spread_ids` - - `production_hotspot_ids` - - `test_fixture_hotspot_ids` - -Finding families: +Canonical section roles: + +- `meta`, `inventory`, `findings`, `metrics` are canonical truth +- `derived` is a deterministic projection layer +- `integrity` carries canonicalization metadata and digest + +Current canonical report-only metric families include: + +- `health` +- `dead_code` +- `dependencies` +- `coverage_adoption` +- `api_surface` +- `coverage_join` +- `overloaded_modules` + +Current finding families include: - `findings.groups.clones.{functions,blocks,segments}` -- optional `findings.groups.clones.suppressed.{functions,blocks,segments}` for - clone groups excluded by project policy such as `golden_fixture_paths` +- optional `findings.groups.clones.suppressed.*` - `findings.groups.structural.groups` - `findings.groups.dead_code.groups` - `findings.groups.design.groups` -- `findings.summary.suppressed.dead_code` (suppressed counter, non-active findings) -- optional `findings.summary.suppressed.clones` plus clone-summary suppressed - counters when clone groups were excluded from active findings -Important role split: - -- Findings explain what was detected. -- Suggestions exist only when they add action structure on top of a finding - (next step, prioritization, effort/risk framing, grouped remediation, or - review relevance). 
-- Low-signal local structural info hints may remain findings-only and not - appear as separate suggestion cards. - -Structural finding kinds currently emitted by core/report pipeline: +Refs: -- `duplicated_branches` -- `clone_guard_exit_divergence` -- `clone_cohort_drift` +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/report/document/_common.py:_design_findings_thresholds_payload` +- `codeclone/report/document/_findings_groups.py:_build_clone_groups` +- `codeclone/report/document/_findings_groups.py:_build_structural_groups` -Per-group common axes (family-specific fields may extend): +## Contracts -- identity: `id`, `family`, `category`, `kind` -- assessment: `severity`, `confidence`, `priority` -- scope: `source_scope` (`dominant_kind`, `breakdown`, `impact_scope`) -- spread: `spread.files`, `spread.functions` -- evidence: `items`, `facts` (+ optional `display_facts`) +- JSON is the source of truth for report semantics. +- Markdown, text, SARIF, HTML, and MCP projections must read canonical report facts rather than recompute them. +- `derived` does not replace canonical findings/metrics. +- Design findings are built once in the canonical report using + `meta.analysis_thresholds.design_findings`; consumers must not synthesize them post-hoc. +- Coverage Join is canonical current-run truth for that run, but not baseline truth. +- Clone groups excluded by project policy are carried only under suppressed clone buckets and do not affect active + findings, health, clone gating, or suggestions. -## Contracts +Refs: -- JSON is source of truth for report semantics. -- Markdown and SARIF are deterministic projections from the same report document. -- MCP summary/finding/hotlist/report-section queries are deterministic views over - the same canonical report document. 
-- SARIF is an IDE/code-scanning-oriented projection: - - repo-relative result paths are anchored via `%SRCROOT%` - - referenced files are listed under `run.artifacts` - - clone results carry `baselineState` when clone novelty is known -- Derived layer (`suggestions`, `overview`, `hotlists`) does not replace canonical - findings/metrics. -- Design findings are built once in the canonical report using the effective - threshold policy recorded in `meta.analysis_thresholds.design_findings`; MCP - and HTML must not re-synthesize them post-hoc from raw metric rows. -- Coverage design findings are built from canonical `coverage_join` rows only - when a valid join is present. Invalid coverage input is represented as - `metrics.families.coverage_join.summary.status="invalid"` with no hotspot - item rows. -- HTML overview cards are materialized from canonical findings plus - `derived.overview` + `derived.hotlists`; pre-expanded overview card payloads are - not part of the report contract. -- `derived.overview.directory_hotspots` is a deterministic report-layer - aggregation over canonical findings; HTML must render it as-is or omit it on - compatibility paths without a canonical report document. -- `derived.overview.health_snapshot` is a projection over canonical - `metrics.families.health.summary`; it summarizes the current score but does - not define a second health model. -- `derived.overview.directory_hotspots[*].path` is an overview-oriented - directory key: runtime findings keep their parent directory, while test-only - and fixture-only findings collapse to the corresponding source-scope roots - (`.../tests` or `.../tests/fixtures`) to avoid duplicating the same hotspot - across leaf fixture paths. -- Overview hotspot/source-breakdown sections must resolve from canonical report - data or deterministic derived IDs; HTML must not silently substitute stale - placeholders such as `n/a` or empty-state cards when canonical data exists. 
-- `analysis_started_at_utc` and `report_generated_at_utc` are carried in - `meta.runtime`; renderers/projections may use them for provenance but must not - reinterpret them as semantic analysis data. -- Canonical `meta.scan_root` is normalized to `"."`; absolute runtime paths are - exposed under `meta.runtime.*_absolute`. -- `clone_type` and `novelty` are group-level properties inside clone groups. -- Cohort-drift structural families are report-only and must not affect baseline diff - or CI gating decisions. -- Dead-code suppressed candidates are carried only under metrics - (`metrics.families.dead_code.suppressed_items`) and never promoted to - active `findings.groups.dead_code`. -- Clone groups excluded by `golden_fixture_paths` are carried only under - `findings.groups.clones.suppressed.*`; they do not contribute to active - findings totals, health scoring, clone gating, or suggestion generation. -- A lower score after upgrade may reflect a broader health model, not only - worse code. Report renderers may surface the score, but health-model - expansion is documented separately in [15-health-score.md](15-health-score.md) - and compatibility notes. +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/report/derived.py:_health_snapshot` +- `codeclone/report/overview.py:materialize_report_overview` +- `codeclone/report/suggestions.py:generate_suggestions` ## Invariants (MUST) -- Stable ordering for groups/items/suggestions/hotlists. -- Stable ordering for SARIF rules, artifacts, and results. +- Stable ordering for groups, items, suggestions, and hotlists. - `derived.suggestions[*].finding_id` references existing canonical finding IDs. - `derived.hotlists.*_ids` reference existing canonical finding IDs. -- SARIF `artifacts[*]` and `locations[*].artifactLocation.index` stay aligned. -- `integrity.digest` is computed from canonical sections only (derived excluded). 
-- `source_scope.impact_scope` is explicit and deterministic (`runtime`, - `non_runtime`, `mixed`). +- SARIF artifacts, rules, and locations stay index-aligned. +- `integrity.digest` is computed from canonical sections only; `derived` is excluded. + +Refs: + +- `codeclone/report/document/integrity.py:_build_integrity_payload` +- `codeclone/report/document/inventory.py:_build_inventory_payload` +- `codeclone/report/renderers/sarif.py:render_sarif_report_document` ## Failure modes -| Condition | Behavior | -|---------------------------------|------------------------------------------------| -| Missing optional UI/meta fields | Renderer falls back to empty/`(none)` display | -| Untrusted baseline | Clone novelty resolves to `new` for all groups | -| Missing snippet source in HTML | Safe fallback snippet block | +| Condition | Behavior | +|---------------------------------|--------------------------------------------------------| +| Missing optional UI/meta fields | Renderer falls back to empty or `(none)`-style display | +| Untrusted baseline | Clone novelty resolves as current-run only | +| Missing source snippet in HTML | Safe fallback snippet block | ## Determinism / canonicalization @@ -219,9 +111,9 @@ Per-group common axes (family-specific fields may extend): Refs: -- `codeclone/report/json_contract.py:_build_integrity_payload` -- `codeclone/report/json_contract.py:_build_inventory_payload` -- `codeclone/structural_findings.py:normalize_structural_findings` +- `codeclone/report/document/integrity.py:_build_integrity_payload` +- `codeclone/report/document/inventory.py:_build_inventory_payload` +- `codeclone/findings/structural/detectors.py:normalize_structural_findings` ## Locked by tests @@ -231,22 +123,8 @@ Refs: - `tests/test_report_contract_coverage.py::test_report_document_rich_invariants_and_renderers` - `tests/test_report_contract_coverage.py::test_markdown_and_sarif_reuse_prebuilt_report_document` - 
`tests/test_report_branch_invariants.py::test_overview_and_sarif_branch_invariants` -- `tests/test_report.py::test_json_includes_clone_guard_exit_divergence_structural_group` -- `tests/test_report.py::test_json_includes_clone_cohort_drift_structural_group` -- `tests/test_report.py::test_report_json_dead_code_suppressed_items_are_reported_separately` ## Non-guarantees -- Human-readable wording in `derived` or HTML may evolve without schema bump. -- CSS/layout changes are not part of JSON contract. - -## See also - -- [07-cache.md](07-cache.md) -- [09-cli.md](09-cli.md) -- [10-html-render.md](10-html-render.md) -- [15-health-score.md](15-health-score.md) -- [20-mcp-interface.md](20-mcp-interface.md) -- [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) -- [../sarif.md](../sarif.md) -- [../examples/report.md](../examples/report.md) +- Human-facing wording in `derived` or HTML may evolve without a schema bump. +- CSS/layout changes are not part of the canonical report contract. diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md index 644701b..79b4ebb 100644 --- a/docs/book/09-cli.md +++ b/docs/book/09-cli.md @@ -2,189 +2,106 @@ ## Purpose -Define observable CLI behavior: argument handling, summaries, error UI, and output writing. +Define observable CLI behavior: argument handling, summaries, output writing, +and exit routing. 
## Public surface -- CLI runner: `codeclone/cli.py:main`, `codeclone/cli.py:_main_impl` -- Parser: `codeclone/_cli_args.py:build_parser` -- Summary renderer: `codeclone/_cli_summary.py:_print_summary` -- Path validation: `codeclone/_cli_paths.py:_validate_output_path` -- Message catalog: `codeclone/ui_messages.py` +- Public entrypoint: `codeclone/main.py:main` +- CLI orchestration: `codeclone/surfaces/cli/workflow.py:_main_impl` +- Parser: `codeclone/config/argparse_builder.py:build_parser` +- Summary renderer: `codeclone/surfaces/cli/summary.py:_print_summary` +- Output path validation and writes: + `codeclone/surfaces/cli/reports_output.py` +- Message catalog: `codeclone/ui_messages/__init__.py` ## Data model CLI modes: -- Normal mode -- Gating mode (`--ci`, `--fail-on-new`, `--fail-threshold>=0`) -- Update mode (`--update-baseline`) +- normal mode +- gating mode (`--ci`, `--fail-on-new`, explicit metric gates) +- baseline update mode (`--update-baseline`, `--update-metrics-baseline`) -Summary metrics: +Summary metrics include: - files found/analyzed/cache hits/skipped -- structural counters: analyzed lines/functions/methods/classes -- function/block/segment groups -- excluded golden-fixture clone groups (when configured) -- suppressed segment groups -- dead-code active/suppressed status in metrics line -- adoption coverage in the normal `Metrics` block: - parameter typing, return typing, public docstrings, and `Any` count -- public API surface in the normal `Metrics` block when `api_surface` was - collected: symbol/module counts plus added/breaking deltas when a trusted - metrics baseline is available -- coverage join in the normal `Metrics` block when `--coverage FILE` was - provided: joined Cobertura overall line coverage, untested hotspot count, and - threshold/source context +- structural counters for lines/functions/methods/classes +- function/block/segment clone groups +- suppressed clone groups from `golden_fixture_paths` +- dead-code active/suppressed 
status +- adoption/API/coverage-join facts when computed - new vs baseline -Metrics-related CLI gates: - -- threshold gates: - `--fail-complexity`, `--fail-coupling`, `--fail-cohesion`, `--fail-health` -- coverage threshold gates: - `--min-typing-coverage`, `--min-docstring-coverage` -- baseline-aware delta gates: - `--fail-on-new-metrics`, - `--fail-on-typing-regression`, - `--fail-on-docstring-regression`, - `--fail-on-api-break` -- external coverage join gate: - `--coverage FILE`, `--coverage-min PERCENT`, - `--fail-on-untested-hotspots` -- update mode: - `--update-metrics-baseline` -- opt-in metrics family: - `--api-surface` -- In unified baseline mode, `--update-baseline` rewrites embedded metric - surfaces from the current enabled config; disabled optional surfaces are - dropped. - Refs: -- `codeclone/_cli_summary.py:_print_summary` -- `codeclone/ui_messages.py:fmt_summary_files` +- `codeclone/surfaces/cli/summary.py:_print_summary` +- `codeclone/surfaces/cli/runtime.py:_metrics_flags_requested` +- `codeclone/surfaces/cli/runtime.py:_metrics_computed` +- `codeclone/surfaces/cli/report_meta.py:_build_report_meta` ## Contracts - Help output includes canonical exit-code section and project links. -- Reporting flag UX uses explicit pairs (`--no-progress`/`--progress`, - `--no-color`/`--color`) and avoids generated double-negation aliases. -- `--open-html-report` is a local UX action layered on top of `--html`; it does not implicitly enable HTML output. -- `--timestamped-report-paths` only rewrites default report paths requested via bare report flags; explicit FILE values - stay unchanged. -- Changed-scope clone review uses: +- Bare report flags write to deterministic default paths under `.cache/codeclone/`. +- `--open-html-report` is layered on top of `--html`; it does not imply HTML output. +- `--timestamped-report-paths` rewrites only default report paths requested via bare flags. 
+- Changed-scope review uses: - `--changed-only` - - `--diff-against GIT_REF` - - `--paths-from-git-diff GIT_REF` - Typical usage: - - `codeclone . --changed-only --diff-against main` - - `codeclone . --paths-from-git-diff HEAD~1` -- Contract errors are prefixed by `CONTRACT ERROR:`. -- Gating failures are prefixed by `GATING FAILURE:`. -- Internal errors use `fmt_internal_error` with optional debug details. -- Runtime footer uses explicit wording: `Pipeline done in s`. - This metric is CLI pipeline time and does not include external launcher/startup overhead (for example `uv run`). -- Dead-code metric line is stateful and deterministic: - - `N found (M suppressed)` when active dead-code items exist - - `✔ clean` when both active and suppressed are zero - - `✔ clean (M suppressed)` when active is zero but suppressed > 0 -- The normal rich `Metrics` block includes: - - `Adoption` when adoption coverage facts were computed - - `Public API` when `api_surface` facts were computed - - `Coverage` when Cobertura coverage was joined with `--coverage` -- Quiet compact metrics output stays on the existing fixed one-line summary and - does not expand adoption/API/coverage-join detail. -- When `golden_fixture_paths` excludes clone groups from active review, CLI - keeps that count inside the `Clones` summary line (`fixtures=N`) instead of - adding a separate summary row. -- Typing/docstring adoption metrics are computed in full mode. -- `--api-surface` is opt-in in normal runs, but runtime auto-enables it when - `--fail-on-api-break` or `--update-metrics-baseline` needs a public API - snapshot. -- `--fail-on-typing-regression` / `--fail-on-docstring-regression` require a - metrics baseline that already contains adoption coverage data. -- `--fail-on-api-break` requires a metrics baseline that already contains - `api_surface` data. -- `--coverage` is a current-run external Cobertura input. It does not update or - compare against `codeclone.baseline.json`. 
-- Relative clone-baseline and metrics-baseline paths from defaults or - `pyproject.toml` resolve from the analysis root. Explicit CLI paths are used - as provided. -- Invalid Cobertura XML is warning-only in normal runs: CLI prints - `Coverage join ignored`, keeps exit `0`, and shows `Coverage` as unavailable - in the normal `Metrics` block. It becomes a contract error only when - `--fail-on-untested-hotspots` requires a valid join. -- `--fail-on-untested-hotspots` requires `--coverage` and a valid Cobertura XML - input. It exits `3` when medium/high-risk functions measured by Coverage Join - fall below `--coverage-min` (default `50`). Functions outside the supplied - `coverage.xml` scope are surfaced separately and do not trigger this gate. - The flag name is retained for CLI compatibility. + - `--diff-against` + - `--paths-from-git-diff` +- Contract errors use `CONTRACT ERROR:`. +- Gating failures use `GATING FAILURE:`. +- Internal errors use `fmt_internal_error` and include traceback only in debug mode. Refs: -- `codeclone/contracts.py:cli_help_epilog` -- `codeclone/ui_messages.py:fmt_contract_error` -- `codeclone/ui_messages.py:fmt_internal_error` +- `codeclone/contracts/__init__.py:cli_help_epilog` +- `codeclone/ui_messages/__init__.py:fmt_contract_error` +- `codeclone/ui_messages/__init__.py:fmt_internal_error` +- `codeclone/surfaces/cli/changed_scope.py:_validate_changed_scope_args` ## Invariants (MUST) -- Report writes (`--html/--json/--md/--sarif/--text`) are path-validated and write failures are contract errors. -- Bare reporting flags write to default deterministic paths under - `.cache/codeclone/`. -- `--open-html-report` requires `--html`; invalid combination is a contract error. -- `--timestamped-report-paths` requires at least one requested report output; invalid combination is a contract error. -- `--changed-only` requires either `--diff-against` or `--paths-from-git-diff`. -- `--diff-against` requires `--changed-only`. 
-- `--diff-against` and `--paths-from-git-diff` are mutually exclusive. -- Git diff refs are validated as safe single revision expressions before - subprocess execution. -- Browser-open failure after a successful HTML write is warning-only and does not change the process exit code. -- Baseline update write failure is contract error. -- In gating mode, unreadable source files are contract errors with higher priority than clone gating failure. -- Changed-scope flags do not create a second canonical report: they project clone - summary/threshold decisions over the changed-files subset after the normal full - analysis completes. +- Report writes are path-validated and write failures are contract errors. +- `--open-html-report` requires `--html`. +- `--timestamped-report-paths` requires at least one requested report output. +- `--changed-only` requires a diff source. +- Browser-open failure after successful HTML write is warning-only. +- In gating mode, unreadable source files are contract errors with higher priority than clone/metric gate failures. 
Refs: -- `codeclone/cli.py:_write_report_output` -- `codeclone/cli.py:_main_impl` +- `codeclone/surfaces/cli/reports_output.py:_validate_output_path` +- `codeclone/surfaces/cli/reports_output.py:_validate_report_ui_flags` +- `codeclone/surfaces/cli/workflow.py:_main_impl` ## Failure modes -| Condition | User-facing category | Exit | -|---------------------------------------------------------------------------|----------------------|------| -| Invalid CLI flag | contract | 2 | -| Invalid output extension/path | contract | 2 | -| `--open-html-report` without `--html` | contract | 2 | -| `--timestamped-report-paths` without reports | contract | 2 | -| `--changed-only` without diff source | contract | 2 | -| `--diff-against` without `--changed-only` | contract | 2 | -| `--diff-against` + `--paths-from-git-diff` | contract | 2 | -| Baseline untrusted in CI/gating | contract | 2 | -| Coverage/API regression gate without required metrics-baseline capability | contract | 2 | -| `--fail-on-untested-hotspots` without `--coverage` | contract | 2 | -| Invalid Cobertura XML without hotspot gating | warning only | 0 | -| Invalid Cobertura XML for coverage hotspot gating | contract | 2 | -| Unreadable source in CI/gating | contract | 2 | -| New clones with `--fail-on-new` | gating | 3 | -| Threshold exceeded | gating | 3 | -| Coverage hotspots with `--fail-on-untested-hotspots` | gating | 3 | -| Unexpected exception | internal | 5 | +| Condition | User-facing category | Exit | +|-------------------------------------------------------------------|----------------------|------| +| Invalid CLI flag | contract | `2` | +| Invalid output extension/path | contract | `2` | +| Invalid changed-scope flag combination | contract | `2` | +| Baseline untrusted in CI/gating | contract | `2` | +| Coverage/API regression gate without required baseline capability | contract | `2` | +| Unreadable source in CI/gating | contract | `2` | +| New clones with `--fail-on-new` | gating | `3` | +| Threshold 
or metrics gate exceeded | gating | `3` | +| Unexpected exception | internal | `5` | ## Determinism / canonicalization - Summary metric ordering is fixed. -- Compact summary mode (`--quiet`) is fixed-format text. +- Compact summary mode is fixed-format text. - Help epilog is generated from static constants. -- `git diff --name-only` input is normalized to sorted repo-relative paths before - changed-scope projection is applied. +- Git diff path inputs are normalized to sorted repo-relative paths. Refs: -- `codeclone/_cli_summary.py:_print_summary` -- `codeclone/contracts.py:EXIT_CODE_DESCRIPTIONS` +- `codeclone/surfaces/cli/summary.py:_print_summary` +- `codeclone/contracts/__init__.py:cli_help_epilog` +- `codeclone/surfaces/cli/changed_scope.py:_normalize_changed_paths` ## Locked by tests @@ -196,12 +113,5 @@ Refs: ## Non-guarantees -- Rich styling details are not part of machine-facing CLI contract. +- Rich styling details are not machine-facing contract. - Warning phrasing may evolve if category markers and exit semantics stay stable. - -## See also - -- [04-config-and-defaults.md](04-config-and-defaults.md) -- [20-mcp-interface.md](20-mcp-interface.md) -- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) -- [16-dead-code-contract.md](16-dead-code-contract.md) diff --git a/docs/book/10-html-render.md b/docs/book/10-html-render.md index b87d377..6978fea 100644 --- a/docs/book/10-html-render.md +++ b/docs/book/10-html-render.md @@ -2,128 +2,82 @@ ## Purpose -Document HTML rendering as a pure view layer over report data/facts. +Document HTML rendering as a pure view layer over canonical report data. 
## Public surface -- Main renderer: `codeclone/html_report.py:build_html_report` -- HTML assembly package: `codeclone/_html_report/*` -- Overview materialization bridge: `codeclone/report/overview.py:materialize_report_overview` -- Escaping helpers: `codeclone/_html_escape.py` -- Snippet/highlight helpers: `codeclone/_html_snippets.py` -- Static template: `codeclone/templates.py:REPORT_TEMPLATE` +- Main renderer: `codeclone/report/html/assemble.py:build_html_report` +- Package entrypoint: `codeclone/report/html/__init__.py:build_html_report` +- Context shaping: `codeclone/report/html/_context.py` +- Escaping helpers: `codeclone/report/html/primitives/escape.py` +- Snippet/highlight helpers: `codeclone/report/html/widgets/snippets.py` +- Sections/widgets/assets: `codeclone/report/html/sections/*`, + `codeclone/report/html/widgets/*`, `codeclone/report/html/assets/*` ## Data model -Inputs to renderer: +Inputs to the renderer: -- canonical report document (`report_document`) when available (preferred path) -- compatibility inputs for direct rendering path: - - grouped clone data (`func_groups`, `block_groups`, `segment_groups`) - - block explainability facts (`block_group_facts`) - - novelty key sets (`new_function_group_keys`, `new_block_group_keys`) - - shared report metadata (`report_meta`) +- canonical `report_document` (preferred path) +- shared `report_meta` +- optional runtime snippet sources for code excerpts Output: -- single self-contained HTML string - -Refs: - -- `codeclone/html_report.py:build_html_report` +- one self-contained HTML string ## Contracts -- HTML must not recompute detection semantics; it renders facts from core/report layers. -- Explainability hints shown in UI are sourced from `build_block_group_facts` data. -- Provenance panel mirrors report metadata contract. -- HTML may expose local UX affordances such as the health-grade badge dialog - or provenance modal, but those actions are projections over already computed - report/meta facts. 
-- Overview UI is a report projection: - - KPI cards with baseline-aware tone (`✓ baselined` / `+N` regression) - - Health gauge with baseline delta arc (improvement/degradation) - - Executive Summary: issue breakdown (sorted bars) + source breakdown - - Hotspots by Directory: render-only view over `derived.overview.directory_hotspots` - - Health Profile: full-width radar chart of dimension scores - - Get Badge modal: grade-only / score+grade variants with shields.io embed -- Quality UI is also a report projection: - - deterministic subtabs for complexity, coupling, cohesion, overloaded - modules, and `Coverage Join` when canonical join facts exist - - `Coverage Join` uses the same stat-card and table patterns as other - quality surfaces; it separates measured coverage hotspots from coverage - scope gaps, and invalid joins render a factual unavailable state instead - of a success-style empty message -- Dead-code UI is a single top-level `Dead Code` tab with deterministic split - sub-tabs: `Active` and `Suppressed`. -- Clones UI may append a `Suppressed` sub-tab when canonical report data - includes `findings.groups.clones.suppressed.*`; those rows are factual - projections of policy-excluded clone groups such as `golden_fixture_paths` - and do not become active clone findings. -- IDE deep links: - - An IDE picker in the topbar lets users choose their IDE. The selection is - persisted in `localStorage` (key `codeclone-ide`). - - Supported IDEs: PyCharm, IntelliJ IDEA, VS Code, Cursor, Fleet, Zed. - - File paths across Clones, Quality, Suggestions, Dead Code, and Findings - tabs are rendered as `` elements with `data-file` - (absolute path) and `data-line` attributes. - - JetBrains IDEs use `jetbrains://` protocol (requires Toolbox); others use - native URL schemes (`vscode://`, `cursor://`, `fleet://`, `zed://`). - - The scan root is embedded as `data-scan-root` on `` so that - JetBrains links can derive the project name and relative path. 
- - When no IDE is selected, links are inert (no `href`, default cursor). +- HTML must not recompute detection semantics; it renders facts from report/core layers. +- Provenance panels mirror canonical report/meta facts. +- Overview, Quality, Suggestions, Dead Code, and Clones tabs are projections over canonical report sections. +- IDE deep links are HTML-only UX over canonical path/line facts. +- Missing snippets or optional meta fields render safe factual fallbacks rather than invented data. Refs: -- `codeclone/report/explain.py:build_block_group_facts` +- `codeclone/report/html/assemble.py:build_html_report` +- `codeclone/report/html/sections/_clones.py:_render_group_explanation` +- `codeclone/report/html/sections/_meta.py:render_meta_panel` +- `codeclone/report/html/assets/js.py:_IDE_LINKS` - `codeclone/report/overview.py:materialize_report_overview` -- `codeclone/_html_report/_sections/_clones.py:_render_group_explanation` -- `codeclone/_html_report/_sections/_meta.py:render_meta_panel` -- `codeclone/_html_js.py:_IDE_LINKS` -- `codeclone/_html_report/_assemble.py` (IDE picker topbar widget) ## Invariants (MUST) -- All user/content fields are escaped for text/attributes before insertion. +- User/content fields are escaped before insertion into HTML. - Missing file snippets render explicit fallback blocks. -- Novelty controls reflect baseline trust split note and per-group novelty flags. -- Suppressed dead-code rows are rendered only from report dead-code suppression - payloads and do not become active dead-code findings in UI tables. -- Structural finding cards may render a compact inline suggested action when a - low-signal local hint intentionally has no separate suggestion card. -- IDE link `data-file` and `data-line` attributes are escaped via - `_escape_html` before insertion into HTML. +- Novelty badges reflect baseline trust and per-group novelty flags. +- Suppressed dead-code rows render only from report suppression payloads. 
+- Path-link `data-file` and `data-line` attributes are escaped before insertion. Refs: -- `codeclone/_html_escape.py:_escape_html` -- `codeclone/_html_snippets.py:_render_code_block` -- `codeclone/_html_report/_sections/_clones.py:render_clones_panel` -- `codeclone/_html_report/_tables.py` (path cell IDE links) -- `codeclone/report/findings.py` (structural findings IDE links) +- `codeclone/report/html/primitives/escape.py:_escape_html` +- `codeclone/report/html/widgets/snippets.py:_render_code_block` +- `codeclone/report/html/widgets/tables.py` ## Failure modes -| Condition | Behavior | -|-------------------------------------|---------------------------------------------| -| Source file unreadable for snippet | Render fallback snippet with message | -| Missing/invalid optional meta field | Render empty or `(none)`-equivalent display | -| Pygments unavailable | Escape-only fallback code rendering | +| Condition | Behavior | +|-------------------------------------|----------------------------------------| +| Source file unreadable for snippet | Render fallback snippet with message | +| Missing/invalid optional meta field | Render empty or `(none)`-style display | +| Pygments unavailable | Escape-only fallback code rendering | Refs: -- `codeclone/_html_snippets.py:_FileCache.get_lines_range` -- `codeclone/_html_snippets.py:_try_pygments` +- `codeclone/report/html/widgets/snippets.py:_FileCache` +- `codeclone/report/html/widgets/snippets.py:_try_pygments` ## Determinism / canonicalization -- Section/group ordering follows sorted report inputs. +- Section and group ordering follow sorted canonical report inputs. - Metadata rows are built in fixed order. 
Refs: -- `codeclone/_html_report/_assemble.py:build_html_report` -- `codeclone/_html_report/_sections/_meta.py:render_meta_panel` +- `codeclone/report/html/assemble.py:build_html_report` +- `codeclone/report/html/sections/_meta.py:render_meta_panel` ## Locked by tests @@ -135,13 +89,5 @@ Refs: ## Non-guarantees -- CSS/visual system and interaction details may evolve without schema bump. -- HTML-only interaction affordances (theme toggle, IDE picker, provenance modal, - badge modal, radar chart) are not baseline/cache/report contracts. -- IDE deep link behavior depends on the user's local IDE installation and - protocol handler registration (e.g. JetBrains Toolbox for `jetbrains://`). -- Overview layout (KPI grid, executive summary, analytics) is a pure view - concern; only the underlying data identity and ordering are contract-sensitive. -- Direct `build_html_report(...)` compatibility paths without a canonical - `report_document` may omit `directory_hotspots`; HTML must not approximate - directory aggregates from suggestion cards. +- CSS, layout, and interaction details may evolve without a schema bump. +- IDE deep link behavior depends on local IDE installation and protocol handlers. diff --git a/docs/book/11-security-model.md b/docs/book/11-security-model.md index 548c817..ac9d3e7 100644 --- a/docs/book/11-security-model.md +++ b/docs/book/11-security-model.md @@ -7,10 +7,12 @@ Describe implemented protections and explicit security boundaries. 
## Public surface - Scanner path validation: `codeclone/scanner.py:iter_py_files` -- File read limits and parser limits: `codeclone/cli.py:process_file`, `codeclone/extractor.py:_parse_limits` -- Baseline/cache validation: `codeclone/baseline.py`, `codeclone/cache.py` -- HTML escaping: `codeclone/_html_escape.py`, `codeclone/html_report.py` -- MCP read-only enforcement: `codeclone/mcp_service.py`, `codeclone/mcp_server.py` +- File read and parser limits: `codeclone/core/worker.py:process_file`, + `codeclone/analysis/parser.py:_parse_limits` +- Baseline/cache validation: `codeclone/baseline/*`, `codeclone/cache/*` +- HTML escaping: `codeclone/report/html/primitives/escape.py`, + `codeclone/report/html/assemble.py` +- MCP read-only enforcement: `codeclone/surfaces/mcp/*` ## Data model @@ -19,44 +21,39 @@ Security-relevant input classes: - filesystem paths (root/source/baseline/cache/report) - untrusted JSON files (baseline/cache) - untrusted source snippets and metadata rendered into HTML +- MCP request parameters (`root`, filters, diff refs, cache policy) ## Contracts - CodeClone parses source text; it does not execute repository Python code. - Sensitive root directories are blocked by scanner policy. -- Symlink traversal outside root is skipped. -- HTML report escapes text and attribute contexts before embedding. -- MCP server is read-only by design: no tool mutates source files, baselines, - cache, or report artifacts. -- `--allow-remote` guard must be passed explicitly for non-local transports; - default is local-only (`stdio`). -- `cache_policy=refresh` is rejected — MCP cannot trigger cache invalidation. -- Review markers (`mark_finding_reviewed`) are session-local in-memory state; - they are never persisted to disk or leaked into baselines/reports. -- `git_diff_ref` is validated as a safe single revision expression before any - `git diff` subprocess call. Leading option-like prefixes, whitespace/control - characters, and unsupported punctuation are rejected. 
-- Run history is bounded by `--history-limit` (default 10) to prevent - unbounded memory growth. +- Symlink traversal outside the root is skipped. +- HTML escapes text and attribute contexts before embedding. +- MCP is read-only by design: + no tool mutates source files, baselines, cache, or report artifacts. +- `--allow-remote` is required for non-local transports. +- `cache_policy=refresh` is rejected by MCP. +- Review markers are session-local in-memory state only. +- `git_diff_ref` is validated as a safe single revision expression before any `git diff` subprocess call. Refs: -- `codeclone/extractor.py:_parse_with_limits` +- `codeclone/analysis/parser.py:_parse_with_limits` - `codeclone/scanner.py:SENSITIVE_DIRS` - `codeclone/scanner.py:iter_py_files` -- `codeclone/_html_escape.py:_escape_html` +- `codeclone/report/html/primitives/escape.py:_escape_html` ## Invariants (MUST) - Baseline and cache integrity checks use constant-time comparison. - Size guards are enforced before parsing baseline/cache JSON. -- Cache failures degrade safely (warning + ignore), baseline trust failures follow trust model. +- Cache failures degrade safely; baseline trust failures follow the explicit trust model. Refs: -- `codeclone/baseline.py:Baseline.verify_integrity` -- `codeclone/cache.py:Cache.load` -- `codeclone/cli.py:_main_impl` +- `codeclone/baseline/clone_baseline.py:Baseline.verify_integrity` +- `codeclone/cache/store.py:Cache.load` +- `codeclone/surfaces/cli/workflow.py:_main_impl` ## Failure modes @@ -68,20 +65,20 @@ Refs: | Oversized cache | Cache ignored | | HTML-injected payload in metadata/source | Escaped output | | `--allow-remote` not passed for HTTP | Transport rejected | -| `cache_policy=refresh` requested | Policy rejected | +| `cache_policy=refresh` requested in MCP | Policy rejected | | `git_diff_ref` fails validation | Parameter rejected | ## Determinism / canonicalization - Canonical JSON hashing for baseline/cache prevents formatting-only drift. 
-- Security failures map to explicit statuses (baseline/cache enums). +- Security failures map to explicit statuses rather than silent mutation. Refs: -- `codeclone/baseline.py:_compute_payload_sha256` -- `codeclone/cache.py:_canonical_json` -- `codeclone/baseline.py:BaselineStatus` -- `codeclone/cache.py:CacheStatus` +- `codeclone/baseline/trust.py:_compute_payload_sha256` +- `codeclone/cache/integrity.py:canonical_json` +- `codeclone/baseline/trust.py:BaselineStatus` +- `codeclone/cache/versioning.py:CacheStatus` ## Locked by tests @@ -90,8 +87,8 @@ Refs: - `tests/test_security.py::test_html_report_escapes_user_content` - `tests/test_html_report.py::test_html_report_escapes_script_breakout_payload` - `tests/test_cache.py::test_cache_too_large_warns` -- `tests/test_mcp_service.py::test_cache_policy_refresh_rejected` -- `tests/test_mcp_server.py::test_allow_remote_guard` +- `tests/test_mcp_service.py::test_mcp_service_rejects_refresh_cache_policy_in_read_only_mode` +- `tests/test_mcp_server.py::test_mcp_server_main_rejects_non_loopback_host_without_opt_in` ## Non-guarantees diff --git a/docs/book/12-determinism.md b/docs/book/12-determinism.md index e0579f1..6209a41 100644 --- a/docs/book/12-determinism.md +++ b/docs/book/12-determinism.md @@ -6,9 +6,11 @@ Document deterministic behavior and canonicalization controls. 
## Public surface -- Sorting and traversal: `codeclone/scanner.py`, `codeclone/report/serialize.py`, `codeclone/cache.py` -- Canonical hashing: `codeclone/baseline.py`, `codeclone/cache.py` -- Golden detector snapshot policy: `tests/test_detector_golden.py` +- Sorted file traversal: `codeclone/scanner.py` +- Canonical report construction: `codeclone/report/document/*` +- Deterministic text projection: `codeclone/report/renderers/text.py` +- Baseline hashing: `codeclone/baseline/trust.py` +- Cache signing: `codeclone/cache/integrity.py` ## Data model @@ -18,57 +20,56 @@ Deterministic outputs depend on: - fixed baseline/cache/report schemas - sorted file traversal - sorted group keys and item records -- canonical JSON serialization for hashes +- canonical JSON serialization for hashes/signatures ## Contracts -- JSON report uses deterministic ordering for files/groups/items. -- TXT report uses deterministic metadata key order and group/item ordering. +- Canonical JSON report uses deterministic ordering for files, groups, items, and summaries. +- Text/Markdown/SARIF projections are deterministic views over the canonical report. - Baseline hash is canonical and independent from non-payload metadata fields. - Cache signature is canonical and independent from JSON whitespace. Refs: -- `codeclone/report/json_contract.py:build_report_document` -- `codeclone/report/serialize.py:render_text_report_document` -- `codeclone/baseline.py:_compute_payload_sha256` -- `codeclone/cache_io.py:sign_cache_payload` +- `codeclone/report/document/builder.py:build_report_document` +- `codeclone/report/renderers/text.py:render_text_report_document` +- `codeclone/baseline/trust.py:_compute_payload_sha256` +- `codeclone/cache/integrity.py:sign_cache_payload` ## Invariants (MUST) - `inventory.file_registry.items` is lexicographically sorted. - finding groups/items and derived hotlists are deterministically ordered. -- Baseline clone lists are sorted and unique. 
-- Golden detector test runs only on canonical Python tag from fixture metadata. +- baseline clone lists are sorted and unique. +- golden detector fixtures run only on the canonical Python tag from fixture metadata. Refs: -- `codeclone/report/json_contract.py:_build_inventory_payload` -- `codeclone/baseline.py:_require_sorted_unique_ids` +- `codeclone/report/document/inventory.py:_build_inventory_payload` +- `codeclone/baseline/trust.py:_require_sorted_unique_ids` - `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture` ## Failure modes -| Condition | Determinism impact | -|-------------------------------------|--------------------------------------------------------| -| Different Python tag | Clone IDs may differ; baseline considered incompatible | -| Unsorted/non-canonical baseline IDs | Baseline rejected as invalid | -| Cache signature mismatch | Cache ignored and recomputed | -| Different cache provenance state | `meta.cache_*` differs by design | +| Condition | Determinism impact | +|-------------------------------------|-----------------------------------------------------| +| Different Python tag | Clone IDs may differ; baseline becomes incompatible | +| Unsorted/non-canonical baseline IDs | Baseline rejected as invalid | +| Cache signature mismatch | Cache ignored and recomputed | +| Different cache provenance state | `meta.cache_*` differs by design | ## Determinism / canonicalization Primary canonicalization points: -- `json.dumps(..., sort_keys=True, separators=(",", ":"), ensure_ascii=False)` for baseline/cache payload - hash/signature. -- tuple-based sort keys for report record arrays. 
+- canonical JSON with sorted keys and compact separators for baseline/cache hashing +- stable tuple-based sort keys for report arrays and hotlists Refs: -- `codeclone/baseline.py:_compute_payload_sha256` -- `codeclone/cache_io.py:canonical_json` -- `codeclone/report/json_contract.py:_build_integrity_payload` +- `codeclone/baseline/trust.py:_compute_payload_sha256` +- `codeclone/cache/integrity.py:canonical_json` +- `codeclone/report/document/integrity.py:_build_integrity_payload` ## Locked by tests @@ -81,5 +82,4 @@ Refs: ## Non-guarantees - Determinism is not guaranteed across different `python_tag` values. -- Byte-identical reports are not guaranteed across different cache provenance - states (`cache_status`, `cache_used`, `cache_schema_version`). +- Byte-identical reports are not guaranteed across different cache provenance states. diff --git a/docs/book/14-compatibility-and-versioning.md b/docs/book/14-compatibility-and-versioning.md index a68bdd2..ac9186f 100644 --- a/docs/book/14-compatibility-and-versioning.md +++ b/docs/book/14-compatibility-and-versioning.md @@ -7,12 +7,16 @@ compatibility is enforced. 
## Public surface -- Version constants: `codeclone/contracts.py` -- Baseline compatibility checks: `codeclone/baseline.py:Baseline.verify_compatibility` -- Metrics baseline compatibility checks: `codeclone/metrics_baseline.py:MetricsBaseline.verify_compatibility` -- Cache compatibility checks: `codeclone/cache.py:Cache.load` -- Report schema assignment: `codeclone/report/json_contract.py:build_report_document` -- MCP public surface: `codeclone/mcp_server.py`, `codeclone/mcp_service.py` +- Version constants: `codeclone/contracts/__init__.py` +- Clone baseline compatibility: + `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` +- Metrics baseline compatibility: + `codeclone/baseline/metrics_baseline.py:MetricsBaseline.verify_compatibility` +- Cache compatibility: `codeclone/cache/store.py:Cache.load` +- Report schema assignment: + `codeclone/report/document/builder.py:build_report_document` +- MCP public surface: `codeclone/surfaces/mcp/server.py`, + `codeclone/surfaces/mcp/service.py` ## Data model @@ -22,142 +26,69 @@ Current contract versions: - `BASELINE_FINGERPRINT_VERSION = "1"` - `CACHE_VERSION = "2.5"` - `REPORT_SCHEMA_VERSION = "2.8"` -- `METRICS_BASELINE_SCHEMA_VERSION = "1.2"` (used only when metrics are stored - in a dedicated metrics-baseline file instead of the default unified baseline) +- `METRICS_BASELINE_SCHEMA_VERSION = "1.2"` Refs: -- `codeclone/contracts.py` +- `codeclone/contracts/__init__.py` ## Contracts Version bump rules: -- Bump **baseline schema** only for baseline JSON layout/type changes. -- Bump **fingerprint version** when clone key semantics change. -- Bump **cache schema** for cache wire-format/validation changes and for - cached-analysis semantic changes that would otherwise leave stale cache - entries looking compatible to runtime validation. -- Bump **report schema** for canonical report document contract changes - (`report_schema_version`, consumed by JSON/TXT/Markdown/SARIF and HTML provenance/view). 
-- Bump **metrics-baseline schema** only for dedicated metrics-baseline payload - changes. -- This schema does **not** imply that metrics normally live in a separate file: - the default runtime path is still the unified baseline file, and the - standalone metrics-baseline schema applies only when users opt into a - different metrics-baseline path. -- MCP does not currently define a separate schema/version constant; tool names, - resource shapes, and documented request/response semantics are therefore - package-versioned public surface and must be documented/tested when changed. -- Slimming or splitting MCP-only projections (for example, summary payloads or - `metrics` vs `metrics_detail`) does not change `report_schema_version` as long - as the canonical report document and finding identities remain unchanged. -- The same rule applies to finding-level MCP projection changes such as - short MCP ids, slim summary locations, or omitting `priority_factors` - outside `detail_level="full"`. -- Additive MCP-only convenience fields/projections such as - `cache.freshness`, production-first triage, `health_scope`, `focus`, or - `new_by_source_kind` also do not change - `report_schema_version` when they are derived from unchanged canonical report - and summary data. -- The same rule applies to bounded MCP semantic guidance such as - `help(topic=...)`: package-versioned wording and routing may evolve, but they - do not change `report_schema_version` as long as canonical report semantics - and finding identities remain unchanged. -- Canonical report changes such as `meta.analysis_thresholds.design_findings` - or threshold-aware design finding materialization do change - `report_schema_version` because they alter canonical report semantics and - integrity payload. 
-- The same is true for additive canonical metrics families such as - `metrics.families.overloaded_modules`, `coverage_adoption`, `api_surface`, - or `coverage_join`: even when the layer is report-only or current-run only, - it still changes canonical report schema and integrity payload, so it - requires a report-schema bump. -- The same rule applies to new canonical suppressed-finding buckets such as - `findings.groups.clones.suppressed.*`: even though they are non-active - review facts, they still change canonical report shape and integrity payload. -- CodeClone does not currently define a separate health-model version constant. - Health-score semantics are package-versioned and must be documented in the - Health Score chapter and release notes when they change. - -Baseline compatibility rules: - -- Runtime accepts baseline schema majors `1` and `2` with supported minors. -- Runtime writes current schema (`2.1`) on new/updated baseline saves. -- Embedded top-level `metrics` is valid only for baseline schema `>= 2.0`. -- Unified clone baselines may also embed top-level `api_surface` when metrics - baseline data is stored in the same file. -- Embedded and standalone `api_surface` snapshots now use compact symbol wire - layout (`local_name` relative to `module`, `filepath` relative to the - baseline directory when possible) while runtime reconstructs full canonical - qualnames and runtime filepaths before comparison. This is a schema change - for baseline `2.1` / metrics-baseline `1.2`, not a silent serialization - detail. -- Capability-sensitive metrics gates (for example adoption regression or API - break gating) must check for the required embedded data, not only the clone - baseline schema version. - -Metrics-baseline compatibility rules: - -- Runtime writes standalone metrics-baseline schema `1.2`. -- Runtime accepts standalone metrics-baseline `1.1` and `1.2`. 
-- When metrics are embedded into the unified clone baseline, the embedded - metrics section follows the clone baseline schema compatibility window - instead (`2.0` and `2.1` in the current runtime). - -Baseline regeneration rules: - -- Required when `fingerprint_version` changes. -- Required when `python_tag` changes. -- Not required for package patch/minor updates if compatibility gates still pass. +- bump **baseline schema** only for clone-baseline JSON layout/type changes +- bump **fingerprint version** when clone identity semantics change +- bump **cache schema** for cache wire-format or compatibility-semantics changes +- bump **report schema** for canonical report document shape/meaning changes +- bump **metrics-baseline schema** only for standalone metrics-baseline payload changes -## Health model evolution +Operational compatibility rules: + +- runtime writes baseline schema `2.1` +- runtime accepts clone baseline `1.0`, `2.0`, and `2.1` +- runtime writes standalone metrics-baseline schema `1.2` +- runtime accepts standalone metrics-baseline `1.1` and `1.2` +- runtime writes cache schema `2.5` +- MCP does not define a separate schema constant; tool/resource semantics are package-versioned public surface -Health Score is stable within a given scoring model, but the scoring model may -evolve across releases. +Baseline regeneration is required when: -New signal families may first appear as report-only or experimental layers. -After validation and contract hardening, selected layers may later be promoted -into scoring. +- `fingerprint_version` changes +- `python_tag` changes -Future CodeClone releases may expand the Health Score formula with additional -validated signal families. As a result, a repository's score may decrease after -upgrade even if the code itself did not become worse. In such cases, the change -reflects an evolved scoring model rather than a retroactive decline in code -quality. 
+It is not required for package patch/minor updates when compatibility gates still pass. -Short operational reminder: +## Health model evolution -> A lower score after upgrade may reflect a broader health model, not only -> worse code. +CodeClone does not currently define a separate health-model version constant. +Health semantics are package-versioned behavior and must be documented in: -Contract consequence: +- this chapter +- [15-health-score.md](15-health-score.md) +- release notes -- health-model expansion does not necessarily require a baseline/cache/report - schema bump; -- but it **does** require explicit documentation and release-note coverage, - because it changes user-visible scoring semantics. +A lower score after upgrade may reflect a broader scoring model, not only worse code. ## Invariants (MUST) -- Contract changes must include code updates and changelog/docs updates. -- Schema mismatches must map to explicit statuses. -- Legacy baseline payloads (<=1.3 layout) remain untrusted and require regeneration. +- Contract changes require code + tests + changelog/docs updates. +- Schema mismatches map to explicit statuses. +- Legacy baselines stay untrusted and require regeneration. 
Refs: -- `codeclone/baseline.py:BaselineStatus` -- `codeclone/baseline.py:_is_legacy_baseline_payload` +- `codeclone/baseline/trust.py:BaselineStatus` +- `codeclone/baseline/clone_baseline.py:_is_legacy_baseline_payload` ## Failure modes -| Change type | User impact | -|------------------------------|-----------------------------------------------------------------------| -| Baseline schema bump | older unsupported baseline schemas become untrusted until regenerated | -| Fingerprint bump | clone IDs change; baseline regeneration required | -| Cache schema bump | old caches are ignored and rebuilt automatically | -| Report schema bump | downstream report consumers must update | -| Metrics-baseline schema bump | dedicated metrics-baseline files must be regenerated | +| Change type | User impact | +|------------------------------|----------------------------------------------------------------| +| Baseline schema bump | Older unsupported baselines become untrusted until regenerated | +| Fingerprint bump | Clone IDs change; baseline regeneration required | +| Cache schema bump | Old caches are ignored and rebuilt automatically | +| Report schema bump | Downstream report consumers must update | +| Metrics-baseline schema bump | Dedicated metrics-baseline files must be regenerated | ## Determinism / canonicalization @@ -166,9 +97,9 @@ Refs: Refs: -- `codeclone/contracts.py` -- `codeclone/baseline.py:Baseline.verify_compatibility` -- `codeclone/metrics_baseline.py:MetricsBaseline.verify_compatibility` +- `codeclone/contracts/__init__.py` +- `codeclone/baseline/clone_baseline.py:Baseline.verify_compatibility` +- `codeclone/baseline/metrics_baseline.py:MetricsBaseline.verify_compatibility` ## Locked by tests @@ -180,7 +111,5 @@ Refs: ## Non-guarantees -- Backward compatibility is not guaranteed across incompatible schema/fingerprint - bumps. 
-- Health Score is not frozen forever as a mathematical formula; what is frozen - is the obligation to document scoring-model changes and present them honestly. +- Backward compatibility is not guaranteed across incompatible schema/fingerprint bumps. +- Health Score is not mathematically frozen forever; the obligation to document scoring-model changes is. diff --git a/docs/book/15-health-score.md b/docs/book/15-health-score.md index be7b71e..70a4751 100644 --- a/docs/book/15-health-score.md +++ b/docs/book/15-health-score.md @@ -2,131 +2,72 @@ ## Purpose -Define the current Health Score model, the report-only layers that do **not** -yet affect it, and the policy for future scoring-model expansion. - -Health Score is a user-facing contract. It is not just an internal aggregate. +Define the current Health Score model, what does not affect it yet, and the +policy for future scoring-model expansion. ## Public surface - Scoring model: `codeclone/metrics/health.py:compute_health` -- Weight assignment: `codeclone/contracts.py:HEALTH_WEIGHTS` -- Input wiring: `codeclone/pipeline.py:compute_project_metrics` +- Weight assignment: `codeclone/contracts/__init__.py:HEALTH_WEIGHTS` +- Input wiring: `codeclone/core/pipeline.py:compute_project_metrics` - Canonical report surface: - `codeclone/report/json_contract.py:build_report_document` -- Overview projection: - `codeclone/report/json_contract.py:_health_snapshot` + `codeclone/report/document/builder.py:build_report_document` +- Health snapshot projections: + `codeclone/report/derived.py:_health_snapshot`, + `codeclone/report/overview.py:_health_snapshot` - CLI / HTML / MCP consumers: - `codeclone/_cli_summary.py`, `codeclone/_html_report/_sections/_overview.py`, - `codeclone/mcp_service.py` + `codeclone/surfaces/cli/summary.py`, + `codeclone/report/html/sections/_overview.py`, + `codeclone/surfaces/mcp/session.py` ## Contracts - Health Score is computed only in `analysis_mode=full`. 
-- In `analysis_mode=clones_only`, health is intentionally unavailable rather - than fabricated from partial inputs. +- In `analysis_mode=clones_only`, health is intentionally unavailable. - The current scoring model includes exactly seven dimensions: `clones`, `complexity`, `coupling`, `cohesion`, `dead_code`, `dependencies`, `coverage`. -- Only dimensions produced by `compute_health(...)` contribute to the score. -- Report-only or advisory layers must not affect the score until they are - explicitly promoted into the scoring model and documented. +- Report-only or advisory layers must not affect the score until they are explicitly promoted and documented. ## What currently affects Health Score -Current weights from `codeclone/contracts.py:HEALTH_WEIGHTS`: +Current weights from `codeclone/contracts/__init__.py:HEALTH_WEIGHTS`: -| Dimension | Weight | Current inputs in code | Signal type | Visible report/UI surface | -|--------------|--------|--------------------------------------------------------------------------------------|----------------------------------|------------------------------------------------------------------------------------------| -| Clones | 25% | function clone groups + block clone groups, normalized by `files_analyzed_or_cached` | aggregate project-level | `metrics.families.health.summary.dimensions.clones`, HTML `Health Profile`, CLI, MCP | -| Complexity | 20% | `complexity_avg`, `complexity_max`, `high_risk_functions` | local findings -> aggregate | `metrics.families.health.summary.dimensions.complexity`, design findings, HTML, CLI, MCP | -| Cohesion | 15% | `cohesion_avg`, `low_cohesion_classes` | local findings -> aggregate | `metrics.families.health.summary.dimensions.cohesion`, design findings, HTML, CLI, MCP | -| Coupling | 10% | `coupling_avg`, `coupling_max`, `high_risk_classes` | local findings -> aggregate | `metrics.families.health.summary.dimensions.coupling`, design findings, HTML, CLI, MCP | -| Dead code | 10% | count of 
active dead-code items after suppression and non-actionable filtering | local findings -> aggregate | `metrics.families.dead_code`, health dimensions, HTML, CLI, MCP | -| Dependencies | 10% | `dependency_cycles`, `dependency_max_depth` | aggregate graph-level | `metrics.families.dependencies`, health dimensions, HTML, CLI, MCP | -| Coverage | 10% | `files_analyzed_or_cached / files_found` | aggregate inventory-completeness | `metrics.families.health.summary.dimensions.coverage`, HTML `Health Profile`, MCP | +| Dimension | Weight | Signal | +|--------------|--------|------------------------------------------------------------------| +| Clones | 25% | Function + block clone density | +| Complexity | 20% | Function-level complexity risk | +| Cohesion | 15% | Low-cohesion class pressure | +| Coupling | 10% | Class-level coupling pressure | +| Dead code | 10% | Active dead-code items after suppression/filtering | +| Dependencies | 10% | Cycles and deep dependency chains | +| Coverage | 10% | Analysis completeness (`files_analyzed_or_cached / files_found`) | Important clarifications: -- `coverage` here means **analysis completeness**, not test coverage. -- The clone dimension currently uses only **function** and **block** clone - groups. Segment groups are visible in reports, but they do not currently feed - Health Score. -- Dead-code penalties use active dead-code items returned by - `find_unused(...)`. Suppressed or non-actionable candidates do not penalize - the score. -- Dependency pressure currently penalizes cycles directly and only penalizes - dependency depth beyond the safe zone (`max_depth > 6`). 
- -## Explainability intent - -The current health model is deterministic and explainable by design: - -- every scoring dimension is derived from explicit inputs already present in the - pipeline and canonical report; -- the canonical report exposes the score and per-dimension breakdown under - `metrics.families.health.summary`; -- overview/report projections may summarize the result, but they must not invent - extra health heuristics outside the scoring model. +- `coverage` here means analysis completeness, not test coverage. +- Segment clones are visible in reports but do not currently affect Health Score. +- Suppressed or non-actionable dead-code items do not penalize the score. ## Current non-scoring layers -The following layers are visible today but do **not** currently affect Health -Score: - -### Overloaded Modules - -`Overloaded Modules` is currently a report-only experimental layer. - -- It surfaces module-level hotspots derived from implementation burden and - dependency pressure. -- It is visible in `metrics.families.overloaded_modules`, HTML, Markdown/TXT, and MCP - `metrics_detail(family="overloaded_modules")`. -- It does not currently affect Health Score, gates, baseline novelty, or SARIF. -- It is **not** a restatement of cyclomatic complexity: complexity highlights - local control-flow hotspots, while Overloaded Modules highlights module-level - responsibility overload and dependency pressure. +Visible but non-scoring: -### Other visible non-scoring layers - -- `findings.groups.clones.segments` — canonical report-only segment-clone layer; - visible for review, excluded from baseline diff/gating/health. -- `findings.groups.structural.groups` — report-only structural findings; - visible as evidence/advisory material, excluded from health. -- `derived.suggestions` and `derived.hotlists` — advisory and routing - projections; never scoring inputs. 
+- `metrics.families.overloaded_modules` +- `findings.groups.clones.segments` +- `findings.groups.structural.groups` +- `derived.suggestions` +- `derived.hotlists` +- `metrics.families.coverage_join` ## Health model evolution -Health Score is stable within a given scoring model, but the model may evolve -across releases. - -New signal families may first appear as report-only or experimental layers. -After validation and contract hardening, selected layers may later be -introduced into scoring. - -Future CodeClone releases may expand the Health Score formula with additional -validated signal families. As a result, a repository's score may decrease after -upgrade even if the code itself did not become worse. In such cases, the change -reflects an evolved scoring model rather than a retroactive decline in code -quality. - -Promotion rules for a new scoring input: - -- the signal must be deterministic and stable enough for canonical reporting; -- the signal must be explainable in terms of explicit inputs and visible output; -- the signal must be validated on real repositories, not only synthetic cases; -- the change must be documented in release notes and in Health Score docs; -- MCP/HTML/CLI surfaces must continue to present the score honestly after the - expansion. - -Current versioning note: +Future releases may expand the score with additional validated signal families. +If that happens: -- CodeClone does **not** currently define a separate health-model version - constant. -- Health semantics are package-versioned public behavior and must therefore be - documented in this chapter, in compatibility notes, and in release notes when - they change. 
+- the change must be documented in this chapter and release notes +- CLI/HTML/MCP must continue to present the score honestly +- a lower score after upgrade may reflect a broader model, not only worse code ## Locked by tests @@ -140,4 +81,3 @@ Current versioning note: - [08-report.md](08-report.md) - [14-compatibility-and-versioning.md](14-compatibility-and-versioning.md) - [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) -- [16-dead-code-contract.md](16-dead-code-contract.md) diff --git a/docs/book/15-metrics-and-quality-gates.md b/docs/book/15-metrics-and-quality-gates.md index 5817088..e5fe4b9 100644 --- a/docs/book/15-metrics-and-quality-gates.md +++ b/docs/book/15-metrics-and-quality-gates.md @@ -6,11 +6,12 @@ Define metrics mode selection, metrics-baseline behavior, and gating semantics. ## Public surface -- Metrics mode wiring: `codeclone/cli.py:_configure_metrics_mode` -- Main orchestration and exit routing: `codeclone/cli.py:_main_impl` -- Gate evaluation: `codeclone/pipeline.py:metric_gate_reasons`, - `codeclone/pipeline.py:gate` -- Metrics baseline persistence/diff: `codeclone/metrics_baseline.py:MetricsBaseline` +- Metrics mode wiring: `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode` +- Main orchestration and exit routing: `codeclone/surfaces/cli/workflow.py:_main_impl` +- Gate evaluation: `codeclone/report/gates/evaluator.py:metric_gate_reasons`, + `codeclone/core/reporting.py:gate` +- Metrics baseline persistence/diff: + `codeclone/baseline/metrics_baseline.py:MetricsBaseline` ## Data model @@ -20,9 +21,8 @@ Metrics gate inputs: `--fail-complexity`, `--fail-coupling`, `--fail-cohesion`, `--fail-health` - adoption threshold gates: `--min-typing-coverage`, `--min-docstring-coverage` -- external Cobertura coverage join: - `--coverage FILE`, `--coverage-min PERCENT`, - `--fail-on-untested-hotspots` +- current-run Cobertura coverage join: + `--coverage`, `--coverage-min`, `--fail-on-untested-hotspots` - boolean structural 
gates: `--fail-cycles`, `--fail-dead-code` - baseline-aware delta gates: @@ -39,88 +39,45 @@ Modes: - `analysis_mode=full`: metrics computed and suggestions enabled - `analysis_mode=clones_only`: metrics skipped -- Health-score semantics are defined in - [15-health-score.md](15-health-score.md). -- Metrics comparison state is unified by default: unless `--metrics-baseline` - is explicitly redirected, metrics baseline data comes from the same - `codeclone.baseline.json` path as clone baseline data. Refs: -- `codeclone/cli.py:_metrics_flags_requested` -- `codeclone/cli.py:_metrics_computed` -- `codeclone/_cli_meta.py:_build_report_meta` +- `codeclone/surfaces/cli/runtime.py:_metrics_flags_requested` +- `codeclone/surfaces/cli/runtime.py:_metrics_computed` +- `codeclone/surfaces/cli/report_meta.py:_build_report_meta` - `codeclone/metrics/health.py:compute_health` -- `codeclone/contracts.py:HEALTH_WEIGHTS` +- `codeclone/contracts/__init__.py:HEALTH_WEIGHTS` ## Contracts -- `--skip-metrics` is incompatible with metrics gating/update flags and is a - contract error. -- `golden_fixture_paths` is a separate project-level clone policy: - clone groups fully contained in matching `tests/` / `tests/fixtures/` paths - are excluded before health/gate/suggestion evaluation, but remain visible as - suppressed report facts. -- If metrics are not explicitly requested and no metrics baseline exists, - runtime auto-enables clone-only mode (`skip_metrics=true`). -- In clone-only mode: - `skip_dead_code=true`, `skip_dependencies=true`. -- `--fail-dead-code` forces dead-code analysis on (even if metrics are skipped). -- `--fail-cycles` forces dependency analysis on (even if metrics are skipped). -- Type/docstring adoption metrics are computed in full mode. -- `--coverage` joins an external Cobertura XML file to current-run function - spans with stdlib XML parsing only. 
This signal is not metrics-baseline truth, - is not written to `codeclone.baseline.json`, and does not affect fingerprint - or clone identity semantics. -- Invalid Cobertura XML downgrades to a current-run - `coverage_join.status="invalid"` signal in normal analysis. It does not fail - the run or update any baseline; only `--fail-on-untested-hotspots` upgrades - invalid input into a contract error. -- `--api-surface` is opt-in in normal runs, but runtime auto-enables it when - `--fail-on-api-break` or `--update-metrics-baseline` needs a public API - snapshot. -- In the normal CLI `Metrics` block, adoption coverage is shown whenever metrics - are computed, and the public API surface line appears when `api_surface` - facts were collected. A coverage line appears when `--coverage` produced a - joined coverage summary. -- `--update-baseline` in full mode implies metrics-baseline update in the same - run. -- If metrics baseline path equals clone baseline path and clone baseline file is - missing, `--update-metrics-baseline` escalates to `--update-baseline` so - embedded metrics can be written safely. -- `--fail-on-new-metrics` requires trusted metrics baseline unless baseline is - being updated in the same run. -- `--fail-on-typing-regression` / `--fail-on-docstring-regression` require a - metrics baseline that already contains adoption coverage data. -- `--fail-on-api-break` requires a metrics baseline that already contains - `api_surface` data. -- `--fail-on-untested-hotspots` requires `--coverage` and a valid Cobertura XML - input. It evaluates current-run `coverage_join` facts only for measured - medium/high-risk functions below the configured threshold; scope gaps are - surfaced separately and do not require or update a metrics baseline. The - flag name is retained for CLI compatibility. -- In CI mode, if metrics baseline was loaded and trusted, runtime enables - `fail_on_new_metrics=true`. +- `--skip-metrics` is incompatible with metrics gating/update flags. 
+- If metrics are not explicitly requested and no metrics baseline exists, runtime may auto-enable clone-only mode.
+- In clone-only mode, dead-code and dependency analysis are skipped unless explicitly forced by gates.
+- `--coverage` is a current-run signal only; it does not update baseline state.
+- Invalid Cobertura XML downgrades to `coverage_join.status="invalid"` in normal runs; it becomes a contract error
+  only when hotspot gating requires a valid join.
+- `--api-surface` is opt-in, but runtime auto-enables it when API break gating or metrics-baseline update needs it.
+- `--fail-on-new-metrics` requires a trusted metrics baseline unless baseline is being updated in the same run.
+- `--fail-on-typing-regression`, `--fail-on-docstring-regression`, and `--fail-on-api-break` require the corresponding
+  capability in the trusted metrics baseline.
+- In CI mode, if a trusted metrics baseline is loaded, runtime enables `fail_on_new_metrics=true`.
 
 Refs:
 
-- `codeclone/cli.py:_configure_metrics_mode`
-- `codeclone/cli.py:_main_impl`
-- `codeclone/metrics_baseline.py:MetricsBaseline.verify_compatibility`
+- `codeclone/surfaces/cli/runtime.py:_configure_metrics_mode`
+- `codeclone/surfaces/cli/workflow.py:_main_impl`
+- `codeclone/baseline/metrics_baseline.py:MetricsBaseline.verify_compatibility`
 
 ## Invariants (MUST)
 
-- Metrics diff is computed only when:
-  metrics were computed and metrics baseline is trusted.
-- Metric gate reasons are emitted in deterministic order:
-  threshold checks -> cycles/dead/health -> NEW-vs-baseline diffs ->
-  adoption/API baseline diffs -> coverage-join hotspot gate.
-- Metric gate reasons are namespaced as `metric:*` in gate output.
+- Metrics diff is computed only when metrics were computed and metrics baseline is trusted.
+- Gate reasons are emitted in deterministic order.
+- Metric gate reasons are namespaced as `metric:*`.
Refs: -- `codeclone/pipeline.py:metric_gate_reasons` -- `codeclone/pipeline.py:gate` +- `codeclone/report/gates/evaluator.py:metric_gate_reasons` +- `codeclone/core/reporting.py:gate` ## Failure modes @@ -132,8 +89,7 @@ Refs: | Invalid Cobertura XML without hotspot gate | Current-run invalid signal, exit `0` | | Coverage hotspot gate without valid `--coverage` input | Contract error, exit `2` | | `--update-metrics-baseline` when metrics were not computed | Contract error, exit `2` | -| Threshold breach or NEW-vs-baseline metric regressions | Gating failure, exit `3` | -| Coverage hotspots from current-run coverage join | Gating failure, exit `3` | +| Threshold breach or metrics regressions | Gating failure, exit `3` | ## Determinism / canonicalization @@ -143,9 +99,9 @@ Refs: Refs: -- `codeclone/metrics_baseline.py:snapshot_from_project_metrics` -- `codeclone/metrics_baseline.py:_compute_payload_sha256` -- `codeclone/metrics_baseline.py:MetricsBaseline.verify_integrity` +- `codeclone/baseline/_metrics_baseline_payload.py:snapshot_from_project_metrics` +- `codeclone/baseline/_metrics_baseline_payload.py:_compute_payload_sha256` +- `codeclone/baseline/metrics_baseline.py:MetricsBaseline.verify_integrity` ## Locked by tests @@ -160,16 +116,4 @@ Refs: ## Non-guarantees - Absolute threshold defaults are not frozen by this chapter. -- Metrics scoring internals, per-dimension weighting, and the exact clone - density curve may evolve if exit semantics and contract statuses stay stable. - See [15-health-score.md](15-health-score.md) for the current model and the - phased expansion policy. 
- -## See also - -- [15-health-score.md](15-health-score.md) -- [04-config-and-defaults.md](04-config-and-defaults.md) -- [05-core-pipeline.md](05-core-pipeline.md) -- [09-cli.md](09-cli.md) -- [16-dead-code-contract.md](16-dead-code-contract.md) -- [17-suggestions-and-clone-typing.md](17-suggestions-and-clone-typing.md) +- Metrics scoring internals may evolve if exit semantics and contract statuses stay stable and are documented honestly. diff --git a/docs/book/16-dead-code-contract.md b/docs/book/16-dead-code-contract.md index 118180b..c4c3805 100644 --- a/docs/book/16-dead-code-contract.md +++ b/docs/book/16-dead-code-contract.md @@ -10,9 +10,9 @@ Define dead-code liveness rules, canonical symbol-usage boundaries, and gating s - Test-path classifier: `codeclone/paths.py:is_test_filepath` - Inline suppression parser/binder: `codeclone/suppressions.py` - Extraction of referenced names/candidates: - `codeclone/extractor.py:extract_units_and_stats_from_source` + `codeclone/analysis/units.py:extract_units_and_stats_from_source` - Cache load boundary for referenced names: - `codeclone/pipeline.py:_load_cached_metrics` + `codeclone/core/discovery_cache.py:load_cached_metrics_extended` ## Data model @@ -62,7 +62,7 @@ Refs: - `codeclone/metrics/dead_code.py:_is_non_actionable_candidate` - `codeclone/metrics/dead_code.py:find_unused` -- `codeclone/pipeline.py:metric_gate_reasons` +- `codeclone/report/gates/evaluator.py:metric_gate_reasons` ## Invariants (MUST) @@ -74,8 +74,8 @@ Refs: Refs: - `codeclone/metrics/dead_code.py:find_unused` -- `codeclone/extractor.py:extract_units_and_stats_from_source` -- `codeclone/pipeline.py:_load_cached_metrics` +- `codeclone/analysis/units.py:extract_units_and_stats_from_source` +- `codeclone/core/discovery_cache.py:load_cached_metrics_extended` ## Failure modes @@ -104,9 +104,9 @@ Refs: ## Locked by tests - `tests/test_extractor.py::test_dead_code_marks_symbol_dead_when_referenced_only_by_tests` -- 
`tests/test_extractor.py::test_dead_code_skips_module_pep562_hooks` -- `tests/test_extractor.py::test_dead_code_applies_inline_suppression_per_declaration` -- `tests/test_extractor.py::test_dead_code_suppression_binding_is_scoped_to_target_symbol` +- `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[skip_pep562_hooks]` +- `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[inline_suppression_per_declaration]` +- `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[suppression_binding_scoped_to_target]` - `tests/test_extractor.py::test_extract_collects_referenced_qualnames_for_import_aliases` - `tests/test_extractor.py::test_collect_dead_candidates_skips_protocol_and_stub_like_symbols` - `tests/test_pipeline_metrics.py::test_load_cached_metrics_ignores_referenced_names_from_test_files` @@ -118,7 +118,7 @@ Refs: - `tests/test_report.py::test_report_json_dead_code_suppressed_items_are_reported_separately` - `tests/test_html_report.py::test_html_report_renders_dead_code_split_with_suppressed_layer` - `tests/test_suppressions.py::test_extract_suppression_directives_supports_inline_and_leading_forms` -- `tests/test_suppressions.py::test_bind_suppressions_applies_only_to_adjacent_declaration_line` +- `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[adjacent_leading_only]` ## Non-guarantees diff --git a/docs/book/17-suggestions-and-clone-typing.md b/docs/book/17-suggestions-and-clone-typing.md index a7eebeb..b9dc7ad 100644 --- a/docs/book/17-suggestions-and-clone-typing.md +++ b/docs/book/17-suggestions-and-clone-typing.md @@ -2,37 +2,38 @@ ## Purpose -Define deterministic clone-type classification and suggestion generation -contracts used by canonical report projections (`JSON` / `TXT` / `Markdown` / -`HTML`). +Define deterministic clone-type classification and suggestion generation used by +canonical report projections. 
## Public surface - Clone-type classifier: `codeclone/report/suggestions.py:classify_clone_type` - Suggestion engine: `codeclone/report/suggestions.py:generate_suggestions` -- Pipeline integration: `codeclone/pipeline.py:compute_suggestions` -- Report serialization: `codeclone/report/json_contract.py:build_report_document` -- HTML render integration: `codeclone/html_report.py:build_html_report` +- Pipeline integration: `codeclone/core/pipeline.py:compute_suggestions` +- Report serialization: `codeclone/report/document/builder.py:build_report_document` +- HTML render integration: `codeclone/report/html/assemble.py:build_html_report` ## Data model Suggestion shape: -- `severity`: `critical|warning|info` -- `category`: - `clone|structural|complexity|coupling|cohesion|dead_code|dependency` -- `source_kind`: source classification of the primary location - (`production` / `tests` / `fixtures` / `other`) -- `title`, `location`, `steps`, `effort`, `priority` +- `severity` +- `category` +- `source_kind` +- `title` +- `location` +- `steps` +- `effort` +- `priority` Clone typing: - function groups: - Type-1: identical `raw_hash` - Type-2: identical normalized `fingerprint` - - Type-3: mixed fingerprints (same group semantics) + - Type-3: mixed fingerprints inside same group semantics - Type-4: fallback -- block/segment groups: Type-4 +- block and segment groups: Type-4 Refs: @@ -41,36 +42,26 @@ Refs: ## Contracts -- Suggestions are generated only in full metrics mode - (`skip_metrics=false`). +- Suggestions are generated only in full metrics mode. - Suggestions are advisory only and never directly control exit code. -- Suggestions are not a one-to-one mirror of findings. They should exist only - when they add action structure beyond the canonical finding itself. -- Low-signal local structural `info` hints stay in `findings` and do not emit a - separate suggestion card. -- SARIF projection is finding-driven and does not consume suggestion cards. 
-- JSON report stores clone typing at group level: - - `findings.groups.clones.[*].clone_type` -- Suggestion location is deterministic: first item by stable path/line sort. +- Suggestions are not a one-to-one mirror of findings; they exist only when they add action structure. +- Low-signal local structural info hints stay in findings and do not emit separate suggestion cards. +- SARIF remains finding-driven and does not consume suggestion cards. +- JSON report stores clone typing at group level under clone groups. Refs: -- `codeclone/pipeline.py:analyze` -- `codeclone/pipeline.py:gate` -- `codeclone/report/json_contract.py:build_report_document` +- `codeclone/core/pipeline.py:analyze` +- `codeclone/core/pipeline.py:compute_suggestions` +- `codeclone/report/document/builder.py:build_report_document` - `codeclone/report/suggestions.py:generate_suggestions` ## Invariants (MUST) -- Suggestion priority formula is stable: - `severity_weight / effort_weight`. -- For structural findings, separate suggestion cards are emitted only for the - actionable subset; low-signal local `info` hints remain finding-only. -- Suggestion output is sorted by: - `(-priority, severity, category, source_kind, location, title, subject_key)`. -- Derived suggestion serialization in report JSON applies deterministic ordering by - `(-priority, severity_rank, title, finding_id)`. -- Clone type output for a given group is deterministic for identical inputs. +- Suggestion priority formula is stable. +- Structural suggestion cards are emitted only for the actionable subset. +- Suggestion output is deterministically sorted. +- Clone type output for identical inputs is deterministic. Refs: @@ -87,14 +78,13 @@ Refs: ## Determinism / canonicalization -- Classifier uses deterministic set normalization + sorted collections. -- Serializer emits suggestions in generator-provided deterministic order. +- Classifier uses deterministic set normalization and sorted collections. 
+- Serializer emits suggestions in deterministic order. Refs: - `codeclone/report/suggestions.py:classify_clone_type` -- `codeclone/report/suggestions.py:generate_suggestions` -- `codeclone/report/json_contract.py:build_report_document` +- `codeclone/report/document/builder.py:build_report_document` ## Locked by tests @@ -105,13 +95,5 @@ Refs: ## Non-guarantees -- Suggestion wording can evolve without schema bump. -- Suggestion heuristics may be refined if deterministic ordering and - non-gating behavior remain unchanged. - -## See also - -- [05-core-pipeline.md](05-core-pipeline.md) -- [08-report.md](08-report.md) -- [10-html-render.md](10-html-render.md) -- [15-metrics-and-quality-gates.md](15-metrics-and-quality-gates.md) +- Suggestion wording can evolve without a schema bump. +- Suggestion heuristics may be refined if deterministic ordering and non-gating behavior remain unchanged. diff --git a/docs/book/18-benchmarking.md b/docs/book/18-benchmarking.md index cb49d41..0fdfc08 100644 --- a/docs/book/18-benchmarking.md +++ b/docs/book/18-benchmarking.md @@ -62,7 +62,7 @@ Benchmark output (`benchmark_schema_version=1.0`) contains: Refs: -- `codeclone/report/json_contract.py:_build_integrity_payload` +- `codeclone/report/document/integrity.py:_build_integrity_payload` - `benchmarks/run_benchmark.py` ## Recommended run profile diff --git a/docs/book/19-inline-suppressions.md b/docs/book/19-inline-suppressions.md index b051172..1062d5c 100644 --- a/docs/book/19-inline-suppressions.md +++ b/docs/book/19-inline-suppressions.md @@ -11,7 +11,7 @@ source comments, without introducing broad/project-wide ignores. 
- Dead-code final filter: `codeclone/metrics/dead_code.py:find_unused` - Suppressed dead-code projection helper: `codeclone/metrics/dead_code.py:find_suppressed_unused` -- Dead-code candidate extraction: `codeclone/extractor.py:_collect_dead_candidates` +- Dead-code candidate extraction: `codeclone/analysis/_module_walk.py:_collect_dead_candidates` ## Data model @@ -78,18 +78,18 @@ Refs: - `codeclone/suppressions.py:extract_suppression_directives` - `codeclone/suppressions.py:bind_suppressions_to_declarations` -- `codeclone/cache.py:_canonicalize_cache_entry` +- `codeclone/cache/_canonicalize.py:_canonicalize_cache_entry` ## Locked by tests - `tests/test_suppressions.py::test_extract_suppression_directives_supports_inline_and_leading_forms` -- `tests/test_suppressions.py::test_extract_suppression_directives_ignores_unknown_and_malformed_safely` -- `tests/test_suppressions.py::test_bind_suppressions_applies_only_to_adjacent_declaration_line` -- `tests/test_suppressions.py::test_bind_suppressions_does_not_propagate_class_inline_to_method` -- `tests/test_suppressions.py::test_bind_suppressions_applies_to_method_target` +- `tests/test_suppressions.py::test_extract_suppression_directives_ignores_invalid_forms[unknown_and_malformed]` +- `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[adjacent_leading_only]` +- `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[class_inline_does_not_propagate]` +- `tests/test_suppressions.py::test_bind_suppressions_targets_expected_declaration_scope[method_target]` - `tests/test_suppressions.py::test_build_suppression_index_deduplicates_rules_stably` -- `tests/test_extractor.py::test_dead_code_applies_inline_suppression_per_declaration` -- `tests/test_extractor.py::test_dead_code_suppression_binding_is_scoped_to_target_symbol` +- `tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[inline_suppression_per_declaration]` +- 
`tests/test_extractor.py::test_dead_code_respects_runtime_hooks_and_inline_suppressions[suppression_binding_scoped_to_target]` - `tests/test_metrics_modules.py::test_find_unused_applies_inline_dead_code_suppression` - `tests/test_metrics_modules.py::test_find_suppressed_unused_returns_actionable_suppressed_candidates` - `tests/test_report.py::test_report_json_dead_code_suppressed_items_are_reported_separately` diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md index 2401070..d059dc3 100644 --- a/docs/book/20-mcp-interface.md +++ b/docs/book/20-mcp-interface.md @@ -4,348 +4,145 @@ Define the current public MCP surface in the `2.0` beta line. -This interface is **optional** (installed via the `mcp` extra). It exposes -the deterministic analysis pipeline as a **read-only MCP server** for AI agents -and MCP-capable clients. It does not replace the CLI or the canonical report -contract. +The MCP layer is optional, read-only, and built on the same canonical +pipeline/report contracts as the CLI. It does not create a second analysis +engine or a second persistence model. 
## Public surface -- Package extra: `codeclone[mcp]` -- MCP launcher: `codeclone-mcp` -- MCP server: `codeclone/mcp_server.py` -- MCP service adapter: `codeclone/mcp_service.py` +- package extra: `codeclone[mcp]` +- launcher: `codeclone-mcp` +- server wiring: `codeclone/surfaces/mcp/server.py` +- in-process service/session: `codeclone/surfaces/mcp/service.py`, + `codeclone/surfaces/mcp/session.py` -## Data model +## Shape Current server characteristics: -- optional dependency; base `codeclone` install does not require `mcp` +- optional dependency; base `codeclone` install does not require MCP runtime - transports: - - `stdio` - - `streamable-http` + - `stdio` + - `streamable-http` - run storage: - - in-memory only - - bounded history (`--history-limit`, default `4`, maximum `10`) - - latest-run pointer for `codeclone://latest/...` resources - - the `latest` pointer moves whenever a newer `analyze_*` call registers a run -- run identity: - - canonical run identity is derived from the canonical report integrity digest - - MCP payloads expose a short `run_id` handle (first 8 hex chars) - - MCP tools/resources accept both short and full run ids - - MCP finding ids are compact by default and may lengthen when needed to - stay unique within a run + - in-memory only + - bounded by `--history-limit` + - latest-run pointer is process-local +- roots: + - analysis tools require an absolute repository root + - relative roots such as `.` are rejected - analysis modes: - - `full` - - `clones_only` -- process-count policy: - - `processes` is an optional override - - when omitted, MCP defers to the core CodeClone runtime -- initialize metadata: - - `serverInfo.version` reflects the CodeClone package version - - clients may use it for compatibility checks -- root contract: - - analysis tools require an absolute repository root - - relative roots such as `.` are rejected in MCP because server cwd may - differ from the client workspace - - granular `check_*` tools may omit `root` and 
use the latest compatible - stored run; if `root` is provided, it must also be absolute + - `full` + - `clones_only` - cache policies: - - `reuse` - - `off` - `refresh` is rejected in MCP because the server is read-only. -- summary payload: - - `run_id`, `version`, `schema`, `mode`, compact `analysis_profile` - - `health_scope` explains what the health score covers - - `focus` explains the active summary/triage lens - - `baseline`, `metrics_baseline`, `cache` - - untrusted baseline comparisons stay compact but explicit through - `baseline.compared_without_valid_baseline`, - `baseline.baseline_python_tag`, and `baseline.runtime_python_tag` - - `cache.freshness` classifies summary cache reuse as `fresh`, `mixed`, - or `reused` - - flattened `inventory` (`files`, `lines`, `functions`, `classes`) - - flattened `findings` (`total`, `new`, `known`, `by_family`, `production`, - `new_by_source_kind`) - - flattened `diff` (`new_clones`, `health_delta`, - `typing_param_permille_delta`, `typing_return_permille_delta`, - `docstring_permille_delta`, `api_breaking_changes`, `new_api_symbols`) - - optional `coverage_join` when an analysis request included - `coverage_xml` (`status`, `overall_permille`, `coverage_hotspots`, - `scope_gap_hotspots`, `hotspot_threshold_percent`) - - `warnings`, `failures` - - `analyze_changed_paths` is intentionally more compact than `get_run_summary`: - it returns `changed_files`, compact `baseline`, `focus`, `health_scope`, - `health`, `health_delta`, `verdict`, `new_findings`, - `new_by_source_kind`, `resolved_findings`, and an empty - `changed_findings` placeholder, while - detailed changed payload stays in - `get_report_section(section="changed")` -- workflow guidance: - - the MCP surface is intentionally agent-guiding rather than list-first - - the cheapest useful path is designed to be the most obvious path: - `get_run_summary` / `get_production_triage` first, then `list_hotspots` - or `check_*`, then `get_finding` / `get_remediation` - - 
`help(topic=...)` is a bounded semantic routing tool for contract/workflow - uncertainty; it is not a second manual or docs proxy -- finding-list payloads: - - MCP finding ids are compact projection ids; canonical report ids are unchanged - - `detail_level="summary"` is the default for list/check/hotspot tools - - `detail_level="summary"` keeps compact relative `"path:line"` locations - - `detail_level="normal"` keeps structured `{path, line, end_line, symbol}` - locations plus remediation - - `detail_level="full"` keeps the compatibility-oriented payload, - including `priority_factors`, `items`, and per-location `uri` - - empty design `check_*` responses may include a compact - `threshold_context` (`metric`, `threshold`, `measured_units`, - `highest_below_threshold`) so agents can tell whether the run is truly - quiet or just below the active threshold - -The MCP layer does not introduce a separate analysis engine. It calls the -current CodeClone pipeline and reuses the canonical report document already -produced by the report contract. 
+ - `reuse` + - `off` + - `refresh` is accepted by the current MCP contract and routed through the + same runtime path as the CLI ## Tools -Current tool set (`21` tools): - -| Tool | Key parameters | Purpose | -|--------------------------|----------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------| -| `analyze_repository` | absolute `root`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, cache/baseline paths | Full analysis → compact summary; then `get_run_summary` or `get_production_triage` | -| `analyze_changed_paths` | absolute `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, `api_surface`, `coverage_xml` | Diff-aware analysis → compact changed-files snapshot | -| `get_run_summary` | `run_id` | Cheapest run snapshot: health, findings, baseline, inventory, active thresholds | -| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first view: health, hotspots, suggestions, active thresholds | -| `help` | `topic`, `detail` | Semantic guide for workflow, analysis profile, baseline, coverage, suppressions, review state, changed-scope | -| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta: regressions, improvements, health change | -| `evaluate_gates` | `run_id`, gate thresholds, `fail_on_untested_hotspots`, `coverage_min` | Preview CI gating decisions | -| `get_report_section` | `run_id`, `section`, `family`, `path`, `offset`, `limit` | Read report sections; `metrics_detail` is paginated with family/path filters | -| `list_findings` | `family`, `severity`, `novelty`, `sort_by`, `detail_level`, `changed_paths`, pagination | Filtered, paginated findings; use after hotspots or `check_*` | -| `get_finding` | `finding_id`, `run_id`, `detail_level` | Single finding detail by id; defaults to `normal` | -| `get_remediation` | `finding_id`, 
`run_id`, `detail_level` | Remediation payload for one finding | -| `list_hotspots` | `kind`, `run_id`, `detail_level`, `changed_paths`, `limit` | Priority-ranked hotspot views; preferred before broad listing | -| `check_clones` | `run_id`, `root`, `path`, `clone_type`, `source_kind`, `detail_level` | Clone findings only; `health.dimensions` includes only `clones` | -| `check_complexity` | `run_id`, `root`, `path`, `min_complexity`, `detail_level` | Complexity hotspots only | -| `check_coupling` | `run_id`, `root`, `path`, `detail_level` | Coupling hotspots only | -| `check_cohesion` | `run_id`, `root`, `path`, `detail_level` | Cohesion hotspots only | -| `check_dead_code` | `run_id`, `root`, `path`, `min_severity`, `detail_level` | Dead-code findings only | -| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-friendly markdown or JSON summary | -| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Session-local review marker (in-memory) | -| `list_reviewed_findings` | `run_id` | List reviewed findings for a run | -| `clear_session_runs` | none | Reset in-memory runs and session state | - -All tools are read-only except `mark_finding_reviewed` and `clear_session_runs` -(session-local, in-memory). `check_*` tools query stored runs — call -`analyze_repository` or `analyze_changed_paths` first. - -Recommended workflow: - -1. `get_run_summary` or `get_production_triage` -2. `help(topic=...)` if contract meaning is unclear -3. `list_hotspots` or `check_*` -4. `get_finding` → `get_remediation` -5. `generate_pr_summary(format="markdown")` - -`metrics_detail` families currently include canonical health/quality families -plus `overloaded_modules`, `coverage_adoption`, `coverage_join`, and -`api_surface`. - -For analysis sensitivity, the intended model is: - -1. start with repo defaults or `pyproject`-resolved thresholds -2. lower thresholds only for an explicit higher-sensitivity exploratory pass -3. 
compare runs only when profile differences are understood +Current tool set: `21` tools. + +The MCP surface is intentionally triage-first: analyze first, summarize/triage +second, then drill into one finding or one hotspot family. + +### Analysis and run-level tools + +| Tool | Key parameters | Purpose | +|---|---|---| +| `analyze_repository` | `root`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `baseline_path`, `metrics_baseline_path`, `cache_policy` | Full deterministic analysis of one repo root; registers the latest in-memory run. | +| `analyze_changed_paths` | `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, `cache_policy` | Diff-aware analysis with changed-files projection over the same canonical run/report contract. | +| `get_run_summary` | `run_id` | Cheapest run-level snapshot. Start here after analysis when you need health, findings, baseline/cache status, and inventory in compact form. | +| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first first-pass view over one stored run. | +| `help` | `topic`, `detail` | Bounded workflow/contract guidance for supported MCP topics. | +| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta view over findings and health; returns `incomparable` when roots/settings differ. | +| `evaluate_gates` | `run_id`, gate flags, threshold overrides, `coverage_min` | Preview CI/gating decisions against a stored run without mutating process or repo state. | + +### Report and finding projection tools + +| Tool | Key parameters | Purpose | +|---|---|---| +| `get_report_section` | `run_id`, `section`, `family`, `path`, `offset`, `limit` | Read canonical report sections; `metrics_detail` is the bounded/paginated drill-down path. 
| +| `list_findings` | `run_id`, `family`, `category`, `severity`, `source_kind`, `novelty`, `sort_by`, `detail_level`, changed-scope filters, pagination | Deterministic filtered finding list over canonical stored findings. | +| `get_finding` | `finding_id`, `run_id`, `detail_level` | Return one canonical finding group by short or full id. | +| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Return the remediation/explainability packet for one finding. | +| `list_hotspots` | `kind`, `run_id`, `detail_level`, changed-scope filters, pagination | Return one derived hotspot list such as `most_actionable` or `production_hotspots`. | +| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-oriented summary for changed scope; `markdown` is the default human/LLM-facing format. | + +### Focused check tools + +| Tool | Key parameters | Purpose | +|---|---|---| +| `check_clones` | `run_id` or absolute `root`, `path`, `clone_type`, `source_kind`, `max_results`, `detail_level` | Narrow clone-only query over a compatible stored run. | +| `check_complexity` | `run_id` or absolute `root`, `path`, `min_complexity`, `max_results`, `detail_level` | Narrow complexity-hotspot query. | +| `check_coupling` | `run_id` or absolute `root`, `path`, `max_results`, `detail_level` | Narrow coupling-hotspot query. | +| `check_cohesion` | `run_id` or absolute `root`, `path`, `max_results`, `detail_level` | Narrow cohesion-hotspot query. | +| `check_dead_code` | `run_id` or absolute `root`, `path`, `min_severity`, `max_results`, `detail_level` | Narrow dead-code query. | + +### Session-local tools + +| Tool | Key parameters | Purpose | +|---|---|---| +| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Mark a finding as reviewed in the current in-memory MCP session. | +| `list_reviewed_findings` | `run_id` | Return reviewed markers currently held in process memory. 
| +| `clear_session_runs` | none | Clear in-memory run history and session-local review state for this server process. | ## Resources -Current fixed resources: - -| Resource | Payload | Availability | -|----------------------------------|-------------------------------------------------------|-------------------------------------------------------| -| `codeclone://latest/summary` | latest run summary projection | always after at least one run | -| `codeclone://latest/triage` | latest production-first triage projection | always after at least one run | -| `codeclone://latest/report.json` | latest canonical report document | always after at least one run | -| `codeclone://latest/health` | latest health score + dimensions | always after at least one run | -| `codeclone://latest/gates` | latest gate evaluation result | only after `evaluate_gates` in current server process | -| `codeclone://latest/changed` | latest changed-files projection | only for a diff-aware latest run | -| `codeclone://schema` | schema-style descriptor for canonical report sections | always available | - -Current run-scoped URI templates: - -| URI template | Payload | Availability | -|---------------------------------------------------|--------------------------------------|-----------------------------------------| -| `codeclone://runs/{run_id}/summary` | run-specific summary projection | for any stored run | -| `codeclone://runs/{run_id}/report.json` | run-specific canonical report | for any stored run | -| `codeclone://runs/{run_id}/findings/{finding_id}` | run-specific canonical finding group | for an existing finding in a stored run | - -Fixed resources and URI templates are convenience views over already -registered runs. They do not trigger fresh analysis by themselves. -If a client needs the freshest truth, it must start a fresh analysis run first -(typically with `cache_policy="off"`), rather than relying on older session -state behind `codeclone://latest/...`. 
- -## Contracts - -- MCP is **read-only**: - - no source-file mutation - - no baseline update - - no metrics-baseline update - - no cache refresh writes -- Session review markers are **ephemeral only**: - - stored in memory per server process - - never written to baseline, cache, or report artifacts -- `streamable-http` defaults to loopback binding. - Non-loopback hosts require explicit `--allow-remote` because the server has - no built-in authentication. -- `--allow-remote` expands the trust boundary materially: - - any reachable network client can trigger CPU-intensive analysis - - any reachable network client can read analysis results - - request parameters such as `root` and path filters can still probe - repository-relative filesystem structure - - use it only on trusted networks or behind a firewall / authenticated - reverse proxy -- MCP must reuse current: - - pipeline stages - - baseline trust semantics - - cache semantics - - canonical report contract -- `coverage_xml` is resolved relative to the absolute root when it is not - already absolute. It is a current-run Cobertura input only; MCP must never - write it to baseline/cache/report artifacts or treat it as baseline truth. -- When `respect_pyproject=true`, MCP also respects `golden_fixture_paths`. - Clone groups excluded by that policy are omitted from active clone/gate - projections but remain available in the canonical report under the optional - `findings.groups.clones.suppressed.*` bucket. -- Invalid Cobertura XML during `analyze_*` does not fail analysis; the stored - run carries `coverage_join.status="invalid"` plus `invalid_reason`. - `evaluate_gates(fail_on_untested_hotspots=true)` on that run is a contract - error because hotspot gating requires a valid join. -- Inline MCP design-threshold parameters (`complexity_threshold`, - `coupling_threshold`, `cohesion_threshold`) define the canonical design - finding universe of that run and are recorded in - `meta.analysis_thresholds.design_findings`. 
-- `get_run_summary` is a deterministic convenience projection derived from the - canonical report (`meta`, `inventory`, `findings.summary`, - `metrics.summary.health`) plus baseline-diff/gate/changed-files context. -- `get_production_triage` is also a deterministic MCP projection over the same - canonical run state (`summary`, `derived.hotlists`, `derived.suggestions`, - and canonical finding source scope). It must not create a second analysis or - remediation truth path. -- Canonical JSON remains the source of truth for report semantics. -- `list_findings` and `list_hotspots` are deterministic projections over the - canonical report, not a separate analysis branch. -- `metrics_detail(family="overloaded_modules")` exposes the canonical report-only - module-hotspot layer, but does not promote it into findings, hotlists, or - gate semantics. -- `metrics_detail(family="coverage_join")` exposes the canonical current-run - coverage join summary/items, including measured coverage hotspots and - coverage scope gaps. `evaluate_gates(fail_on_untested_hotspots=true)` - requires a stored run created with valid `coverage_xml`. -- `get_remediation` is a deterministic MCP projection over existing - suggestions/explainability data, not a second remediation engine. -- `analysis_mode="clones_only"` must mirror the same metric/dependency - skip-semantics as the regular pipeline. -- Missing optional MCP dependency is handled explicitly by the launcher with a - user-facing install hint and exit code `2`. - -## Invariants (MUST) - -- Tool names are stable public surface. -- Resource URI shapes are stable public surface. -- Read-only vs session-local tool annotations remain accurate. -- `analyze_repository` always registers exactly one latest run. -- `analyze_changed_paths` requires `changed_paths` or `git_diff_ref`. -- `analyze_repository` and `analyze_changed_paths` require an absolute `root`; - relative roots like `.` are rejected. 
-- `git_diff_ref` is validated as a safe single revision expression before - invoking `git diff`. -- `changed_paths` is a structured `list[str]` of repo-relative paths, not a - comma-separated string payload. -- `analyze_changed_paths` may return the same `run_id` as a previous run when - the canonical report digest is unchanged; changed-files state is an overlay, - not a second canonical report. -- `get_run_summary` with no `run_id` resolves to the latest stored run. -- `codeclone://latest/...` resources always resolve to the latest stored run in - the current MCP server process, not to a globally fresh analysis state. -- Summary-style MCP payloads expose `cache.freshness` as a derived convenience - marker; canonical cache metadata remains available only through canonical - report/meta surfaces. -- `get_report_section(section="all")` returns the full canonical report document. -- `get_report_section(section="metrics")` returns only `metrics.summary`. -- `get_report_section(section="metrics_detail")` is intentionally bounded: - without filters it returns `summary` plus a hint; with `family` and/or `path` - it returns a paginated item slice. -- `get_report_section(section="changed")` is available only for diff-aware runs. -- MCP short `run_id` values are session handles over the canonical digest of - that run. -- MCP summary/normal finding/location payloads use relative paths only and do - not expose absolute `file://` URIs. -- Finding `locations` and `html_anchor` values are stable projections over the - current run and do not invent non-canonical ids. -- For the same finding id, `source_kind` remains consistent across - `list_findings`, `list_hotspots`, and `get_finding`. -- `get_finding(detail_level="full")` remains the compatibility-preserving - full-detail endpoint: `priority_factors` and location `uri` are still - available there. -- `compare_runs` is only semantically meaningful when both runs use comparable - repository scope/root and analysis settings. 
-- `compare_runs` exposes top-level `comparable` plus optional `reason`. When - roots or effective analysis settings differ, `regressions` and - `improvements` become empty lists, `unchanged` and `health_delta` become - `null`, and `verdict` becomes `incomparable`. -- `compare_runs.health_delta` is `after.health - before.health` between the two - selected comparable runs. It is independent of baseline or metrics-baseline - drift. -- `compare_runs.verdict` is intentionally conservative but not one-dimensional: - it returns `mixed` when run-to-run finding deltas and `health_delta` disagree. -- `analysis_mode="clones_only"` keeps clone findings fully usable, but MCP - surfaces mark `health` as unavailable instead of fabricating zeroed metrics. -- `coverage_xml` requires `analysis_mode="full"` because coverage join depends - on function-span metrics. -- `codeclone://latest/triage` is a latest-only resource; run-specific triage is - available via the tool, not via a `codeclone://runs/{run_id}/...` resource URI. - -## Failure modes - -| Condition | Behavior | -|---------------------------------------------------|---------------------------------------------------| -| `mcp` extra not installed | `codeclone-mcp` prints install hint and exits `2` | -| Invalid root path / invalid numeric config | service raises contract error | -| `coverage_xml` with `analysis_mode="clones_only"` | service raises contract error | -| Coverage hotspot gate without valid coverage join | service raises contract error | -| Requested run missing | service raises run-not-found error | -| Requested finding missing | service raises finding-not-found error | -| Unsupported report section/resource suffix | service raises contract error | - -## Determinism / canonicalization - -- MCP run identity is derived from canonical report integrity digest. -- Finding order is inherited from canonical report ordering. -- Hotlists are derived from canonical report data and deterministic derived ids. 
-- No MCP-only heuristics may change analysis or gating semantics. -- MCP must not re-synthesize design findings from raw metrics after the run; - threshold-aware design findings belong to the canonical report document. -- Coverage join ordering and hotspot gates are inherited from canonical - `metrics.families.coverage_join` facts. +Resources are deterministic read-only projections over stored runs. + +| URI | Purpose | +|---|---| +| `codeclone://latest/summary` | Compact summary for the latest stored run. | +| `codeclone://latest/report.json` | Canonical JSON report for the latest stored run. | +| `codeclone://latest/health` | Health/metrics snapshot for the latest stored run. | +| `codeclone://latest/gates` | Last gate-evaluation result produced in this MCP session. | +| `codeclone://latest/changed` | Changed-files projection for the latest diff-aware run. | +| `codeclone://latest/triage` | Production-first triage payload for the latest stored run. | +| `codeclone://schema` | Canonical report schema-style descriptor. | +| `codeclone://runs/{run_id}/summary` | Compact summary for one specific stored run. | +| `codeclone://runs/{run_id}/report.json` | Canonical JSON report for one specific stored run. | +| `codeclone://runs/{run_id}/findings/{finding_id}` | Canonical JSON finding payload for one specific stored run. | + +## Contract rules + +- MCP is read-only with respect to source files, baselines, cache artifacts, + and report artifacts. +- MCP reuses the same canonical report document as CLI/JSON/HTML/SARIF. +- Finding ids, ordering, and summary data are deterministic projections over + the stored run. +- `analyze_changed_paths` requires either explicit `changed_paths` or + `git_diff_ref`. +- `analyze_repository` and `analyze_changed_paths` require an absolute `root`. +- `check_*` tools may resolve against an existing stored run, but if `root` is + provided it must also be absolute. +- `git_diff_ref` is validated before any subprocess call. 
+- Review markers are session-local in-memory state only. +- Run history is process-local and does not survive restart. +- Missing optional MCP dependency is surfaced explicitly by the launcher. + +## Security model + +- default transport is local `stdio` +- non-local HTTP exposure requires explicit `--allow-remote` +- server runtime is loaded lazily so base installs and normal CI do not require + MCP packages +- MCP must not mutate repo state or synthesize findings outside canonical + report facts + +## Determinism + +- run identity is derived from canonical report integrity +- summary, hotspots, findings, and remediation payloads are deterministic + projections over stored run state +- MCP must not create MCP-only analysis semantics or MCP-only gate semantics ## Locked by tests -- `tests/test_mcp_service.py::test_mcp_service_analyze_repository_registers_latest_run` -- `tests/test_mcp_service.py::test_mcp_service_lists_findings_and_hotspots` -- `tests/test_mcp_service.py::test_mcp_service_changed_runs_remediation_and_review_flow` -- `tests/test_mcp_service.py::test_mcp_service_granular_checks_pr_summary_and_resources` -- `tests/test_mcp_service.py::test_mcp_service_evaluate_gates_on_existing_run` -- `tests/test_mcp_service.py::test_mcp_service_resources_expose_latest_summary_and_report` -- `tests/test_mcp_server.py::test_mcp_server_exposes_expected_read_only_tools` -- `tests/test_mcp_server.py::test_mcp_server_tool_roundtrip_and_resources` -- `tests/test_mcp_server.py::test_mcp_server_main_reports_missing_optional_dependency` - -## Non-guarantees - -- There is currently no standalone `mcp_api_version` constant. -- In-memory run history does not survive process restart. -- `clear_session_runs` resets the in-memory run registry and related session - caches, but does not mutate baseline/cache/report artifacts on disk. -- Client-specific UI/approval behavior is not part of the CodeClone contract. 
+- `tests/test_mcp_service.py` +- `tests/test_mcp_server.py` +- `tests/test_mcp_tool_schema_snapshot.py` ## See also diff --git a/docs/book/appendix/a-status-enums.md b/docs/book/appendix/a-status-enums.md index 507396e..6ce4d6f 100644 --- a/docs/book/appendix/a-status-enums.md +++ b/docs/book/appendix/a-status-enums.md @@ -2,13 +2,13 @@ ## Purpose -Centralize machine-readable status value sets used across baseline/cache/report contracts. +Centralize machine-readable status sets used across baseline/cache/report/CLI contracts. ## Public surface -- Baseline statuses: `codeclone/baseline.py:BaselineStatus` -- Cache statuses: `codeclone/cache.py:CacheStatus` -- Exit categories: `codeclone/contracts.py:ExitCode` +- Baseline statuses: `codeclone/baseline/trust.py:BaselineStatus` +- Cache statuses: `codeclone/cache/versioning.py:CacheStatus` +- Exit categories: `codeclone/contracts/__init__.py:ExitCode` ## Data model @@ -54,13 +54,13 @@ Defined by `BASELINE_UNTRUSTED_STATUSES`. ## Contracts -- Status values are serialized into report metadata (`baseline_status`, `cache_status`). -- CLI branches by enum values, not UI text. +- Status values are serialized into report metadata. +- CLI branches by enum/status values, not by human-facing message text. 
Refs: -- `codeclone/_cli_meta.py:ReportMeta` -- `codeclone/cli.py:_main_impl` +- `codeclone/surfaces/cli/report_meta.py:_build_report_meta` +- `codeclone/surfaces/cli/workflow.py:_main_impl` ## Locked by tests diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index 8d0a25e..1890f4e 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -109,7 +109,7 @@ Notes: "collect_api_surface": false }, "files": { - "codeclone/cache.py": { + "codeclone/cache/store.py": { "st": [1730000000000000000, 2048], "ss": [450, 12, 3, 1], "u": [[ @@ -509,7 +509,7 @@ Notes: "artifacts": [ { "location": { - "uri": "codeclone/report/sarif.py", + "uri": "codeclone/report/renderers/sarif.py", "uriBaseId": "%SRCROOT%" } } @@ -540,7 +540,7 @@ Notes: { "physicalLocation": { "artifactLocation": { - "uri": "codeclone/report/sarif.py", + "uri": "codeclone/report/renderers/sarif.py", "uriBaseId": "%SRCROOT%", "index": 0 }, @@ -560,7 +560,7 @@ Notes: } ], "properties": { - "primaryPath": "codeclone/report/sarif.py", + "primaryPath": "codeclone/report/renderers/sarif.py", "primaryQualname": "codeclone.report.sarif:render_sarif_report_document", "primaryRegion": "1:10" }, @@ -598,9 +598,9 @@ INTEGRITY ## Refs -- `codeclone/baseline.py` -- `codeclone/cache.py` -- `codeclone/report/json_contract.py` -- `codeclone/report/serialize.py` -- `codeclone/report/markdown.py` -- `codeclone/report/sarif.py` +- `codeclone/baseline/clone_baseline.py` +- `codeclone/cache/store.py` +- `codeclone/report/document/builder.py` +- `codeclone/report/renderers/text.py` +- `codeclone/report/renderers/markdown.py` +- `codeclone/report/renderers/sarif.py` diff --git a/docs/book/appendix/c-error-catalog.md b/docs/book/appendix/c-error-catalog.md index 78e6389..4953c6d 100644 --- a/docs/book/appendix/c-error-catalog.md +++ b/docs/book/appendix/c-error-catalog.md @@ -8,82 +8,83 @@ Map core error conditions to statuses, markers, and exits. 
| Category | Marker | Exit | |----------------|-------------------|------| -| Contract error | `CONTRACT ERROR:` | 2 | -| Gating failure | `GATING FAILURE:` | 3 | -| Internal error | `INTERNAL ERROR:` | 5 | +| Contract error | `CONTRACT ERROR:` | `2` | +| Gating failure | `GATING FAILURE:` | `3` | +| Internal error | `INTERNAL ERROR:` | `5` | Refs: -- `codeclone/ui_messages.py:MARKER_CONTRACT_ERROR` -- `codeclone/contracts.py:ExitCode` +- `codeclone/ui_messages/__init__.py:MARKER_CONTRACT_ERROR` +- `codeclone/contracts/__init__.py:ExitCode` ## Baseline contract errors -| Condition | Baseline status | CLI behavior | -|----------------------|--------------------------------|-----------------------------------------| -| Missing baseline | `missing` | normal: empty diff; gating: exit 2 | -| Schema mismatch | `mismatch_schema_version` | normal: ignore baseline; gating: exit 2 | -| Fingerprint mismatch | `mismatch_fingerprint_version` | normal: ignore baseline; gating: exit 2 | -| Python tag mismatch | `mismatch_python_version` | normal: ignore baseline; gating: exit 2 | -| Integrity mismatch | `integrity_failed` | normal: ignore baseline; gating: exit 2 | +| Condition | Baseline status | CLI behavior | +|----------------------|--------------------------------|-------------------------------------------| +| Missing baseline | `missing` | normal: empty diff; gating: exit `2` | +| Schema mismatch | `mismatch_schema_version` | normal: ignore baseline; gating: exit `2` | +| Fingerprint mismatch | `mismatch_fingerprint_version` | normal: ignore baseline; gating: exit `2` | +| Python tag mismatch | `mismatch_python_version` | normal: ignore baseline; gating: exit `2` | +| Integrity mismatch | `integrity_failed` | normal: ignore baseline; gating: exit `2` | Refs: -- `codeclone/cli.py:_main_impl` -- `codeclone/baseline.py:BaselineStatus` +- `codeclone/surfaces/cli/workflow.py:_main_impl` +- `codeclone/baseline/trust.py:BaselineStatus` ## Cache degradation cases -| Condition | 
Cache status | Behavior | -|--------------------------------------------------|---------------------------------|------------------------| -| Missing cache file | `missing` | proceed without cache | -| Version mismatch | `version_mismatch` | ignore cache + warning | -| Analysis profile mismatch (`min_loc`/`min_stmt`) | `analysis_profile_mismatch` | ignore cache + warning | -| Invalid JSON/type | `invalid_json` / `invalid_type` | ignore cache + warning | -| Signature mismatch | `integrity_failed` | ignore cache + warning | -| Oversized cache | `too_large` | ignore cache + warning | +| Condition | Cache status | Behavior | +|---------------------------|---------------------------------|------------------------| +| Missing cache file | `missing` | proceed without cache | +| Version mismatch | `version_mismatch` | ignore cache + warning | +| Analysis profile mismatch | `analysis_profile_mismatch` | ignore cache + warning | +| Invalid JSON/type | `invalid_json` / `invalid_type` | ignore cache + warning | +| Signature mismatch | `integrity_failed` | ignore cache + warning | +| Oversized cache | `too_large` | ignore cache + warning | Refs: -- `codeclone/cache.py:CacheStatus` -- `codeclone/cache.py:Cache._ignore_cache` +- `codeclone/cache/versioning.py:CacheStatus` +- `codeclone/cache/store.py:Cache._ignore_cache` ## Source IO and gating | Condition | Behavior | |-------------------------------------------|---------------------------------| | Source read/decode failure in normal mode | file skipped; warning; continue | -| Source read/decode failure in gating mode | contract error exit 2 | +| Source read/decode failure in gating mode | contract error, exit `2` | Refs: -- `codeclone/cli.py:process_file` -- `codeclone/cli.py:_main_impl` +- `codeclone/core/worker.py:process_file` +- `codeclone/surfaces/cli/workflow.py:_main_impl` ## Report write errors -| Condition | Behavior | -|--------------------------------------------|-----------------------| -| Baseline write OSError | 
contract error exit 2 | -| HTML/JSON/Markdown/SARIF/TXT write OSError | contract error exit 2 | +| Condition | Behavior | +|--------------------------------------------|--------------------------| +| Baseline write OSError | contract error, exit `2` | +| HTML/JSON/Markdown/SARIF/TXT write OSError | contract error, exit `2` | Refs: -- `codeclone/cli.py:_main_impl` +- `codeclone/surfaces/cli/reports_output.py:_write_report_output` +- `codeclone/surfaces/cli/workflow.py:_main_impl` ## MCP interface errors | Condition | Behavior | |-----------------------------------------------|---------------------------------------------------| | Optional `mcp` extra missing | `codeclone-mcp` prints install hint and exits `2` | -| Invalid root path / invalid numeric config | MCP service contract error | +| Invalid root path / invalid config | MCP service contract error | | Missing run or finding id | MCP service request error | | Unsupported MCP resource URI / report section | MCP service contract error | Refs: -- `codeclone/mcp_server.py:main` -- `codeclone/mcp_service.py` +- `codeclone/surfaces/mcp/server.py:main` +- `codeclone/surfaces/mcp/service.py` ## Locked by tests diff --git a/docs/sarif.md b/docs/sarif.md index 2bb2518..c2ebd56 100644 --- a/docs/sarif.md +++ b/docs/sarif.md @@ -5,13 +5,13 @@ Explain how CodeClone projects canonical findings into SARIF and what IDEs or code-scanning tools can rely on. -SARIF is a machine-readable projection layer. The canonical source of report -truth remains the JSON report document. +SARIF is a deterministic projection layer. The canonical source of truth +remains the report document. ## Source files -- `codeclone/report/sarif.py` -- `codeclone/report/json_contract.py` +- `codeclone/report/renderers/sarif.py` +- `codeclone/report/document/builder.py` - `codeclone/report/findings.py` ## Design model @@ -32,18 +32,12 @@ anchored through `%SRCROOT%`. 
Current behavior: -- `run.originalUriBaseIds["%SRCROOT%"]` points at the scan root when an - absolute scan root is known +- `run.originalUriBaseIds["%SRCROOT%"]` points at the scan root when known - `run.artifacts[*]` enumerates referenced files - `artifactLocation.uri` uses repository-relative paths - `artifactLocation.index` aligns locations with artifacts for stable linking - `run.invocations[*].workingDirectory` mirrors the scan root URI when available -- `run.invocations[*].startTimeUtc` is emitted when analysis start time is - available in canonical runtime meta -- `run.automationDetails.id` is unique per run so code-scanning systems can - correlate uploads reliably - -This helps consumers resolve results back to workspace files consistently. +- `run.automationDetails.id` is unique per run ## Result model @@ -52,43 +46,12 @@ Current SARIF output includes: - `tool.driver.rules[*]` with stable rule IDs and help links - `results[*]` for clone groups, dead code, design findings, and structural findings - `locations[*]` with primary file/line mapping -- `locations[*].message` and `relatedLocations[*].message` with - human-readable role labels such as `Representative occurrence` -- `relatedLocations[*]` when the result has multiple relevant locations +- `relatedLocations[*]` for multi-location findings - `partialFingerprints.primaryLocationLineHash` for stable per-location identity - without encoding line numbers into the hash digest -- result `properties` with stable identity/context fields such as primary path, - qualname, and region - explicit `kind: "fail"` on results -For clone results, CodeClone also carries novelty-aware metadata when known: - -- `baselineState` - -This improves usefulness in IDE/code-scanning flows that distinguish new vs -known findings. - -Coverage join can materialize `coverage` / `coverage_hotspot` and -`coverage_scope_gap` design findings when the canonical report already -contains valid `metrics.families.coverage_join` facts. 
SARIF projects those
-findings like other design findings; it does not parse Cobertura XML or create
-coverage-specific analysis truth.
-
-## Rule metadata
-
-Rule records are intentionally richer than a minimal SARIF export.
-
-They include:
-
-- stable rule IDs
-- stable rule names derived from `ruleId`
-- display name
-- help text / markdown
-- tags
-- docs-facing help URI
-
-The goal is not only schema compliance, but a better consumer experience in IDEs
-and code-scanning platforms.
+Coverage join may materialize coverage design findings only when the canonical
+report already contains valid `metrics.families.coverage_join` facts.
- ## Validation and tests Relevant tests: @@ -124,7 +77,7 @@ Relevant tests: Contract-adjacent coverage includes: -- reuse of canonical report document +- reuse of the canonical report document - stable SARIF branch invariants - deterministic artifacts/rules/results ordering From 3ba948c5562978ddf728c36345678856e356b68c Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 22 Apr 2026 14:59:33 +0500 Subject: [PATCH 10/32] fix(defaults): centralize shared runtime defaults --- benchmarks/run_benchmark.py | 3 +- codeclone/analysis/units.py | 14 +++- codeclone/baseline/trust.py | 3 +- codeclone/cache/store.py | 23 ++++-- codeclone/cache/versioning.py | 4 +- codeclone/config/spec.py | 37 +++++---- codeclone/contracts/__init__.py | 34 ++++++++ codeclone/core/_types.py | 3 +- codeclone/core/pipeline.py | 5 +- codeclone/core/reporting.py | 3 +- codeclone/core/worker.py | 14 +++- codeclone/report/gates/evaluator.py | 4 +- codeclone/surfaces/cli/execution.py | 4 +- codeclone/surfaces/cli/startup.py | 4 +- codeclone/surfaces/mcp/server.py | 36 +++++---- codeclone/surfaces/mcp/session.py | 8 +- codeclone/ui_messages/__init__.py | 53 +++++++++---- tests/test_benchmark.py | 59 ++++++++++++++ tests/test_defaults_contract.py | 119 ++++++++++++++++++++++++++++ 19 files changed, 351 insertions(+), 79 deletions(-) create mode 100644 tests/test_defaults_contract.py diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index ad41e07..c5b3dbd 100755 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -26,6 +26,7 @@ from codeclone.baseline import current_python_tag BENCHMARK_SCHEMA_VERSION = "1.0" +BENCHMARK_CLI_MODULE = "codeclone.main" BENCHMARK_NEUTRAL_ARGS: tuple[str, ...] 
= ( "--no-fail-on-new", "--no-fail-on-new-metrics", @@ -162,7 +163,7 @@ def _run_cli_once( cmd = [ python_executable, "-m", - "codeclone.cli", + BENCHMARK_CLI_MODULE, str(target), *BENCHMARK_NEUTRAL_ARGS, "--json", diff --git a/codeclone/analysis/units.py b/codeclone/analysis/units.py index de68bf8..5c0fa93 100644 --- a/codeclone/analysis/units.py +++ b/codeclone/analysis/units.py @@ -11,6 +11,12 @@ from .. import qualnames as _qualnames from ..blocks import extract_blocks, extract_segments +from ..contracts import ( + DEFAULT_BLOCK_MIN_LOC, + DEFAULT_BLOCK_MIN_STMT, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, +) from ..contracts.errors import ParseError from ..findings.structural.detectors import scan_function_structure from ..metrics.adoption import collect_module_adoption @@ -84,10 +90,10 @@ def extract_units_and_stats_from_source( min_loc: int, min_stmt: int, *, - block_min_loc: int = 20, - block_min_stmt: int = 8, - segment_min_loc: int = 20, - segment_min_stmt: int = 10, + block_min_loc: int = DEFAULT_BLOCK_MIN_LOC, + block_min_stmt: int = DEFAULT_BLOCK_MIN_STMT, + segment_min_loc: int = DEFAULT_SEGMENT_MIN_LOC, + segment_min_stmt: int = DEFAULT_SEGMENT_MIN_STMT, collect_structural_findings: bool = True, collect_api_surface: bool = False, api_include_private_modules: bool = False, diff --git a/codeclone/baseline/trust.py b/codeclone/baseline/trust.py index a47f7ca..fa8179c 100644 --- a/codeclone/baseline/trust.py +++ b/codeclone/baseline/trust.py @@ -17,6 +17,7 @@ import orjson +from ..contracts import DEFAULT_MAX_BASELINE_SIZE_MB from ..contracts.errors import BaselineValidationError from ..utils.json_io import read_json_object as _read_json_object @@ -25,7 +26,7 @@ BASELINE_GENERATOR = "codeclone" _BASELINE_SCHEMA_MAX_MINOR_BY_MAJOR = {1: 0, 2: 1} -MAX_BASELINE_SIZE_BYTES = 5 * 1024 * 1024 +MAX_BASELINE_SIZE_BYTES = DEFAULT_MAX_BASELINE_SIZE_MB * 1024 * 1024 _UTC_ISO8601_Z_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$") diff --git 
a/codeclone/cache/store.py b/codeclone/cache/store.py index 3cfb551..e291425 100644 --- a/codeclone/cache/store.py +++ b/codeclone/cache/store.py @@ -12,7 +12,16 @@ from pathlib import Path from ..baseline.trust import current_python_tag -from ..contracts import BASELINE_FINGERPRINT_VERSION, CACHE_VERSION +from ..contracts import ( + BASELINE_FINGERPRINT_VERSION, + CACHE_VERSION, + DEFAULT_BLOCK_MIN_LOC, + DEFAULT_BLOCK_MIN_STMT, + DEFAULT_MIN_LOC, + DEFAULT_MIN_STMT, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, +) from ..contracts.errors import CacheError from ..models import BlockUnit, FileMetrics, SegmentUnit, StructuralFindingGroup, Unit from ._canonicalize import ( @@ -100,12 +109,12 @@ def __init__( *, root: str | Path | None = None, max_size_bytes: int | None = None, - min_loc: int = 10, - min_stmt: int = 6, - block_min_loc: int = 20, - block_min_stmt: int = 8, - segment_min_loc: int = 20, - segment_min_stmt: int = 10, + min_loc: int = DEFAULT_MIN_LOC, + min_stmt: int = DEFAULT_MIN_STMT, + block_min_loc: int = DEFAULT_BLOCK_MIN_LOC, + block_min_stmt: int = DEFAULT_BLOCK_MIN_STMT, + segment_min_loc: int = DEFAULT_SEGMENT_MIN_LOC, + segment_min_stmt: int = DEFAULT_SEGMENT_MIN_STMT, collect_api_surface: bool = False, ): self.path = Path(path) diff --git a/codeclone/cache/versioning.py b/codeclone/cache/versioning.py index d204994..2081242 100644 --- a/codeclone/cache/versioning.py +++ b/codeclone/cache/versioning.py @@ -10,12 +10,12 @@ from pathlib import Path from typing import TypedDict -from ..contracts import CACHE_VERSION +from ..contracts import CACHE_VERSION, DEFAULT_MAX_CACHE_SIZE_MB from ..contracts.schemas import AnalysisProfile from .entries import CacheEntry from .integrity import as_int_or_none, as_str_dict -MAX_CACHE_SIZE_BYTES = 50 * 1024 * 1024 +MAX_CACHE_SIZE_BYTES = DEFAULT_MAX_CACHE_SIZE_MB * 1024 * 1024 LEGACY_CACHE_SECRET_FILENAME = ".cache_secret" _DEFAULT_WIRE_UNIT_FLOW_PROFILES = ( 0, diff --git a/codeclone/config/spec.py 
b/codeclone/config/spec.py index 4ea9a7c..798e2bf 100644 --- a/codeclone/config/spec.py +++ b/codeclone/config/spec.py @@ -5,10 +5,27 @@ from .. import ui_messages as ui from ..contracts import ( + DEFAULT_BASELINE_PATH, + DEFAULT_BLOCK_MIN_LOC, + DEFAULT_BLOCK_MIN_STMT, DEFAULT_COHESION_THRESHOLD, DEFAULT_COMPLEXITY_THRESHOLD, DEFAULT_COUPLING_THRESHOLD, + DEFAULT_COVERAGE_MIN, DEFAULT_HEALTH_THRESHOLD, + DEFAULT_HTML_REPORT_PATH, + DEFAULT_JSON_REPORT_PATH, + DEFAULT_MARKDOWN_REPORT_PATH, + DEFAULT_MAX_BASELINE_SIZE_MB, + DEFAULT_MAX_CACHE_SIZE_MB, + DEFAULT_MIN_LOC, + DEFAULT_MIN_STMT, + DEFAULT_PROCESSES, + DEFAULT_ROOT, + DEFAULT_SARIF_REPORT_PATH, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, + DEFAULT_TEXT_REPORT_PATH, ) CliKind = Literal[ @@ -22,24 +39,6 @@ "version", ] -DEFAULT_ROOT = "." -DEFAULT_MIN_LOC = 10 -DEFAULT_MIN_STMT = 6 -DEFAULT_BLOCK_MIN_LOC = 20 -DEFAULT_BLOCK_MIN_STMT = 8 -DEFAULT_SEGMENT_MIN_LOC = 20 -DEFAULT_SEGMENT_MIN_STMT = 10 -DEFAULT_PROCESSES = 4 -DEFAULT_MAX_CACHE_SIZE_MB = 50 -DEFAULT_MAX_BASELINE_SIZE_MB = 5 - -DEFAULT_BASELINE_PATH = "codeclone.baseline.json" -DEFAULT_HTML_REPORT_PATH = ".cache/codeclone/report.html" -DEFAULT_JSON_REPORT_PATH = ".cache/codeclone/report.json" -DEFAULT_MARKDOWN_REPORT_PATH = ".cache/codeclone/report.md" -DEFAULT_SARIF_REPORT_PATH = ".cache/codeclone/report.sarif" -DEFAULT_TEXT_REPORT_PATH = ".cache/codeclone/report.txt" - _UNSET: Final[object] = object() _INFER_PYPROJECT_KEY: Final[object] = object() @@ -509,7 +508,7 @@ def _option( group="Quality gates", cli_kind="value", flags=("--coverage-min",), - default=50, + default=DEFAULT_COVERAGE_MIN, value_type=int, metavar="PERCENT", help_text=ui.HELP_COVERAGE_MIN, diff --git a/codeclone/contracts/__init__.py b/codeclone/contracts/__init__.py index 170fde6..9564ac5 100644 --- a/codeclone/contracts/__init__.py +++ b/codeclone/contracts/__init__.py @@ -23,6 +23,23 @@ DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD: Final = 10 
DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD: Final = 4 DEFAULT_HEALTH_THRESHOLD: Final = 60 +DEFAULT_ROOT: Final = "." +DEFAULT_MIN_LOC: Final = 10 +DEFAULT_MIN_STMT: Final = 6 +DEFAULT_BLOCK_MIN_LOC: Final = 20 +DEFAULT_BLOCK_MIN_STMT: Final = 8 +DEFAULT_SEGMENT_MIN_LOC: Final = 20 +DEFAULT_SEGMENT_MIN_STMT: Final = 10 +DEFAULT_PROCESSES: Final = 4 +DEFAULT_MAX_CACHE_SIZE_MB: Final = 50 +DEFAULT_MAX_BASELINE_SIZE_MB: Final = 5 +DEFAULT_COVERAGE_MIN: Final = 50 +DEFAULT_BASELINE_PATH: Final = "codeclone.baseline.json" +DEFAULT_HTML_REPORT_PATH: Final = ".cache/codeclone/report.html" +DEFAULT_JSON_REPORT_PATH: Final = ".cache/codeclone/report.json" +DEFAULT_MARKDOWN_REPORT_PATH: Final = ".cache/codeclone/report.md" +DEFAULT_SARIF_REPORT_PATH: Final = ".cache/codeclone/report.sarif" +DEFAULT_TEXT_REPORT_PATH: Final = ".cache/codeclone/report.txt" COMPLEXITY_RISK_LOW_MAX: Final = 10 COMPLEXITY_RISK_MEDIUM_MAX: Final = 20 @@ -81,13 +98,30 @@ def cli_help_epilog() -> str: "COMPLEXITY_RISK_MEDIUM_MAX", "COUPLING_RISK_LOW_MAX", "COUPLING_RISK_MEDIUM_MAX", + "DEFAULT_BASELINE_PATH", + "DEFAULT_BLOCK_MIN_LOC", + "DEFAULT_BLOCK_MIN_STMT", "DEFAULT_COHESION_THRESHOLD", "DEFAULT_COMPLEXITY_THRESHOLD", "DEFAULT_COUPLING_THRESHOLD", + "DEFAULT_COVERAGE_MIN", "DEFAULT_HEALTH_THRESHOLD", + "DEFAULT_HTML_REPORT_PATH", + "DEFAULT_JSON_REPORT_PATH", + "DEFAULT_MARKDOWN_REPORT_PATH", + "DEFAULT_MAX_BASELINE_SIZE_MB", + "DEFAULT_MAX_CACHE_SIZE_MB", + "DEFAULT_MIN_LOC", + "DEFAULT_MIN_STMT", + "DEFAULT_PROCESSES", "DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD", "DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD", "DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD", + "DEFAULT_ROOT", + "DEFAULT_SARIF_REPORT_PATH", + "DEFAULT_SEGMENT_MIN_LOC", + "DEFAULT_SEGMENT_MIN_STMT", + "DEFAULT_TEXT_REPORT_PATH", "DOCS_URL", "HEALTH_WEIGHTS", "ISSUES_URL", diff --git a/codeclone/core/_types.py b/codeclone/core/_types.py index 4bcf448..8f15314 100644 --- a/codeclone/core/_types.py +++ b/codeclone/core/_types.py @@ -17,6 
+17,7 @@ from ..analysis.normalizer import NormalizationConfig from ..cache.entries import FileStat from ..cache.projection import SegmentReportProjection +from ..contracts import DEFAULT_PROCESSES from ..models import ( BlockUnit, ClassMetrics, @@ -43,7 +44,7 @@ DEFAULT_BATCH_SIZE = 100 PARALLEL_MIN_FILES_PER_WORKER = 8 PARALLEL_MIN_FILES_FLOOR = 16 -DEFAULT_RUNTIME_PROCESSES = 4 +DEFAULT_RUNTIME_PROCESSES = DEFAULT_PROCESSES @dataclass(frozen=True, slots=True) diff --git a/codeclone/core/pipeline.py b/codeclone/core/pipeline.py index e3c0b20..7625dee 100644 --- a/codeclone/core/pipeline.py +++ b/codeclone/core/pipeline.py @@ -8,6 +8,7 @@ from collections.abc import Mapping, Sequence +from ..contracts import DEFAULT_COVERAGE_MIN from ..findings.clones.grouping import ( build_block_groups, build_groups, @@ -309,7 +310,7 @@ def analyze( root_path=boot.root, units=processing.units, hotspot_threshold_percent=int( - getattr(boot.args, "coverage_min", 50) + getattr(boot.args, "coverage_min", DEFAULT_COVERAGE_MIN) ), ) except CoverageJoinParseError as exc: @@ -317,7 +318,7 @@ def analyze( coverage_xml=str(coverage_xml_path), status="invalid", hotspot_threshold_percent=int( - getattr(boot.args, "coverage_min", 50) + getattr(boot.args, "coverage_min", DEFAULT_COVERAGE_MIN) ), invalid_reason=str(exc), ) diff --git a/codeclone/core/reporting.py b/codeclone/core/reporting.py index 5faf4ce..d43683f 100644 --- a/codeclone/core/reporting.py +++ b/codeclone/core/reporting.py @@ -8,6 +8,7 @@ from collections.abc import Callable, Collection, Mapping +from ..contracts import DEFAULT_COVERAGE_MIN from ..models import MetricsDiff from ..report.gates.evaluator import GateResult, GateState from ..report.gates.evaluator import MetricGateConfig as _MetricGateConfig @@ -246,7 +247,7 @@ def gate( ), min_typing_coverage=int(getattr(boot.args, "min_typing_coverage", -1)), min_docstring_coverage=int(getattr(boot.args, "min_docstring_coverage", -1)), - coverage_min=int(getattr(boot.args, 
"coverage_min", 50)), + coverage_min=int(getattr(boot.args, "coverage_min", DEFAULT_COVERAGE_MIN)), fail_on_new=bool(getattr(boot.args, "fail_on_new", False)), fail_threshold=int(getattr(boot.args, "fail_threshold", -1)), ) diff --git a/codeclone/core/worker.py b/codeclone/core/worker.py index d88206c..4cbd52c 100644 --- a/codeclone/core/worker.py +++ b/codeclone/core/worker.py @@ -14,6 +14,12 @@ from ..analysis.normalizer import NormalizationConfig from ..analysis.units import extract_units_and_stats_from_source from ..cache.entries import FileStat +from ..contracts import ( + DEFAULT_BLOCK_MIN_LOC, + DEFAULT_BLOCK_MIN_STMT, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, +) from ..scanner import module_name_from_path from ._types import MAX_FILE_SIZE, FileProcessResult @@ -27,10 +33,10 @@ def process_file( collect_structural_findings: bool = True, collect_api_surface: bool = False, api_include_private_modules: bool = False, - block_min_loc: int = 20, - block_min_stmt: int = 8, - segment_min_loc: int = 20, - segment_min_stmt: int = 10, + block_min_loc: int = DEFAULT_BLOCK_MIN_LOC, + block_min_stmt: int = DEFAULT_BLOCK_MIN_STMT, + segment_min_loc: int = DEFAULT_SEGMENT_MIN_LOC, + segment_min_stmt: int = DEFAULT_SEGMENT_MIN_STMT, ) -> FileProcessResult: try: try: diff --git a/codeclone/report/gates/evaluator.py b/codeclone/report/gates/evaluator.py index a55b12d..62310f2 100644 --- a/codeclone/report/gates/evaluator.py +++ b/codeclone/report/gates/evaluator.py @@ -10,7 +10,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from ...contracts import ExitCode +from ...contracts import DEFAULT_COVERAGE_MIN, ExitCode from ...metrics.registry import METRIC_FAMILIES from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping @@ -35,7 +35,7 @@ class MetricGateConfig: fail_on_untested_hotspots: bool = False min_typing_coverage: int = -1 min_docstring_coverage: int = -1 - coverage_min: int = 50 + 
coverage_min: int = DEFAULT_COVERAGE_MIN fail_on_new: bool = False fail_threshold: int = -1 diff --git a/codeclone/surfaces/cli/execution.py b/codeclone/surfaces/cli/execution.py index 08ec6e4..6f8baaa 100644 --- a/codeclone/surfaces/cli/execution.py +++ b/codeclone/surfaces/cli/execution.py @@ -23,7 +23,7 @@ from ... import ui_messages as ui from ...cache.store import Cache -from ...contracts import ExitCode +from ...contracts import DEFAULT_HTML_REPORT_PATH, ExitCode from ...contracts.errors import CacheError from ...core._types import AnalysisResult, BootstrapResult, DiscoveryResult from ...core._types import ProcessingResult as PipelineProcessingResult @@ -273,7 +273,7 @@ def enforce_gating( sys.exit(ExitCode.GATING_FAILURE) if "clone:new" in gate_result.reasons: - default_report = Path(".cache/codeclone/report.html") + default_report = Path(DEFAULT_HTML_REPORT_PATH) resolved_html_report_path = html_report_path if resolved_html_report_path is None and default_report.exists(): resolved_html_report_path = str(default_report) diff --git a/codeclone/surfaces/cli/startup.py b/codeclone/surfaces/cli/startup.py index 8cd145c..b5c2b41 100644 --- a/codeclone/surfaces/cli/startup.py +++ b/codeclone/surfaces/cli/startup.py @@ -14,7 +14,7 @@ from ... 
import ui_messages as ui from ...config.pyproject_loader import ConfigValidationError -from ...contracts import ExitCode +from ...contracts import DEFAULT_ROOT, ExitCode from .attrs import text_attr from .baseline_state import MetricsBaselineSectionProbe from .types import CLIArgsLike, ParserWithDefaults, StatusConsole @@ -55,7 +55,7 @@ def exit_contract_error( def resolve_existing_root_path(*, args: object, printer: StatusConsole) -> Path: try: - root_path = Path(text_attr(args, "root", ".")).resolve() + root_path = Path(text_attr(args, "root", DEFAULT_ROOT)).resolve() except OSError as exc: exit_contract_error( ui.ERR_INVALID_ROOT_PATH.format(error=exc), diff --git a/codeclone/surfaces/mcp/server.py b/codeclone/surfaces/mcp/server.py index 81f264a..3599bac 100644 --- a/codeclone/surfaces/mcp/server.py +++ b/codeclone/surfaces/mcp/server.py @@ -13,7 +13,7 @@ from typing import TYPE_CHECKING, Literal, TypeVar from ... import __version__ -from ...contracts import DOCS_URL +from ...contracts import DEFAULT_COVERAGE_MIN, DOCS_URL from .service import CodeCloneMCPService from .session import ( DEFAULT_MCP_HISTORY_LIMIT, @@ -51,6 +51,12 @@ "CodeClone MCP support requires the optional 'mcp' extra. 
" "Install it with: pip install 'codeclone[mcp]'" ) +DEFAULT_MCP_HOST = "127.0.0.1" +DEFAULT_MCP_PORT = 8000 +DEFAULT_MCP_JSON_RESPONSE = True +DEFAULT_MCP_STATELESS_HTTP = True +DEFAULT_MCP_DEBUG = False +DEFAULT_MCP_LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO" class MCPDependencyError(RuntimeError): @@ -122,12 +128,14 @@ def _validated_cache_policy(value: str) -> CachePolicy: def build_mcp_server( *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT, - host: str = "127.0.0.1", - port: int = 8000, - json_response: bool = False, - stateless_http: bool = False, - debug: bool = False, - log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO", + host: str = DEFAULT_MCP_HOST, + port: int = DEFAULT_MCP_PORT, + json_response: bool = DEFAULT_MCP_JSON_RESPONSE, + stateless_http: bool = DEFAULT_MCP_STATELESS_HTTP, + debug: bool = DEFAULT_MCP_DEBUG, + log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = ( + DEFAULT_MCP_LOG_LEVEL + ), ) -> FastMCP: """Build and register the local read-only CodeClone FastMCP server.""" @@ -389,7 +397,7 @@ def evaluate_gates( fail_on_untested_hotspots: bool = False, min_typing_coverage: int = -1, min_docstring_coverage: int = -1, - coverage_min: int = 50, + coverage_min: int = DEFAULT_COVERAGE_MIN, ) -> dict[str, object]: return service.evaluate_gates( MCPGateRequest( @@ -911,7 +919,7 @@ def build_parser() -> argparse.ArgumentParser: ) parser.add_argument( "--host", - default="127.0.0.1", + default=DEFAULT_MCP_HOST, help="Host to bind when using streamable-http.", ) parser.add_argument( @@ -926,7 +934,7 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument( "--port", type=int, - default=8000, + default=DEFAULT_MCP_PORT, help="Port to bind when using streamable-http.", ) parser.add_argument( @@ -941,25 +949,25 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument( "--json-response", action=argparse.BooleanOptionalAction, - default=True, + 
default=DEFAULT_MCP_JSON_RESPONSE, help="Use JSON responses for streamable-http transport.", ) parser.add_argument( "--stateless-http", action=argparse.BooleanOptionalAction, - default=True, + default=DEFAULT_MCP_STATELESS_HTTP, help="Use stateless Streamable HTTP mode when transport is streamable-http.", ) parser.add_argument( "--debug", action=argparse.BooleanOptionalAction, - default=False, + default=DEFAULT_MCP_DEBUG, help="Enable FastMCP debug mode.", ) parser.add_argument( "--log-level", choices=("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"), - default="INFO", + default=DEFAULT_MCP_LOG_LEVEL, help="FastMCP server log level.", ) return parser diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 33fcc98..2a11f09 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -39,6 +39,8 @@ DEFAULT_SEGMENT_MIN_STMT, ) from ...contracts import ( + DEFAULT_COVERAGE_MIN, + DEFAULT_JSON_REPORT_PATH, DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, @@ -169,7 +171,7 @@ SummaryFocus = Literal["repository", "production", "changed_paths"] _LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser() -_REPORT_DUMMY_PATH = Path(".cache/codeclone/report.json") +_REPORT_DUMMY_PATH = Path(DEFAULT_JSON_REPORT_PATH) _HEALTH_SCOPE_REPOSITORY: Final[HealthScope] = "repository" _FOCUS_REPOSITORY: Final[SummaryFocus] = "repository" _FOCUS_PRODUCTION: Final[SummaryFocus] = "production" @@ -999,7 +1001,7 @@ class MCPGateRequest: fail_on_untested_hotspots: bool = False min_typing_coverage: int = -1 min_docstring_coverage: int = -1 - coverage_min: int = 50 + coverage_min: int = DEFAULT_COVERAGE_MIN @dataclass(frozen=True, slots=True) @@ -4066,7 +4068,7 @@ def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namesp api_surface=False, coverage_xml=None, fail_on_untested_hotspots=False, - coverage_min=50, + 
coverage_min=DEFAULT_COVERAGE_MIN, design_complexity_threshold=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, design_coupling_threshold=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, design_cohesion_threshold=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, diff --git a/codeclone/ui_messages/__init__.py b/codeclone/ui_messages/__init__.py index a945dde..d59e50c 100644 --- a/codeclone/ui_messages/__init__.py +++ b/codeclone/ui_messages/__init__.py @@ -13,7 +13,21 @@ from pathlib import Path from .. import __version__ -from ..contracts import ISSUES_URL +from ..contracts import ( + DEFAULT_BASELINE_PATH, + DEFAULT_COVERAGE_MIN, + DEFAULT_HTML_REPORT_PATH, + DEFAULT_JSON_REPORT_PATH, + DEFAULT_MARKDOWN_REPORT_PATH, + DEFAULT_MAX_BASELINE_SIZE_MB, + DEFAULT_MAX_CACHE_SIZE_MB, + DEFAULT_MIN_LOC, + DEFAULT_MIN_STMT, + DEFAULT_PROCESSES, + DEFAULT_SARIF_REPORT_PATH, + DEFAULT_TEXT_REPORT_PATH, + ISSUES_URL, +) from ..domain.quality import ( HEALTH_GRADE_A, HEALTH_GRADE_B, @@ -31,9 +45,15 @@ HELP_VERSION = "Print the CodeClone version and exit." HELP_ROOT = "Project root directory to scan.\nDefaults to the current directory." -HELP_MIN_LOC = "Minimum Lines of Code (LOC) required for clone analysis.\nDefault: 10." -HELP_MIN_STMT = "Minimum AST statement count required for clone analysis.\nDefault: 6." -HELP_PROCESSES = "Number of parallel worker processes.\nDefault: 4." +HELP_MIN_LOC = ( + "Minimum Lines of Code (LOC) required for clone analysis.\n" + f"Default: {DEFAULT_MIN_LOC}." +) +HELP_MIN_STMT = ( + "Minimum AST statement count required for clone analysis.\n" + f"Default: {DEFAULT_MIN_STMT}." +) +HELP_PROCESSES = f"Number of parallel worker processes.\nDefault: {DEFAULT_PROCESSES}." HELP_CHANGED_ONLY = ( "Limit clone gating and changed-scope summaries to findings that touch\n" "files from a git diff selection." @@ -53,11 +73,15 @@ HELP_CACHE_DIR_LEGACY = ( "Legacy alias for --cache-path.\nPrefer --cache-path in new configurations." 
) -HELP_MAX_BASELINE_SIZE_MB = "Maximum allowed baseline size in MB.\nDefault: 5." -HELP_MAX_CACHE_SIZE_MB = "Maximum cache file size in MB.\nDefault: 50." +HELP_MAX_BASELINE_SIZE_MB = ( + f"Maximum allowed baseline size in MB.\nDefault: {DEFAULT_MAX_BASELINE_SIZE_MB}." +) +HELP_MAX_CACHE_SIZE_MB = ( + f"Maximum cache file size in MB.\nDefault: {DEFAULT_MAX_CACHE_SIZE_MB}." +) HELP_BASELINE = ( "Path to the clone baseline.\n" - f"If FILE is omitted, uses {Path('codeclone.baseline.json')}." + f"If FILE is omitted, uses {Path(DEFAULT_BASELINE_PATH)}." ) HELP_UPDATE_BASELINE = ( "Overwrite the clone baseline with current results.\nDisabled by default." @@ -127,7 +151,8 @@ ) HELP_COVERAGE_MIN = ( "Coverage threshold for untested hotspot detection.\n" - "Threshold is a whole percent from 0 to 100.\nDefault: 50." + "Threshold is a whole percent from 0 to 100.\n" + f"Default: {DEFAULT_COVERAGE_MIN}." ) HELP_CI = ( "Enable CI preset.\n" @@ -140,30 +165,30 @@ ) HELP_METRICS_BASELINE = ( "Path to the metrics baseline.\n" - f"If FILE is omitted, uses {Path('codeclone.baseline.json')}." + f"If FILE is omitted, uses {Path(DEFAULT_BASELINE_PATH)}." ) HELP_SKIP_METRICS = "Skip full metrics analysis and run in clone-only mode." HELP_SKIP_DEAD_CODE = "Skip dead code detection." HELP_SKIP_DEPENDENCIES = "Skip dependency graph analysis." HELP_HTML = ( "Generate an HTML report.\n" - "If FILE is omitted, writes to .cache/codeclone/report.html." + f"If FILE is omitted, writes to {DEFAULT_HTML_REPORT_PATH}." ) HELP_JSON = ( "Generate the canonical JSON report.\n" - "If FILE is omitted, writes to .cache/codeclone/report.json." + f"If FILE is omitted, writes to {DEFAULT_JSON_REPORT_PATH}." ) HELP_MD = ( "Generate a Markdown report.\n" - "If FILE is omitted, writes to .cache/codeclone/report.md." + f"If FILE is omitted, writes to {DEFAULT_MARKDOWN_REPORT_PATH}." ) HELP_SARIF = ( "Generate a SARIF 2.1.0 report.\n" - "If FILE is omitted, writes to .cache/codeclone/report.sarif." 
+ f"If FILE is omitted, writes to {DEFAULT_SARIF_REPORT_PATH}." ) HELP_TEXT = ( "Generate a plain-text report.\n" - "If FILE is omitted, writes to .cache/codeclone/report.txt." + f"If FILE is omitted, writes to {DEFAULT_TEXT_REPORT_PATH}." ) HELP_OPEN_HTML_REPORT = ( "Open the generated HTML report in the default browser.\nRequires --html." diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 2a9435e..373f0b1 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -6,12 +6,17 @@ from __future__ import annotations +from pathlib import Path +from subprocess import CompletedProcess + import pytest from benchmarks.run_benchmark import ( + BENCHMARK_CLI_MODULE, BENCHMARK_NEUTRAL_ARGS, RunMeasurement, Scenario, + _run_cli_once, _timing_regressions, _validate_inventory_sample, ) @@ -77,6 +82,60 @@ def test_benchmark_neutral_args_disable_repo_quality_gates() -> None: assert "--skip-metrics" not in BENCHMARK_NEUTRAL_ARGS +def test_benchmark_runner_invokes_canonical_main_entrypoint( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + captured: dict[str, object] = {} + + def fake_run( + cmd: list[str], + *, + check: bool, + capture_output: bool, + text: bool, + env: dict[str, str], + ) -> CompletedProcess[str]: + captured["cmd"] = cmd + captured["check"] = check + captured["capture_output"] = capture_output + captured["text"] = text + captured["env"] = env + return CompletedProcess(cmd, 0, stdout="", stderr="") + + monkeypatch.setattr("benchmarks.run_benchmark.subprocess.run", fake_run) + monkeypatch.setattr( + "benchmarks.run_benchmark._read_report", + lambda _report_path: ( + "digest", + {"found": 10, "analyzed": 10, "cached": 0, "skipped": 0}, + ), + ) + + _run_cli_once( + target=tmp_path, + python_executable="python3", + cache_path=tmp_path / "cache.json", + report_path=tmp_path / "report.json", + extra_args=("--skip-metrics",), + ) + + assert captured["cmd"] == [ + "python3", + "-m", + BENCHMARK_CLI_MODULE, + str(tmp_path), + 
*BENCHMARK_NEUTRAL_ARGS, + "--json", + str(tmp_path / "report.json"), + "--cache-path", + str(tmp_path / "cache.json"), + "--no-progress", + "--quiet", + "--skip-metrics", + ] + + @pytest.mark.parametrize( ("scenario", "measurement", "message"), ( diff --git a/tests/test_defaults_contract.py b/tests/test_defaults_contract.py new file mode 100644 index 0000000..405c83d --- /dev/null +++ b/tests/test_defaults_contract.py @@ -0,0 +1,119 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import inspect +from pathlib import Path + +from codeclone.baseline.trust import MAX_BASELINE_SIZE_BYTES +from codeclone.cache.versioning import MAX_CACHE_SIZE_BYTES +from codeclone.config import spec as spec_mod +from codeclone.config.argparse_builder import build_parser +from codeclone.contracts import ( + DEFAULT_BASELINE_PATH, + DEFAULT_BLOCK_MIN_LOC, + DEFAULT_BLOCK_MIN_STMT, + DEFAULT_COVERAGE_MIN, + DEFAULT_MAX_BASELINE_SIZE_MB, + DEFAULT_MAX_CACHE_SIZE_MB, + DEFAULT_MIN_LOC, + DEFAULT_MIN_STMT, + DEFAULT_PROCESSES, + DEFAULT_ROOT, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, +) +from codeclone.core._types import DEFAULT_RUNTIME_PROCESSES +from codeclone.report.gates.evaluator import MetricGateConfig +from codeclone.surfaces.mcp import server as mcp_server +from codeclone.surfaces.mcp.service import CodeCloneMCPService +from codeclone.surfaces.mcp.session import MCPAnalysisRequest, MCPGateRequest + + +def test_config_spec_reexports_shared_runtime_defaults() -> None: + assert spec_mod.DEFAULT_ROOT == DEFAULT_ROOT + assert spec_mod.DEFAULT_MIN_LOC == DEFAULT_MIN_LOC + assert spec_mod.DEFAULT_MIN_STMT == DEFAULT_MIN_STMT + assert spec_mod.DEFAULT_BLOCK_MIN_LOC == DEFAULT_BLOCK_MIN_LOC + assert 
spec_mod.DEFAULT_BLOCK_MIN_STMT == DEFAULT_BLOCK_MIN_STMT + assert spec_mod.DEFAULT_SEGMENT_MIN_LOC == DEFAULT_SEGMENT_MIN_LOC + assert spec_mod.DEFAULT_SEGMENT_MIN_STMT == DEFAULT_SEGMENT_MIN_STMT + assert spec_mod.DEFAULT_PROCESSES == DEFAULT_PROCESSES + assert spec_mod.DEFAULT_MAX_CACHE_SIZE_MB == DEFAULT_MAX_CACHE_SIZE_MB + assert spec_mod.DEFAULT_MAX_BASELINE_SIZE_MB == DEFAULT_MAX_BASELINE_SIZE_MB + assert spec_mod.DEFAULT_BASELINE_PATH == DEFAULT_BASELINE_PATH + assert spec_mod.DEFAULTS_BY_DEST["coverage_min"] == DEFAULT_COVERAGE_MIN + + +def test_cli_parser_defaults_follow_contract_defaults() -> None: + args = build_parser("2.0.0").parse_args([]) + + assert args.root == DEFAULT_ROOT + assert args.min_loc == DEFAULT_MIN_LOC + assert args.min_stmt == DEFAULT_MIN_STMT + assert args.block_min_loc == DEFAULT_BLOCK_MIN_LOC + assert args.block_min_stmt == DEFAULT_BLOCK_MIN_STMT + assert args.segment_min_loc == DEFAULT_SEGMENT_MIN_LOC + assert args.segment_min_stmt == DEFAULT_SEGMENT_MIN_STMT + assert args.processes == DEFAULT_PROCESSES + assert args.max_cache_size_mb == DEFAULT_MAX_CACHE_SIZE_MB + assert args.baseline == DEFAULT_BASELINE_PATH + assert args.max_baseline_size_mb == DEFAULT_MAX_BASELINE_SIZE_MB + assert args.metrics_baseline == DEFAULT_BASELINE_PATH + assert args.coverage_min == DEFAULT_COVERAGE_MIN + + +def test_size_byte_limits_derive_from_contract_megabyte_defaults() -> None: + assert MAX_CACHE_SIZE_BYTES == DEFAULT_MAX_CACHE_SIZE_MB * 1024 * 1024 + assert MAX_BASELINE_SIZE_BYTES == DEFAULT_MAX_BASELINE_SIZE_MB * 1024 * 1024 + + +def test_runtime_and_gate_defaults_follow_contract_defaults(tmp_path: Path) -> None: + service = CodeCloneMCPService() + args = service._build_args( + root_path=tmp_path, + request=MCPAnalysisRequest(respect_pyproject=False), + ) + + assert DEFAULT_RUNTIME_PROCESSES == DEFAULT_PROCESSES + assert args.min_loc == DEFAULT_MIN_LOC + assert args.min_stmt == DEFAULT_MIN_STMT + assert args.block_min_loc == DEFAULT_BLOCK_MIN_LOC 
+ assert args.block_min_stmt == DEFAULT_BLOCK_MIN_STMT + assert args.segment_min_loc == DEFAULT_SEGMENT_MIN_LOC + assert args.segment_min_stmt == DEFAULT_SEGMENT_MIN_STMT + assert args.max_cache_size_mb == DEFAULT_MAX_CACHE_SIZE_MB + assert args.max_baseline_size_mb == DEFAULT_MAX_BASELINE_SIZE_MB + assert args.baseline == DEFAULT_BASELINE_PATH + assert args.metrics_baseline == DEFAULT_BASELINE_PATH + assert args.coverage_min == DEFAULT_COVERAGE_MIN + assert MCPGateRequest().coverage_min == DEFAULT_COVERAGE_MIN + assert ( + MetricGateConfig( + fail_complexity=-1, + fail_coupling=-1, + fail_cohesion=-1, + fail_cycles=False, + fail_dead_code=False, + fail_health=-1, + fail_on_new_metrics=False, + ).coverage_min + == DEFAULT_COVERAGE_MIN + ) + + +def test_mcp_parser_and_builder_defaults_stay_in_sync() -> None: + args = mcp_server.build_parser().parse_args([]) + signature = inspect.signature(mcp_server.build_mcp_server) + + assert signature.parameters["history_limit"].default == args.history_limit + assert signature.parameters["host"].default == args.host + assert signature.parameters["port"].default == args.port + assert signature.parameters["json_response"].default == args.json_response + assert signature.parameters["stateless_http"].default == args.stateless_http + assert signature.parameters["debug"].default == args.debug + assert signature.parameters["log_level"].default == args.log_level From 7bb8b056bf8ea561fcbbaa6e623a1a4eeec2c409 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 22 Apr 2026 18:43:41 +0500 Subject: [PATCH 11/32] fix(readme): make wordmark render on PyPI with absolute asset URLs --- README.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0284773..83d8543 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,18 @@
- - - CodeClone + + + CodeClone
From f95a4c7435151f8d52e1950210c6ae9e861027d4 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Wed, 22 Apr 2026 18:57:06 +0500 Subject: [PATCH 12/32] chore(preview): tighten MCP docs and refresh b6 client metadata --- README.md | 13 +++++++++---- docs/claude-desktop-bundle.md | 4 ++-- docs/codex-plugin.md | 4 ++-- docs/vscode-extension.md | 4 ++-- extensions/claude-desktop-codeclone/README.md | 4 ++-- extensions/claude-desktop-codeclone/manifest.json | 2 +- .../claude-desktop-codeclone/package-lock.json | 4 ++-- extensions/claude-desktop-codeclone/package.json | 2 +- extensions/vscode-codeclone/README.md | 7 +++---- extensions/vscode-codeclone/src/support.js | 2 +- extensions/vscode-codeclone/test/support.test.js | 2 +- plugins/codeclone/.codex-plugin/plugin.json | 2 +- plugins/codeclone/README.md | 4 ++-- pyproject.toml | 2 -- tests/test_codex_plugin.py | 4 ++-- 15 files changed, 31 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 83d8543..d5d87b6 100644 --- a/README.md +++ b/README.md @@ -185,10 +185,15 @@ Optional read-only MCP server for AI agents and IDE clients. Never mutates source, baselines, or repo state. 
```bash -uv tool install --pre "codeclone[mcp]" # or: uv pip install --pre "codeclone[mcp]" +uv tool install --pre "codeclone[mcp]" +# or +uv pip install --pre "codeclone[mcp]" -codeclone-mcp --transport stdio # local (Claude Code, Codex, Copilot, Gemini CLI) -codeclone-mcp --transport streamable-http # remote / HTTP-only clients +# local stdio clients +codeclone-mcp --transport stdio + +# remote / HTTP-only clients +codeclone-mcp --transport streamable-http ``` [MCP usage guide](https://orenlab.github.io/codeclone/mcp/) · @@ -284,7 +289,7 @@ Report contract: [Report contract](https://orenlab.github.io/codeclone/book/08-r { "report_schema_version": "2.8", "meta": { - "codeclone_version": "2.0.0b5", + "codeclone_version": "2.0.0b6", "project_name": "...", "scan_root": ".", "report_mode": "full", diff --git a/docs/claude-desktop-bundle.md b/docs/claude-desktop-bundle.md index 5e87168..c742616 100644 --- a/docs/claude-desktop-bundle.md +++ b/docs/claude-desktop-bundle.md @@ -23,14 +23,14 @@ The bundle prefers the current workspace launcher first: ```bash uv venv -uv pip install --python .venv/bin/python "codeclone[mcp]>=2.0.0b5" +uv pip install --python .venv/bin/python --pre "codeclone[mcp]" .venv/bin/codeclone-mcp --help ``` Global fallback: ```bash -uv tool install "codeclone[mcp]>=2.0.0b5" +uv tool install --pre "codeclone[mcp]" codeclone-mcp --help ``` diff --git a/docs/codex-plugin.md b/docs/codex-plugin.md index 19f8f54..dca590a 100644 --- a/docs/codex-plugin.md +++ b/docs/codex-plugin.md @@ -17,14 +17,14 @@ Repo-local discovery via `.agents/plugins/marketplace.json`. 
```bash uv venv -uv pip install --python .venv/bin/python "codeclone[mcp]>=2.0.0b5" +uv pip install --python .venv/bin/python --pre "codeclone[mcp]" .venv/bin/codeclone-mcp --help ``` Global fallback: ```bash -uv tool install "codeclone[mcp]>=2.0.0b5" +uv tool install --pre "codeclone[mcp]" codeclone-mcp --help ``` diff --git a/docs/vscode-extension.md b/docs/vscode-extension.md index 7f011ac..45d2599 100644 --- a/docs/vscode-extension.md +++ b/docs/vscode-extension.md @@ -36,13 +36,13 @@ to `PATH`. Runtime and version-mismatch messages identify that resolved launcher Recommended install for the preview extension: ```bash -uv tool install "codeclone[mcp]>=2.0.0b4" +uv tool install --pre "codeclone[mcp]" ``` If you want the launcher inside the current environment instead: ```bash -uv pip install "codeclone[mcp]>=2.0.0b4" +uv pip install --pre "codeclone[mcp]" ``` Verify the launcher: diff --git a/extensions/claude-desktop-codeclone/README.md b/extensions/claude-desktop-codeclone/README.md index 2bf2060..37ee769 100644 --- a/extensions/claude-desktop-codeclone/README.md +++ b/extensions/claude-desktop-codeclone/README.md @@ -20,14 +20,14 @@ Recommended workspace-local setup: ```bash uv venv -uv pip install --python .venv/bin/python "codeclone[mcp]>=2.0.0b4" +uv pip install --python .venv/bin/python --pre "codeclone[mcp]" .venv/bin/codeclone-mcp --help ``` Global fallback: ```bash -uv tool install "codeclone[mcp]>=2.0.0b4" +uv tool install --pre "codeclone[mcp]" codeclone-mcp --help ``` diff --git a/extensions/claude-desktop-codeclone/manifest.json b/extensions/claude-desktop-codeclone/manifest.json index 33c3286..ccdf403 100644 --- a/extensions/claude-desktop-codeclone/manifest.json +++ b/extensions/claude-desktop-codeclone/manifest.json @@ -2,7 +2,7 @@ "manifest_version": "0.3", "name": "codeclone", "display_name": "CodeClone", - "version": "2.0.0-b5.1", + "version": "2.0.0-b6.0", "description": "Baseline-aware structural review for Claude Desktop through a local 
CodeClone MCP launcher.", "long_description": "CodeClone for Claude Desktop wraps the local codeclone-mcp launcher as an MCP bundle. It keeps Claude on the same canonical MCP surface used by the CLI, HTML report, VS Code extension, and Codex plugin — read-only, baseline-aware, local stdio only.", "author": { diff --git a/extensions/claude-desktop-codeclone/package-lock.json b/extensions/claude-desktop-codeclone/package-lock.json index 9f156cb..0e6d72b 100644 --- a/extensions/claude-desktop-codeclone/package-lock.json +++ b/extensions/claude-desktop-codeclone/package-lock.json @@ -1,12 +1,12 @@ { "name": "@orenlab/codeclone-claude-desktop", - "version": "2.0.0-b5.1", + "version": "2.0.0-b6.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@orenlab/codeclone-claude-desktop", - "version": "2.0.0-b5.1", + "version": "2.0.0-b6.0", "license": "MPL-2.0", "engines": { "node": ">=20.0.0" diff --git a/extensions/claude-desktop-codeclone/package.json b/extensions/claude-desktop-codeclone/package.json index 2b97c37..5abfc93 100644 --- a/extensions/claude-desktop-codeclone/package.json +++ b/extensions/claude-desktop-codeclone/package.json @@ -1,6 +1,6 @@ { "name": "@orenlab/codeclone-claude-desktop", - "version": "2.0.0-b5.1", + "version": "2.0.0-b6.0", "private": true, "description": "Claude Desktop MCP bundle wrapper for the local CodeClone MCP launcher.", "license": "MPL-2.0", diff --git a/extensions/vscode-codeclone/README.md b/extensions/vscode-codeclone/README.md index 897e65a..e1b1f7a 100644 --- a/extensions/vscode-codeclone/README.md +++ b/extensions/vscode-codeclone/README.md @@ -9,8 +9,7 @@ creating a second truth model. The extension stays read-only with respect to repository state and uses the same canonical report semantics as the CLI, HTML report, and MCP server. -This extension is published as a preview while the `2.0.0b5` line is still in -beta. +This extension is published as a preview for the current `2.0.x` beta line. 
## What it is for @@ -50,13 +49,13 @@ falling back to `PATH`. Runtime and version-mismatch messages identify that reso Recommended install for the preview extension: ```bash -uv tool install "codeclone[mcp]>=2.0.0b4" +uv tool install --pre "codeclone[mcp]" ``` If you want the launcher inside the current environment instead: ```bash -uv pip install "codeclone[mcp]>=2.0.0b4" +uv pip install --pre "codeclone[mcp]" ``` Verify the launcher: diff --git a/extensions/vscode-codeclone/src/support.js b/extensions/vscode-codeclone/src/support.js index 98da9ca..f7117d3 100644 --- a/extensions/vscode-codeclone/src/support.js +++ b/extensions/vscode-codeclone/src/support.js @@ -9,7 +9,7 @@ const ANALYSIS_PROFILE_DEEPER_REVIEW = "deeperReview"; const ANALYSIS_PROFILE_CUSTOM = "custom"; const MINIMUM_SUPPORTED_CODECLONE_VERSION = "2.0.0b4"; const PREVIEW_INSTALL_COMMAND = - 'uv tool install "codeclone[mcp]>=2.0.0b4"'; + 'uv tool install --pre "codeclone[mcp]"'; const ANALYSIS_PROFILE_IDS = new Set([ ANALYSIS_PROFILE_DEFAULTS, ANALYSIS_PROFILE_DEEPER_REVIEW, diff --git a/extensions/vscode-codeclone/test/support.test.js b/extensions/vscode-codeclone/test/support.test.js index 8045d45..5ca4f77 100644 --- a/extensions/vscode-codeclone/test/support.test.js +++ b/extensions/vscode-codeclone/test/support.test.js @@ -330,6 +330,6 @@ test("minimum supported CodeClone version and install command stay aligned", () assert.equal(isMinimumSupportedCodeCloneVersion("1.27.0"), false); assert.equal( PREVIEW_INSTALL_COMMAND, - 'uv tool install "codeclone[mcp]>=2.0.0b4"' + 'uv tool install --pre "codeclone[mcp]"' ); }); diff --git a/plugins/codeclone/.codex-plugin/plugin.json b/plugins/codeclone/.codex-plugin/plugin.json index a0ce462..2f737fa 100644 --- a/plugins/codeclone/.codex-plugin/plugin.json +++ b/plugins/codeclone/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "codeclone", - "version": "2.0.0-b5.0", + "version": "2.0.0-b6.0", "description": "Baseline-aware structural code quality 
analysis for Codex through the local CodeClone MCP server.", "author": { "name": "Den Rozhnovskiy", diff --git a/plugins/codeclone/README.md b/plugins/codeclone/README.md index b2f4f23..e833b2c 100644 --- a/plugins/codeclone/README.md +++ b/plugins/codeclone/README.md @@ -29,7 +29,7 @@ Recommended workspace-local setup: ```bash uv venv -uv pip install --python .venv/bin/python "codeclone[mcp]>=2.0.0b4" +uv pip install --python .venv/bin/python --pre "codeclone[mcp]" .venv/bin/codeclone-mcp --help ``` @@ -38,7 +38,7 @@ If your workspace uses Poetry, install CodeClone into that Poetry environment. Global fallback: ```bash -uv tool install "codeclone[mcp]>=2.0.0b4" +uv tool install --pre "codeclone[mcp]" codeclone-mcp --help ``` diff --git a/pyproject.toml b/pyproject.toml index d2c6248..a8aaaae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -163,8 +163,6 @@ target-version = "py310" select = ["E", "F", "W", "I", "B", "UP", "SIM", "C4", "PIE", "PERF", "RUF"] [tool.ruff.lint.per-file-ignores] -"codeclone/_html_css.py" = ["E501"] -"codeclone/_html_js.py" = ["E501"] "codeclone/report/html/assets/*.py" = ["E501"] "codeclone/report/html/sections/*.py" = ["E501"] diff --git a/tests/test_codex_plugin.py b/tests/test_codex_plugin.py index 806878e..3246aa4 100644 --- a/tests/test_codex_plugin.py +++ b/tests/test_codex_plugin.py @@ -15,7 +15,7 @@ def test_codex_plugin_manifest_is_consistent() -> None: assert isinstance(manifest, dict) assert manifest["name"] == "codeclone" - assert manifest["version"] == "2.0.0-b5.0" + assert manifest["version"] == "2.0.0-b6.0" assert manifest["skills"] == "./skills/" assert manifest["mcpServers"] == "./.mcp.json" assert manifest["license"] == "MPL-2.0" @@ -126,7 +126,7 @@ def test_codex_plugin_readme_and_docs_exist() -> None: assert "does not rewrite `~/.codex/config.toml`" in readme_text assert "The plugin prefers a workspace launcher first" in readme_text assert "the current Poetry environment launcher" in readme_text - assert 'uv tool 
install "codeclone[mcp]>=2.0.0b4"' in readme_text + assert 'uv tool install --pre "codeclone[mcp]"' in readme_text assert (root / "docs" / "codex-plugin.md").is_file() assert (root / "docs" / "terms-of-use.md").is_file() From 841546659d2e3be512723141c39c7901883f93e6 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 23 Apr 2026 16:43:22 +0500 Subject: [PATCH 13/32] refactor(mcp): split session and remove duplicated CLI helper tails --- codeclone/baseline/metrics_baseline.py | 37 + codeclone/cache/store.py | 36 + codeclone/report/html/__init__.py | 14 +- codeclone/report/meta.py | 124 + codeclone/surfaces/cli/baseline_state.py | 36 +- codeclone/surfaces/cli/report_meta.py | 111 +- codeclone/surfaces/cli/runtime.py | 38 +- codeclone/surfaces/cli/workflow.py | 137 +- codeclone/surfaces/mcp/_session_baseline.py | 145 + .../surfaces/mcp/_session_finding_mixin.py | 1556 ++++++ codeclone/surfaces/mcp/_session_helpers.py | 901 ++++ codeclone/surfaces/mcp/_session_runtime.py | 41 + codeclone/surfaces/mcp/_session_shared.py | 1200 +++++ .../surfaces/mcp/_session_state_mixin.py | 1205 +++++ codeclone/surfaces/mcp/session.py | 4575 +---------------- scripts/build_docs_example_report.py | 3 +- tests/test_docs_example_report.py | 69 + tests/test_html_report.py | 18 +- tests/test_mcp_service.py | 425 +- tests/test_structural_findings.py | 2 +- 20 files changed, 5777 insertions(+), 4896 deletions(-) create mode 100644 codeclone/report/meta.py create mode 100644 codeclone/surfaces/mcp/_session_baseline.py create mode 100644 codeclone/surfaces/mcp/_session_finding_mixin.py create mode 100644 codeclone/surfaces/mcp/_session_helpers.py create mode 100644 codeclone/surfaces/mcp/_session_runtime.py create mode 100644 codeclone/surfaces/mcp/_session_shared.py create mode 100644 codeclone/surfaces/mcp/_session_state_mixin.py create mode 100644 tests/test_docs_example_report.py diff --git a/codeclone/baseline/metrics_baseline.py b/codeclone/baseline/metrics_baseline.py index 
4653cfd..2aecf3c 100644 --- a/codeclone/baseline/metrics_baseline.py +++ b/codeclone/baseline/metrics_baseline.py @@ -7,9 +7,13 @@ from __future__ import annotations import hmac +from dataclasses import dataclass from datetime import datetime, timezone +from json import JSONDecodeError from pathlib import Path +import orjson + from .. import __version__ from ..contracts import BASELINE_SCHEMA_VERSION, METRICS_BASELINE_SCHEMA_VERSION from ..contracts.errors import BaselineValidationError @@ -55,6 +59,12 @@ from .trust import current_python_tag +@dataclass(frozen=True, slots=True) +class MetricsBaselineSectionProbe: + has_metrics_section: bool + payload: dict[str, object] | None + + def _now_utc_z() -> str: return ( datetime.now(timezone.utc) @@ -64,6 +74,31 @@ def _now_utc_z() -> str: ) +def probe_metrics_baseline_section(path: Path) -> MetricsBaselineSectionProbe: + if not path.exists(): + return MetricsBaselineSectionProbe( + has_metrics_section=False, + payload=None, + ) + try: + raw_payload = orjson.loads(path.read_text("utf-8")) + except (OSError, JSONDecodeError): + return MetricsBaselineSectionProbe( + has_metrics_section=True, + payload=None, + ) + if not isinstance(raw_payload, dict): + return MetricsBaselineSectionProbe( + has_metrics_section=True, + payload=None, + ) + payload = dict(raw_payload) + return MetricsBaselineSectionProbe( + has_metrics_section=("metrics" in payload), + payload=payload, + ) + + class MetricsBaseline: __slots__ = ( "api_surface_payload_sha256", @@ -453,8 +488,10 @@ def diff(self, current: ProjectMetrics) -> MetricsDiff: "METRICS_BASELINE_SCHEMA_VERSION", "METRICS_BASELINE_UNTRUSTED_STATUSES", "MetricsBaseline", + "MetricsBaselineSectionProbe", "MetricsBaselineStatus", "coerce_metrics_baseline_status", "current_python_tag", + "probe_metrics_baseline_section", "snapshot_from_project_metrics", ] diff --git a/codeclone/cache/store.py b/codeclone/cache/store.py index e291425..6ed75c4 100644 --- a/codeclone/cache/store.py +++ 
b/codeclone/cache/store.py @@ -10,6 +10,7 @@ from collections.abc import Collection from json import JSONDecodeError from pathlib import Path +from typing import Protocol from ..baseline.trust import current_python_tag from ..contracts import ( @@ -84,6 +85,41 @@ ) +class _CacheStatusLike(Protocol): + @property + def load_status(self) -> CacheStatus | str | None: ... + + @property + def load_warning(self) -> str | None: ... + + @property + def cache_schema_version(self) -> str | None: ... + + +def resolve_cache_status(cache: _CacheStatusLike) -> tuple[CacheStatus, str | None]: + raw_cache_status = getattr(cache, "load_status", None) + load_warning = getattr(cache, "load_warning", None) + if isinstance(raw_cache_status, CacheStatus): + cache_status = raw_cache_status + elif isinstance(raw_cache_status, str): + try: + cache_status = CacheStatus(raw_cache_status) + except ValueError: + cache_status = ( + CacheStatus.OK if load_warning is None else CacheStatus.INVALID_TYPE + ) + else: + cache_status = ( + CacheStatus.OK if load_warning is None else CacheStatus.INVALID_TYPE + ) + + raw_cache_schema_version = getattr(cache, "cache_schema_version", None) + cache_schema_version = ( + raw_cache_schema_version if isinstance(raw_cache_schema_version, str) else None + ) + return cache_status, cache_schema_version + + class Cache: __slots__ = ( "_canonical_runtime_paths", diff --git a/codeclone/report/html/__init__.py b/codeclone/report/html/__init__.py index d463eb0..cdde57d 100644 --- a/codeclone/report/html/__init__.py +++ b/codeclone/report/html/__init__.py @@ -9,17 +9,5 @@ from __future__ import annotations from .assemble import build_html_report -from .widgets.snippets import ( - _FileCache, - _pygments_css, - _render_code_block, - _try_pygments, -) -__all__ = [ - "_FileCache", - "_pygments_css", - "_render_code_block", - "_try_pygments", - "build_html_report", -] +__all__ = ["build_html_report"] diff --git a/codeclone/report/meta.py b/codeclone/report/meta.py new file 
mode 100644 index 0000000..cc1bfc9 --- /dev/null +++ b/codeclone/report/meta.py @@ -0,0 +1,124 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import sys +from datetime import datetime, timezone +from typing import TYPE_CHECKING + +from ..baseline.clone_baseline import Baseline +from ..baseline.trust import current_python_tag +from ..contracts import ( + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, +) +from ..contracts.schemas import ReportMeta + +if TYPE_CHECKING: + from pathlib import Path + + from ..baseline.metrics_baseline import MetricsBaseline + + +def current_report_timestamp_utc() -> str: + return ( + datetime.now(timezone.utc).replace(microsecond=0).strftime("%Y-%m-%dT%H:%M:%SZ") + ) + + +def build_report_meta( + *, + codeclone_version: str, + scan_root: Path, + baseline_path: Path, + baseline: Baseline, + baseline_loaded: bool, + baseline_status: str, + cache_path: Path, + cache_used: bool, + cache_status: str, + cache_schema_version: str | None, + files_skipped_source_io: int, + metrics_baseline_path: Path, + metrics_baseline: MetricsBaseline, + metrics_baseline_loaded: bool, + metrics_baseline_status: str, + health_score: int | None, + health_grade: str | None, + analysis_mode: str, + metrics_computed: tuple[str, ...], + min_loc: int, + min_stmt: int, + block_min_loc: int, + block_min_stmt: int, + segment_min_loc: int, + segment_min_stmt: int, + design_complexity_threshold: int = DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + design_coupling_threshold: int = DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + design_cohesion_threshold: int = DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + 
analysis_started_at_utc: str | None, + report_generated_at_utc: str, +) -> ReportMeta: + project_name = scan_root.name or str(scan_root) + return { + "codeclone_version": codeclone_version, + "project_name": project_name, + "scan_root": str(scan_root), + "python_version": _current_python_version(), + "python_tag": current_python_tag(), + "baseline_path": str(baseline_path), + "baseline_fingerprint_version": baseline.fingerprint_version, + "baseline_schema_version": baseline.schema_version, + "baseline_python_tag": baseline.python_tag, + "baseline_generator_name": baseline.generator, + "baseline_generator_version": baseline.generator_version, + "baseline_payload_sha256": baseline.payload_sha256, + "baseline_payload_sha256_verified": ( + baseline_loaded + and baseline_status == "ok" + and isinstance(baseline.payload_sha256, str) + ), + "baseline_loaded": baseline_loaded, + "baseline_status": baseline_status, + "cache_path": str(cache_path), + "cache_used": cache_used, + "cache_status": cache_status, + "cache_schema_version": cache_schema_version, + "files_skipped_source_io": files_skipped_source_io, + "metrics_baseline_path": str(metrics_baseline_path), + "metrics_baseline_loaded": metrics_baseline_loaded, + "metrics_baseline_status": metrics_baseline_status, + "metrics_baseline_schema_version": metrics_baseline.schema_version, + "metrics_baseline_payload_sha256": metrics_baseline.payload_sha256, + "metrics_baseline_payload_sha256_verified": ( + metrics_baseline_loaded + and metrics_baseline_status == "ok" + and isinstance(metrics_baseline.payload_sha256, str) + ), + "health_score": health_score, + "health_grade": health_grade, + "analysis_mode": analysis_mode, + "metrics_computed": list(metrics_computed), + "analysis_profile": { + "min_loc": min_loc, + "min_stmt": min_stmt, + "block_min_loc": block_min_loc, + "block_min_stmt": block_min_stmt, + "segment_min_loc": segment_min_loc, + "segment_min_stmt": segment_min_stmt, + }, + "design_complexity_threshold": 
design_complexity_threshold, + "design_coupling_threshold": design_coupling_threshold, + "design_cohesion_threshold": design_cohesion_threshold, + "analysis_started_at_utc": analysis_started_at_utc, + "report_generated_at_utc": report_generated_at_utc, + } + + +def _current_python_version() -> str: + return f"{sys.version_info.major}.{sys.version_info.minor}" diff --git a/codeclone/surfaces/cli/baseline_state.py b/codeclone/surfaces/cli/baseline_state.py index 2d6f5e3..bff38a5 100644 --- a/codeclone/surfaces/cli/baseline_state.py +++ b/codeclone/surfaces/cli/baseline_state.py @@ -8,12 +8,9 @@ import sys from dataclasses import dataclass -from json import JSONDecodeError from pathlib import Path from typing import TYPE_CHECKING, Protocol -import orjson - from ... import __version__ from ... import ui_messages as ui from ...baseline import ( @@ -26,8 +23,10 @@ from ...baseline.metrics_baseline import ( METRICS_BASELINE_UNTRUSTED_STATUSES, MetricsBaseline, + MetricsBaselineSectionProbe, MetricsBaselineStatus, coerce_metrics_baseline_status, + probe_metrics_baseline_section, ) from ...contracts import ( BASELINE_FINGERPRINT_VERSION, @@ -104,42 +103,11 @@ class _MetricsBaselineRuntime: trusted_for_diff: bool = False -@dataclass(frozen=True, slots=True) -class MetricsBaselineSectionProbe: - has_metrics_section: bool - payload: dict[str, object] | None - - _CloneBaselineState = CloneBaselineState _MetricsBaselineSectionProbe = MetricsBaselineSectionProbe _MetricsBaselineState = MetricsBaselineState -def probe_metrics_baseline_section(path: Path) -> MetricsBaselineSectionProbe: - if not path.exists(): - return MetricsBaselineSectionProbe( - has_metrics_section=False, - payload=None, - ) - try: - raw_payload = orjson.loads(path.read_text("utf-8")) - except (OSError, JSONDecodeError): - return MetricsBaselineSectionProbe( - has_metrics_section=True, - payload=None, - ) - if not isinstance(raw_payload, dict): - return MetricsBaselineSectionProbe( - has_metrics_section=True, - 
payload=None, - ) - payload = dict(raw_payload) - return MetricsBaselineSectionProbe( - has_metrics_section=("metrics" in payload), - payload=payload, - ) - - def resolve_clone_baseline_state( *, args: _BaselineArgs, diff --git a/codeclone/surfaces/cli/report_meta.py b/codeclone/surfaces/cli/report_meta.py index ae195e9..e2a418f 100644 --- a/codeclone/surfaces/cli/report_meta.py +++ b/codeclone/surfaces/cli/report_meta.py @@ -6,128 +6,23 @@ from __future__ import annotations -import sys -from datetime import datetime, timezone from typing import TYPE_CHECKING -from ...baseline.clone_baseline import Baseline -from ...baseline.trust import current_python_tag from ...cache.versioning import CacheStatus -from ...contracts import ( - DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, -) from ...contracts.schemas import ReportMeta +from ...report import meta as _report_meta +from ...report.meta import build_report_meta as _build_report_meta from .types import CLIArgsLike if TYPE_CHECKING: from pathlib import Path - from ...baseline.metrics_baseline import MetricsBaseline - from ...cache.versioning import CacheStatus from ...core._types import AnalysisResult from ...core._types import ProcessingResult as PipelineProcessingResult from .baseline_state import CloneBaselineState, MetricsBaselineState -def _current_python_version() -> str: - return f"{sys.version_info.major}.{sys.version_info.minor}" - - -def _current_report_timestamp_utc() -> str: - return ( - datetime.now(timezone.utc).replace(microsecond=0).strftime("%Y-%m-%dT%H:%M:%SZ") - ) - - -def _build_report_meta( - *, - codeclone_version: str, - scan_root: Path, - baseline_path: Path, - baseline: Baseline, - baseline_loaded: bool, - baseline_status: str, - cache_path: Path, - cache_used: bool, - cache_status: str, - cache_schema_version: str | None, - files_skipped_source_io: int, - metrics_baseline_path: Path, - metrics_baseline: 
MetricsBaseline, - metrics_baseline_loaded: bool, - metrics_baseline_status: str, - health_score: int | None, - health_grade: str | None, - analysis_mode: str, - metrics_computed: tuple[str, ...], - min_loc: int, - min_stmt: int, - block_min_loc: int, - block_min_stmt: int, - segment_min_loc: int, - segment_min_stmt: int, - design_complexity_threshold: int = DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - design_coupling_threshold: int = DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - design_cohesion_threshold: int = DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - analysis_started_at_utc: str | None, - report_generated_at_utc: str, -) -> ReportMeta: - project_name = scan_root.name or str(scan_root) - return { - "codeclone_version": codeclone_version, - "project_name": project_name, - "scan_root": str(scan_root), - "python_version": _current_python_version(), - "python_tag": current_python_tag(), - "baseline_path": str(baseline_path), - "baseline_fingerprint_version": baseline.fingerprint_version, - "baseline_schema_version": baseline.schema_version, - "baseline_python_tag": baseline.python_tag, - "baseline_generator_name": baseline.generator, - "baseline_generator_version": baseline.generator_version, - "baseline_payload_sha256": baseline.payload_sha256, - "baseline_payload_sha256_verified": ( - baseline_loaded - and baseline_status == "ok" - and isinstance(baseline.payload_sha256, str) - ), - "baseline_loaded": baseline_loaded, - "baseline_status": baseline_status, - "cache_path": str(cache_path), - "cache_used": cache_used, - "cache_status": cache_status, - "cache_schema_version": cache_schema_version, - "files_skipped_source_io": files_skipped_source_io, - "metrics_baseline_path": str(metrics_baseline_path), - "metrics_baseline_loaded": metrics_baseline_loaded, - "metrics_baseline_status": metrics_baseline_status, - "metrics_baseline_schema_version": metrics_baseline.schema_version, - "metrics_baseline_payload_sha256": metrics_baseline.payload_sha256, - 
"metrics_baseline_payload_sha256_verified": ( - metrics_baseline_loaded - and metrics_baseline_status == "ok" - and isinstance(metrics_baseline.payload_sha256, str) - ), - "health_score": health_score, - "health_grade": health_grade, - "analysis_mode": analysis_mode, - "metrics_computed": list(metrics_computed), - "analysis_profile": { - "min_loc": min_loc, - "min_stmt": min_stmt, - "block_min_loc": block_min_loc, - "block_min_stmt": block_min_stmt, - "segment_min_loc": segment_min_loc, - "segment_min_stmt": segment_min_stmt, - }, - "design_complexity_threshold": design_complexity_threshold, - "design_coupling_threshold": design_coupling_threshold, - "design_cohesion_threshold": design_cohesion_threshold, - "analysis_started_at_utc": analysis_started_at_utc, - "report_generated_at_utc": report_generated_at_utc, - } +_current_report_timestamp_utc = _report_meta.current_report_timestamp_utc def build_cli_report_meta( diff --git a/codeclone/surfaces/cli/runtime.py b/codeclone/surfaces/cli/runtime.py index ac346d3..8f91fb4 100644 --- a/codeclone/surfaces/cli/runtime.py +++ b/codeclone/surfaces/cli/runtime.py @@ -8,10 +8,9 @@ import sys from pathlib import Path -from typing import Protocol from ... import ui_messages as ui -from ...cache.store import Cache +from ...cache.store import Cache, resolve_cache_status from ...cache.versioning import CacheStatus from ...contracts import ExitCode from . import state as cli_state @@ -19,17 +18,6 @@ from .types import PrinterLike, require_status_console -class _CacheLike(Protocol): - @property - def load_status(self) -> CacheStatus | str | None: ... - - @property - def load_warning(self) -> str | None: ... - - @property - def cache_schema_version(self) -> str | None: ... 
- - def validate_numeric_args(args: object) -> bool: return bool( not ( @@ -153,30 +141,6 @@ def metrics_computed(args: object) -> tuple[str, ...]: return tuple(computed) -def resolve_cache_status(cache: _CacheLike) -> tuple[CacheStatus, str | None]: - raw_cache_status = getattr(cache, "load_status", None) - load_warning = getattr(cache, "load_warning", None) - if isinstance(raw_cache_status, CacheStatus): - cache_status = raw_cache_status - elif isinstance(raw_cache_status, str): - try: - cache_status = CacheStatus(raw_cache_status) - except ValueError: - cache_status = ( - CacheStatus.OK if load_warning is None else CacheStatus.INVALID_TYPE - ) - else: - cache_status = ( - CacheStatus.OK if load_warning is None else CacheStatus.INVALID_TYPE - ) - - raw_cache_schema_version = getattr(cache, "cache_schema_version", None) - cache_schema_version = ( - raw_cache_schema_version if isinstance(raw_cache_schema_version, str) else None - ) - return cache_status, cache_schema_version - - def resolve_report_cache_path(cache_path: Path) -> Path: try: return cache_path.resolve() diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index 5518dde..24f408d 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -15,12 +15,9 @@ from ...baseline import Baseline from ...cache.projection import build_segment_report_projection from ...cache.store import Cache +from ...config import resolver as config_resolver from ...config.argparse_builder import build_parser from ...config.pyproject_loader import load_pyproject_config -from ...config.resolver import ( - apply_pyproject_config_overrides, - collect_explicit_cli_dests, -) from ...contracts import ( ISSUES_URL, ExitCode, @@ -34,70 +31,17 @@ from ...core.reporting import gate, report from ...models import MetricsDiff from ...report.html import build_html_report +from . import baseline_state as cli_baseline_state +from . import changed_scope as cli_changed_scope +from . 
import console as cli_console +from . import execution as cli_execution +from . import post_run as cli_post_run from . import report_meta as cli_meta_mod +from . import reports_output as cli_reports_output +from . import runtime as cli_runtime +from . import startup as cli_startup from . import state as cli_state -from .baseline_state import ( - _probe_metrics_baseline_section, - _resolve_clone_baseline_state, - _resolve_metrics_baseline_state, -) -from .changed_scope import ( - _changed_clone_gate_from_report, - _git_diff_changed_paths, - _validate_changed_scope_args, -) -from .console import ( - _is_debug_enabled, - _make_plain_console, - _parse_metric_reason_entry, - _print_gating_failure_block, - _print_verbose_clone_hashes, - _rich_progress_symbols, -) -from .console import make_console as _make_rich_console -from .console import print_banner as _print_banner_impl -from .execution import ( - enforce_gating, - print_pipeline_done_if_needed, - run_analysis_stages, -) -from .post_run import build_diff_context as _build_diff_context -from .post_run import ( - maybe_print_changed_scope_snapshot, - print_metrics_if_available, - resolve_changed_clone_gate, - warn_new_clones_without_fail, -) -from .reports_output import ( - _report_path_origins, - _resolve_output_paths, - _validate_report_ui_flags, - _write_report_outputs, -) -from .runtime import ( - _configure_metrics_mode, - _metrics_computed, - _print_failed_files, - _resolve_cache_status, - _validate_numeric_args, - gating_mode_enabled, - prepare_metrics_mode_and_ui, - resolve_report_cache_path, -) -from .runtime import _resolve_cache_path as _resolve_cache_path_impl -from .startup import configure_runtime_console as _configure_runtime_console_impl -from .startup import configure_runtime_flags as _configure_runtime_flags -from .startup import load_pyproject_config_or_exit as _load_pyproject_config_or_exit -from .startup import resolve_baseline_inputs as _resolve_baseline_inputs -from .startup import 
resolve_existing_root_path as _resolve_existing_root_path -from .startup import validate_numeric_args_or_exit as _validate_numeric_args_or_exit -from .summary import ( - _print_changed_scope, - _print_metrics, - _print_summary, - build_metrics_snapshot, - build_summary_counts, -) +from . import summary as cli_summary from .types import CLIArgsLike, StatusConsole, require_status_console __all__ = [ @@ -126,8 +70,10 @@ "_validate_report_ui_flags", "_write_report_outputs", "analyze", + "apply_pyproject_config_overrides", "bootstrap", "build_html_report", + "collect_explicit_cli_dests", "console", "discover", "gate", @@ -137,6 +83,63 @@ "report", ] +apply_pyproject_config_overrides = config_resolver.apply_pyproject_config_overrides +collect_explicit_cli_dests = config_resolver.collect_explicit_cli_dests + +_probe_metrics_baseline_section = cli_baseline_state._probe_metrics_baseline_section +_resolve_clone_baseline_state = cli_baseline_state._resolve_clone_baseline_state +_resolve_metrics_baseline_state = cli_baseline_state._resolve_metrics_baseline_state + +_changed_clone_gate_from_report = cli_changed_scope._changed_clone_gate_from_report +_git_diff_changed_paths = cli_changed_scope._git_diff_changed_paths +_validate_changed_scope_args = cli_changed_scope._validate_changed_scope_args + +_is_debug_enabled = cli_console._is_debug_enabled +_make_plain_console = cli_console._make_plain_console +_make_rich_console = cli_console.make_console +_parse_metric_reason_entry = cli_console._parse_metric_reason_entry +_print_banner_impl = cli_console.print_banner +_print_gating_failure_block = cli_console._print_gating_failure_block +_print_verbose_clone_hashes = cli_console._print_verbose_clone_hashes +_rich_progress_symbols = cli_console._rich_progress_symbols + +print_pipeline_done_if_needed = cli_execution.print_pipeline_done_if_needed +run_analysis_stages = cli_execution.run_analysis_stages + +_build_diff_context = cli_post_run.build_diff_context 
+maybe_print_changed_scope_snapshot = cli_post_run.maybe_print_changed_scope_snapshot +print_metrics_if_available = cli_post_run.print_metrics_if_available +resolve_changed_clone_gate = cli_post_run.resolve_changed_clone_gate +warn_new_clones_without_fail = cli_post_run.warn_new_clones_without_fail + +_report_path_origins = cli_reports_output._report_path_origins +_resolve_output_paths = cli_reports_output._resolve_output_paths +_validate_report_ui_flags = cli_reports_output._validate_report_ui_flags +_write_report_outputs = cli_reports_output._write_report_outputs + +_configure_metrics_mode = cli_runtime._configure_metrics_mode +_metrics_computed = cli_runtime._metrics_computed +_print_failed_files = cli_runtime._print_failed_files +_resolve_cache_path_impl = cli_runtime._resolve_cache_path +_resolve_cache_status = cli_runtime._resolve_cache_status +_validate_numeric_args = cli_runtime._validate_numeric_args +gating_mode_enabled = cli_runtime.gating_mode_enabled +prepare_metrics_mode_and_ui = cli_runtime.prepare_metrics_mode_and_ui +resolve_report_cache_path = cli_runtime.resolve_report_cache_path + +_configure_runtime_console_impl = cli_startup.configure_runtime_console +_configure_runtime_flags = cli_startup.configure_runtime_flags +_load_pyproject_config_or_exit = cli_startup.load_pyproject_config_or_exit +_resolve_baseline_inputs = cli_startup.resolve_baseline_inputs +_resolve_existing_root_path = cli_startup.resolve_existing_root_path +_validate_numeric_args_or_exit = cli_startup.validate_numeric_args_or_exit + +_print_changed_scope = cli_summary._print_changed_scope +_print_metrics = cli_summary._print_metrics +_print_summary = cli_summary._print_summary +build_metrics_snapshot = cli_summary.build_metrics_snapshot +build_summary_counts = cli_summary.build_summary_counts + def _set_console(value: object) -> object: cli_state.set_console(value) @@ -235,7 +238,7 @@ def _enforce_gating( clone_threshold_total: int | None = None, ) -> None: _set_console(console) - 
# NOTE(review): this span of the patch is reconstructed as plain Python.  The
# unified-diff "+" markers, the "diff --git / new file mode / index" metadata,
# and the dangling tail of the preceding codeclone/cli.py hunk
# (cli_execution.enforce_gating(...) — its head is outside this span) are
# patch scaffolding and are elided.
#
# ---------------------------------------------------------------------------
# codeclone/surfaces/mcp/_session_baseline.py
# ---------------------------------------------------------------------------
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

from ...baseline import (
    Baseline,
    BaselineStatus,
    coerce_baseline_status,
    current_python_tag,
)
from ...baseline.metrics_baseline import (
    MetricsBaseline,
    MetricsBaselineStatus,
    coerce_metrics_baseline_status,
)
from ...contracts import ExitCode
from ...contracts.errors import BaselineValidationError

# Size limits arrive in megabytes; Baseline.load expects bytes.
_BYTES_PER_MB = 1024 * 1024


@dataclass(frozen=True, slots=True)
class CloneBaselineState:
    """Outcome of resolving the clone baseline for one MCP session."""

    baseline: Baseline  # handle to the on-disk clone baseline
    loaded: bool  # True only after a successful load + compatibility check
    status: BaselineStatus  # MISSING, OK, or the coerced validation status
    failure_code: ExitCode | None  # always None here; reserved for callers
    trusted_for_diff: bool  # safe to diff current findings against this baseline
    updated_path: Path | None  # always None here; reserved for callers
    warning_message: str | None = None  # human-readable load/validation problem


@dataclass(frozen=True, slots=True)
class MetricsBaselineState:
    """Outcome of resolving the metrics baseline for one MCP session."""

    baseline: MetricsBaseline
    loaded: bool
    status: MetricsBaselineStatus
    failure_code: ExitCode | None  # always None here; reserved for callers
    trusted_for_diff: bool
    warning_message: str | None = None


def resolve_clone_baseline_state(
    *,
    baseline_path: Path,
    baseline_exists: bool,
    max_baseline_size_mb: int,
    shared_baseline_payload: dict[str, object] | None = None,
) -> CloneBaselineState:
    """Load and validate the clone baseline without ever raising.

    Three outcomes are encoded in the returned state: the baseline file is
    missing (status MISSING), it failed load/compatibility validation (the
    error text is carried in ``warning_message``), or it loaded cleanly and
    is trusted for diffing (status OK).

    ``shared_baseline_payload``, when given, is a payload some other surface
    already parsed; it is handed to ``load`` so the file is not re-read.
    """
    baseline = Baseline(baseline_path)

    if not baseline_exists:
        return CloneBaselineState(
            baseline=baseline,
            loaded=False,
            status=BaselineStatus.MISSING,
            failure_code=None,
            trusted_for_diff=False,
            updated_path=None,
            warning_message=None,
        )

    load_kwargs: dict[str, object] = {
        "max_size_bytes": max_baseline_size_mb * _BYTES_PER_MB,
    }
    if shared_baseline_payload is not None:
        load_kwargs["preloaded_payload"] = shared_baseline_payload

    try:
        baseline.load(**load_kwargs)
        baseline.verify_compatibility(current_python_tag=current_python_tag())
    except BaselineValidationError as exc:
        return CloneBaselineState(
            baseline=baseline,
            loaded=False,
            status=coerce_baseline_status(exc.status),
            failure_code=None,
            trusted_for_diff=False,
            updated_path=None,
            warning_message=str(exc),
        )

    return CloneBaselineState(
        baseline=baseline,
        loaded=True,
        status=BaselineStatus.OK,
        failure_code=None,
        trusted_for_diff=True,
        updated_path=None,
        warning_message=None,
    )


def resolve_metrics_baseline_state(
    *,
    metrics_baseline_path: Path,
    metrics_baseline_exists: bool,
    max_baseline_size_mb: int,
    skip_metrics: bool,
    shared_baseline_payload: dict[str, object] | None = None,
) -> MetricsBaselineState:
    """Load and validate the metrics baseline without ever raising.

    Mirrors :func:`resolve_clone_baseline_state`; additionally a
    ``skip_metrics=True`` run is treated the same as a missing baseline
    file (status MISSING, nothing loaded).
    """
    baseline = MetricsBaseline(metrics_baseline_path)

    if skip_metrics or not metrics_baseline_exists:
        return MetricsBaselineState(
            baseline=baseline,
            loaded=False,
            status=MetricsBaselineStatus.MISSING,
            failure_code=None,
            trusted_for_diff=False,
            warning_message=None,
        )

    load_kwargs: dict[str, object] = {
        "max_size_bytes": max_baseline_size_mb * _BYTES_PER_MB,
    }
    if shared_baseline_payload is not None:
        load_kwargs["preloaded_payload"] = shared_baseline_payload

    try:
        baseline.load(**load_kwargs)
        # NOTE: the metrics API spells the tag keyword differently from the
        # clone API (runtime_python_tag vs current_python_tag).
        baseline.verify_compatibility(runtime_python_tag=current_python_tag())
    except BaselineValidationError as exc:
        return MetricsBaselineState(
            baseline=baseline,
            loaded=False,
            status=coerce_metrics_baseline_status(exc.status),
            failure_code=None,
            trusted_for_diff=False,
            warning_message=str(exc),
        )

    return MetricsBaselineState(
        baseline=baseline,
        loaded=True,
        status=MetricsBaselineStatus.OK,
        failure_code=None,
        trusted_for_diff=True,
        warning_message=None,
    )


# ---------------------------------------------------------------------------
# codeclone/surfaces/mcp/_session_finding_mixin.py — module head only.
# The _MCPSessionFindingMixin class that follows continues past this patch
# span and is left untouched.  This file's own "from __future__ import
# annotations" is folded into the one at the top of this reconstruction.
# ---------------------------------------------------------------------------
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from types import TracebackType
from typing import Protocol

from . import _session_helpers as _helpers
from ._session_shared import (
    _CHECK_TO_DIMENSION,
    _CONFIDENCE_WEIGHT,
    _DESIGN_CHECK_CONTEXT,
    _EFFORT_WEIGHT,
    _HOTLIST_REPORT_KEYS,
    _NOVELTY_WEIGHT,
    _RUNTIME_WEIGHT,
    _SEVERITY_WEIGHT,
    _VALID_ANALYSIS_MODES,
    _VALID_CACHE_POLICIES,
    _VALID_DETAIL_LEVELS,
    _VALID_FINDING_FAMILIES,
    _VALID_FINDING_NOVELTY,
    _VALID_FINDING_SORT,
    _VALID_HOTLIST_KINDS,
    _VALID_SEVERITIES,
    CATEGORY_COHESION,
    CATEGORY_COMPLEXITY,
    CATEGORY_COUPLING,
    CONFIDENCE_MEDIUM,
    EFFORT_MODERATE,
    FAMILY_CLONES,
    FAMILY_DEAD_CODE,
    FAMILY_DESIGN,
    FAMILY_STRUCTURAL,
    SOURCE_KIND_OTHER,
    AnalysisMode,
    CodeCloneMCPRunStore,
    DetailLevel,
    FindingFamilyFilter,
    FindingNoveltyFilter,
    FindingSort,
    HotlistKind,
    Mapping,
    MCPAnalysisRequest,
    MCPFindingNotFoundError,
    MCPRunNotFoundError,
    MCPRunRecord,
    MCPServiceContractError,
    OrderedDict,
    Path,
    Sequence,
    _as_float,
    _as_int,
    _git_diff_lines_payload,
    paginate,
    resolve_finding_id,
)


class _StateLock(Protocol):
    """Structural type of the context-manager lock guarding session state."""

    def __enter__(self) -> object: ...

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> bool | None: ...
+ + +class _MCPSessionFindingMixin: + _runs: CodeCloneMCPRunStore + _state_lock: _StateLock + _review_state: dict[str, OrderedDict[str, str | None]] + _last_gate_results: dict[str, dict[str, object]] + _spread_max_cache: dict[str, int] + + def _validate_analysis_request(self, request: MCPAnalysisRequest) -> None: + _helpers._validate_choice( + "analysis_mode", + request.analysis_mode, + _VALID_ANALYSIS_MODES, + ) + _helpers._validate_choice( + "cache_policy", + request.cache_policy, + _VALID_CACHE_POLICIES, + ) + if request.cache_policy == "refresh": + raise MCPServiceContractError( + "cache_policy='refresh' is not supported by the read-only " + "CodeClone MCP server. Use 'reuse' or 'off'." + ) + if request.analysis_mode == "clones_only" and request.coverage_xml is not None: + raise MCPServiceContractError( + "coverage_xml requires analysis_mode='full' because coverage join " + "depends on metrics-enabled analysis." + ) + + def _resolve_request_changed_paths( + self, + *, + root_path: Path, + changed_paths: Sequence[str], + git_diff_ref: str | None, + ) -> tuple[str, ...]: + if changed_paths and git_diff_ref is not None: + raise MCPServiceContractError( + "Provide changed_paths or git_diff_ref, not both." 
+ ) + if git_diff_ref is not None: + return self._git_diff_paths(root_path=root_path, git_diff_ref=git_diff_ref) + if not changed_paths: + return () + return self._normalize_changed_paths(root_path=root_path, paths=changed_paths) + + def _resolve_query_changed_paths( + self, + *, + record: MCPRunRecord, + changed_paths: Sequence[str], + git_diff_ref: str | None, + prefer_record_paths: bool = False, + ) -> tuple[str, ...]: + if changed_paths or git_diff_ref is not None: + return self._resolve_request_changed_paths( + root_path=record.root, + changed_paths=changed_paths, + git_diff_ref=git_diff_ref, + ) + if prefer_record_paths: + return record.changed_paths + return () + + def _normalize_changed_paths( + self, + *, + root_path: Path, + paths: Sequence[str], + ) -> tuple[str, ...]: + normalized: set[str] = set() + for raw_path in paths: + candidate = Path(str(raw_path)).expanduser() + if candidate.is_absolute(): + try: + relative = candidate.resolve().relative_to(root_path) + except (OSError, ValueError) as exc: + raise MCPServiceContractError( + f"Changed path '{raw_path}' is outside root '{root_path}'." 
+ ) from exc + normalized.add(relative.as_posix()) + continue + cleaned = _helpers._normalize_relative_path(candidate.as_posix()) + if cleaned: + normalized.add(cleaned) + return tuple(sorted(normalized)) + + def _git_diff_paths( + self, + *, + root_path: Path, + git_diff_ref: str, + ) -> tuple[str, ...]: + lines = _git_diff_lines_payload( + root_path=root_path, + git_diff_ref=git_diff_ref, + ) + return self._normalize_changed_paths(root_path=root_path, paths=lines) + + def _path_filter_tuple(self, path: str | None) -> tuple[str, ...]: + if not path: + return () + cleaned = _helpers._normalize_relative_path(Path(path).as_posix()) + return (cleaned,) if cleaned else () + + def _previous_run_for_root(self, record: MCPRunRecord) -> MCPRunRecord | None: + previous: MCPRunRecord | None = None + for item in self._runs.records(): + if item.run_id == record.run_id: + return previous + if item.root == record.root: + previous = item + return None + + def _latest_compatible_record( + self, + *, + analysis_mode: AnalysisMode, + root_path: Path | None = None, + ) -> MCPRunRecord | None: + for item in reversed(self._runs.records()): + if root_path is not None and item.root != root_path: + continue + if _helpers._record_supports_analysis_mode( + item, + analysis_mode=analysis_mode, + ): + return item + return None + + def _resolve_granular_record( + self, + *, + run_id: str | None, + root: str | None, + analysis_mode: AnalysisMode, + ) -> MCPRunRecord: + if run_id is not None: + record = self._runs.get(run_id) + if _helpers._record_supports_analysis_mode( + record, + analysis_mode=analysis_mode, + ): + return record + raise MCPServiceContractError( + "Selected MCP run is not compatible with this check. " + f"Call analyze_repository(root='{record.root}', " + "analysis_mode='full') first." 
+ ) + root_path = self._resolve_optional_root(root) + latest_record = self._latest_compatible_record( + analysis_mode=analysis_mode, + root_path=root_path, + ) + if latest_record is not None: + return latest_record + if root_path is not None: + raise MCPRunNotFoundError( + f"No compatible MCP analysis run is available for root: {root_path}. " + f"Call analyze_repository(root='{root_path}') or " + f"analyze_changed_paths(root='{root_path}', changed_paths=[...]) first." + ) + raise MCPRunNotFoundError( + "No compatible MCP analysis run is available. " + "Call analyze_repository(root='/path/to/repo') or " + "analyze_changed_paths(root='/path/to/repo', changed_paths=[...]) first." + ) + + def _resolve_optional_root(self, root: str | None) -> Path | None: + cleaned_root = "" if root is None else str(root).strip() + if not cleaned_root: + return None + return _helpers._resolve_root(cleaned_root) + + def _finding_id_maps( + self, + record: MCPRunRecord, + ) -> tuple[dict[str, str], dict[str, str]]: + canonical_ids = sorted( + str(finding.get("id", "")) + for finding in self._base_findings(record) + if str(finding.get("id", "")) + ) + base_ids = { + canonical_id: _helpers._base_short_finding_id(canonical_id) + for canonical_id in canonical_ids + } + grouped: dict[str, list[str]] = {} + for canonical_id, short_name in base_ids.items(): + grouped.setdefault(short_name, []).append(canonical_id) + canonical_to_short: dict[str, str] = {} + short_to_canonical: dict[str, str] = {} + for short_name, group in grouped.items(): + if len(group) == 1: + canonical_id = group[0] + canonical_to_short[canonical_id] = short_name + short_to_canonical[short_name] = canonical_id + continue + disambiguated_ids = _helpers._disambiguated_short_finding_ids(group) + for canonical_id, disambiguated in disambiguated_ids.items(): + canonical_to_short[canonical_id] = disambiguated + short_to_canonical[disambiguated] = canonical_id + return canonical_to_short, short_to_canonical + + def 
_short_finding_id( + self, + record: MCPRunRecord, + canonical_id: str, + ) -> str: + canonical_to_short, _short_to_canonical = self._finding_id_maps(record) + return canonical_to_short.get(canonical_id, canonical_id) + + def _resolve_canonical_finding_id( + self, + record: MCPRunRecord, + finding_id: str, + ) -> str: + canonical_to_short, short_to_canonical = self._finding_id_maps(record) + canonical = resolve_finding_id( + canonical_to_short=canonical_to_short, + short_to_canonical=short_to_canonical, + finding_id=finding_id, + ) + if canonical is not None: + return canonical + raise MCPFindingNotFoundError( + f"Finding id '{finding_id}' was not found in run " + f"'{_helpers._short_run_id(record.run_id)}'." + ) + + def _base_findings(self, record: MCPRunRecord) -> list[dict[str, object]]: + report_document = record.report_document + findings = _helpers._as_mapping(report_document.get("findings")) + groups = _helpers._as_mapping(findings.get("groups")) + clone_groups = _helpers._as_mapping(groups.get(FAMILY_CLONES)) + return [ + *_helpers._dict_list(clone_groups.get("functions")), + *_helpers._dict_list(clone_groups.get("blocks")), + *_helpers._dict_list(clone_groups.get("segments")), + *_helpers._dict_list( + _helpers._as_mapping(groups.get(FAMILY_STRUCTURAL)).get("groups") + ), + *_helpers._dict_list( + _helpers._as_mapping(groups.get(FAMILY_DEAD_CODE)).get("groups") + ), + *_helpers._dict_list( + _helpers._as_mapping(groups.get(FAMILY_DESIGN)).get("groups") + ), + ] + + def _query_findings( + self, + *, + record: MCPRunRecord, + family: FindingFamilyFilter = "all", + category: str | None = None, + severity: str | None = None, + source_kind: str | None = None, + novelty: FindingNoveltyFilter = "all", + sort_by: FindingSort = "default", + detail_level: DetailLevel = "normal", + changed_paths: Sequence[str] = (), + exclude_reviewed: bool = False, + ) -> list[dict[str, object]]: + findings = self._base_findings(record) + max_spread_value = max( + 
(self._spread_value(finding) for finding in findings), + default=0, + ) + with self._state_lock: + self._spread_max_cache[record.run_id] = max_spread_value + filtered = [ + finding + for finding in findings + if self._matches_finding_filters( + finding=finding, + family=family, + category=category, + severity=severity, + source_kind=source_kind, + novelty=novelty, + ) + and ( + not changed_paths + or self._finding_touches_paths( + finding=finding, + changed_paths=changed_paths, + ) + ) + and (not exclude_reviewed or not self._finding_is_reviewed(record, finding)) + ] + remediation_map = { + str(finding.get("id", "")): self._remediation_for_finding(record, finding) + for finding in filtered + } + priority_map = { + str(finding.get("id", "")): self._priority_score( + record, + finding, + remediation=remediation_map[str(finding.get("id", ""))], + max_spread_value=max_spread_value, + ) + for finding in filtered + } + ordered = self._sort_findings( + record=record, + findings=filtered, + sort_by=sort_by, + priority_map=priority_map, + ) + return [ + self._decorate_finding( + record, + finding, + detail_level=detail_level, + remediation=remediation_map[str(finding.get("id", ""))], + priority_payload=priority_map[str(finding.get("id", ""))], + max_spread_value=max_spread_value, + ) + for finding in ordered + ] + + def _sort_findings( + self, + *, + record: MCPRunRecord, + findings: Sequence[Mapping[str, object]], + sort_by: FindingSort, + priority_map: Mapping[str, Mapping[str, object]] | None = None, + ) -> list[dict[str, object]]: + finding_rows = [dict(finding) for finding in findings] + if sort_by == "default": + return finding_rows + if sort_by == "severity": + finding_rows.sort( + key=lambda finding: ( + -_helpers._severity_rank(str(finding.get("severity", ""))), + str(finding.get("id", "")), + ) + ) + elif sort_by == "spread": + finding_rows.sort( + key=lambda finding: ( + -self._spread_value(finding), + -_as_float(finding.get("priority", 0.0), 0.0), + 
str(finding.get("id", "")), + ) + ) + else: + finding_rows.sort( + key=lambda finding: ( + -_as_float( + _helpers._as_mapping( + (priority_map or {}).get(str(finding.get("id", ""))) + ).get("score", 0.0), + 0.0, + ) + if priority_map is not None + else -_as_float( + self._priority_score(record, finding)["score"], + 0.0, + ), + -_helpers._severity_rank(str(finding.get("severity", ""))), + str(finding.get("id", "")), + ) + ) + return finding_rows + + def _decorate_finding( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + *, + detail_level: DetailLevel, + remediation: Mapping[str, object] | None = None, + priority_payload: Mapping[str, object] | None = None, + max_spread_value: int | None = None, + ) -> dict[str, object]: + resolved_remediation = ( + remediation + if remediation is not None + else self._remediation_for_finding(record, finding) + ) + resolved_priority_payload = ( + dict(priority_payload) + if priority_payload is not None + else self._priority_score( + record, + finding, + remediation=resolved_remediation, + max_spread_value=max_spread_value, + ) + ) + payload = dict(finding) + payload["priority_score"] = resolved_priority_payload["score"] + payload["priority_factors"] = resolved_priority_payload["factors"] + payload["locations"] = self._locations_for_finding( + record, + finding, + include_uri=detail_level == "full", + ) + payload["html_anchor"] = f"finding-{finding.get('id', '')}" + if resolved_remediation is not None: + payload["remediation"] = resolved_remediation + return self._project_finding_detail( + record, + payload, + detail_level=detail_level, + ) + + def _project_finding_detail( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + *, + detail_level: DetailLevel, + ) -> dict[str, object]: + if detail_level == "full": + full_payload = dict(finding) + full_payload["id"] = self._short_finding_id( + record, + str(finding.get("id", "")), + ) + return full_payload + payload: dict[str, object] = { + "id": 
self._short_finding_id(record, str(finding.get("id", ""))), + "kind": _helpers._finding_kind_label(finding), + "severity": str(finding.get("severity", "")), + "novelty": str(finding.get("novelty", "")), + "scope": _helpers._finding_source_kind(finding), + "count": _as_int(finding.get("count", 0), 0), + "spread": dict(_helpers._as_mapping(finding.get("spread"))), + "priority": round(_as_float(finding.get("priority_score", 0.0), 0.0), 2), + } + clone_type = str(finding.get("clone_type", "")).strip() + if clone_type: + payload["type"] = clone_type + locations = [ + _helpers._as_mapping(item) + for item in _helpers._as_sequence(finding.get("locations")) + ] + if detail_level == "summary": + remediation = _helpers._as_mapping(finding.get("remediation")) + if remediation: + payload["effort"] = str(remediation.get("effort", "")) + payload["locations"] = [ + summary_location + for summary_location in ( + _helpers._summary_location_string(location) + for location in locations + ) + if summary_location + ] + return payload + remediation = _helpers._as_mapping(finding.get("remediation")) + if remediation: + payload["remediation"] = _helpers._project_remediation( + remediation, + detail_level="normal", + ) + payload["locations"] = [ + projected + for projected in ( + _helpers._normal_location_payload(location) for location in locations + ) + if projected + ] + return payload + + def _finding_summary_card( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + ) -> dict[str, object]: + return self._finding_summary_card_payload( + record, + self._decorate_finding(record, finding, detail_level="full"), + ) + + def _finding_summary_card_payload( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + ) -> dict[str, object]: + return self._project_finding_detail(record, finding, detail_level="summary") + + def _comparison_finding_card( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + ) -> dict[str, object]: + summary_card = 
self._finding_summary_card(record, finding) + return { + "id": summary_card.get("id"), + "kind": summary_card.get("kind"), + "severity": summary_card.get("severity"), + } + + def _matches_finding_filters( + self, + *, + finding: Mapping[str, object], + family: FindingFamilyFilter, + category: str | None = None, + severity: str | None, + source_kind: str | None, + novelty: FindingNoveltyFilter, + ) -> bool: + finding_family = str(finding.get("family", "")).strip() + if family != "all" and finding_family != family: + return False + if ( + category is not None + and str(finding.get("category", "")).strip() != category + ): + return False + if ( + severity is not None + and str(finding.get("severity", "")).strip() != severity + ): + return False + dominant_kind = str( + _helpers._as_mapping(finding.get("source_scope")).get("dominant_kind", "") + ).strip() + if source_kind is not None and dominant_kind != source_kind: + return False + return novelty == "all" or str(finding.get("novelty", "")).strip() == novelty + + def _finding_touches_paths( + self, + *, + finding: Mapping[str, object], + changed_paths: Sequence[str], + ) -> bool: + normalized_paths = tuple(changed_paths) + for item in _helpers._as_sequence(finding.get("items")): + relative_path = str( + _helpers._as_mapping(item).get("relative_path", "") + ).strip() + if relative_path and _helpers._path_matches( + relative_path, + normalized_paths, + ): + return True + return False + + def _finding_is_reviewed( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + ) -> bool: + with self._state_lock: + review_map = self._review_state.get(record.run_id, OrderedDict()) + return str(finding.get("id", "")) in review_map + + def _include_hotspot_finding( + self, + *, + record: MCPRunRecord, + finding: Mapping[str, object], + changed_paths: Sequence[str], + exclude_reviewed: bool, + ) -> bool: + if changed_paths and not self._finding_touches_paths( + finding=finding, + changed_paths=changed_paths, + ): + return 
False + return not exclude_reviewed or not self._finding_is_reviewed(record, finding) + + def _priority_score( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + *, + remediation: Mapping[str, object] | None = None, + max_spread_value: int | None = None, + ) -> dict[str, object]: + spread_weight = self._spread_weight( + record, + finding, + max_spread_value=max_spread_value, + ) + factors = { + "severity_weight": _SEVERITY_WEIGHT.get( + str(finding.get("severity", "")), + 0.2, + ), + "effort_weight": _EFFORT_WEIGHT.get( + ( + str(remediation.get("effort", EFFORT_MODERATE)) + if remediation is not None + else EFFORT_MODERATE + ), + 0.6, + ), + "novelty_weight": _NOVELTY_WEIGHT.get( + str(finding.get("novelty", "")), + 0.7, + ), + "runtime_weight": _RUNTIME_WEIGHT.get( + str( + _helpers._as_mapping(finding.get("source_scope")).get( + "dominant_kind", + "other", + ) + ), + 0.5, + ), + "spread_weight": spread_weight, + "confidence_weight": _CONFIDENCE_WEIGHT.get( + str(finding.get("confidence", CONFIDENCE_MEDIUM)), + 0.7, + ), + } + product = 1.0 + for value in factors.values(): + product *= max(_as_float(value, 0.01), 0.01) + score = product ** (1.0 / max(len(factors), 1)) + return { + "score": round(score, 4), + "factors": { + key: round(_as_float(value, 0.0), 4) for key, value in factors.items() + }, + } + + def _spread_weight( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + *, + max_spread_value: int | None = None, + ) -> float: + spread_value = self._spread_value(finding) + if max_spread_value is None: + with self._state_lock: + max_spread_value = self._spread_max_cache.get(record.run_id) + if max_spread_value is None: + max_spread_value = max( + (self._spread_value(item) for item in self._base_findings(record)), + default=0, + ) + with self._state_lock: + self._spread_max_cache[record.run_id] = max_spread_value + max_value = max_spread_value + if max_value <= 0: + return 0.3 + return max(0.2, min(1.0, spread_value / max_value)) 
+ + def _spread_value(self, finding: Mapping[str, object]) -> int: + spread = _helpers._as_mapping(finding.get("spread")) + files = _as_int(spread.get("files", 0), 0) + functions = _as_int(spread.get("functions", 0), 0) + count = _as_int(finding.get("count", 0), 0) + return max(files, functions, count, 1) + + def _locations_for_finding( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + *, + include_uri: bool = True, + ) -> list[dict[str, object]]: + locations: list[dict[str, object]] = [] + for item in _helpers._as_sequence(finding.get("items")): + item_map = _helpers._as_mapping(item) + relative_path = str(item_map.get("relative_path", "")).strip() + if not relative_path: + continue + line = _as_int(item_map.get("start_line", 0) or 0, 0) + end_line = _as_int(item_map.get("end_line", 0) or 0, 0) + symbol = str(item_map.get("qualname", item_map.get("module", ""))).strip() + location: dict[str, object] = { + "file": relative_path, + "line": line, + "end_line": end_line, + "symbol": symbol, + } + if include_uri: + absolute_path = (record.root / relative_path).resolve() + uri = absolute_path.as_uri() + if line > 0: + uri = f"{uri}#L{line}" + location["uri"] = uri + locations.append(location) + deduped: list[dict[str, object]] = [] + seen: set[tuple[str, int, str]] = set() + for location in locations: + key = ( + str(location.get("file", "")), + _as_int(location.get("line", 0), 0), + str(location.get("symbol", "")), + ) + if key not in seen: + seen.add(key) + deduped.append(location) + return deduped + + def _remediation_for_finding( + self, + record: MCPRunRecord, + finding: Mapping[str, object], + ) -> dict[str, object] | None: + suggestion = self._suggestion_for_finding(record, str(finding.get("id", ""))) + if suggestion is None: + return None + source_kind = str(getattr(suggestion, "source_kind", "other")) + spread_files = _as_int(getattr(suggestion, "spread_files", 0), 0) + spread_functions = _as_int(getattr(suggestion, "spread_functions", 0), 0) + 
title = str(getattr(suggestion, "title", "")).strip() + severity = str(finding.get("severity", "")).strip() + novelty = str(finding.get("novelty", "known")).strip() + count = _as_int( + getattr(suggestion, "fact_count", 0) or finding.get("count", 0) or 0, + 0, + ) + safe_refactor_shape = _helpers._safe_refactor_shape(suggestion) + effort = str(getattr(suggestion, "effort", EFFORT_MODERATE)) + confidence = str(getattr(suggestion, "confidence", CONFIDENCE_MEDIUM)) + risk_level = _helpers._risk_level_for_effort(effort) + return { + "effort": effort, + "priority": _as_float(getattr(suggestion, "priority", 0.0), 0.0), + "confidence": confidence, + "safe_refactor_shape": safe_refactor_shape, + "steps": list(getattr(suggestion, "steps", ())), + "risk_level": risk_level, + "why_now": _helpers._why_now_text( + title=title, + severity=severity, + novelty=novelty, + count=count, + source_kind=source_kind, + spread_files=spread_files, + spread_functions=spread_functions, + effort=effort, + ), + "blast_radius": { + "files": spread_files, + "functions": spread_functions, + "is_production": source_kind == "production", + }, + } + + def _suggestion_for_finding( + self, + record: MCPRunRecord, + finding_id: str, + ) -> object | None: + for suggestion in record.suggestions: + if _helpers._suggestion_finding_id(suggestion) == finding_id: + return suggestion + return None + + def _hotspot_rows( + self, + *, + record: MCPRunRecord, + kind: HotlistKind, + detail_level: DetailLevel, + changed_paths: Sequence[str], + exclude_reviewed: bool, + ) -> list[dict[str, object]]: + findings = self._base_findings(record) + finding_index = {str(finding.get("id", "")): finding for finding in findings} + max_spread_value = max( + (self._spread_value(finding) for finding in findings), + default=0, + ) + with self._state_lock: + self._spread_max_cache[record.run_id] = max_spread_value + remediation_map = { + str(finding.get("id", "")): self._remediation_for_finding(record, finding) + for finding in 
findings + } + priority_map = { + str(finding.get("id", "")): self._priority_score( + record, + finding, + remediation=remediation_map[str(finding.get("id", ""))], + max_spread_value=max_spread_value, + ) + for finding in findings + } + derived = _helpers._as_mapping(record.report_document.get("derived")) + hotlists = _helpers._as_mapping(derived.get("hotlists")) + if kind == "highest_priority": + ordered_ids = [ + str(finding.get("id", "")) + for finding in self._sort_findings( + record=record, + findings=findings, + sort_by="priority", + priority_map=priority_map, + ) + ] + else: + hotlist_key = _HOTLIST_REPORT_KEYS.get(kind) + if hotlist_key is None: + return [] + ordered_ids = [ + str(item) + for item in _helpers._as_sequence(hotlists.get(hotlist_key)) + if str(item) + ] + rows: list[dict[str, object]] = [] + for finding_id in ordered_ids: + finding = finding_index.get(finding_id) + if finding is None or not self._include_hotspot_finding( + record=record, + finding=finding, + changed_paths=changed_paths, + exclude_reviewed=exclude_reviewed, + ): + continue + finding_id_key = str(finding.get("id", "")) + rows.append( + self._decorate_finding( + record, + finding, + detail_level=detail_level, + remediation=remediation_map[finding_id_key], + priority_payload=priority_map[finding_id_key], + max_spread_value=max_spread_value, + ) + ) + return rows + + def _granular_payload( + self, + *, + record: MCPRunRecord, + check: str, + items: Sequence[Mapping[str, object]], + detail_level: DetailLevel, + max_results: int, + path: str | None, + threshold_context: Mapping[str, object] | None = None, + ) -> dict[str, object]: + bounded_items = [dict(item) for item in items[: max(1, max_results)]] + full_health = dict(_helpers._as_mapping(record.summary.get("health"))) + dimensions = _helpers._as_mapping(full_health.get("dimensions")) + relevant_dimension = _CHECK_TO_DIMENSION.get(check) + slim_dimensions = ( + {relevant_dimension: dimensions.get(relevant_dimension)} + if 
relevant_dimension and relevant_dimension in dimensions + else dict(dimensions) + ) + payload: dict[str, object] = { + "run_id": _helpers._short_run_id(record.run_id), + "check": check, + "detail_level": detail_level, + "path": path, + "returned": len(bounded_items), + "total": len(items), + "health": { + "score": full_health.get("score"), + "grade": full_health.get("grade"), + "dimensions": slim_dimensions, + }, + "items": bounded_items, + } + if threshold_context: + payload["threshold_context"] = dict(threshold_context) + return payload + + def _design_threshold_context( + self, + *, + record: MCPRunRecord, + check: str, + path: str | None, + items: Sequence[Mapping[str, object]], + requested_min: int | None = None, + ) -> dict[str, object] | None: + if items: + return None + spec = _DESIGN_CHECK_CONTEXT.get(check) + if spec is None: + return None + category = str(spec["category"]) + metric = str(spec["metric"]) + operator = str(spec["operator"]) + normalized_path = _helpers._normalize_relative_path(path or "") + metrics = _helpers._as_mapping(record.report_document.get("metrics")) + families = _helpers._as_mapping(metrics.get("families")) + family = _helpers._as_mapping(families.get(category)) + metric_items = [ + _helpers._as_mapping(item) + for item in _helpers._as_sequence(family.get("items")) + if not normalized_path + or _helpers._metric_item_matches_path( + _helpers._as_mapping(item), + normalized_path, + ) + ] + if not metric_items: + return None + values = [_as_int(item.get(metric), 0) for item in metric_items] + finding_threshold = self._design_finding_threshold( + record=record, + check=check, + ) + threshold = finding_threshold + threshold_kind = "finding_threshold" + if requested_min is not None and requested_min > finding_threshold: + threshold = requested_min + threshold_kind = "requested_min" + highest_below = _helpers._highest_below_threshold( + values=values, + operator=operator, + threshold=threshold, + ) + payload: dict[str, object] = { + 
"metric": metric, + "threshold": threshold, + "threshold_kind": threshold_kind, + "measured_units": len(metric_items), + } + if threshold_kind != "finding_threshold": + payload["finding_threshold"] = finding_threshold + if highest_below is not None: + payload["highest_below_threshold"] = highest_below + return payload + + def _design_finding_threshold( + self, + *, + record: MCPRunRecord, + check: str, + ) -> int: + spec = _DESIGN_CHECK_CONTEXT[check] + category = str(spec["category"]) + default_threshold = _as_int(spec["default_threshold"]) + findings = _helpers._as_mapping(record.report_document.get("findings")) + thresholds = _helpers._as_mapping( + _helpers._as_mapping(findings.get("thresholds")).get("design_findings") + ) + threshold_payload = _helpers._as_mapping(thresholds.get(category)) + if threshold_payload: + return _as_int(threshold_payload.get("value"), default_threshold) + request_value = { + "complexity": record.request.complexity_threshold, + "coupling": record.request.coupling_threshold, + "cohesion": record.request.cohesion_threshold, + }.get(check) + return _as_int(request_value, default_threshold) + + def _triage_suggestion_rows(self, record: MCPRunRecord) -> list[dict[str, object]]: + derived = _helpers._as_mapping(record.report_document.get("derived")) + canonical_rows = _helpers._dict_list(derived.get("suggestions")) + suggestion_source_kinds = { + _helpers._suggestion_finding_id( + suggestion + ): _helpers._normalized_source_kind( + getattr(suggestion, "source_kind", SOURCE_KIND_OTHER) + ) + for suggestion in record.suggestions + } + rows: list[dict[str, object]] = [] + for row in canonical_rows: + canonical_finding_id = str(row.get("finding_id", "")) + action = _helpers._as_mapping(row.get("action")) + try: + finding_id = self._short_finding_id( + record, + self._resolve_canonical_finding_id(record, canonical_finding_id), + ) + except MCPFindingNotFoundError: + finding_id = _helpers._base_short_finding_id(canonical_finding_id) + 
rows.append( + { + "id": f"suggestion:{finding_id}", + "finding_id": finding_id, + "title": str(row.get("title", "")), + "summary": str(row.get("summary", "")), + "effort": str(action.get("effort", "")), + "steps": list(_helpers._as_sequence(action.get("steps"))), + "source_kind": suggestion_source_kinds.get( + canonical_finding_id, + SOURCE_KIND_OTHER, + ), + } + ) + return rows + + def list_findings( + self, + *, + run_id: str | None = None, + family: FindingFamilyFilter = "all", + category: str | None = None, + severity: str | None = None, + source_kind: str | None = None, + novelty: FindingNoveltyFilter = "all", + sort_by: FindingSort = "default", + detail_level: DetailLevel = "summary", + changed_paths: Sequence[str] = (), + git_diff_ref: str | None = None, + exclude_reviewed: bool = False, + offset: int = 0, + limit: int = 50, + max_results: int | None = None, + ) -> dict[str, object]: + validated_family = _helpers._validate_choice( + "family", + family, + _VALID_FINDING_FAMILIES, + ) + validated_novelty = _helpers._validate_choice( + "novelty", + novelty, + _VALID_FINDING_NOVELTY, + ) + validated_sort = _helpers._validate_choice( + "sort_by", + sort_by, + _VALID_FINDING_SORT, + ) + validated_detail = _helpers._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, + ) + validated_severity = _helpers._validate_optional_choice( + "severity", + severity, + _VALID_SEVERITIES, + ) + record = self._runs.get(run_id) + paths_filter = self._resolve_query_changed_paths( + record=record, + changed_paths=changed_paths, + git_diff_ref=git_diff_ref, + ) + normalized_limit = max( + 1, + min(max_results if max_results is not None else limit, 200), + ) + filtered = self._query_findings( + record=record, + family=validated_family, + category=category, + severity=validated_severity, + source_kind=source_kind, + novelty=validated_novelty, + sort_by=validated_sort, + detail_level=validated_detail, + changed_paths=paths_filter, + 
exclude_reviewed=exclude_reviewed, + ) + page = paginate( + filtered, + offset=offset, + limit=normalized_limit, + max_limit=200, + ) + return { + "run_id": _helpers._short_run_id(record.run_id), + "detail_level": validated_detail, + "sort_by": validated_sort, + "changed_paths": list(paths_filter), + "offset": page.offset, + "limit": page.limit, + "returned": len(page.items), + "total": page.total, + "next_offset": page.next_offset, + "items": page.items, + } + + def get_finding( + self, + *, + finding_id: str, + run_id: str | None = None, + detail_level: DetailLevel = "normal", + ) -> dict[str, object]: + record = self._runs.get(run_id) + validated_detail = _helpers._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, + ) + canonical_id = self._resolve_canonical_finding_id(record, finding_id) + for finding in self._base_findings(record): + if str(finding.get("id")) == canonical_id: + return self._decorate_finding( + record, + finding, + detail_level=validated_detail, + ) + raise MCPFindingNotFoundError( + f"Finding id '{finding_id}' was not found in run " + f"'{_helpers._short_run_id(record.run_id)}'." 
+ ) + + def _service_get_finding( + self, + *, + finding_id: str, + run_id: str | None = None, + detail_level: DetailLevel = "normal", + ) -> dict[str, object]: + return self.get_finding( + finding_id=finding_id, + run_id=run_id, + detail_level=detail_level, + ) + + def get_remediation( + self, + *, + finding_id: str, + run_id: str | None = None, + detail_level: DetailLevel = "normal", + ) -> dict[str, object]: + validated_detail = _helpers._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, + ) + record = self._runs.get(run_id) + canonical_id = self._resolve_canonical_finding_id(record, finding_id) + finding = self._service_get_finding( + finding_id=canonical_id, + run_id=record.run_id, + detail_level="full", + ) + remediation = _helpers._as_mapping(finding.get("remediation")) + if not remediation: + raise MCPFindingNotFoundError( + f"Finding id '{finding_id}' does not expose remediation guidance." + ) + return { + "run_id": _helpers._short_run_id(record.run_id), + "finding_id": self._short_finding_id(record, canonical_id), + "detail_level": validated_detail, + "remediation": _helpers._project_remediation( + remediation, + detail_level=validated_detail, + ), + } + + def list_hotspots( + self, + *, + kind: HotlistKind, + run_id: str | None = None, + detail_level: DetailLevel = "summary", + changed_paths: Sequence[str] = (), + git_diff_ref: str | None = None, + exclude_reviewed: bool = False, + limit: int = 10, + max_results: int | None = None, + ) -> dict[str, object]: + validated_kind = _helpers._validate_choice("kind", kind, _VALID_HOTLIST_KINDS) + validated_detail = _helpers._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, + ) + record = self._runs.get(run_id) + paths_filter = self._resolve_query_changed_paths( + record=record, + changed_paths=changed_paths, + git_diff_ref=git_diff_ref, + ) + rows = self._hotspot_rows( + record=record, + kind=validated_kind, + detail_level=validated_detail, + 
changed_paths=paths_filter, + exclude_reviewed=exclude_reviewed, + ) + normalized_limit = max( + 1, + min(max_results if max_results is not None else limit, 50), + ) + return { + "run_id": _helpers._short_run_id(record.run_id), + "kind": validated_kind, + "detail_level": validated_detail, + "changed_paths": list(paths_filter), + "returned": min(len(rows), normalized_limit), + "total": len(rows), + "items": [ + dict(_helpers._as_mapping(item)) for item in rows[:normalized_limit] + ], + } + + def mark_finding_reviewed( + self, + *, + finding_id: str, + run_id: str | None = None, + note: str | None = None, + ) -> dict[str, object]: + record = self._runs.get(run_id) + canonical_id = self._resolve_canonical_finding_id(record, finding_id) + self._service_get_finding( + finding_id=canonical_id, + run_id=record.run_id, + detail_level="normal", + ) + with self._state_lock: + review_map = self._review_state.setdefault(record.run_id, OrderedDict()) + review_map[canonical_id] = ( + note.strip() if isinstance(note, str) and note.strip() else None + ) + review_map.move_to_end(canonical_id) + return { + "run_id": _helpers._short_run_id(record.run_id), + "finding_id": self._short_finding_id(record, canonical_id), + "reviewed": True, + "note": review_map[canonical_id], + "reviewed_count": len(review_map), + } + + def list_reviewed_findings( + self, + *, + run_id: str | None = None, + ) -> dict[str, object]: + record = self._runs.get(run_id) + with self._state_lock: + review_items = tuple( + self._review_state.get(record.run_id, OrderedDict()).items() + ) + items = [] + for finding_id, note in review_items: + try: + finding = self._service_get_finding( + finding_id=finding_id, + run_id=record.run_id, + ) + except MCPFindingNotFoundError: + continue + items.append( + { + "finding_id": self._short_finding_id(record, finding_id), + "note": note, + "finding": self._project_finding_detail( + record, + finding, + detail_level="summary", + ), + } + ) + return { + "run_id": 
_helpers._short_run_id(record.run_id), + "reviewed_count": len(items), + "items": items, + } + + def check_complexity( + self, + *, + run_id: str | None = None, + root: str | None = None, + path: str | None = None, + min_complexity: int | None = None, + max_results: int = 10, + detail_level: DetailLevel = "summary", + ) -> dict[str, object]: + validated_detail = _helpers._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, + ) + record = self._resolve_granular_record( + run_id=run_id, + root=root, + analysis_mode="full", + ) + findings = self._query_findings( + record=record, + family="design", + category=CATEGORY_COMPLEXITY, + detail_level=validated_detail, + changed_paths=self._path_filter_tuple(path), + sort_by="priority", + ) + if min_complexity is not None: + findings = [ + finding + for finding in findings + if _as_int( + _helpers._as_mapping(finding.get("facts")).get( + "cyclomatic_complexity", + 0, + ) + ) + >= min_complexity + ] + return self._granular_payload( + record=record, + check="complexity", + items=findings, + detail_level=validated_detail, + max_results=max_results, + path=path, + threshold_context=self._design_threshold_context( + record=record, + check="complexity", + path=path, + items=findings, + requested_min=min_complexity, + ), + ) + + def check_clones( + self, + *, + run_id: str | None = None, + root: str | None = None, + path: str | None = None, + clone_type: str | None = None, + source_kind: str | None = None, + max_results: int = 10, + detail_level: DetailLevel = "summary", + ) -> dict[str, object]: + validated_detail = _helpers._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, + ) + record = self._resolve_granular_record( + run_id=run_id, + root=root, + analysis_mode="clones_only", + ) + findings = self._query_findings( + record=record, + family="clone", + source_kind=source_kind, + detail_level=validated_detail, + changed_paths=self._path_filter_tuple(path), + sort_by="priority", + ) + 
if clone_type is not None: + findings = [ + finding + for finding in findings + if str(finding.get("clone_type", "")).strip() == clone_type + ] + return self._granular_payload( + record=record, + check="clones", + items=findings, + detail_level=validated_detail, + max_results=max_results, + path=path, + ) + + def check_coupling( + self, + *, + run_id: str | None = None, + root: str | None = None, + path: str | None = None, + max_results: int = 10, + detail_level: DetailLevel = "summary", + ) -> dict[str, object]: + return self._check_design_metric( + run_id=run_id, + root=root, + path=path, + max_results=max_results, + detail_level=detail_level, + category=CATEGORY_COUPLING, + check="coupling", + ) + + def check_cohesion( + self, + *, + run_id: str | None = None, + root: str | None = None, + path: str | None = None, + max_results: int = 10, + detail_level: DetailLevel = "summary", + ) -> dict[str, object]: + return self._check_design_metric( + run_id=run_id, + root=root, + path=path, + max_results=max_results, + detail_level=detail_level, + category=CATEGORY_COHESION, + check="cohesion", + ) + + def _check_design_metric( + self, + *, + run_id: str | None, + root: str | None, + path: str | None, + max_results: int, + detail_level: DetailLevel, + category: str, + check: str, + ) -> dict[str, object]: + validated_detail = _helpers._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, + ) + record = self._resolve_granular_record( + run_id=run_id, + root=root, + analysis_mode="full", + ) + findings = self._query_findings( + record=record, + family="design", + category=category, + detail_level=validated_detail, + changed_paths=self._path_filter_tuple(path), + sort_by="priority", + ) + return self._granular_payload( + record=record, + check=check, + items=findings, + detail_level=validated_detail, + max_results=max_results, + path=path, + threshold_context=self._design_threshold_context( + record=record, + check=check, + path=path, + items=findings, 
+ ), + ) + + def check_dead_code( + self, + *, + run_id: str | None = None, + root: str | None = None, + path: str | None = None, + min_severity: str | None = None, + max_results: int = 10, + detail_level: DetailLevel = "summary", + ) -> dict[str, object]: + validated_detail = _helpers._validate_choice( + "detail_level", + detail_level, + _VALID_DETAIL_LEVELS, + ) + validated_min_severity = _helpers._validate_optional_choice( + "min_severity", + min_severity, + _VALID_SEVERITIES, + ) + record = self._resolve_granular_record( + run_id=run_id, + root=root, + analysis_mode="full", + ) + findings = self._query_findings( + record=record, + family="dead_code", + detail_level=validated_detail, + changed_paths=self._path_filter_tuple(path), + sort_by="priority", + ) + if validated_min_severity is not None: + findings = [ + finding + for finding in findings + if _helpers._severity_rank(str(finding.get("severity", ""))) + >= _helpers._severity_rank(validated_min_severity) + ] + return self._granular_payload( + record=record, + check="dead_code", + items=findings, + detail_level=validated_detail, + max_results=max_results, + path=path, + ) diff --git a/codeclone/surfaces/mcp/_session_helpers.py b/codeclone/surfaces/mcp/_session_helpers.py new file mode 100644 index 0000000..062fe4b --- /dev/null +++ b/codeclone/surfaces/mcp/_session_helpers.py @@ -0,0 +1,901 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from ...cache.store import Cache +from ...contracts import REPORT_SCHEMA_VERSION +from ...domain.findings import ( + CATEGORY_CLONE, + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CATEGORY_STRUCTURAL, + FAMILY_CLONE, + FAMILY_DEAD_CODE, +) +from ...domain.quality import ( + EFFORT_EASY, + EFFORT_HARD, + EFFORT_MODERATE, + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_WARNING, +) +from ...domain.source_scope import ( + SOURCE_KIND_ORDER, + SOURCE_KIND_OTHER, +) +from ...models import MetricsDiff +from ._session_runtime import resolve_cache_path +from ._session_shared import ( + _COMPACT_ITEM_EMPTY_VALUES, + _COMPACT_ITEM_PATH_KEYS, + _SHORT_RUN_ID_LENGTH, + _SOURCE_KIND_BREAKDOWN_ORDER, + DEFAULT_BLOCK_MIN_LOC, + DEFAULT_BLOCK_MIN_STMT, + DEFAULT_MIN_LOC, + DEFAULT_MIN_STMT, + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, + AnalysisMode, + CachePolicy, + ChoiceT, + DetailLevel, + FreshnessKind, + Iterable, + Mapping, + MCPAnalysisRequest, + MCPRunRecord, + MCPServiceContractError, + MCPServiceError, + MetricsDetailFamily, + Namespace, + Path, + Sequence, + _as_int, + _base_short_finding_id_payload, + _disambiguated_clone_short_ids_payload, + _disambiguated_short_finding_id_payload, + _leaf_symbol_name_payload, + _load_report_document_payload, + _suggestion_finding_id_payload, + _summarize_metrics_diff, +) +from .payloads import short_id + + +def _summary_health_payload(summary: Mapping[str, object]) -> dict[str, object]: + if str(summary.get("analysis_mode", "")) == "clones_only": + return {"available": False, "reason": "metrics_skipped"} + health = dict(_as_mapping(summary.get("health"))) + if health: + return health + return {"available": False, 
"reason": "unavailable"} + + +def _summary_health_score(summary: Mapping[str, object]) -> int | None: + health = _summary_health_payload(summary) + if health.get("available") is False: + return None + return _as_int(health.get("score", 0), 0) + + +def _summary_health_delta(summary: Mapping[str, object]) -> int | None: + if _summary_health_payload(summary).get("available") is False: + return None + metrics_diff = _as_mapping(summary.get("metrics_diff")) + return _as_int(metrics_diff.get("health_delta", 0), 0) + + +def _severity_rank(severity: str) -> int: + return { + SEVERITY_CRITICAL: 3, + SEVERITY_WARNING: 2, + SEVERITY_INFO: 1, + }.get(severity, 0) + + +def _validate_choice( + name: str, + value: ChoiceT, + allowed: Sequence[str] | frozenset[str], +) -> ChoiceT: + if value not in allowed: + allowed_list = ", ".join(sorted(allowed)) + raise MCPServiceContractError( + f"Invalid value for {name}: {value!r}. Expected one of: {allowed_list}." + ) + return value + + +def _validate_optional_choice( + name: str, + value: ChoiceT | None, + allowed: Sequence[str] | frozenset[str], +) -> ChoiceT | None: + if value is None: + return None + return _validate_choice(name, value, allowed) + + +def _metrics_detail_family(value: str | None) -> MetricsDetailFamily | None: + match value: + case "complexity": + return "complexity" + case "coupling": + return "coupling" + case "cohesion": + return "cohesion" + case "coverage_adoption": + return "coverage_adoption" + case "coverage_join": + return "coverage_join" + case "dependencies": + return "dependencies" + case "dead_code": + return "dead_code" + case "api_surface": + return "api_surface" + case "god_modules" | "overloaded_modules": + return "overloaded_modules" + case "health": + return "health" + case _: + return None + + +def _dict_rows(value: object) -> list[dict[str, object]]: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + return [] + return [dict(item) for item in value if 
isinstance(item, Mapping)] + + +def _string_rows(value: object) -> list[str]: + if not isinstance(value, Sequence) or isinstance(value, (str, bytes, bytearray)): + return [] + return [str(item) for item in value if isinstance(item, str)] + + +def _dict_list(value: object) -> list[dict[str, object]]: + return [dict(_as_mapping(item)) for item in _as_sequence(value)] + + +def _as_mapping(value: object) -> Mapping[str, object]: + return value if isinstance(value, Mapping) else {} + + +def _as_sequence(value: object) -> Sequence[object]: + if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)): + return value + return () + + +def _short_run_id(run_id: str) -> str: + return short_id(run_id, length=_SHORT_RUN_ID_LENGTH) + + +def _normalize_relative_path(path: str) -> str: + cleaned = path.strip() + if cleaned == ".": + return "" + if cleaned.startswith("./"): + cleaned = cleaned[2:] + cleaned = cleaned.rstrip("/") + if ".." in Path(cleaned).parts: + raise MCPServiceContractError(f"path traversal not allowed: {path}") + return cleaned + + +def _path_matches(relative_path: str, changed_paths: Sequence[str]) -> bool: + return any( + relative_path == candidate or relative_path.startswith(candidate + "/") + for candidate in changed_paths + ) + + +def _record_supports_analysis_mode( + record: MCPRunRecord, + *, + analysis_mode: AnalysisMode, +) -> bool: + record_mode = record.request.analysis_mode + if analysis_mode == "clones_only": + return record_mode in {"clones_only", "full"} + return record_mode == "full" + + +def _resolve_root(root: str | None) -> Path: + if not isinstance(root, str) or not root.strip(): + raise MCPServiceContractError( + "CodeClone MCP analyze_repository requires an absolute repository root." + ) + root_path = Path(root).expanduser() + if not root_path.is_absolute(): + raise MCPServiceContractError( + "CodeClone MCP analyze_repository requires an absolute repository root." 
+ ) + try: + resolved = root_path.resolve() + except OSError as exc: + raise MCPServiceContractError( + f"Unable to resolve repository root '{root}': {exc}" + ) from exc + if not resolved.exists(): + raise MCPServiceContractError(f"Repository root '{resolved}' does not exist.") + if not resolved.is_dir(): + raise MCPServiceContractError( + f"Repository root '{resolved}' is not a directory." + ) + return resolved + + +def _resolve_optional_path(value: str, root_path: Path) -> Path: + candidate = Path(value).expanduser() + resolved = candidate if candidate.is_absolute() else root_path / candidate + try: + return resolved.resolve() + except OSError as exc: + raise MCPServiceContractError( + f"Invalid path '{value}' relative to '{root_path}': {exc}" + ) from exc + + +def _base_short_finding_id(canonical_id: str) -> str: + return _base_short_finding_id_payload(canonical_id) + + +def _disambiguated_short_finding_id(canonical_id: str) -> str: + return _disambiguated_short_finding_id_payload(canonical_id) + + +def _disambiguated_short_finding_ids( + canonical_ids: Sequence[str], +) -> dict[str, str]: + clone_ids = [ + canonical_id + for canonical_id in canonical_ids + if canonical_id.startswith("clone:") + ] + if len(clone_ids) == len(canonical_ids): + clone_short_ids = _disambiguated_clone_short_ids_payload(clone_ids) + if len(set(clone_short_ids.values())) == len(clone_short_ids): + return clone_short_ids + return { + canonical_id: _disambiguated_short_finding_id(canonical_id) + for canonical_id in canonical_ids + } + + +def _leaf_symbol_name(value: object) -> str: + return _leaf_symbol_name_payload(value) + + +def _finding_kind_label(finding: Mapping[str, object]) -> str: + family = str(finding.get("family", "")).strip() + kind = str(finding.get("kind", finding.get("category", ""))).strip() + if family == FAMILY_CLONE: + clone_kind = str( + finding.get("clone_kind", finding.get("category", kind)) + ).strip() + return f"{clone_kind}_clone" if clone_kind else "clone" + if 
family == FAMILY_DEAD_CODE: + return "dead_code" + return kind or family + + +def _summary_location_string(location: Mapping[str, object]) -> str: + path = str(location.get("file", "")).strip() + line = _as_int(location.get("line", 0), 0) + if not path: + return "" + return f"{path}:{line}" if line > 0 else path + + +def _normal_location_payload(location: Mapping[str, object]) -> dict[str, object]: + path = str(location.get("file", "")).strip() + if not path: + return {} + payload: dict[str, object] = { + "path": path, + "line": _as_int(location.get("line", 0), 0), + "end_line": _as_int(location.get("end_line", 0), 0), + } + symbol = _leaf_symbol_name(location.get("symbol")) + if symbol: + payload["symbol"] = symbol + return payload + + +def _suggestion_finding_id(suggestion: object) -> str: + return _suggestion_finding_id_payload(suggestion) + + +def _project_remediation( + remediation: Mapping[str, object], + *, + detail_level: DetailLevel, +) -> dict[str, object]: + if detail_level == "full": + return dict(remediation) + projected = { + "effort": remediation.get("effort"), + "risk": remediation.get("risk_level"), + "shape": remediation.get("safe_refactor_shape"), + "why_now": remediation.get("why_now"), + } + if detail_level == "summary": + return projected + projected["steps"] = list(_as_sequence(remediation.get("steps"))) + return projected + + +def _safe_refactor_shape(suggestion: object) -> str: + category = str(getattr(suggestion, "category", "")).strip() + clone_type = str(getattr(suggestion, "clone_type", "")).strip() + title = str(getattr(suggestion, "title", "")).strip() + if category == CATEGORY_CLONE and clone_type == "Type-1": + return "Keep one canonical implementation and route callers through it." + if category == CATEGORY_CLONE and clone_type == "Type-2": + return "Extract shared implementation with explicit parameters." + if category == CATEGORY_CLONE and "Block" in title: + return "Extract the repeated statement sequence into a helper." 
+ if category == CATEGORY_STRUCTURAL: + return "Extract the repeated branch family into a named helper." + if category == CATEGORY_COMPLEXITY: + return "Split the function into smaller named steps." + if category == CATEGORY_COUPLING: + return "Isolate responsibilities and invert unnecessary dependencies." + if category == CATEGORY_COHESION: + return "Split the class by responsibility boundary." + if category == CATEGORY_DEAD_CODE: + return "Delete the unused symbol or document intentional reachability." + if category == CATEGORY_DEPENDENCY: + return "Break the cycle by moving shared abstractions to a lower layer." + return "Extract the repeated logic into a shared, named abstraction." + + +def _risk_level_for_effort(effort: str) -> str: + return { + EFFORT_EASY: "low", + EFFORT_MODERATE: "medium", + EFFORT_HARD: "high", + }.get(effort, "medium") + + +def _why_now_text( + *, + title: str, + severity: str, + novelty: str, + count: int, + source_kind: str, + spread_files: int, + spread_functions: int, + effort: str, +) -> str: + novelty_text = "new regression" if novelty == "new" else "known debt" + context = ( + "production code" + if source_kind == "production" + else source_kind or "mixed scope" + ) + spread_text = f"{spread_files} files / {spread_functions} functions" + count_text = f"{count} instances" if count > 0 else "localized issue" + return ( + f"{severity.upper()} {title} in {context} — {count_text}, " + f"{spread_text}, {effort} fix, {novelty_text}." 
+ ) + + +def _highest_below_threshold( + *, + values: Sequence[int], + operator: str, + threshold: int, +) -> int | None: + if operator == ">": + below = [value for value in values if value <= threshold] + elif operator == ">=": + below = [value for value in values if value < threshold] + else: + return None + return max(below) if below else None + + +def _normalized_source_kind(value: object) -> str: + normalized = str(value).strip().lower() + if normalized in SOURCE_KIND_ORDER: + return normalized + return SOURCE_KIND_OTHER + + +def _finding_source_kind(finding: Mapping[str, object]) -> str: + source_scope = _as_mapping(finding.get("source_scope")) + return _normalized_source_kind(source_scope.get("dominant_kind")) + + +def _source_kind_breakdown(source_kinds: Iterable[object]) -> dict[str, int]: + breakdown = dict.fromkeys(_SOURCE_KIND_BREAKDOWN_ORDER, 0) + for value in source_kinds: + breakdown[_normalized_source_kind(value)] += 1 + return breakdown + + +def _metric_item_matches_path(item: Mapping[str, object], normalized_path: str) -> bool: + path_value = ( + str(item.get("relative_path", "")).strip() + or str(item.get("path", "")).strip() + or str(item.get("filepath", "")).strip() + or str(item.get("file", "")).strip() + ) + if not path_value: + return False + return _path_matches(path_value, (normalized_path,)) + + +def _comparison_settings( + *, + args: Namespace, + request: MCPAnalysisRequest, +) -> tuple[object, ...]: + return ( + request.analysis_mode, + _as_int(args.min_loc, DEFAULT_MIN_LOC), + _as_int(args.min_stmt, DEFAULT_MIN_STMT), + _as_int(args.block_min_loc, DEFAULT_BLOCK_MIN_LOC), + _as_int(args.block_min_stmt, DEFAULT_BLOCK_MIN_STMT), + _as_int(args.segment_min_loc, DEFAULT_SEGMENT_MIN_LOC), + _as_int(args.segment_min_stmt, DEFAULT_SEGMENT_MIN_STMT), + _as_int( + args.design_complexity_threshold, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + ), + _as_int( + args.design_coupling_threshold, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + ), + 
_as_int( + args.design_cohesion_threshold, + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + ), + ) + + +def _comparison_scope( + *, + before: MCPRunRecord, + after: MCPRunRecord, +) -> dict[str, object]: + same_root = before.root == after.root + same_analysis_settings = before.comparison_settings == after.comparison_settings + if same_root and same_analysis_settings: + reason = "comparable" + elif not same_root and not same_analysis_settings: + reason = "different_root_and_analysis_settings" + elif not same_root: + reason = "different_root" + else: + reason = "different_analysis_settings" + return { + "comparable": same_root and same_analysis_settings, + "same_root": same_root, + "same_analysis_settings": same_analysis_settings, + "reason": reason, + } + + +def _changed_verdict( + *, + changed_projection: Mapping[str, object], + health_delta: int | None, +) -> str: + if _as_int(changed_projection.get("new", 0), 0) > 0 or ( + health_delta is not None and health_delta < 0 + ): + return "regressed" + if ( + _as_int(changed_projection.get("total", 0), 0) == 0 + and health_delta is not None + and health_delta > 0 + ): + return "improved" + return "stable" + + +def _comparison_verdict( + *, + regressions: int, + improvements: int, + health_delta: int | None, +) -> str: + has_negative_signal = regressions > 0 or ( + health_delta is not None and health_delta < 0 + ) + has_positive_signal = improvements > 0 or ( + health_delta is not None and health_delta > 0 + ) + if has_negative_signal and has_positive_signal: + return "mixed" + if has_negative_signal: + return "regressed" + if has_positive_signal: + return "improved" + return "stable" + + +def _comparison_summary_text( + *, + comparable: bool, + comparability_reason: str, + regressions: int, + improvements: int, + health_delta: int | None, +) -> str: + if not comparable: + reason_text = { + "different_root": "different roots", + "different_analysis_settings": "different analysis settings", + 
"different_root_and_analysis_settings": ( + "different roots and analysis settings" + ), + }.get(comparability_reason, "incomparable runs") + return f"Finding and run health deltas omitted ({reason_text})" + if health_delta is None: + return ( + f"{improvements} findings resolved, {regressions} new regressions; " + "run health delta omitted (metrics unavailable)" + ) + return ( + f"{improvements} findings resolved, {regressions} new regressions, " + f"run health delta {health_delta:+d}" + ) + + +def _resolve_cache_path(*, root_path: Path, args: Namespace) -> Path: + return resolve_cache_path(root_path=root_path, args=args) + + +def _build_cache( + *, + root_path: Path, + args: Namespace, + cache_path: Path, + policy: CachePolicy, +) -> Cache: + cache = Cache( + cache_path, + root=root_path, + max_size_bytes=_as_int(args.max_cache_size_mb, 0) * 1024 * 1024, + min_loc=_as_int(args.min_loc, DEFAULT_MIN_LOC), + min_stmt=_as_int(args.min_stmt, DEFAULT_MIN_STMT), + block_min_loc=_as_int(args.block_min_loc, DEFAULT_BLOCK_MIN_LOC), + block_min_stmt=_as_int(args.block_min_stmt, DEFAULT_BLOCK_MIN_STMT), + segment_min_loc=_as_int(args.segment_min_loc, DEFAULT_SEGMENT_MIN_LOC), + segment_min_stmt=_as_int(args.segment_min_stmt, DEFAULT_SEGMENT_MIN_STMT), + collect_api_surface=bool(getattr(args, "api_surface", False)), + ) + if policy != "off": + cache.load() + return cache + + +def _metrics_computed(analysis_mode: AnalysisMode) -> tuple[str, ...]: + return ( + () + if analysis_mode == "clones_only" + else ( + "complexity", + "coupling", + "cohesion", + "health", + "dependencies", + "dead_code", + ) + ) + + +def _load_report_document(report_json: str) -> dict[str, object]: + return _load_report_document_payload(report_json) + + +def _report_digest(report_document: Mapping[str, object]) -> str: + integrity = _as_mapping(report_document.get("integrity")) + digest = _as_mapping(integrity.get("digest")) + value = digest.get("value") + if not isinstance(value, str) or not value: + 
raise MCPServiceError("Canonical report digest is missing.") + return value + + +def _summary_analysis_profile_payload(summary: Mapping[str, object]) -> dict[str, int]: + analysis_profile = _as_mapping(summary.get("analysis_profile")) + if not analysis_profile: + return {} + keys = ( + "min_loc", + "min_stmt", + "block_min_loc", + "block_min_stmt", + "segment_min_loc", + "segment_min_stmt", + ) + payload = {key: _as_int(analysis_profile.get(key), -1) for key in keys} + return {key: value for key, value in payload.items() if value >= 0} + + +def _summary_trusted_state_payload( + summary: Mapping[str, object], + *, + key: str, +) -> dict[str, object]: + baseline = _as_mapping(summary.get(key)) + trusted = bool(baseline.get("trusted_for_diff", False)) + payload: dict[str, object] = { + "loaded": bool(baseline.get("loaded", False)), + "status": str(baseline.get("status", "")), + "trusted": trusted, + } + if key == "baseline": + payload["compared_without_valid_baseline"] = not trusted + baseline_python_tag = baseline.get("python_tag") + runtime_python_tag = summary.get("python_tag") + if isinstance(baseline_python_tag, str) and baseline_python_tag.strip(): + payload["baseline_python_tag"] = baseline_python_tag + if isinstance(runtime_python_tag, str) and runtime_python_tag.strip(): + payload["runtime_python_tag"] = runtime_python_tag + return payload + + +def _summary_cache_payload(summary: Mapping[str, object]) -> dict[str, object]: + cache = dict(_as_mapping(summary.get("cache"))) + if not cache: + return {} + return { + "used": bool(cache.get("used", False)), + "freshness": _effective_freshness(summary), + } + + +def _effective_freshness(summary: Mapping[str, object]) -> FreshnessKind: + inventory = _as_mapping(summary.get("inventory")) + files = _as_mapping(inventory.get("files")) + analyzed = max(0, _as_int(files.get("analyzed", 0), 0)) + cached = max(0, _as_int(files.get("cached", 0), 0)) + cache = _as_mapping(summary.get("cache")) + cache_used = 
bool(cache.get("used")) + if cache_used and cached > 0 and analyzed == 0: + return "reused" + if cache_used and cached > 0 and analyzed > 0: + return "mixed" + return "fresh" + + +def _summary_inventory_payload(inventory: Mapping[str, object]) -> dict[str, object]: + if not inventory: + return {} + files = _as_mapping(inventory.get("files")) + code = _as_mapping(inventory.get("code")) + total_files = _as_int( + files.get( + "total_found", + files.get( + "analyzed", + len( + _as_sequence( + _as_mapping(inventory.get("file_registry")).get("items") + ) + ), + ), + ), + 0, + ) + functions = _as_int(code.get("functions", 0), 0) + _as_int( + code.get("methods", 0), + 0, + ) + return { + "files": total_files, + "lines": _as_int(code.get("parsed_lines", 0), 0), + "functions": functions, + "classes": _as_int(code.get("classes", 0), 0), + } + + +def _summary_diff_payload(summary: Mapping[str, object]) -> dict[str, object]: + baseline_diff = _as_mapping(summary.get("baseline_diff")) + metrics_diff = _as_mapping(summary.get("metrics_diff")) + return { + "new_clones": _as_int(baseline_diff.get("new_clone_groups_total", 0), 0), + "health_delta": ( + _as_int(metrics_diff.get("health_delta", 0), 0) + if ( + metrics_diff + and _summary_health_payload(summary).get("available") is not False + ) + else None + ), + "typing_param_permille_delta": _as_int( + metrics_diff.get("typing_param_permille_delta", 0), + 0, + ), + "typing_return_permille_delta": _as_int( + metrics_diff.get("typing_return_permille_delta", 0), + 0, + ), + "docstring_permille_delta": _as_int( + metrics_diff.get("docstring_permille_delta", 0), + 0, + ), + "api_breaking_changes": _as_int(metrics_diff.get("api_breaking_changes", 0), 0), + "new_api_symbols": _as_int(metrics_diff.get("new_api_symbols", 0), 0), + } + + +def _summary_coverage_join_payload(record: MCPRunRecord) -> dict[str, object]: + metrics = _as_mapping(record.report_document.get("metrics")) + families = _as_mapping(metrics.get("families")) + 
coverage_join = _as_mapping(families.get("coverage_join")) + summary = _as_mapping(coverage_join.get("summary")) + if not summary: + return {} + payload: dict[str, object] = { + "status": str(summary.get("status", "")).strip(), + "overall_permille": _as_int(summary.get("overall_permille", 0), 0), + "coverage_hotspots": _as_int(summary.get("coverage_hotspots", 0), 0), + "scope_gap_hotspots": _as_int(summary.get("scope_gap_hotspots", 0), 0), + "hotspot_threshold_percent": _as_int( + summary.get("hotspot_threshold_percent", 0), + 0, + ), + } + source_value = summary.get("source") + source = source_value.strip() if isinstance(source_value, str) else "" + if source: + payload["source"] = source + invalid_reason_value = summary.get("invalid_reason") + invalid_reason = ( + invalid_reason_value.strip() if isinstance(invalid_reason_value, str) else "" + ) + if invalid_reason: + payload["invalid_reason"] = invalid_reason + return payload + + +def _compact_metrics_item(item: Mapping[str, object]) -> dict[str, object]: + compact: dict[str, object] = {} + path_value = ( + str(item.get("relative_path", "")).strip() + or str(item.get("path", "")).strip() + or str(item.get("filepath", "")).strip() + or str(item.get("file", "")).strip() + ) + if path_value: + compact["path"] = path_value + for key, value in item.items(): + if ( + key not in _COMPACT_ITEM_PATH_KEYS + and value not in _COMPACT_ITEM_EMPTY_VALUES + ): + compact[str(key)] = value + return compact + + +def _metrics_diff_payload(metrics_diff: MetricsDiff | None) -> dict[str, object] | None: + payload = _summarize_metrics_diff(metrics_diff) + return dict(payload) if payload is not None else None + + +def _schema_resource_payload() -> dict[str, object]: + return { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "CodeCloneCanonicalReport", + "type": "object", + "required": [ + "report_schema_version", + "meta", + "inventory", + "findings", + "derived", + "integrity", + ], + "properties": { + 
"report_schema_version": { + "type": "string", + "const": REPORT_SCHEMA_VERSION, + }, + "meta": {"type": "object"}, + "inventory": {"type": "object"}, + "findings": {"type": "object"}, + "metrics": {"type": "object"}, + "derived": {"type": "object"}, + "integrity": {"type": "object"}, + }, + } + + +def _finding_display_location(finding: Mapping[str, object]) -> str: + locations = _as_sequence(finding.get("locations")) + if not locations: + return "(unknown)" + first = locations[0] + if isinstance(first, str): + return first + location = _as_mapping(first) + path = str(location.get("path", location.get("file", ""))).strip() + if not path: + return "(unknown)" + line = _as_int(location.get("line", 0), 0) + return f"{path}:{line}" if line > 0 else path + + +def _render_pr_summary_markdown(payload: Mapping[str, object]) -> str: + health = _as_mapping(payload.get("health")) + score = health.get("score", "n/a") + grade = health.get("grade", "n/a") + delta = _as_int(payload.get("health_delta", 0), 0) + changed_items = [ + _as_mapping(item) + for item in _as_sequence(payload.get("new_findings_in_changed_files")) + ] + resolved = [_as_mapping(item) for item in _as_sequence(payload.get("resolved"))] + blocking_gates = [ + str(item) for item in _as_sequence(payload.get("blocking_gates")) if str(item) + ] + health_line = ( + "Health: " + f"{score}/100 ({grade}) | Delta: {delta:+d} | " + f"Verdict: {payload.get('verdict', 'stable')}" + if payload.get("health_delta") is not None + else ( + "Health: " + f"{score}/100 ({grade}) | Delta: n/a | " + f"Verdict: {payload.get('verdict', 'stable')}" + ) + ) + lines = [ + "## CodeClone Summary", + "", + health_line, + "", + f"### New findings in changed files ({len(changed_items)})", + ] + if not changed_items: + lines.append("- None") + else: + lines.extend( + [ + ( + f"- **{str(item.get('severity', 'info')).upper()}** " + f"{item.get('kind', 'finding')} in " + f"`{_finding_display_location(item)}`" + ) + for item in changed_items[:10] + 
] + ) + lines.extend(["", f"### Resolved ({len(resolved)})"]) + if not resolved: + lines.append("- None") + else: + lines.extend( + [ + f"- {item.get('kind', 'finding')} in " + f"`{_finding_display_location(item)}`" + for item in resolved[:10] + ] + ) + lines.extend(["", "### Blocking gates"]) + if not blocking_gates: + lines.append("- none") + else: + lines.extend([f"- `{reason}`" for reason in blocking_gates]) + return "\n".join(lines) diff --git a/codeclone/surfaces/mcp/_session_runtime.py b/codeclone/surfaces/mcp/_session_runtime.py new file mode 100644 index 0000000..9b5179d --- /dev/null +++ b/codeclone/surfaces/mcp/_session_runtime.py @@ -0,0 +1,41 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +from pathlib import Path + + +def validate_numeric_args(args: object) -> bool: + return bool( + not ( + _int_attr(args, "max_baseline_size_mb") < 0 + or _int_attr(args, "max_cache_size_mb") < 0 + or _int_attr(args, "fail_threshold", -1) < -1 + or _int_attr(args, "fail_complexity", -1) < -1 + or _int_attr(args, "fail_coupling", -1) < -1 + or _int_attr(args, "fail_cohesion", -1) < -1 + or _int_attr(args, "fail_health", -1) < -1 + or _int_attr(args, "min_typing_coverage", -1) < -1 + or _int_attr(args, "min_typing_coverage", -1) > 100 + or _int_attr(args, "min_docstring_coverage", -1) < -1 + or _int_attr(args, "min_docstring_coverage", -1) > 100 + or _int_attr(args, "coverage_min") < 0 + or _int_attr(args, "coverage_min") > 100 + ) + ) + + +def resolve_cache_path(*, root_path: Path, args: object) -> Path: + raw_value = getattr(args, "cache_path", None) + if isinstance(raw_value, str) and raw_value.strip(): + return Path(raw_value).expanduser() + return root_path / ".cache" / "codeclone" / "cache.json" + 
+ +def _int_attr(args: object, name: str, default: int = 0) -> int: + value = getattr(args, name, default) + return value if isinstance(value, int) else default diff --git a/codeclone/surfaces/mcp/_session_shared.py b/codeclone/surfaces/mcp/_session_shared.py new file mode 100644 index 0000000..fae15c7 --- /dev/null +++ b/codeclone/surfaces/mcp/_session_shared.py @@ -0,0 +1,1200 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import hashlib +import subprocess +from argparse import Namespace +from collections import OrderedDict +from collections.abc import Iterable, Mapping, Sequence +from dataclasses import dataclass +from json import JSONDecodeError +from pathlib import Path +from threading import RLock +from typing import Final, Literal, TypeVar + +import orjson + +from ... 
import __version__ +from ...baseline import Baseline +from ...cache.store import Cache +from ...cache.versioning import CacheStatus +from ...config.pyproject_loader import ( + ConfigValidationError, + load_pyproject_config, +) +from ...config.spec import ( + DEFAULT_BASELINE_PATH, + DEFAULT_BLOCK_MIN_LOC, + DEFAULT_BLOCK_MIN_STMT, + DEFAULT_MAX_BASELINE_SIZE_MB, + DEFAULT_MAX_CACHE_SIZE_MB, + DEFAULT_MIN_LOC, + DEFAULT_MIN_STMT, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, +) +from ...contracts import ( + DEFAULT_COVERAGE_MIN, + DEFAULT_JSON_REPORT_PATH, + DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + DOCS_URL, + REPORT_SCHEMA_VERSION, +) +from ...core._types import OutputPaths +from ...core.bootstrap import bootstrap +from ...core.discovery import discover +from ...core.parallelism import process +from ...core.pipeline import analyze +from ...core.reporting import report +from ...domain.findings import ( + CATEGORY_CLONE, + CATEGORY_COHESION, + CATEGORY_COMPLEXITY, + CATEGORY_COUPLING, + CATEGORY_DEAD_CODE, + CATEGORY_DEPENDENCY, + CATEGORY_STRUCTURAL, + CLONE_KIND_SEGMENT, + FAMILY_CLONE, + FAMILY_CLONES, + FAMILY_DEAD_CODE, + FAMILY_DESIGN, + FAMILY_STRUCTURAL, +) +from ...domain.quality import ( + CONFIDENCE_HIGH, + CONFIDENCE_LOW, + CONFIDENCE_MEDIUM, + EFFORT_EASY, + EFFORT_HARD, + EFFORT_MODERATE, + SEVERITY_CRITICAL, + SEVERITY_INFO, + SEVERITY_WARNING, +) +from ...domain.source_scope import ( + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_ORDER, + SOURCE_KIND_OTHER, + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, +) +from ...findings.ids import ( + clone_group_id, + dead_code_group_id, + design_group_id, + structural_group_id, +) +from ...models import CoverageJoinResult, MetricsDiff, ProjectMetrics, Suggestion +from ...report.gates.evaluator import GateResult as GatingResult +from ...report.gates.evaluator import MetricGateConfig +from 
...report.gates.evaluator import evaluate_gates as _evaluate_report_gates +from ...report.gates.evaluator import summarize_metrics_diff as _summarize_metrics_diff +from ...utils.coerce import as_float as _as_float +from ...utils.coerce import as_int as _as_int +from ...utils.git_diff import validate_git_diff_ref +from .payloads import paginate, resolve_finding_id, short_id + +AnalysisMode = Literal["full", "clones_only"] +CachePolicy = Literal["reuse", "refresh", "off"] +FreshnessKind = Literal["fresh", "mixed", "reused"] +HotlistKind = Literal[ + "most_actionable", + "highest_spread", + "highest_priority", + "production_hotspots", + "test_fixture_hotspots", +] +FindingFamilyFilter = Literal["all", "clone", "structural", "dead_code", "design"] +FindingNoveltyFilter = Literal["all", "new", "known"] +FindingSort = Literal["default", "priority", "severity", "spread"] +DetailLevel = Literal["summary", "normal", "full"] +ComparisonFocus = Literal["all", "clones", "structural", "metrics"] +PRSummaryFormat = Literal["markdown", "json"] +HelpTopic = Literal[ + "workflow", + "analysis_profile", + "suppressions", + "baseline", + "coverage", + "latest_runs", + "review_state", + "changed_scope", +] +HelpDetail = Literal["compact", "normal"] +MetricsDetailFamily = Literal[ + "complexity", + "coupling", + "cohesion", + "coverage_adoption", + "coverage_join", + "dependencies", + "dead_code", + "api_surface", + "god_modules", + "overloaded_modules", + "health", +] +ReportSection = Literal[ + "all", + "meta", + "inventory", + "findings", + "metrics", + "metrics_detail", + "derived", + "changed", + "integrity", +] +HealthScope = Literal["repository"] +SummaryFocus = Literal["repository", "production", "changed_paths"] + +_REPORT_DUMMY_PATH = Path(DEFAULT_JSON_REPORT_PATH) +_HEALTH_SCOPE_REPOSITORY: Final[HealthScope] = "repository" +_FOCUS_REPOSITORY: Final[SummaryFocus] = "repository" +_FOCUS_PRODUCTION: Final[SummaryFocus] = "production" +_FOCUS_CHANGED_PATHS: Final[SummaryFocus] 
= "changed_paths" +_MCP_CONFIG_KEYS = frozenset( + { + "min_loc", + "min_stmt", + "block_min_loc", + "block_min_stmt", + "segment_min_loc", + "segment_min_stmt", + "processes", + "cache_path", + "max_cache_size_mb", + "baseline", + "max_baseline_size_mb", + "metrics_baseline", + "api_surface", + "coverage_xml", + "coverage_min", + "golden_fixture_paths", + } +) +_RESOURCE_SECTION_MAP: Final[dict[str, ReportSection]] = { + "report.json": "all", + "summary": "meta", + "health": "metrics", + "changed": "changed", + "overview": "derived", +} +_SEVERITY_WEIGHT: Final[dict[str, float]] = { + SEVERITY_CRITICAL: 1.0, + SEVERITY_WARNING: 0.6, + SEVERITY_INFO: 0.2, +} +_EFFORT_WEIGHT: Final[dict[str, float]] = { + EFFORT_EASY: 1.0, + EFFORT_MODERATE: 0.6, + EFFORT_HARD: 0.3, +} +_NOVELTY_WEIGHT: Final[dict[str, float]] = {"new": 1.0, "known": 0.5} +_RUNTIME_WEIGHT: Final[dict[str, float]] = { + "production": 1.0, + "mixed": 0.8, + "tests": 0.4, + "fixtures": 0.2, + "other": 0.5, +} +_CONFIDENCE_WEIGHT: Final[dict[str, float]] = { + CONFIDENCE_HIGH: 1.0, + CONFIDENCE_MEDIUM: 0.7, + CONFIDENCE_LOW: 0.3, +} +# Canonical report groups use FAMILY_CLONES ("clones"), while individual finding +# payloads use FAMILY_CLONE ("clone"). 
+_VALID_ANALYSIS_MODES = frozenset({"full", "clones_only"}) +_VALID_CACHE_POLICIES = frozenset({"reuse", "refresh", "off"}) +_VALID_FINDING_FAMILIES = frozenset( + {"all", "clone", "structural", "dead_code", "design"} +) +_VALID_FINDING_NOVELTY = frozenset({"all", "new", "known"}) +_VALID_FINDING_SORT = frozenset({"default", "priority", "severity", "spread"}) +_VALID_DETAIL_LEVELS = frozenset({"summary", "normal", "full"}) +_VALID_COMPARISON_FOCUS = frozenset({"all", "clones", "structural", "metrics"}) +_VALID_PR_SUMMARY_FORMATS = frozenset({"markdown", "json"}) +_VALID_HELP_TOPICS = frozenset( + { + "workflow", + "analysis_profile", + "suppressions", + "baseline", + "coverage", + "latest_runs", + "review_state", + "changed_scope", + } +) +_VALID_HELP_DETAILS = frozenset({"compact", "normal"}) +DEFAULT_MCP_HISTORY_LIMIT = 4 +MAX_MCP_HISTORY_LIMIT = 10 +_VALID_REPORT_SECTIONS = frozenset( + { + "all", + "meta", + "inventory", + "findings", + "metrics", + "metrics_detail", + "derived", + "changed", + "integrity", + } +) +_VALID_HOTLIST_KINDS = frozenset( + { + "most_actionable", + "highest_spread", + "highest_priority", + "production_hotspots", + "test_fixture_hotspots", + } +) +_VALID_SEVERITIES = frozenset({SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO}) +_SOURCE_KIND_BREAKDOWN_ORDER: Final[tuple[str, ...]] = ( + SOURCE_KIND_PRODUCTION, + SOURCE_KIND_TESTS, + SOURCE_KIND_FIXTURES, + SOURCE_KIND_MIXED, + SOURCE_KIND_OTHER, +) +_COMPACT_ITEM_PATH_KEYS: Final[frozenset[str]] = frozenset( + {"relative_path", "path", "filepath", "file"} +) +_COMPACT_ITEM_EMPTY_VALUES: Final[tuple[object, ...]] = ("", None, [], {}, ()) +_HOTLIST_REPORT_KEYS: Final[dict[str, str]] = { + "most_actionable": "most_actionable_ids", + "highest_spread": "highest_spread_ids", + "production_hotspots": "production_hotspot_ids", + "test_fixture_hotspots": "test_fixture_hotspot_ids", +} +_CHECK_TO_DIMENSION: Final[dict[str, str]] = { + "cohesion": "cohesion", + "coupling": "coupling", + 
"dead_code": "dead_code", + "complexity": "complexity", + "clones": "clones", +} +_DESIGN_CHECK_CONTEXT: Final[dict[str, dict[str, object]]] = { + "complexity": { + "category": CATEGORY_COMPLEXITY, + "metric": "cyclomatic_complexity", + "operator": ">", + "default_threshold": DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, + }, + "coupling": { + "category": CATEGORY_COUPLING, + "metric": "cbo", + "operator": ">", + "default_threshold": DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, + }, + "cohesion": { + "category": CATEGORY_COHESION, + "metric": "lcom4", + "operator": ">=", + "default_threshold": DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, + }, +} +_VALID_METRICS_DETAIL_FAMILIES = frozenset( + { + "complexity", + "coupling", + "cohesion", + "coverage_adoption", + "coverage_join", + "dependencies", + "dead_code", + "api_surface", + "god_modules", + "overloaded_modules", + "health", + } +) +_METRICS_DETAIL_FAMILY_ALIASES: Final[dict[str, str]] = { + "god_modules": "overloaded_modules", +} +_SHORT_RUN_ID_LENGTH = 8 +_SHORT_HASH_ID_LENGTH = 6 +ChoiceT = TypeVar("ChoiceT", bound=str) + + +@dataclass(frozen=True) +class MCPHelpTopicSpec: + summary: str + key_points: tuple[str, ...] + recommended_tools: tuple[str, ...] + doc_links: tuple[tuple[str, str], ...] + warnings: tuple[str, ...] = () + anti_patterns: tuple[str, ...] 
= () + + +_MCP_BOOK_URL: Final = f"{DOCS_URL}book/" +_MCP_GUIDE_URL: Final = f"{DOCS_URL}mcp/" +_MCP_INTERFACE_DOC_LINK: Final[tuple[str, str]] = ( + "MCP interface contract", + f"{_MCP_BOOK_URL}20-mcp-interface/", +) +_BASELINE_DOC_LINK: Final[tuple[str, str]] = ( + "Baseline contract", + f"{_MCP_BOOK_URL}06-baseline/", +) +_CONFIG_DOC_LINK: Final[tuple[str, str]] = ( + "Config and defaults", + f"{_MCP_BOOK_URL}04-config-and-defaults/", +) +_REPORT_DOC_LINK: Final[tuple[str, str]] = ( + "Report contract", + f"{_MCP_BOOK_URL}08-report/", +) +_CLI_DOC_LINK: Final[tuple[str, str]] = ( + "CLI contract", + f"{_MCP_BOOK_URL}09-cli/", +) +_PIPELINE_DOC_LINK: Final[tuple[str, str]] = ( + "Core pipeline", + f"{_MCP_BOOK_URL}05-core-pipeline/", +) +_SUPPRESSIONS_DOC_LINK: Final[tuple[str, str]] = ( + "Inline suppressions contract", + f"{_MCP_BOOK_URL}19-inline-suppressions/", +) +_MCP_GUIDE_DOC_LINK: Final[tuple[str, str]] = ("MCP usage guide", _MCP_GUIDE_URL) +_HELP_TOPIC_SPECS: Final[dict[str, MCPHelpTopicSpec]] = { + "workflow": MCPHelpTopicSpec( + summary=( + "CodeClone MCP is triage-first and budget-aware. Start with a " + "summary or production triage, then narrow through hotspots or " + "focused checks before opening one finding in detail." + ), + key_points=( + "Recommended first pass: analyze_repository or analyze_changed_paths.", + ( + "Start with default or pyproject-resolved thresholds; lower them " + "only for an explicit higher-sensitivity follow-up pass." + ), + ( + "Use get_run_summary or get_production_triage before broad " + "finding listing." + ), + ( + "Prefer list_hotspots or focused check_* tools over " + "list_findings on noisy repositories." + ), + ("Use get_finding and get_remediation only after selecting an issue."), + ( + "get_report_section(section='all') is an exception path, not " + "a default first step." 
+ ), + ), + recommended_tools=( + "analyze_repository", + "analyze_changed_paths", + "get_run_summary", + "get_production_triage", + "list_hotspots", + "check_clones", + "check_dead_code", + "get_finding", + "get_remediation", + ), + doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), + warnings=( + ( + "Broad list_findings calls burn context quickly on large or " + "noisy repositories." + ), + ( + "Prefer generate_pr_summary(format='markdown') unless machine " + "JSON is explicitly required." + ), + ), + anti_patterns=( + "Starting exploration with list_findings on a noisy repository.", + "Using get_report_section(section='all') as the default first step.", + ( + "Escalating detail on larger lists instead of opening one " + "finding with get_finding." + ), + ), + ), + "analysis_profile": MCPHelpTopicSpec( + summary=( + "CodeClone default analysis is intentionally conservative: stable " + "first-pass review, baseline-aware governance, and CI-friendly " + "signal over maximum local sensitivity." + ), + key_points=( + ( + "Default thresholds are intentionally conservative and " + "production-friendly." + ), + ( + "A clean default run does not rule out smaller local " + "duplication or repetition." + ), + ( + "Lowering thresholds increases sensitivity and can surface " + "smaller functions, tighter windows, and finer local signals." + ), + ( + "Lower-threshold runs are best for exploratory local review, " + "not as a silent replacement for the default governance profile." + ), + "Interpret results in the context of the active threshold profile.", + ), + recommended_tools=( + "analyze_repository", + "analyze_changed_paths", + "get_run_summary", + "compare_runs", + ), + doc_links=( + _CONFIG_DOC_LINK, + _PIPELINE_DOC_LINK, + _MCP_INTERFACE_DOC_LINK, + ), + warnings=( + ( + "Do not treat a default-threshold run as proof that no smaller " + "local clone or repetition exists." 
+ ), + ( + "Lower-threshold runs usually increase noise and should be read " + "as higher-sensitivity exploratory passes." + ), + "Run comparisons are most meaningful when profiles are aligned.", + ), + anti_patterns=( + ( + "Assuming a clean default pass means no finer-grained " + "duplication exists anywhere in the repository." + ), + ( + "Lowering thresholds for exploration and then interpreting the " + "result as if it had the same meaning as the conservative " + "default pass." + ), + ( + "Mixing low-threshold exploratory output into baseline or CI " + "reasoning without acknowledging the profile change." + ), + ), + ), + "suppressions": MCPHelpTopicSpec( + summary=( + "CodeClone supports explicit inline suppressions for selected " + "findings. They are local policy, not analysis truth, and should " + "stay narrow and declaration-scoped." + ), + key_points=( + "Current syntax uses codeclone: ignore[rule-id,...].", + "Binding is declaration-scoped: def, async def, or class.", + ( + "Supported placement is the previous line or inline on the " + "declaration or header line." + ), + ( + "Suppressions are target-specific and do not imply file-wide " + "or cascading scope." + ), + ( + "Use suppressions for accepted dynamic or runtime false " + "positives, not to hide broad classes of debt." + ), + ), + recommended_tools=("get_finding", "get_remediation"), + doc_links=(_SUPPRESSIONS_DOC_LINK, _MCP_INTERFACE_DOC_LINK), + warnings=( + ( + "MCP explains suppression semantics but never creates or " + "updates suppressions." + ), + ), + anti_patterns=( + "Treating suppressions as file-wide or inherited state.", + ( + "Using suppressions to hide broad structural debt instead of " + "accepted false positives." + ), + ), + ), + "baseline": MCPHelpTopicSpec( + summary=( + "A baseline is CodeClone's accepted comparison snapshot for clones " + "and optional metrics. It separates known debt from new regressions " + "and is trust-checked before use." 
+ ), + key_points=( + ( + "Canonical baseline schema is v2.0 with meta and clone keys; " + "metrics may be embedded for unified flows." + ), + ( + "Compatibility depends on generator identity, supported " + "schema version, fingerprint version, python tag, and payload " + "integrity." + ), + ( + "Known means already present in the trusted baseline; new " + "means not accepted by baseline." + ), + ( + "In CI and gating contexts, untrusted baseline states are " + "contract errors rather than soft warnings." + ), + "MCP is read-only and does not update or rewrite baselines.", + ), + recommended_tools=("get_run_summary", "evaluate_gates", "compare_runs"), + doc_links=(_BASELINE_DOC_LINK,), + warnings=( + "Baseline trust semantics directly affect new-vs-known classification.", + ), + anti_patterns=( + "Treating baseline as mutable MCP session state.", + "Assuming an untrusted baseline is only cosmetic in CI contexts.", + ), + ), + "coverage": MCPHelpTopicSpec( + summary=( + "Coverage join is an external current-run signal: CodeClone reads " + "an existing Cobertura XML report and joins line hits to risky " + "function spans." + ), + key_points=( + "Use Cobertura XML such as `coverage xml` output from coverage.py.", + "Coverage join does not become baseline truth and does not affect health.", + ( + "Coverage hotspot gating is current-run only and focuses on " + "medium/high-risk functions measured below the configured " + "threshold." + ), + ( + "Functions missing from the supplied coverage.xml are surfaced " + "as scope gaps, not labeled as untested." 
+ ), + "Use metrics_detail(family='coverage_join') for bounded drill-down.", + ), + recommended_tools=( + "analyze_repository", + "analyze_changed_paths", + "get_run_summary", + "get_report_section", + "evaluate_gates", + ), + doc_links=( + _MCP_INTERFACE_DOC_LINK, + _CLI_DOC_LINK, + _REPORT_DOC_LINK, + ), + warnings=( + "Coverage join is only as accurate as the external XML path mapping.", + "It does not infer branch coverage and does not execute tests.", + "Use fail-on-untested-hotspots only with a valid joined coverage input.", + ), + anti_patterns=( + "Treating missing coverage XML as zero coverage without stating it.", + "Reading coverage join as a baseline-aware trend signal.", + "Assuming dynamic runtime dispatch is visible through a static line join.", + ), + ), + "latest_runs": MCPHelpTopicSpec( + summary=( + "latest/* resources point to the most recent analysis run in the " + "current MCP session. They are convenience handles, not persistent " + "truth anchors." + ), + key_points=( + "Run history is in-memory only and bounded by history-limit.", + "The latest pointer moves when a newer analyze_* call registers a run.", + "A fresh repository state requires a fresh analyze run.", + ( + "Short run ids are convenience handles derived from canonical " + "run identity." + ), + ( + "Do not assume latest/* is globally current outside the " + "active MCP session." + ), + ), + recommended_tools=( + "analyze_repository", + "analyze_changed_paths", + "get_run_summary", + "compare_runs", + ), + doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), + warnings=( + ( + "latest/* can point at a different repository after a later " + "analyze call in the same session." + ), + ), + anti_patterns=( + ( + "Assuming latest/* remains tied to one repository across the " + "whole client session." + ), + ( + "Using latest/* as a substitute for starting a fresh run when " + "freshness matters." 
+ ), + ), + ), + "review_state": MCPHelpTopicSpec( + summary=( + "Reviewed state in MCP is session-local workflow state. It helps " + "long sessions track review progress without modifying canonical " + "findings, baseline, or persisted artifacts." + ), + key_points=( + "Review markers are in-memory only.", + "They do not change report truth, finding identity, or CI semantics.", + "They are useful for triage workflows across long sessions.", + ( + "They should not be interpreted as acceptance, suppression, " + "or baseline update." + ), + ), + recommended_tools=( + "list_hotspots", + "get_finding", + "mark_finding_reviewed", + "list_reviewed_findings", + ), + doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), + warnings=( + "Reviewed markers disappear when the MCP session is cleared or restarted.", + ), + anti_patterns=( + "Treating reviewed state as a persistent acceptance signal.", + "Assuming reviewed findings are removed from canonical report truth.", + ), + ), + "changed_scope": MCPHelpTopicSpec( + summary=( + "Changed-scope analysis narrows review to findings that touch a " + "selected change set. It is for PR and patch review, not a " + "replacement for full canonical analysis." + ), + key_points=( + ( + "Use analyze_changed_paths with explicit changed_paths or " + "git_diff_ref for review-focused runs." + ), + ( + "Start with the same conservative profile as the default " + "review, then lower thresholds only when you explicitly want " + "a higher-sensitivity changed-files pass." + ), + ( + "Changed-scope is best for asking what new issues touch " + "modified files and whether anything should block CI." 
+ ), + "Prefer production triage and hotspot views before broad listing.", + "If repository-wide truth is needed, run full analysis first.", + ), + recommended_tools=( + "analyze_changed_paths", + "get_run_summary", + "get_production_triage", + "evaluate_gates", + "generate_pr_summary", + ), + doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), + warnings=( + ( + "Changed-scope narrows review focus; it does not replace the " + "full canonical report for repository-wide truth." + ), + ), + anti_patterns=( + "Using changed-scope as if it were the only source of repository truth.", + ( + "Starting changed-files review with broad listing instead of " + "compact triage." + ), + ), + ), +} + + +def _suggestion_finding_id_payload(suggestion: object) -> str: + if not hasattr(suggestion, "finding_family"): + return "" + family = str(getattr(suggestion, "finding_family", "")).strip() + if family == FAMILY_CLONES: + kind = str(getattr(suggestion, "finding_kind", "")).strip() + subject_key = str(getattr(suggestion, "subject_key", "")).strip() + return clone_group_id(kind or CLONE_KIND_SEGMENT, subject_key) + if family == FAMILY_STRUCTURAL: + return structural_group_id( + str(getattr(suggestion, "finding_kind", "")).strip() or CATEGORY_STRUCTURAL, + str(getattr(suggestion, "subject_key", "")).strip(), + ) + category = str(getattr(suggestion, "category", "")).strip() + subject_key = str(getattr(suggestion, "subject_key", "")).strip() + if category == CATEGORY_DEAD_CODE: + return dead_code_group_id(subject_key) + return design_group_id( + category, + subject_key or str(getattr(suggestion, "title", "")), + ) + + +@dataclass(frozen=True, slots=True) +class _CloneShortIdEntry: + canonical_id: str + alias: str + token: str + suffix: str + + def render(self, prefix_length: int) -> str: + if prefix_length <= 0: + prefix_length = len(self.token) + return f"{self.alias}:{self.token[:prefix_length]}{self.suffix}" + + +def _partitioned_short_id(alias: str, remainder: str) -> str: + 
first, _, rest = remainder.partition(":") + return f"{alias}:{first}:{rest}" if rest else f"{alias}:{first}" + + +def _clone_short_id_entry_payload(canonical_id: str) -> _CloneShortIdEntry: + _prefix, _, remainder = canonical_id.partition(":") + clone_kind, _, group_key = remainder.partition(":") + hashes = [part for part in group_key.split("|") if part] + if clone_kind == "function": + fingerprint = hashes[0] if hashes else group_key + bucket = "" + if "|" in group_key: + bucket = "|" + group_key.split("|")[-1] + return _CloneShortIdEntry( + canonical_id=canonical_id, + alias="fn", + token=fingerprint, + suffix=bucket, + ) + alias = {"block": "blk", "segment": "seg"}.get(clone_kind, "clone") + combined = "|".join(hashes) if hashes else group_key + token = hashlib.sha256(combined.encode()).hexdigest() + return _CloneShortIdEntry( + canonical_id=canonical_id, + alias=alias, + token=token, + suffix=f"|x{len(hashes) or 1}", + ) + + +def _disambiguated_clone_short_ids_payload( + canonical_ids: Sequence[str], +) -> dict[str, str]: + clone_entries = [ + _clone_short_id_entry_payload(canonical_id) for canonical_id in canonical_ids + ] + max_token_length = max((len(entry.token) for entry in clone_entries), default=0) + for prefix_length in range(_SHORT_HASH_ID_LENGTH + 2, max_token_length + 1, 2): + candidates = { + entry.canonical_id: entry.render(prefix_length) for entry in clone_entries + } + if len(set(candidates.values())) == len(candidates): + return candidates + return { + entry.canonical_id: entry.render(max_token_length) for entry in clone_entries + } + + +def _leaf_symbol_name_payload(value: object) -> str: + text = str(value).strip() + if not text: + return "" + if ":" in text: + text = text.rsplit(":", maxsplit=1)[-1] + if "." 
in text: + text = text.rsplit(".", maxsplit=1)[-1] + return text + + +def _base_short_finding_id_payload(canonical_id: str) -> str: + prefix, _, remainder = canonical_id.partition(":") + if prefix == "clone": + return _clone_short_id_entry_payload(canonical_id).render(_SHORT_HASH_ID_LENGTH) + if prefix == "structural": + finding_kind, _, finding_key = remainder.partition(":") + return f"struct:{finding_kind}:{finding_key[:_SHORT_HASH_ID_LENGTH]}" + if prefix == "dead_code": + return f"dead:{_leaf_symbol_name_payload(remainder)}" + if prefix == "design": + category, _, subject_key = remainder.partition(":") + return f"design:{category}:{_leaf_symbol_name_payload(subject_key)}" + return canonical_id + + +def _disambiguated_short_finding_id_payload(canonical_id: str) -> str: + prefix, _, remainder = canonical_id.partition(":") + if prefix == "clone": + return _clone_short_id_entry_payload(canonical_id).render(0) + if prefix == "structural": + return _partitioned_short_id("struct", remainder) + if prefix == "dead_code": + return f"dead:{remainder}" + if prefix == "design": + return _partitioned_short_id("design", remainder) + return canonical_id + + +def _json_text_payload( + payload: object, + *, + sort_keys: bool = True, +) -> str: + options = orjson.OPT_INDENT_2 + if sort_keys: + options |= orjson.OPT_SORT_KEYS + return orjson.dumps(payload, option=options).decode("utf-8") + + +def _git_diff_lines_payload( + *, + root_path: Path, + git_diff_ref: str, +) -> tuple[str, ...]: + try: + validated_ref = validate_git_diff_ref(git_diff_ref) + except ValueError as exc: + raise MCPGitDiffError(str(exc)) from exc + try: + completed = subprocess.run( + ["git", "diff", "--name-only", validated_ref, "--"], + cwd=root_path, + check=True, + capture_output=True, + text=True, + timeout=30, + ) + except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc: + raise MCPGitDiffError( + f"Unable to resolve changed paths from git diff ref '{validated_ref}'." 
+ ) from exc + return tuple( + sorted({line.strip() for line in completed.stdout.splitlines() if line.strip()}) + ) + + +def _load_report_document_payload(report_json: str) -> dict[str, object]: + try: + payload = orjson.loads(report_json) + except JSONDecodeError as exc: + raise MCPServiceError( + f"Generated canonical report is not valid JSON: {exc}" + ) from exc + if not isinstance(payload, dict): + raise MCPServiceError("Generated canonical report must be a JSON object.") + return dict(payload) + + +def _validated_history_limit(history_limit: int) -> int: + if not 1 <= history_limit <= MAX_MCP_HISTORY_LIMIT: + raise ValueError( + f"history_limit must be between 1 and {MAX_MCP_HISTORY_LIMIT}." + ) + return history_limit + + +class MCPServiceError(RuntimeError): + """Base class for CodeClone MCP service errors.""" + + +class MCPServiceContractError(MCPServiceError): + """Raised when an MCP request violates the CodeClone service contract.""" + + +class MCPRunNotFoundError(MCPServiceError): + """Raised when a requested MCP run is not available in the in-memory registry.""" + + +class MCPFindingNotFoundError(MCPServiceError): + """Raised when a requested finding id is not present in the selected run.""" + + +class MCPGitDiffError(MCPServiceError): + """Raised when changed paths cannot be resolved from a git ref.""" + + +class _BufferConsole: + def __init__(self) -> None: + self.messages: list[str] = [] + + def print(self, *objects: object, **_kwargs: object) -> None: + text = " ".join(str(obj) for obj in objects).strip() + if text: + self.messages.append(text) + + +@dataclass(frozen=True, slots=True) +class MCPAnalysisRequest: + root: str | None = None + analysis_mode: AnalysisMode = "full" + respect_pyproject: bool = True + changed_paths: tuple[str, ...] 
 = ()
    git_diff_ref: str | None = None
    processes: int | None = None
    min_loc: int | None = None
    min_stmt: int | None = None
    block_min_loc: int | None = None
    block_min_stmt: int | None = None
    segment_min_loc: int | None = None
    segment_min_stmt: int | None = None
    api_surface: bool | None = None
    coverage_xml: str | None = None
    coverage_min: int | None = None
    complexity_threshold: int | None = None
    coupling_threshold: int | None = None
    cohesion_threshold: int | None = None
    baseline_path: str | None = None
    metrics_baseline_path: str | None = None
    max_baseline_size_mb: int | None = None
    cache_policy: CachePolicy = "reuse"
    cache_path: str | None = None
    max_cache_size_mb: int | None = None


@dataclass(frozen=True, slots=True)
class MCPGateRequest:
    """Gate-evaluation thresholds and toggles for one MCP gate request.

    The ``-1`` sentinel means "gate disabled" for the numeric thresholds
    (see their use in ``_evaluate_gate_snapshot``).
    """

    run_id: str | None = None
    fail_on_new: bool = False
    fail_threshold: int = -1
    fail_complexity: int = -1
    fail_coupling: int = -1
    fail_cohesion: int = -1
    fail_cycles: bool = False
    fail_dead_code: bool = False
    fail_health: int = -1
    fail_on_new_metrics: bool = False
    fail_on_typing_regression: bool = False
    fail_on_docstring_regression: bool = False
    fail_on_api_break: bool = False
    fail_on_untested_hotspots: bool = False
    min_typing_coverage: int = -1
    min_docstring_coverage: int = -1
    coverage_min: int = DEFAULT_COVERAGE_MIN


@dataclass(frozen=True, slots=True)
class MCPRunRecord:
    """Immutable snapshot of one completed MCP analysis run."""

    run_id: str
    root: Path
    request: MCPAnalysisRequest
    comparison_settings: tuple[object, ...]
    report_document: dict[str, object]
    summary: dict[str, object]
    changed_paths: tuple[str, ...]
    changed_projection: dict[str, object] | None
    warnings: tuple[str, ...]
    failures: tuple[str, ...]
    func_clones_count: int
    block_clones_count: int
    project_metrics: ProjectMetrics | None
    coverage_join: CoverageJoinResult | None
    suggestions: tuple[Suggestion, ...]
+ new_func: frozenset[str] + new_block: frozenset[str] + metrics_diff: MetricsDiff | None + + +class CodeCloneMCPRunStore: + def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: + self._history_limit = _validated_history_limit(history_limit) + self._lock = RLock() + self._records: OrderedDict[str, MCPRunRecord] = OrderedDict() + self._latest_run_id: str | None = None + + def register(self, record: MCPRunRecord) -> MCPRunRecord: + with self._lock: + self._records.pop(record.run_id, None) + self._records[record.run_id] = record + self._records.move_to_end(record.run_id) + self._latest_run_id = record.run_id + while len(self._records) > self._history_limit: + self._records.popitem(last=False) + return record + + def get(self, run_id: str | None = None) -> MCPRunRecord: + with self._lock: + resolved_run_id = self._resolve_run_id(run_id) + if resolved_run_id is None: + raise MCPRunNotFoundError("No matching MCP analysis run is available.") + return self._records[resolved_run_id] + + def _resolve_run_id(self, run_id: str | None) -> str | None: + if run_id is None: + return self._latest_run_id + if run_id in self._records: + return run_id + matches = [ + candidate for candidate in self._records if candidate.startswith(run_id) + ] + if len(matches) == 1: + return matches[0] + if len(matches) > 1: + raise MCPServiceContractError( + f"Run id '{run_id}' is ambiguous in this MCP session." 
+ ) + return None + + def records(self) -> tuple[MCPRunRecord, ...]: + with self._lock: + return tuple(self._records.values()) + + def clear(self) -> tuple[str, ...]: + with self._lock: + removed_run_ids = tuple(self._records.keys()) + self._records.clear() + self._latest_run_id = None + return removed_run_ids + + +__all__ = [ + "CATEGORY_CLONE", + "CATEGORY_COHESION", + "CATEGORY_COMPLEXITY", + "CATEGORY_COUPLING", + "CATEGORY_DEAD_CODE", + "CATEGORY_DEPENDENCY", + "CATEGORY_STRUCTURAL", + "CONFIDENCE_MEDIUM", + "DEFAULT_BASELINE_PATH", + "DEFAULT_BLOCK_MIN_LOC", + "DEFAULT_BLOCK_MIN_STMT", + "DEFAULT_COVERAGE_MIN", + "DEFAULT_MAX_BASELINE_SIZE_MB", + "DEFAULT_MAX_CACHE_SIZE_MB", + "DEFAULT_MCP_HISTORY_LIMIT", + "DEFAULT_MIN_LOC", + "DEFAULT_MIN_STMT", + "DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD", + "DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD", + "DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD", + "DEFAULT_SEGMENT_MIN_LOC", + "DEFAULT_SEGMENT_MIN_STMT", + "EFFORT_EASY", + "EFFORT_HARD", + "EFFORT_MODERATE", + "FAMILY_CLONE", + "FAMILY_CLONES", + "FAMILY_DEAD_CODE", + "FAMILY_DESIGN", + "FAMILY_STRUCTURAL", + "REPORT_SCHEMA_VERSION", + "SEVERITY_CRITICAL", + "SEVERITY_INFO", + "SEVERITY_WARNING", + "SOURCE_KIND_ORDER", + "SOURCE_KIND_OTHER", + "SOURCE_KIND_PRODUCTION", + "_CHECK_TO_DIMENSION", + "_COMPACT_ITEM_EMPTY_VALUES", + "_COMPACT_ITEM_PATH_KEYS", + "_CONFIDENCE_WEIGHT", + "_DESIGN_CHECK_CONTEXT", + "_EFFORT_WEIGHT", + "_FOCUS_CHANGED_PATHS", + "_FOCUS_PRODUCTION", + "_FOCUS_REPOSITORY", + "_HEALTH_SCOPE_REPOSITORY", + "_HELP_TOPIC_SPECS", + "_HOTLIST_REPORT_KEYS", + "_MCP_CONFIG_KEYS", + "_METRICS_DETAIL_FAMILY_ALIASES", + "_NOVELTY_WEIGHT", + "_REPORT_DUMMY_PATH", + "_RUNTIME_WEIGHT", + "_SEVERITY_WEIGHT", + "_SHORT_RUN_ID_LENGTH", + "_SOURCE_KIND_BREAKDOWN_ORDER", + "_VALID_ANALYSIS_MODES", + "_VALID_CACHE_POLICIES", + "_VALID_COMPARISON_FOCUS", + "_VALID_DETAIL_LEVELS", + "_VALID_FINDING_FAMILIES", + "_VALID_FINDING_NOVELTY", + "_VALID_FINDING_SORT", + 
"_VALID_HELP_DETAILS", + "_VALID_HELP_TOPICS", + "_VALID_HOTLIST_KINDS", + "_VALID_METRICS_DETAIL_FAMILIES", + "_VALID_PR_SUMMARY_FORMATS", + "_VALID_REPORT_SECTIONS", + "_VALID_SEVERITIES", + "AnalysisMode", + "Baseline", + "Cache", + "CachePolicy", + "CacheStatus", + "ChoiceT", + "CodeCloneMCPRunStore", + "ComparisonFocus", + "ConfigValidationError", + "DetailLevel", + "FindingFamilyFilter", + "FindingNoveltyFilter", + "FindingSort", + "FreshnessKind", + "GatingResult", + "HelpDetail", + "HelpTopic", + "HotlistKind", + "Iterable", + "MCPAnalysisRequest", + "MCPFindingNotFoundError", + "MCPGateRequest", + "MCPRunNotFoundError", + "MCPRunRecord", + "MCPServiceContractError", + "MCPServiceError", + "Mapping", + "MetricGateConfig", + "MetricsDetailFamily", + "MetricsDiff", + "Namespace", + "OrderedDict", + "OutputPaths", + "PRSummaryFormat", + "Path", + "RLock", + "ReportSection", + "Sequence", + "_BufferConsole", + "__version__", + "_as_float", + "_as_int", + "_base_short_finding_id_payload", + "_disambiguated_clone_short_ids_payload", + "_disambiguated_short_finding_id_payload", + "_evaluate_report_gates", + "_git_diff_lines_payload", + "_json_text_payload", + "_leaf_symbol_name_payload", + "_load_report_document_payload", + "_suggestion_finding_id_payload", + "_summarize_metrics_diff", + "analyze", + "bootstrap", + "discover", + "load_pyproject_config", + "paginate", + "process", + "report", + "resolve_finding_id", + "short_id", +] diff --git a/codeclone/surfaces/mcp/_session_state_mixin.py b/codeclone/surfaces/mcp/_session_state_mixin.py new file mode 100644 index 0000000..07c54fb --- /dev/null +++ b/codeclone/surfaces/mcp/_session_state_mixin.py @@ -0,0 +1,1205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Den Rozhnovskiy

from __future__ import annotations

from ...baseline.metrics_baseline import probe_metrics_baseline_section
from . import _session_helpers as _helpers
from ._session_baseline import (
    CloneBaselineState,
    MetricsBaselineState,
)
from ._session_finding_mixin import _MCPSessionFindingMixin, _StateLock
from ._session_runtime import validate_numeric_args
from ._session_shared import (
    _FOCUS_PRODUCTION,
    _FOCUS_REPOSITORY,
    _HEALTH_SCOPE_REPOSITORY,
    _HELP_TOPIC_SPECS,
    _MCP_CONFIG_KEYS,
    _METRICS_DETAIL_FAMILY_ALIASES,
    _VALID_COMPARISON_FOCUS,
    _VALID_HELP_DETAILS,
    _VALID_HELP_TOPICS,
    _VALID_METRICS_DETAIL_FAMILIES,
    _VALID_PR_SUMMARY_FORMATS,
    _VALID_REPORT_SECTIONS,
    DEFAULT_BASELINE_PATH,
    DEFAULT_BLOCK_MIN_LOC,
    DEFAULT_BLOCK_MIN_STMT,
    DEFAULT_COVERAGE_MIN,
    DEFAULT_MAX_BASELINE_SIZE_MB,
    DEFAULT_MAX_CACHE_SIZE_MB,
    DEFAULT_MIN_LOC,
    DEFAULT_MIN_STMT,
    DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD,
    DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD,
    DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD,
    DEFAULT_SEGMENT_MIN_LOC,
    DEFAULT_SEGMENT_MIN_STMT,
    FAMILY_CLONE,
    REPORT_SCHEMA_VERSION,
    SOURCE_KIND_PRODUCTION,
    CacheStatus,
    CodeCloneMCPRunStore,
    ComparisonFocus,
    ConfigValidationError,
    GatingResult,
    HelpDetail,
    HelpTopic,
    Mapping,
    MCPAnalysisRequest,
    MCPGateRequest,
    MCPRunRecord,
    MCPServiceContractError,
    MetricGateConfig,
    MetricsDetailFamily,
    MetricsDiff,
    Namespace,
    OrderedDict,
    Path,
    PRSummaryFormat,
    ReportSection,
    Sequence,
    __version__,
    _as_int,
    _evaluate_report_gates,
    _json_text_payload,
    load_pyproject_config,
    paginate,
)


class _MCPSessionChangedProjectionMixin(_MCPSessionFindingMixin):
    """Projects a run's findings onto its changed paths and folds the
    projection into summary payloads."""

    # Shared session state provided by the concrete session class.
    _runs: CodeCloneMCPRunStore
    _state_lock: _StateLock
    _review_state: dict[str, OrderedDict[str, str | None]]
    _last_gate_results: dict[str, dict[str, object]]
    _spread_max_cache: dict[str, int]

    def _build_changed_projection(
        self,
        record: MCPRunRecord,
    ) -> dict[str, object] | None:
        """Return the changed-paths findings projection, or None when the run
        recorded no changed paths."""
        if not record.changed_paths:
            return None
        items = self._query_findings(
            record=record,
            detail_level="summary",
            changed_paths=record.changed_paths,
        )
        new_count = sum(1 for item in items if str(item.get("novelty", "")) == "new")
        known_count = sum(
            1 for item in items if str(item.get("novelty", "")) == "known"
        )
        new_by_source_kind = _helpers._source_kind_breakdown(
            item.get("source_kind")
            for item in items
            if str(item.get("novelty", "")) == "new"
        )
        health_delta = _helpers._summary_health_delta(record.summary)
        return {
            "run_id": _helpers._short_run_id(record.run_id),
            "changed_paths": list(record.changed_paths),
            "total": len(items),
            "new": new_count,
            "known": known_count,
            "new_by_source_kind": new_by_source_kind,
            "items": items,
            "health": dict(_helpers._summary_health_payload(record.summary)),
            "health_delta": health_delta,
            "verdict": _helpers._changed_verdict(
                changed_projection={"new": new_count, "total": len(items)},
                health_delta=health_delta,
            ),
        }

    def _augment_summary_with_changed(
        self,
        *,
        summary: Mapping[str, object],
        changed_paths: Sequence[str],
        changed_projection: Mapping[str, object] | None,
    ) -> dict[str, object]:
        """Copy *summary* and attach changed-path keys when a projection exists."""
        payload = dict(summary)
        if changed_paths:
            payload["changed_paths"] = list(changed_paths)
        if changed_projection is not None:
            payload["changed_findings"] = {
                "total": _as_int(changed_projection.get("total", 0), 0),
                "new": _as_int(changed_projection.get("new", 0), 0),
                "known": _as_int(changed_projection.get("known", 0), 0),
                # Only the first 10 items are inlined into the summary.
                "items": [
                    dict(_helpers._as_mapping(item))
                    for item in _helpers._as_sequence(changed_projection.get("items"))[
                        :10
                    ]
                ],
            }
            payload["health_delta"] = (
                _as_int(changed_projection.get("health_delta", 0), 0)
                if changed_projection.get("health_delta") is not None
                else None
            )
            payload["verdict"] = str(changed_projection.get("verdict", "stable"))
        return payload


class _MCPSessionAnalysisArgsMixin(_MCPSessionChangedProjectionMixin):
    """Builds the CLI-equivalent ``Namespace`` for an MCP analysis request and
    resolves baseline inputs."""

    _runs: CodeCloneMCPRunStore
    _state_lock: _StateLock
    _review_state: dict[str, OrderedDict[str, str | None]]
    _last_gate_results: dict[str, dict[str, object]]
    _spread_max_cache: dict[str, int]

    def _comparison_index(
        self,
        record: MCPRunRecord,
        *,
        focus: str,
    ) -> dict[str, dict[str, object]]:
        """Index the run's findings by id, filtered to the comparison *focus*."""
        findings = self._base_findings(record)
        if focus == "clones":
            findings = [f for f in findings if str(f.get("family", "")) == "clone"]
        elif focus == "structural":
            findings = [f for f in findings if str(f.get("family", "")) == "structural"]
        elif focus == "metrics":
            findings = [
                f
                for f in findings
                if str(f.get("family", "")) in {"design", "dead_code"}
            ]
        return {str(finding.get("id", "")): dict(finding) for finding in findings}

    def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namespace:
        """Translate *request* into the argparse-style Namespace the analysis
        pipeline consumes: defaults, then pyproject values, then request
        overrides, then analysis-mode skips; validated before returning."""
        args = Namespace(
            root=str(root_path),
            min_loc=DEFAULT_MIN_LOC,
            min_stmt=DEFAULT_MIN_STMT,
            block_min_loc=DEFAULT_BLOCK_MIN_LOC,
            block_min_stmt=DEFAULT_BLOCK_MIN_STMT,
            segment_min_loc=DEFAULT_SEGMENT_MIN_LOC,
            segment_min_stmt=DEFAULT_SEGMENT_MIN_STMT,
            processes=None,
            cache_path=None,
            max_cache_size_mb=DEFAULT_MAX_CACHE_SIZE_MB,
            baseline=DEFAULT_BASELINE_PATH,
            max_baseline_size_mb=DEFAULT_MAX_BASELINE_SIZE_MB,
            update_baseline=False,
            fail_on_new=False,
            fail_threshold=-1,
            ci=False,
            fail_complexity=-1,
            fail_coupling=-1,
            fail_cohesion=-1,
            fail_cycles=False,
            fail_dead_code=False,
            fail_health=-1,
            fail_on_new_metrics=False,
            fail_on_typing_regression=False,
            fail_on_docstring_regression=False,
            fail_on_api_break=False,
            min_typing_coverage=-1,
            min_docstring_coverage=-1,
            api_surface=False,
            coverage_xml=None,
            fail_on_untested_hotspots=False,
            coverage_min=DEFAULT_COVERAGE_MIN,
            design_complexity_threshold=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD,
            design_coupling_threshold=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD,
            design_cohesion_threshold=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD,
            update_metrics_baseline=False,
            # NOTE(review): metrics baseline defaults to the same file as the
            # clone baseline; _resolve_baseline_inputs probes the shared payload.
            metrics_baseline=DEFAULT_BASELINE_PATH,
            skip_metrics=False,
            skip_dead_code=False,
            skip_dependencies=False,
            golden_fixture_paths=(),
            html_out=None,
            json_out=None,
            md_out=None,
            sarif_out=None,
            text_out=None,
            no_progress=True,
            no_color=True,
            quiet=True,
            verbose=False,
            debug=False,
            open_html_report=False,
            timestamped_report_paths=False,
        )
        if request.respect_pyproject:
            try:
                config_values = load_pyproject_config(root_path)
            except ConfigValidationError as exc:
                raise MCPServiceContractError(str(exc)) from exc
            # Only whitelisted config keys may override the defaults.
            for key in sorted(_MCP_CONFIG_KEYS.intersection(config_values)):
                setattr(args, key, config_values[key])

        self._apply_request_overrides(args=args, root_path=root_path, request=request)

        # Analysis mode wins over both config and request overrides.
        if request.analysis_mode == "clones_only":
            args.skip_metrics = True
            args.skip_dead_code = True
            args.skip_dependencies = True
        else:
            args.skip_metrics = False
            args.skip_dead_code = False
            args.skip_dependencies = False

        if not validate_numeric_args(args):
            raise MCPServiceContractError(
                "Numeric analysis settings must be non-negative and thresholds "
                "must be >= -1. Coverage thresholds must be between 0 and 100."
            )

        return args

    def _apply_request_overrides(
        self,
        *,
        args: Namespace,
        root_path: Path,
        request: MCPAnalysisRequest,
    ) -> None:
        """Copy the non-None request fields onto *args*, resolving path-valued
        fields relative to *root_path*."""
        override_map: dict[str, object | None] = {
            "processes": request.processes,
            "min_loc": request.min_loc,
            "min_stmt": request.min_stmt,
            "block_min_loc": request.block_min_loc,
            "block_min_stmt": request.block_min_stmt,
            "segment_min_loc": request.segment_min_loc,
            "segment_min_stmt": request.segment_min_stmt,
            "api_surface": request.api_surface,
            "coverage_min": request.coverage_min,
            "max_baseline_size_mb": request.max_baseline_size_mb,
            "max_cache_size_mb": request.max_cache_size_mb,
            "design_complexity_threshold": request.complexity_threshold,
            "design_coupling_threshold": request.coupling_threshold,
            "design_cohesion_threshold": request.cohesion_threshold,
        }
        for key, value in override_map.items():
            if value is not None:
                setattr(args, key, value)

        if request.baseline_path is not None:
            args.baseline = str(
                _helpers._resolve_optional_path(request.baseline_path, root_path)
            )
        if request.metrics_baseline_path is not None:
            args.metrics_baseline = str(
                _helpers._resolve_optional_path(
                    request.metrics_baseline_path,
                    root_path,
                )
            )
        if request.cache_path is not None:
            args.cache_path = str(
                _helpers._resolve_optional_path(request.cache_path, root_path)
            )
        if request.coverage_xml is not None:
            args.coverage_xml = str(
                _helpers._resolve_optional_path(request.coverage_xml, root_path)
            )

    def _resolve_baseline_inputs(
        self,
        *,
        root_path: Path,
        args: Namespace,
    ) -> tuple[Path, bool, Path, bool, dict[str, object] | None]:
        """Resolve clone/metrics baseline paths and existence flags; when both
        point at the same file, probe it once and share the payload."""
        baseline_path = _helpers._resolve_optional_path(str(args.baseline), root_path)
        baseline_exists = baseline_path.exists()

        metrics_baseline_arg_path = _helpers._resolve_optional_path(
            str(args.metrics_baseline),
            root_path,
        )
        shared_baseline_payload: dict[str, object] | None = None
        if metrics_baseline_arg_path == baseline_path:
            probe = probe_metrics_baseline_section(metrics_baseline_arg_path)
            metrics_baseline_exists = probe.has_metrics_section
            shared_baseline_payload = probe.payload
        else:
            metrics_baseline_exists = metrics_baseline_arg_path.exists()

        return (
            baseline_path,
            baseline_exists,
            metrics_baseline_arg_path,
            metrics_baseline_exists,
            shared_baseline_payload,
        )


class _MCPSessionRunSummaryBuilderMixin(_MCPSessionAnalysisArgsMixin):
    """Builds the per-run summary payloads stored in run records."""

    _runs: CodeCloneMCPRunStore
    _state_lock: _StateLock
    _review_state: dict[str, OrderedDict[str, str | None]]
    _last_gate_results: dict[str, dict[str, object]]
    _spread_max_cache: dict[str, int]

    def _changed_analysis_payload(
        self,
        record: MCPRunRecord,
    ) -> dict[str, object]:
        """Summarize a changed-paths analysis of *record* (repository-scoped
        health, change-focused counts)."""
        changed_projection = _helpers._as_mapping(record.changed_projection)
        health = _helpers._summary_health_payload(record.summary)
        # Collapse to score/grade unless health is explicitly unavailable.
        health_payload = (
            {
                "score": health.get("score"),
                "grade": health.get("grade"),
            }
            if health.get("available") is not False
            else dict(health)
        )
        return {
            "run_id": _helpers._short_run_id(record.run_id),
            "focus": "changed_paths",
            "health_scope": "repository",
            "baseline": dict(
                _helpers._summary_trusted_state_payload(
                    record.summary,
                    key="baseline",
                )
            ),
            "changed_files": len(record.changed_paths),
            "health": health_payload,
            "analysis_profile": _helpers._summary_analysis_profile_payload(
                record.summary
            ),
            "health_delta": (
                _as_int(changed_projection.get("health_delta", 0), 0)
                if changed_projection.get("health_delta") is not None
                else None
            ),
            "verdict": str(changed_projection.get("verdict", "stable")),
            "new_findings": _as_int(changed_projection.get("new", 0), 0),
            "new_by_source_kind": dict(
                _helpers._as_mapping(changed_projection.get("new_by_source_kind"))
            ),
            "resolved_findings": 0,
            "changed_findings": [],
            "coverage_join": _helpers._summary_coverage_join_payload(record),
        }

    def _build_run_summary_payload(
        self,
        *,
        run_id: str,
        root_path: Path,
        request: MCPAnalysisRequest,
        report_document: Mapping[str, object],
        baseline_state: CloneBaselineState,
        metrics_baseline_state: MetricsBaselineState,
        cache_status: CacheStatus,
        new_func: Sequence[str] | set[str],
        new_block: Sequence[str] | set[str],
        metrics_diff: MetricsDiff | None,
        warnings: Sequence[str],
        failures: Sequence[str],
    ) -> dict[str, object]:
        """Assemble the run summary from the canonical report document,
        preferring report ``meta`` values over the in-process states."""
        meta = _helpers._as_mapping(report_document.get("meta"))
        meta_baseline = _helpers._as_mapping(meta.get("baseline"))
        meta_metrics_baseline = _helpers._as_mapping(meta.get("metrics_baseline"))
        meta_cache = _helpers._as_mapping(meta.get("cache"))
        inventory = _helpers._as_mapping(report_document.get("inventory"))
        findings = _helpers._as_mapping(report_document.get("findings"))
        metrics = _helpers._as_mapping(report_document.get("metrics"))
        metrics_summary = _helpers._as_mapping(metrics.get("summary"))
        summary = _helpers._as_mapping(findings.get("summary"))
        analysis_profile = _helpers._summary_analysis_profile_payload(meta)
        payload = {
            "run_id": run_id,
            "root": str(root_path),
            "analysis_mode": request.analysis_mode,
            "codeclone_version": meta.get("codeclone_version", __version__),
            "python_tag": str(meta.get("python_tag", "")),
            "report_schema_version": report_document.get(
                "report_schema_version",
                REPORT_SCHEMA_VERSION,
            ),
            "baseline": {
                "path": meta_baseline.get(
                    "path",
                    str(root_path / DEFAULT_BASELINE_PATH),
                ),
                "loaded": bool(meta_baseline.get("loaded", baseline_state.loaded)),
                "status": str(meta_baseline.get("status", baseline_state.status.value)),
                "trusted_for_diff": baseline_state.trusted_for_diff,
                "python_tag": meta_baseline.get("python_tag"),
            },
            "metrics_baseline": {
                "path": meta_metrics_baseline.get(
                    "path",
                    str(root_path / DEFAULT_BASELINE_PATH),
                ),
                "loaded": bool(
                    meta_metrics_baseline.get(
                        "loaded",
                        metrics_baseline_state.loaded,
                    )
                ),
                "status": str(
                    meta_metrics_baseline.get(
                        "status",
                        metrics_baseline_state.status.value,
                    )
                ),
                "trusted_for_diff": metrics_baseline_state.trusted_for_diff,
            },
            "cache": {
                "path": meta_cache.get("path"),
                "status": str(meta_cache.get("status", cache_status.value)),
                "used": bool(meta_cache.get("used", False)),
                "schema_version": meta_cache.get("schema_version"),
            },
            "inventory": dict(inventory),
            "findings_summary": dict(summary),
            "health": dict(_helpers._as_mapping(metrics_summary.get("health"))),
            "baseline_diff": {
                "new_function_clone_groups": len(new_func),
                "new_block_clone_groups": len(new_block),
                "new_clone_groups_total": len(new_func) + len(new_block),
            },
            "metrics_diff": _helpers._metrics_diff_payload(metrics_diff),
            "warnings": list(warnings),
            "failures": list(failures),
        }
        if analysis_profile:
            payload["analysis_profile"] = analysis_profile
        # Normalize cache/health through the shared helpers after assembly.
        payload["cache"] = _helpers._summary_cache_payload(payload)
        payload["health"] = _helpers._summary_health_payload(payload)
        return payload


class _MCPSessionSummaryMixin(_MCPSessionRunSummaryBuilderMixin):
    """Shapes stored summaries and metrics detail into client-facing payloads."""

    _runs: CodeCloneMCPRunStore
    _state_lock: _StateLock
    _review_state: dict[str, OrderedDict[str, str | None]]
    _last_gate_results: dict[str, dict[str, object]]
    _spread_max_cache: dict[str, int]

    def _summary_payload(
        self,
        summary: Mapping[str, object],
        *,
        record: MCPRunRecord | None = None,
    ) -> dict[str, object]:
        """Render *summary* for clients; a summary with inventory but no
        run/baseline context collapses to the minimal repository view."""
        inventory = _helpers._as_mapping(summary.get("inventory"))
        if (
            not summary.get("run_id")
            and not record
            and "inventory" in summary
            and not summary.get("baseline")
        ):
            return {
                "focus": _FOCUS_REPOSITORY,
                "health_scope": _HEALTH_SCOPE_REPOSITORY,
                "inventory": _helpers._summary_inventory_payload(inventory),
                "health": _helpers._summary_health_payload(summary),
            }
        resolved_run_id = (
            record.run_id if record is not None else str(summary.get("run_id", ""))
        )
        payload: dict[str, object] = {
            "run_id": (
                _helpers._short_run_id(resolved_run_id) if resolved_run_id else ""
            ),
            "focus": _FOCUS_REPOSITORY,
            "health_scope": _HEALTH_SCOPE_REPOSITORY,
            "version": str(summary.get("codeclone_version", __version__)),
            "schema": str(summary.get("report_schema_version", "")),
            "mode": str(summary.get("analysis_mode", "")),
            "baseline": self._summary_baseline_payload(summary),
            "metrics_baseline": self._summary_metrics_baseline_payload(summary),
            "cache": _helpers._summary_cache_payload(summary),
            "inventory": _helpers._summary_inventory_payload(inventory),
            "health": _helpers._summary_health_payload(summary),
            "findings": self._summary_findings_payload(summary, record=record),
            "diff": _helpers._summary_diff_payload(summary),
            "warnings": list(_helpers._as_sequence(summary.get("warnings"))),
            "failures": list(_helpers._as_sequence(summary.get("failures"))),
        }
        analysis_profile = _helpers._summary_analysis_profile_payload(summary)
        if analysis_profile:
            payload["analysis_profile"] = analysis_profile
        if record is not None:
            coverage_join = _helpers._summary_coverage_join_payload(record)
            if coverage_join:
                payload["coverage_join"] = coverage_join
        return payload

    def _summary_baseline_payload(
        self,
        summary: Mapping[str, object],
    ) -> dict[str, object]:
        """Trusted-state view of the clone baseline section."""
        return _helpers._summary_trusted_state_payload(summary, key="baseline")

    def _summary_metrics_baseline_payload(
        self,
        summary: Mapping[str, object],
    ) -> dict[str, object]:
        """Trusted-state view of the metrics baseline section."""
        return _helpers._summary_trusted_state_payload(summary, key="metrics_baseline")

    def _summary_findings_payload(
        self,
        summary: Mapping[str, object],
        *,
        record: MCPRunRecord | None,
    ) -> dict[str, object]:
        """Count findings by novelty/family/source kind; totals-only when no
        record is available to query."""
        findings_summary = _helpers._as_mapping(summary.get("findings_summary"))
        if record is None:
            return {
                "total": _as_int(findings_summary.get("total", 0), 0),
                "new": 0,
                "known": 0,
                "by_family": {},
                "production": 0,
                "new_by_source_kind": _helpers._source_kind_breakdown(()),
            }
        findings = self._base_findings(record)
        by_family: dict[str, int] = {
            "clones": 0,
            "structural": 0,
            "dead_code": 0,
            "design": 0,
        }
        new_count = 0
        known_count = 0
        production_count = 0
        new_by_source_kind = _helpers._source_kind_breakdown(
            _helpers._finding_source_kind(finding)
            for finding in findings
            if str(finding.get("novelty", "")).strip() == "new"
        )
        for finding in findings:
            family = str(finding.get("family", "")).strip()
            # The singular "clone" family is reported under the plural key.
            family_key = "clones" if family == FAMILY_CLONE else family
            if family_key in by_family:
                by_family[family_key] += 1
            if str(finding.get("novelty", "")).strip() == "new":
                new_count += 1
            else:
                known_count += 1
            if _helpers._finding_source_kind(finding) == SOURCE_KIND_PRODUCTION:
                production_count += 1
        return {
            "total": len(findings),
            "new": new_count,
            "known": known_count,
            "by_family": {key: value for key, value in by_family.items() if value > 0},
            "production": production_count,
            "new_by_source_kind": new_by_source_kind,
        }

    def _metrics_detail_payload(
        self,
        *,
        metrics: Mapping[str, object],
        family: MetricsDetailFamily | None,
        path: str | None,
        offset: int,
        limit: int,
    ) -> dict[str, object]:
        """Paginate per-item metrics detail filtered by *family* and/or *path*;
        with neither filter, only the summary plus a usage hint is returned."""
        summary = dict(_helpers._as_mapping(metrics.get("summary")))
        families = _helpers._as_mapping(metrics.get("families"))
        normalized_path = _helpers._normalize_relative_path(path or "")
        if family is None and not normalized_path:
            return {
                "summary": summary,
                "_hint": "Use family and/or path parameters to access per-item detail.",
            }
        family_names = (family,) if family is not None else tuple(sorted(families))
        items: list[dict[str, object]] = []
        for family_name in family_names:
            family_payload = _helpers._as_mapping(families.get(family_name))
            for item in _helpers._as_sequence(family_payload.get("items")):
                item_map = _helpers._as_mapping(item)
                if normalized_path and not _helpers._metric_item_matches_path(
                    item_map,
                    normalized_path,
                ):
                    continue
                compact_item = _helpers._compact_metrics_item(item_map)
                if family is None:
                    # Cross-family listings tag each item with its family.
                    compact_item = {"family": family_name, **compact_item}
                items.append(compact_item)
        if family is None:
            items.sort(
                key=lambda item: (
                    str(item.get("family", "")),
                    str(item.get("path", "")),
                    str(item.get("qualname", "")),
                    _as_int(item.get("start_line", 0), 0),
                )
            )
        page = paginate(items, offset=offset, limit=limit, max_limit=200)
        return {
            "family": family,
            "path": normalized_path or None,
            "offset": page.offset,
            "limit": page.limit,
            "returned": len(page.items),
            "total": page.total,
            "has_more": page.next_offset is not None,
            "items": page.items,
        }

    def _derived_section_payload(self, record: MCPRunRecord) -> dict[str, object]:
        """Project the report's ``derived`` section (suggestions + hotlists)
        onto short finding ids.

        Raises:
            MCPServiceContractError: when the run has no ``derived`` section.
        """
        derived = _helpers._as_mapping(record.report_document.get("derived"))
        if not derived:
            raise MCPServiceContractError(
                "Report section 'derived' is not available in this run."
            )
        suggestions = self._triage_suggestion_rows(record)
        canonical_to_short, _ = self._finding_id_maps(record)
        hotlists = _helpers._as_mapping(derived.get("hotlists"))
        projected_hotlists: dict[str, list[str]] = {}
        for hotlist_key, hotlist_ids in hotlists.items():
            projected_hotlists[hotlist_key] = [
                canonical_to_short.get(
                    str(finding_id),
                    _helpers._base_short_finding_id(str(finding_id)),
                )
                for finding_id in _helpers._as_sequence(hotlist_ids)
                if str(finding_id)
            ]
        return {
            "suggestions": suggestions,
            "hotlists": projected_hotlists,
        }


class _MCPSessionReportMixin(_MCPSessionSummaryMixin):
    """Read-side session API: run summaries and run-to-run comparison."""

    _runs: CodeCloneMCPRunStore
    _state_lock: _StateLock
    _review_state: dict[str, OrderedDict[str, str | None]]
    _last_gate_results: dict[str, dict[str, object]]
    _spread_max_cache: dict[str, int]

    def get_run_summary(self, run_id: str | None = None) -> dict[str, object]:
        """Return the client-facing summary for *run_id* (latest when None)."""
        record = self._runs.get(run_id)
        return self._summary_payload(record.summary, record=record)

    def compare_runs(
        self,
        *,
        run_id_before: str,
        run_id_after: str | None = None,
        focus: ComparisonFocus = "all",
    ) -> dict[str, object]:
        """Diff two runs' finding sets and health under the given *focus*;
        incomparable runs yield an 'incomparable' verdict with a reason."""
        validated_focus = _helpers._validate_choice(
            "focus",
            focus,
            _VALID_COMPARISON_FOCUS,
        )
        before = self._runs.get(run_id_before)
        after = self._runs.get(run_id_after)
        before_findings = self._comparison_index(before, focus=validated_focus)
        after_findings = self._comparison_index(after, focus=validated_focus)
        before_ids = set(before_findings)
        after_ids = set(after_findings)
        regressions = sorted(after_ids - before_ids)
        improvements = sorted(before_ids - after_ids)
        common = before_ids & after_ids
        health_before = _helpers._summary_health_score(before.summary)
        health_after = _helpers._summary_health_score(after.summary)
        comparability = _helpers._comparison_scope(before=before, after=after)
        comparable = bool(comparability["comparable"])
        health_delta = (
            health_after - health_before
            if comparable and health_before is not None and health_after is not None
            else None
        )
        verdict = (
            _helpers._comparison_verdict(
                regressions=len(regressions),
                improvements=len(improvements),
                health_delta=health_delta,
            )
            if comparable
            else "incomparable"
        )
        regressions_payload = (
            [
                self._comparison_finding_card(
                    after,
                    after_findings[finding_id],
                )
                for finding_id in regressions
            ]
            if comparable
            else []
        )
        improvements_payload = (
            [
                self._comparison_finding_card(
                    before,
                    before_findings[finding_id],
                )
                for finding_id in improvements
            ]
            if comparable
            else []
        )
        payload: dict[str, object] = {
            "before": {
                "run_id": _helpers._short_run_id(before.run_id),
                "health": health_before,
            },
            "after": {
                "run_id": _helpers._short_run_id(after.run_id),
                "health": health_after,
            },
            "comparable": comparable,
            "health_delta": health_delta,
            "verdict": verdict,
            "regressions": regressions_payload,
            "improvements": improvements_payload,
            "unchanged": len(common) if comparable else None,
            "summary": _helpers._comparison_summary_text(
                comparable=comparable,
comparability_reason=str(comparability["reason"]), + regressions=len(regressions), + improvements=len(improvements), + health_delta=health_delta, + ), + } + if not comparable: + payload["reason"] = comparability["reason"] + return payload + + +class _MCPSessionStateMixin(_MCPSessionReportMixin): + _runs: CodeCloneMCPRunStore + _state_lock: _StateLock + _review_state: dict[str, OrderedDict[str, str | None]] + _last_gate_results: dict[str, dict[str, object]] + _spread_max_cache: dict[str, int] + + def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: + record = self._runs.get(request.run_id) + gate_result = self._evaluate_gate_snapshot(record=record, request=request) + result = { + "run_id": _helpers._short_run_id(record.run_id), + "would_fail": gate_result.exit_code != 0, + "exit_code": gate_result.exit_code, + "reasons": list(gate_result.reasons), + "config": { + "fail_on_new": request.fail_on_new, + "fail_threshold": request.fail_threshold, + "fail_complexity": request.fail_complexity, + "fail_coupling": request.fail_coupling, + "fail_cohesion": request.fail_cohesion, + "fail_cycles": request.fail_cycles, + "fail_dead_code": request.fail_dead_code, + "fail_health": request.fail_health, + "fail_on_new_metrics": request.fail_on_new_metrics, + "fail_on_typing_regression": request.fail_on_typing_regression, + "fail_on_docstring_regression": request.fail_on_docstring_regression, + "fail_on_api_break": request.fail_on_api_break, + "fail_on_untested_hotspots": request.fail_on_untested_hotspots, + "min_typing_coverage": request.min_typing_coverage, + "min_docstring_coverage": request.min_docstring_coverage, + "coverage_min": request.coverage_min, + }, + } + with self._state_lock: + self._last_gate_results[record.run_id] = dict(result) + return result + + def _evaluate_gate_snapshot( + self, + *, + record: MCPRunRecord, + request: MCPGateRequest, + ) -> GatingResult: + if request.fail_on_untested_hotspots: + if record.coverage_join is None: + raise 
MCPServiceContractError( + "Coverage gating requires a run created with coverage_xml." + ) + if record.coverage_join.status != "ok": + detail = record.coverage_join.invalid_reason or "invalid coverage input" + raise MCPServiceContractError( + "Coverage gating requires a valid Cobertura XML input. " + f"Reason: {detail}" + ) + return _evaluate_report_gates( + report_document=record.report_document, + config=MetricGateConfig( + fail_complexity=request.fail_complexity, + fail_coupling=request.fail_coupling, + fail_cohesion=request.fail_cohesion, + fail_cycles=request.fail_cycles, + fail_dead_code=request.fail_dead_code, + fail_health=request.fail_health, + fail_on_new_metrics=request.fail_on_new_metrics, + fail_on_typing_regression=request.fail_on_typing_regression, + fail_on_docstring_regression=request.fail_on_docstring_regression, + fail_on_api_break=request.fail_on_api_break, + fail_on_untested_hotspots=request.fail_on_untested_hotspots, + min_typing_coverage=request.min_typing_coverage, + min_docstring_coverage=request.min_docstring_coverage, + coverage_min=request.coverage_min, + fail_on_new=request.fail_on_new, + fail_threshold=request.fail_threshold, + ), + baseline_status=str( + _helpers._as_mapping( + _helpers._as_mapping(record.report_document.get("meta")).get( + "baseline" + ) + ).get("status", "") + ), + metrics_diff=record.metrics_diff, + clone_new_count=len(record.new_func) + len(record.new_block), + clone_total=record.func_clones_count + record.block_clones_count, + ) + + def get_report_section( + self, + *, + run_id: str | None = None, + section: ReportSection = "all", + family: MetricsDetailFamily | None = None, + path: str | None = None, + offset: int = 0, + limit: int = 50, + ) -> dict[str, object]: + validated_section = _helpers._validate_choice( + "section", + section, + _VALID_REPORT_SECTIONS, + ) + record = self._runs.get(run_id) + report_document = record.report_document + if validated_section == "all": + return dict(report_document) + if 
validated_section == "changed": + if record.changed_projection is None: + raise MCPServiceContractError( + "Report section 'changed' is not available in this run." + ) + return dict(record.changed_projection) + if validated_section == "metrics": + metrics = _helpers._as_mapping(report_document.get("metrics")) + return {"summary": dict(_helpers._as_mapping(metrics.get("summary")))} + if validated_section == "metrics_detail": + metrics = _helpers._as_mapping(report_document.get("metrics")) + if not metrics: + raise MCPServiceContractError( + "Report section 'metrics_detail' is not available in this run." + ) + validated_family_input = _helpers._validate_optional_choice( + "family", + family, + _VALID_METRICS_DETAIL_FAMILIES, + ) + normalized_family = ( + _METRICS_DETAIL_FAMILY_ALIASES.get( + str(validated_family_input), + str(validated_family_input), + ) + if validated_family_input is not None + else None + ) + validated_family = _helpers._metrics_detail_family(normalized_family) + return self._metrics_detail_payload( + metrics=metrics, + family=validated_family, + path=path, + offset=offset, + limit=limit, + ) + if validated_section == "derived": + return self._derived_section_payload(record) + payload = report_document.get(validated_section) + if not isinstance(payload, Mapping): + raise MCPServiceContractError( + f"Report section '{validated_section}' is not available in this run." 
+ ) + return dict(payload) + + def get_production_triage( + self, + *, + run_id: str | None = None, + max_hotspots: int = 3, + max_suggestions: int = 3, + ) -> dict[str, object]: + record = self._runs.get(run_id) + summary = self._summary_payload(record.summary, record=record) + findings = self._base_findings(record) + findings_breakdown = _helpers._source_kind_breakdown( + _helpers._finding_source_kind(finding) for finding in findings + ) + suggestion_rows = self._triage_suggestion_rows(record) + suggestion_breakdown = _helpers._source_kind_breakdown( + row.get("source_kind") for row in suggestion_rows + ) + hotspot_limit = max(1, min(max_hotspots, 10)) + suggestion_limit = max(1, min(max_suggestions, 10)) + production_hotspots = self._hotspot_rows( + record=record, + kind="production_hotspots", + detail_level="summary", + changed_paths=(), + exclude_reviewed=False, + ) + production_suggestions = [ + dict(row) + for row in suggestion_rows + if str(row.get("source_kind", "")) == SOURCE_KIND_PRODUCTION + ] + payload: dict[str, object] = { + "run_id": _helpers._short_run_id(record.run_id), + "focus": _FOCUS_PRODUCTION, + "health_scope": _HEALTH_SCOPE_REPOSITORY, + "baseline": dict(_helpers._as_mapping(summary.get("baseline"))), + "health": dict(_helpers._summary_health_payload(summary)), + "cache": dict(_helpers._as_mapping(summary.get("cache"))), + "findings": { + "total": len(findings), + "by_source_kind": findings_breakdown, + "new_by_source_kind": dict( + _helpers._as_mapping( + _helpers._as_mapping(summary.get("findings")).get( + "new_by_source_kind" + ) + ) + ), + "outside_focus": len(findings) + - findings_breakdown[SOURCE_KIND_PRODUCTION], + }, + "top_hotspots": { + "kind": "production_hotspots", + "available": len(production_hotspots), + "returned": min(len(production_hotspots), hotspot_limit), + "items": [ + dict(_helpers._as_mapping(item)) + for item in production_hotspots[:hotspot_limit] + ], + }, + "suggestions": { + "total": len(suggestion_rows), + 
"by_source_kind": suggestion_breakdown, + "outside_focus": len(suggestion_rows) + - suggestion_breakdown[SOURCE_KIND_PRODUCTION], + }, + "top_suggestions": { + "available": len(production_suggestions), + "returned": min(len(production_suggestions), suggestion_limit), + "items": production_suggestions[:suggestion_limit], + }, + } + analysis_profile = _helpers._summary_analysis_profile_payload(summary) + if analysis_profile: + payload["analysis_profile"] = analysis_profile + coverage_join = _helpers._summary_coverage_join_payload(record) + if coverage_join: + payload["coverage_join"] = coverage_join + return payload + + def get_help( + self, + *, + topic: HelpTopic, + detail: HelpDetail = "compact", + ) -> dict[str, object]: + validated_topic = _helpers._validate_choice("topic", topic, _VALID_HELP_TOPICS) + validated_detail = _helpers._validate_choice( + "detail", + detail, + _VALID_HELP_DETAILS, + ) + spec = _HELP_TOPIC_SPECS[validated_topic] + payload: dict[str, object] = { + "topic": validated_topic, + "detail": validated_detail, + "summary": spec.summary, + "key_points": list(spec.key_points), + "recommended_tools": list(spec.recommended_tools), + "doc_links": [ + {"title": title, "url": url} for title, url in spec.doc_links + ], + } + if validated_detail == "normal": + if spec.warnings: + payload["warnings"] = list(spec.warnings) + if spec.anti_patterns: + payload["anti_patterns"] = list(spec.anti_patterns) + return payload + + def generate_pr_summary( + self, + *, + run_id: str | None = None, + changed_paths: tuple[str, ...] 
= (), + git_diff_ref: str | None = None, + format: PRSummaryFormat = "markdown", + ) -> dict[str, object]: + output_format = _helpers._validate_choice( + "format", + format, + _VALID_PR_SUMMARY_FORMATS, + ) + record = self._runs.get(run_id) + paths_filter = self._resolve_query_changed_paths( + record=record, + changed_paths=changed_paths, + git_diff_ref=git_diff_ref, + prefer_record_paths=True, + ) + changed_items = self._query_findings( + record=record, + detail_level="summary", + changed_paths=paths_filter, + ) + previous = self._previous_run_for_root(record) + resolved: list[dict[str, object]] = [] + if previous is not None: + compare_payload = self.compare_runs( + run_id_before=previous.run_id, + run_id_after=record.run_id, + focus="all", + ) + resolved = _helpers._dict_rows(compare_payload.get("improvements")) + with self._state_lock: + gate_result = dict( + self._last_gate_results.get( + record.run_id, + {"would_fail": False, "reasons": []}, + ) + ) + verdict = _helpers._changed_verdict( + changed_projection={ + "total": len(changed_items), + "new": sum( + 1 for item in changed_items if str(item.get("novelty", "")) == "new" + ), + }, + health_delta=_helpers._summary_health_delta(record.summary), + ) + payload: dict[str, object] = { + "run_id": _helpers._short_run_id(record.run_id), + "changed_files": len(paths_filter), + "health": _helpers._summary_health_payload(record.summary), + "health_delta": _helpers._summary_health_delta(record.summary), + "verdict": verdict, + "new_findings_in_changed_files": changed_items, + "resolved": resolved, + "blocking_gates": _helpers._string_rows(gate_result.get("reasons")), + } + if output_format == "json": + return payload + return { + "run_id": _helpers._short_run_id(record.run_id), + "format": output_format, + "content": _helpers._render_pr_summary_markdown(payload), + } + + def clear_session_runs(self) -> dict[str, object]: + removed_run_ids = self._runs.clear() + with self._state_lock: + cleared_review_entries = sum( + 
len(entries) for entries in self._review_state.values() + ) + cleared_gate_results = len(self._last_gate_results) + cleared_spread_cache_entries = len(self._spread_max_cache) + self._review_state.clear() + self._last_gate_results.clear() + self._spread_max_cache.clear() + return { + "cleared_runs": len(removed_run_ids), + "cleared_run_ids": [ + _helpers._short_run_id(run_id) for run_id in removed_run_ids + ], + "cleared_review_entries": cleared_review_entries, + "cleared_gate_results": cleared_gate_results, + "cleared_spread_cache_entries": cleared_spread_cache_entries, + } + + def read_resource(self, uri: str) -> str: + if uri == "codeclone://schema": + return _json_text_payload(_helpers._schema_resource_payload()) + if uri == "codeclone://latest/triage": + latest = self._runs.get() + return _json_text_payload(self.get_production_triage(run_id=latest.run_id)) + latest_prefix = "codeclone://latest/" + run_prefix = "codeclone://runs/" + if uri.startswith(latest_prefix): + latest = self._runs.get() + suffix = uri[len(latest_prefix) :] + return self._render_resource(latest, suffix) + if not uri.startswith(run_prefix): + raise MCPServiceContractError(f"Unsupported CodeClone resource URI: {uri}") + remainder = uri[len(run_prefix) :] + run_id, sep, suffix = remainder.partition("/") + if not sep: + raise MCPServiceContractError(f"Unsupported CodeClone resource URI: {uri}") + record = self._runs.get(run_id) + return self._render_resource(record, suffix) + + def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: + if suffix == "summary": + return _json_text_payload( + self._summary_payload(record.summary, record=record) + ) + if suffix == "triage": + raise MCPServiceContractError( + "Production triage is exposed only as codeclone://latest/triage." 
+ ) + if suffix == "health": + return _json_text_payload(_helpers._summary_health_payload(record.summary)) + if suffix == "gates": + with self._state_lock: + gate_result = self._last_gate_results.get(record.run_id) + if gate_result is None: + raise MCPServiceContractError( + "No gate evaluation result is available in this MCP session." + ) + return _json_text_payload(gate_result) + if suffix == "changed": + if record.changed_projection is None: + raise MCPServiceContractError( + "Changed-findings projection is not available in this run." + ) + return _json_text_payload(record.changed_projection) + if suffix == "schema": + return _json_text_payload(_helpers._schema_resource_payload()) + if suffix == "report.json": + return _json_text_payload(record.report_document, sort_keys=False) + if suffix == "overview": + return _json_text_payload( + self.list_hotspots(kind="highest_spread", run_id=record.run_id) + ) + finding_prefix = "findings/" + if suffix.startswith(finding_prefix): + finding_id = suffix[len(finding_prefix) :] + return _json_text_payload( + self._service_get_finding( + run_id=record.run_id, + finding_id=finding_id, + ) + ) + raise MCPServiceContractError( + f"Unsupported CodeClone resource suffix '{suffix}'." 
+ ) + + def _prune_session_state(self) -> None: + active_run_ids = {record.run_id for record in self._runs.records()} + with self._state_lock: + for state_map in ( + self._review_state, + self._last_gate_results, + self._spread_max_cache, + ): + stale_run_ids = [ + run_id for run_id in state_map if run_id not in active_run_ids + ] + for run_id in stale_run_ids: + state_map.pop(run_id, None) diff --git a/codeclone/surfaces/mcp/session.py b/codeclone/surfaces/mcp/session.py index 2a11f09..3de4ce2 100644 --- a/codeclone/surfaces/mcp/session.py +++ b/codeclone/surfaces/mcp/session.py @@ -6,1079 +6,76 @@ from __future__ import annotations -import hashlib -import subprocess -from argparse import Namespace -from collections import OrderedDict -from collections.abc import Iterable, Mapping, Sequence -from dataclasses import dataclass -from json import JSONDecodeError -from pathlib import Path -from threading import RLock -from typing import Final, Literal, TypeVar - -import orjson - -from ... import __version__ -from ...baseline import Baseline -from ...cache.store import Cache -from ...cache.versioning import CacheStatus -from ...config.pyproject_loader import ( - ConfigValidationError, - load_pyproject_config, +from ...cache.store import resolve_cache_status +from ...report.meta import build_report_meta as _build_report_meta +from ...report.meta import current_report_timestamp_utc as _current_report_timestamp_utc +from . 
import _session_helpers as _helpers +from ._session_baseline import ( + resolve_clone_baseline_state, + resolve_metrics_baseline_state, ) -from ...config.spec import ( - DEFAULT_BASELINE_PATH, +from ._session_shared import ( + _REPORT_DUMMY_PATH, DEFAULT_BLOCK_MIN_LOC, DEFAULT_BLOCK_MIN_STMT, - DEFAULT_MAX_BASELINE_SIZE_MB, - DEFAULT_MAX_CACHE_SIZE_MB, + DEFAULT_MCP_HISTORY_LIMIT, DEFAULT_MIN_LOC, DEFAULT_MIN_STMT, - DEFAULT_SEGMENT_MIN_LOC, - DEFAULT_SEGMENT_MIN_STMT, -) -from ...contracts import ( - DEFAULT_COVERAGE_MIN, - DEFAULT_JSON_REPORT_PATH, DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - DOCS_URL, - REPORT_SCHEMA_VERSION, -) -from ...core._types import OutputPaths -from ...core.bootstrap import bootstrap -from ...core.discovery import discover -from ...core.parallelism import process -from ...core.pipeline import analyze -from ...core.reporting import report -from ...domain.findings import ( - CATEGORY_CLONE, - CATEGORY_COHESION, - CATEGORY_COMPLEXITY, - CATEGORY_COUPLING, - CATEGORY_DEAD_CODE, - CATEGORY_DEPENDENCY, - CATEGORY_STRUCTURAL, - CLONE_KIND_SEGMENT, - FAMILY_CLONE, - FAMILY_CLONES, - FAMILY_DEAD_CODE, - FAMILY_DESIGN, - FAMILY_STRUCTURAL, -) -from ...domain.quality import ( - CONFIDENCE_HIGH, - CONFIDENCE_LOW, - CONFIDENCE_MEDIUM, - EFFORT_EASY, - EFFORT_HARD, - EFFORT_MODERATE, - SEVERITY_CRITICAL, - SEVERITY_INFO, - SEVERITY_WARNING, -) -from ...domain.source_scope import ( - SOURCE_KIND_FIXTURES, - SOURCE_KIND_MIXED, - SOURCE_KIND_ORDER, - SOURCE_KIND_OTHER, - SOURCE_KIND_PRODUCTION, - SOURCE_KIND_TESTS, -) -from ...findings.ids import ( - clone_group_id, - dead_code_group_id, - design_group_id, - structural_group_id, -) -from ...models import CoverageJoinResult, MetricsDiff, ProjectMetrics, Suggestion -from ...report.gates.evaluator import GateResult as GatingResult -from ...report.gates.evaluator import MetricGateConfig -from ...report.gates.evaluator import 
evaluate_gates as _evaluate_report_gates -from ...report.gates.evaluator import summarize_metrics_diff as _summarize_metrics_diff -from ...utils.coerce import as_float as _as_float -from ...utils.coerce import as_int as _as_int -from ...utils.git_diff import validate_git_diff_ref -from ..cli.baseline_state import ( - CloneBaselineState, - MetricsBaselineState, - probe_metrics_baseline_section, - resolve_clone_baseline_state, - resolve_metrics_baseline_state, -) -from ..cli.report_meta import _build_report_meta, _current_report_timestamp_utc -from ..cli.runtime import ( - resolve_cache_path, - resolve_cache_status, - validate_numeric_args, + DEFAULT_SEGMENT_MIN_LOC, + DEFAULT_SEGMENT_MIN_STMT, + MAX_MCP_HISTORY_LIMIT, + AnalysisMode, + Baseline, + CachePolicy, + CacheStatus, + CodeCloneMCPRunStore, + DetailLevel, + MCPAnalysisRequest, + MCPFindingNotFoundError, + MCPGateRequest, + MCPGitDiffError, + MCPRunNotFoundError, + MCPRunRecord, + MCPServiceContractError, + MCPServiceError, + OrderedDict, + OutputPaths, + RLock, + __version__, + _as_int, + _BufferConsole, + _validated_history_limit, + analyze, + bootstrap, + discover, + process, + report, ) -from .payloads import paginate, resolve_finding_id, short_id - -AnalysisMode = Literal["full", "clones_only"] -CachePolicy = Literal["reuse", "refresh", "off"] -FreshnessKind = Literal["fresh", "mixed", "reused"] -HotlistKind = Literal[ - "most_actionable", - "highest_spread", - "highest_priority", - "production_hotspots", - "test_fixture_hotspots", -] -FindingFamilyFilter = Literal["all", "clone", "structural", "dead_code", "design"] -FindingNoveltyFilter = Literal["all", "new", "known"] -FindingSort = Literal["default", "priority", "severity", "spread"] -DetailLevel = Literal["summary", "normal", "full"] -ComparisonFocus = Literal["all", "clones", "structural", "metrics"] -PRSummaryFormat = Literal["markdown", "json"] -HelpTopic = Literal[ - "workflow", - "analysis_profile", - "suppressions", - "baseline", - "coverage", 
- "latest_runs", - "review_state", - "changed_scope", -] -HelpDetail = Literal["compact", "normal"] -MetricsDetailFamily = Literal[ - "complexity", - "coupling", - "cohesion", - "coverage_adoption", - "coverage_join", - "dependencies", - "dead_code", - "api_surface", - "god_modules", - "overloaded_modules", - "health", +from ._session_state_mixin import _MCPSessionStateMixin + +__all__ = [ + "DEFAULT_MCP_HISTORY_LIMIT", + "MAX_MCP_HISTORY_LIMIT", + "AnalysisMode", + "CachePolicy", + "DetailLevel", + "MCPAnalysisRequest", + "MCPFindingNotFoundError", + "MCPGateRequest", + "MCPGitDiffError", + "MCPRunNotFoundError", + "MCPRunRecord", + "MCPServiceContractError", + "MCPServiceError", + "MCPSession", + "_validated_history_limit", ] -ReportSection = Literal[ - "all", - "meta", - "inventory", - "findings", - "metrics", - "metrics_detail", - "derived", - "changed", - "integrity", -] -HealthScope = Literal["repository"] -SummaryFocus = Literal["repository", "production", "changed_paths"] - -_LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser() -_REPORT_DUMMY_PATH = Path(DEFAULT_JSON_REPORT_PATH) -_HEALTH_SCOPE_REPOSITORY: Final[HealthScope] = "repository" -_FOCUS_REPOSITORY: Final[SummaryFocus] = "repository" -_FOCUS_PRODUCTION: Final[SummaryFocus] = "production" -_FOCUS_CHANGED_PATHS: Final[SummaryFocus] = "changed_paths" -_MCP_CONFIG_KEYS = frozenset( - { - "min_loc", - "min_stmt", - "block_min_loc", - "block_min_stmt", - "segment_min_loc", - "segment_min_stmt", - "processes", - "cache_path", - "max_cache_size_mb", - "baseline", - "max_baseline_size_mb", - "metrics_baseline", - "api_surface", - "coverage_xml", - "coverage_min", - "golden_fixture_paths", - } -) -_RESOURCE_SECTION_MAP: Final[dict[str, ReportSection]] = { - "report.json": "all", - "summary": "meta", - "health": "metrics", - "changed": "changed", - "overview": "derived", -} -_SEVERITY_WEIGHT: Final[dict[str, float]] = { - SEVERITY_CRITICAL: 1.0, - SEVERITY_WARNING: 0.6, - SEVERITY_INFO: 
0.2, -} -_EFFORT_WEIGHT: Final[dict[str, float]] = { - EFFORT_EASY: 1.0, - EFFORT_MODERATE: 0.6, - EFFORT_HARD: 0.3, -} -_NOVELTY_WEIGHT: Final[dict[str, float]] = {"new": 1.0, "known": 0.5} -_RUNTIME_WEIGHT: Final[dict[str, float]] = { - "production": 1.0, - "mixed": 0.8, - "tests": 0.4, - "fixtures": 0.2, - "other": 0.5, -} -_CONFIDENCE_WEIGHT: Final[dict[str, float]] = { - CONFIDENCE_HIGH: 1.0, - CONFIDENCE_MEDIUM: 0.7, - CONFIDENCE_LOW: 0.3, -} -# Canonical report groups use FAMILY_CLONES ("clones"), while individual finding -# payloads use FAMILY_CLONE ("clone"). -_VALID_ANALYSIS_MODES = frozenset({"full", "clones_only"}) -_VALID_CACHE_POLICIES = frozenset({"reuse", "refresh", "off"}) -_VALID_FINDING_FAMILIES = frozenset( - {"all", "clone", "structural", "dead_code", "design"} -) -_VALID_FINDING_NOVELTY = frozenset({"all", "new", "known"}) -_VALID_FINDING_SORT = frozenset({"default", "priority", "severity", "spread"}) -_VALID_DETAIL_LEVELS = frozenset({"summary", "normal", "full"}) -_VALID_COMPARISON_FOCUS = frozenset({"all", "clones", "structural", "metrics"}) -_VALID_PR_SUMMARY_FORMATS = frozenset({"markdown", "json"}) -_VALID_HELP_TOPICS = frozenset( - { - "workflow", - "analysis_profile", - "suppressions", - "baseline", - "coverage", - "latest_runs", - "review_state", - "changed_scope", - } -) -_VALID_HELP_DETAILS = frozenset({"compact", "normal"}) -DEFAULT_MCP_HISTORY_LIMIT = 4 -MAX_MCP_HISTORY_LIMIT = 10 -_VALID_REPORT_SECTIONS = frozenset( - { - "all", - "meta", - "inventory", - "findings", - "metrics", - "metrics_detail", - "derived", - "changed", - "integrity", - } -) -_VALID_HOTLIST_KINDS = frozenset( - { - "most_actionable", - "highest_spread", - "highest_priority", - "production_hotspots", - "test_fixture_hotspots", - } -) -_VALID_SEVERITIES = frozenset({SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO}) -_SOURCE_KIND_BREAKDOWN_ORDER: Final[tuple[str, ...]] = ( - SOURCE_KIND_PRODUCTION, - SOURCE_KIND_TESTS, - SOURCE_KIND_FIXTURES, - 
SOURCE_KIND_MIXED, - SOURCE_KIND_OTHER, -) -_COMPACT_ITEM_PATH_KEYS: Final[frozenset[str]] = frozenset( - {"relative_path", "path", "filepath", "file"} -) -_COMPACT_ITEM_EMPTY_VALUES: Final[tuple[object, ...]] = ("", None, [], {}, ()) -_HOTLIST_REPORT_KEYS: Final[dict[str, str]] = { - "most_actionable": "most_actionable_ids", - "highest_spread": "highest_spread_ids", - "production_hotspots": "production_hotspot_ids", - "test_fixture_hotspots": "test_fixture_hotspot_ids", -} -_CHECK_TO_DIMENSION: Final[dict[str, str]] = { - "cohesion": "cohesion", - "coupling": "coupling", - "dead_code": "dead_code", - "complexity": "complexity", - "clones": "clones", -} -_DESIGN_CHECK_CONTEXT: Final[dict[str, dict[str, object]]] = { - "complexity": { - "category": CATEGORY_COMPLEXITY, - "metric": "cyclomatic_complexity", - "operator": ">", - "default_threshold": DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - }, - "coupling": { - "category": CATEGORY_COUPLING, - "metric": "cbo", - "operator": ">", - "default_threshold": DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - }, - "cohesion": { - "category": CATEGORY_COHESION, - "metric": "lcom4", - "operator": ">=", - "default_threshold": DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - }, -} -_VALID_METRICS_DETAIL_FAMILIES = frozenset( - { - "complexity", - "coupling", - "cohesion", - "coverage_adoption", - "coverage_join", - "dependencies", - "dead_code", - "api_surface", - "god_modules", - "overloaded_modules", - "health", - } -) -_METRICS_DETAIL_FAMILY_ALIASES: Final[dict[str, str]] = { - "god_modules": "overloaded_modules", -} -_SHORT_RUN_ID_LENGTH = 8 -_SHORT_HASH_ID_LENGTH = 6 -ChoiceT = TypeVar("ChoiceT", bound=str) - - -@dataclass(frozen=True) -class MCPHelpTopicSpec: - summary: str - key_points: tuple[str, ...] - recommended_tools: tuple[str, ...] - doc_links: tuple[tuple[str, str], ...] - warnings: tuple[str, ...] = () - anti_patterns: tuple[str, ...] 
= () - - -_MCP_BOOK_URL: Final = f"{DOCS_URL}book/" -_MCP_GUIDE_URL: Final = f"{DOCS_URL}mcp/" -_MCP_INTERFACE_DOC_LINK: Final[tuple[str, str]] = ( - "MCP interface contract", - f"{_MCP_BOOK_URL}20-mcp-interface/", -) -_BASELINE_DOC_LINK: Final[tuple[str, str]] = ( - "Baseline contract", - f"{_MCP_BOOK_URL}06-baseline/", -) -_CONFIG_DOC_LINK: Final[tuple[str, str]] = ( - "Config and defaults", - f"{_MCP_BOOK_URL}04-config-and-defaults/", -) -_REPORT_DOC_LINK: Final[tuple[str, str]] = ( - "Report contract", - f"{_MCP_BOOK_URL}08-report/", -) -_CLI_DOC_LINK: Final[tuple[str, str]] = ( - "CLI contract", - f"{_MCP_BOOK_URL}09-cli/", -) -_PIPELINE_DOC_LINK: Final[tuple[str, str]] = ( - "Core pipeline", - f"{_MCP_BOOK_URL}05-core-pipeline/", -) -_SUPPRESSIONS_DOC_LINK: Final[tuple[str, str]] = ( - "Inline suppressions contract", - f"{_MCP_BOOK_URL}19-inline-suppressions/", -) -_MCP_GUIDE_DOC_LINK: Final[tuple[str, str]] = ("MCP usage guide", _MCP_GUIDE_URL) -_HELP_TOPIC_SPECS: Final[dict[str, MCPHelpTopicSpec]] = { - "workflow": MCPHelpTopicSpec( - summary=( - "CodeClone MCP is triage-first and budget-aware. Start with a " - "summary or production triage, then narrow through hotspots or " - "focused checks before opening one finding in detail." - ), - key_points=( - "Recommended first pass: analyze_repository or analyze_changed_paths.", - ( - "Start with default or pyproject-resolved thresholds; lower them " - "only for an explicit higher-sensitivity follow-up pass." - ), - ( - "Use get_run_summary or get_production_triage before broad " - "finding listing." - ), - ( - "Prefer list_hotspots or focused check_* tools over " - "list_findings on noisy repositories." - ), - ("Use get_finding and get_remediation only after selecting an issue."), - ( - "get_report_section(section='all') is an exception path, not " - "a default first step." 
- ), - ), - recommended_tools=( - "analyze_repository", - "analyze_changed_paths", - "get_run_summary", - "get_production_triage", - "list_hotspots", - "check_clones", - "check_dead_code", - "get_finding", - "get_remediation", - ), - doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), - warnings=( - ( - "Broad list_findings calls burn context quickly on large or " - "noisy repositories." - ), - ( - "Prefer generate_pr_summary(format='markdown') unless machine " - "JSON is explicitly required." - ), - ), - anti_patterns=( - "Starting exploration with list_findings on a noisy repository.", - "Using get_report_section(section='all') as the default first step.", - ( - "Escalating detail on larger lists instead of opening one " - "finding with get_finding." - ), - ), - ), - "analysis_profile": MCPHelpTopicSpec( - summary=( - "CodeClone default analysis is intentionally conservative: stable " - "first-pass review, baseline-aware governance, and CI-friendly " - "signal over maximum local sensitivity." - ), - key_points=( - ( - "Default thresholds are intentionally conservative and " - "production-friendly." - ), - ( - "A clean default run does not rule out smaller local " - "duplication or repetition." - ), - ( - "Lowering thresholds increases sensitivity and can surface " - "smaller functions, tighter windows, and finer local signals." - ), - ( - "Lower-threshold runs are best for exploratory local review, " - "not as a silent replacement for the default governance profile." - ), - "Interpret results in the context of the active threshold profile.", - ), - recommended_tools=( - "analyze_repository", - "analyze_changed_paths", - "get_run_summary", - "compare_runs", - ), - doc_links=( - _CONFIG_DOC_LINK, - _PIPELINE_DOC_LINK, - _MCP_INTERFACE_DOC_LINK, - ), - warnings=( - ( - "Do not treat a default-threshold run as proof that no smaller " - "local clone or repetition exists." 
- ), - ( - "Lower-threshold runs usually increase noise and should be read " - "as higher-sensitivity exploratory passes." - ), - "Run comparisons are most meaningful when profiles are aligned.", - ), - anti_patterns=( - ( - "Assuming a clean default pass means no finer-grained " - "duplication exists anywhere in the repository." - ), - ( - "Lowering thresholds for exploration and then interpreting the " - "result as if it had the same meaning as the conservative " - "default pass." - ), - ( - "Mixing low-threshold exploratory output into baseline or CI " - "reasoning without acknowledging the profile change." - ), - ), - ), - "suppressions": MCPHelpTopicSpec( - summary=( - "CodeClone supports explicit inline suppressions for selected " - "findings. They are local policy, not analysis truth, and should " - "stay narrow and declaration-scoped." - ), - key_points=( - "Current syntax uses codeclone: ignore[rule-id,...].", - "Binding is declaration-scoped: def, async def, or class.", - ( - "Supported placement is the previous line or inline on the " - "declaration or header line." - ), - ( - "Suppressions are target-specific and do not imply file-wide " - "or cascading scope." - ), - ( - "Use suppressions for accepted dynamic or runtime false " - "positives, not to hide broad classes of debt." - ), - ), - recommended_tools=("get_finding", "get_remediation"), - doc_links=(_SUPPRESSIONS_DOC_LINK, _MCP_INTERFACE_DOC_LINK), - warnings=( - ( - "MCP explains suppression semantics but never creates or " - "updates suppressions." - ), - ), - anti_patterns=( - "Treating suppressions as file-wide or inherited state.", - ( - "Using suppressions to hide broad structural debt instead of " - "accepted false positives." - ), - ), - ), - "baseline": MCPHelpTopicSpec( - summary=( - "A baseline is CodeClone's accepted comparison snapshot for clones " - "and optional metrics. It separates known debt from new regressions " - "and is trust-checked before use." 
- ), - key_points=( - ( - "Canonical baseline schema is v2.0 with meta and clone keys; " - "metrics may be embedded for unified flows." - ), - ( - "Compatibility depends on generator identity, supported " - "schema version, fingerprint version, python tag, and payload " - "integrity." - ), - ( - "Known means already present in the trusted baseline; new " - "means not accepted by baseline." - ), - ( - "In CI and gating contexts, untrusted baseline states are " - "contract errors rather than soft warnings." - ), - "MCP is read-only and does not update or rewrite baselines.", - ), - recommended_tools=("get_run_summary", "evaluate_gates", "compare_runs"), - doc_links=(_BASELINE_DOC_LINK,), - warnings=( - "Baseline trust semantics directly affect new-vs-known classification.", - ), - anti_patterns=( - "Treating baseline as mutable MCP session state.", - "Assuming an untrusted baseline is only cosmetic in CI contexts.", - ), - ), - "coverage": MCPHelpTopicSpec( - summary=( - "Coverage join is an external current-run signal: CodeClone reads " - "an existing Cobertura XML report and joins line hits to risky " - "function spans." - ), - key_points=( - "Use Cobertura XML such as `coverage xml` output from coverage.py.", - "Coverage join does not become baseline truth and does not affect health.", - ( - "Coverage hotspot gating is current-run only and focuses on " - "medium/high-risk functions measured below the configured " - "threshold." - ), - ( - "Functions missing from the supplied coverage.xml are surfaced " - "as scope gaps, not labeled as untested." 
- ), - "Use metrics_detail(family='coverage_join') for bounded drill-down.", - ), - recommended_tools=( - "analyze_repository", - "analyze_changed_paths", - "get_run_summary", - "get_report_section", - "evaluate_gates", - ), - doc_links=( - _MCP_INTERFACE_DOC_LINK, - _CLI_DOC_LINK, - _REPORT_DOC_LINK, - ), - warnings=( - "Coverage join is only as accurate as the external XML path mapping.", - "It does not infer branch coverage and does not execute tests.", - "Use fail-on-untested-hotspots only with a valid joined coverage input.", - ), - anti_patterns=( - "Treating missing coverage XML as zero coverage without stating it.", - "Reading coverage join as a baseline-aware trend signal.", - "Assuming dynamic runtime dispatch is visible through a static line join.", - ), - ), - "latest_runs": MCPHelpTopicSpec( - summary=( - "latest/* resources point to the most recent analysis run in the " - "current MCP session. They are convenience handles, not persistent " - "truth anchors." - ), - key_points=( - "Run history is in-memory only and bounded by history-limit.", - "The latest pointer moves when a newer analyze_* call registers a run.", - "A fresh repository state requires a fresh analyze run.", - ( - "Short run ids are convenience handles derived from canonical " - "run identity." - ), - ( - "Do not assume latest/* is globally current outside the " - "active MCP session." - ), - ), - recommended_tools=( - "analyze_repository", - "analyze_changed_paths", - "get_run_summary", - "compare_runs", - ), - doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), - warnings=( - ( - "latest/* can point at a different repository after a later " - "analyze call in the same session." - ), - ), - anti_patterns=( - ( - "Assuming latest/* remains tied to one repository across the " - "whole client session." - ), - ( - "Using latest/* as a substitute for starting a fresh run when " - "freshness matters." 
- ), - ), - ), - "review_state": MCPHelpTopicSpec( - summary=( - "Reviewed state in MCP is session-local workflow state. It helps " - "long sessions track review progress without modifying canonical " - "findings, baseline, or persisted artifacts." - ), - key_points=( - "Review markers are in-memory only.", - "They do not change report truth, finding identity, or CI semantics.", - "They are useful for triage workflows across long sessions.", - ( - "They should not be interpreted as acceptance, suppression, " - "or baseline update." - ), - ), - recommended_tools=( - "list_hotspots", - "get_finding", - "mark_finding_reviewed", - "list_reviewed_findings", - ), - doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), - warnings=( - "Reviewed markers disappear when the MCP session is cleared or restarted.", - ), - anti_patterns=( - "Treating reviewed state as a persistent acceptance signal.", - "Assuming reviewed findings are removed from canonical report truth.", - ), - ), - "changed_scope": MCPHelpTopicSpec( - summary=( - "Changed-scope analysis narrows review to findings that touch a " - "selected change set. It is for PR and patch review, not a " - "replacement for full canonical analysis." - ), - key_points=( - ( - "Use analyze_changed_paths with explicit changed_paths or " - "git_diff_ref for review-focused runs." - ), - ( - "Start with the same conservative profile as the default " - "review, then lower thresholds only when you explicitly want " - "a higher-sensitivity changed-files pass." - ), - ( - "Changed-scope is best for asking what new issues touch " - "modified files and whether anything should block CI." 
- ), - "Prefer production triage and hotspot views before broad listing.", - "If repository-wide truth is needed, run full analysis first.", - ), - recommended_tools=( - "analyze_changed_paths", - "get_run_summary", - "get_production_triage", - "evaluate_gates", - "generate_pr_summary", - ), - doc_links=(_MCP_INTERFACE_DOC_LINK, _MCP_GUIDE_DOC_LINK), - warnings=( - ( - "Changed-scope narrows review focus; it does not replace the " - "full canonical report for repository-wide truth." - ), - ), - anti_patterns=( - "Using changed-scope as if it were the only source of repository truth.", - ( - "Starting changed-files review with broad listing instead of " - "compact triage." - ), - ), - ), -} - - -def _suggestion_finding_id_payload(suggestion: object) -> str: - if not hasattr(suggestion, "finding_family"): - return "" - family = str(getattr(suggestion, "finding_family", "")).strip() - if family == FAMILY_CLONES: - kind = str(getattr(suggestion, "finding_kind", "")).strip() - subject_key = str(getattr(suggestion, "subject_key", "")).strip() - return clone_group_id(kind or CLONE_KIND_SEGMENT, subject_key) - if family == FAMILY_STRUCTURAL: - return structural_group_id( - str(getattr(suggestion, "finding_kind", "")).strip() or CATEGORY_STRUCTURAL, - str(getattr(suggestion, "subject_key", "")).strip(), - ) - category = str(getattr(suggestion, "category", "")).strip() - subject_key = str(getattr(suggestion, "subject_key", "")).strip() - if category == CATEGORY_DEAD_CODE: - return dead_code_group_id(subject_key) - return design_group_id( - category, - subject_key or str(getattr(suggestion, "title", "")), - ) - - -@dataclass(frozen=True, slots=True) -class _CloneShortIdEntry: - canonical_id: str - alias: str - token: str - suffix: str - - def render(self, prefix_length: int) -> str: - if prefix_length <= 0: - prefix_length = len(self.token) - return f"{self.alias}:{self.token[:prefix_length]}{self.suffix}" - - -def _partitioned_short_id(alias: str, remainder: str) -> str: - 
first, _, rest = remainder.partition(":") - return f"{alias}:{first}:{rest}" if rest else f"{alias}:{first}" - - -def _clone_short_id_entry_payload(canonical_id: str) -> _CloneShortIdEntry: - _prefix, _, remainder = canonical_id.partition(":") - clone_kind, _, group_key = remainder.partition(":") - hashes = [part for part in group_key.split("|") if part] - if clone_kind == "function": - fingerprint = hashes[0] if hashes else group_key - bucket = "" - if "|" in group_key: - bucket = "|" + group_key.split("|")[-1] - return _CloneShortIdEntry( - canonical_id=canonical_id, - alias="fn", - token=fingerprint, - suffix=bucket, - ) - alias = {"block": "blk", "segment": "seg"}.get(clone_kind, "clone") - combined = "|".join(hashes) if hashes else group_key - token = hashlib.sha256(combined.encode()).hexdigest() - return _CloneShortIdEntry( - canonical_id=canonical_id, - alias=alias, - token=token, - suffix=f"|x{len(hashes) or 1}", - ) - - -def _disambiguated_clone_short_ids_payload( - canonical_ids: Sequence[str], -) -> dict[str, str]: - clone_entries = [ - _clone_short_id_entry_payload(canonical_id) for canonical_id in canonical_ids - ] - max_token_length = max((len(entry.token) for entry in clone_entries), default=0) - for prefix_length in range(_SHORT_HASH_ID_LENGTH + 2, max_token_length + 1, 2): - candidates = { - entry.canonical_id: entry.render(prefix_length) for entry in clone_entries - } - if len(set(candidates.values())) == len(candidates): - return candidates - return { - entry.canonical_id: entry.render(max_token_length) for entry in clone_entries - } - - -def _leaf_symbol_name_payload(value: object) -> str: - text = str(value).strip() - if not text: - return "" - if ":" in text: - text = text.rsplit(":", maxsplit=1)[-1] - if "." 
in text: - text = text.rsplit(".", maxsplit=1)[-1] - return text - - -def _base_short_finding_id_payload(canonical_id: str) -> str: - prefix, _, remainder = canonical_id.partition(":") - if prefix == "clone": - return _clone_short_id_entry_payload(canonical_id).render(_SHORT_HASH_ID_LENGTH) - if prefix == "structural": - finding_kind, _, finding_key = remainder.partition(":") - return f"struct:{finding_kind}:{finding_key[:_SHORT_HASH_ID_LENGTH]}" - if prefix == "dead_code": - return f"dead:{_leaf_symbol_name_payload(remainder)}" - if prefix == "design": - category, _, subject_key = remainder.partition(":") - return f"design:{category}:{_leaf_symbol_name_payload(subject_key)}" - return canonical_id - - -def _disambiguated_short_finding_id_payload(canonical_id: str) -> str: - prefix, _, remainder = canonical_id.partition(":") - if prefix == "clone": - return _clone_short_id_entry_payload(canonical_id).render(0) - if prefix == "structural": - return _partitioned_short_id("struct", remainder) - if prefix == "dead_code": - return f"dead:{remainder}" - if prefix == "design": - return _partitioned_short_id("design", remainder) - return canonical_id - - -def _json_text_payload( - payload: object, - *, - sort_keys: bool = True, -) -> str: - options = orjson.OPT_INDENT_2 - if sort_keys: - options |= orjson.OPT_SORT_KEYS - return orjson.dumps(payload, option=options).decode("utf-8") - - -def _git_diff_lines_payload( - *, - root_path: Path, - git_diff_ref: str, -) -> tuple[str, ...]: - try: - validated_ref = validate_git_diff_ref(git_diff_ref) - except ValueError as exc: - raise MCPGitDiffError(str(exc)) from exc - try: - completed = subprocess.run( - ["git", "diff", "--name-only", validated_ref, "--"], - cwd=root_path, - check=True, - capture_output=True, - text=True, - timeout=30, - ) - except (OSError, subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc: - raise MCPGitDiffError( - f"Unable to resolve changed paths from git diff ref '{validated_ref}'." 
- ) from exc - return tuple( - sorted({line.strip() for line in completed.stdout.splitlines() if line.strip()}) - ) - - -def _load_report_document_payload(report_json: str) -> dict[str, object]: - try: - payload = orjson.loads(report_json) - except JSONDecodeError as exc: - raise MCPServiceError( - f"Generated canonical report is not valid JSON: {exc}" - ) from exc - if not isinstance(payload, dict): - raise MCPServiceError("Generated canonical report must be a JSON object.") - return dict(payload) - - -def _validated_history_limit(history_limit: int) -> int: - if not 1 <= history_limit <= MAX_MCP_HISTORY_LIMIT: - raise ValueError( - f"history_limit must be between 1 and {MAX_MCP_HISTORY_LIMIT}." - ) - return history_limit - - -class MCPServiceError(RuntimeError): - """Base class for CodeClone MCP service errors.""" - - -class MCPServiceContractError(MCPServiceError): - """Raised when an MCP request violates the CodeClone service contract.""" - - -class MCPRunNotFoundError(MCPServiceError): - """Raised when a requested MCP run is not available in the in-memory registry.""" - - -class MCPFindingNotFoundError(MCPServiceError): - """Raised when a requested finding id is not present in the selected run.""" - - -class MCPGitDiffError(MCPServiceError): - """Raised when changed paths cannot be resolved from a git ref.""" - - -class _BufferConsole: - def __init__(self) -> None: - self.messages: list[str] = [] - - def print(self, *objects: object, **_kwargs: object) -> None: - text = " ".join(str(obj) for obj in objects).strip() - if text: - self.messages.append(text) - - -@dataclass(frozen=True, slots=True) -class MCPAnalysisRequest: - root: str | None = None - analysis_mode: AnalysisMode = "full" - respect_pyproject: bool = True - changed_paths: tuple[str, ...] 
= () - git_diff_ref: str | None = None - processes: int | None = None - min_loc: int | None = None - min_stmt: int | None = None - block_min_loc: int | None = None - block_min_stmt: int | None = None - segment_min_loc: int | None = None - segment_min_stmt: int | None = None - api_surface: bool | None = None - coverage_xml: str | None = None - coverage_min: int | None = None - complexity_threshold: int | None = None - coupling_threshold: int | None = None - cohesion_threshold: int | None = None - baseline_path: str | None = None - metrics_baseline_path: str | None = None - max_baseline_size_mb: int | None = None - cache_policy: CachePolicy = "reuse" - cache_path: str | None = None - max_cache_size_mb: int | None = None - - -@dataclass(frozen=True, slots=True) -class MCPGateRequest: - run_id: str | None = None - fail_on_new: bool = False - fail_threshold: int = -1 - fail_complexity: int = -1 - fail_coupling: int = -1 - fail_cohesion: int = -1 - fail_cycles: bool = False - fail_dead_code: bool = False - fail_health: int = -1 - fail_on_new_metrics: bool = False - fail_on_typing_regression: bool = False - fail_on_docstring_regression: bool = False - fail_on_api_break: bool = False - fail_on_untested_hotspots: bool = False - min_typing_coverage: int = -1 - min_docstring_coverage: int = -1 - coverage_min: int = DEFAULT_COVERAGE_MIN - - -@dataclass(frozen=True, slots=True) -class MCPRunRecord: - run_id: str - root: Path - request: MCPAnalysisRequest - comparison_settings: tuple[object, ...] - report_document: dict[str, object] - summary: dict[str, object] - changed_paths: tuple[str, ...] - changed_projection: dict[str, object] | None - warnings: tuple[str, ...] - failures: tuple[str, ...] - func_clones_count: int - block_clones_count: int - project_metrics: ProjectMetrics | None - coverage_join: CoverageJoinResult | None - suggestions: tuple[Suggestion, ...] 
- new_func: frozenset[str] - new_block: frozenset[str] - metrics_diff: MetricsDiff | None - -class CodeCloneMCPRunStore: - def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: - self._history_limit = _validated_history_limit(history_limit) - self._lock = RLock() - self._records: OrderedDict[str, MCPRunRecord] = OrderedDict() - self._latest_run_id: str | None = None - - def register(self, record: MCPRunRecord) -> MCPRunRecord: - with self._lock: - self._records.pop(record.run_id, None) - self._records[record.run_id] = record - self._records.move_to_end(record.run_id) - self._latest_run_id = record.run_id - while len(self._records) > self._history_limit: - self._records.popitem(last=False) - return record - - def get(self, run_id: str | None = None) -> MCPRunRecord: - with self._lock: - resolved_run_id = self._resolve_run_id(run_id) - if resolved_run_id is None: - raise MCPRunNotFoundError("No matching MCP analysis run is available.") - return self._records[resolved_run_id] - - def _resolve_run_id(self, run_id: str | None) -> str | None: - if run_id is None: - return self._latest_run_id - if run_id in self._records: - return run_id - matches = [ - candidate for candidate in self._records if candidate.startswith(run_id) - ] - if len(matches) == 1: - return matches[0] - if len(matches) > 1: - raise MCPServiceContractError( - f"Run id '{run_id}' is ambiguous in this MCP session." 
- ) - return None - - def records(self) -> tuple[MCPRunRecord, ...]: - with self._lock: - return tuple(self._records.values()) - - def clear(self) -> tuple[str, ...]: - with self._lock: - removed_run_ids = tuple(self._records.keys()) - self._records.clear() - self._latest_run_id = None - return removed_run_ids - -class MCPSession: +class MCPSession(_MCPSessionStateMixin): def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: self._runs = CodeCloneMCPRunStore(history_limit=history_limit) self._state_lock = RLock() @@ -1088,7 +85,7 @@ def __init__(self, *, history_limit: int = DEFAULT_MCP_HISTORY_LIMIT) -> None: def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: self._validate_analysis_request(request) - root_path = self._resolve_root(request.root) + root_path = _helpers._resolve_root(request.root) analysis_started_at_utc = _current_report_timestamp_utc() changed_paths = self._resolve_request_changed_paths( root_path=root_path, @@ -1103,8 +100,8 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: metrics_baseline_exists, shared_baseline_payload, ) = self._resolve_baseline_inputs(root_path=root_path, args=args) - cache_path = self._resolve_cache_path(root_path=root_path, args=args) - cache = self._build_cache( + cache_path = _helpers._resolve_cache_path(root_path=root_path, args=args) + cache = _helpers._build_cache( root_path=root_path, args=args, cache_path=cache_path, @@ -1127,13 +124,9 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: ) clone_baseline_state = resolve_clone_baseline_state( - args=args, baseline_path=baseline_path, baseline_exists=baseline_exists, - func_groups=analysis_result.func_groups, - block_groups=analysis_result.block_groups, - codeclone_version=__version__, - console=console, + max_baseline_size_mb=_as_int(args.max_baseline_size_mb, 0), shared_baseline_payload=( shared_baseline_payload if metrics_baseline_path == 
baseline_path @@ -1141,12 +134,10 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: ), ) metrics_baseline_state = resolve_metrics_baseline_state( - args=args, metrics_baseline_path=metrics_baseline_path, metrics_baseline_exists=metrics_baseline_exists, - baseline_updated_path=clone_baseline_state.updated_path, - project_metrics=analysis_result.project_metrics, - console=console, + max_baseline_size_mb=_as_int(args.max_baseline_size_mb, 0), + skip_metrics=bool(args.skip_metrics), shared_baseline_payload=( shared_baseline_payload if metrics_baseline_path == baseline_path @@ -1182,7 +173,7 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: else None ), analysis_mode=request.analysis_mode, - metrics_computed=self._metrics_computed(request.analysis_mode), + metrics_computed=_helpers._metrics_computed(request.analysis_mode), min_loc=_as_int(args.min_loc, DEFAULT_MIN_LOC), min_stmt=_as_int(args.min_stmt, DEFAULT_MIN_STMT), block_min_loc=_as_int(args.block_min_loc, DEFAULT_BLOCK_MIN_LOC), @@ -1248,10 +239,16 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: report_json = report_artifacts.json if report_json is None: raise MCPServiceError("CodeClone MCP expected a canonical JSON report.") - report_document = self._load_report_document(report_json) - run_id = self._report_digest(report_document) + report_document = _helpers._load_report_document(report_json) + run_id = _helpers._report_digest(report_document) warning_items = set(console.messages) + baseline_warning = getattr(clone_baseline_state, "warning_message", None) + if isinstance(baseline_warning, str) and baseline_warning: + warning_items.add(baseline_warning) + metrics_warning = getattr(metrics_baseline_state, "warning_message", None) + if isinstance(metrics_warning, str) and metrics_warning: + warning_items.add(metrics_warning) if cache.load_warning: warning_items.add(cache.load_warning) 
warning_items.update(discovery_result.skipped_warnings) @@ -1283,7 +280,10 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: run_id=run_id, root=root_path, request=request, - comparison_settings=self._comparison_settings(args=args, request=request), + comparison_settings=_helpers._comparison_settings( + args=args, + request=request, + ), report_document=report_document, summary=base_summary, changed_paths=changed_paths, @@ -1309,7 +309,10 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]: run_id=run_id, root=root_path, request=request, - comparison_settings=self._comparison_settings(args=args, request=request), + comparison_settings=_helpers._comparison_settings( + args=args, + request=request, + ), report_document=report_document, summary=summary, changed_paths=changed_paths, @@ -1337,3419 +340,3 @@ def analyze_changed_paths(self, request: MCPAnalysisRequest) -> dict[str, object analysis_summary = self.analyze_repository(request) record = self._runs.get(str(analysis_summary.get("run_id", "")) or None) return self._changed_analysis_payload(record) - - def get_run_summary(self, run_id: str | None = None) -> dict[str, object]: - record = self._runs.get(run_id) - return self._summary_payload(record.summary, record=record) - - def compare_runs( - self, - *, - run_id_before: str, - run_id_after: str | None = None, - focus: ComparisonFocus = "all", - ) -> dict[str, object]: - validated_focus = self._validate_choice( - "focus", - focus, - _VALID_COMPARISON_FOCUS, - ) - before = self._runs.get(run_id_before) - after = self._runs.get(run_id_after) - before_findings = self._comparison_index(before, focus=validated_focus) - after_findings = self._comparison_index(after, focus=validated_focus) - before_ids = set(before_findings) - after_ids = set(after_findings) - regressions = sorted(after_ids - before_ids) - improvements = sorted(before_ids - after_ids) - common = before_ids & after_ids - health_before = 
self._summary_health_score(before.summary) - health_after = self._summary_health_score(after.summary) - comparability = self._comparison_scope(before=before, after=after) - comparable = bool(comparability["comparable"]) - health_delta = ( - health_after - health_before - if comparable and health_before is not None and health_after is not None - else None - ) - verdict = ( - self._comparison_verdict( - regressions=len(regressions), - improvements=len(improvements), - health_delta=health_delta, - ) - if comparable - else "incomparable" - ) - regressions_payload = ( - [ - self._comparison_finding_card( - after, - after_findings[finding_id], - ) - for finding_id in regressions - ] - if comparable - else [] - ) - improvements_payload = ( - [ - self._comparison_finding_card( - before, - before_findings[finding_id], - ) - for finding_id in improvements - ] - if comparable - else [] - ) - payload: dict[str, object] = { - "before": { - "run_id": self._short_run_id(before.run_id), - "health": health_before, - }, - "after": { - "run_id": self._short_run_id(after.run_id), - "health": health_after, - }, - "comparable": comparable, - "health_delta": health_delta, - "verdict": verdict, - "regressions": regressions_payload, - "improvements": improvements_payload, - "unchanged": len(common) if comparable else None, - "summary": self._comparison_summary_text( - comparable=comparable, - comparability_reason=str(comparability["reason"]), - regressions=len(regressions), - improvements=len(improvements), - health_delta=health_delta, - ), - } - if not comparable: - payload["reason"] = comparability["reason"] - return payload - - def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]: - record = self._runs.get(request.run_id) - gate_result = self._evaluate_gate_snapshot(record=record, request=request) - result = { - "run_id": self._short_run_id(record.run_id), - "would_fail": gate_result.exit_code != 0, - "exit_code": gate_result.exit_code, - "reasons": 
list(gate_result.reasons), - "config": { - "fail_on_new": request.fail_on_new, - "fail_threshold": request.fail_threshold, - "fail_complexity": request.fail_complexity, - "fail_coupling": request.fail_coupling, - "fail_cohesion": request.fail_cohesion, - "fail_cycles": request.fail_cycles, - "fail_dead_code": request.fail_dead_code, - "fail_health": request.fail_health, - "fail_on_new_metrics": request.fail_on_new_metrics, - "fail_on_typing_regression": request.fail_on_typing_regression, - "fail_on_docstring_regression": request.fail_on_docstring_regression, - "fail_on_api_break": request.fail_on_api_break, - "fail_on_untested_hotspots": request.fail_on_untested_hotspots, - "min_typing_coverage": request.min_typing_coverage, - "min_docstring_coverage": request.min_docstring_coverage, - "coverage_min": request.coverage_min, - }, - } - with self._state_lock: - self._last_gate_results[record.run_id] = dict(result) - return result - - def _evaluate_gate_snapshot( - self, - *, - record: MCPRunRecord, - request: MCPGateRequest, - ) -> GatingResult: - if request.fail_on_untested_hotspots: - if record.coverage_join is None: - raise MCPServiceContractError( - "Coverage gating requires a run created with coverage_xml." - ) - if record.coverage_join.status != "ok": - detail = record.coverage_join.invalid_reason or "invalid coverage input" - raise MCPServiceContractError( - "Coverage gating requires a valid Cobertura XML input. 
" - f"Reason: {detail}" - ) - return _evaluate_report_gates( - report_document=record.report_document, - config=MetricGateConfig( - fail_complexity=request.fail_complexity, - fail_coupling=request.fail_coupling, - fail_cohesion=request.fail_cohesion, - fail_cycles=request.fail_cycles, - fail_dead_code=request.fail_dead_code, - fail_health=request.fail_health, - fail_on_new_metrics=request.fail_on_new_metrics, - fail_on_typing_regression=request.fail_on_typing_regression, - fail_on_docstring_regression=request.fail_on_docstring_regression, - fail_on_api_break=request.fail_on_api_break, - fail_on_untested_hotspots=request.fail_on_untested_hotspots, - min_typing_coverage=request.min_typing_coverage, - min_docstring_coverage=request.min_docstring_coverage, - coverage_min=request.coverage_min, - fail_on_new=request.fail_on_new, - fail_threshold=request.fail_threshold, - ), - baseline_status=str( - self._as_mapping( - self._as_mapping(record.report_document.get("meta")).get("baseline") - ).get("status", "") - ), - metrics_diff=record.metrics_diff, - clone_new_count=len(record.new_func) + len(record.new_block), - clone_total=record.func_clones_count + record.block_clones_count, - ) - - def get_report_section( - self, - *, - run_id: str | None = None, - section: ReportSection = "all", - family: MetricsDetailFamily | None = None, - path: str | None = None, - offset: int = 0, - limit: int = 50, - ) -> dict[str, object]: - validated_section = self._validate_choice( - "section", - section, - _VALID_REPORT_SECTIONS, - ) - record = self._runs.get(run_id) - report_document = record.report_document - if validated_section == "all": - return dict(report_document) - if validated_section == "changed": - if record.changed_projection is None: - raise MCPServiceContractError( - "Report section 'changed' is not available in this run." 
- ) - return dict(record.changed_projection) - if validated_section == "metrics": - metrics = self._as_mapping(report_document.get("metrics")) - return {"summary": dict(self._as_mapping(metrics.get("summary")))} - if validated_section == "metrics_detail": - metrics = self._as_mapping(report_document.get("metrics")) - if not metrics: - raise MCPServiceContractError( - "Report section 'metrics_detail' is not available in this run." - ) - validated_family_input = self._validate_optional_choice( - "family", - family, - _VALID_METRICS_DETAIL_FAMILIES, - ) - normalized_family = ( - _METRICS_DETAIL_FAMILY_ALIASES.get( - str(validated_family_input), - str(validated_family_input), - ) - if validated_family_input is not None - else None - ) - validated_family = self._metrics_detail_family(normalized_family) - return self._metrics_detail_payload( - metrics=metrics, - family=validated_family, - path=path, - offset=offset, - limit=limit, - ) - if validated_section == "derived": - return self._derived_section_payload(record) - payload = report_document.get(validated_section) - if not isinstance(payload, Mapping): - raise MCPServiceContractError( - f"Report section '{validated_section}' is not available in this run." 
- ) - return dict(payload) - - def list_findings( - self, - *, - run_id: str | None = None, - family: FindingFamilyFilter = "all", - category: str | None = None, - severity: str | None = None, - source_kind: str | None = None, - novelty: FindingNoveltyFilter = "all", - sort_by: FindingSort = "default", - detail_level: DetailLevel = "summary", - changed_paths: Sequence[str] = (), - git_diff_ref: str | None = None, - exclude_reviewed: bool = False, - offset: int = 0, - limit: int = 50, - max_results: int | None = None, - ) -> dict[str, object]: - validated_family = self._validate_choice( - "family", - family, - _VALID_FINDING_FAMILIES, - ) - validated_novelty = self._validate_choice( - "novelty", - novelty, - _VALID_FINDING_NOVELTY, - ) - validated_sort = self._validate_choice( - "sort_by", - sort_by, - _VALID_FINDING_SORT, - ) - validated_detail = self._validate_choice( - "detail_level", - detail_level, - _VALID_DETAIL_LEVELS, - ) - validated_severity = self._validate_optional_choice( - "severity", - severity, - _VALID_SEVERITIES, - ) - record = self._runs.get(run_id) - paths_filter = self._resolve_query_changed_paths( - record=record, - changed_paths=changed_paths, - git_diff_ref=git_diff_ref, - ) - normalized_limit = max( - 1, - min(max_results if max_results is not None else limit, 200), - ) - filtered = self._query_findings( - record=record, - family=validated_family, - category=category, - severity=validated_severity, - source_kind=source_kind, - novelty=validated_novelty, - sort_by=validated_sort, - detail_level=validated_detail, - changed_paths=paths_filter, - exclude_reviewed=exclude_reviewed, - ) - page = paginate( - filtered, - offset=offset, - limit=normalized_limit, - max_limit=200, - ) - return { - "run_id": self._short_run_id(record.run_id), - "detail_level": validated_detail, - "sort_by": validated_sort, - "changed_paths": list(paths_filter), - "offset": page.offset, - "limit": page.limit, - "returned": len(page.items), - "total": page.total, - 
"next_offset": page.next_offset, - "items": page.items, - } - - def get_finding( - self, - *, - finding_id: str, - run_id: str | None = None, - detail_level: DetailLevel = "normal", - ) -> dict[str, object]: - record = self._runs.get(run_id) - validated_detail = self._validate_choice( - "detail_level", - detail_level, - _VALID_DETAIL_LEVELS, - ) - canonical_id = self._resolve_canonical_finding_id(record, finding_id) - for finding in self._base_findings(record): - if str(finding.get("id")) == canonical_id: - return self._decorate_finding( - record, - finding, - detail_level=validated_detail, - ) - raise MCPFindingNotFoundError( - f"Finding id '{finding_id}' was not found in run " - f"'{self._short_run_id(record.run_id)}'." - ) - - def _service_get_finding( - self, - *, - finding_id: str, - run_id: str | None = None, - detail_level: DetailLevel = "normal", - ) -> dict[str, object]: - return self.get_finding( - finding_id=finding_id, - run_id=run_id, - detail_level=detail_level, - ) - - def get_remediation( - self, - *, - finding_id: str, - run_id: str | None = None, - detail_level: DetailLevel = "normal", - ) -> dict[str, object]: - validated_detail = self._validate_choice( - "detail_level", - detail_level, - _VALID_DETAIL_LEVELS, - ) - record = self._runs.get(run_id) - canonical_id = self._resolve_canonical_finding_id(record, finding_id) - finding = self._service_get_finding( - finding_id=canonical_id, - run_id=record.run_id, - detail_level="full", - ) - remediation = self._as_mapping(finding.get("remediation")) - if not remediation: - raise MCPFindingNotFoundError( - f"Finding id '{finding_id}' does not expose remediation guidance." 
- ) - return { - "run_id": self._short_run_id(record.run_id), - "finding_id": self._short_finding_id(record, canonical_id), - "detail_level": validated_detail, - "remediation": self._project_remediation( - remediation, - detail_level=validated_detail, - ), - } - - def list_hotspots( - self, - *, - kind: HotlistKind, - run_id: str | None = None, - detail_level: DetailLevel = "summary", - changed_paths: Sequence[str] = (), - git_diff_ref: str | None = None, - exclude_reviewed: bool = False, - limit: int = 10, - max_results: int | None = None, - ) -> dict[str, object]: - validated_kind = self._validate_choice("kind", kind, _VALID_HOTLIST_KINDS) - validated_detail = self._validate_choice( - "detail_level", - detail_level, - _VALID_DETAIL_LEVELS, - ) - record = self._runs.get(run_id) - paths_filter = self._resolve_query_changed_paths( - record=record, - changed_paths=changed_paths, - git_diff_ref=git_diff_ref, - ) - rows = self._hotspot_rows( - record=record, - kind=validated_kind, - detail_level=validated_detail, - changed_paths=paths_filter, - exclude_reviewed=exclude_reviewed, - ) - normalized_limit = max( - 1, - min(max_results if max_results is not None else limit, 50), - ) - return { - "run_id": self._short_run_id(record.run_id), - "kind": validated_kind, - "detail_level": validated_detail, - "changed_paths": list(paths_filter), - "returned": min(len(rows), normalized_limit), - "total": len(rows), - "items": [dict(self._as_mapping(item)) for item in rows[:normalized_limit]], - } - - def get_production_triage( - self, - *, - run_id: str | None = None, - max_hotspots: int = 3, - max_suggestions: int = 3, - ) -> dict[str, object]: - record = self._runs.get(run_id) - summary = self._summary_payload(record.summary, record=record) - findings = self._base_findings(record) - findings_breakdown = self._source_kind_breakdown( - self._finding_source_kind(finding) for finding in findings - ) - suggestion_rows = self._triage_suggestion_rows(record) - suggestion_breakdown = 
self._source_kind_breakdown( - row.get("source_kind") for row in suggestion_rows - ) - hotspot_limit = max(1, min(max_hotspots, 10)) - suggestion_limit = max(1, min(max_suggestions, 10)) - production_hotspots = self._hotspot_rows( - record=record, - kind="production_hotspots", - detail_level="summary", - changed_paths=(), - exclude_reviewed=False, - ) - production_suggestions = [ - dict(row) - for row in suggestion_rows - if str(row.get("source_kind", "")) == SOURCE_KIND_PRODUCTION - ] - payload: dict[str, object] = { - "run_id": self._short_run_id(record.run_id), - "focus": _FOCUS_PRODUCTION, - "health_scope": _HEALTH_SCOPE_REPOSITORY, - "baseline": dict(self._as_mapping(summary.get("baseline"))), - "health": dict(self._summary_health_payload(summary)), - "cache": dict(self._as_mapping(summary.get("cache"))), - "findings": { - "total": len(findings), - "by_source_kind": findings_breakdown, - "new_by_source_kind": dict( - self._as_mapping( - self._as_mapping(summary.get("findings")).get( - "new_by_source_kind" - ) - ) - ), - "outside_focus": len(findings) - - findings_breakdown[SOURCE_KIND_PRODUCTION], - }, - "top_hotspots": { - "kind": "production_hotspots", - "available": len(production_hotspots), - "returned": min(len(production_hotspots), hotspot_limit), - "items": [ - dict(self._as_mapping(item)) - for item in production_hotspots[:hotspot_limit] - ], - }, - "suggestions": { - "total": len(suggestion_rows), - "by_source_kind": suggestion_breakdown, - "outside_focus": len(suggestion_rows) - - suggestion_breakdown[SOURCE_KIND_PRODUCTION], - }, - "top_suggestions": { - "available": len(production_suggestions), - "returned": min(len(production_suggestions), suggestion_limit), - "items": production_suggestions[:suggestion_limit], - }, - } - analysis_profile = self._summary_analysis_profile_payload(summary) - if analysis_profile: - payload["analysis_profile"] = analysis_profile - coverage_join = self._summary_coverage_join_payload(record) - if coverage_join: - 
payload["coverage_join"] = coverage_join - return payload - - def get_help( - self, - *, - topic: HelpTopic, - detail: HelpDetail = "compact", - ) -> dict[str, object]: - validated_topic = self._validate_choice("topic", topic, _VALID_HELP_TOPICS) - validated_detail = self._validate_choice( - "detail", - detail, - _VALID_HELP_DETAILS, - ) - spec = _HELP_TOPIC_SPECS[validated_topic] - payload: dict[str, object] = { - "topic": validated_topic, - "detail": validated_detail, - "summary": spec.summary, - "key_points": list(spec.key_points), - "recommended_tools": list(spec.recommended_tools), - "doc_links": [ - {"title": title, "url": url} for title, url in spec.doc_links - ], - } - if validated_detail == "normal": - if spec.warnings: - payload["warnings"] = list(spec.warnings) - if spec.anti_patterns: - payload["anti_patterns"] = list(spec.anti_patterns) - return payload - - def generate_pr_summary( - self, - *, - run_id: str | None = None, - changed_paths: Sequence[str] = (), - git_diff_ref: str | None = None, - format: PRSummaryFormat = "markdown", - ) -> dict[str, object]: - output_format = self._validate_choice( - "format", - format, - _VALID_PR_SUMMARY_FORMATS, - ) - record = self._runs.get(run_id) - paths_filter = self._resolve_query_changed_paths( - record=record, - changed_paths=changed_paths, - git_diff_ref=git_diff_ref, - prefer_record_paths=True, - ) - changed_items = self._query_findings( - record=record, - detail_level="summary", - changed_paths=paths_filter, - ) - previous = self._previous_run_for_root(record) - resolved: list[dict[str, object]] = [] - if previous is not None: - compare_payload = self.compare_runs( - run_id_before=previous.run_id, - run_id_after=record.run_id, - focus="all", - ) - resolved = self._dict_rows(compare_payload.get("improvements")) - with self._state_lock: - gate_result = dict( - self._last_gate_results.get( - record.run_id, - {"would_fail": False, "reasons": []}, - ) - ) - verdict = self._changed_verdict( - 
changed_projection={ - "total": len(changed_items), - "new": sum( - 1 for item in changed_items if str(item.get("novelty", "")) == "new" - ), - }, - health_delta=self._summary_health_delta(record.summary), - ) - payload: dict[str, object] = { - "run_id": self._short_run_id(record.run_id), - "changed_files": len(paths_filter), - "health": self._summary_health_payload(record.summary), - "health_delta": self._summary_health_delta(record.summary), - "verdict": verdict, - "new_findings_in_changed_files": changed_items, - "resolved": resolved, - "blocking_gates": self._string_rows(gate_result.get("reasons")), - } - if output_format == "json": - return payload - return { - "run_id": self._short_run_id(record.run_id), - "format": output_format, - "content": self._render_pr_summary_markdown(payload), - } - - def mark_finding_reviewed( - self, - *, - finding_id: str, - run_id: str | None = None, - note: str | None = None, - ) -> dict[str, object]: - record = self._runs.get(run_id) - canonical_id = self._resolve_canonical_finding_id(record, finding_id) - self._service_get_finding( - finding_id=canonical_id, - run_id=record.run_id, - detail_level="normal", - ) - with self._state_lock: - review_map = self._review_state.setdefault(record.run_id, OrderedDict()) - review_map[canonical_id] = ( - note.strip() if isinstance(note, str) and note.strip() else None - ) - review_map.move_to_end(canonical_id) - return { - "run_id": self._short_run_id(record.run_id), - "finding_id": self._short_finding_id(record, canonical_id), - "reviewed": True, - "note": review_map[canonical_id], - "reviewed_count": len(review_map), - } - - def list_reviewed_findings( - self, - *, - run_id: str | None = None, - ) -> dict[str, object]: - record = self._runs.get(run_id) - with self._state_lock: - review_items = tuple( - self._review_state.get(record.run_id, OrderedDict()).items() - ) - items = [] - for finding_id, note in review_items: - try: - finding = self._service_get_finding( - finding_id=finding_id, 
- run_id=record.run_id, - ) - except MCPFindingNotFoundError: - continue - items.append( - { - "finding_id": self._short_finding_id(record, finding_id), - "note": note, - "finding": self._project_finding_detail( - record, - finding, - detail_level="summary", - ), - } - ) - return { - "run_id": self._short_run_id(record.run_id), - "reviewed_count": len(items), - "items": items, - } - - def clear_session_runs(self) -> dict[str, object]: - removed_run_ids = self._runs.clear() - with self._state_lock: - cleared_review_entries = sum( - len(entries) for entries in self._review_state.values() - ) - cleared_gate_results = len(self._last_gate_results) - cleared_spread_cache_entries = len(self._spread_max_cache) - self._review_state.clear() - self._last_gate_results.clear() - self._spread_max_cache.clear() - return { - "cleared_runs": len(removed_run_ids), - "cleared_run_ids": [ - self._short_run_id(run_id) for run_id in removed_run_ids - ], - "cleared_review_entries": cleared_review_entries, - "cleared_gate_results": cleared_gate_results, - "cleared_spread_cache_entries": cleared_spread_cache_entries, - } - - def check_complexity( - self, - *, - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - min_complexity: int | None = None, - max_results: int = 10, - detail_level: DetailLevel = "summary", - ) -> dict[str, object]: - validated_detail = self._validate_choice( - "detail_level", - detail_level, - _VALID_DETAIL_LEVELS, - ) - record = self._resolve_granular_record( - run_id=run_id, - root=root, - analysis_mode="full", - ) - findings = self._query_findings( - record=record, - family="design", - category=CATEGORY_COMPLEXITY, - detail_level=validated_detail, - changed_paths=self._path_filter_tuple(path), - sort_by="priority", - ) - if min_complexity is not None: - findings = [ - finding - for finding in findings - if _as_int( - self._as_mapping(finding.get("facts")).get( - "cyclomatic_complexity", - 0, - ) - ) - >= min_complexity - ] - return 
self._granular_payload( - record=record, - check="complexity", - items=findings, - detail_level=validated_detail, - max_results=max_results, - path=path, - threshold_context=self._design_threshold_context( - record=record, - check="complexity", - path=path, - items=findings, - requested_min=min_complexity, - ), - ) - - def check_clones( - self, - *, - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - clone_type: str | None = None, - source_kind: str | None = None, - max_results: int = 10, - detail_level: DetailLevel = "summary", - ) -> dict[str, object]: - validated_detail = self._validate_choice( - "detail_level", - detail_level, - _VALID_DETAIL_LEVELS, - ) - record = self._resolve_granular_record( - run_id=run_id, - root=root, - analysis_mode="clones_only", - ) - findings = self._query_findings( - record=record, - family="clone", - source_kind=source_kind, - detail_level=validated_detail, - changed_paths=self._path_filter_tuple(path), - sort_by="priority", - ) - if clone_type is not None: - findings = [ - finding - for finding in findings - if str(finding.get("clone_type", "")).strip() == clone_type - ] - return self._granular_payload( - record=record, - check="clones", - items=findings, - detail_level=validated_detail, - max_results=max_results, - path=path, - ) - - def check_coupling( - self, - *, - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - max_results: int = 10, - detail_level: DetailLevel = "summary", - ) -> dict[str, object]: - return self._check_design_metric( - run_id=run_id, - root=root, - path=path, - max_results=max_results, - detail_level=detail_level, - category=CATEGORY_COUPLING, - check="coupling", - ) - - def check_cohesion( - self, - *, - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - max_results: int = 10, - detail_level: DetailLevel = "summary", - ) -> dict[str, object]: - return self._check_design_metric( - run_id=run_id, - root=root, - 
path=path, - max_results=max_results, - detail_level=detail_level, - category=CATEGORY_COHESION, - check="cohesion", - ) - - def _check_design_metric( - self, - *, - run_id: str | None, - root: str | None, - path: str | None, - max_results: int, - detail_level: DetailLevel, - category: str, - check: str, - ) -> dict[str, object]: - validated_detail = self._validate_choice( - "detail_level", - detail_level, - _VALID_DETAIL_LEVELS, - ) - record = self._resolve_granular_record( - run_id=run_id, - root=root, - analysis_mode="full", - ) - findings = self._query_findings( - record=record, - family="design", - category=category, - detail_level=validated_detail, - changed_paths=self._path_filter_tuple(path), - sort_by="priority", - ) - return self._granular_payload( - record=record, - check=check, - items=findings, - detail_level=validated_detail, - max_results=max_results, - path=path, - threshold_context=self._design_threshold_context( - record=record, - check=check, - path=path, - items=findings, - ), - ) - - def check_dead_code( - self, - *, - run_id: str | None = None, - root: str | None = None, - path: str | None = None, - min_severity: str | None = None, - max_results: int = 10, - detail_level: DetailLevel = "summary", - ) -> dict[str, object]: - validated_detail = self._validate_choice( - "detail_level", - detail_level, - _VALID_DETAIL_LEVELS, - ) - validated_min_severity = self._validate_optional_choice( - "min_severity", - min_severity, - _VALID_SEVERITIES, - ) - record = self._resolve_granular_record( - run_id=run_id, - root=root, - analysis_mode="full", - ) - findings = self._query_findings( - record=record, - family="dead_code", - detail_level=validated_detail, - changed_paths=self._path_filter_tuple(path), - sort_by="priority", - ) - if validated_min_severity is not None: - findings = [ - finding - for finding in findings - if self._severity_rank(str(finding.get("severity", ""))) - >= self._severity_rank(validated_min_severity) - ] - return 
self._granular_payload( - record=record, - check="dead_code", - items=findings, - detail_level=validated_detail, - max_results=max_results, - path=path, - ) - - def read_resource(self, uri: str) -> str: - if uri == "codeclone://schema": - return _json_text_payload(self._schema_resource_payload()) - if uri == "codeclone://latest/triage": - latest = self._runs.get() - return _json_text_payload(self.get_production_triage(run_id=latest.run_id)) - latest_prefix = "codeclone://latest/" - run_prefix = "codeclone://runs/" - if uri.startswith(latest_prefix): - latest = self._runs.get() - suffix = uri[len(latest_prefix) :] - return self._render_resource(latest, suffix) - if not uri.startswith(run_prefix): - raise MCPServiceContractError(f"Unsupported CodeClone resource URI: {uri}") - remainder = uri[len(run_prefix) :] - run_id, sep, suffix = remainder.partition("/") - if not sep: - raise MCPServiceContractError(f"Unsupported CodeClone resource URI: {uri}") - record = self._runs.get(run_id) - return self._render_resource(record, suffix) - - def _render_resource(self, record: MCPRunRecord, suffix: str) -> str: - if suffix == "summary": - return _json_text_payload( - self._summary_payload(record.summary, record=record) - ) - if suffix == "triage": - raise MCPServiceContractError( - "Production triage is exposed only as codeclone://latest/triage." - ) - if suffix == "health": - return _json_text_payload(self._summary_health_payload(record.summary)) - if suffix == "gates": - with self._state_lock: - gate_result = self._last_gate_results.get(record.run_id) - if gate_result is None: - raise MCPServiceContractError( - "No gate evaluation result is available in this MCP session." - ) - return _json_text_payload(gate_result) - if suffix == "changed": - if record.changed_projection is None: - raise MCPServiceContractError( - "Changed-findings projection is not available in this run." 
- ) - return _json_text_payload(record.changed_projection) - if suffix == "schema": - return _json_text_payload(self._schema_resource_payload()) - if suffix == "report.json": - return _json_text_payload(record.report_document, sort_keys=False) - if suffix == "overview": - return _json_text_payload( - self.list_hotspots(kind="highest_spread", run_id=record.run_id) - ) - finding_prefix = "findings/" - if suffix.startswith(finding_prefix): - finding_id = suffix[len(finding_prefix) :] - return _json_text_payload( - self._service_get_finding( - run_id=record.run_id, - finding_id=finding_id, - ) - ) - raise MCPServiceContractError( - f"Unsupported CodeClone resource suffix '{suffix}'." - ) - - def _resolve_request_changed_paths( - self, - *, - root_path: Path, - changed_paths: Sequence[str], - git_diff_ref: str | None, - ) -> tuple[str, ...]: - if changed_paths and git_diff_ref is not None: - raise MCPServiceContractError( - "Provide changed_paths or git_diff_ref, not both." - ) - if git_diff_ref is not None: - return self._git_diff_paths(root_path=root_path, git_diff_ref=git_diff_ref) - if not changed_paths: - return () - return self._normalize_changed_paths(root_path=root_path, paths=changed_paths) - - def _resolve_query_changed_paths( - self, - *, - record: MCPRunRecord, - changed_paths: Sequence[str], - git_diff_ref: str | None, - prefer_record_paths: bool = False, - ) -> tuple[str, ...]: - if changed_paths or git_diff_ref is not None: - return self._resolve_request_changed_paths( - root_path=record.root, - changed_paths=changed_paths, - git_diff_ref=git_diff_ref, - ) - if prefer_record_paths: - return record.changed_paths - return () - - def _normalize_changed_paths( - self, - *, - root_path: Path, - paths: Sequence[str], - ) -> tuple[str, ...]: - normalized: set[str] = set() - for raw_path in paths: - candidate = Path(str(raw_path)).expanduser() - if candidate.is_absolute(): - try: - relative = candidate.resolve().relative_to(root_path) - except (OSError, 
ValueError) as exc: - raise MCPServiceContractError( - f"Changed path '{raw_path}' is outside root '{root_path}'." - ) from exc - normalized.add(relative.as_posix()) - continue - cleaned = self._normalize_relative_path(candidate.as_posix()) - if cleaned: - normalized.add(cleaned) - return tuple(sorted(normalized)) - - def _git_diff_paths( - self, - *, - root_path: Path, - git_diff_ref: str, - ) -> tuple[str, ...]: - lines = _git_diff_lines_payload( - root_path=root_path, - git_diff_ref=git_diff_ref, - ) - return self._normalize_changed_paths(root_path=root_path, paths=lines) - - def _prune_session_state(self) -> None: - active_run_ids = {record.run_id for record in self._runs.records()} - with self._state_lock: - for state_map in ( - self._review_state, - self._last_gate_results, - self._spread_max_cache, - ): - stale_run_ids = [ - run_id for run_id in state_map if run_id not in active_run_ids - ] - for run_id in stale_run_ids: - state_map.pop(run_id, None) - - def _summary_health_score(self, summary: Mapping[str, object]) -> int | None: - health = self._summary_health_payload(summary) - if health.get("available") is False: - return None - score = health.get("score", 0) - return _as_int(score, 0) - - def _summary_health_delta(self, summary: Mapping[str, object]) -> int | None: - if self._summary_health_payload(summary).get("available") is False: - return None - metrics_diff = self._as_mapping(summary.get("metrics_diff")) - value = metrics_diff.get("health_delta", 0) - return _as_int(value, 0) - - def _summary_health_payload( - self, - summary: Mapping[str, object], - ) -> dict[str, object]: - if str(summary.get("analysis_mode", "")) == "clones_only": - return {"available": False, "reason": "metrics_skipped"} - health = dict(self._as_mapping(summary.get("health"))) - if health: - return health - return {"available": False, "reason": "unavailable"} - - @staticmethod - def _short_run_id(run_id: str) -> str: - return short_id(run_id, length=_SHORT_RUN_ID_LENGTH) - - 
def _finding_id_maps( - self, - record: MCPRunRecord, - ) -> tuple[dict[str, str], dict[str, str]]: - canonical_ids = sorted( - str(finding.get("id", "")) - for finding in self._base_findings(record) - if str(finding.get("id", "")) - ) - base_ids = { - canonical_id: self._base_short_finding_id(canonical_id) - for canonical_id in canonical_ids - } - grouped: dict[str, list[str]] = {} - for canonical_id, short_name in base_ids.items(): - grouped.setdefault(short_name, []).append(canonical_id) - canonical_to_short: dict[str, str] = {} - short_to_canonical: dict[str, str] = {} - for short_name, group in grouped.items(): - if len(group) == 1: - canonical_id = group[0] - canonical_to_short[canonical_id] = short_name - short_to_canonical[short_name] = canonical_id - continue - disambiguated_ids = self._disambiguated_short_finding_ids(group) - for canonical_id, disambiguated in disambiguated_ids.items(): - canonical_to_short[canonical_id] = disambiguated - short_to_canonical[disambiguated] = canonical_id - return canonical_to_short, short_to_canonical - - @staticmethod - def _base_short_finding_id(canonical_id: str) -> str: - return _base_short_finding_id_payload(canonical_id) - - @staticmethod - def _disambiguated_short_finding_id(canonical_id: str) -> str: - return _disambiguated_short_finding_id_payload(canonical_id) - - def _disambiguated_short_finding_ids( - self, - canonical_ids: Sequence[str], - ) -> dict[str, str]: - clone_ids = [ - canonical_id - for canonical_id in canonical_ids - if canonical_id.startswith("clone:") - ] - if len(clone_ids) == len(canonical_ids): - clone_short_ids = _disambiguated_clone_short_ids_payload(clone_ids) - if len(set(clone_short_ids.values())) == len(clone_short_ids): - return clone_short_ids - return { - canonical_id: self._disambiguated_short_finding_id(canonical_id) - for canonical_id in canonical_ids - } - - def _short_finding_id( - self, - record: MCPRunRecord, - canonical_id: str, - ) -> str: - canonical_to_short, 
_short_to_canonical = self._finding_id_maps(record) - return canonical_to_short.get(canonical_id, canonical_id) - - def _resolve_canonical_finding_id( - self, - record: MCPRunRecord, - finding_id: str, - ) -> str: - canonical_to_short, short_to_canonical = self._finding_id_maps(record) - canonical = resolve_finding_id( - canonical_to_short=canonical_to_short, - short_to_canonical=short_to_canonical, - finding_id=finding_id, - ) - if canonical is not None: - return canonical - raise MCPFindingNotFoundError( - f"Finding id '{finding_id}' was not found in run " - f"'{self._short_run_id(record.run_id)}'." - ) - - def _leaf_symbol_name(self, value: object) -> str: - return _leaf_symbol_name_payload(value) - - @staticmethod - def _comparison_settings( - *, - args: Namespace, - request: MCPAnalysisRequest, - ) -> tuple[object, ...]: - return ( - request.analysis_mode, - _as_int(args.min_loc, DEFAULT_MIN_LOC), - _as_int(args.min_stmt, DEFAULT_MIN_STMT), - _as_int(args.block_min_loc, DEFAULT_BLOCK_MIN_LOC), - _as_int(args.block_min_stmt, DEFAULT_BLOCK_MIN_STMT), - _as_int(args.segment_min_loc, DEFAULT_SEGMENT_MIN_LOC), - _as_int(args.segment_min_stmt, DEFAULT_SEGMENT_MIN_STMT), - _as_int( - args.design_complexity_threshold, - DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - ), - _as_int( - args.design_coupling_threshold, - DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - ), - _as_int( - args.design_cohesion_threshold, - DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - ), - ) - - @staticmethod - def _comparison_scope( - *, - before: MCPRunRecord, - after: MCPRunRecord, - ) -> dict[str, object]: - same_root = before.root == after.root - same_analysis_settings = before.comparison_settings == after.comparison_settings - if same_root and same_analysis_settings: - reason = "comparable" - elif not same_root and not same_analysis_settings: - reason = "different_root_and_analysis_settings" - elif not same_root: - reason = "different_root" - else: - reason = "different_analysis_settings" - return 
{ - "comparable": same_root and same_analysis_settings, - "same_root": same_root, - "same_analysis_settings": same_analysis_settings, - "reason": reason, - } - - @staticmethod - def _severity_rank(severity: str) -> int: - return { - SEVERITY_CRITICAL: 3, - SEVERITY_WARNING: 2, - SEVERITY_INFO: 1, - }.get(severity, 0) - - def _path_filter_tuple(self, path: str | None) -> tuple[str, ...]: - if not path: - return () - cleaned = self._normalize_relative_path(Path(path).as_posix()) - return (cleaned,) if cleaned else () - - def _normalize_relative_path(self, path: str) -> str: - cleaned = path.strip() - if cleaned == ".": - return "" - if cleaned.startswith("./"): - cleaned = cleaned[2:] - cleaned = cleaned.rstrip("/") - if ".." in Path(cleaned).parts: - raise MCPServiceContractError(f"path traversal not allowed: {path}") - return cleaned - - def _previous_run_for_root(self, record: MCPRunRecord) -> MCPRunRecord | None: - previous: MCPRunRecord | None = None - for item in self._runs.records(): - if item.run_id == record.run_id: - return previous - if item.root == record.root: - previous = item - return None - - @staticmethod - def _record_supports_analysis_mode( - record: MCPRunRecord, - *, - analysis_mode: AnalysisMode, - ) -> bool: - record_mode = record.request.analysis_mode - if analysis_mode == "clones_only": - return record_mode in {"clones_only", "full"} - return record_mode == "full" - - def _latest_compatible_record( - self, - *, - analysis_mode: AnalysisMode, - root_path: Path | None = None, - ) -> MCPRunRecord | None: - for item in reversed(self._runs.records()): - if root_path is not None and item.root != root_path: - continue - if self._record_supports_analysis_mode( - item, - analysis_mode=analysis_mode, - ): - return item - return None - - def _resolve_granular_record( - self, - *, - run_id: str | None, - root: str | None, - analysis_mode: AnalysisMode, - ) -> MCPRunRecord: - if run_id is not None: - record = self._runs.get(run_id) - if 
self._record_supports_analysis_mode(record, analysis_mode=analysis_mode): - return record - raise MCPServiceContractError( - "Selected MCP run is not compatible with this check. " - f"Call analyze_repository(root='{record.root}', " - "analysis_mode='full') first." - ) - root_path = self._resolve_optional_root(root) - latest_record = self._latest_compatible_record( - analysis_mode=analysis_mode, - root_path=root_path, - ) - if latest_record is not None: - return latest_record - if root_path is not None: - raise MCPRunNotFoundError( - f"No compatible MCP analysis run is available for root: {root_path}. " - f"Call analyze_repository(root='{root_path}') or " - f"analyze_changed_paths(root='{root_path}', changed_paths=[...]) first." - ) - raise MCPRunNotFoundError( - "No compatible MCP analysis run is available. " - "Call analyze_repository(root='/path/to/repo') or " - "analyze_changed_paths(root='/path/to/repo', changed_paths=[...]) first." - ) - - def _base_findings(self, record: MCPRunRecord) -> list[dict[str, object]]: - report_document = record.report_document - findings = self._as_mapping(report_document.get("findings")) - groups = self._as_mapping(findings.get("groups")) - clone_groups = self._as_mapping(groups.get(FAMILY_CLONES)) - return [ - *self._dict_list(clone_groups.get("functions")), - *self._dict_list(clone_groups.get("blocks")), - *self._dict_list(clone_groups.get("segments")), - *self._dict_list( - self._as_mapping(groups.get(FAMILY_STRUCTURAL)).get("groups") - ), - *self._dict_list( - self._as_mapping(groups.get(FAMILY_DEAD_CODE)).get("groups") - ), - *self._dict_list(self._as_mapping(groups.get(FAMILY_DESIGN)).get("groups")), - ] - - def _query_findings( - self, - *, - record: MCPRunRecord, - family: FindingFamilyFilter = "all", - category: str | None = None, - severity: str | None = None, - source_kind: str | None = None, - novelty: FindingNoveltyFilter = "all", - sort_by: FindingSort = "default", - detail_level: DetailLevel = "normal", - 
changed_paths: Sequence[str] = (), - exclude_reviewed: bool = False, - ) -> list[dict[str, object]]: - findings = self._base_findings(record) - max_spread_value = max( - (self._spread_value(finding) for finding in findings), - default=0, - ) - with self._state_lock: - self._spread_max_cache[record.run_id] = max_spread_value - filtered = [ - finding - for finding in findings - if self._matches_finding_filters( - finding=finding, - family=family, - category=category, - severity=severity, - source_kind=source_kind, - novelty=novelty, - ) - and ( - not changed_paths - or self._finding_touches_paths( - finding=finding, - changed_paths=changed_paths, - ) - ) - and (not exclude_reviewed or not self._finding_is_reviewed(record, finding)) - ] - remediation_map = { - str(finding.get("id", "")): self._remediation_for_finding(record, finding) - for finding in filtered - } - priority_map = { - str(finding.get("id", "")): self._priority_score( - record, - finding, - remediation=remediation_map[str(finding.get("id", ""))], - max_spread_value=max_spread_value, - ) - for finding in filtered - } - ordered = self._sort_findings( - record=record, - findings=filtered, - sort_by=sort_by, - priority_map=priority_map, - ) - return [ - self._decorate_finding( - record, - finding, - detail_level=detail_level, - remediation=remediation_map[str(finding.get("id", ""))], - priority_payload=priority_map[str(finding.get("id", ""))], - max_spread_value=max_spread_value, - ) - for finding in ordered - ] - - def _sort_findings( - self, - *, - record: MCPRunRecord, - findings: Sequence[Mapping[str, object]], - sort_by: FindingSort, - priority_map: Mapping[str, Mapping[str, object]] | None = None, - ) -> list[dict[str, object]]: - finding_rows = [dict(finding) for finding in findings] - if sort_by == "default": - return finding_rows - if sort_by == "severity": - finding_rows.sort( - key=lambda finding: ( - -self._severity_rank(str(finding.get("severity", ""))), - str(finding.get("id", "")), - ) - ) - 
elif sort_by == "spread": - finding_rows.sort( - key=lambda finding: ( - -self._spread_value(finding), - -_as_float(finding.get("priority", 0.0), 0.0), - str(finding.get("id", "")), - ) - ) - else: - finding_rows.sort( - key=lambda finding: ( - -_as_float( - self._as_mapping( - (priority_map or {}).get(str(finding.get("id", ""))) - ).get("score", 0.0), - 0.0, - ) - if priority_map is not None - else -_as_float( - self._priority_score(record, finding)["score"], - 0.0, - ), - -self._severity_rank(str(finding.get("severity", ""))), - str(finding.get("id", "")), - ) - ) - return finding_rows - - def _decorate_finding( - self, - record: MCPRunRecord, - finding: Mapping[str, object], - *, - detail_level: DetailLevel, - remediation: Mapping[str, object] | None = None, - priority_payload: Mapping[str, object] | None = None, - max_spread_value: int | None = None, - ) -> dict[str, object]: - resolved_remediation = ( - remediation - if remediation is not None - else self._remediation_for_finding(record, finding) - ) - resolved_priority_payload = ( - dict(priority_payload) - if priority_payload is not None - else self._priority_score( - record, - finding, - remediation=resolved_remediation, - max_spread_value=max_spread_value, - ) - ) - payload = dict(finding) - payload["priority_score"] = resolved_priority_payload["score"] - payload["priority_factors"] = resolved_priority_payload["factors"] - payload["locations"] = self._locations_for_finding( - record, - finding, - include_uri=detail_level == "full", - ) - payload["html_anchor"] = f"finding-{finding.get('id', '')}" - if resolved_remediation is not None: - payload["remediation"] = resolved_remediation - return self._project_finding_detail( - record, - payload, - detail_level=detail_level, - ) - - def _project_finding_detail( - self, - record: MCPRunRecord, - finding: Mapping[str, object], - *, - detail_level: DetailLevel, - ) -> dict[str, object]: - if detail_level == "full": - full_payload = dict(finding) - 
full_payload["id"] = self._short_finding_id( - record, - str(finding.get("id", "")), - ) - return full_payload - payload: dict[str, object] = { - "id": self._short_finding_id(record, str(finding.get("id", ""))), - "kind": self._finding_kind_label(finding), - "severity": str(finding.get("severity", "")), - "novelty": str(finding.get("novelty", "")), - "scope": self._finding_source_kind(finding), - "count": _as_int(finding.get("count", 0), 0), - "spread": dict(self._as_mapping(finding.get("spread"))), - "priority": round(_as_float(finding.get("priority_score", 0.0), 0.0), 2), - } - clone_type = str(finding.get("clone_type", "")).strip() - if clone_type: - payload["type"] = clone_type - locations = [ - self._as_mapping(item) - for item in self._as_sequence(finding.get("locations")) - ] - if detail_level == "summary": - remediation = self._as_mapping(finding.get("remediation")) - if remediation: - payload["effort"] = str(remediation.get("effort", "")) - payload["locations"] = [ - summary_location - for summary_location in ( - self._summary_location_string(location) for location in locations - ) - if summary_location - ] - return payload - remediation = self._as_mapping(finding.get("remediation")) - if remediation: - payload["remediation"] = self._project_remediation( - remediation, - detail_level="normal", - ) - payload["locations"] = [ - projected - for projected in ( - self._normal_location_payload(location) for location in locations - ) - if projected - ] - return payload - - def _finding_summary_card( - self, - record: MCPRunRecord, - finding: Mapping[str, object], - ) -> dict[str, object]: - return self._finding_summary_card_payload( - record, - self._decorate_finding(record, finding, detail_level="full"), - ) - - def _finding_summary_card_payload( - self, - record: MCPRunRecord, - finding: Mapping[str, object], - ) -> dict[str, object]: - return self._project_finding_detail(record, finding, detail_level="summary") - - def _comparison_finding_card( - self, - 
record: MCPRunRecord, - finding: Mapping[str, object], - ) -> dict[str, object]: - summary_card = self._finding_summary_card(record, finding) - return { - "id": summary_card.get("id"), - "kind": summary_card.get("kind"), - "severity": summary_card.get("severity"), - } - - @staticmethod - def _finding_kind_label(finding: Mapping[str, object]) -> str: - family = str(finding.get("family", "")).strip() - kind = str(finding.get("kind", finding.get("category", ""))).strip() - if family == FAMILY_CLONE: - clone_kind = str( - finding.get("clone_kind", finding.get("category", kind)) - ).strip() - return f"{clone_kind}_clone" if clone_kind else "clone" - if family == FAMILY_DEAD_CODE: - return "dead_code" - return kind or family - - @staticmethod - def _summary_location_string(location: Mapping[str, object]) -> str: - path = str(location.get("file", "")).strip() - line = _as_int(location.get("line", 0), 0) - if not path: - return "" - return f"{path}:{line}" if line > 0 else path - - def _normal_location_payload( - self, - location: Mapping[str, object], - ) -> dict[str, object]: - path = str(location.get("file", "")).strip() - if not path: - return {} - payload: dict[str, object] = { - "path": path, - "line": _as_int(location.get("line", 0), 0), - "end_line": _as_int(location.get("end_line", 0), 0), - } - symbol = self._leaf_symbol_name(location.get("symbol")) - if symbol: - payload["symbol"] = symbol - return payload - - def _matches_finding_filters( - self, - *, - finding: Mapping[str, object], - family: FindingFamilyFilter, - category: str | None = None, - severity: str | None, - source_kind: str | None, - novelty: FindingNoveltyFilter, - ) -> bool: - finding_family = str(finding.get("family", "")).strip() - if family != "all" and finding_family != family: - return False - if ( - category is not None - and str(finding.get("category", "")).strip() != category - ): - return False - if ( - severity is not None - and str(finding.get("severity", "")).strip() != severity - ): 
- return False - dominant_kind = str( - self._as_mapping(finding.get("source_scope")).get("dominant_kind", "") - ).strip() - if source_kind is not None and dominant_kind != source_kind: - return False - return novelty == "all" or str(finding.get("novelty", "")).strip() == novelty - - def _finding_touches_paths( - self, - *, - finding: Mapping[str, object], - changed_paths: Sequence[str], - ) -> bool: - normalized_paths = tuple(changed_paths) - for item in self._as_sequence(finding.get("items")): - relative_path = str(self._as_mapping(item).get("relative_path", "")).strip() - if relative_path and self._path_matches(relative_path, normalized_paths): - return True - return False - - @staticmethod - def _path_matches(relative_path: str, changed_paths: Sequence[str]) -> bool: - for candidate in changed_paths: - if relative_path == candidate or relative_path.startswith(candidate + "/"): - return True - return False - - def _finding_is_reviewed( - self, - record: MCPRunRecord, - finding: Mapping[str, object], - ) -> bool: - with self._state_lock: - review_map = self._review_state.get(record.run_id, OrderedDict()) - return str(finding.get("id", "")) in review_map - - def _include_hotspot_finding( - self, - *, - record: MCPRunRecord, - finding: Mapping[str, object], - changed_paths: Sequence[str], - exclude_reviewed: bool, - ) -> bool: - if changed_paths and not self._finding_touches_paths( - finding=finding, - changed_paths=changed_paths, - ): - return False - return not exclude_reviewed or not self._finding_is_reviewed(record, finding) - - def _priority_score( - self, - record: MCPRunRecord, - finding: Mapping[str, object], - *, - remediation: Mapping[str, object] | None = None, - max_spread_value: int | None = None, - ) -> dict[str, object]: - spread_weight = self._spread_weight( - record, - finding, - max_spread_value=max_spread_value, - ) - factors = { - "severity_weight": _SEVERITY_WEIGHT.get( - str(finding.get("severity", "")), - 0.2, - ), - "effort_weight": 
_EFFORT_WEIGHT.get( - ( - str(remediation.get("effort", EFFORT_MODERATE)) - if remediation is not None - else EFFORT_MODERATE - ), - 0.6, - ), - "novelty_weight": _NOVELTY_WEIGHT.get( - str(finding.get("novelty", "")), - 0.7, - ), - "runtime_weight": _RUNTIME_WEIGHT.get( - str( - self._as_mapping(finding.get("source_scope")).get( - "dominant_kind", - "other", - ) - ), - 0.5, - ), - "spread_weight": spread_weight, - "confidence_weight": _CONFIDENCE_WEIGHT.get( - str(finding.get("confidence", CONFIDENCE_MEDIUM)), - 0.7, - ), - } - product = 1.0 - for value in factors.values(): - product *= max(_as_float(value, 0.01), 0.01) - score = product ** (1.0 / max(len(factors), 1)) - return { - "score": round(score, 4), - "factors": { - key: round(_as_float(value, 0.0), 4) for key, value in factors.items() - }, - } - - def _spread_weight( - self, - record: MCPRunRecord, - finding: Mapping[str, object], - *, - max_spread_value: int | None = None, - ) -> float: - spread_value = self._spread_value(finding) - if max_spread_value is None: - with self._state_lock: - max_spread_value = self._spread_max_cache.get(record.run_id) - if max_spread_value is None: - max_spread_value = max( - (self._spread_value(item) for item in self._base_findings(record)), - default=0, - ) - with self._state_lock: - self._spread_max_cache[record.run_id] = max_spread_value - max_value = max_spread_value - if max_value <= 0: - return 0.3 - return max(0.2, min(1.0, spread_value / max_value)) - - def _spread_value(self, finding: Mapping[str, object]) -> int: - spread = self._as_mapping(finding.get("spread")) - files = _as_int(spread.get("files", 0), 0) - functions = _as_int(spread.get("functions", 0), 0) - count = _as_int(finding.get("count", 0), 0) - return max(files, functions, count, 1) - - def _locations_for_finding( - self, - record: MCPRunRecord, - finding: Mapping[str, object], - *, - include_uri: bool = True, - ) -> list[dict[str, object]]: - locations: list[dict[str, object]] = [] - for item in 
self._as_sequence(finding.get("items")): - item_map = self._as_mapping(item) - relative_path = str(item_map.get("relative_path", "")).strip() - if not relative_path: - continue - line = _as_int(item_map.get("start_line", 0) or 0, 0) - end_line = _as_int(item_map.get("end_line", 0) or 0, 0) - symbol = str(item_map.get("qualname", item_map.get("module", ""))).strip() - location: dict[str, object] = { - "file": relative_path, - "line": line, - "end_line": end_line, - "symbol": symbol, - } - if include_uri: - absolute_path = (record.root / relative_path).resolve() - uri = absolute_path.as_uri() - if line > 0: - uri = f"{uri}#L{line}" - location["uri"] = uri - locations.append(location) - deduped: list[dict[str, object]] = [] - seen: set[tuple[str, int, str]] = set() - for location in locations: - key = ( - str(location.get("file", "")), - _as_int(location.get("line", 0), 0), - str(location.get("symbol", "")), - ) - if key not in seen: - seen.add(key) - deduped.append(location) - return deduped - - @staticmethod - def _suggestion_finding_id(suggestion: object) -> str: - return _suggestion_finding_id_payload(suggestion) - - def _remediation_for_finding( - self, - record: MCPRunRecord, - finding: Mapping[str, object], - ) -> dict[str, object] | None: - suggestion = self._suggestion_for_finding(record, str(finding.get("id", ""))) - if suggestion is None: - return None - source_kind = str(getattr(suggestion, "source_kind", "other")) - spread_files = _as_int(getattr(suggestion, "spread_files", 0), 0) - spread_functions = _as_int(getattr(suggestion, "spread_functions", 0), 0) - title = str(getattr(suggestion, "title", "")).strip() - severity = str(finding.get("severity", "")).strip() - novelty = str(finding.get("novelty", "known")).strip() - count = _as_int( - getattr(suggestion, "fact_count", 0) or finding.get("count", 0) or 0, - 0, - ) - safe_refactor_shape = self._safe_refactor_shape(suggestion) - effort = str(getattr(suggestion, "effort", EFFORT_MODERATE)) - confidence = 
str(getattr(suggestion, "confidence", CONFIDENCE_MEDIUM)) - risk_level = self._risk_level_for_effort(effort) - return { - "effort": effort, - "priority": _as_float(getattr(suggestion, "priority", 0.0), 0.0), - "confidence": confidence, - "safe_refactor_shape": safe_refactor_shape, - "steps": list(getattr(suggestion, "steps", ())), - "risk_level": risk_level, - "why_now": self._why_now_text( - title=title, - severity=severity, - novelty=novelty, - count=count, - source_kind=source_kind, - spread_files=spread_files, - spread_functions=spread_functions, - effort=effort, - ), - "blast_radius": { - "files": spread_files, - "functions": spread_functions, - "is_production": source_kind == "production", - }, - } - - def _suggestion_for_finding( - self, - record: MCPRunRecord, - finding_id: str, - ) -> object | None: - for suggestion in record.suggestions: - if self._suggestion_finding_id(suggestion) == finding_id: - return suggestion - return None - - @staticmethod - def _safe_refactor_shape(suggestion: object) -> str: - category = str(getattr(suggestion, "category", "")).strip() - clone_type = str(getattr(suggestion, "clone_type", "")).strip() - title = str(getattr(suggestion, "title", "")).strip() - if category == CATEGORY_CLONE and clone_type == "Type-1": - return "Keep one canonical implementation and route callers through it." - if category == CATEGORY_CLONE and clone_type == "Type-2": - return "Extract shared implementation with explicit parameters." - if category == CATEGORY_CLONE and "Block" in title: - return "Extract the repeated statement sequence into a helper." - if category == CATEGORY_STRUCTURAL: - return "Extract the repeated branch family into a named helper." - if category == CATEGORY_COMPLEXITY: - return "Split the function into smaller named steps." - if category == CATEGORY_COUPLING: - return "Isolate responsibilities and invert unnecessary dependencies." - if category == CATEGORY_COHESION: - return "Split the class by responsibility boundary." 
- if category == CATEGORY_DEAD_CODE: - return "Delete the unused symbol or document intentional reachability." - if category == CATEGORY_DEPENDENCY: - return "Break the cycle by moving shared abstractions to a lower layer." - return "Extract the repeated logic into a shared, named abstraction." - - @staticmethod - def _risk_level_for_effort(effort: str) -> str: - return { - EFFORT_EASY: "low", - EFFORT_MODERATE: "medium", - EFFORT_HARD: "high", - }.get(effort, "medium") - - @staticmethod - def _why_now_text( - *, - title: str, - severity: str, - novelty: str, - count: int, - source_kind: str, - spread_files: int, - spread_functions: int, - effort: str, - ) -> str: - novelty_text = "new regression" if novelty == "new" else "known debt" - context = ( - "production code" - if source_kind == "production" - else source_kind or "mixed scope" - ) - spread_text = f"{spread_files} files / {spread_functions} functions" - count_text = f"{count} instances" if count > 0 else "localized issue" - return ( - f"{severity.upper()} {title} in {context} — {count_text}, " - f"{spread_text}, {effort} fix, {novelty_text}." 
- ) - - def _project_remediation( - self, - remediation: Mapping[str, object], - *, - detail_level: DetailLevel, - ) -> dict[str, object]: - if detail_level == "full": - return dict(remediation) - projected = { - "effort": remediation.get("effort"), - "risk": remediation.get("risk_level"), - "shape": remediation.get("safe_refactor_shape"), - "why_now": remediation.get("why_now"), - } - if detail_level == "summary": - return projected - projected["steps"] = list(self._as_sequence(remediation.get("steps"))) - return projected - - def _hotspot_rows( - self, - *, - record: MCPRunRecord, - kind: HotlistKind, - detail_level: DetailLevel, - changed_paths: Sequence[str], - exclude_reviewed: bool, - ) -> list[dict[str, object]]: - findings = self._base_findings(record) - finding_index = {str(finding.get("id", "")): finding for finding in findings} - max_spread_value = max( - (self._spread_value(finding) for finding in findings), - default=0, - ) - with self._state_lock: - self._spread_max_cache[record.run_id] = max_spread_value - remediation_map = { - str(finding.get("id", "")): self._remediation_for_finding(record, finding) - for finding in findings - } - priority_map = { - str(finding.get("id", "")): self._priority_score( - record, - finding, - remediation=remediation_map[str(finding.get("id", ""))], - max_spread_value=max_spread_value, - ) - for finding in findings - } - derived = self._as_mapping(record.report_document.get("derived")) - hotlists = self._as_mapping(derived.get("hotlists")) - if kind == "highest_priority": - ordered_ids = [ - str(finding.get("id", "")) - for finding in self._sort_findings( - record=record, - findings=findings, - sort_by="priority", - priority_map=priority_map, - ) - ] - else: - hotlist_key = _HOTLIST_REPORT_KEYS.get(kind) - if hotlist_key is None: - return [] - ordered_ids = [ - str(item) - for item in self._as_sequence(hotlists.get(hotlist_key)) - if str(item) - ] - rows: list[dict[str, object]] = [] - for finding_id in ordered_ids: - 
finding = finding_index.get(finding_id) - if finding is None or not self._include_hotspot_finding( - record=record, - finding=finding, - changed_paths=changed_paths, - exclude_reviewed=exclude_reviewed, - ): - continue - finding_id_key = str(finding.get("id", "")) - rows.append( - self._decorate_finding( - record, - finding, - detail_level=detail_level, - remediation=remediation_map[finding_id_key], - priority_payload=priority_map[finding_id_key], - max_spread_value=max_spread_value, - ) - ) - return rows - - def _build_changed_projection( - self, - record: MCPRunRecord, - ) -> dict[str, object] | None: - if not record.changed_paths: - return None - items = self._query_findings( - record=record, - detail_level="summary", - changed_paths=record.changed_paths, - ) - new_count = sum(1 for item in items if str(item.get("novelty", "")) == "new") - known_count = sum( - 1 for item in items if str(item.get("novelty", "")) == "known" - ) - new_by_source_kind = self._source_kind_breakdown( - item.get("source_kind") - for item in items - if str(item.get("novelty", "")) == "new" - ) - health_delta = self._summary_health_delta(record.summary) - return { - "run_id": self._short_run_id(record.run_id), - "changed_paths": list(record.changed_paths), - "total": len(items), - "new": new_count, - "known": known_count, - "new_by_source_kind": new_by_source_kind, - "items": items, - "health": dict(self._summary_health_payload(record.summary)), - "health_delta": health_delta, - "verdict": self._changed_verdict( - changed_projection={"new": new_count, "total": len(items)}, - health_delta=health_delta, - ), - } - - def _changed_analysis_payload( - self, - record: MCPRunRecord, - ) -> dict[str, object]: - changed_projection = self._as_mapping(record.changed_projection) - health = self._summary_health_payload(record.summary) - health_payload = ( - { - "score": health.get("score"), - "grade": health.get("grade"), - } - if health.get("available") is not False - else dict(health) - ) - return { 
- "run_id": self._short_run_id(record.run_id), - "focus": _FOCUS_CHANGED_PATHS, - "health_scope": _HEALTH_SCOPE_REPOSITORY, - "baseline": dict(self._summary_baseline_payload(record.summary)), - "changed_files": len(record.changed_paths), - "health": health_payload, - "analysis_profile": self._summary_analysis_profile_payload(record.summary), - "health_delta": ( - _as_int(changed_projection.get("health_delta", 0), 0) - if changed_projection.get("health_delta") is not None - else None - ), - "verdict": str(changed_projection.get("verdict", "stable")), - "new_findings": _as_int(changed_projection.get("new", 0), 0), - "new_by_source_kind": dict( - self._as_mapping(changed_projection.get("new_by_source_kind")) - ), - "resolved_findings": 0, - "changed_findings": [], - "coverage_join": self._summary_coverage_join_payload(record), - } - - def _augment_summary_with_changed( - self, - *, - summary: Mapping[str, object], - changed_paths: Sequence[str], - changed_projection: Mapping[str, object] | None, - ) -> dict[str, object]: - payload = dict(summary) - if changed_paths: - payload["changed_paths"] = list(changed_paths) - if changed_projection is not None: - payload["changed_findings"] = { - "total": _as_int(changed_projection.get("total", 0), 0), - "new": _as_int(changed_projection.get("new", 0), 0), - "known": _as_int(changed_projection.get("known", 0), 0), - "items": [ - dict(self._as_mapping(item)) - for item in self._as_sequence(changed_projection.get("items"))[:10] - ], - } - payload["health_delta"] = ( - _as_int(changed_projection.get("health_delta", 0), 0) - if changed_projection.get("health_delta") is not None - else None - ) - payload["verdict"] = str(changed_projection.get("verdict", "stable")) - return payload - - @staticmethod - def _changed_verdict( - *, - changed_projection: Mapping[str, object], - health_delta: int | None, - ) -> str: - if _as_int(changed_projection.get("new", 0), 0) > 0 or ( - health_delta is not None and health_delta < 0 - ): - return 
"regressed" - if ( - _as_int(changed_projection.get("total", 0), 0) == 0 - and health_delta is not None - and health_delta > 0 - ): - return "improved" - return "stable" - - def _comparison_index( - self, - record: MCPRunRecord, - *, - focus: ComparisonFocus, - ) -> dict[str, dict[str, object]]: - findings = self._base_findings(record) - if focus == "clones": - findings = [f for f in findings if str(f.get("family", "")) == FAMILY_CLONE] - elif focus == "structural": - findings = [ - f for f in findings if str(f.get("family", "")) == FAMILY_STRUCTURAL - ] - elif focus == "metrics": - findings = [ - f - for f in findings - if str(f.get("family", "")) in {FAMILY_DESIGN, FAMILY_DEAD_CODE} - ] - return {str(finding.get("id", "")): dict(finding) for finding in findings} - - @staticmethod - def _comparison_verdict( - *, - regressions: int, - improvements: int, - health_delta: int | None, - ) -> str: - has_negative_signal = regressions > 0 or ( - health_delta is not None and health_delta < 0 - ) - has_positive_signal = improvements > 0 or ( - health_delta is not None and health_delta > 0 - ) - if has_negative_signal and has_positive_signal: - return "mixed" - if has_negative_signal: - return "regressed" - if has_positive_signal: - return "improved" - return "stable" - - @staticmethod - def _comparison_summary_text( - *, - comparable: bool, - comparability_reason: str, - regressions: int, - improvements: int, - health_delta: int | None, - ) -> str: - if not comparable: - reason_text = { - "different_root": "different roots", - "different_analysis_settings": "different analysis settings", - "different_root_and_analysis_settings": ( - "different roots and analysis settings" - ), - }.get(comparability_reason, "incomparable runs") - return f"Finding and run health deltas omitted ({reason_text})" - if health_delta is None: - return ( - f"{improvements} findings resolved, {regressions} new regressions; " - "run health delta omitted (metrics unavailable)" - ) - return ( - 
f"{improvements} findings resolved, {regressions} new regressions, " - f"run health delta {health_delta:+d}" - ) - - def _render_pr_summary_markdown(self, payload: Mapping[str, object]) -> str: - health = self._as_mapping(payload.get("health")) - score = health.get("score", "n/a") - grade = health.get("grade", "n/a") - delta = _as_int(payload.get("health_delta", 0), 0) - changed_items = [ - self._as_mapping(item) - for item in self._as_sequence(payload.get("new_findings_in_changed_files")) - ] - resolved = [ - self._as_mapping(item) - for item in self._as_sequence(payload.get("resolved")) - ] - blocking_gates = [ - str(item) - for item in self._as_sequence(payload.get("blocking_gates")) - if str(item) - ] - health_line = ( - f"Health: {score}/100 ({grade}) | Delta: {delta:+d} | " - f"Verdict: {payload.get('verdict', 'stable')}" - if payload.get("health_delta") is not None - else ( - f"Health: {score}/100 ({grade}) | Delta: n/a | " - f"Verdict: {payload.get('verdict', 'stable')}" - ) - ) - lines = [ - "## CodeClone Summary", - "", - health_line, - "", - f"### New findings in changed files ({len(changed_items)})", - ] - if not changed_items: - lines.append("- None") - else: - lines.extend( - [ - ( - f"- **{str(item.get('severity', 'info')).upper()}** " - f"{item.get('kind', 'finding')} in " - f"`{self._finding_display_location(item)}`" - ) - for item in changed_items[:10] - ] - ) - lines.extend(["", f"### Resolved ({len(resolved)})"]) - if not resolved: - lines.append("- None") - else: - lines.extend( - [ - ( - f"- {item.get('kind', 'finding')} in " - f"`{self._finding_display_location(item)}`" - ) - for item in resolved[:10] - ] - ) - lines.extend(["", "### Blocking gates"]) - if not blocking_gates: - lines.append("- none") - else: - lines.extend([f"- `{reason}`" for reason in blocking_gates]) - return "\n".join(lines) - - def _finding_display_location(self, finding: Mapping[str, object]) -> str: - locations = self._as_sequence(finding.get("locations")) - if not 
locations: - return "(unknown)" - first = locations[0] - if isinstance(first, str): - return first - location = self._as_mapping(first) - path = str(location.get("path", location.get("file", ""))).strip() - line = _as_int(location.get("line", 0), 0) - if not path: - return "(unknown)" - return f"{path}:{line}" if line > 0 else path - - def _granular_payload( - self, - *, - record: MCPRunRecord, - check: str, - items: Sequence[Mapping[str, object]], - detail_level: DetailLevel, - max_results: int, - path: str | None, - threshold_context: Mapping[str, object] | None = None, - ) -> dict[str, object]: - bounded_items = [dict(item) for item in items[: max(1, max_results)]] - full_health = dict(self._as_mapping(record.summary.get("health"))) - dimensions = self._as_mapping(full_health.get("dimensions")) - relevant_dimension = _CHECK_TO_DIMENSION.get(check) - slim_dimensions = ( - {relevant_dimension: dimensions.get(relevant_dimension)} - if relevant_dimension and relevant_dimension in dimensions - else dict(dimensions) - ) - payload: dict[str, object] = { - "run_id": self._short_run_id(record.run_id), - "check": check, - "detail_level": detail_level, - "path": path, - "returned": len(bounded_items), - "total": len(items), - "health": { - "score": full_health.get("score"), - "grade": full_health.get("grade"), - "dimensions": slim_dimensions, - }, - "items": bounded_items, - } - if threshold_context: - payload["threshold_context"] = dict(threshold_context) - return payload - - def _design_threshold_context( - self, - *, - record: MCPRunRecord, - check: str, - path: str | None, - items: Sequence[Mapping[str, object]], - requested_min: int | None = None, - ) -> dict[str, object] | None: - if items: - return None - spec = _DESIGN_CHECK_CONTEXT.get(check) - if spec is None: - return None - category = str(spec["category"]) - metric = str(spec["metric"]) - operator = str(spec["operator"]) - normalized_path = self._normalize_relative_path(path or "") - metrics = 
self._as_mapping(record.report_document.get("metrics")) - families = self._as_mapping(metrics.get("families")) - family = self._as_mapping(families.get(category)) - metric_items = [ - self._as_mapping(item) - for item in self._as_sequence(family.get("items")) - if not normalized_path - or self._metric_item_matches_path( - self._as_mapping(item), - normalized_path, - ) - ] - if not metric_items: - return None - values = [_as_int(item.get(metric), 0) for item in metric_items] - finding_threshold = self._design_finding_threshold( - record=record, - check=check, - ) - threshold = finding_threshold - threshold_kind = "finding_threshold" - if requested_min is not None and requested_min > finding_threshold: - threshold = requested_min - threshold_kind = "requested_min" - highest_below = self._highest_below_threshold( - values=values, - operator=operator, - threshold=threshold, - ) - payload: dict[str, object] = { - "metric": metric, - "threshold": threshold, - "threshold_kind": threshold_kind, - "measured_units": len(metric_items), - } - if threshold_kind != "finding_threshold": - payload["finding_threshold"] = finding_threshold - if highest_below is not None: - payload["highest_below_threshold"] = highest_below - return payload - - def _design_finding_threshold( - self, - *, - record: MCPRunRecord, - check: str, - ) -> int: - spec = _DESIGN_CHECK_CONTEXT[check] - category = str(spec["category"]) - default_threshold = _as_int(spec["default_threshold"]) - findings = self._as_mapping(record.report_document.get("findings")) - thresholds = self._as_mapping( - self._as_mapping(findings.get("thresholds")).get("design_findings") - ) - threshold_payload = self._as_mapping(thresholds.get(category)) - if threshold_payload: - return _as_int(threshold_payload.get("value"), default_threshold) - request_value = { - "complexity": record.request.complexity_threshold, - "coupling": record.request.coupling_threshold, - "cohesion": record.request.cohesion_threshold, - }.get(check) - return 
_as_int(request_value, default_threshold) - - @staticmethod - def _highest_below_threshold( - *, - values: Sequence[int], - operator: str, - threshold: int, - ) -> int | None: - if operator == ">": - below = [value for value in values if value <= threshold] - elif operator == ">=": - below = [value for value in values if value < threshold] - else: - return None - if not below: - return None - return max(below) - - @staticmethod - def _normalized_source_kind(value: object) -> str: - normalized = str(value).strip().lower() - if normalized in SOURCE_KIND_ORDER: - return normalized - return SOURCE_KIND_OTHER - - def _finding_source_kind(self, finding: Mapping[str, object]) -> str: - source_scope = self._as_mapping(finding.get("source_scope")) - return self._normalized_source_kind(source_scope.get("dominant_kind")) - - def _source_kind_breakdown( - self, - source_kinds: Iterable[object], - ) -> dict[str, int]: - breakdown = dict.fromkeys(_SOURCE_KIND_BREAKDOWN_ORDER, 0) - for value in source_kinds: - breakdown[self._normalized_source_kind(value)] += 1 - return breakdown - - def _triage_suggestion_rows(self, record: MCPRunRecord) -> list[dict[str, object]]: - derived = self._as_mapping(record.report_document.get("derived")) - canonical_rows = self._dict_list(derived.get("suggestions")) - suggestion_source_kinds = { - self._suggestion_finding_id(suggestion): self._normalized_source_kind( - getattr(suggestion, "source_kind", SOURCE_KIND_OTHER) - ) - for suggestion in record.suggestions - } - rows: list[dict[str, object]] = [] - for row in canonical_rows: - canonical_finding_id = str(row.get("finding_id", "")) - action = self._as_mapping(row.get("action")) - try: - finding_id = self._short_finding_id( - record, - self._resolve_canonical_finding_id(record, canonical_finding_id), - ) - except MCPFindingNotFoundError: - finding_id = self._base_short_finding_id(canonical_finding_id) - rows.append( - { - "id": f"suggestion:{finding_id}", - "finding_id": finding_id, - "title": 
str(row.get("title", "")), - "summary": str(row.get("summary", "")), - "effort": str(action.get("effort", "")), - "steps": list(self._as_sequence(action.get("steps"))), - "source_kind": suggestion_source_kinds.get( - canonical_finding_id, - SOURCE_KIND_OTHER, - ), - } - ) - return rows - - def _derived_section_payload(self, record: MCPRunRecord) -> dict[str, object]: - derived = self._as_mapping(record.report_document.get("derived")) - if not derived: - raise MCPServiceContractError( - "Report section 'derived' is not available in this run." - ) - suggestions = self._triage_suggestion_rows(record) - canonical_to_short, _ = self._finding_id_maps(record) - hotlists = self._as_mapping(derived.get("hotlists")) - projected_hotlists: dict[str, list[str]] = {} - for hotlist_key, hotlist_ids in hotlists.items(): - projected_hotlists[hotlist_key] = [ - canonical_to_short.get( - str(finding_id), - self._base_short_finding_id(str(finding_id)), - ) - for finding_id in self._as_sequence(hotlist_ids) - if str(finding_id) - ] - return { - "suggestions": suggestions, - "hotlists": projected_hotlists, - } - - @staticmethod - def _schema_resource_payload() -> dict[str, object]: - return { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "CodeCloneCanonicalReport", - "type": "object", - "required": [ - "report_schema_version", - "meta", - "inventory", - "findings", - "derived", - "integrity", - ], - "properties": { - "report_schema_version": { - "type": "string", - "const": REPORT_SCHEMA_VERSION, - }, - "meta": {"type": "object"}, - "inventory": {"type": "object"}, - "findings": {"type": "object"}, - "metrics": {"type": "object"}, - "derived": {"type": "object"}, - "integrity": {"type": "object"}, - }, - } - - def _validate_analysis_request(self, request: MCPAnalysisRequest) -> None: - self._validate_choice( - "analysis_mode", - request.analysis_mode, - _VALID_ANALYSIS_MODES, - ) - self._validate_choice( - "cache_policy", - request.cache_policy, - 
_VALID_CACHE_POLICIES, - ) - if request.cache_policy == "refresh": - raise MCPServiceContractError( - "cache_policy='refresh' is not supported by the read-only " - "CodeClone MCP server. Use 'reuse' or 'off'." - ) - if request.analysis_mode == "clones_only" and request.coverage_xml is not None: - raise MCPServiceContractError( - "coverage_xml requires analysis_mode='full' because coverage join " - "depends on metrics-enabled analysis." - ) - - @staticmethod - def _validate_choice( - name: str, - value: ChoiceT, - allowed: Sequence[str] | frozenset[str], - ) -> ChoiceT: - if value not in allowed: - allowed_list = ", ".join(sorted(allowed)) - raise MCPServiceContractError( - f"Invalid value for {name}: {value!r}. Expected one of: {allowed_list}." - ) - return value - - def _validate_optional_choice( - self, - name: str, - value: ChoiceT | None, - allowed: Sequence[str] | frozenset[str], - ) -> ChoiceT | None: - if value is None: - return None - return self._validate_choice(name, value, allowed) - - @staticmethod - def _metrics_detail_family(value: str | None) -> MetricsDetailFamily | None: - match value: - case "complexity": - return "complexity" - case "coupling": - return "coupling" - case "cohesion": - return "cohesion" - case "coverage_adoption": - return "coverage_adoption" - case "coverage_join": - return "coverage_join" - case "dependencies": - return "dependencies" - case "dead_code": - return "dead_code" - case "api_surface": - return "api_surface" - case "god_modules" | "overloaded_modules": - return "overloaded_modules" - case "health": - return "health" - case _: - return None - - @staticmethod - def _dict_rows(value: object) -> list[dict[str, object]]: - if not isinstance(value, Sequence) or isinstance( - value, - (str, bytes, bytearray), - ): - return [] - return [dict(item) for item in value if isinstance(item, Mapping)] - - @staticmethod - def _string_rows(value: object) -> list[str]: - if not isinstance(value, Sequence) or isinstance( - value, - 
(str, bytes, bytearray), - ): - return [] - return [str(item) for item in value if isinstance(item, str)] - - @staticmethod - def _resolve_root(root: str | None) -> Path: - cleaned_root = "" if root is None else str(root).strip() - if not cleaned_root: - raise MCPServiceContractError( - "MCP analysis requires an absolute repository root. " - "Omitted or relative roots are unsafe because the MCP server " - "working directory may not match the client workspace." - ) - candidate = Path(cleaned_root).expanduser() - if not candidate.is_absolute(): - raise MCPServiceContractError( - f"MCP requires an absolute repository root; got relative root " - f"{cleaned_root!r}. Relative roots like '.' are unsafe because " - "the MCP server working directory may not match the client " - "workspace." - ) - try: - root_path = candidate.resolve() - except OSError as exc: - raise MCPServiceContractError( - f"Invalid root path '{cleaned_root}': {exc}" - ) from exc - if not root_path.exists(): - raise MCPServiceContractError(f"Root path does not exist: {root_path}") - if not root_path.is_dir(): - raise MCPServiceContractError(f"Root path is not a directory: {root_path}") - return root_path - - def _resolve_optional_root(self, root: str | None) -> Path | None: - cleaned_root = "" if root is None else str(root).strip() - if not cleaned_root: - return None - return self._resolve_root(cleaned_root) - - def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namespace: - args = Namespace( - root=str(root_path), - min_loc=DEFAULT_MIN_LOC, - min_stmt=DEFAULT_MIN_STMT, - block_min_loc=DEFAULT_BLOCK_MIN_LOC, - block_min_stmt=DEFAULT_BLOCK_MIN_STMT, - segment_min_loc=DEFAULT_SEGMENT_MIN_LOC, - segment_min_stmt=DEFAULT_SEGMENT_MIN_STMT, - processes=None, - cache_path=None, - max_cache_size_mb=DEFAULT_MAX_CACHE_SIZE_MB, - baseline=DEFAULT_BASELINE_PATH, - max_baseline_size_mb=DEFAULT_MAX_BASELINE_SIZE_MB, - update_baseline=False, - fail_on_new=False, - fail_threshold=-1, - ci=False, 
- fail_complexity=-1, - fail_coupling=-1, - fail_cohesion=-1, - fail_cycles=False, - fail_dead_code=False, - fail_health=-1, - fail_on_new_metrics=False, - fail_on_typing_regression=False, - fail_on_docstring_regression=False, - fail_on_api_break=False, - min_typing_coverage=-1, - min_docstring_coverage=-1, - api_surface=False, - coverage_xml=None, - fail_on_untested_hotspots=False, - coverage_min=DEFAULT_COVERAGE_MIN, - design_complexity_threshold=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD, - design_coupling_threshold=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD, - design_cohesion_threshold=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD, - update_metrics_baseline=False, - metrics_baseline=DEFAULT_BASELINE_PATH, - skip_metrics=False, - skip_dead_code=False, - skip_dependencies=False, - golden_fixture_paths=(), - html_out=None, - json_out=None, - md_out=None, - sarif_out=None, - text_out=None, - no_progress=True, - no_color=True, - quiet=True, - verbose=False, - debug=False, - open_html_report=False, - timestamped_report_paths=False, - ) - if request.respect_pyproject: - try: - config_values = load_pyproject_config(root_path) - except ConfigValidationError as exc: - raise MCPServiceContractError(str(exc)) from exc - for key in sorted(_MCP_CONFIG_KEYS.intersection(config_values)): - setattr(args, key, config_values[key]) - - self._apply_request_overrides(args=args, root_path=root_path, request=request) - - if request.analysis_mode == "clones_only": - args.skip_metrics = True - args.skip_dead_code = True - args.skip_dependencies = True - else: - args.skip_metrics = False - args.skip_dead_code = False - args.skip_dependencies = False - - if not validate_numeric_args(args): - raise MCPServiceContractError( - "Numeric analysis settings must be non-negative and thresholds " - "must be >= -1. Coverage thresholds must be between 0 and 100." 
- ) - - return args - - def _apply_request_overrides( - self, - *, - args: Namespace, - root_path: Path, - request: MCPAnalysisRequest, - ) -> None: - override_map: dict[str, object | None] = { - "processes": request.processes, - "min_loc": request.min_loc, - "min_stmt": request.min_stmt, - "block_min_loc": request.block_min_loc, - "block_min_stmt": request.block_min_stmt, - "segment_min_loc": request.segment_min_loc, - "segment_min_stmt": request.segment_min_stmt, - "api_surface": request.api_surface, - "coverage_min": request.coverage_min, - "max_baseline_size_mb": request.max_baseline_size_mb, - "max_cache_size_mb": request.max_cache_size_mb, - "design_complexity_threshold": request.complexity_threshold, - "design_coupling_threshold": request.coupling_threshold, - "design_cohesion_threshold": request.cohesion_threshold, - } - for key, value in override_map.items(): - if value is not None: - setattr(args, key, value) - - if request.baseline_path is not None: - args.baseline = str( - self._resolve_optional_path(request.baseline_path, root_path) - ) - if request.metrics_baseline_path is not None: - args.metrics_baseline = str( - self._resolve_optional_path(request.metrics_baseline_path, root_path) - ) - if request.cache_path is not None: - args.cache_path = str( - self._resolve_optional_path(request.cache_path, root_path) - ) - if request.coverage_xml is not None: - args.coverage_xml = str( - self._resolve_optional_path(request.coverage_xml, root_path) - ) - - @staticmethod - def _resolve_optional_path(value: str, root_path: Path) -> Path: - candidate = Path(value).expanduser() - resolved = candidate if candidate.is_absolute() else root_path / candidate - try: - return resolved.resolve() - except OSError as exc: - raise MCPServiceContractError( - f"Invalid path '{value}' relative to '{root_path}': {exc}" - ) from exc - - def _resolve_baseline_inputs( - self, - *, - root_path: Path, - args: Namespace, - ) -> tuple[Path, bool, Path, bool, dict[str, object] | None]: - 
baseline_path = self._resolve_optional_path(str(args.baseline), root_path) - baseline_exists = baseline_path.exists() - - metrics_baseline_arg_path = self._resolve_optional_path( - str(args.metrics_baseline), - root_path, - ) - shared_baseline_payload: dict[str, object] | None = None - if metrics_baseline_arg_path == baseline_path: - probe = probe_metrics_baseline_section(metrics_baseline_arg_path) - metrics_baseline_exists = probe.has_metrics_section - shared_baseline_payload = probe.payload - else: - metrics_baseline_exists = metrics_baseline_arg_path.exists() - - return ( - baseline_path, - baseline_exists, - metrics_baseline_arg_path, - metrics_baseline_exists, - shared_baseline_payload, - ) - - @staticmethod - def _resolve_cache_path(*, root_path: Path, args: Namespace) -> Path: - return resolve_cache_path( - root_path=root_path, - args=args, - from_args=bool(args.cache_path), - legacy_cache_path=_LEGACY_CACHE_PATH, - console=_BufferConsole(), - ) - - @staticmethod - def _build_cache( - *, - root_path: Path, - args: Namespace, - cache_path: Path, - policy: CachePolicy, - ) -> Cache: - cache = Cache( - cache_path, - root=root_path, - max_size_bytes=_as_int(args.max_cache_size_mb, 0) * 1024 * 1024, - min_loc=_as_int(args.min_loc, DEFAULT_MIN_LOC), - min_stmt=_as_int(args.min_stmt, DEFAULT_MIN_STMT), - block_min_loc=_as_int(args.block_min_loc, DEFAULT_BLOCK_MIN_LOC), - block_min_stmt=_as_int(args.block_min_stmt, DEFAULT_BLOCK_MIN_STMT), - segment_min_loc=_as_int(args.segment_min_loc, DEFAULT_SEGMENT_MIN_LOC), - segment_min_stmt=_as_int( - args.segment_min_stmt, - DEFAULT_SEGMENT_MIN_STMT, - ), - collect_api_surface=bool(getattr(args, "api_surface", False)), - ) - if policy != "off": - cache.load() - return cache - - @staticmethod - def _metrics_computed(analysis_mode: AnalysisMode) -> tuple[str, ...]: - return ( - () - if analysis_mode == "clones_only" - else ( - "complexity", - "coupling", - "cohesion", - "health", - "dependencies", - "dead_code", - ) - ) - - 
@staticmethod - def _load_report_document(report_json: str) -> dict[str, object]: - return _load_report_document_payload(report_json) - - def _report_digest(self, report_document: Mapping[str, object]) -> str: - integrity = self._as_mapping(report_document.get("integrity")) - digest = self._as_mapping(integrity.get("digest")) - value = digest.get("value") - if not isinstance(value, str) or not value: - raise MCPServiceError("Canonical report digest is missing.") - return value - - def _build_run_summary_payload( - self, - *, - run_id: str, - root_path: Path, - request: MCPAnalysisRequest, - report_document: Mapping[str, object], - baseline_state: CloneBaselineState, - metrics_baseline_state: MetricsBaselineState, - cache_status: CacheStatus, - new_func: Sequence[str] | set[str], - new_block: Sequence[str] | set[str], - metrics_diff: MetricsDiff | None, - warnings: Sequence[str], - failures: Sequence[str], - ) -> dict[str, object]: - meta = self._as_mapping(report_document.get("meta")) - meta_baseline = self._as_mapping(meta.get("baseline")) - meta_metrics_baseline = self._as_mapping(meta.get("metrics_baseline")) - meta_cache = self._as_mapping(meta.get("cache")) - inventory = self._as_mapping(report_document.get("inventory")) - findings = self._as_mapping(report_document.get("findings")) - metrics = self._as_mapping(report_document.get("metrics")) - metrics_summary = self._as_mapping(metrics.get("summary")) - summary = self._as_mapping(findings.get("summary")) - analysis_profile = self._summary_analysis_profile_payload(meta) - payload = { - "run_id": run_id, - "root": str(root_path), - "analysis_mode": request.analysis_mode, - "codeclone_version": meta.get("codeclone_version", __version__), - "python_tag": str(meta.get("python_tag", "")), - "report_schema_version": report_document.get( - "report_schema_version", - REPORT_SCHEMA_VERSION, - ), - "baseline": { - "path": meta_baseline.get( - "path", - str(root_path / DEFAULT_BASELINE_PATH), - ), - "loaded": 
bool(meta_baseline.get("loaded", baseline_state.loaded)), - "status": str(meta_baseline.get("status", baseline_state.status.value)), - "trusted_for_diff": baseline_state.trusted_for_diff, - "python_tag": meta_baseline.get("python_tag"), - }, - "metrics_baseline": { - "path": meta_metrics_baseline.get( - "path", - str(root_path / DEFAULT_BASELINE_PATH), - ), - "loaded": bool( - meta_metrics_baseline.get( - "loaded", - metrics_baseline_state.loaded, - ) - ), - "status": str( - meta_metrics_baseline.get( - "status", - metrics_baseline_state.status.value, - ) - ), - "trusted_for_diff": metrics_baseline_state.trusted_for_diff, - }, - "cache": { - "path": meta_cache.get("path"), - "status": str(meta_cache.get("status", cache_status.value)), - "used": bool(meta_cache.get("used", False)), - "schema_version": meta_cache.get("schema_version"), - }, - "inventory": dict(inventory), - "findings_summary": dict(summary), - "health": dict(self._as_mapping(metrics_summary.get("health"))), - "baseline_diff": { - "new_function_clone_groups": len(new_func), - "new_block_clone_groups": len(new_block), - "new_clone_groups_total": len(new_func) + len(new_block), - }, - "metrics_diff": self._metrics_diff_payload(metrics_diff), - "warnings": list(warnings), - "failures": list(failures), - } - if analysis_profile: - payload["analysis_profile"] = analysis_profile - payload["cache"] = self._summary_cache_payload(payload) - payload["health"] = self._summary_health_payload(payload) - return payload - - def _summary_payload( - self, - summary: Mapping[str, object], - *, - record: MCPRunRecord | None = None, - ) -> dict[str, object]: - inventory = self._as_mapping(summary.get("inventory")) - if ( - not summary.get("run_id") - and not record - and "inventory" in summary - and not summary.get("baseline") - ): - return { - "focus": _FOCUS_REPOSITORY, - "health_scope": _HEALTH_SCOPE_REPOSITORY, - "inventory": self._summary_inventory_payload(inventory), - "health": 
self._summary_health_payload(summary), - } - resolved_run_id = ( - record.run_id if record is not None else str(summary.get("run_id", "")) - ) - payload: dict[str, object] = { - "run_id": self._short_run_id(resolved_run_id) if resolved_run_id else "", - "focus": _FOCUS_REPOSITORY, - "health_scope": _HEALTH_SCOPE_REPOSITORY, - "version": str(summary.get("codeclone_version", __version__)), - "schema": str(summary.get("report_schema_version", REPORT_SCHEMA_VERSION)), - "mode": str(summary.get("analysis_mode", "")), - "baseline": self._summary_baseline_payload(summary), - "metrics_baseline": self._summary_metrics_baseline_payload(summary), - "cache": self._summary_cache_payload(summary), - "inventory": self._summary_inventory_payload(inventory), - "health": self._summary_health_payload(summary), - "findings": self._summary_findings_payload(summary, record=record), - "diff": self._summary_diff_payload(summary), - "warnings": list(self._as_sequence(summary.get("warnings"))), - "failures": list(self._as_sequence(summary.get("failures"))), - } - analysis_profile = self._summary_analysis_profile_payload(summary) - if analysis_profile: - payload["analysis_profile"] = analysis_profile - if record is not None: - coverage_join = self._summary_coverage_join_payload(record) - if coverage_join: - payload["coverage_join"] = coverage_join - return payload - - def _summary_analysis_profile_payload( - self, - summary: Mapping[str, object], - ) -> dict[str, int]: - analysis_profile = self._as_mapping(summary.get("analysis_profile")) - if not analysis_profile: - return {} - keys = ( - "min_loc", - "min_stmt", - "block_min_loc", - "block_min_stmt", - "segment_min_loc", - "segment_min_stmt", - ) - payload = {key: _as_int(analysis_profile.get(key), -1) for key in keys} - return {key: value for key, value in payload.items() if value >= 0} - - def _summary_baseline_payload( - self, - summary: Mapping[str, object], - ) -> dict[str, object]: - return 
self._summary_trusted_state_payload(summary, key="baseline") - - def _summary_metrics_baseline_payload( - self, - summary: Mapping[str, object], - ) -> dict[str, object]: - return self._summary_trusted_state_payload(summary, key="metrics_baseline") - - def _summary_trusted_state_payload( - self, - summary: Mapping[str, object], - *, - key: str, - ) -> dict[str, object]: - baseline = self._as_mapping(summary.get(key)) - trusted = bool(baseline.get("trusted_for_diff", False)) - payload: dict[str, object] = { - "loaded": bool(baseline.get("loaded", False)), - "status": str(baseline.get("status", "")), - "trusted": trusted, - } - if key == "baseline": - payload["compared_without_valid_baseline"] = not trusted - baseline_python_tag = baseline.get("python_tag") - runtime_python_tag = summary.get("python_tag") - if isinstance(baseline_python_tag, str) and baseline_python_tag.strip(): - payload["baseline_python_tag"] = baseline_python_tag - if isinstance(runtime_python_tag, str) and runtime_python_tag.strip(): - payload["runtime_python_tag"] = runtime_python_tag - return payload - - def _summary_cache_payload( - self, - summary: Mapping[str, object], - ) -> dict[str, object]: - cache = dict(self._as_mapping(summary.get("cache"))) - if not cache: - return {} - return { - "used": bool(cache.get("used", False)), - "freshness": self._effective_freshness(summary), - } - - def _effective_freshness( - self, - summary: Mapping[str, object], - ) -> FreshnessKind: - inventory = self._as_mapping(summary.get("inventory")) - files = self._as_mapping(inventory.get("files")) - analyzed = max(0, _as_int(files.get("analyzed", 0), 0)) - cached = max(0, _as_int(files.get("cached", 0), 0)) - cache = self._as_mapping(summary.get("cache")) - cache_used = bool(cache.get("used")) - if cache_used and cached > 0 and analyzed == 0: - return "reused" - if cache_used and cached > 0 and analyzed > 0: - return "mixed" - return "fresh" - - def _summary_inventory_payload( - self, - inventory: Mapping[str, 
object], - ) -> dict[str, object]: - if not inventory: - return {} - files = self._as_mapping(inventory.get("files")) - code = self._as_mapping(inventory.get("code")) - total_files = _as_int( - files.get( - "total_found", - files.get( - "analyzed", - len( - self._as_sequence( - self._as_mapping(inventory.get("file_registry")).get( - "items" - ) - ) - ), - ), - ), - 0, - ) - functions = _as_int(code.get("functions", 0), 0) + _as_int( - code.get("methods", 0), - 0, - ) - return { - "files": total_files, - "lines": _as_int(code.get("parsed_lines", 0), 0), - "functions": functions, - "classes": _as_int(code.get("classes", 0), 0), - } - - def _summary_findings_payload( - self, - summary: Mapping[str, object], - *, - record: MCPRunRecord | None, - ) -> dict[str, object]: - findings_summary = self._as_mapping(summary.get("findings_summary")) - if record is None: - return { - "total": _as_int(findings_summary.get("total", 0), 0), - "new": 0, - "known": 0, - "by_family": {}, - "production": 0, - "new_by_source_kind": self._source_kind_breakdown(()), - } - findings = self._base_findings(record) - by_family: dict[str, int] = { - "clones": 0, - "structural": 0, - "dead_code": 0, - "design": 0, - } - new_count = 0 - known_count = 0 - production_count = 0 - new_by_source_kind = self._source_kind_breakdown( - self._finding_source_kind(finding) - for finding in findings - if str(finding.get("novelty", "")).strip() == "new" - ) - for finding in findings: - family = str(finding.get("family", "")).strip() - family_key = "clones" if family == FAMILY_CLONE else family - if family_key in by_family: - by_family[family_key] += 1 - if str(finding.get("novelty", "")).strip() == "new": - new_count += 1 - else: - known_count += 1 - if self._finding_source_kind(finding) == SOURCE_KIND_PRODUCTION: - production_count += 1 - return { - "total": len(findings), - "new": new_count, - "known": known_count, - "by_family": {key: value for key, value in by_family.items() if value > 0}, - "production": 
production_count, - "new_by_source_kind": new_by_source_kind, - } - - def _summary_diff_payload( - self, - summary: Mapping[str, object], - ) -> dict[str, object]: - baseline_diff = self._as_mapping(summary.get("baseline_diff")) - metrics_diff = self._as_mapping(summary.get("metrics_diff")) - return { - "new_clones": _as_int(baseline_diff.get("new_clone_groups_total", 0), 0), - "health_delta": ( - _as_int(metrics_diff.get("health_delta", 0), 0) - if metrics_diff - and self._summary_health_payload(summary).get("available") is not False - else None - ), - "typing_param_permille_delta": _as_int( - metrics_diff.get("typing_param_permille_delta", 0), - 0, - ), - "typing_return_permille_delta": _as_int( - metrics_diff.get("typing_return_permille_delta", 0), - 0, - ), - "docstring_permille_delta": _as_int( - metrics_diff.get("docstring_permille_delta", 0), - 0, - ), - "api_breaking_changes": _as_int( - metrics_diff.get("api_breaking_changes", 0), - 0, - ), - "new_api_symbols": _as_int( - metrics_diff.get("new_api_symbols", 0), - 0, - ), - } - - def _summary_coverage_join_payload( - self, - record: MCPRunRecord, - ) -> dict[str, object]: - metrics = self._as_mapping(record.report_document.get("metrics")) - families = self._as_mapping(metrics.get("families")) - coverage_join = self._as_mapping(families.get("coverage_join")) - summary = self._as_mapping(coverage_join.get("summary")) - if not summary: - return {} - payload: dict[str, object] = { - "status": str(summary.get("status", "")).strip(), - "overall_permille": _as_int(summary.get("overall_permille", 0), 0), - "coverage_hotspots": _as_int(summary.get("coverage_hotspots", 0), 0), - "scope_gap_hotspots": _as_int(summary.get("scope_gap_hotspots", 0), 0), - "hotspot_threshold_percent": _as_int( - summary.get("hotspot_threshold_percent", 0), - 0, - ), - } - source_value = summary.get("source") - source = source_value.strip() if isinstance(source_value, str) else "" - if source: - payload["source"] = source - 
invalid_reason_value = summary.get("invalid_reason") - invalid_reason = ( - invalid_reason_value.strip() - if isinstance(invalid_reason_value, str) - else "" - ) - if invalid_reason: - payload["invalid_reason"] = invalid_reason - return payload - - def _metrics_detail_payload( - self, - *, - metrics: Mapping[str, object], - family: MetricsDetailFamily | None, - path: str | None, - offset: int, - limit: int, - ) -> dict[str, object]: - summary = dict(self._as_mapping(metrics.get("summary"))) - families = self._as_mapping(metrics.get("families")) - normalized_path = self._normalize_relative_path(path or "") - if family is None and not normalized_path: - return { - "summary": summary, - "_hint": "Use family and/or path parameters to access per-item detail.", - } - family_names: Sequence[str] = ( - (family,) if family is not None else tuple(sorted(families)) - ) - items: list[dict[str, object]] = [] - for family_name in family_names: - family_payload = self._as_mapping(families.get(family_name)) - for item in self._as_sequence(family_payload.get("items")): - item_map = self._as_mapping(item) - if normalized_path and not self._metric_item_matches_path( - item_map, - normalized_path, - ): - continue - compact_item = self._compact_metrics_item(item_map) - if family is None: - compact_item = {"family": family_name, **compact_item} - items.append(compact_item) - if family is None: - items.sort( - key=lambda item: ( - str(item.get("family", "")), - str(item.get("path", "")), - str(item.get("qualname", "")), - _as_int(item.get("start_line", 0), 0), - ) - ) - page = paginate(items, offset=offset, limit=limit, max_limit=200) - return { - "family": family, - "path": normalized_path or None, - "offset": page.offset, - "limit": page.limit, - "returned": len(page.items), - "total": page.total, - "has_more": page.next_offset is not None, - "items": page.items, - } - - def _metric_item_matches_path( - self, - item: Mapping[str, object], - normalized_path: str, - ) -> bool: - 
path_value = ( - str(item.get("relative_path", "")).strip() - or str(item.get("path", "")).strip() - or str(item.get("filepath", "")).strip() - or str(item.get("file", "")).strip() - ) - if not path_value: - return False - return self._path_matches(path_value, (normalized_path,)) - - @staticmethod - def _compact_metrics_item( - item: Mapping[str, object], - ) -> dict[str, object]: - compact: dict[str, object] = {} - path_value = ( - str(item.get("relative_path", "")).strip() - or str(item.get("path", "")).strip() - or str(item.get("filepath", "")).strip() - or str(item.get("file", "")).strip() - ) - if path_value: - compact["path"] = path_value - for key, value in item.items(): - if ( - key not in _COMPACT_ITEM_PATH_KEYS - and value not in _COMPACT_ITEM_EMPTY_VALUES - ): - compact[str(key)] = value - return compact - - @staticmethod - def _metrics_diff_payload( - metrics_diff: MetricsDiff | None, - ) -> dict[str, object] | None: - payload = _summarize_metrics_diff(metrics_diff) - return dict(payload) if payload is not None else None - - def _dict_list(self, value: object) -> list[dict[str, object]]: - return [dict(self._as_mapping(item)) for item in self._as_sequence(value)] - - @staticmethod - def _as_mapping(value: object) -> Mapping[str, object]: - return value if isinstance(value, Mapping) else {} - - @staticmethod - def _as_sequence(value: object) -> Sequence[object]: - if isinstance(value, Sequence) and not isinstance( - value, - (str, bytes, bytearray), - ): - return value - return () diff --git a/scripts/build_docs_example_report.py b/scripts/build_docs_example_report.py index b003fd2..805adcc 100644 --- a/scripts/build_docs_example_report.py +++ b/scripts/build_docs_example_report.py @@ -21,6 +21,7 @@ from codeclone import __version__ DEFAULT_OUTPUT_DIR = Path("site/examples/report/live") +CODECLONE_CLI_MODULE = "codeclone.main" @dataclass(frozen=True) @@ -61,7 +62,7 @@ def _run_codeclone(scan_root: Path, artifacts: ReportArtifacts) -> None: cmd = [ 
sys.executable, "-m", - "codeclone.cli", + CODECLONE_CLI_MODULE, str(scan_root), "--html", str(artifacts.html), diff --git a/tests/test_docs_example_report.py b/tests/test_docs_example_report.py new file mode 100644 index 0000000..eee1230 --- /dev/null +++ b/tests/test_docs_example_report.py @@ -0,0 +1,69 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import runpy +import sys +from pathlib import Path +from unittest.mock import patch + + +def _load_docs_report_namespace() -> dict[str, object]: + script_path = ( + Path(__file__).resolve().parents[1] / "scripts" / "build_docs_example_report.py" + ) + return runpy.run_path(str(script_path)) + + +def test_docs_example_report_uses_main_entrypoint( + tmp_path: Path, +) -> None: + module = _load_docs_report_namespace() + observed: dict[str, object] = {} + + def _fake_run( + cmd: list[str], + *, + cwd: Path, + check: bool, + ) -> None: + observed["cmd"] = cmd + observed["cwd"] = cwd + observed["check"] = check + + report_artifacts_type = module["ReportArtifacts"] + assert callable(report_artifacts_type) + artifacts = report_artifacts_type( + html=tmp_path / "index.html", + json=tmp_path / "report.json", + sarif=tmp_path / "report.sarif", + manifest=tmp_path / "manifest.json", + ) + run_codeclone = module["_run_codeclone"] + assert callable(run_codeclone) + + with patch("subprocess.run", side_effect=_fake_run): + run_codeclone(tmp_path, artifacts) + + assert observed == { + "cmd": [ + sys.executable, + "-m", + "codeclone.main", + str(tmp_path), + "--html", + str(artifacts.html), + "--json", + str(artifacts.json), + "--sarif", + str(artifacts.sarif), + "--no-progress", + "--quiet", + ], + "cwd": tmp_path, + "check": True, + } diff --git 
a/tests/test_html_report.py b/tests/test_html_report.py index 1e21f7b..128b39d 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -30,15 +30,15 @@ from codeclone.report.document.builder import build_report_document from codeclone.report.explain import build_block_group_facts from codeclone.report.html import ( + build_html_report as _core_build_html_report, +) +from codeclone.report.html.widgets.badges import _tab_empty_info +from codeclone.report.html.widgets.snippets import ( _FileCache, _pygments_css, _render_code_block, _try_pygments, ) -from codeclone.report.html import ( - build_html_report as _core_build_html_report, -) -from codeclone.report.html.widgets.badges import _tab_empty_info from codeclone.report.renderers.json import render_json_report_document from tests._assertions import assert_contains_all from tests._report_fixtures import ( @@ -1324,14 +1324,14 @@ def test_html_report_with_blocks(tmp_path: Path) -> None: def test_html_report_pygments_fallback(monkeypatch: pytest.MonkeyPatch) -> None: - import codeclone.report.html as hr + import codeclone.report.html.widgets.snippets as snippets def _fake_css(name: str) -> str: if name in ("github-dark", "github-light"): return "" return "x" - monkeypatch.setattr(hr, "_pygments_css", _fake_css) + monkeypatch.setattr(snippets, "_pygments_css", _fake_css) html = build_html_report( func_groups={}, block_groups={}, segment_groups={}, title="Pygments" ) @@ -1457,11 +1457,11 @@ def test_render_code_block_truncates_and_fallback( f = tmp_path / "a.py" f.write_text("\n".join([f"line{i}" for i in range(1, 30)]), "utf-8") - import codeclone.report.html as hr + import codeclone.report.html.widgets.snippets as snippets - monkeypatch.setattr(hr, "_try_pygments", lambda _text: None) + monkeypatch.setattr(snippets, "_try_pygments", lambda _text: None) cache = _FileCache(maxsize=2) - snippet = hr._render_code_block( + snippet = snippets._render_code_block( filepath=str(f), start_line=1, end_line=20, 
diff --git a/tests/test_mcp_service.py b/tests/test_mcp_service.py index b0342b6..72a764f 100644 --- a/tests/test_mcp_service.py +++ b/tests/test_mcp_service.py @@ -18,8 +18,15 @@ import pytest -import codeclone.surfaces.mcp.session as mcp_service_mod +import codeclone.surfaces.mcp._session_baseline as mcp_baseline_mod +import codeclone.surfaces.mcp._session_finding_mixin as mcp_finding_mod +import codeclone.surfaces.mcp._session_helpers as mcp_helpers_mod +import codeclone.surfaces.mcp._session_runtime as mcp_runtime_mod +import codeclone.surfaces.mcp._session_shared as mcp_shared_mod +import codeclone.surfaces.mcp._session_state_mixin as mcp_state_mod +import codeclone.surfaces.mcp.session as mcp_session_mod from codeclone.baseline import Baseline, current_python_tag +from codeclone.baseline.metrics_baseline import MetricsBaseline from codeclone.cache.store import Cache from codeclone.config.pyproject_loader import ConfigValidationError from codeclone.contracts import REPORT_SCHEMA_VERSION @@ -214,6 +221,141 @@ def _build_quality_service(root: Path) -> CodeCloneMCPService: return service +def _assert_loaded_mcp_baseline_state( + *, + calls: dict[str, object], + expected_size_mb: int, + expected_payload: dict[str, object] | None, + state: mcp_baseline_mod.CloneBaselineState | mcp_baseline_mod.MetricsBaselineState, +) -> None: + assert calls == { + "max_size_bytes": expected_size_mb * 1024 * 1024, + "preloaded_payload": expected_payload, + "python_tag": current_python_tag(), + } + assert state.loaded is True + assert state.trusted_for_diff is True + assert state.status.value == "ok" + assert state.warning_message is None + + +def test_mcp_runtime_resolve_cache_path_prefers_explicit_path(tmp_path: Path) -> None: + args = SimpleNamespace(cache_path="~/codeclone-explicit-cache.json") + + resolved = mcp_runtime_mod.resolve_cache_path(root_path=tmp_path, args=args) + + assert resolved == Path("~/codeclone-explicit-cache.json").expanduser() + + +def 
test_mcp_clone_baseline_state_loads_existing_baseline( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + calls: dict[str, object] = {} + + def fake_load( + self: Baseline, + *, + max_size_bytes: int, + preloaded_payload: dict[str, object] | None = None, + ) -> None: + calls["max_size_bytes"] = max_size_bytes + calls["preloaded_payload"] = preloaded_payload + + def fake_verify(self: Baseline, *, current_python_tag: str) -> None: + calls["python_tag"] = current_python_tag + + monkeypatch.setattr(Baseline, "load", fake_load) + monkeypatch.setattr(Baseline, "verify_compatibility", fake_verify) + + state = mcp_baseline_mod.resolve_clone_baseline_state( + baseline_path=tmp_path / "codeclone.baseline.json", + baseline_exists=True, + max_baseline_size_mb=2, + ) + + _assert_loaded_mcp_baseline_state( + calls=calls, + expected_size_mb=2, + expected_payload=None, + state=state, + ) + + +def test_mcp_metrics_baseline_state_loads_existing_baseline( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + calls: dict[str, object] = {} + + def fake_load( + self: MetricsBaseline, + *, + max_size_bytes: int, + preloaded_payload: dict[str, object] | None = None, + ) -> None: + calls["max_size_bytes"] = max_size_bytes + calls["preloaded_payload"] = preloaded_payload + + def fake_verify(self: MetricsBaseline, *, runtime_python_tag: str) -> None: + calls["python_tag"] = runtime_python_tag + + monkeypatch.setattr(MetricsBaseline, "load", fake_load) + monkeypatch.setattr(MetricsBaseline, "verify_compatibility", fake_verify) + + state = mcp_baseline_mod.resolve_metrics_baseline_state( + metrics_baseline_path=tmp_path / "metrics-baseline.json", + metrics_baseline_exists=True, + max_baseline_size_mb=3, + skip_metrics=False, + ) + + _assert_loaded_mcp_baseline_state( + calls=calls, + expected_size_mb=3, + expected_payload=None, + state=state, + ) + + +def test_mcp_metrics_baseline_state_uses_shared_payload( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> 
None: + calls: dict[str, object] = {} + payload: dict[str, object] = {"metrics": {"summary": {"health": {"score": 100}}}} + + def fake_load( + self: MetricsBaseline, + *, + max_size_bytes: int, + preloaded_payload: dict[str, object] | None = None, + ) -> None: + calls["max_size_bytes"] = max_size_bytes + calls["preloaded_payload"] = preloaded_payload + + def fake_verify(self: MetricsBaseline, *, runtime_python_tag: str) -> None: + calls["python_tag"] = runtime_python_tag + + monkeypatch.setattr(MetricsBaseline, "load", fake_load) + monkeypatch.setattr(MetricsBaseline, "verify_compatibility", fake_verify) + + state = mcp_baseline_mod.resolve_metrics_baseline_state( + metrics_baseline_path=tmp_path / "metrics-baseline.json", + metrics_baseline_exists=True, + max_baseline_size_mb=1, + skip_metrics=False, + shared_baseline_payload=payload, + ) + + _assert_loaded_mcp_baseline_state( + calls=calls, + expected_size_mb=1, + expected_payload=payload, + state=state, + ) + + def _analyze_quality_repository( root: Path, ) -> tuple[CodeCloneMCPService, dict[str, object]]: @@ -958,10 +1100,8 @@ def test_mcp_service_summary_reuses_canonical_meta_for_cache_and_health( def test_mcp_service_effective_freshness_classifies_summary_cache_usage() -> None: - service = CodeCloneMCPService(history_limit=4) - assert ( - service._effective_freshness( + mcp_helpers_mod._effective_freshness( { "cache": {"used": False}, "inventory": {"files": {"analyzed": 2, "cached": 0}}, @@ -970,7 +1110,7 @@ def test_mcp_service_effective_freshness_classifies_summary_cache_usage() -> Non == "fresh" ) assert ( - service._effective_freshness( + mcp_helpers_mod._effective_freshness( { "cache": {"used": True}, "inventory": {"files": {"analyzed": 0, "cached": 2}}, @@ -979,7 +1119,7 @@ def test_mcp_service_effective_freshness_classifies_summary_cache_usage() -> Non == "reused" ) assert ( - service._effective_freshness( + mcp_helpers_mod._effective_freshness( { "cache": {"used": True}, "inventory": {"files": 
{"analyzed": 1, "cached": 2}}, @@ -1260,7 +1400,7 @@ def test_mcp_service_build_args_handles_pyproject_and_invalid_settings( service = CodeCloneMCPService(history_limit=4) monkeypatch.setattr( - mcp_service_mod, + mcp_state_mod, "load_pyproject_config", lambda _root: { "min_loc": 12, @@ -1286,7 +1426,7 @@ def test_mcp_service_build_args_handles_pyproject_and_invalid_settings( assert str(args.metrics_baseline).endswith("metrics.json") monkeypatch.setattr( - mcp_service_mod, + mcp_state_mod, "load_pyproject_config", lambda _root: (_ for _ in ()).throw(ConfigValidationError("bad config")), ) @@ -1345,17 +1485,17 @@ def test_mcp_service_root_and_helper_contract_errors( ) with pytest.raises(MCPServiceError): - service._load_report_document("{") + mcp_helpers_mod._load_report_document("{") with pytest.raises(MCPServiceError): - service._load_report_document("[]") + mcp_helpers_mod._load_report_document("[]") with pytest.raises(MCPServiceError): - service._report_digest({}) + mcp_helpers_mod._report_digest({}) def test_mcp_service_helper_filters_and_metrics_payload() -> None: service = CodeCloneMCPService(history_limit=4) - payload = service._metrics_diff_payload( + payload = mcp_helpers_mod._metrics_diff_payload( MetricsDiff( new_high_risk_functions=("pkg.a:f",), new_high_coupling_classes=("pkg.a:C",), @@ -1376,7 +1516,7 @@ def test_mcp_service_helper_filters_and_metrics_payload() -> None: "new_api_symbols": 0, "api_breaking_changes": 0, } - assert service._metrics_diff_payload(None) is None + assert mcp_helpers_mod._metrics_diff_payload(None) is None finding = { "family": "clone", @@ -1414,7 +1554,7 @@ def test_mcp_service_helper_filters_and_metrics_payload() -> None: ) is False ) - assert service._as_sequence("not-a-sequence") == () + assert mcp_helpers_mod._as_sequence("not-a-sequence") == () def test_mcp_service_git_diff_and_helper_branch_edges( @@ -1423,22 +1563,22 @@ def test_mcp_service_git_diff_and_helper_branch_edges( service = 
CodeCloneMCPService(history_limit=4) with pytest.raises(MCPGitDiffError, match="Invalid git diff ref"): - mcp_service_mod._git_diff_lines_payload( + mcp_shared_mod._git_diff_lines_payload( root_path=tmp_path, git_diff_ref="--cached", ) with pytest.raises(MCPGitDiffError, match="safe revision expression"): - mcp_service_mod._git_diff_lines_payload( + mcp_shared_mod._git_diff_lines_payload( root_path=tmp_path, git_diff_ref="HEAD:path", ) - assert service._normalize_relative_path("./.github/workflows/docs.yml") == ( + assert mcp_helpers_mod._normalize_relative_path("./.github/workflows/docs.yml") == ( ".github/workflows/docs.yml" ) with pytest.raises(MCPServiceContractError, match="path traversal not allowed"): - service._normalize_relative_path("../outside.py") + mcp_helpers_mod._normalize_relative_path("../outside.py") full_record = _dummy_run_record(tmp_path, "full") object.__setattr__( @@ -1586,7 +1726,7 @@ def _fake_load(self: Cache) -> None: load_calls.append("loaded") monkeypatch.setattr(Cache, "load", _fake_load) - service._build_cache( + mcp_helpers_mod._build_cache( root_path=tmp_path, args=args, cache_path=tmp_path / "cache.json", @@ -1602,7 +1742,7 @@ def test_mcp_service_build_args_defers_process_count_to_runtime( service = CodeCloneMCPService(history_limit=4) monkeypatch.setattr( - mcp_service_mod, + mcp_state_mod, "load_pyproject_config", lambda _root: {"processes": 3}, ) @@ -1629,19 +1769,17 @@ def test_mcp_service_invalid_path_resolution_contract_errors( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - service = CodeCloneMCPService(history_limit=4) - def _boom(self: Path, *args: object, **kwargs: object) -> Path: raise OSError("bad path") monkeypatch.setattr(Path, "resolve", _boom) with pytest.raises(MCPServiceContractError): - service._resolve_root(str(tmp_path)) + mcp_helpers_mod._resolve_root(str(tmp_path)) with pytest.raises(MCPServiceContractError, match="absolute repository root"): - service._resolve_root(".") + 
mcp_helpers_mod._resolve_root(".") with pytest.raises(MCPServiceContractError): - service._resolve_optional_path("cache.json", tmp_path) + mcp_helpers_mod._resolve_optional_path("cache.json", tmp_path) def test_mcp_service_granular_checks_reject_relative_root_and_allow_omission( @@ -1716,7 +1854,7 @@ def _fake_report(**kwargs: Any) -> object: ) monkeypatch = pytest.MonkeyPatch() - monkeypatch.setattr(mcp_service_mod, "report", _fake_report) + monkeypatch.setattr(mcp_session_mod, "report", _fake_report) try: with pytest.raises(MCPServiceError): service.analyze_repository( @@ -1734,19 +1872,19 @@ def test_mcp_service_low_level_runtime_helpers_and_run_store( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - console = mcp_service_mod._BufferConsole() + console = mcp_shared_mod._BufferConsole() console.print("alpha", 2) console.print(" ") assert console.messages == ["alpha 2"] monkeypatch.setattr( - cast(Any, mcp_service_mod).subprocess, + cast(Any, mcp_shared_mod).subprocess, "run", lambda *args, **kwargs: SimpleNamespace( stdout="pkg/a.py\npkg/b.py\npkg/a.py\n" ), ) - assert mcp_service_mod._git_diff_lines_payload( + assert mcp_shared_mod._git_diff_lines_payload( root_path=tmp_path, git_diff_ref="HEAD", ) == ("pkg/a.py", "pkg/b.py") @@ -1754,17 +1892,17 @@ def test_mcp_service_low_level_runtime_helpers_and_run_store( def _raise_subprocess(*args: object, **kwargs: object) -> object: raise subprocess.CalledProcessError(1, ["git", "diff"]) - monkeypatch.setattr(cast(Any, mcp_service_mod).subprocess, "run", _raise_subprocess) + monkeypatch.setattr(cast(Any, mcp_shared_mod).subprocess, "run", _raise_subprocess) with pytest.raises(MCPGitDiffError): - mcp_service_mod._git_diff_lines_payload(root_path=tmp_path, git_diff_ref="HEAD") + mcp_shared_mod._git_diff_lines_payload(root_path=tmp_path, git_diff_ref="HEAD") - assert mcp_service_mod._load_report_document_payload('{"ok": true}') == {"ok": True} + assert mcp_shared_mod._load_report_document_payload('{"ok": 
true}') == {"ok": True} with pytest.raises(MCPServiceError): - mcp_service_mod._load_report_document_payload("{") + mcp_shared_mod._load_report_document_payload("{") with pytest.raises(MCPServiceError): - mcp_service_mod._load_report_document_payload("[]") + mcp_shared_mod._load_report_document_payload("[]") - store = mcp_service_mod.CodeCloneMCPRunStore(history_limit=1) + store = mcp_shared_mod.CodeCloneMCPRunStore(history_limit=1) first = _dummy_run_record(tmp_path, "first") second = _dummy_run_record(tmp_path, "second") assert store.register(first) is first @@ -1774,7 +1912,7 @@ def _raise_subprocess(*args: object, **kwargs: object) -> object: with pytest.raises(MCPRunNotFoundError): store.get("first") with pytest.raises(ValueError): - mcp_service_mod.CodeCloneMCPRunStore(history_limit=11) + mcp_shared_mod.CodeCloneMCPRunStore(history_limit=11) def test_mcp_service_branch_helpers_on_real_runs( @@ -1903,7 +2041,7 @@ def test_mcp_service_branch_helpers_on_real_runs( ) monkeypatch.setattr( - mcp_service_mod, + mcp_finding_mod, "_git_diff_lines_payload", lambda **kwargs: ("pkg/dup.py", "pkg/dup.py"), ) @@ -1947,7 +2085,7 @@ def test_mcp_service_branch_helpers_on_real_runs( }, ) assert len(duplicate_locations) == 1 - assert service._path_matches("pkg/dup.py", ("pkg",)) + assert mcp_helpers_mod._path_matches("pkg/dup.py", ("pkg",)) assert service._finding_touches_paths( finding={"items": [{"relative_path": "pkg/dup.py"}]}, changed_paths=("pkg",), @@ -1981,7 +2119,7 @@ def test_mcp_service_remediation_and_comparison_helper_branches( ) _assert_comparable_comparison(comparison, verdict="improved") assert ( - service._comparison_verdict( + mcp_helpers_mod._comparison_verdict( regressions=1, improvements=0, health_delta=0, @@ -1989,7 +2127,7 @@ def test_mcp_service_remediation_and_comparison_helper_branches( == "regressed" ) assert ( - service._comparison_verdict( + mcp_helpers_mod._comparison_verdict( regressions=0, improvements=1, health_delta=0, @@ -1997,7 +2135,7 @@ 
def test_mcp_service_remediation_and_comparison_helper_branches( == "improved" ) assert ( - service._comparison_verdict( + mcp_helpers_mod._comparison_verdict( regressions=1, improvements=0, health_delta=1, @@ -2005,7 +2143,7 @@ def test_mcp_service_remediation_and_comparison_helper_branches( == "mixed" ) assert ( - service._comparison_verdict( + mcp_helpers_mod._comparison_verdict( regressions=0, improvements=1, health_delta=-1, @@ -2013,7 +2151,7 @@ def test_mcp_service_remediation_and_comparison_helper_branches( == "mixed" ) assert ( - service._comparison_verdict( + mcp_helpers_mod._comparison_verdict( regressions=1, improvements=1, health_delta=0, @@ -2021,7 +2159,7 @@ def test_mcp_service_remediation_and_comparison_helper_branches( == "mixed" ) assert ( - service._comparison_verdict( + mcp_helpers_mod._comparison_verdict( regressions=0, improvements=0, health_delta=0, @@ -2029,7 +2167,7 @@ def test_mcp_service_remediation_and_comparison_helper_branches( == "stable" ) assert ( - service._comparison_verdict( + mcp_helpers_mod._comparison_verdict( regressions=0, improvements=1, health_delta=None, @@ -2037,21 +2175,21 @@ def test_mcp_service_remediation_and_comparison_helper_branches( == "improved" ) assert ( - service._changed_verdict( + mcp_helpers_mod._changed_verdict( changed_projection={"new": 1, "total": 1}, health_delta=0, ) == "regressed" ) assert ( - service._changed_verdict( + mcp_helpers_mod._changed_verdict( changed_projection={"new": 0, "total": 0}, health_delta=1, ) == "improved" ) assert ( - service._changed_verdict( + mcp_helpers_mod._changed_verdict( changed_projection={"new": 0, "total": 1}, health_delta=0, ) @@ -2076,22 +2214,25 @@ def test_mcp_service_remediation_and_comparison_helper_branches( "blast_radius": {"files": 1}, "steps": ["one", "two"], } - assert service._project_remediation(remediation, detail_level="full") == remediation - summary_remediation = service._project_remediation( + assert ( + 
mcp_helpers_mod._project_remediation(remediation, detail_level="full") + == remediation + ) + summary_remediation = mcp_helpers_mod._project_remediation( remediation, detail_level="summary", ) assert "steps" not in summary_remediation assert summary_remediation["shape"] == "Extract helper" assert summary_remediation["risk"] == "medium" - normal_remediation = service._project_remediation( + normal_remediation = mcp_helpers_mod._project_remediation( remediation, detail_level="normal", ) assert normal_remediation["steps"] == ["one", "two"] - assert service._risk_level_for_effort("easy") == "low" - assert service._risk_level_for_effort("hard") == "high" - assert "new regression" in service._why_now_text( + assert mcp_helpers_mod._risk_level_for_effort("easy") == "low" + assert mcp_helpers_mod._risk_level_for_effort("hard") == "high" + assert "new regression" in mcp_helpers_mod._why_now_text( title="Clone group", severity="warning", novelty="new", @@ -2140,7 +2281,7 @@ def test_mcp_service_compare_runs_marks_different_roots_incomparable( assert len(str(after_payload["run_id"])) == 8 _assert_incomparable_comparison(comparison, reason="different_root") assert "Finding and run health deltas omitted" in str(comparison["summary"]) - assert "known debt" in service._why_now_text( + assert "known debt" in mcp_helpers_mod._why_now_text( title="Clone group", severity="warning", novelty="known", @@ -2151,38 +2292,38 @@ def test_mcp_service_compare_runs_marks_different_roots_incomparable( effort="easy", ) - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( SimpleNamespace(category="clone", clone_type="Type-1", title="Function clone"), ).startswith("Keep one canonical") - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( SimpleNamespace(category="clone", clone_type="Type-2", title="Function clone"), ).startswith("Extract shared") - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( 
SimpleNamespace(category="clone", clone_type="Type-4", title="Block clone"), ).startswith("Extract the repeated statement") - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( SimpleNamespace(category="structural", clone_type="", title="Branches"), ).startswith("Extract the repeated branch") - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( SimpleNamespace(category="complexity", clone_type="", title="Complex"), ).startswith("Split the function") - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( SimpleNamespace(category="coupling", clone_type="", title="Coupling"), ).startswith("Isolate responsibilities") - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( SimpleNamespace(category="cohesion", clone_type="", title="Cohesion"), ).startswith("Split the class") - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( SimpleNamespace(category="dead_code", clone_type="", title="Dead code"), ).startswith("Delete the unused symbol") - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( SimpleNamespace(category="dependency", clone_type="", title="Cycle"), ).startswith("Break the cycle") - assert service._safe_refactor_shape( + assert mcp_helpers_mod._safe_refactor_shape( SimpleNamespace(category="other", clone_type="", title="Other"), ).startswith("Extract the repeated logic") - empty_markdown = service._render_pr_summary_markdown( + empty_markdown = mcp_helpers_mod._render_pr_summary_markdown( { "health": {"score": 81, "grade": "B"}, "health_delta": 0, @@ -2275,8 +2416,8 @@ def test_mcp_service_additional_projection_and_error_branches( run_id = str(summary["run_id"]) record = service._runs.get(run_id) - assert mcp_service_mod._suggestion_finding_id_payload(object()) == "" - assert mcp_service_mod._suggestion_finding_id_payload( + assert 
mcp_shared_mod._suggestion_finding_id_payload(object()) == "" + assert mcp_shared_mod._suggestion_finding_id_payload( SimpleNamespace( finding_family="structural", finding_kind="duplicated_branches", @@ -2285,7 +2426,7 @@ def test_mcp_service_additional_projection_and_error_branches( title="Structural", ) ).startswith("structural:") - assert mcp_service_mod._suggestion_finding_id_payload( + assert mcp_shared_mod._suggestion_finding_id_payload( SimpleNamespace( finding_family="design", finding_kind="", @@ -2294,7 +2435,7 @@ def test_mcp_service_additional_projection_and_error_branches( title="Dead code", ) ).startswith("dead_code:") - assert mcp_service_mod._suggestion_finding_id_payload( + assert mcp_shared_mod._suggestion_finding_id_payload( SimpleNamespace( finding_family="design", finding_kind="", @@ -2648,10 +2789,13 @@ def _patched_get_finding( "measured_units": 1, } assert ( - service._highest_below_threshold(values=(9,), operator=">", threshold=5) is None + mcp_helpers_mod._highest_below_threshold(values=(9,), operator=">", threshold=5) + is None ) assert ( - service._highest_below_threshold(values=(1, 2), operator="!=", threshold=5) + mcp_helpers_mod._highest_below_threshold( + values=(1, 2), operator="!=", threshold=5 + ) is None ) detail_payload = service._project_finding_detail( @@ -2772,7 +2916,7 @@ def _patched_get_finding( ) metrics_focus = service._comparison_index(record, focus="metrics") assert isinstance(metrics_focus, dict) - resolved_markdown = service._render_pr_summary_markdown( + resolved_markdown = mcp_helpers_mod._render_pr_summary_markdown( { "health": {"score": 81, "grade": "B"}, "health_delta": 1, @@ -2846,7 +2990,7 @@ def test_mcp_service_metrics_diff_warning_and_projection_branches( ), ) monkeypatch.setattr( - mcp_service_mod, + mcp_session_mod, "resolve_metrics_baseline_state", lambda **kwargs: SimpleNamespace( baseline=fake_metrics_baseline, @@ -2863,7 +3007,7 @@ def test_mcp_service_metrics_diff_warning_and_projection_branches( ) 
cache_with_warning.load_warning = "cache warning" monkeypatch.setattr( - service.session, + mcp_helpers_mod, "_build_cache", lambda **kwargs: cache_with_warning, ) @@ -3178,43 +3322,43 @@ def test_mcp_service_short_id_and_comparison_helper_branches( ) -> None: service = CodeCloneMCPService(history_limit=4) - entry = mcp_service_mod._CloneShortIdEntry( + entry = mcp_shared_mod._CloneShortIdEntry( canonical_id="clone:block:abcdefghij|rest", alias="blk", token="abcdefghijrest", suffix="|x2", ) assert entry.render(0) == "blk:abcdefghijrest|x2" - assert mcp_service_mod._partitioned_short_id("design", "cohesion") == ( + assert mcp_shared_mod._partitioned_short_id("design", "cohesion") == ( "design:cohesion" ) - function_entry = mcp_service_mod._clone_short_id_entry_payload( + function_entry = mcp_shared_mod._clone_short_id_entry_payload( "clone:function:abcdef123456|bucket2" ) assert function_entry.alias == "fn" assert function_entry.token == "abcdef123456" assert function_entry.suffix == "|bucket2" - plain_function_entry = mcp_service_mod._clone_short_id_entry_payload( + plain_function_entry = mcp_shared_mod._clone_short_id_entry_payload( "clone:function:abcdef123456" ) assert plain_function_entry.alias == "fn" assert plain_function_entry.suffix == "" - fallback_entry = mcp_service_mod._clone_short_id_entry_payload("clone:weird:opaque") + fallback_entry = mcp_shared_mod._clone_short_id_entry_payload("clone:weird:opaque") assert fallback_entry.alias == "clone" assert len(fallback_entry.token) == 64 # sha256 hex digest assert fallback_entry.suffix == "|x1" canonical_one = "clone:block:abcdefghzz|rest" canonical_two = "clone:block:abcdefghyy|rest" - clone_short_ids = mcp_service_mod._disambiguated_clone_short_ids_payload( + clone_short_ids = mcp_shared_mod._disambiguated_clone_short_ids_payload( [canonical_one, canonical_two] ) assert len(set(clone_short_ids.values())) == 2 assert all(value.startswith("blk:") for value in clone_short_ids.values()) assert all("|x2" in value 
for value in clone_short_ids.values()) - single_result = mcp_service_mod._disambiguated_clone_short_ids_payload( + single_result = mcp_shared_mod._disambiguated_clone_short_ids_payload( ["clone:block:ab"] ) assert "clone:block:ab" in single_result @@ -3261,70 +3405,82 @@ def test_mcp_service_short_id_and_comparison_helper_branches( assert len(set(canonical_to_short.values())) == 2 assert set(short_to_canonical) == set(canonical_to_short.values()) assert ( - service._disambiguated_short_finding_ids([canonical_one, canonical_two]) + mcp_helpers_mod._disambiguated_short_finding_ids([canonical_one, canonical_two]) == clone_short_ids ) - assert service._base_short_finding_id(canonical_one) == "blk:a1c488|x2" + assert mcp_helpers_mod._base_short_finding_id(canonical_one) == "blk:a1c488|x2" assert ( - mcp_service_mod._base_short_finding_id_payload("clone:function:abcdef123456") + mcp_shared_mod._base_short_finding_id_payload("clone:function:abcdef123456") == "fn:abcdef" ) - assert service._base_short_finding_id("clone:function:abcdef123456") == "fn:abcdef" assert ( - service._base_short_finding_id("structural:duplicated_branches:abcdef123456") + mcp_helpers_mod._base_short_finding_id("clone:function:abcdef123456") + == "fn:abcdef" + ) + assert ( + mcp_helpers_mod._base_short_finding_id( + "structural:duplicated_branches:abcdef123456" + ) == "struct:duplicated_branches:abcdef" ) assert ( - mcp_service_mod._base_short_finding_id_payload( + mcp_shared_mod._base_short_finding_id_payload( "design:cohesion:pkg.mod:Runner.run" ) == "design:cohesion:run" ) - assert service._base_short_finding_id("custom:finding") == "custom:finding" + assert mcp_helpers_mod._base_short_finding_id("custom:finding") == "custom:finding" assert ( - service._disambiguated_short_finding_id("clone:function:abcdef123456") + mcp_helpers_mod._disambiguated_short_finding_id("clone:function:abcdef123456") == "fn:abcdef123456" ) assert ( - 
service._disambiguated_short_finding_id("clone:function:abcdef123456|bucket2") + mcp_helpers_mod._disambiguated_short_finding_id( + "clone:function:abcdef123456|bucket2" + ) == "fn:abcdef123456|bucket2" ) assert ( - service._disambiguated_short_finding_id("clone:block:abcdef123456|rest") + mcp_helpers_mod._disambiguated_short_finding_id("clone:block:abcdef123456|rest") == "blk:e38144d04782fe95c05f0588c53ea7d553f0efdc555788f629e73be6501597d1|x2" ) assert ( - service._disambiguated_short_finding_id("structural:dup:abc:def") + mcp_helpers_mod._disambiguated_short_finding_id("structural:dup:abc:def") == "struct:dup:abc:def" ) assert ( - service._disambiguated_short_finding_id("dead_code:pkg.mod:Runner.run") + mcp_helpers_mod._disambiguated_short_finding_id("dead_code:pkg.mod:Runner.run") == "dead:pkg.mod:Runner.run" ) - assert service._disambiguated_short_finding_id("custom:finding") == "custom:finding" assert ( - service._disambiguated_short_finding_id("design:cohesion:pkg.mod:Runner") + mcp_helpers_mod._disambiguated_short_finding_id("custom:finding") + == "custom:finding" + ) + assert ( + mcp_helpers_mod._disambiguated_short_finding_id( + "design:cohesion:pkg.mod:Runner" + ) == "design:cohesion:pkg.mod:Runner" ) assert ( - mcp_service_mod._disambiguated_short_finding_id_payload( + mcp_shared_mod._disambiguated_short_finding_id_payload( "dead_code:pkg.mod:Runner.run" ) == "dead:pkg.mod:Runner.run" ) - mixed_short_ids = service._disambiguated_short_finding_ids( + mixed_short_ids = mcp_helpers_mod._disambiguated_short_finding_ids( [canonical_one, "design:cohesion:pkg.mod:Runner"] ) assert mixed_short_ids[canonical_one].startswith("blk:") assert mixed_short_ids["design:cohesion:pkg.mod:Runner"] == ( "design:cohesion:pkg.mod:Runner" ) - assert service._leaf_symbol_name("") == "" - assert service._leaf_symbol_name("pkg.mod:Runner.run") == "run" - assert service._leaf_symbol_name("pkg.mod") == "mod" - assert mcp_service_mod._leaf_symbol_name_payload("pkg.mod:Runner.run") 
== "run" - assert json.loads(mcp_service_mod._json_text_payload({"b": 1, "a": 2})) == { + assert mcp_helpers_mod._leaf_symbol_name("") == "" + assert mcp_helpers_mod._leaf_symbol_name("pkg.mod:Runner.run") == "run" + assert mcp_helpers_mod._leaf_symbol_name("pkg.mod") == "mod" + assert mcp_shared_mod._leaf_symbol_name_payload("pkg.mod:Runner.run") == "run" + assert json.loads(mcp_shared_mod._json_text_payload({"b": 1, "a": 2})) == { "a": 2, "b": 1, } @@ -3336,12 +3492,12 @@ def test_mcp_service_short_id_and_comparison_helper_branches( lambda _record: [{"id": "clone:block:one"}, {"id": "clone:block:two"}], ) monkeypatch.setattr( - collision_service.session, + mcp_helpers_mod, "_base_short_finding_id", lambda _cid: "blk:dup|x1", ) monkeypatch.setattr( - collision_service.session, + mcp_helpers_mod, "_disambiguated_short_finding_ids", lambda _ids: { "clone:block:one": "blk:resolved1|x1", @@ -3384,7 +3540,7 @@ def test_mcp_service_short_id_and_comparison_helper_branches( new_block=frozenset(), metrics_diff=None, ) - scope = service._comparison_scope(before=same_root, after=different_scope) + scope = mcp_helpers_mod._comparison_scope(before=same_root, after=different_scope) assert scope["comparable"] is False assert scope["reason"] == "different_root_and_analysis_settings" @@ -3393,13 +3549,13 @@ def test_mcp_service_clone_short_id_helper_iteration_and_fallback_branches( monkeypatch: pytest.MonkeyPatch, ) -> None: iterative_entries = { - "clone:block:one": mcp_service_mod._CloneShortIdEntry( + "clone:block:one": mcp_shared_mod._CloneShortIdEntry( canonical_id="clone:block:one", alias="blk", token="abcdefghij", suffix="|x1", ), - "clone:block:two": mcp_service_mod._CloneShortIdEntry( + "clone:block:two": mcp_shared_mod._CloneShortIdEntry( canonical_id="clone:block:two", alias="blk", token="abcdefghkl", @@ -3407,11 +3563,11 @@ def test_mcp_service_clone_short_id_helper_iteration_and_fallback_branches( ), } monkeypatch.setattr( - mcp_service_mod, + mcp_shared_mod, 
"_clone_short_id_entry_payload", lambda canonical_id: iterative_entries[canonical_id], ) - assert mcp_service_mod._disambiguated_clone_short_ids_payload( + assert mcp_shared_mod._disambiguated_clone_short_ids_payload( ["clone:block:one", "clone:block:two"] ) == { "clone:block:one": "blk:abcdefghij|x1", @@ -3419,13 +3575,13 @@ def test_mcp_service_clone_short_id_helper_iteration_and_fallback_branches( } fallback_entries = { - "clone:block:one": mcp_service_mod._CloneShortIdEntry( + "clone:block:one": mcp_shared_mod._CloneShortIdEntry( canonical_id="clone:block:one", alias="blk", token="abcdefghij", suffix="|x1", ), - "clone:block:two": mcp_service_mod._CloneShortIdEntry( + "clone:block:two": mcp_shared_mod._CloneShortIdEntry( canonical_id="clone:block:two", alias="blk", token="abcdefghij", @@ -3433,11 +3589,11 @@ def test_mcp_service_clone_short_id_helper_iteration_and_fallback_branches( ), } monkeypatch.setattr( - mcp_service_mod, + mcp_shared_mod, "_clone_short_id_entry_payload", lambda canonical_id: fallback_entries[canonical_id], ) - assert mcp_service_mod._disambiguated_clone_short_ids_payload( + assert mcp_shared_mod._disambiguated_clone_short_ids_payload( ["clone:block:one", "clone:block:two"] ) == { "clone:block:one": "blk:abcdefghij|x1", @@ -3487,10 +3643,10 @@ def test_mcp_service_payload_and_resolution_helper_fallbacks( ) with pytest.raises(MCPServiceContractError, match="absolute repository root"): - service._resolve_root(None) + mcp_helpers_mod._resolve_root(None) - assert service._normal_location_payload({"file": "", "line": 4}) == {} - assert service._normal_location_payload( + assert mcp_helpers_mod._normal_location_payload({"file": "", "line": 4}) == {} + assert mcp_helpers_mod._normal_location_payload( {"file": "pkg/mod.py", "line": 4, "end_line": 9, "symbol": "pkg.mod:Runner.run"} ) == { "path": "pkg/mod.py", @@ -3498,22 +3654,24 @@ def test_mcp_service_payload_and_resolution_helper_fallbacks( "end_line": 9, "symbol": "run", } - assert 
service._normal_location_payload( + assert mcp_helpers_mod._normal_location_payload( {"file": "pkg/mod.py", "line": 0, "symbol": ""} ) == {"path": "pkg/mod.py", "line": 0, "end_line": 0} - assert service._finding_display_location({"locations": []}) == "(unknown)" + assert mcp_helpers_mod._finding_display_location({"locations": []}) == "(unknown)" assert ( - service._finding_display_location({"locations": [{"file": "", "line": 3}]}) + mcp_helpers_mod._finding_display_location( + {"locations": [{"file": "", "line": 3}]} + ) == "(unknown)" ) assert ( - service._finding_display_location( + mcp_helpers_mod._finding_display_location( {"locations": [{"file": "pkg/mod.py", "line": 0}]} ) == "pkg/mod.py" ) - assert service._comparison_summary_text( + assert mcp_helpers_mod._comparison_summary_text( comparable=True, comparability_reason="comparable", regressions=2, @@ -3542,7 +3700,7 @@ def test_mcp_service_payload_and_resolution_helper_fallbacks( source_kind="tests", title="Reduce complexity", ) - canonical_finding_id = mcp_service_mod._suggestion_finding_id_payload(suggestion) + canonical_finding_id = mcp_shared_mod._suggestion_finding_id_payload(suggestion) triage_record = MCPRunRecord( run_id="triage", root=tmp_path, @@ -3612,13 +3770,17 @@ def test_mcp_service_summary_and_metrics_detail_helper_fallbacks( with pytest.raises(MCPServiceContractError, match="section 'derived'"): service._derived_section_payload(_dummy_run_record(tmp_path, "no-derived")) - assert service._summary_health_payload({"analysis_mode": "clones_only"}) == { + assert mcp_helpers_mod._summary_health_payload( + {"analysis_mode": "clones_only"} + ) == { "available": False, "reason": "metrics_skipped", } - assert service._summary_health_score({"analysis_mode": "clones_only"}) is None assert ( - service._summary_health_delta( + mcp_helpers_mod._summary_health_score({"analysis_mode": "clones_only"}) is None + ) + assert ( + mcp_helpers_mod._summary_health_delta( { "analysis_mode": "clones_only", 
"metrics_diff": {"health_delta": 7}, @@ -3626,11 +3788,11 @@ def test_mcp_service_summary_and_metrics_detail_helper_fallbacks( ) is None ) - assert service._summary_health_payload({}) == { + assert mcp_helpers_mod._summary_health_payload({}) == { "available": False, "reason": "unavailable", } - assert service._summary_cache_payload({}) == {} + assert mcp_helpers_mod._summary_cache_payload({}) == {} assert service._summary_findings_payload( {"findings_summary": {"total": 7}}, record=None, @@ -3910,7 +4072,7 @@ def test_mcp_service_summary_and_metrics_detail_helper_fallbacks( }, ], } - assert service._compact_metrics_item( + assert mcp_helpers_mod._compact_metrics_item( {"qualname": "pkg.mod:run", "score": 10, "skip": None} ) == {"qualname": "pkg.mod:run", "score": 10} @@ -3918,9 +4080,8 @@ def test_mcp_service_summary_and_metrics_detail_helper_fallbacks( def test_mcp_service_clone_only_short_id_fallback_branch( monkeypatch: pytest.MonkeyPatch, ) -> None: - service = CodeCloneMCPService(history_limit=2) monkeypatch.setattr( - mcp_service_mod, + mcp_helpers_mod, "_disambiguated_clone_short_ids_payload", lambda _canonical_ids: { "clone:block:one": "blk:dup|x1", @@ -3928,7 +4089,7 @@ def test_mcp_service_clone_only_short_id_fallback_branch( }, ) - result = service._disambiguated_short_finding_ids( + result = mcp_helpers_mod._disambiguated_short_finding_ids( ["clone:block:one", "clone:block:two"] ) import hashlib diff --git a/tests/test_structural_findings.py b/tests/test_structural_findings.py index 942792c..afba181 100644 --- a/tests/test_structural_findings.py +++ b/tests/test_structural_findings.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: MPL-2.0 # Copyright (c) 2026 Den Rozhnovskiy -"""Unit tests for codeclone.structural_findings (Phase 1: duplicated_branches).""" +"""Unit tests for codeclone.findings.structural.detectors.""" from __future__ import annotations From f0dd2a2f802419b9e8626ead287db7715e74fca8 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 
23 Apr 2026 17:24:44 +0500 Subject: [PATCH 14/32] feat(cli): show one-time VS Code extension hint in interactive terminals --- CHANGELOG.md | 2 + codeclone/surfaces/cli/tips.py | 159 +++++++++++++++++++++++++++++ codeclone/surfaces/cli/workflow.py | 9 ++ codeclone/ui_messages/__init__.py | 10 ++ docs/book/09-cli.md | 4 + docs/vscode-extension.md | 5 + tests/test_cli_inprocess.py | 30 ++++++ tests/test_cli_unit.py | 105 +++++++++++++++++++ 8 files changed, 324 insertions(+) create mode 100644 codeclone/surfaces/cli/tips.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d6ea5ad..5153cac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ entrypoints, cleaner dependency boundaries, refreshed tests, and aligned docs. read-only MCP semantics. - Prune stale deleted-file cache entries and tighten dependency chains that were inflating post-refactor architectural depth. +- Add a quiet one-time VS Code extension hint in interactive VS Code terminals, + tracked per CodeClone version next to the resolved project cache path. ## [2.0.0b5] - 2026-04-16 diff --git a/codeclone/surfaces/cli/tips.py b/codeclone/surfaces/cli/tips.py new file mode 100644 index 0000000..7b2e22c --- /dev/null +++ b/codeclone/surfaces/cli/tips.py @@ -0,0 +1,159 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Den Rozhnovskiy + +from __future__ import annotations + +import os +import sys +from collections.abc import Mapping +from pathlib import Path +from typing import TextIO + +from ... 
import ui_messages as ui +from ...utils.json_io import read_json_object, write_json_document_atomically +from .attrs import bool_attr +from .types import PrinterLike + +_VSCODE_EXTENSION_TIP_KEY = "vscode_extension" +_TIPS_SCHEMA_VERSION = 1 +_VSCODE_EXTENSION_URL = ( + "https://marketplace.visualstudio.com/items?itemName=orenlab.codeclone" +) +_CI_ENV_KEYS: tuple[str, ...] = ( + "CI", + "GITHUB_ACTIONS", + "BUILDKITE", + "TF_BUILD", + "TEAMCITY_VERSION", +) +_VSCODE_ENV_KEYS: tuple[str, ...] = ( + "VSCODE_PID", + "VSCODE_IPC_HOOK", + "VSCODE_CWD", +) + + +def _tips_state_path(cache_path: Path) -> Path: + return cache_path.parent / "tips.json" + + +def _is_vscode_environment(environ: Mapping[str, str]) -> bool: + if environ.get("TERM_PROGRAM", "").strip().lower() == "vscode": + return True + return any(key in environ for key in _VSCODE_ENV_KEYS) + + +def _is_ci_environment(environ: Mapping[str, str]) -> bool: + return any(environ.get(key, "").strip() for key in _CI_ENV_KEYS) + + +def _stream_is_tty(stream: TextIO) -> bool: + try: + return bool(stream.isatty()) + except OSError: + return False + + +def _empty_tips_state() -> dict[str, object]: + return { + "schema_version": _TIPS_SCHEMA_VERSION, + "tips": {}, + } + + +def _load_tips_state(path: Path) -> dict[str, object]: + try: + payload = read_json_object(path) + except (OSError, TypeError, ValueError): + return _empty_tips_state() + tips = payload.get("tips") + if not isinstance(tips, dict): + return _empty_tips_state() + return { + "schema_version": _TIPS_SCHEMA_VERSION, + "tips": dict(tips), + } + + +def _tip_last_shown_version(state: Mapping[str, object], *, tip_key: str) -> str: + tips = state.get("tips") + if not isinstance(tips, dict): + return "" + entry = tips.get(tip_key) + if not isinstance(entry, dict): + return "" + last_shown_version = entry.get("last_shown_version") + if isinstance(last_shown_version, str): + return last_shown_version + return "" + + +def _remember_tip_version( + *, + path: Path, + 
state: Mapping[str, object], + tip_key: str, + codeclone_version: str, +) -> None: + tips = state.get("tips") + updated_tips = dict(tips) if isinstance(tips, dict) else {} + updated_tips[tip_key] = {"last_shown_version": codeclone_version} + write_json_document_atomically( + path, + { + "schema_version": _TIPS_SCHEMA_VERSION, + "tips": updated_tips, + }, + sort_keys=True, + indent=True, + trailing_newline=True, + ) + + +def maybe_print_vscode_extension_tip( + *, + args: object, + console: PrinterLike, + codeclone_version: str, + cache_path: Path, + environ: Mapping[str, str] | None = None, + stream: TextIO | None = None, +) -> bool: + effective_environ = os.environ if environ is None else environ + effective_stream = sys.stdout if stream is None else stream + if bool_attr(args, "quiet") or bool_attr(args, "ci"): + return False + if _is_ci_environment(effective_environ): + return False + if not _stream_is_tty(effective_stream): + return False + if not _is_vscode_environment(effective_environ): + return False + + state_path = _tips_state_path(cache_path) + state = _load_tips_state(state_path) + if ( + _tip_last_shown_version(state, tip_key=_VSCODE_EXTENSION_TIP_KEY) + == codeclone_version + ): + return False + + console.print(ui.fmt_vscode_extension_tip(url=_VSCODE_EXTENSION_URL)) + try: + _remember_tip_version( + path=state_path, + state=state, + tip_key=_VSCODE_EXTENSION_TIP_KEY, + codeclone_version=codeclone_version, + ) + except OSError: + return True + return True + + +__all__ = [ + "maybe_print_vscode_extension_tip", +] diff --git a/codeclone/surfaces/cli/workflow.py b/codeclone/surfaces/cli/workflow.py index 24f408d..bbe714e 100644 --- a/codeclone/surfaces/cli/workflow.py +++ b/codeclone/surfaces/cli/workflow.py @@ -42,6 +42,7 @@ from . import startup as cli_startup from . import state as cli_state from . import summary as cli_summary +from . 
import tips as cli_tips from .types import CLIArgsLike, StatusConsole, require_status_console __all__ = [ @@ -78,6 +79,7 @@ "discover", "gate", "main", + "maybe_print_vscode_extension_tip", "print_banner", "process", "report", @@ -111,6 +113,7 @@ print_metrics_if_available = cli_post_run.print_metrics_if_available resolve_changed_clone_gate = cli_post_run.resolve_changed_clone_gate warn_new_clones_without_fail = cli_post_run.warn_new_clones_without_fail +maybe_print_vscode_extension_tip = cli_tips.maybe_print_vscode_extension_tip _report_path_origins = cli_reports_output._report_path_origins _resolve_output_paths = cli_reports_output._resolve_output_paths @@ -518,6 +521,12 @@ def _main_impl() -> None: notice_new_clones_count=notice_new_clones_count, console=_console(), ) + maybe_print_vscode_extension_tip( + args=args, + console=_console(), + codeclone_version=__version__, + cache_path=cache_path, + ) print_pipeline_done_if_needed(args=args, run_started_at=run_started_at) diff --git a/codeclone/ui_messages/__init__.py b/codeclone/ui_messages/__init__.py index d59e50c..388b324 100644 --- a/codeclone/ui_messages/__init__.py +++ b/codeclone/ui_messages/__init__.py @@ -341,6 +341,12 @@ "\n[warning]New clones detected but --fail-on-new not set.[/warning]\n" "Run with --update-baseline to accept them as technical debt." ) +TIP_VSCODE_EXTENSION = ( + "\n[dim]Tip:[/dim] VS Code detected. 
" + "CodeClone has a native extension for triage-first review and hotspot " + "navigation.\n" + "[dim]{url}[/dim]" +) def version_output(version: str) -> str: @@ -414,6 +420,10 @@ def fmt_cache_save_failed(error: object) -> str: return WARN_CACHE_SAVE_FAILED.format(error=error) +def fmt_vscode_extension_tip(*, url: str) -> str: + return TIP_VSCODE_EXTENSION.format(url=url) + + def fmt_legacy_cache_warning(*, legacy_path: Path, new_path: Path) -> str: return WARN_LEGACY_CACHE.format(legacy_path=legacy_path, new_path=new_path) diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md index 79b4ebb..6cbe37a 100644 --- a/docs/book/09-cli.md +++ b/docs/book/09-cli.md @@ -46,6 +46,10 @@ Refs: - Bare report flags write to deterministic default paths under `.cache/codeclone/`. - `--open-html-report` is layered on top of `--html`; it does not imply HTML output. - `--timestamped-report-paths` rewrites only default report paths requested via bare flags. +- In interactive VS Code terminals, the CLI may print a one-time extension hint + after summary output. The hint is suppressed in `--quiet`, CI, and non-TTY + contexts, and is tracked per CodeClone version next to the resolved project + cache path. - Changed-scope review uses: - `--changed-only` - `--diff-against` diff --git a/docs/vscode-extension.md b/docs/vscode-extension.md index 45d2599..561a412 100644 --- a/docs/vscode-extension.md +++ b/docs/vscode-extension.md @@ -51,6 +51,11 @@ Verify the launcher: codeclone-mcp --help ``` +When you run the CLI inside an interactive VS Code terminal, CodeClone may also +show a one-time extension hint after the summary. It is suppressed in quiet, +CI, and non-interactive runs, and is remembered per CodeClone version next to +the resolved project cache path. 
+ ## Main views ### Overview diff --git a/tests/test_cli_inprocess.py b/tests/test_cli_inprocess.py index 606598d..952379c 100644 --- a/tests/test_cli_inprocess.py +++ b/tests/test_cli_inprocess.py @@ -24,6 +24,7 @@ import codeclone.core.worker as core_worker import codeclone.surfaces.cli.report_meta as cli_meta import codeclone.surfaces.cli.reports_output as cli_reports +import codeclone.surfaces.cli.tips as cli_tips import codeclone.surfaces.cli.workflow as cli from codeclone import __version__ from codeclone.cache.store import Cache, file_stat_signature @@ -2261,6 +2262,35 @@ def test_cli_outputs_quiet_no_print( assert "report saved" not in out +def test_cli_shows_vscode_extension_tip_once_per_version( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _write_default_source(tmp_path) + tips_path = tmp_path / ".cache" / "codeclone" / "tips.json" + + monkeypatch.setenv("TERM_PROGRAM", "vscode") + monkeypatch.delenv("CI", raising=False) + monkeypatch.delenv("GITHUB_ACTIONS", raising=False) + monkeypatch.setattr(cli_tips, "_stream_is_tty", lambda _stream: True) + + _run_parallel_main(monkeypatch, [str(tmp_path), "--no-progress", "--no-color"]) + first_out = capsys.readouterr().out + + assert "VS Code detected" in first_out + assert "marketplace.visualstudio.com" in first_out + assert first_out.index("Summary") < first_out.index("Tip:") + + state = json.loads(tips_path.read_text("utf-8")) + assert state["tips"]["vscode_extension"]["last_shown_version"] == __version__ + + _run_parallel_main(monkeypatch, [str(tmp_path), "--no-progress", "--no-color"]) + second_out = capsys.readouterr().out + + assert "VS Code detected" not in second_out + + def test_cli_update_baseline_skips_version_check( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index 3dd6aa0..6a9abf2 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ -11,6 +11,7 @@ import 
webbrowser from argparse import Namespace from collections.abc import Callable +from io import StringIO from pathlib import Path from types import SimpleNamespace from typing import Any, cast @@ -28,6 +29,7 @@ import codeclone.surfaces.cli.reports_output as cli_reports import codeclone.surfaces.cli.runtime as cli_runtime import codeclone.surfaces.cli.summary as cli_summary +import codeclone.surfaces.cli.tips as cli_tips import codeclone.surfaces.cli.workflow as cli from codeclone import __version__ from codeclone import ui_messages as ui @@ -57,6 +59,15 @@ def print(self, *objects: object, **kwargs: object) -> None: self.lines.append(" ".join(str(obj) for obj in objects)) +class _TTYStream(StringIO): + def __init__(self, *, is_tty: bool) -> None: + super().__init__() + self._is_tty = is_tty + + def isatty(self) -> bool: + return self._is_tty + + def _metrics_baseline_runtime_for_gate_checks() -> ( cli_baselines_mod._MetricsBaselineRuntime ): @@ -178,6 +189,100 @@ def test_cli_attr_helpers_handle_bool_int_and_path_edges(tmp_path: Path) -> None assert cli_attrs.optional_text_attr(args, "invalid_text") is None +def test_cli_tips_detect_vscode_environment_signals() -> None: + assert cli_tips._is_vscode_environment({"TERM_PROGRAM": "vscode"}) is True + assert cli_tips._is_vscode_environment({"VSCODE_PID": "123"}) is True + assert cli_tips._is_vscode_environment({"TERM_PROGRAM": "xterm-256color"}) is False + + +def test_cli_vscode_extension_tip_uses_versioned_cache( + tmp_path: Path, +) -> None: + printer = _RecordingPrinter() + args = SimpleNamespace(quiet=False, ci=False) + env = {"TERM_PROGRAM": "vscode"} + cache_path = tmp_path / ".cache" / "codeclone" / "cache.json" + + cli_tips.maybe_print_vscode_extension_tip( + args=args, + console=printer, + codeclone_version=__version__, + cache_path=cache_path, + environ=env, + stream=_TTYStream(is_tty=True), + ) + + assert len(printer.lines) == 1 + assert "VS Code detected" in printer.lines[0] + assert 
"marketplace.visualstudio.com" in printer.lines[0] + + tips_path = cache_path.parent / "tips.json" + state = json.loads(tips_path.read_text("utf-8")) + assert state["tips"]["vscode_extension"]["last_shown_version"] == __version__ + + shown_again = cli_tips.maybe_print_vscode_extension_tip( + args=args, + console=printer, + codeclone_version=__version__, + cache_path=cache_path, + environ=env, + stream=_TTYStream(is_tty=True), + ) + assert shown_again is False + assert len(printer.lines) == 1 + + shown_for_new_version = cli_tips.maybe_print_vscode_extension_tip( + args=args, + console=printer, + codeclone_version=f"{__version__}.post1", + cache_path=cache_path, + environ=env, + stream=_TTYStream(is_tty=True), + ) + assert shown_for_new_version is True + assert len(printer.lines) == 2 + + +@pytest.mark.parametrize( + ("args", "env", "isatty"), + [ + (SimpleNamespace(quiet=True, ci=False), {"TERM_PROGRAM": "vscode"}, True), + (SimpleNamespace(quiet=False, ci=True), {"TERM_PROGRAM": "vscode"}, True), + ( + SimpleNamespace(quiet=False, ci=False), + {"TERM_PROGRAM": "vscode", "CI": "1"}, + True, + ), + (SimpleNamespace(quiet=False, ci=False), {"TERM_PROGRAM": "vscode"}, False), + ( + SimpleNamespace(quiet=False, ci=False), + {"TERM_PROGRAM": "xterm-256color"}, + True, + ), + ], +) +def test_cli_vscode_extension_tip_respects_context_gates( + tmp_path: Path, + args: SimpleNamespace, + env: dict[str, str], + isatty: bool, +) -> None: + printer = _RecordingPrinter() + effective_env = dict(env) + + shown = cli_tips.maybe_print_vscode_extension_tip( + args=args, + console=printer, + codeclone_version=__version__, + cache_path=tmp_path / ".cache" / "codeclone" / "cache.json", + environ=effective_env, + stream=_TTYStream(is_tty=isatty), + ) + + assert shown is False + assert printer.lines == [] + + def test_cli_module_main_guard(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr(sys, "argv", ["codeclone", "--help"]) with pytest.raises(SystemExit) as exc: From 
7bd2a113a994547f8f71f206fbae0518730431bd Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 23 Apr 2026 17:38:02 +0500 Subject: [PATCH 15/32] refactor(report): move html template into canonical html package --- AGENTS.md | 2 +- codeclone/report/html/assemble.py | 2 +- codeclone/{templates.py => report/html/template.py} | 5 +++++ docs/book/01-architecture-map.md | 2 +- tests/test_target_module_map_imports.py | 1 + 5 files changed, 9 insertions(+), 3 deletions(-) rename codeclone/{templates.py => report/html/template.py} (96%) diff --git a/AGENTS.md b/AGENTS.md index 5d45c03..312450c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -384,7 +384,7 @@ Architecture is layered, but grounded in current code (not aspirational diagrams - **Canonical report + projections** (`codeclone/report/document/*`, `codeclone/report/gates/*`, `codeclone/report/renderers/*`, `codeclone/report/*.py`) converts analysis facts into deterministic report payloads and deterministic projections. -- **HTML/UI rendering** (`codeclone/report/html/*`, `codeclone/templates.py`) renders views from canonical report/meta +- **HTML/UI rendering** (`codeclone/report/html/*`) renders views from canonical report/meta facts. HTML is render-only. - **MCP agent interface** (`codeclone/surfaces/mcp/*`) exposes the same pipeline/report contracts as a deterministic, read-only MCP surface for AI agents and MCP-capable clients. 
diff --git a/codeclone/report/html/assemble.py b/codeclone/report/html/assemble.py index f8e40cd..cc6331b 100644 --- a/codeclone/report/html/assemble.py +++ b/codeclone/report/html/assemble.py @@ -15,7 +15,6 @@ from ...contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL from ...domain.quality import CONFIDENCE_HIGH from ...findings.structural.detectors import normalize_structural_findings -from ...templates import FONT_CSS_URL, REPORT_TEMPLATE from ...utils import coerce as _coerce from ._context import _meta_pick, build_context from .assets.css import build_css @@ -29,6 +28,7 @@ from .sections._overview import render_overview_panel from .sections._structural import render_structural_panel from .sections._suggestions import render_suggestions_panel +from .template import FONT_CSS_URL, REPORT_TEMPLATE from .widgets.icons import BRAND_LOGO, ICONS, section_icon_html from .widgets.snippets import _FileCache, _pygments_css diff --git a/codeclone/templates.py b/codeclone/report/html/template.py similarity index 96% rename from codeclone/templates.py rename to codeclone/report/html/template.py index a13cb31..b083d7d 100644 --- a/codeclone/templates.py +++ b/codeclone/report/html/template.py @@ -45,3 +45,8 @@ """ ) + +__all__ = [ + "FONT_CSS_URL", + "REPORT_TEMPLATE", +] diff --git a/docs/book/01-architecture-map.md b/docs/book/01-architecture-map.md index 774a794..61d66a7 100644 --- a/docs/book/01-architecture-map.md +++ b/docs/book/01-architecture-map.md @@ -34,7 +34,7 @@ Main ownership layers: | Persistence | `codeclone/baseline/*`, `codeclone/cache/*` | Trusted comparison state and optimization-only cache contracts | | Canonical report | `codeclone/report/document/*`, `codeclone/report/gates/*`, `codeclone/report/*.py` | Canonical report payload, derived projections, explainability, suggestions, gate reasons | | Deterministic renderers | `codeclone/report/renderers/*` | Text/Markdown/SARIF/JSON projections over the canonical report | -| HTML render layer | 
`codeclone/report/html/*`, `codeclone/templates.py` | Render-only HTML view over canonical report/meta facts | +| HTML render layer | `codeclone/report/html/*` | Render-only HTML view over canonical report/meta facts | | MCP surface | `codeclone/surfaces/mcp/*` | Read-only MCP tools/resources over the same pipeline/report contracts | | Client surfaces | `extensions/vscode-codeclone/*`, `extensions/claude-desktop-codeclone/*`, `plugins/codeclone/*` | Native clients/install surfaces over `codeclone-mcp` | diff --git a/tests/test_target_module_map_imports.py b/tests/test_target_module_map_imports.py index fe628a3..db29b9b 100644 --- a/tests/test_target_module_map_imports.py +++ b/tests/test_target_module_map_imports.py @@ -61,6 +61,7 @@ def test_old_analysis_and_findings_paths_are_gone() -> None: assert importlib.util.find_spec("codeclone.grouping") is None assert importlib.util.find_spec("codeclone.pipeline") is None assert importlib.util.find_spec("codeclone.structural_findings") is None + assert importlib.util.find_spec("codeclone.templates") is None assert callable(canonical_build_groups) assert callable(canonical_scan_function_structure) From 67a0adea2ad0e78effd390ad7d79401fa555ac35 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 23 Apr 2026 17:59:03 +0500 Subject: [PATCH 16/32] refactor(analysis,findings): move suppressions and golden fixtures to canonical modules --- AGENTS.md | 10 ++++++---- codeclone/analysis/_module_walk.py | 16 ++++++++-------- codeclone/{ => analysis}/suppressions.py | 3 +-- codeclone/config/pyproject_loader.py | 2 +- codeclone/core/metrics_payload.py | 5 ++++- codeclone/core/pipeline.py | 8 ++++---- .../{ => findings/clones}/golden_fixtures.py | 6 +++--- codeclone/metrics/dead_code.py | 2 +- codeclone/report/document/metrics.py | 2 +- docs/book/16-dead-code-contract.md | 2 +- docs/book/19-inline-suppressions.md | 12 ++++++------ tests/test_golden_fixtures.py | 2 +- tests/test_suppressions.py | 2 +- 
tests/test_target_module_map_imports.py | 2 ++ 14 files changed, 40 insertions(+), 34 deletions(-) rename codeclone/{ => analysis}/suppressions.py (98%) rename codeclone/{ => findings/clones}/golden_fixtures.py (97%) diff --git a/AGENTS.md b/AGENTS.md index 312450c..983a4df 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -430,8 +430,10 @@ Use this map to route changes to the right owner module. and deterministic. - `codeclone/metrics/*` — metric computations and dead-code/dependency/health logic; change metric math and thresholds here; do not make metrics depend on renderer/UI concerns. -- `codeclone/suppressions.py` — inline `# codeclone: ignore[...]` parse/bind/index logic; keep it declaration-scoped - and deterministic. +- `codeclone/analysis/suppressions.py` — inline `# codeclone: ignore[...]` parse/bind/index logic; keep it + declaration-scoped and deterministic. +- `codeclone/findings/clones/golden_fixtures.py` — golden-fixture clone exclusion policy and suppressed-clone bucket + shaping; keep it clone-derivation-only and deterministic. - `codeclone/baseline/clone_baseline.py` + `codeclone/baseline/trust.py` — clone baseline schema/trust/integrity/ compatibility contract; all clone-baseline format changes go here with explicit contract process. - `codeclone/baseline/metrics_baseline.py` + `codeclone/baseline/_metrics_baseline_*` — metrics-baseline schema, @@ -497,7 +499,7 @@ Operational rules: Inline suppressions are explicit local policy, not analysis truth. -- Supported syntax is `# codeclone: ignore[rule-id,...]` via `codeclone/suppressions.py`. +- Supported syntax is `# codeclone: ignore[rule-id,...]` via `codeclone/analysis/suppressions.py`. - Binding scope is declaration-only (`def`, `async def`, `class`) using: - leading comment on the line immediately before declaration - inline comment on the declaration header start line @@ -523,7 +525,7 @@ If you change a contract-sensitive zone, route docs/tests/approval deliberately. 
| Canonical report JSON shape (`codeclone/report/document/*`, report projections) | `docs/book/08-report.md` (+ `docs/book/10-html-render.md` if rendering contract impacted), `docs/sarif.md` when SARIF changes, `CHANGELOG.md` | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py`, relevant report-format tests | finding/meta/summary schema changes | stable JSON fields/meaning/order guarantees change | | CLI flags/help/exit behavior (`codeclone/main.py`, `codeclone/surfaces/cli/*`, `codeclone/config/*`, `codeclone/contracts/*`) | `docs/book/09-cli.md`, `docs/book/03-contracts-exit-codes.md`, `README.md`, `CHANGELOG.md` | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py`, `tests/test_cli_smoke.py` | exit-code semantics, script-facing behavior, flag contracts change | user-visible CLI contract changes | | Fingerprint-adjacent analysis (`extractor/cfg/normalize/grouping`) | `docs/book/05-core-pipeline.md`, `docs/cfg.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_fingerprint.py`, `tests/test_extractor.py`, `tests/test_cfg.py`, golden tests (`tests/test_detector_golden.py`, `tests/test_golden_v2.py`) | always (see Section 1.6) | clone identity / NEW-vs-KNOWN / fingerprint inputs change | -| Suppression semantics/reporting (`suppressions`, extractor dead-code wiring, report/UI counters) | `docs/book/19-inline-suppressions.md`, `docs/book/16-dead-code-contract.md`, `docs/book/08-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | +| Suppression semantics/reporting (`codeclone/analysis/suppressions.py`, extractor dead-code wiring, report/UI 
counters) | `docs/book/19-inline-suppressions.md`, `docs/book/16-dead-code-contract.md`, `docs/book/08-report.md`, and interface docs if surfaced (`09-cli`, `10-html-render`) | `tests/test_suppressions.py`, `tests/test_extractor.py`, `tests/test_metrics_modules.py`, `tests/test_pipeline_metrics.py`, report/html/cli tests | declaration scope semantics, rule effect, or contract-visible counters/fields change | suppression changes alter active finding output or contract-visible report payload | | MCP interface (`codeclone/surfaces/mcp/*`, packaging extra/launcher) | `README.md`, `docs/book/20-mcp-interface.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/book/14-compatibility-and-versioning.md`, `CHANGELOG.md` | `tests/test_mcp_service.py`, `tests/test_mcp_server.py`, plus CLI/package tests if launcher/install semantics change | tool/resource shapes, read-only semantics, optional-dependency packaging behavior change | public MCP tool names, resource URIs, launcher/install behavior, or response semantics change | | VS Code extension surface (`extensions/vscode-codeclone/*`) | `README.md`, `docs/book/21-vscode-extension.md`, `docs/vscode-extension.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` | `node --check extensions/vscode-codeclone/src/support.js`, `node --check extensions/vscode-codeclone/src/mcpClient.js`, `node --check extensions/vscode-codeclone/src/extension.js`, `node --test extensions/vscode-codeclone/test/*.test.js`, plus local extension-host smoke and package smoke when surface/manifest/assets change | command/view UX, trust/runtime model, source-first review flow, or packaging metadata change | documented commands/views/setup/trust behavior, packaged assets, or publish metadata change | | Claude Desktop bundle surface (`extensions/claude-desktop-codeclone/*`) | `docs/book/22-claude-desktop-bundle.md`, `docs/claude-desktop-bundle.md`, `docs/mcp.md`, `docs/book/01-architecture-map.md`, `docs/README.md`, `CHANGELOG.md` 
| `node --check extensions/claude-desktop-codeclone/server/index.js`, `node --check extensions/claude-desktop-codeclone/src/launcher.js`, `node --check extensions/claude-desktop-codeclone/scripts/build-mcpb.mjs`, `node --test extensions/claude-desktop-codeclone/test/*.test.js`, plus `.mcpb` build smoke | bundle install/runtime model, launcher UX, local-stdio constraints, or bundle metadata change | documented Claude Desktop install/setup/runtime behavior or packaged bundle semantics change | diff --git a/codeclone/analysis/_module_walk.py b/codeclone/analysis/_module_walk.py index 2eef8ba..bba111e 100644 --- a/codeclone/analysis/_module_walk.py +++ b/codeclone/analysis/_module_walk.py @@ -13,13 +13,6 @@ from .. import qualnames as _qualnames from ..models import DeadCandidate, ModuleDep -from ..suppressions import ( - DeclarationTarget, - bind_suppressions_to_declarations, - build_suppression_index, - extract_suppression_directives, - suppression_target_key, -) from .class_metrics import _node_line_span from .parser import ( _build_declaration_token_index, @@ -27,11 +20,18 @@ _DeclarationTokenIndexKey, _source_tokens, ) +from .suppressions import ( + DeclarationTarget, + bind_suppressions_to_declarations, + build_suppression_index, + extract_suppression_directives, + suppression_target_key, +) if TYPE_CHECKING: from collections.abc import Mapping - from ..suppressions import SuppressionTargetKey + from .suppressions import SuppressionTargetKey _NamedDeclarationNode = _qualnames.FunctionNode | ast.ClassDef diff --git a/codeclone/suppressions.py b/codeclone/analysis/suppressions.py similarity index 98% rename from codeclone/suppressions.py rename to codeclone/analysis/suppressions.py index de49616..b717f6b 100644 --- a/codeclone/suppressions.py +++ b/codeclone/analysis/suppressions.py @@ -265,6 +265,5 @@ def build_suppression_index( end_line=binding.end_line, kind=binding.kind, ) - existing = index.get(key, ()) - index[key] = _merge_rules(existing, binding.rules) + 
index[key] = _merge_rules(index.get(key, ()), binding.rules) return index diff --git a/codeclone/config/pyproject_loader.py b/codeclone/config/pyproject_loader.py index 596f32f..3c2cd20 100644 --- a/codeclone/config/pyproject_loader.py +++ b/codeclone/config/pyproject_loader.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import TYPE_CHECKING -from ..golden_fixtures import ( +from ..findings.clones.golden_fixtures import ( GoldenFixturePatternError, normalize_golden_fixture_patterns, ) diff --git a/codeclone/core/metrics_payload.py b/codeclone/core/metrics_payload.py index 6f76cb2..d2386c0 100644 --- a/codeclone/core/metrics_payload.py +++ b/codeclone/core/metrics_payload.py @@ -8,6 +8,10 @@ from collections.abc import Mapping, Sequence +from ..analysis.suppressions import ( + DEAD_CODE_RULE_ID, + INLINE_CODECLONE_SUPPRESSION_SOURCE, +) from ..domain.findings import CATEGORY_COHESION, CATEGORY_COMPLEXITY, CATEGORY_COUPLING from ..domain.quality import CONFIDENCE_HIGH, RISK_LOW from ..metrics.overloaded_modules import build_overloaded_modules_payload @@ -20,7 +24,6 @@ ModuleDep, ProjectMetrics, ) -from ..suppressions import DEAD_CODE_RULE_ID, INLINE_CODECLONE_SUPPRESSION_SOURCE from ..utils.coerce import as_int, as_mapping, as_sequence, as_str from .api_surface_payload import ( _api_surface_rows, diff --git a/codeclone/core/pipeline.py b/codeclone/core/pipeline.py index 7625dee..8e18e9c 100644 --- a/codeclone/core/pipeline.py +++ b/codeclone/core/pipeline.py @@ -9,6 +9,10 @@ from collections.abc import Mapping, Sequence from ..contracts import DEFAULT_COVERAGE_MIN +from ..findings.clones.golden_fixtures import ( + build_suppressed_clone_groups, + split_clone_groups_for_golden_fixtures, +) from ..findings.clones.grouping import ( build_block_groups, build_groups, @@ -17,10 +21,6 @@ from ..findings.structural.detectors import ( build_clone_cohort_structural_findings, ) -from ..golden_fixtures import ( - build_suppressed_clone_groups, - 
split_clone_groups_for_golden_fixtures, -) from ..metrics._base import MetricProjectContext from ..metrics.coverage_join import CoverageJoinParseError, build_coverage_join from ..metrics.dead_code import find_suppressed_unused diff --git a/codeclone/golden_fixtures.py b/codeclone/findings/clones/golden_fixtures.py similarity index 97% rename from codeclone/golden_fixtures.py rename to codeclone/findings/clones/golden_fixtures.py index 3b6fe47..b60caaa 100644 --- a/codeclone/golden_fixtures.py +++ b/codeclone/findings/clones/golden_fixtures.py @@ -11,15 +11,15 @@ from pathlib import PurePosixPath from typing import Literal -from .domain.source_scope import SOURCE_KIND_FIXTURES, SOURCE_KIND_TESTS -from .models import ( +from ...domain.source_scope import SOURCE_KIND_FIXTURES, SOURCE_KIND_TESTS +from ...models import ( GroupItem, GroupItemLike, GroupMap, GroupMapLike, SuppressedCloneGroup, ) -from .paths import classify_source_kind, normalize_repo_path, relative_repo_path +from ...paths import classify_source_kind, normalize_repo_path, relative_repo_path CloneGroupKind = Literal["function", "block", "segment"] diff --git a/codeclone/metrics/dead_code.py b/codeclone/metrics/dead_code.py index b6306d9..599dc1c 100644 --- a/codeclone/metrics/dead_code.py +++ b/codeclone/metrics/dead_code.py @@ -9,11 +9,11 @@ from dataclasses import replace from typing import Literal +from ..analysis.suppressions import DEAD_CODE_RULE_ID from ..domain.findings import SYMBOL_KIND_FUNCTION, SYMBOL_KIND_METHOD from ..domain.quality import CONFIDENCE_HIGH, CONFIDENCE_MEDIUM from ..models import DeadCandidate, DeadItem from ..paths import is_test_filepath -from ..suppressions import DEAD_CODE_RULE_ID _TEST_NAME_PREFIXES = ("test_", "pytest_") _DYNAMIC_METHOD_PREFIXES = ("visit_",) diff --git a/codeclone/report/document/metrics.py b/codeclone/report/document/metrics.py index 3c98024..577ee01 100644 --- a/codeclone/report/document/metrics.py +++ b/codeclone/report/document/metrics.py @@ -8,6 
+8,7 @@ from collections.abc import Mapping +from ...analysis.suppressions import INLINE_CODECLONE_SUPPRESSION_SOURCE from ...domain.findings import ( CATEGORY_COHESION, CATEGORY_COMPLEXITY, @@ -23,7 +24,6 @@ SOURCE_KIND_OTHER, ) from ...metrics.registry import METRIC_FAMILIES -from ...suppressions import INLINE_CODECLONE_SUPPRESSION_SOURCE from ...utils.coerce import as_float as _as_float from ...utils.coerce import as_int as _as_int from ...utils.coerce import as_mapping as _as_mapping diff --git a/docs/book/16-dead-code-contract.md b/docs/book/16-dead-code-contract.md index c4c3805..6875bce 100644 --- a/docs/book/16-dead-code-contract.md +++ b/docs/book/16-dead-code-contract.md @@ -8,7 +8,7 @@ Define dead-code liveness rules, canonical symbol-usage boundaries, and gating s - Dead-code detection core: `codeclone/metrics/dead_code.py:find_unused` - Test-path classifier: `codeclone/paths.py:is_test_filepath` -- Inline suppression parser/binder: `codeclone/suppressions.py` +- Inline suppression parser/binder: `codeclone/analysis/suppressions.py` - Extraction of referenced names/candidates: `codeclone/analysis/units.py:extract_units_and_stats_from_source` - Cache load boundary for referenced names: diff --git a/docs/book/19-inline-suppressions.md b/docs/book/19-inline-suppressions.md index 1062d5c..cb296ad 100644 --- a/docs/book/19-inline-suppressions.md +++ b/docs/book/19-inline-suppressions.md @@ -7,7 +7,7 @@ source comments, without introducing broad/project-wide ignores. ## Public surface -- Suppression directive parser and binder: `codeclone/suppressions.py` +- Suppression directive parser and binder: `codeclone/analysis/suppressions.py` - Dead-code final filter: `codeclone/metrics/dead_code.py:find_unused` - Suppressed dead-code projection helper: `codeclone/metrics/dead_code.py:find_suppressed_unused` @@ -22,9 +22,9 @@ source comments, without introducing broad/project-wide ignores. 
Refs: -- `codeclone/suppressions.py:SuppressionDirective` -- `codeclone/suppressions.py:DeclarationTarget` -- `codeclone/suppressions.py:SuppressionBinding` +- `codeclone/analysis/suppressions.py:SuppressionDirective` +- `codeclone/analysis/suppressions.py:DeclarationTarget` +- `codeclone/analysis/suppressions.py:SuppressionBinding` - `codeclone/models.py:DeadCandidate` ## Contracts @@ -76,8 +76,8 @@ Refs: Refs: -- `codeclone/suppressions.py:extract_suppression_directives` -- `codeclone/suppressions.py:bind_suppressions_to_declarations` +- `codeclone/analysis/suppressions.py:extract_suppression_directives` +- `codeclone/analysis/suppressions.py:bind_suppressions_to_declarations` - `codeclone/cache/_canonicalize.py:_canonicalize_cache_entry` ## Locked by tests diff --git a/tests/test_golden_fixtures.py b/tests/test_golden_fixtures.py index 398e52d..46d52d1 100644 --- a/tests/test_golden_fixtures.py +++ b/tests/test_golden_fixtures.py @@ -8,7 +8,7 @@ import pytest -from codeclone.golden_fixtures import ( +from codeclone.findings.clones.golden_fixtures import ( GoldenFixturePatternError, build_suppressed_clone_groups, normalize_golden_fixture_patterns, diff --git a/tests/test_suppressions.py b/tests/test_suppressions.py index 960cb5c..54bdb89 100644 --- a/tests/test_suppressions.py +++ b/tests/test_suppressions.py @@ -8,7 +8,7 @@ import pytest -from codeclone.suppressions import ( +from codeclone.analysis.suppressions import ( DeclarationTarget, SuppressionBinding, SuppressionDirective, diff --git a/tests/test_target_module_map_imports.py b/tests/test_target_module_map_imports.py index db29b9b..49e53cc 100644 --- a/tests/test_target_module_map_imports.py +++ b/tests/test_target_module_map_imports.py @@ -62,6 +62,8 @@ def test_old_analysis_and_findings_paths_are_gone() -> None: assert importlib.util.find_spec("codeclone.pipeline") is None assert importlib.util.find_spec("codeclone.structural_findings") is None assert importlib.util.find_spec("codeclone.templates") is 
None + assert importlib.util.find_spec("codeclone.golden_fixtures") is None + assert importlib.util.find_spec("codeclone.suppressions") is None assert callable(canonical_build_groups) assert callable(canonical_scan_function_structure) From ddf41e8f50e84242a001ac7a23945cc801d3c616 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 23 Apr 2026 18:29:29 +0500 Subject: [PATCH 17/32] fix(dependencies): align depth threshold docs and UI with health model --- codeclone/contracts/__init__.py | 6 ++++++ codeclone/metrics/health.py | 15 ++++++++++++--- codeclone/report/html/sections/_dependencies.py | 13 ++++++++++--- codeclone/report/html/widgets/glossary.py | 4 +++- docs/book/04-config-and-defaults.md | 7 +++++++ docs/book/15-health-score.md | 5 +++++ docs/book/15-metrics-and-quality-gates.md | 3 +++ tests/test_defaults_contract.py | 8 ++++++++ tests/test_html_report.py | 1 + tests/test_metrics_modules.py | 13 +++++++++++-- tests/test_options_spec_coverage.py | 13 +++++++++++++ 11 files changed, 79 insertions(+), 9 deletions(-) diff --git a/codeclone/contracts/__init__.py b/codeclone/contracts/__init__.py index 9564ac5..469e065 100644 --- a/codeclone/contracts/__init__.py +++ b/codeclone/contracts/__init__.py @@ -46,6 +46,9 @@ COUPLING_RISK_LOW_MAX: Final = 5 COUPLING_RISK_MEDIUM_MAX: Final = 10 COHESION_RISK_MEDIUM_MAX: Final = 3 +HEALTH_DEPENDENCY_CYCLE_PENALTY: Final = 25 +HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE: Final = 8 +HEALTH_DEPENDENCY_DEPTH_LEVEL_PENALTY: Final = 4 HEALTH_WEIGHTS: Final[dict[str, float]] = { "clones": 0.25, @@ -123,6 +126,9 @@ def cli_help_epilog() -> str: "DEFAULT_SEGMENT_MIN_STMT", "DEFAULT_TEXT_REPORT_PATH", "DOCS_URL", + "HEALTH_DEPENDENCY_CYCLE_PENALTY", + "HEALTH_DEPENDENCY_DEPTH_LEVEL_PENALTY", + "HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE", "HEALTH_WEIGHTS", "ISSUES_URL", "METRICS_BASELINE_SCHEMA_VERSION", diff --git a/codeclone/metrics/health.py b/codeclone/metrics/health.py index 1433f01..cf5536f 100644 --- a/codeclone/metrics/health.py 
+++ b/codeclone/metrics/health.py @@ -9,7 +9,12 @@ from dataclasses import dataclass from typing import Literal -from ..contracts import HEALTH_WEIGHTS +from ..contracts import ( + HEALTH_DEPENDENCY_CYCLE_PENALTY, + HEALTH_DEPENDENCY_DEPTH_LEVEL_PENALTY, + HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE, + HEALTH_WEIGHTS, +) from ..models import HealthScore @@ -104,8 +109,12 @@ def compute_health(inputs: HealthInputs) -> HealthScore: dead_code_score = _clamp_score(100 - inputs.dead_code_items * 8) dependency_score = _clamp_score( 100 - - inputs.dependency_cycles * 25 - - max(0, inputs.dependency_max_depth - 8) * 4 + - inputs.dependency_cycles * HEALTH_DEPENDENCY_CYCLE_PENALTY + - max( + 0, + inputs.dependency_max_depth - HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE, + ) + * HEALTH_DEPENDENCY_DEPTH_LEVEL_PENALTY ) coverage_score = _clamp_score( _safe_div(inputs.files_analyzed_or_cached * 100.0, max(1, inputs.files_found)) diff --git a/codeclone/report/html/sections/_dependencies.py b/codeclone/report/html/sections/_dependencies.py index e667358..bea4a41 100644 --- a/codeclone/report/html/sections/_dependencies.py +++ b/codeclone/report/html/sections/_dependencies.py @@ -12,6 +12,7 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING +from codeclone.contracts import HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE from codeclone.utils import coerce as _coerce from ..primitives.escape import _escape_html @@ -367,8 +368,14 @@ def render_dependencies_panel(ctx: ReportContext) -> str: _stat_card( "Max depth", dep_max_depth, - detail=_micro_badges(("target", "< 8")), - value_tone="warn" if dep_max_depth > 8 else "good", + detail=_micro_badges( + ("target", f"<= {HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE}") + ), + value_tone=( + "warn" + if dep_max_depth > HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE + else "good" + ), css_class="meta-item", glossary_tip_fn=glossary_tip, ), @@ -442,7 +449,7 @@ def render_dependencies_panel(ctx: ReportContext) -> str: answer = f"Cycles: 
{cycle_count}; max dependency depth: {dep_max_depth}." if cycle_count > 0: tone = "risk" - elif dep_max_depth > 8: + elif dep_max_depth > HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE: tone = "warn" else: tone = "ok" diff --git a/codeclone/report/html/widgets/glossary.py b/codeclone/report/html/widgets/glossary.py index 5b30e32..5f013ce 100644 --- a/codeclone/report/html/widgets/glossary.py +++ b/codeclone/report/html/widgets/glossary.py @@ -46,7 +46,9 @@ # Dependency stat cards "modules": "Total number of Python modules analyzed", "edges": "Total number of import relationships between modules", - "max depth": "Longest chain of transitive imports", + "max depth": ( + "Longest chain of transitive imports; health stays in the safe zone at <= 8" + ), "cycles": "Number of circular import dependencies detected", # Complexity stat cards "high-risk functions": ( diff --git a/docs/book/04-config-and-defaults.md b/docs/book/04-config-and-defaults.md index 8eec526..7f3988c 100644 --- a/docs/book/04-config-and-defaults.md +++ b/docs/book/04-config-and-defaults.md @@ -166,6 +166,13 @@ Current-run coverage join config: rules as CLI flags. - Coverage join remains current-run only and does not persist to baseline. +Dependency depth config note: + +- `dependency_max_depth` is an observed metric in reports/baselines, not a + CLI or `pyproject.toml` option. +- The current health safe zone for dependency depth is internal and fixed at + `<= 8`; there is no user-facing knob to tune it in `2.0.0b6`. + Metrics baseline path selection contract: - Relative `baseline` / `metrics_baseline` paths coming from defaults or diff --git a/docs/book/15-health-score.md b/docs/book/15-health-score.md index 70a4751..9c7541f 100644 --- a/docs/book/15-health-score.md +++ b/docs/book/15-health-score.md @@ -48,6 +48,11 @@ Important clarifications: - `coverage` here means analysis completeness, not test coverage. - Segment clones are visible in reports but do not currently affect Health Score. 
- Suppressed or non-actionable dead-code items do not penalize the score. +- Dependencies score currently uses: + `100 - cycles * 25 - max(0, max_depth - 8) * 4`. +- The dependency-depth safe zone is `<= 8`. +- That dependency-depth threshold is currently internal and not configurable + through CLI or `pyproject.toml`. ## Current non-scoring layers diff --git a/docs/book/15-metrics-and-quality-gates.md b/docs/book/15-metrics-and-quality-gates.md index e5fe4b9..53f8758 100644 --- a/docs/book/15-metrics-and-quality-gates.md +++ b/docs/book/15-metrics-and-quality-gates.md @@ -53,6 +53,9 @@ Refs: - `--skip-metrics` is incompatible with metrics gating/update flags. - If metrics are not explicitly requested and no metrics baseline exists, runtime may auto-enable clone-only mode. - In clone-only mode, dead-code and dependency analysis are skipped unless explicitly forced by gates. +- There is currently no user-facing gate or config knob for `dependency_max_depth`; + the metric is observed and contributes to Health Score through the internal + health model only. - `--coverage` is a current-run signal only; it does not update baseline state. - Invalid Cobertura XML becomes `coverage_join.status="invalid"` in normal runs and becomes a contract error only when hotspot gating requires a valid join. 
diff --git a/tests/test_defaults_contract.py b/tests/test_defaults_contract.py index 405c83d..a5c134a 100644 --- a/tests/test_defaults_contract.py +++ b/tests/test_defaults_contract.py @@ -26,9 +26,11 @@ DEFAULT_ROOT, DEFAULT_SEGMENT_MIN_LOC, DEFAULT_SEGMENT_MIN_STMT, + HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE, ) from codeclone.core._types import DEFAULT_RUNTIME_PROCESSES from codeclone.report.gates.evaluator import MetricGateConfig +from codeclone.report.html.sections import _dependencies as html_dependencies_mod from codeclone.surfaces.mcp import server as mcp_server from codeclone.surfaces.mcp.service import CodeCloneMCPService from codeclone.surfaces.mcp.session import MCPAnalysisRequest, MCPGateRequest @@ -117,3 +119,9 @@ def test_mcp_parser_and_builder_defaults_stay_in_sync() -> None: assert signature.parameters["stateless_http"].default == args.stateless_http assert signature.parameters["debug"].default == args.debug assert signature.parameters["log_level"].default == args.log_level + + +def test_dependency_depth_safe_zone_stays_shared_between_contract_and_html() -> None: + source = inspect.getsource(html_dependencies_mod.render_dependencies_panel) + assert "HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE" in source + assert HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE == 8 diff --git a/tests/test_html_report.py b/tests/test_html_report.py index 128b39d..e53fb56 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -1690,6 +1690,7 @@ def test_html_report_metrics_warn_branches_and_dependency_svg() -> None: assert "insight-warn" in html assert "dep-graph-svg" in html assert "Grade B" in html + assert "<= 8" in html assert "pkg.mod.func" in html assert "outside/project/pkg/mod.py" in html diff --git a/tests/test_metrics_modules.py b/tests/test_metrics_modules.py index ddc8608..60b1059 100644 --- a/tests/test_metrics_modules.py +++ b/tests/test_metrics_modules.py @@ -11,6 +11,7 @@ import pytest from codeclone.analysis.cfg_model import CFG +from 
codeclone.contracts import HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE from codeclone.metrics import complexity as complexity_mod from codeclone.metrics import coupling as coupling_mod from codeclone.metrics import health as health_mod @@ -646,8 +647,16 @@ def _health_inputs(*, dependency_max_depth: int) -> HealthInputs: dead_code_items=0, ) - safe = compute_health(_health_inputs(dependency_max_depth=8)) - warn = compute_health(_health_inputs(dependency_max_depth=9)) + safe = compute_health( + _health_inputs( + dependency_max_depth=HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE, + ) + ) + warn = compute_health( + _health_inputs( + dependency_max_depth=HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE + 1, + ) + ) assert safe.dimensions["dependencies"] == 100 assert warn.dimensions["dependencies"] == 96 diff --git a/tests/test_options_spec_coverage.py b/tests/test_options_spec_coverage.py index f6ae2a0..3b930da 100644 --- a/tests/test_options_spec_coverage.py +++ b/tests/test_options_spec_coverage.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from pathlib import Path import pytest @@ -108,3 +109,15 @@ def test_option_specs_have_pyproject_loading_coverage( loaded = load_pyproject_config(tmp_path) assert loaded[pyproject_key] == expected + + +def test_config_defaults_doc_covers_exact_pyproject_key_set() -> None: + text = Path("docs/book/04-config-and-defaults.md").read_text(encoding="utf-8") + documented = set(re.findall(r"^\| `([a-z0-9_]+)`\s+\|", text, re.MULTILINE)) + declared = { + option.pyproject_key + for option in PYPROJECT_OPTIONS + if option.pyproject_key is not None + } + + assert documented == declared From 013af81e39b073549ea877ce6c93b85a47b440e6 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Thu, 23 Apr 2026 21:28:23 +0500 Subject: [PATCH 18/32] feat(dependencies): switch to adaptive depth profiling and surface it in reports - replace the fixed max-depth penalty with an adaptive dependency depth model based on avg_depth, p95_depth, and max_depth - limit 
dependency scoring to the internal module graph and keep cycles as the hard dependency signal - surface the dependency profile in HTML, text/markdown, and CLI/CI summaries - bump report schema to 2.9 and align contracts, docs, snapshots, and tests - refresh b6 changelog/docs for the new dependency semantics --- AGENTS.md | 2 +- CHANGELOG.md | 49 +++++++--- codeclone/contracts/__init__.py | 8 +- codeclone/core/metrics_payload.py | 6 ++ codeclone/core/pipeline.py | 5 +- codeclone/metrics/dependencies.py | 89 +++++++++++++++++-- codeclone/metrics/health.py | 33 ++++++- codeclone/metrics/registry.py | 8 ++ codeclone/models.py | 2 + codeclone/report/document/metrics.py | 2 + .../report/html/sections/_dependencies.py | 39 +++++--- codeclone/report/html/widgets/glossary.py | 2 +- codeclone/report/renderers/markdown.py | 2 +- codeclone/report/renderers/text.py | 9 +- codeclone/surfaces/cli/summary.py | 24 +++++ codeclone/ui_messages/__init__.py | 27 +++++- docs/README.md | 2 +- docs/architecture.md | 2 +- docs/book/04-config-and-defaults.md | 5 +- docs/book/08-report.md | 11 ++- docs/book/09-cli.md | 1 + docs/book/13-testing-as-spec.md | 18 ++-- docs/book/14-compatibility-and-versioning.md | 2 +- docs/book/15-health-score.md | 13 +-- docs/book/15-metrics-and-quality-gates.md | 4 +- docs/book/appendix/b-schema-layouts.md | 8 +- .../golden_expected_cli_snapshot.json | 2 +- tests/test_cli_inprocess.py | 23 ++++- tests/test_cli_unit.py | 72 ++++++++++++++- tests/test_defaults_contract.py | 21 +++-- tests/test_html_report.py | 8 +- tests/test_html_report_helpers.py | 2 +- tests/test_metrics_modules.py | 51 +++++++++-- 33 files changed, 459 insertions(+), 93 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 983a4df..84051f7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -145,7 +145,7 @@ from another doc.** Current values (verified at write time): | `BASELINE_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `2.1` | | `BASELINE_FINGERPRINT_VERSION` | 
`codeclone/contracts/__init__.py` | `1` | | `CACHE_VERSION` | `codeclone/contracts/__init__.py` | `2.5` | -| `REPORT_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `2.8` | +| `REPORT_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `2.9` | | `METRICS_BASELINE_SCHEMA_VERSION` | `codeclone/contracts/__init__.py` | `1.2` | When updating any doc that mentions a version, re-read `codeclone/contracts/__init__.py` first. Do not derive diff --git a/CHANGELOG.md b/CHANGELOG.md index 5153cac..6f74670 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,45 @@ # Changelog -## [2.0.0b6] - 2026-04-22 +## [2.0.0b6] -Stabilizes the post-refactor architecture: canonical package layout, thinner -entrypoints, cleaner dependency boundaries, refreshed tests, and aligned docs. +Stabilizes the post-refactor architecture, removes the remaining legacy shim +tails, and replaces the old fixed dependency-depth penalty with an adaptive +project-relative model. ### Architecture and contracts -- Move the runtime onto the new package layout: `main` + `surfaces/cli`, - `surfaces/mcp`, `core`, `analysis`, `baseline`, `cache`, `contracts`, - `report/document`, `report/renderers`, and `report/html`. -- Remove legacy root shims and stale compatibility modules in favor of direct - canonical imports. -- Keep clone baseline schema `2.1`, cache schema `2.5`, report schema `2.8`, - and metrics-baseline schema `1.2` unchanged while preserving determinism and - read-only MCP semantics. -- Prune stale deleted-file cache entries and tighten dependency chains that were - inflating post-refactor architectural depth. +- Move the runtime fully onto the canonical package layout: + `main` + `surfaces/cli`, `surfaces/mcp`, `core`, `analysis`, `baseline`, + `cache`, `contracts`, `report/document`, `report/renderers`, and + `report/html`. +- Remove remaining legacy root shims and stale compatibility modules in favor + of direct canonical imports. 
+- Keep clone baseline schema `2.1`, cache schema `2.5`, and metrics-baseline + schema `1.2` unchanged; bump report schema to `2.9` for additive dependency + depth profile fields. +- Preserve deterministic contracts and read-only MCP semantics across the new + layout. + +### Dependencies and health scoring + +- Replace the old fixed dependency-depth penalty (`max_depth > 8`) with an + adaptive internal-graph profile based on `avg_depth`, `p95_depth`, and + `max_depth`. +- Keep dependency cycles as the hard signal; treat acyclic depth as adaptive + pressure relative to the project’s own dependency profile. +- Limit dependency-depth scoring to the internal module graph instead of + external imports such as `typing` or `argparse`. +- Surface the dependency depth profile in the canonical report, HTML + Dependencies tab, and CLI/CI summaries. +- Remove stale deleted-file cache entries and trim post-refactor import tails + that were inflating dependency depth and clone pressure. + +### Tooling, docs, and UX + +- Refresh AGENTS, docs/book, and changelog content for the b6 package layout + and report schema `2.9`. +- Tighten preview client metadata and install guidance for VS Code, Claude + Desktop, and Codex. - Add a quiet one-time VS Code extension hint in interactive VS Code terminals, tracked per CodeClone version next to the resolved project cache path. 
diff --git a/codeclone/contracts/__init__.py b/codeclone/contracts/__init__.py index 469e065..353987c 100644 --- a/codeclone/contracts/__init__.py +++ b/codeclone/contracts/__init__.py @@ -13,7 +13,7 @@ BASELINE_FINGERPRINT_VERSION: Final = "1" CACHE_VERSION: Final = "2.5" -REPORT_SCHEMA_VERSION: Final = "2.8" +REPORT_SCHEMA_VERSION: Final = "2.9" METRICS_BASELINE_SCHEMA_VERSION: Final = "1.2" DEFAULT_COMPLEXITY_THRESHOLD: Final = 20 @@ -47,8 +47,9 @@ COUPLING_RISK_MEDIUM_MAX: Final = 10 COHESION_RISK_MEDIUM_MAX: Final = 3 HEALTH_DEPENDENCY_CYCLE_PENALTY: Final = 25 -HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE: Final = 8 HEALTH_DEPENDENCY_DEPTH_LEVEL_PENALTY: Final = 4 +HEALTH_DEPENDENCY_DEPTH_AVG_MULTIPLIER: Final = 2.0 +HEALTH_DEPENDENCY_DEPTH_P95_MARGIN: Final = 1 HEALTH_WEIGHTS: Final[dict[str, float]] = { "clones": 0.25, @@ -127,8 +128,9 @@ def cli_help_epilog() -> str: "DEFAULT_TEXT_REPORT_PATH", "DOCS_URL", "HEALTH_DEPENDENCY_CYCLE_PENALTY", + "HEALTH_DEPENDENCY_DEPTH_AVG_MULTIPLIER", "HEALTH_DEPENDENCY_DEPTH_LEVEL_PENALTY", - "HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE", + "HEALTH_DEPENDENCY_DEPTH_P95_MARGIN", "HEALTH_WEIGHTS", "ISSUES_URL", "METRICS_BASELINE_SCHEMA_VERSION", diff --git a/codeclone/core/metrics_payload.py b/codeclone/core/metrics_payload.py index d2386c0..4ba6091 100644 --- a/codeclone/core/metrics_payload.py +++ b/codeclone/core/metrics_payload.py @@ -19,6 +19,7 @@ ClassMetrics, CoverageJoinResult, DeadItem, + DepGraph, GroupItemLike, MetricsDiff, ModuleDep, @@ -105,6 +106,7 @@ def build_metrics_report_payload( *, scan_root: str = "", project_metrics: ProjectMetrics, + dep_graph: DepGraph | None = None, coverage_join: CoverageJoinResult | None = None, units: Sequence[GroupItemLike], class_metrics: Sequence[ClassMetrics], @@ -225,6 +227,10 @@ def _serialize_dead_item( "modules": project_metrics.dependency_modules, "edges": project_metrics.dependency_edges, "max_depth": project_metrics.dependency_max_depth, + "avg_depth": ( + round(dep_graph.avg_depth, 
2) if dep_graph is not None else 0.0 + ), + "p95_depth": dep_graph.p95_depth if dep_graph is not None else 0, "cycles": [list(cycle) for cycle in project_metrics.dependency_cycles], "longest_chains": [ list(chain) for chain in project_metrics.dependency_longest_chains diff --git a/codeclone/core/pipeline.py b/codeclone/core/pipeline.py index 8e18e9c..3ad6169 100644 --- a/codeclone/core/pipeline.py +++ b/codeclone/core/pipeline.py @@ -114,6 +114,8 @@ def compute_project_metrics( edges=(), cycles=(), max_depth=0, + avg_depth=0.0, + p95_depth=0, longest_chains=(), ) dead_items: tuple[DeadItem, ...] = () @@ -265,7 +267,7 @@ def analyze( *cohort_structural_findings, ) if not boot.args.skip_metrics: - project_metrics, _, _ = compute_project_metrics( + project_metrics, dep_graph, _ = compute_project_metrics( units=processing.units, class_metrics=processing.class_metrics, module_deps=processing.module_deps, @@ -325,6 +327,7 @@ def analyze( metrics_payload = build_metrics_report_payload( scan_root=str(boot.root), project_metrics=project_metrics, + dep_graph=dep_graph, coverage_join=coverage_join, units=processing.units, class_metrics=processing.class_metrics, diff --git a/codeclone/metrics/dependencies.py b/codeclone/metrics/dependencies.py index 48ba032..573cc9e 100644 --- a/codeclone/metrics/dependencies.py +++ b/codeclone/metrics/dependencies.py @@ -6,6 +6,7 @@ from __future__ import annotations +from math import ceil from typing import TYPE_CHECKING from ..models import DepGraph, ModuleDep @@ -16,6 +17,37 @@ DepAdjacency = dict[str, set[str]] +def _internal_roots( + modules: Iterable[str], + deps: Sequence[ModuleDep], +) -> frozenset[str]: + roots: set[str] = set() + for module_name in modules: + if module_name: + roots.add(module_name.split(".", 1)[0]) + for dep in deps: + if dep.source: + roots.add(dep.source.split(".", 1)[0]) + return frozenset(sorted(roots)) + + +def _is_internal_target(target: str, *, internal_roots: frozenset[str]) -> bool: + if not target: + 
return False + return target.split(".", 1)[0] in internal_roots + + +def _unique_sorted_edges(deps: Sequence[ModuleDep]) -> tuple[ModuleDep, ...]: + return tuple( + sorted( + { + (dep.source, dep.target, dep.import_type, dep.line): dep for dep in deps + }.values(), + key=lambda dep: (dep.source, dep.target, dep.import_type, dep.line), + ) + ) + + def build_import_graph( *, modules: Iterable[str], @@ -123,6 +155,23 @@ def max_depth(graph: DepAdjacency) -> int: return best +def depth_profile(graph: DepAdjacency) -> tuple[float, int]: + if not graph: + return 0.0, 0 + + memo: dict[str, int] = {} + depths = sorted( + _longest_path_from(node, graph=graph, visiting=set(), memo=memo) + for node in sorted(graph) + ) + if not depths: + return 0.0, 0 + + avg_depth = sum(depths) / len(depths) + percentile_index = max(0, ceil(len(depths) * 0.95) - 1) + return avg_depth, int(depths[percentile_index]) + + def _longest_path_nodes_from( node: str, *, @@ -180,22 +229,44 @@ def longest_chains( def build_dep_graph(*, modules: Iterable[str], deps: Sequence[ModuleDep]) -> DepGraph: - graph = build_import_graph(modules=modules, deps=deps) - cycles = find_cycles(graph) - depth = max_depth(graph) - chains = longest_chains(graph) - unique_edges = tuple( + base_modules = frozenset( sorted( { - (dep.source, dep.target, dep.import_type, dep.line): dep for dep in deps - }.values(), - key=lambda dep: (dep.source, dep.target, dep.import_type, dep.line), + str(module_name).strip() + for module_name in modules + if str(module_name).strip() + } ) ) + internal_roots = _internal_roots(base_modules, deps) + internal_edges = _unique_sorted_edges( + tuple( + dep + for dep in deps + if dep.source + and _is_internal_target(dep.target, internal_roots=internal_roots) + ) + ) + graph_modules = frozenset( + sorted( + { + *base_modules, + *(dep.source for dep in internal_edges if dep.source), + *(dep.target for dep in internal_edges if dep.target), + } + ) + ) + graph = 
build_import_graph(modules=graph_modules, deps=internal_edges) + cycles = find_cycles(graph) + depth = max_depth(graph) + avg_depth, p95_depth = depth_profile(graph) + chains = longest_chains(graph) return DepGraph( modules=frozenset(graph.keys()), - edges=unique_edges, + edges=internal_edges, cycles=cycles, max_depth=depth, + avg_depth=avg_depth, + p95_depth=p95_depth, longest_chains=chains, ) diff --git a/codeclone/metrics/health.py b/codeclone/metrics/health.py index cf5536f..354bb5a 100644 --- a/codeclone/metrics/health.py +++ b/codeclone/metrics/health.py @@ -7,12 +7,14 @@ from __future__ import annotations from dataclasses import dataclass +from math import ceil from typing import Literal from ..contracts import ( HEALTH_DEPENDENCY_CYCLE_PENALTY, + HEALTH_DEPENDENCY_DEPTH_AVG_MULTIPLIER, HEALTH_DEPENDENCY_DEPTH_LEVEL_PENALTY, - HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE, + HEALTH_DEPENDENCY_DEPTH_P95_MARGIN, HEALTH_WEIGHTS, ) from ..models import HealthScore @@ -34,6 +36,8 @@ class HealthInputs: low_cohesion_classes: int dependency_cycles: int dependency_max_depth: int + dependency_avg_depth: float + dependency_p95_depth: int dead_code_items: int @@ -59,6 +63,26 @@ def _safe_div(numerator: float, denominator: float) -> float: return numerator / denominator +def _dependency_expected_tail(*, avg_depth: float, p95_depth: int) -> int: + avg_based = ceil(max(0.0, avg_depth) * HEALTH_DEPENDENCY_DEPTH_AVG_MULTIPLIER) + p95_based = max(0, p95_depth) + HEALTH_DEPENDENCY_DEPTH_P95_MARGIN + return max(avg_based, p95_based) + + +def _dependency_tail_pressure( + *, + max_depth: int, + avg_depth: float, + p95_depth: int, +) -> int: + if max_depth <= 0: + return 0 + return max( + 0, + max_depth - _dependency_expected_tail(avg_depth=avg_depth, p95_depth=p95_depth), + ) + + # Piecewise clone-density curve: mild penalty for low density, # steep in the structural-debt zone, brutal when it's systemic. _CLONE_BREAKPOINTS: tuple[tuple[float, float], ...] 
= ( @@ -110,9 +134,10 @@ def compute_health(inputs: HealthInputs) -> HealthScore: dependency_score = _clamp_score( 100 - inputs.dependency_cycles * HEALTH_DEPENDENCY_CYCLE_PENALTY - - max( - 0, - inputs.dependency_max_depth - HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE, + - _dependency_tail_pressure( + max_depth=inputs.dependency_max_depth, + avg_depth=inputs.dependency_avg_depth, + p95_depth=inputs.dependency_p95_depth, ) * HEALTH_DEPENDENCY_DEPTH_LEVEL_PENALTY ) diff --git a/codeclone/metrics/registry.py b/codeclone/metrics/registry.py index 93d6391..0b51a31 100644 --- a/codeclone/metrics/registry.py +++ b/codeclone/metrics/registry.py @@ -66,6 +66,8 @@ def _empty_dep_graph() -> DepGraph: edges=(), cycles=(), max_depth=0, + avg_depth=0.0, + p95_depth=0, longest_chains=(), ) @@ -86,6 +88,8 @@ def _empty_dep_graph() -> DepGraph: low_cohesion_classes=0, dependency_cycles=0, dependency_max_depth=0, + dependency_avg_depth=0.0, + dependency_p95_depth=0, dead_code_items=0, ) ) @@ -439,6 +443,8 @@ def _build_dependencies_result(context: MetricProjectContext) -> MetricResult: "dependency_edge_list": dep_graph.edges, "dependency_cycles": dep_graph.cycles, "dependency_max_depth": dep_graph.max_depth, + "dependency_avg_depth": dep_graph.avg_depth, + "dependency_p95_depth": dep_graph.p95_depth, "dependency_longest_chains": dep_graph.longest_chains, "dep_graph": dep_graph, } @@ -532,6 +538,8 @@ def _build_health_result(context: MetricProjectContext) -> MetricResult: _result_nested_tuple_str(dependencies, "dependency_cycles") ), dependency_max_depth=_result_int(dependencies, "dependency_max_depth"), + dependency_avg_depth=_result_float(dependencies, "dependency_avg_depth"), + dependency_p95_depth=_result_int(dependencies, "dependency_p95_depth"), dead_code_items=len(_result_dead_items(dead_code, "dead_code")), ) ) diff --git a/codeclone/models.py b/codeclone/models.py index 4814fc1..5549d7e 100644 --- a/codeclone/models.py +++ b/codeclone/models.py @@ -93,6 +93,8 @@ class DepGraph: 
edges: tuple[ModuleDep, ...] cycles: tuple[tuple[str, ...], ...] max_depth: int + avg_depth: float + p95_depth: int longest_chains: tuple[tuple[str, ...], ...] diff --git a/codeclone/report/document/metrics.py b/codeclone/report/document/metrics.py index 577ee01..08b62a4 100644 --- a/codeclone/report/document/metrics.py +++ b/codeclone/report/document/metrics.py @@ -487,6 +487,8 @@ def _normalize_suppressed_by( "edges": _as_int(dependencies.get("edges")), "cycles": len(dependency_cycles), "max_depth": _as_int(dependencies.get("max_depth")), + "avg_depth": round(_as_float(dependencies.get("avg_depth")), 2), + "p95_depth": _as_int(dependencies.get("p95_depth")), }, "items": dependency_edges, "cycles": dependency_cycles, diff --git a/codeclone/report/html/sections/_dependencies.py b/codeclone/report/html/sections/_dependencies.py index bea4a41..078c223 100644 --- a/codeclone/report/html/sections/_dependencies.py +++ b/codeclone/report/html/sections/_dependencies.py @@ -12,7 +12,6 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING -from codeclone.contracts import HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE from codeclone.utils import coerce as _coerce from ..primitives.escape import _escape_html @@ -31,6 +30,7 @@ from .._context import ReportContext _as_int = _coerce.as_int +_as_float = _coerce.as_float _as_mapping = _coerce.as_mapping _as_sequence = _coerce.as_sequence @@ -344,11 +344,25 @@ def render_dependencies_panel(ctx: ReportContext) -> str: dep_module_count = _as_int(ctx.dependencies_map.get("modules")) dep_edge_count = _as_int(ctx.dependencies_map.get("edges")) dep_max_depth = _as_int(ctx.dependencies_map.get("max_depth")) + dep_avg_depth = _as_float(ctx.dependencies_map.get("avg_depth")) + dep_p95_depth = _as_int(ctx.dependencies_map.get("p95_depth")) cycle_count = len(dep_cycles) + dependency_health = _as_int( + _as_mapping(ctx.health_map.get("dimensions")).get("dependencies"), + ) dep_avg = ( f"{dep_edge_count / 
dep_module_count:.1f}" if dep_module_count > 0 else "n/a" ) + dep_avg_depth_label = f"{dep_avg_depth:.1f}" if dep_module_count > 0 else "n/a" + + dependency_tone: Tone + if cycle_count > 0: + dependency_tone = "risk" + elif dependency_health < 100: + dependency_tone = "warn" + else: + dependency_tone = "ok" cards = [ _stat_card( @@ -369,13 +383,12 @@ def render_dependencies_panel(ctx: ReportContext) -> str: "Max depth", dep_max_depth, detail=_micro_badges( - ("target", f"<= {HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE}") - ), - value_tone=( - "warn" - if dep_max_depth > HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE - else "good" + ("avg", dep_avg_depth_label), + ("p95", dep_p95_depth), ), + value_tone="bad" + if cycle_count > 0 + else ("warn" if dependency_health < 100 else "good"), css_class="meta-item", glossary_tip_fn=glossary_tip, ), @@ -446,13 +459,11 @@ def render_dependencies_panel(ctx: ReportContext) -> str: if not ctx.metrics_available: answer, tone = "Metrics are skipped for this run.", "info" else: - answer = f"Cycles: {cycle_count}; max dependency depth: {dep_max_depth}." - if cycle_count > 0: - tone = "risk" - elif dep_max_depth > HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE: - tone = "warn" - else: - tone = "ok" + answer = ( + f"Cycles: {cycle_count}; avg depth: {dep_avg_depth_label}; " + f"p95 depth: {dep_p95_depth}; max dependency depth: {dep_max_depth}." 
+ ) + tone = dependency_tone return ( insight_block( diff --git a/codeclone/report/html/widgets/glossary.py b/codeclone/report/html/widgets/glossary.py index 5f013ce..19ab172 100644 --- a/codeclone/report/html/widgets/glossary.py +++ b/codeclone/report/html/widgets/glossary.py @@ -47,7 +47,7 @@ "modules": "Total number of Python modules analyzed", "edges": "Total number of import relationships between modules", "max depth": ( - "Longest chain of transitive imports; health stays in the safe zone at <= 8" + "Longest internal transitive import chain; compare with avg and p95 depth" ), "cycles": "Number of circular import dependencies detected", # Complexity stat cards diff --git a/codeclone/report/renderers/markdown.py b/codeclone/report/renderers/markdown.py index 18a79f1..9de38ef 100644 --- a/codeclone/report/renderers/markdown.py +++ b/codeclone/report/renderers/markdown.py @@ -546,7 +546,7 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str: ( "dependencies", "Dependencies", - ("modules", "edges", "cycles", "max_depth"), + ("modules", "edges", "cycles", "avg_depth", "p95_depth", "max_depth"), ("source", "target", "import_type", "line"), ), ( diff --git a/codeclone/report/renderers/text.py b/codeclone/report/renderers/text.py index 4074652..231de1d 100644 --- a/codeclone/report/renderers/text.py +++ b/codeclone/report/renderers/text.py @@ -672,7 +672,14 @@ def render_text_report_document(payload: Mapping[str, object]) -> str: "hotspot_threshold_percent", ) case "dependencies": - keys = ("modules", "edges", "cycles", "max_depth") + keys = ( + "modules", + "edges", + "cycles", + "avg_depth", + "p95_depth", + "max_depth", + ) case "overloaded_modules": keys = ( "total", diff --git a/codeclone/surfaces/cli/summary.py b/codeclone/surfaces/cli/summary.py index f0c14aa..d71512e 100644 --- a/codeclone/surfaces/cli/summary.py +++ b/codeclone/surfaces/cli/summary.py @@ -31,6 +31,9 @@ class MetricsSnapshot: dead_code_count: int health_total: int 
health_grade: str + dependency_avg_depth: float = 0.0 + dependency_p95_depth: int = 0 + dependency_max_depth: int = 0 suppressed_dead_code_count: int = 0 overloaded_modules_candidates: int = 0 overloaded_modules_total: int = 0 @@ -114,6 +117,13 @@ def build_metrics_snapshot( cohesion_avg=project_metrics.cohesion_avg, cohesion_max=project_metrics.cohesion_max, cycles_count=len(project_metrics.dependency_cycles), + dependency_avg_depth=_coerce.as_float( + _as_mapping(metrics_payload_map.get("dependencies")).get("avg_depth") + ), + dependency_p95_depth=_as_int( + _as_mapping(metrics_payload_map.get("dependencies")).get("p95_depth") + ), + dependency_max_depth=project_metrics.dependency_max_depth, dead_code_count=len(project_metrics.dead_code), health_total=project_metrics.health.total, health_grade=project_metrics.health.grade, @@ -276,6 +286,13 @@ def _print_metrics( overloaded_modules=metrics.overloaded_modules_candidates, ) ) + console.print( + ui.fmt_summary_compact_dependencies( + avg_depth=metrics.dependency_avg_depth, + p95_depth=metrics.dependency_p95_depth, + max_depth=metrics.dependency_max_depth, + ) + ) if ( metrics.adoption_param_permille is not None and metrics.adoption_return_permille is not None @@ -329,6 +346,13 @@ def _print_metrics( ui.fmt_metrics_cohesion(metrics.cohesion_avg, metrics.cohesion_max) ) console.print(ui.fmt_metrics_cycles(metrics.cycles_count)) + console.print( + ui.fmt_metrics_dependencies( + avg_depth=metrics.dependency_avg_depth, + p95_depth=metrics.dependency_p95_depth, + max_depth=metrics.dependency_max_depth, + ) + ) console.print( ui.fmt_metrics_dead_code( metrics.dead_code_count, diff --git a/codeclone/ui_messages/__init__.py b/codeclone/ui_messages/__init__.py index 388b324..5069287 100644 --- a/codeclone/ui_messages/__init__.py +++ b/codeclone/ui_messages/__init__.py @@ -241,6 +241,9 @@ " lcom4={lcom_avg}/{lcom_max} cycles={cycles} dead_code={dead}" " health={health}({grade}) overloaded_modules={overloaded_modules}" ) 
+SUMMARY_COMPACT_DEPENDENCIES = ( + "Dependencies avg={avg_depth} p95={p95_depth} max={max_depth}" +) SUMMARY_COMPACT_CHANGED_SCOPE = ( "Changed paths={paths} findings={findings} new={new} known={known}" ) @@ -494,6 +497,19 @@ def fmt_summary_compact_metrics( ) +def fmt_summary_compact_dependencies( + *, + avg_depth: float, + p95_depth: int, + max_depth: int, +) -> str: + return SUMMARY_COMPACT_DEPENDENCIES.format( + avg_depth=f"{avg_depth:.1f}", + p95_depth=p95_depth, + max_depth=max_depth, + ) + + def fmt_summary_compact_adoption( *, param_permille: int, @@ -559,7 +575,7 @@ def fmt_summary_compact_coverage_join( HEALTH_GRADE_F: "bold red", } -_L = 12 # label column width (after 2-space indent) +_L = 13 # label column width (after 2-space indent) def _v(n: int, style: str = "") -> str: @@ -665,6 +681,15 @@ def fmt_metrics_cycles(count: int) -> str: return f" {'Cycles':<{_L}}[bold red]{count} detected[/bold red]" +def fmt_metrics_dependencies( + *, avg_depth: float, p95_depth: int, max_depth: int +) -> str: + return ( + f" {'Dependencies':<{_L}}" + f"avg {avg_depth:.1f} · p95 {p95_depth} · max {max_depth}" + ) + + def fmt_metrics_dead_code(count: int, *, suppressed: int = 0) -> str: suppressed_suffix = ( f" [dim]({suppressed} suppressed)[/dim]" if suppressed > 0 else "" diff --git a/docs/README.md b/docs/README.md index 2d04be9..2b19742 100644 --- a/docs/README.md +++ b/docs/README.md @@ -39,7 +39,7 @@ repository build: - [Core pipeline and invariants](book/05-core-pipeline.md) - [Baseline contract (schema v2.1)](book/06-baseline.md) - [Cache contract (schema v2.5)](book/07-cache.md) -- [Report contract (schema v2.8)](book/08-report.md) +- [Report contract (schema v2.9)](book/08-report.md) ## Interfaces diff --git a/docs/architecture.md b/docs/architecture.md index d6b30f8..b246934 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -144,7 +144,7 @@ gating decisions. 
Detected findings can be rendered as: - interactive HTML (`--html`), -- canonical JSON (`--json`, schema `2.8`), +- canonical JSON (`--json`, schema `2.9`), - deterministic text projection (`--text`), - deterministic Markdown projection (`--md`), - deterministic SARIF projection (`--sarif`). diff --git a/docs/book/04-config-and-defaults.md b/docs/book/04-config-and-defaults.md index 7f3988c..c8d02a7 100644 --- a/docs/book/04-config-and-defaults.md +++ b/docs/book/04-config-and-defaults.md @@ -170,8 +170,9 @@ Dependency depth config note: - `dependency_max_depth` is an observed metric in reports/baselines, not a CLI or `pyproject.toml` option. -- The current health safe zone for dependency depth is internal and fixed at - `<= 8`; there is no user-facing knob to tune it in `2.0.0b6`. +- Dependency depth now uses an internal adaptive profile based on + `avg_depth`, `p95_depth`, and `max_depth` for the internal module graph. +- There is no user-facing knob to tune that model in `2.0.0b6`. Metrics baseline path selection contract: diff --git a/docs/book/08-report.md b/docs/book/08-report.md index d140747..f73bc88 100644 --- a/docs/book/08-report.md +++ b/docs/book/08-report.md @@ -2,7 +2,7 @@ ## Purpose -Define the canonical report contract in `2.0.0b6`: report schema `2.8` plus +Define the canonical report contract in `2.0.0b6`: report schema `2.9` plus deterministic text/Markdown/SARIF/HTML projections. ## Public surface @@ -48,6 +48,15 @@ Current canonical report-only metric families include: - `coverage_join` - `overloaded_modules` +Dependency depth facts in the canonical report now include: + +- `avg_depth` +- `p95_depth` +- `max_depth` + +These describe the internal module dependency graph. They are report facts, not +user-facing config knobs. 
+ Current finding families include: - `findings.groups.clones.{functions,blocks,segments}` diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md index 6cbe37a..9b7db13 100644 --- a/docs/book/09-cli.md +++ b/docs/book/09-cli.md @@ -30,6 +30,7 @@ Summary metrics include: - function/block/segment clone groups - suppressed clone groups from `golden_fixture_paths` - dead-code active/suppressed status +- dependency depth profile (`avg_depth`, `p95_depth`, `max_depth`) when metrics are computed - adoption/API/coverage-join facts when computed - new vs baseline diff --git a/docs/book/13-testing-as-spec.md b/docs/book/13-testing-as-spec.md index c2b03b2..73f62c4 100644 --- a/docs/book/13-testing-as-spec.md +++ b/docs/book/13-testing-as-spec.md @@ -33,16 +33,16 @@ Test classes by role: The following matrix is treated as executable contract: -| Contract | Tests | -|--------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------| -| Baseline schema/integrity/compat gates | `tests/test_baseline.py` | +| Contract | Tests | +|----------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Baseline schema/integrity/compat gates | `tests/test_baseline.py` | | Cache v2.5 fail-open + status mapping + API-surface-aware reuse + API signature order preservation | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag`, `tests/test_cli_inprocess.py::test_cli_public_api_breaking_count_stable_across_warm_cache`, 
`tests/test_cli_inprocess.py::test_cli_api_surface_ignores_non_api_warm_cache` | -| Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` | -| Report schema v2.8 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` | -| HTML render-only explainability + escaping | `tests/test_html_report.py` | -| Current-run Cobertura coverage join parsing, gating, and projections | `tests/test_coverage_join.py`, `tests/test_pipeline_metrics.py`, `tests/test_cli_unit.py`, `tests/test_mcp_service.py`, `tests/test_html_report.py` | -| Golden fixture clone exclusion policy | `tests/test_golden_fixtures.py`, `tests/test_cli_inprocess.py::test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups`, `tests/test_report.py::test_report_json_clone_groups_can_include_suppressed_golden_fixture_bucket` | -| Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | +| Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` | +| Report schema v2.9 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` | +| HTML render-only explainability + escaping | `tests/test_html_report.py` | +| Current-run Cobertura coverage join parsing, gating, and projections | `tests/test_coverage_join.py`, `tests/test_pipeline_metrics.py`, `tests/test_cli_unit.py`, `tests/test_mcp_service.py`, `tests/test_html_report.py` | +| Golden fixture clone exclusion policy | `tests/test_golden_fixtures.py`, `tests/test_cli_inprocess.py::test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups`, `tests/test_report.py::test_report_json_clone_groups_can_include_suppressed_golden_fixture_bucket` | +| Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` | ## 
Invariants (MUST) diff --git a/docs/book/14-compatibility-and-versioning.md b/docs/book/14-compatibility-and-versioning.md index ac9186f..130e126 100644 --- a/docs/book/14-compatibility-and-versioning.md +++ b/docs/book/14-compatibility-and-versioning.md @@ -25,7 +25,7 @@ Current contract versions: - `BASELINE_SCHEMA_VERSION = "2.1"` - `BASELINE_FINGERPRINT_VERSION = "1"` - `CACHE_VERSION = "2.5"` -- `REPORT_SCHEMA_VERSION = "2.8"` +- `REPORT_SCHEMA_VERSION = "2.9"` - `METRICS_BASELINE_SCHEMA_VERSION = "1.2"` Refs: diff --git a/docs/book/15-health-score.md b/docs/book/15-health-score.md index 9c7541f..1a6b330 100644 --- a/docs/book/15-health-score.md +++ b/docs/book/15-health-score.md @@ -48,11 +48,14 @@ Important clarifications: - `coverage` here means analysis completeness, not test coverage. - Segment clones are visible in reports but do not currently affect Health Score. - Suppressed or non-actionable dead-code items do not penalize the score. -- Dependencies score currently uses: - `100 - cycles * 25 - max(0, max_depth - 8) * 4`. -- The dependency-depth safe zone is `<= 8`. -- That dependency-depth threshold is currently internal and not configurable - through CLI or `pyproject.toml`. +- Dependencies score uses the internal module dependency graph only. +- Cycles still penalize the dependencies dimension directly. +- Acyclic depth pressure is adaptive: + `expected_tail = max(ceil(avg_depth * 2.0), p95_depth + 1)`, then + `tail_pressure = max(0, max_depth - expected_tail)`. +- The dependencies dimension score is: + `100 - cycles * 25 - tail_pressure * 4`. +- This model is internal and not configurable through CLI or `pyproject.toml`. 
## Current non-scoring layers diff --git a/docs/book/15-metrics-and-quality-gates.md b/docs/book/15-metrics-and-quality-gates.md index 53f8758..48d230f 100644 --- a/docs/book/15-metrics-and-quality-gates.md +++ b/docs/book/15-metrics-and-quality-gates.md @@ -54,8 +54,8 @@ Refs: - If metrics are not explicitly requested and no metrics baseline exists, runtime may auto-enable clone-only mode. - In clone-only mode, dead-code and dependency analysis are skipped unless explicitly forced by gates. - There is currently no user-facing gate or config knob for `dependency_max_depth`; - the metric is observed and contributes to Health Score through the internal - health model only. + dependency depth contributes to Health Score through the internal adaptive + model over `avg_depth`, `p95_depth`, and `max_depth` only. - `--coverage` is a current-run signal only; it does not update baseline state. - Invalid Cobertura XML becomes `coverage_join.status="invalid"` in normal runs and becomes a contract error only when hotspot gating requires a valid join. diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md index 1890f4e..1333cc7 100644 --- a/docs/book/appendix/b-schema-layouts.md +++ b/docs/book/appendix/b-schema-layouts.md @@ -146,11 +146,11 @@ Notes: - `u` row decoder accepts both legacy 11-column rows and canonical 17-column rows (legacy rows map new structural fields to neutral defaults). -## Report schema (`2.8`) +## Report schema (`2.9`) ```json { - "report_schema_version": "2.8", + "report_schema_version": "2.9", "meta": { "codeclone_version": "2.0.0b5", "project_name": "codeclone", @@ -439,7 +439,7 @@ Notes: ```text # CodeClone Report - Markdown schema: 1.0 -- Source report schema: 2.8 +- Source report schema: 2.9 ... 
## Overview ## Inventory @@ -525,7 +525,7 @@ Notes: ], "properties": { "profileVersion": "1.0", - "reportSchemaVersion": "2.8" + "reportSchemaVersion": "2.9" }, "results": [ { diff --git a/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json b/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json index ab5236f..fe58425 100644 --- a/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json +++ b/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json @@ -2,7 +2,7 @@ "meta": { "python_tag": "cp313" }, - "report_schema_version": "2.8", + "report_schema_version": "2.9", "project_name": "pyproject_defaults", "scan_root": ".", "baseline_status": "missing", diff --git a/tests/test_cli_inprocess.py b/tests/test_cli_inprocess.py index 952379c..717e108 100644 --- a/tests/test_cli_inprocess.py +++ b/tests/test_cli_inprocess.py @@ -3474,7 +3474,16 @@ def test_cli_summary_with_metrics_baseline_shows_metrics_section( ], ) out = capsys.readouterr().out - assert_contains_all(out, "Metrics", "Adoption", "Overloaded") + assert_contains_all( + out, + "Metrics", + "Dependencies", + "avg", + "p95", + "max", + "Adoption", + "Overloaded", + ) def test_cli_summary_with_api_surface_shows_public_api_line( @@ -3530,7 +3539,17 @@ def test_cli_ci_summary_includes_adoption_and_public_api_lines( ], ) out = capsys.readouterr().out - assert_contains_all(out, "Adoption", "Public API", "symbols=", "docstrings=") + assert_contains_all( + out, + "Dependencies", + "avg=", + "p95=", + "max=", + "Adoption", + "Public API", + "symbols=", + "docstrings=", + ) def test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups( diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py index 6a9abf2..657c1ba 100644 --- a/tests/test_cli_unit.py +++ b/tests/test_cli_unit.py @@ -14,7 +14,7 @@ from io import StringIO from pathlib import Path from types import SimpleNamespace -from typing import Any, cast +from 
typing import Any, TextIO, cast import pytest @@ -195,6 +195,37 @@ def test_cli_tips_detect_vscode_environment_signals() -> None: assert cli_tips._is_vscode_environment({"TERM_PROGRAM": "xterm-256color"}) is False +def test_cli_stream_is_tty_handles_oserror() -> None: + class _BrokenTTY: + def isatty(self) -> bool: + raise OSError("tty unavailable") + + assert cli_tips._stream_is_tty(cast("TextIO", _BrokenTTY())) is False + + +def test_cli_load_tips_state_rejects_invalid_tip_shapes( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(cli_tips, "read_json_object", lambda _path: {"tips": []}) + assert cli_tips._load_tips_state(tmp_path / "tips.json") == { + "schema_version": 1, + "tips": {}, + } + + +def test_cli_tip_last_shown_version_rejects_invalid_shapes() -> None: + assert ( + cli_tips._tip_last_shown_version({"tips": []}, tip_key="vscode_extension") == "" + ) + assert ( + cli_tips._tip_last_shown_version( + {"tips": {"vscode_extension": {"last_shown_version": 7}}}, + tip_key="vscode_extension", + ) + == "" + ) + + def test_cli_vscode_extension_tip_uses_versioned_cache( tmp_path: Path, ) -> None: @@ -243,6 +274,31 @@ def test_cli_vscode_extension_tip_uses_versioned_cache( assert len(printer.lines) == 2 +def test_cli_vscode_extension_tip_tolerates_state_write_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + printer = _RecordingPrinter() + args = SimpleNamespace(quiet=False, ci=False) + + def _fail_remember(**_kwargs: object) -> None: + raise OSError("read-only cache") + + monkeypatch.setattr(cli_tips, "_remember_tip_version", _fail_remember) + + shown = cli_tips.maybe_print_vscode_extension_tip( + args=args, + console=printer, + codeclone_version=__version__, + cache_path=tmp_path / ".cache" / "codeclone" / "cache.json", + environ={"TERM_PROGRAM": "vscode"}, + stream=_TTYStream(is_tty=True), + ) + + assert shown is True + assert len(printer.lines) == 1 + + @pytest.mark.parametrize( ("args", "env", 
"isatty"), [ @@ -1152,6 +1208,14 @@ def test_compact_summary_labels_use_machine_scannable_keys() -> None: == "Metrics cc=2.8/21 cbo=0.6/8 lcom4=1.2/4" " cycles=0 dead_code=1 health=85(B) overloaded_modules=3" ) + assert ( + ui.fmt_summary_compact_dependencies( + avg_depth=4.0, + p95_depth=13, + max_depth=16, + ) + == "Dependencies avg=4.0 p95=13 max=16" + ) assert ( ui.fmt_summary_compact_adoption( param_permille=750, @@ -1221,6 +1285,12 @@ def test_ui_summary_formatters_cover_optional_branches() -> None: assert "[yellow]2[/yellow] fixtures" in clones assert "5 detected" in ui.fmt_metrics_cycles(5) + dependencies = ui.fmt_metrics_dependencies( + avg_depth=4.0, + p95_depth=13, + max_depth=16, + ) + assert_contains_all(dependencies, "avg 4.0", "p95 13", "max 16") dead_with_suppressed = ui.fmt_metrics_dead_code(447, suppressed=9) assert "447 found" in dead_with_suppressed assert "(9 suppressed)" in dead_with_suppressed diff --git a/tests/test_defaults_contract.py b/tests/test_defaults_contract.py index a5c134a..0515e04 100644 --- a/tests/test_defaults_contract.py +++ b/tests/test_defaults_contract.py @@ -26,9 +26,11 @@ DEFAULT_ROOT, DEFAULT_SEGMENT_MIN_LOC, DEFAULT_SEGMENT_MIN_STMT, - HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE, + HEALTH_DEPENDENCY_DEPTH_AVG_MULTIPLIER, + HEALTH_DEPENDENCY_DEPTH_P95_MARGIN, ) from codeclone.core._types import DEFAULT_RUNTIME_PROCESSES +from codeclone.metrics import health as health_mod from codeclone.report.gates.evaluator import MetricGateConfig from codeclone.report.html.sections import _dependencies as html_dependencies_mod from codeclone.surfaces.mcp import server as mcp_server @@ -121,7 +123,16 @@ def test_mcp_parser_and_builder_defaults_stay_in_sync() -> None: assert signature.parameters["log_level"].default == args.log_level -def test_dependency_depth_safe_zone_stays_shared_between_contract_and_html() -> None: - source = inspect.getsource(html_dependencies_mod.render_dependencies_panel) - assert 
"HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE" in source - assert HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE == 8 +def test_dependency_depth_profile_contract_stays_shared_between_health_and_html() -> ( + None +): + health_source = inspect.getsource(health_mod._dependency_expected_tail) + html_source = inspect.getsource(html_dependencies_mod.render_dependencies_panel) + + assert "HEALTH_DEPENDENCY_DEPTH_AVG_MULTIPLIER" in health_source + assert "HEALTH_DEPENDENCY_DEPTH_P95_MARGIN" in health_source + assert HEALTH_DEPENDENCY_DEPTH_AVG_MULTIPLIER == 2.0 + assert HEALTH_DEPENDENCY_DEPTH_P95_MARGIN == 1 + assert "avg depth" in html_source + assert "p95 depth" in html_source + assert "HEALTH_DEPENDENCY_MAX_DEPTH_SAFE_ZONE" not in html_source diff --git a/tests/test_html_report.py b/tests/test_html_report.py index e53fb56..b188017 100644 --- a/tests/test_html_report.py +++ b/tests/test_html_report.py @@ -1508,6 +1508,8 @@ def _metrics_payload( dep_max_depth: object, dead_total: object, dead_critical: object, + dep_avg_depth: object = 2.5, + dep_p95_depth: object = 3, dead_suppressed: object = 0, ) -> dict[str, object]: suppressed_items: list[dict[str, object]] = [] @@ -1594,6 +1596,8 @@ def _metrics_payload( "modules": 4, "edges": 4, "max_depth": dep_max_depth, + "avg_depth": dep_avg_depth, + "p95_depth": dep_p95_depth, "cycles": dep_cycles, "longest_chains": [["pkg.a", "pkg.b", "pkg.c"]], "edge_list": [ @@ -1690,7 +1694,7 @@ def test_html_report_metrics_warn_branches_and_dependency_svg() -> None: assert "insight-warn" in html assert "dep-graph-svg" in html assert "Grade B" in html - assert "<= 8" in html + assert "Cycles: 0; avg depth: 2.5; p95 depth: 3; max dependency depth: 9." 
in html assert "pkg.mod.func" in html assert "outside/project/pkg/mod.py" in html @@ -1718,7 +1722,7 @@ def test_html_report_metrics_risk_branches() -> None: html, "insight-risk", 'stroke="var(--error)"', - "Cycles: 1; max dependency depth: 4.", + "Cycles: 1; avg depth: 2.5; p95 depth: 3; max dependency depth: 4.", "5 candidates total; 2 high-confidence items; 0 suppressed.", '