From d52a3a4c2b83b4d27a9e1c483960221fe5aa147e Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 05:07:12 -0600 Subject: [PATCH 01/33] =?UTF-8?q?feat:=20add=20maintenance=20skills=20?= =?UTF-8?q?=E2=80=94=20deps-audit,=20bench-check,=20test-health,=20houseke?= =?UTF-8?q?ep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four recurring maintenance routines as Claude Code skills: - /deps-audit: vulnerability scanning, staleness, unused deps, license checks - /bench-check: benchmark regression detection against saved baselines - /test-health: flaky test detection, dead tests, coverage gap analysis - /housekeep: clean worktrees, dirt files, sync main, prune branches --- .claude/skills/bench-check/SKILL.md | 223 +++++++++++++++++++++++ .claude/skills/deps-audit/SKILL.md | 164 +++++++++++++++++ .claude/skills/housekeep/SKILL.md | 266 ++++++++++++++++++++++++++++ .claude/skills/test-health/SKILL.md | 248 ++++++++++++++++++++++++++ 4 files changed, 901 insertions(+) create mode 100644 .claude/skills/bench-check/SKILL.md create mode 100644 .claude/skills/deps-audit/SKILL.md create mode 100644 .claude/skills/housekeep/SKILL.md create mode 100644 .claude/skills/test-health/SKILL.md diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md new file mode 100644 index 00000000..2b48ff3a --- /dev/null +++ b/.claude/skills/bench-check/SKILL.md @@ -0,0 +1,223 @@ +--- +name: bench-check +description: Run benchmarks against a saved baseline, detect performance regressions, and update the baseline — guards against silent slowdowns +argument-hint: "[--save-baseline | --compare-only | --threshold 15] (default: compare + save)" +allowed-tools: Bash, Read, Write, Edit, Glob, Grep, Agent +--- + +# /bench-check — Performance Regression Check + +Run the project's benchmark suite, compare results against a saved baseline, flag regressions beyond a threshold, 
and optionally update the baseline. Prevents silent performance degradation between releases. + +## Arguments + +- `$ARGUMENTS` may contain: + - `--save-baseline` — run benchmarks and save as the new baseline (no comparison) + - `--compare-only` — compare against baseline without updating it + - `--threshold N` — regression threshold percentage (default: 15%) + - No arguments — compare against baseline, then update it if no regressions + +## Phase 0 — Pre-flight + +1. Confirm we're in the codegraph repo root +2. Check that benchmark scripts exist: + - `scripts/benchmark.js` (build speed, query latency) + - `scripts/incremental-benchmark.js` (incremental build tiers) + - `scripts/query-benchmark.js` (query depth scaling) + - `scripts/embedding-benchmark.js` (search recall) — optional, skip if embedding deps missing +3. Parse `$ARGUMENTS`: + - `SAVE_ONLY=true` if `--save-baseline` + - `COMPARE_ONLY=true` if `--compare-only` + - `THRESHOLD=N` from `--threshold N` (default: 15) +4. Check for existing baseline at `generated/bench-check/baseline.json` + - If missing and not `--save-baseline`: warn that this will be an initial baseline run + +## Phase 1 — Run Benchmarks + +Run each benchmark script and collect results. Each script outputs JSON to stdout. + +### 1a. Build & Query Benchmark + +```bash +node scripts/benchmark.js 2>/dev/null +``` + +Extract: +- `buildTime` (ms) — per engine (native, WASM) +- `queryTime` (ms) — per query type +- `nodeCount`, `edgeCount` — graph size + +### 1b. Incremental Benchmark + +```bash +node scripts/incremental-benchmark.js 2>/dev/null +``` + +Extract: +- `noOpRebuild` (ms) — time for no-change rebuild +- `singleFileRebuild` (ms) — time after one file change +- `importResolution` (ms) — resolution throughput + +### 1c. Query Depth Benchmark + +```bash +node scripts/query-benchmark.js 2>/dev/null +``` + +Extract: +- `fnDeps` scaling by depth +- `fnImpact` scaling by depth +- `diffImpact` latency + +### 1d. 
Embedding Benchmark (optional) + +```bash +node scripts/embedding-benchmark.js 2>/dev/null +``` + +Extract: +- `embeddingTime` (ms) +- `recall` at Hit@1, Hit@3, Hit@5, Hit@10 + +> **Timeout:** Each benchmark gets 5 minutes max. If it times out, record `"timeout"` for that suite and continue. + +> **Errors:** If a benchmark script fails (non-zero exit), record `"error: "` and continue with remaining benchmarks. + +## Phase 2 — Normalize Results + +Build a flat metrics object from all benchmark results: + +```json +{ + "timestamp": "", + "version": "", + "gitRef": "", + "metrics": { + "build.native.ms": 1234, + "build.wasm.ms": 2345, + "query.fnDeps.depth3.ms": 45, + "query.fnImpact.depth3.ms": 67, + "query.diffImpact.ms": 89, + "incremental.noOp.ms": 12, + "incremental.singleFile.ms": 34, + "incremental.importResolution.ms": 56, + "graph.nodes": 500, + "graph.edges": 1200, + "embedding.time.ms": 3000, + "embedding.recall.hit1": 0.85, + "embedding.recall.hit5": 0.95 + } +} +``` + +Adapt the metric keys to match whatever the benchmark scripts actually output — the above are representative. The goal is a flat key→number map for easy comparison. + +## Phase 3 — Compare Against Baseline + +Skip this phase if `SAVE_ONLY=true` or no baseline exists. + +For each metric in the current run: + +1. Look up the same metric in the baseline +2. Compute: `delta_pct = ((current - baseline) / baseline) * 100` +3. Classify: + - **Regression**: metric increased by more than `THRESHOLD`% (for time metrics) or decreased by more than `THRESHOLD`% (for recall/quality metrics) + - **Improvement**: metric decreased by more than `THRESHOLD`% (time) or increased (quality) + - **Stable**: within threshold + +> **Direction awareness:** For latency metrics (ms), higher = worse. For recall/quality metrics, higher = better. For count metrics (nodes, edges), changes are informational only — not regressions. 
+ +### Regression table + +| Metric | Baseline | Current | Delta | Status | +|--------|----------|---------|-------|--------| +| build.native.ms | 1200 | 1500 | +25% | REGRESSION | +| query.fnDeps.depth3.ms | 45 | 43 | -4.4% | stable | + +## Phase 4 — Verdict + +Based on comparison results: + +### No regressions found +- Print: `BENCH-CHECK PASSED — no regressions beyond {THRESHOLD}% threshold` +- If not `COMPARE_ONLY`: update baseline with current results + +### Regressions found +- Print: `BENCH-CHECK FAILED — {N} regressions detected` +- List each regression with metric name, baseline value, current value, delta % +- Do NOT update the baseline +- Suggest investigation: + - `git log --oneline ..HEAD` to find what changed + - `codegraph diff-impact -T` to find structural changes + - Re-run individual benchmarks to confirm (not flaky) + +### First run (no baseline) +- Print: `BENCH-CHECK — initial baseline saved` +- Save current results as baseline + +## Phase 5 — Save Baseline + +When saving (initial run, `--save-baseline`, or passed comparison): + +Write to `generated/bench-check/baseline.json`: +```json +{ + "savedAt": "", + "version": "", + "gitRef": "", + "threshold": 15, + "metrics": { ... } +} +``` + +Also append a one-line summary to `generated/bench-check/history.ndjson`: +```json +{"timestamp":"...","version":"...","gitRef":"...","metrics":{...}} +``` + +This creates a running log of benchmark results over time. + +## Phase 6 — Report + +Write a human-readable report to `generated/bench-check/BENCH_REPORT_.md`: + +```markdown +# Benchmark Report — + +**Version:** X.Y.Z | **Git ref:** abc1234 | **Threshold:** 15% + +## Verdict: PASSED / FAILED + +## Comparison vs Baseline + + + +## Regressions (if any) + + + +## Trend (if history.ndjson has 3+ entries) + + + +## Raw Results + + +``` + +## Phase 7 — Cleanup + +1. If report was written, print its path +2. If baseline was updated, print confirmation +3. 
Print one-line summary: `PASSED (0 regressions) | FAILED (N regressions) | BASELINE SAVED` + +## Rules + +- **Never skip a benchmark** — if it fails, record the failure and continue +- **Timeout is 5 minutes per benchmark** — use appropriate timeout flags +- **Don't update baseline on regression** — the user must investigate first +- **Recall/quality metrics are inverted** — a decrease is a regression +- **Count metrics are informational** — graph growing isn't a regression +- **The baseline file is committed to git** — it's a shared reference point +- **history.ndjson is append-only** — never truncate or rewrite it +- Generated files go in `generated/bench-check/` — create the directory if needed diff --git a/.claude/skills/deps-audit/SKILL.md b/.claude/skills/deps-audit/SKILL.md new file mode 100644 index 00000000..cc2e4b12 --- /dev/null +++ b/.claude/skills/deps-audit/SKILL.md @@ -0,0 +1,164 @@ +--- +name: deps-audit +description: Audit dependencies for vulnerabilities, staleness, unused packages, and license risks — produce a health report with actionable fixes +argument-hint: "[--fix] (optional — auto-fix safe updates)" +allowed-tools: Bash, Read, Write, Edit, Glob, Grep, Agent +--- + +# /deps-audit — Dependency Health Audit + +Audit the project's dependency tree for security vulnerabilities, outdated packages, unused dependencies, and license compliance. Produce a structured report and optionally auto-fix safe updates. + +## Arguments + +- `$ARGUMENTS` may contain `--fix` to auto-apply safe updates (patch/minor only) + +## Phase 0 — Pre-flight + +1. Confirm we're in the codegraph repo root (check for `package.json` and `package-lock.json`) +2. Run `node --version` — must be >= 20 +3. Run `npm --version` to capture toolchain info +4. Parse `$ARGUMENTS` — set `AUTO_FIX=true` if `--fix` is present + +## Phase 1 — Security Vulnerabilities + +Run `npm audit --json` and parse the output: + +1. 
Count vulnerabilities by severity: `critical`, `high`, `moderate`, `low`, `info` +2. For each `critical` or `high` vulnerability: + - Record: package name, severity, CVE/GHSA ID, vulnerable version range, patched version, dependency path (direct vs transitive) + - Check if a fix is available (`npm audit fix --dry-run --json`) +3. Summarize: total vulns, fixable count, breaking-fix count + +**If `AUTO_FIX` is set:** Run `npm audit fix` (non-breaking fixes only). Record what changed. Do NOT run `npm audit fix --force` — breaking changes require manual review. + +## Phase 2 — Outdated Dependencies + +Run `npm outdated --json` and categorize: + +### 2a. Direct dependencies (`dependencies` + `devDependencies`) + +For each outdated package, record: +- Package name +- Current version → Wanted (semver-compatible) → Latest +- Whether the update is patch, minor, or major +- If major: check the package's CHANGELOG/release notes for breaking changes relevant to our usage + +### 2b. Staleness score + +Classify each outdated dep: +| Category | Definition | +|----------|-----------| +| **Fresh** | On latest or within 1 patch | +| **Aging** | 1+ minor versions behind | +| **Stale** | 1+ major versions behind | +| **Abandoned** | No release in 12+ months (check npm registry publish date) | + +For any package classified as **Abandoned**, check if there's a maintained fork or alternative. + +**If `AUTO_FIX` is set:** Run `npm update` to apply semver-compatible updates. Record what changed. + +## Phase 3 — Unused Dependencies + +Detect dependencies declared in `package.json` but never imported: + +1. Read `dependencies` and `devDependencies` from `package.json` +2. For each dependency, search for imports/requires across `src/`, `tests/`, `scripts/`, `cli.js`, `index.js`: + - `require('')` or `require('/...')` + - `import ... from ''` or `import ''` + - `import('')` (dynamic imports) +3. 
Skip known implicit dependencies that don't have direct imports: + - `@anthropic-ai/tokenizer` — may be used by `@anthropic-ai/sdk` + - `tree-sitter-*` and `web-tree-sitter` — loaded dynamically via WASM + - `@biomejs/biome` — used as CLI tool only + - `commit-and-tag-version` — used as npm script + - `@optave/codegraph-*` — platform-specific optional binaries + - `vitest` — test runner, invoked via CLI + - Anything in `optionalDependencies` +4. For each truly unused dep: recommend removal with `npm uninstall ` + +> **Important:** Some deps are used transitively or via CLI — don't blindly remove. Flag as "likely unused" and let the user decide. + +## Phase 4 — License Compliance + +Check licenses for all direct dependencies: + +1. For each package in `dependencies`, read its `node_modules//package.json` → `license` field +2. Classify: + - **Permissive** (MIT, ISC, BSD-2-Clause, BSD-3-Clause, Apache-2.0, 0BSD, Unlicense): OK + - **Weak copyleft** (LGPL-2.1, LGPL-3.0, MPL-2.0): Flag for review + - **Strong copyleft** (GPL-2.0, GPL-3.0, AGPL-3.0): Flag as risk — may conflict with MIT license of codegraph + - **Unknown/UNLICENSED/missing**: Flag for investigation +3. Only flag non-permissive licenses — don't list every MIT dep + +## Phase 5 — Duplicate Packages + +Check for duplicate versions of the same package in the dependency tree: + +1. Run `npm ls --all --json` and look for packages that appear multiple times with different versions +2. Only flag duplicates that add significant bundle weight (> 100KB) or are security-sensitive (crypto, auth, etc.) +3. 
Suggest deduplication: `npm dedupe` + +## Phase 6 — Report + +Write a report to `generated/deps-audit/DEPS_AUDIT_.md` with this structure: + +```markdown +# Dependency Audit Report — + +## Summary + +| Metric | Value | +|--------|-------| +| Total dependencies (direct) | N | +| Total dependencies (transitive) | N | +| Security vulnerabilities | N critical, N high, N moderate, N low | +| Outdated packages | N stale, N aging, N fresh | +| Unused dependencies | N | +| License risks | N | +| Duplicates | N | +| **Health score** | **X/100** | + +## Health Score Calculation + +- Start at 100 +- -20 per critical vuln, -10 per high vuln, -3 per moderate vuln +- -5 per stale (major behind) dep, -2 per aging dep +- -5 per unused dep +- -10 per copyleft license risk +- Floor at 0 + +## Security Vulnerabilities + + +## Outdated Packages + + +## Unused Dependencies + + +## License Flags + + +## Duplicates + + +## Recommended Actions + +``` + +## Phase 7 — Auto-fix Summary (if `--fix`) + +If `AUTO_FIX` was set, summarize all changes made: +1. List each package updated/fixed +2. Run `npm test` to verify nothing broke +3. 
If tests fail, revert with `git checkout -- package.json package-lock.json` and report what failed + +## Rules + +- **Never run `npm audit fix --force`** — breaking changes need human review +- **Never remove a dependency** without asking the user, even if it appears unused — flag it in the report instead +- **Always run tests** after any auto-fix changes +- **If `--fix` causes test failures**, revert all changes and report the failure +- Treat `optionalDependencies` separately — they're expected to fail on some platforms +- The report goes in `generated/deps-audit/` — create the directory if it doesn't exist diff --git a/.claude/skills/housekeep/SKILL.md b/.claude/skills/housekeep/SKILL.md new file mode 100644 index 00000000..a00a88f5 --- /dev/null +++ b/.claude/skills/housekeep/SKILL.md @@ -0,0 +1,266 @@ +--- +name: housekeep +description: Local repo maintenance — clean stale worktrees, remove dirt files, sync with main, update codegraph, prune branches, and verify repo health +argument-hint: "[--full | --dry-run | --skip-update] (default: full cleanup)" +allowed-tools: Bash, Read, Write, Edit, Glob, Grep +--- + +# /housekeep — Local Repository Maintenance + +Clean up the local repo: remove stale worktrees, delete dirt/temp files, sync with main, update codegraph to latest, prune merged branches, and verify repo health. The "spring cleaning" routine. + +## Arguments + +- `$ARGUMENTS` may contain: + - `--full` — run all phases (default behavior) + - `--dry-run` — show what would be cleaned without actually doing it + - `--skip-update` — skip the codegraph npm update phase + - No arguments — full cleanup + +## Phase 0 — Pre-flight + +1. Confirm we're in the codegraph repo root (check `package.json` with `"name": "@optave/codegraph"`) +2. Parse `$ARGUMENTS`: + - `DRY_RUN=true` if `--dry-run` + - `SKIP_UPDATE=true` if `--skip-update` +3. Record current branch: `git branch --show-current` +4. Record current git status: `git status --short` +5. 
Warn the user if there are uncommitted changes — housekeeping works best from a clean state + +## Phase 1 — Clean Stale Worktrees + +### 1a. List all worktrees + +```bash +git worktree list +``` + +### 1b. Identify stale worktrees + +A worktree is stale if: +- Its directory no longer exists on disk (prunable) +- It has no uncommitted changes AND its branch has been merged to main +- It was created more than 7 days ago with no commits since (abandoned) + +Check `.claude/worktrees/` for Claude Code worktrees specifically. + +### 1c. Clean up + +For prunable worktrees (missing directory): +```bash +git worktree prune +``` + +For stale worktrees with merged branches: +- List them and ask the user for confirmation before removing +- If confirmed (or `--full` without `--dry-run`): + ```bash + git worktree remove + git branch -d # only if fully merged + ``` + +**If `DRY_RUN`:** Just list what would be removed, don't do it. + +> **Never force-remove** a worktree with uncommitted changes. List it as "has uncommitted work" and skip. + +## Phase 2 — Delete Dirt Files + +Remove temporary and generated files that accumulate over time: + +### 2a. Known dirt patterns + +Search for and remove: +- `*.tmp.*`, `*.bak`, `*.orig` files in the repo (but NOT in `node_modules/`) +- `.DS_Store` files +- `*.log` files in repo root (not in `node_modules/`) +- Empty directories (except `.codegraph/`, `.claude/`, `node_modules/`) +- `coverage/` directory (regenerated by `npm run test:coverage`) +- `.codegraph/graph.db-journal` (SQLite WAL leftovers) +- Stale lock files: `.codegraph/*.lock` older than 1 hour + +### 2b. Large untracked files + +Find untracked files larger than 1MB: +```bash +git ls-files --others --exclude-standard | while read f; do + size=$(stat --format='%s' "$f" 2>/dev/null || stat -f '%z' "$f" 2>/dev/null) + if [ "$size" -gt 1048576 ]; then echo "$f ($size bytes)"; fi +done +``` + +Flag these for user review — they might be accidentally untracked binaries. + +### 2c. 
Clean up + +**If `DRY_RUN`:** List all files that would be removed with their sizes. + +**Otherwise:** +- Remove known dirt patterns automatically +- For large untracked files: list and ask the user + +> **Never delete** files that are tracked by git. Only clean untracked/ignored files. + +## Phase 3 — Sync with Main + +### 3a. Fetch latest + +```bash +git fetch origin +``` + +### 3b. Check main branch status + +```bash +git log HEAD..origin/main --oneline +``` + +If main has new commits: +- If on main: `git pull origin main` +- If on a feature branch: inform the user how many commits behind main they are + - Suggest: `git merge origin/main` (never rebase — per project rules) + +### 3c. Check for diverged branches + +List local branches that have diverged from their remote tracking branch: +```bash +git for-each-ref --format='%(refname:short) %(upstream:track)' refs/heads/ +``` + +Flag any branches marked `[ahead N, behind M]` — these may need attention. + +## Phase 4 — Prune Merged Branches + +### 4a. Find merged branches + +```bash +git branch --merged main +``` + +### 4b. Safe to delete + +Branches that are: +- Fully merged into main +- Not `main` itself +- Not the current branch +- Not a worktree branch (check `git worktree list`) + +### 4c. Prune remote tracking refs + +```bash +git remote prune origin +``` + +This removes local refs to branches that no longer exist on the remote. + +### 4d. Clean up + +**If `DRY_RUN`:** List branches that would be deleted. + +**Otherwise:** Delete merged branches: +```bash +git branch -d # safe delete, only if fully merged +``` + +> **Never use `git branch -D`** (force delete). If `-d` fails, the branch has unmerged work — skip it. + +## Phase 5 — Update Codegraph + +**Skip if `SKIP_UPDATE` is set.** + +### 5a. Check current version + +```bash +node -e "console.log(require('./package.json').version)" +``` + +### 5b. Check latest published version + +```bash +npm view @optave/codegraph version +``` + +### 5c. 
Update if needed + +If a newer version is available: +- Show the version diff (current → latest) +- Check the CHANGELOG for what changed +- If it's a patch/minor: update automatically + ```bash + npm install + ``` +- If it's a major: warn the user and ask for confirmation + +### 5d. Rebuild + +After any update: +```bash +npm install +``` + +Verify the build works: +```bash +npx codegraph stats 2>/dev/null && echo "OK" || echo "FAILED" +``` + +## Phase 6 — Verify Repo Health + +Quick health checks to catch issues: + +### 6a. Graph integrity + +```bash +npx codegraph stats +``` + +If the graph is stale (built from a different commit), rebuild: +```bash +npx codegraph build +``` + +### 6b. Node modules integrity + +```bash +npm ls --depth=0 2>&1 | grep -c "missing\|invalid\|WARN" +``` + +If issues found: `npm install` to fix. + +### 6c. Git integrity + +```bash +git fsck --no-dangling 2>&1 | head -20 +``` + +Flag any errors (rare but important). + +## Phase 7 — Report + +Print a summary to the console (no file needed — this is a local maintenance task): + +``` +=== Housekeeping Report === + +Worktrees: removed 2 stale, 1 has uncommitted work (skipped) +Dirt files: cleaned 5 temp files (12KB), 1 large untracked flagged +Branches: pruned 3 merged branches, 2 remote refs +Main sync: up to date (or: 4 commits behind — merge suggested) +Codegraph: v3.1.2 → v3.1.3 updated (or: already latest) +Graph: rebuilt (was stale) (or: fresh) +Node mods: OK (or: fixed 2 missing deps) +Git: OK + +Status: CLEAN ✓ +``` + +**If `DRY_RUN`:** prefix with `[DRY RUN]` and show what would happen without doing it. 
+ +## Rules + +- **Never force-delete** anything — use safe deletes only (`git branch -d`, `git worktree remove`) +- **Never rebase** — sync with main via merge only (per project rules) +- **Never delete tracked files** — only clean untracked/ignored dirt +- **Never delete worktrees with uncommitted changes** — warn and skip +- **Ask before deleting large untracked files** — they might be intentional +- **This is a local-only operation** — no pushes, no remote modifications, no PR creation +- **Idempotent** — running twice should be safe (second run finds nothing to clean) +- **`--dry-run` is sacred** — it must NEVER modify anything, only report diff --git a/.claude/skills/test-health/SKILL.md b/.claude/skills/test-health/SKILL.md new file mode 100644 index 00000000..2bb06194 --- /dev/null +++ b/.claude/skills/test-health/SKILL.md @@ -0,0 +1,248 @@ +--- +name: test-health +description: Audit test suite health — detect flaky tests, dead tests, coverage gaps, and missing assertions — produce a health report with fix suggestions +argument-hint: "[--flaky-runs 5 | --coverage | --quick] (default: full audit)" +allowed-tools: Bash, Read, Write, Edit, Glob, Grep, Agent +--- + +# /test-health — Test Suite Health Audit + +Audit the test suite for flaky tests, dead/trivial tests, coverage gaps on recent changes, missing assertions, and structural issues. Produce a health report with prioritized recommendations. + +## Arguments + +- `$ARGUMENTS` may contain: + - `--flaky-runs N` — number of times to run the suite for flaky detection (default: 5) + - `--coverage` — only run the coverage gap analysis (skip flaky/dead detection) + - `--quick` — skip flaky detection (most time-consuming), run everything else + - No arguments — full audit + +## Phase 0 — Pre-flight + +1. Confirm we're in the codegraph repo root +2. Verify vitest is available: `npx vitest --version` +3. 
Parse `$ARGUMENTS`: + - `FLAKY_RUNS=N` from `--flaky-runs N` (default: 5) + - `COVERAGE_ONLY=true` if `--coverage` + - `QUICK=true` if `--quick` +4. Discover all test files: + ```bash + find tests/ -name '*.test.js' -o -name '*.test.ts' | sort + ``` +5. Count total test files and categorize by directory (integration, parsers, graph, search, unit) + +## Phase 1 — Flaky Test Detection + +**Skip if `COVERAGE_ONLY` or `QUICK` is set.** + +Run the full test suite `FLAKY_RUNS` times and track per-test pass/fail: + +```bash +for i in $(seq 1 $FLAKY_RUNS); do + npx vitest run --reporter=json 2>/dev/null +done +``` + +For each run, parse the JSON reporter output to get per-test results. + +### Analysis + +A test is **flaky** if it passes in some runs and fails in others. + +For each flaky test found: +1. Record: test file, test name, pass count, fail count, failure messages +2. Categorize likely cause: + - **Timing-dependent**: failure message mentions timeout, race condition, or test has `setTimeout`/`sleep` + - **Order-dependent**: only fails when run with other tests (passes in isolation) + - **Resource-dependent**: mentions file system, network, port, or temp directory + - **Non-deterministic**: random/Date.now/Math.random in test or source + +> **Timeout:** Each full suite run gets 3 minutes. If it times out, record partial results and continue. + +## Phase 2 — Dead & Trivial Test Detection + +Scan all test files for problematic patterns: + +### 2a. Empty / no-assertion tests + +Search for test bodies that: +- Have no `expect()`, `assert()`, `toBe()`, `toEqual()`, or similar assertion calls +- Only contain `console.log` or comments +- Are skipped: `it.skip(`, `test.skip(`, `xit(`, `xtest(` +- Are TODO: `it.todo(`, `test.todo(` + +``` +Pattern: test bodies with 0 assertions = dead tests +``` + +### 2b. 
Trivial / tautological tests + +Detect tests that assert on constants or trivially true conditions: +- `expect(true).toBe(true)` +- `expect(1).toBe(1)` +- `expect(result).toBeDefined()` as the ONLY assertion (too weak) + +### 2c. Commented-out tests + +Search for commented-out test blocks: +- `// it(`, `// test(`, `/* it(`, `/* test(` +- Large commented blocks inside `describe` blocks + +### 2d. Orphaned fixtures + +Check if any files in `tests/fixtures/` are not referenced by any test file. + +### 2e. Duplicate test names + +Search for duplicate test descriptions within the same `describe` block — these indicate copy-paste errors. + +## Phase 3 — Coverage Gap Analysis + +Run vitest with coverage and analyze: + +```bash +npx vitest run --coverage --coverage.reporter=json 2>/dev/null +``` + +### 3a. Overall coverage + +Parse `coverage/coverage-summary.json` and extract: +- Line coverage % +- Branch coverage % +- Function coverage % +- Statement coverage % + +### 3b. Uncovered files + +Find source files in `src/` with 0% coverage (no tests touch them at all). + +### 3c. Low-coverage hotspots + +Find files with < 50% line coverage. For each: +- List uncovered functions (from the detailed coverage data) +- Check if the file is in `domain/` or `features/` (core logic — coverage matters more) +- Check file's complexity with `codegraph complexity -T` — high complexity + low coverage = high risk + +### 3d. Recent changes without coverage + +Compare against `main` branch to find recently changed files: + +```bash +git diff --name-only main...HEAD -- src/ +``` + +For each changed source file, check if: +1. It has corresponding test changes +2. Its coverage increased, decreased, or stayed the same +3. New functions/exports were added without test coverage + +> **Note:** If the coverage tool is not configured or fails, skip this phase and note it in the report. Coverage is a vitest plugin — it may need `@vitest/coverage-v8` installed. 
+ +## Phase 4 — Test Structure Analysis + +Analyze the test suite's structural health: + +### 4a. Test-to-source mapping + +For each directory in `src/`: +- Count source files +- Count corresponding test files +- Calculate test coverage ratio (files with tests / total files) +- Flag directories with < 30% test file coverage + +### 4b. Test file size distribution + +- Find oversized test files (> 500 lines) — may need splitting +- Find tiny test files (< 10 lines) — may be stubs or dead + +### 4c. Setup/teardown hygiene + +Check for: +- Tests that create temp files/dirs but don't clean up (`afterEach`/`afterAll` missing) +- Tests that mutate global state without restoration +- Missing `beforeEach` resets in `describe` blocks that share state + +### 4d. Timeout analysis + +- Find tests with custom timeouts: `{ timeout: ... }` +- Find tests that exceed the default 30s timeout in recent runs +- High timeouts often indicate tests that should be restructured or are testing too much + +## Phase 5 — Report + +Write report to `generated/test-health/TEST_HEALTH_.md`: + +```markdown +# Test Health Report — + +## Summary + +| Metric | Value | +|--------|-------| +| Total test files | N | +| Total test cases | N | +| Flaky tests | N | +| Dead/trivial tests | N | +| Skipped tests | N | +| Coverage (lines) | X% | +| Coverage (branches) | X% | +| Uncovered source files | N | +| **Health score** | **X/100** | + +## Health Score Calculation + +- Start at 100 +- -10 per flaky test +- -3 per dead/trivial test +- -2 per skipped test (without TODO explaining why) +- -1 per uncovered source file in `domain/` or `features/` +- -(100 - line_coverage) / 5 (coverage penalty) +- Floor at 0 + +## Flaky Tests + + +## Dead & Trivial Tests + + +## Coverage Gaps + + +## Structural Issues + + +## Recommended Actions + +### Priority 1 — Fix flaky tests + + +### Priority 2 — Remove or fix dead tests + + +### Priority 3 — Add coverage for high-risk gaps + + +### Priority 4 — Structural improvements + 
+``` + +## Phase 6 — Quick Wins + +After writing the report, identify tests that can be fixed immediately (< 5 min each): + +1. Remove `.skip` from tests that now pass (run them to check) +2. Add missing assertions to empty test bodies (if the intent is clear) +3. Delete commented-out test blocks older than 6 months (check git blame) + +**Do NOT auto-fix** — list these as suggestions in the report. The user decides. + +## Rules + +- **Never delete or modify test files** without explicit user approval — this is a read-only audit +- **Flaky detection is slow** — warn the user before running 5+ iterations +- **Coverage requires `@vitest/coverage-v8`** — if missing, skip coverage and note it +- **Order-dependent flakiness** requires running tests both in suite and in isolation — only do this for tests that flaked in Phase 1 +- **Fixture files may be shared** across tests — don't flag as orphaned if used indirectly +- **Skipped tests aren't always bad** — only flag if there's no `TODO` or comment explaining why +- Generated files go in `generated/test-health/` — create the directory if needed +- **This is a diagnostic tool** — it reports problems, it doesn't fix them (unless the user opts in) From a562b523045e4d72145f447b97f792ae0db6ea58 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 05:25:16 -0600 Subject: [PATCH 02/33] fix(bench-check): capture stderr, guard division-by-zero, commit baseline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace 2>/dev/null with output=$(... 
2>&1) + exit_code check on all four benchmark invocations so error messages are captured and recorded - Add division-by-zero guard in Phase 3: when baseline == 0, mark delta as "N/A — baseline was zero" (informational only, not a regression) - Add git add + git commit step in Phase 5 so the baseline file is actually committed after each save, matching the documented rule --- .claude/skills/bench-check/SKILL.md | 33 +++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index 2b48ff3a..334345a1 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -39,9 +39,12 @@ Run each benchmark script and collect results. Each script outputs JSON to stdou ### 1a. Build & Query Benchmark ```bash -node scripts/benchmark.js 2>/dev/null +output=$(node scripts/benchmark.js 2>&1) +exit_code=$? ``` +If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. + Extract: - `buildTime` (ms) — per engine (native, WASM) - `queryTime` (ms) — per query type @@ -50,9 +53,12 @@ Extract: ### 1b. Incremental Benchmark ```bash -node scripts/incremental-benchmark.js 2>/dev/null +output=$(node scripts/incremental-benchmark.js 2>&1) +exit_code=$? ``` +If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. + Extract: - `noOpRebuild` (ms) — time for no-change rebuild - `singleFileRebuild` (ms) — time after one file change @@ -61,9 +67,12 @@ Extract: ### 1c. Query Depth Benchmark ```bash -node scripts/query-benchmark.js 2>/dev/null +output=$(node scripts/query-benchmark.js 2>&1) +exit_code=$? ``` +If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. + Extract: - `fnDeps` scaling by depth - `fnImpact` scaling by depth @@ -72,9 +81,12 @@ Extract: ### 1d. 
Embedding Benchmark (optional) ```bash -node scripts/embedding-benchmark.js 2>/dev/null +output=$(node scripts/embedding-benchmark.js 2>&1) +exit_code=$? ``` +If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. + Extract: - `embeddingTime` (ms) - `recall` at Hit@1, Hit@3, Hit@5, Hit@10 @@ -119,8 +131,9 @@ Skip this phase if `SAVE_ONLY=true` or no baseline exists. For each metric in the current run: 1. Look up the same metric in the baseline -2. Compute: `delta_pct = ((current - baseline) / baseline) * 100` -3. Classify: +2. Guard against division-by-zero: if `baseline == 0`, mark the delta as `"N/A baseline was zero"` and treat the metric as **informational only** (not a regression or improvement) +3. Otherwise compute: `delta_pct = ((current - baseline) / baseline) * 100` +4. Classify: - **Regression**: metric increased by more than `THRESHOLD`% (for time metrics) or decreased by more than `THRESHOLD`% (for recall/quality metrics) - **Improvement**: metric decreased by more than `THRESHOLD`% (time) or increased (quality) - **Stable**: within threshold @@ -177,6 +190,12 @@ Also append a one-line summary to `generated/bench-check/history.ndjson`: This creates a running log of benchmark results over time. 
+After writing both files, commit the baseline so it is a shared reference point: +```bash +git add generated/bench-check/baseline.json generated/bench-check/history.ndjson +git commit -m "chore: update bench-check baseline ()" +``` + ## Phase 6 — Report Write a human-readable report to `generated/bench-check/BENCH_REPORT_.md`: @@ -218,6 +237,6 @@ Write a human-readable report to `generated/bench-check/BENCH_REPORT_.md`: - **Don't update baseline on regression** — the user must investigate first - **Recall/quality metrics are inverted** — a decrease is a regression - **Count metrics are informational** — graph growing isn't a regression -- **The baseline file is committed to git** — it's a shared reference point +- **The baseline file is committed to git** — it's a shared reference point; Phase 5 always commits it - **history.ndjson is append-only** — never truncate or rewrite it - Generated files go in `generated/bench-check/` — create the directory if needed From 4fc994d8ce28ed74e433a0b4758736d7ce8892c3 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 05:25:30 -0600 Subject: [PATCH 03/33] fix(deps-audit): run npm ci after revert, document tokenizer skip reason - After reverting package.json + package-lock.json on --fix test failure, also run `npm ci` to resync node_modules/ with the restored lock file; without this the manifest is reverted but installed packages are not - Add explanatory comment on @anthropic-ai/tokenizer skip-list entry clarifying it is a peer dependency of @anthropic-ai/sdk and may be required at runtime without an explicit import in our code --- .claude/skills/deps-audit/SKILL.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.claude/skills/deps-audit/SKILL.md b/.claude/skills/deps-audit/SKILL.md index cc2e4b12..8240bf7e 100644 --- a/.claude/skills/deps-audit/SKILL.md +++ b/.claude/skills/deps-audit/SKILL.md @@ -68,7 +68,7 @@ Detect dependencies 
declared in `package.json` but never imported: - `import ... from ''` or `import ''` - `import('')` (dynamic imports) 3. Skip known implicit dependencies that don't have direct imports: - - `@anthropic-ai/tokenizer` — may be used by `@anthropic-ai/sdk` + - `@anthropic-ai/tokenizer` — peer dependency of `@anthropic-ai/sdk`; the SDK may require it at runtime without an explicit import in our code (verify against package.json before removing) - `tree-sitter-*` and `web-tree-sitter` — loaded dynamically via WASM - `@biomejs/biome` — used as CLI tool only - `commit-and-tag-version` — used as npm script @@ -152,13 +152,16 @@ Write a report to `generated/deps-audit/DEPS_AUDIT_.md` with this structur If `AUTO_FIX` was set, summarize all changes made: 1. List each package updated/fixed 2. Run `npm test` to verify nothing broke -3. If tests fail, revert with `git checkout -- package.json package-lock.json` and report what failed +3. If tests fail: + - Revert the manifest: `git checkout -- package.json package-lock.json` + - Restore `node_modules/` to match the reverted lock file: `npm ci` + - Report what failed ## Rules - **Never run `npm audit fix --force`** — breaking changes need human review - **Never remove a dependency** without asking the user, even if it appears unused — flag it in the report instead - **Always run tests** after any auto-fix changes -- **If `--fix` causes test failures**, revert all changes and report the failure +- **If `--fix` causes test failures**, revert manifest with `git checkout -- package.json package-lock.json` then run `npm ci` to resync `node_modules/`, and report the failure - Treat `optionalDependencies` separately — they're expected to fail on some platforms - The report goes in `generated/deps-audit/` — create the directory if it doesn't exist From 89aef6b0a1bc57a114b1214ca73c3fd484c16fb5 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 05:25:42 -0600 Subject: [PATCH 
04/33] fix(housekeep): guard Phase 5 in source repo, fix stale-worktree criterion - Phase 5 (Update Codegraph): add source-repo guard that skips the self-update logic when running inside the codegraph source repo; comparing the dev version to the published release and running npm install is a no-op since codegraph is not one of its own deps - Phase 1b stale-worktree criterion: replace "created more than 7 days ago" (not determinable via git worktree list) with "last commit on the branch is more than 7 days old AND branch has no commits ahead of origin/main", using `git log -1 --format=%ci ` --- .claude/skills/housekeep/SKILL.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.claude/skills/housekeep/SKILL.md b/.claude/skills/housekeep/SKILL.md index a00a88f5..d5bd9d7a 100644 --- a/.claude/skills/housekeep/SKILL.md +++ b/.claude/skills/housekeep/SKILL.md @@ -40,7 +40,8 @@ git worktree list A worktree is stale if: - Its directory no longer exists on disk (prunable) - It has no uncommitted changes AND its branch has been merged to main -- It was created more than 7 days ago with no commits since (abandoned) +- Its branch has no commits ahead of `origin/main` AND the branch's last commit is more than 7 days old + (check: `git log -1 --format=%ci ` — `git worktree list` does not expose creation timestamps) Check `.claude/worktrees/` for Claude Code worktrees specifically. @@ -167,6 +168,9 @@ git branch -d # safe delete, only if fully merged **Skip if `SKIP_UPDATE` is set.** +> **Source-repo guard:** This phase is only meaningful when codegraph is installed as a *dependency* of a consumer project. Because the pre-flight confirms we are inside the codegraph *source* repo (`"name": "@optave/codegraph"`), comparing the dev version to the published release and running `npm install` would be a no-op — codegraph is not one of its own dependencies. 
**Skip this entire phase** when running inside the source repo and print: +> `Codegraph: skipped (running inside source repo — update via git pull / branch sync instead)` + ### 5a. Check current version ```bash From ce5d811225d4d53da0a1934c0da97e8a0297b7a5 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 19:34:16 -0600 Subject: [PATCH 05/33] fix: address Round 3 Greptile review feedback --- .claude/skills/bench-check/SKILL.md | 2 +- .claude/skills/deps-audit/SKILL.md | 16 ++++++++++++---- .claude/skills/housekeep/SKILL.md | 2 +- .claude/skills/test-health/SKILL.md | 4 ++-- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index 334345a1..85031103 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -193,7 +193,7 @@ This creates a running log of benchmark results over time. After writing both files, commit the baseline so it is a shared reference point: ```bash git add generated/bench-check/baseline.json generated/bench-check/history.ndjson -git commit -m "chore: update bench-check baseline ()" +git diff --cached --quiet || git commit -m "chore: update bench-check baseline ()" ``` ## Phase 6 — Report diff --git a/.claude/skills/deps-audit/SKILL.md b/.claude/skills/deps-audit/SKILL.md index 8240bf7e..0098779d 100644 --- a/.claude/skills/deps-audit/SKILL.md +++ b/.claude/skills/deps-audit/SKILL.md @@ -149,11 +149,19 @@ Write a report to `generated/deps-audit/DEPS_AUDIT_.md` with this structur ## Phase 7 — Auto-fix Summary (if `--fix`) -If `AUTO_FIX` was set, summarize all changes made: +If `AUTO_FIX` was set: + +**Before running any auto-fix** (in Phase 1/2), save the original manifests so pre-existing unstaged changes are preserved: +```bash +git stash push -m "deps-audit-backup" -- package.json package-lock.json +``` + +Summarize all changes made: 1. 
List each package updated/fixed 2. Run `npm test` to verify nothing broke -3. If tests fail: - - Revert the manifest: `git checkout -- package.json package-lock.json` +3. If tests pass: drop the saved state (`git stash drop`) +4. If tests fail: + - Restore the saved manifests: `git stash pop` - Restore `node_modules/` to match the reverted lock file: `npm ci` - Report what failed @@ -162,6 +170,6 @@ If `AUTO_FIX` was set, summarize all changes made: - **Never run `npm audit fix --force`** — breaking changes need human review - **Never remove a dependency** without asking the user, even if it appears unused — flag it in the report instead - **Always run tests** after any auto-fix changes -- **If `--fix` causes test failures**, revert manifest with `git checkout -- package.json package-lock.json` then run `npm ci` to resync `node_modules/`, and report the failure +- **If `--fix` causes test failures**, restore manifests from the saved state (`git stash pop`) then run `npm ci` to resync `node_modules/`, and report the failure - Treat `optionalDependencies` separately — they're expected to fail on some platforms - The report goes in `generated/deps-audit/` — create the directory if it doesn't exist diff --git a/.claude/skills/housekeep/SKILL.md b/.claude/skills/housekeep/SKILL.md index d5bd9d7a..d8c75fb7 100644 --- a/.claude/skills/housekeep/SKILL.md +++ b/.claude/skills/housekeep/SKILL.md @@ -134,7 +134,7 @@ Flag any branches marked `[ahead N, behind M]` — these may need attention. ### 4a. Find merged branches ```bash -git branch --merged main +git branch --merged origin/main ``` ### 4b. 
Safe to delete diff --git a/.claude/skills/test-health/SKILL.md b/.claude/skills/test-health/SKILL.md index 2bb06194..855b0628 100644 --- a/.claude/skills/test-health/SKILL.md +++ b/.claude/skills/test-health/SKILL.md @@ -39,7 +39,7 @@ Run the full test suite `FLAKY_RUNS` times and track per-test pass/fail: ```bash for i in $(seq 1 $FLAKY_RUNS); do - npx vitest run --reporter=json 2>/dev/null + npx vitest run --reporter=json 2>&1 done ``` @@ -101,7 +101,7 @@ Search for duplicate test descriptions within the same `describe` block — thes Run vitest with coverage and analyze: ```bash -npx vitest run --coverage --coverage.reporter=json 2>/dev/null +npx vitest run --coverage --coverage.reporter=json 2>&1 ``` ### 3a. Overall coverage From 01b5110296b67bef5e8938764a3d2a1126d60c41 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 23:32:56 -0600 Subject: [PATCH 06/33] fix: move deps-audit stash to Phase 0, before npm commands modify manifests --- .claude/skills/deps-audit/SKILL.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.claude/skills/deps-audit/SKILL.md b/.claude/skills/deps-audit/SKILL.md index 0098779d..189bbe04 100644 --- a/.claude/skills/deps-audit/SKILL.md +++ b/.claude/skills/deps-audit/SKILL.md @@ -19,6 +19,10 @@ Audit the project's dependency tree for security vulnerabilities, outdated packa 2. Run `node --version` — must be >= 20 3. Run `npm --version` to capture toolchain info 4. Parse `$ARGUMENTS` — set `AUTO_FIX=true` if `--fix` is present +5. 
**If `AUTO_FIX` is set:** Save the original manifests now, before any npm commands run, so pre-existing unstaged changes are preserved: + ```bash + git stash push -m "deps-audit-backup" -- package.json package-lock.json + ``` ## Phase 1 — Security Vulnerabilities @@ -151,11 +155,6 @@ Write a report to `generated/deps-audit/DEPS_AUDIT_.md` with this structur If `AUTO_FIX` was set: -**Before running any auto-fix** (in Phase 1/2), save the original manifests so pre-existing unstaged changes are preserved: -```bash -git stash push -m "deps-audit-backup" -- package.json package-lock.json -``` - Summarize all changes made: 1. List each package updated/fixed 2. Run `npm test` to verify nothing broke From 3b0e29309310763957f2a37c8aaed362f41ebe11 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 23:33:04 -0600 Subject: [PATCH 07/33] fix: capture flaky-detection loop output to per-run files for comparison --- .claude/skills/test-health/SKILL.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.claude/skills/test-health/SKILL.md b/.claude/skills/test-health/SKILL.md index 855b0628..5ec83719 100644 --- a/.claude/skills/test-health/SKILL.md +++ b/.claude/skills/test-health/SKILL.md @@ -38,12 +38,13 @@ Audit the test suite for flaky tests, dead/trivial tests, coverage gaps on recen Run the full test suite `FLAKY_RUNS` times and track per-test pass/fail: ```bash +mkdir -p /tmp/test-health-runs for i in $(seq 1 $FLAKY_RUNS); do - npx vitest run --reporter=json 2>&1 + npx vitest run --reporter=json > /tmp/test-health-runs/run-$i.json 2>/tmp/test-health-runs/run-$i.err done ``` -For each run, parse the JSON reporter output to get per-test results. +For each run, parse the JSON reporter output from `/tmp/test-health-runs/run-$i.json` to get per-test results. 
### Analysis From 52de49526db077c1a2291a606b1b21b71bbdad15 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 23:33:13 -0600 Subject: [PATCH 08/33] fix: always require confirmation for stale worktree removal --- .claude/skills/housekeep/SKILL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.claude/skills/housekeep/SKILL.md b/.claude/skills/housekeep/SKILL.md index d8c75fb7..e0659672 100644 --- a/.claude/skills/housekeep/SKILL.md +++ b/.claude/skills/housekeep/SKILL.md @@ -53,8 +53,8 @@ git worktree prune ``` For stale worktrees with merged branches: -- List them and ask the user for confirmation before removing -- If confirmed (or `--full` without `--dry-run`): +- List them and **always ask the user for confirmation before removing**, regardless of `--full` +- If confirmed: ```bash git worktree remove git branch -d # only if fully merged From 8be5cecf241e82ea1ad50a34df8b084fef1f3b32 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 23:33:22 -0600 Subject: [PATCH 09/33] fix: use parsed threshold in baseline.json, guard --compare-only on first run --- .claude/skills/bench-check/SKILL.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index 85031103..fc2ccff4 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -131,7 +131,7 @@ Skip this phase if `SAVE_ONLY=true` or no baseline exists. For each metric in the current run: 1. Look up the same metric in the baseline -2. Guard against division-by-zero: if `baseline == 0`, mark the delta as `"N/A baseline was zero"` and treat the metric as **informational only** (not a regression or improvement) +2. 
Guard against division-by-zero: if `baseline == 0`, mark the delta as `"N/A � baseline was zero"` and treat the metric as **informational only** (not a regression or improvement) 3. Otherwise compute: `delta_pct = ((current - baseline) / baseline) * 100` 4. Classify: - **Regression**: metric increased by more than `THRESHOLD`% (for time metrics) or decreased by more than `THRESHOLD`% (for recall/quality metrics) @@ -165,8 +165,8 @@ Based on comparison results: - Re-run individual benchmarks to confirm (not flaky) ### First run (no baseline) -- Print: `BENCH-CHECK — initial baseline saved` -- Save current results as baseline +- If `COMPARE_ONLY` is set: print a warning that no baseline exists and exit without saving +- Otherwise: print `BENCH-CHECK — initial baseline saved` and save current results as baseline ## Phase 5 — Save Baseline @@ -178,7 +178,7 @@ Write to `generated/bench-check/baseline.json`: "savedAt": "", "version": "", "gitRef": "", - "threshold": 15, + "threshold": $THRESHOLD, "metrics": { ... } } ``` From 87d9213cb3bcf75d1bcbff252d73d9b348bde8be Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 23:55:16 -0600 Subject: [PATCH 10/33] fix(deps-audit): track stash creation to avoid operating on wrong entry When Phase 0 stash push is a no-op (manifests unchanged), Phase 7 was calling stash drop/pop on the wrong entry. Track STASH_CREATED exit code and branch on it: use git checkout when no stash exists. --- .claude/skills/deps-audit/SKILL.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.claude/skills/deps-audit/SKILL.md b/.claude/skills/deps-audit/SKILL.md index 189bbe04..d4c672af 100644 --- a/.claude/skills/deps-audit/SKILL.md +++ b/.claude/skills/deps-audit/SKILL.md @@ -22,7 +22,9 @@ Audit the project's dependency tree for security vulnerabilities, outdated packa 5. 
**If `AUTO_FIX` is set:** Save the original manifests now, before any npm commands run, so pre-existing unstaged changes are preserved: ```bash git stash push -m "deps-audit-backup" -- package.json package-lock.json + STASH_CREATED=$? ``` + Track `STASH_CREATED` — when `0`, a stash entry was actually created; when `1`, the files had no changes so nothing was stashed. ## Phase 1 — Security Vulnerabilities @@ -158,17 +160,22 @@ If `AUTO_FIX` was set: Summarize all changes made: 1. List each package updated/fixed 2. Run `npm test` to verify nothing broke -3. If tests pass: drop the saved state (`git stash drop`) -4. If tests fail: +3. If tests pass and `STASH_CREATED` is `0`: drop the saved state (`git stash drop`) + If tests pass and `STASH_CREATED` is `1`: discard manifest changes with `git checkout -- package.json package-lock.json` (no stash entry exists) +4. If tests fail and `STASH_CREATED` is `0`: - Restore the saved manifests: `git stash pop` - Restore `node_modules/` to match the reverted lock file: `npm ci` - Report what failed +5. 
If tests fail and `STASH_CREATED` is `1`: + - Discard manifest changes: `git checkout -- package.json package-lock.json` + - Restore `node_modules/` to match the reverted lock file: `npm ci` + - Report what failed ## Rules - **Never run `npm audit fix --force`** — breaking changes need human review - **Never remove a dependency** without asking the user, even if it appears unused — flag it in the report instead - **Always run tests** after any auto-fix changes -- **If `--fix` causes test failures**, restore manifests from the saved state (`git stash pop`) then run `npm ci` to resync `node_modules/`, and report the failure +- **If `--fix` causes test failures**, restore manifests from the saved state (git stash pop if `STASH_CREATED=0`, or `git checkout` if stash was a no-op) then run `npm ci` to resync `node_modules/`, and report the failure - Treat `optionalDependencies` separately — they're expected to fail on some platforms - The report goes in `generated/deps-audit/` — create the directory if it doesn't exist From 65d983698c9a1353e90763944cfffbbb52631d7c Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 23:55:27 -0600 Subject: [PATCH 11/33] fix(test-health): use mktemp for flaky-run directory to avoid concurrent corruption Replace hardcoded /tmp/test-health-runs/ with mktemp -d so parallel sessions get isolated directories. Add cleanup at end of analysis. 
--- .claude/skills/test-health/SKILL.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.claude/skills/test-health/SKILL.md b/.claude/skills/test-health/SKILL.md index 5ec83719..0cc0bc1f 100644 --- a/.claude/skills/test-health/SKILL.md +++ b/.claude/skills/test-health/SKILL.md @@ -38,13 +38,18 @@ Audit the test suite for flaky tests, dead/trivial tests, coverage gaps on recen Run the full test suite `FLAKY_RUNS` times and track per-test pass/fail: ```bash -mkdir -p /tmp/test-health-runs +RUN_DIR=$(mktemp -d /tmp/test-health-XXXXXX) for i in $(seq 1 $FLAKY_RUNS); do - npx vitest run --reporter=json > /tmp/test-health-runs/run-$i.json 2>/tmp/test-health-runs/run-$i.err + npx vitest run --reporter=json > "$RUN_DIR/run-$i.json" 2>"$RUN_DIR/run-$i.err" done ``` -For each run, parse the JSON reporter output from `/tmp/test-health-runs/run-$i.json` to get per-test results. +For each run, parse the JSON reporter output from `$RUN_DIR/run-$i.json` to get per-test results. + +After all runs are parsed and analysis is complete, clean up the temporary directory: +```bash +rm -rf "$RUN_DIR" +``` ### Analysis From eef2c03fa9f16784639bd2a6935b428be55e5d93 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 21 Mar 2026 23:55:37 -0600 Subject: [PATCH 12/33] fix(bench-check): add save-baseline verdict path, fix em-dash, use explicit commit paths Add 4th verdict path for --save-baseline when baseline already exists. Replace corrupted em-dash character in N/A string. Change commit command to use explicit file paths per project convention. 
--- .claude/skills/bench-check/SKILL.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index fc2ccff4..cb12ab92 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -131,7 +131,7 @@ Skip this phase if `SAVE_ONLY=true` or no baseline exists. For each metric in the current run: 1. Look up the same metric in the baseline -2. Guard against division-by-zero: if `baseline == 0`, mark the delta as `"N/A � baseline was zero"` and treat the metric as **informational only** (not a regression or improvement) +2. Guard against division-by-zero: if `baseline == 0`, mark the delta as `"N/A — baseline was zero"` and treat the metric as **informational only** (not a regression or improvement) 3. Otherwise compute: `delta_pct = ((current - baseline) / baseline) * 100` 4. Classify: - **Regression**: metric increased by more than `THRESHOLD`% (for time metrics) or decreased by more than `THRESHOLD`% (for recall/quality metrics) @@ -168,6 +168,10 @@ Based on comparison results: - If `COMPARE_ONLY` is set: print a warning that no baseline exists and exit without saving - Otherwise: print `BENCH-CHECK — initial baseline saved` and save current results as baseline +### Save-baseline with existing baseline (`--save-baseline`) +- Print: `BENCH-CHECK — baseline overwritten (previous: , new: )` +- Save current results as the new baseline (overwrite existing) + ## Phase 5 — Save Baseline When saving (initial run, `--save-baseline`, or passed comparison): @@ -192,8 +196,7 @@ This creates a running log of benchmark results over time. 
After writing both files, commit the baseline so it is a shared reference point: ```bash -git add generated/bench-check/baseline.json generated/bench-check/history.ndjson -git diff --cached --quiet || git commit -m "chore: update bench-check baseline ()" +git diff --quiet generated/bench-check/baseline.json generated/bench-check/history.ndjson || git commit generated/bench-check/baseline.json generated/bench-check/history.ndjson -m "chore: update bench-check baseline ()" ``` ## Phase 6 — Report From 19b14e93d6125bc131efadfc11879956b62f8aff Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 00:05:37 -0600 Subject: [PATCH 13/33] docs(roadmap): update Phase 5 TypeScript migration with accurate progress Phase 5 was listed as "2 of 7 complete" with outdated pre-Phase 3 file paths. Updated to reflect actual state: 32 of 269 source modules migrated (~12%). Steps 5.3-5.5 now list exact migrated/remaining files with verified counts (5.3=8, 5.4=54, 5.5=175, total=237 JS-only files). Added note about 14 stale .js counterparts of already-migrated .ts files needing deletion. --- docs/roadmap/ROADMAP.md | 128 +++++++++++++++++++++++++++++----------- 1 file changed, 94 insertions(+), 34 deletions(-) diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index b9664157..06fdbc0e 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -18,7 +18,7 @@ Codegraph is a strong local-first code graph CLI. 
This roadmap describes planned | [**2.7**](#phase-27--deep-analysis--graph-enrichment) | Deep Analysis & Graph Enrichment | Dataflow analysis, intraprocedural CFG, AST node storage, expanded node/edge types, extractors refactoring, CLI consolidation, interactive viewer, exports command, normalizeSymbol | **Complete** (v3.0.0) | | [**3**](#phase-3--architectural-refactoring) | Architectural Refactoring (Vertical Slice) | Unified AST analysis framework, command/query separation, repository pattern, queries.js decomposition, composable MCP, CLI commands, domain errors, builder pipeline, presentation layer, domain grouping, curated API, unified graph model, qualified names, CLI composability | **Complete** (v3.1.5) | | [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **In Progress** (5 of 6 complete) | -| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration, supply-chain security, CI coverage gates | **In Progress** (2 of 7 complete) | +| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **In Progress** (32 of 269 src modules migrated; 14 stale `.js` to delete) | | [**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Move JS-only build phases (AST nodes, CFG, dataflow, insert nodes, structure, roles, complexity) to Rust; fix incremental rebuild data loss on native; sub-100ms 1-file rebuilds | Planned | | [**7**](#phase-7--runtime--extensibility) | Runtime & Extensibility | Event-driven pipeline, unified engine strategy, subgraph export filtering, transitive confidence, query caching, configuration profiles, 
pagination, plugin system, DX & onboarding, confidence annotations, shell completion | Planned | | [**8**](#phase-8--intelligent-embeddings) | Intelligent Embeddings | LLM-generated descriptions, enhanced embeddings, build-time semantic metadata, module summaries | Planned | @@ -1080,12 +1080,16 @@ npm workspaces (`package.json` `workspaces`), `pnpm-workspace.yaml`, and `lerna. ## Phase 5 -- TypeScript Migration -> **Status:** In Progress +> **Status:** In Progress — 32 of 269 source modules migrated (~12%), plus 14 stale `.js` counterparts to delete **Goal:** Migrate the codebase from plain JavaScript to TypeScript, leveraging the clean module boundaries established in Phase 3. Incremental module-by-module migration starting from leaf modules inward. **Why after Phase 4:** The resolution accuracy work (Phase 4) operates on the existing JS codebase and produces immediate accuracy gains. TypeScript migration builds on Phase 3's clean module boundaries to add type safety across the entire codebase. Every subsequent phase benefits from types: MCP schema auto-generation, API contracts, refactoring safety. The Phase 4 resolution improvements (receiver tracking, interface edges) establish the resolution model that TypeScript types will formalize. +**Note:** File paths below reflect the post-Phase 3 directory structure. Migration has progressed non-linearly — some orchestration modules were migrated before all leaf/core modules were complete. `.js` and `.ts` coexist during migration (`allowJs: true` in tsconfig). 14 already-migrated modules still have stale `.js` counterparts that need deletion (see cleanup note at the end of this section). + +**File counts (as of March 2026):** 32 `.ts` modules in `src/`, 237 `.js`-only files needing migration, 14 stale `.js` duplicates of already-migrated `.ts` files needing deletion. Remaining by step: 5.3 = 8, 5.4 = 54, 5.5 = 175 (total = 237). 
+ ### ~~5.1 -- Project Setup~~ ✅ TypeScript project configured with strict mode, ES module output, path aliases, incremental compilation, and `dist/` build output with source maps. Biome configured for `.ts` files. `package.json` `exports` point to compiled output. @@ -1108,50 +1112,106 @@ Comprehensive TypeScript type definitions for the entire domain model — symbol **New file:** `src/types.ts` ([#516](https://github.com/optave/codegraph/pull/516)) -### 5.3 -- Leaf Module Migration +### 5.3 -- Leaf Module Migration (In Progress) -Migrate modules with no internal dependencies first: +Migrate modules with no or minimal internal dependencies. + +**Migrated:** | Module | Notes | |--------|-------| -| `src/errors.ts` | Domain error hierarchy (Phase 3.7) | -| `src/logger.ts` | Minimal, no internal deps | -| `src/constants.ts` | Pure data | -| `src/config.ts` | Config types derived from `.codegraphrc.json` schema | -| `src/db/connection.ts` | SQLite connection wrapper | -| `src/db/migrations.ts` | Schema version management | -| `src/formatters/*.ts` | Pure input->string transforms | -| `src/paginate.ts` | Generic pagination helpers | +| `src/shared/errors.ts` | Domain error hierarchy (Phase 3.7) | +| `src/shared/kinds.ts` | Symbol and edge kind constants | +| `src/shared/normalize.ts` | Symbol name normalization | +| `src/shared/paginate.ts` | Generic pagination helpers | +| `src/infrastructure/logger.ts` | Structured logging | +| `src/infrastructure/result-formatter.ts` | JSON/NDJSON output formatting | +| `src/infrastructure/test-filter.ts` | Test file detection heuristics | +| `src/presentation/colors.ts` | ANSI color constants | +| `src/presentation/table.ts` | CLI table formatting | + +**Remaining:** -Allow `.js` and `.ts` to coexist during migration (`allowJs: true` in tsconfig). 
+| Module | Notes | +|--------|-------| +| `src/shared/constants.js` | `EXTENSIONS`, `IGNORE_DIRS` constants | +| `src/shared/file-utils.js` | File path utilities | +| `src/shared/generators.js` | Generator/async iterator helpers | +| `src/shared/hierarchy.js` | Hierarchy traversal helpers | +| `src/infrastructure/config.js` | Config loading, env overrides, secret resolution | +| `src/infrastructure/native.js` | Native napi-rs addon loader with WASM fallback | +| `src/infrastructure/registry.js` | Global repo registry for multi-repo MCP | +| `src/infrastructure/update-check.js` | npm update availability check | + +### 5.4 -- Core Module Migration (In Progress) -### 5.4 -- Core Module Migration +Migrate modules that implement domain logic and Phase 3 interfaces. -Migrate modules that implement Phase 3 interfaces: +**Migrated:** | Module | Key types | |--------|-----------| -| `src/db/repository.ts` | `Repository` interface, all prepared statements typed | -| `src/parser/engine.ts` | `Engine` interface, native/WASM dispatch | -| `src/parser/registry.ts` | `LanguageEntry` type, extension mapping | -| `src/parser/tree-utils.ts` | Tree-sitter node helpers | -| `src/parser/base-extractor.ts` | `Extractor` interface, handler map | -| `src/parser/extractors/*.ts` | Per-language extractors | -| `src/analysis/*.ts` | Typed analysis results (impact scores, call chains) | -| `src/resolve.ts` | Import resolution with confidence types | - -### 5.5 -- Orchestration & Public API Migration - -Migrate top-level orchestration and entry points: +| `src/graph/model.ts` | `CodeGraph` class, unified graph model | +| `src/graph/algorithms/bfs.ts` | Breadth-first search traversal | +| `src/graph/algorithms/centrality.ts` | Centrality metrics (degree, betweenness) | +| `src/graph/algorithms/shortest-path.ts` | Shortest path between symbols | +| `src/graph/algorithms/tarjan.ts` | Tarjan SCC (cycle detection) | +| `src/graph/algorithms/leiden/rng.ts` | Random number generator for Leiden | +| 
`src/graph/classifiers/risk.ts` | Risk scoring classifier | +| `src/graph/classifiers/roles.ts` | Symbol role classifier | +| `src/domain/graph/resolve.ts` | Import resolution with confidence types | + +**Remaining (54 files):** + +| Module | Files | Key types | +|--------|-------|-----------| +| `src/db/` | 18 | `Repository` interface, SQLite connection, migrations, query builder, all repository modules | +| `src/domain/parser.js` | 1 | `Engine` interface, tree-sitter WASM wrapper, `LANGUAGE_REGISTRY` | +| `src/domain/queries.js` | 1 | Query functions: symbol search, file deps, impact analysis, diff-impact | +| `src/domain/analysis/` | 9 | Analysis results (context, impact, dependencies, exports, roles, etc.) | +| `src/extractors/` | 11 | Per-language extractors (JS, TS, Go, Rust, Java, C#, PHP, Ruby, Python, HCL) + helpers + barrel | +| `src/graph/algorithms/` | 8 | Louvain, Leiden (6 files: adapter, CPM, index, modularity, optimiser, partition), algorithms barrel | +| `src/graph/builders/` | 4 | Dependency, structure, temporal graph builders + barrel | +| `src/graph/classifiers/index.js` + `src/graph/index.js` | 2 | Barrel exports | + +### 5.5 -- Orchestration & Public API Migration (In Progress) + +Migrate top-level orchestration, features, and entry points. 
+ +**Migrated:** | Module | Notes | |--------|-------| -| `src/builder.ts` | Pipeline stages with typed `PipelineStage` | -| `src/watcher.ts` | File system events + pipeline | -| `src/embeddings/*.ts` | Vector store interface, model registry | -| `src/mcp/*.ts` | Tool schemas, typed handlers | -| `src/cli/*.ts` | Command objects with typed options | -| `src/index.ts` | Curated public API with proper export types | +| `src/domain/graph/builder.ts` | Graph build orchestrator | +| `src/domain/graph/builder/context.ts` | Build context (options, state) | +| `src/domain/graph/builder/helpers.ts` | Builder utility functions | +| `src/domain/graph/builder/pipeline.ts` | Pipeline stage definitions | +| `src/domain/graph/watcher.ts` | File system events + rebuild triggers | +| `src/domain/search/generator.ts` | Embedding vector generation | +| `src/domain/search/index.ts` | Search module entry point | +| `src/domain/search/models.ts` | Model management | +| `src/mcp/index.ts` | MCP server entry point | +| `src/mcp/middleware.ts` | MCP middleware layer | +| `src/mcp/server.ts` | MCP server implementation | +| `src/mcp/tool-registry.ts` | Dynamic tool list builder | +| `src/features/export.ts` | Graph export orchestration | + +**Remaining (175 files):** + +| Module | Files | Notes | +|--------|-------|-------| +| `src/cli.js` + `src/cli/` | 48 | Commander CLI entry point, 43 command handlers (`commands/`), barrel, shared CLI utilities (`shared/`: open-graph, options, output) | +| `src/index.js` | 1 | Curated public API exports | +| `src/features/` | 20 | ast, audit, batch, boundaries, branch-compare, cfg, check, cochange, communities, complexity, dataflow, flow, graph-enrichment, manifesto, owners, sequence, snapshot, structure, triage, `shared/find-nodes` | +| `src/presentation/` | 28 | All presentation formatters (14 files), `queries-cli/` (7 files), result-formatter, sequence-renderer, viewer, query, export, flow, brief | +| `src/mcp/tools/` | 36 | Individual MCP tool 
handlers + barrel | +| `src/domain/graph/builder/stages/` | 9 | Build pipeline stages (collect-files, parse-files, resolve-imports, etc.) | +| `src/domain/graph/builder/incremental.js` | 1 | Incremental rebuild logic | +| `src/domain/graph/` | 3 | `cycles.js`, `journal.js`, `change-journal.js` | +| `src/domain/search/` | 11 | Search subsystem: `search/` (6 files), `stores/` (2 files), `strategies/` (3 files) | +| `src/ast-analysis/` | 18 | AST analysis framework, visitors, language-specific rules | + +**JS counterpart cleanup (14 files to delete):** The following `.js` files are stale counterparts of already-migrated `.ts` files and should be deleted once all consumers import from `.ts`: `domain/graph/builder.js`, `domain/graph/builder/{context,helpers,pipeline}.js`, `domain/graph/resolve.js`, `domain/graph/watcher.js`, `domain/search/{generator,index,models}.js`, `features/export.js`, `mcp/{index,middleware,server,tool-registry}.js` ### 5.6 -- Test Migration @@ -1162,7 +1222,7 @@ Migrate top-level orchestration and entry points: **Verification:** All existing tests pass. `tsc --noEmit` succeeds with zero errors. No `any` escape hatches except at FFI boundaries (napi-rs addon, tree-sitter WASM). -**Affected files:** All `src/**/*.js` -> `src/**/*.ts`, all `tests/**/*.js` -> `tests/**/*.ts`, `package.json`, `biome.json` +**Affected files:** All remaining `src/**/*.js` → `src/**/*.ts`, all `tests/**/*.js` → `tests/**/*.ts`, `package.json`, `biome.json` --- From 5bda6bae7ff30242a9fb0f7144580382d2305a53 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 08:48:50 -0600 Subject: [PATCH 14/33] fix: deps-audit success path should keep npm changes, not revert (#565) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When STASH_CREATED=1 and tests pass, the npm audit fix changes are good — no action needed. 
Previously it ran git checkout to discard them, which undid the successful fix. --- .claude/skills/deps-audit/SKILL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.claude/skills/deps-audit/SKILL.md b/.claude/skills/deps-audit/SKILL.md index d4c672af..ccd7e6c4 100644 --- a/.claude/skills/deps-audit/SKILL.md +++ b/.claude/skills/deps-audit/SKILL.md @@ -160,8 +160,8 @@ If `AUTO_FIX` was set: Summarize all changes made: 1. List each package updated/fixed 2. Run `npm test` to verify nothing broke -3. If tests pass and `STASH_CREATED` is `0`: drop the saved state (`git stash drop`) - If tests pass and `STASH_CREATED` is `1`: discard manifest changes with `git checkout -- package.json package-lock.json` (no stash entry exists) +3. If tests pass and `STASH_CREATED` is `0`: drop the saved state (`git stash drop`) — the npm changes are good, no rollback needed + If tests pass and `STASH_CREATED` is `1`: no action needed — the npm changes are good and no stash entry exists to clean up 4. If tests fail and `STASH_CREATED` is `0`: - Restore the saved manifests: `git stash pop` - Restore `node_modules/` to match the reverted lock file: `npm ci` From bd0ba1a39a8aad1c4709058b450eeeee20d27186 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 08:49:00 -0600 Subject: [PATCH 15/33] fix: bench-check use git add + diff --cached to detect new files (#565) git diff --quiet ignores untracked files, so on the first run when baseline.json and history.ndjson are newly created, the commit was skipped. Stage first with git add, then check with --cached. 
--- .claude/skills/bench-check/SKILL.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index cb12ab92..16a0d4a9 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -196,9 +196,12 @@ This creates a running log of benchmark results over time. After writing both files, commit the baseline so it is a shared reference point: ```bash -git diff --quiet generated/bench-check/baseline.json generated/bench-check/history.ndjson || git commit generated/bench-check/baseline.json generated/bench-check/history.ndjson -m "chore: update bench-check baseline ()" +git add generated/bench-check/baseline.json generated/bench-check/history.ndjson +git diff --cached --quiet || git commit generated/bench-check/baseline.json generated/bench-check/history.ndjson -m "chore: update bench-check baseline ()" ``` +> `git add` first so that newly created files (first run) are staged; `--cached` then detects them correctly. Without this, `git diff --quiet` ignores untracked files and the baseline is never committed on the first run. + ## Phase 6 — Report Write a human-readable report to `generated/bench-check/BENCH_REPORT_.md`: From 7b91e3c524701e52902da50c673a6eee09e6aca1 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 08:49:10 -0600 Subject: [PATCH 16/33] fix: housekeep require confirmation before branch deletion (#565) Branch deletion now asks for user confirmation before each delete, consistent with worktree removal in Phase 1c. 
--- .claude/skills/housekeep/SKILL.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.claude/skills/housekeep/SKILL.md b/.claude/skills/housekeep/SKILL.md index e0659672..0a086fa6 100644 --- a/.claude/skills/housekeep/SKILL.md +++ b/.claude/skills/housekeep/SKILL.md @@ -157,12 +157,17 @@ This removes local refs to branches that no longer exist on the remote. **If `DRY_RUN`:** List branches that would be deleted. -**Otherwise:** Delete merged branches: +**Otherwise:** For each merged branch, ask the user for confirmation before deleting: +``` +Delete merged branch ''? (y/n) +``` +If confirmed, delete the branch: ```bash git branch -d # safe delete, only if fully merged ``` > **Never use `git branch -D`** (force delete). If `-d` fails, the branch has unmerged work — skip it. +> **Always confirm before deleting** — consistent with worktree removal in Phase 1c. ## Phase 5 — Update Codegraph From 5462d32e905a146aab333753eace0bae752a08aa Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 19:14:52 -0600 Subject: [PATCH 17/33] fix: scope git diff --cached to bench-check files only (#565) --- .claude/skills/bench-check/SKILL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index 16a0d4a9..9ba48309 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -197,7 +197,7 @@ This creates a running log of benchmark results over time. 
After writing both files, commit the baseline so it is a shared reference point: ```bash git add generated/bench-check/baseline.json generated/bench-check/history.ndjson -git diff --cached --quiet || git commit generated/bench-check/baseline.json generated/bench-check/history.ndjson -m "chore: update bench-check baseline ()" +git diff --cached --quiet -- generated/bench-check/baseline.json generated/bench-check/history.ndjson || git commit generated/bench-check/baseline.json generated/bench-check/history.ndjson -m "chore: update bench-check baseline ()" ``` > `git add` first so that newly created files (first run) are staged; `--cached` then detects them correctly. Without this, `git diff --quiet` ignores untracked files and the baseline is never committed on the first run. From 457e6b9dfe195a793b52167ba809c46aec57aa19 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 19:14:56 -0600 Subject: [PATCH 18/33] fix: use json-summary reporter to match coverage-summary.json output (#565) --- .claude/skills/test-health/SKILL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/skills/test-health/SKILL.md b/.claude/skills/test-health/SKILL.md index 0cc0bc1f..e9d57d2e 100644 --- a/.claude/skills/test-health/SKILL.md +++ b/.claude/skills/test-health/SKILL.md @@ -107,7 +107,7 @@ Search for duplicate test descriptions within the same `describe` block — thes Run vitest with coverage and analyze: ```bash -npx vitest run --coverage --coverage.reporter=json 2>&1 +npx vitest run --coverage --coverage.reporter=json-summary 2>&1 ``` ### 3a. 
Overall coverage From 852003dfde920059dc940792c9067e2495f2f77d Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 19:15:01 -0600 Subject: [PATCH 19/33] fix: capture stash ref by name to avoid position-based targeting (#565) --- .claude/skills/deps-audit/SKILL.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.claude/skills/deps-audit/SKILL.md b/.claude/skills/deps-audit/SKILL.md index ccd7e6c4..83cc0481 100644 --- a/.claude/skills/deps-audit/SKILL.md +++ b/.claude/skills/deps-audit/SKILL.md @@ -25,6 +25,11 @@ Audit the project's dependency tree for security vulnerabilities, outdated packa STASH_CREATED=$? ``` Track `STASH_CREATED` — when `0`, a stash entry was actually created; when `1`, the files had no changes so nothing was stashed. + If `STASH_CREATED` is `0`, immediately capture the stash ref for later use: + ```bash + STASH_REF=$(git stash list --format='%gd %s' | grep 'deps-audit-backup' | head -1 | awk '{print $1}') + ``` + Use `$STASH_REF` (not `stash@{0}`) in all later stash drop/pop commands to avoid targeting the wrong entry if other stashes are pushed in the interim. ## Phase 1 — Security Vulnerabilities @@ -160,10 +165,10 @@ If `AUTO_FIX` was set: Summarize all changes made: 1. List each package updated/fixed 2. Run `npm test` to verify nothing broke -3. If tests pass and `STASH_CREATED` is `0`: drop the saved state (`git stash drop`) — the npm changes are good, no rollback needed +3. If tests pass and `STASH_CREATED` is `0`: drop the saved state (`git stash drop $STASH_REF`) — the npm changes are good, no rollback needed If tests pass and `STASH_CREATED` is `1`: no action needed — the npm changes are good and no stash entry exists to clean up 4. 
If tests fail and `STASH_CREATED` is `0`: - - Restore the saved manifests: `git stash pop` + - Restore the saved manifests: `git stash pop $STASH_REF` - Restore `node_modules/` to match the reverted lock file: `npm ci` - Report what failed 5. If tests fail and `STASH_CREATED` is `1`: @@ -176,6 +181,6 @@ Summarize all changes made: - **Never run `npm audit fix --force`** — breaking changes need human review - **Never remove a dependency** without asking the user, even if it appears unused — flag it in the report instead - **Always run tests** after any auto-fix changes -- **If `--fix` causes test failures**, restore manifests from the saved state (git stash pop if `STASH_CREATED=0`, or `git checkout` if stash was a no-op) then run `npm ci` to resync `node_modules/`, and report the failure +- **If `--fix` causes test failures**, restore manifests from the saved state (`git stash pop $STASH_REF` if `STASH_CREATED=0`, or `git checkout` if stash was a no-op) then run `npm ci` to resync `node_modules/`, and report the failure - Treat `optionalDependencies` separately — they're expected to fail on some platforms - The report goes in `generated/deps-audit/` — create the directory if it doesn't exist From eea2954bdeceaa594a9599cb3fe037d692cfcb7a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 19:15:06 -0600 Subject: [PATCH 20/33] fix: remove unreachable Phase 5 subphases since source-repo guard always skips (#565) --- .claude/skills/housekeep/SKILL.md | 35 ------------------------------- 1 file changed, 35 deletions(-) diff --git a/.claude/skills/housekeep/SKILL.md b/.claude/skills/housekeep/SKILL.md index 0a086fa6..c4c7cca0 100644 --- a/.claude/skills/housekeep/SKILL.md +++ b/.claude/skills/housekeep/SKILL.md @@ -176,41 +176,6 @@ git branch -d # safe delete, only if fully merged > **Source-repo guard:** This phase is only meaningful when codegraph is installed as a *dependency* of a consumer project. 
Because the pre-flight confirms we are inside the codegraph *source* repo (`"name": "@optave/codegraph"`), comparing the dev version to the published release and running `npm install` would be a no-op — codegraph is not one of its own dependencies. **Skip this entire phase** when running inside the source repo and print: > `Codegraph: skipped (running inside source repo — update via git pull / branch sync instead)` -### 5a. Check current version - -```bash -node -e "console.log(require('./package.json').version)" -``` - -### 5b. Check latest published version - -```bash -npm view @optave/codegraph version -``` - -### 5c. Update if needed - -If a newer version is available: -- Show the version diff (current → latest) -- Check the CHANGELOG for what changed -- If it's a patch/minor: update automatically - ```bash - npm install - ``` -- If it's a major: warn the user and ask for confirmation - -### 5d. Rebuild - -After any update: -```bash -npm install -``` - -Verify the build works: -```bash -npx codegraph stats 2>/dev/null && echo "OK" || echo "FAILED" -``` - ## Phase 6 — Verify Repo Health Quick health checks to catch issues: From 9b4869c47c8afcf40cf2084888779807f6737cfc Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 19:31:28 -0600 Subject: [PATCH 21/33] fix: use dynamic threshold variable in bench-check Phase 6 report template (#565) --- .claude/skills/bench-check/SKILL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index 9ba48309..e1926569 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -209,7 +209,7 @@ Write a human-readable report to `generated/bench-check/BENCH_REPORT_.md`: ```markdown # Benchmark Report — -**Version:** X.Y.Z | **Git ref:** abc1234 | **Threshold:** 15% +**Version:** X.Y.Z | **Git ref:** abc1234 | **Threshold:** $THRESHOLD% ## Verdict: 
PASSED / FAILED From 8d92c999cf344344149a0e5c73e667b7217ab409 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 21:32:34 -0600 Subject: [PATCH 22/33] fix: address open review items in maintenance skills (#565) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - bench-check: add timeout 300 wrappers to all 4 benchmark invocations with exit code 124 check for timeout detection - bench-check: add explicit COMPARE_ONLY guard at Phase 5 entry - housekeep: fix grep portability — use grep -cE instead of GNU \| syntax - test-health: add timeout 180 wrapper in flaky detection loop - test-health: fix find command -o precedence with grouping parentheses --- .claude/skills/bench-check/SKILL.md | 16 +++++++++++----- .claude/skills/housekeep/SKILL.md | 2 +- .claude/skills/test-health/SKILL.md | 9 ++++++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index e1926569..362b1db0 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -39,10 +39,11 @@ Run each benchmark script and collect results. Each script outputs JSON to stdou ### 1a. Build & Query Benchmark ```bash -output=$(node scripts/benchmark.js 2>&1) +output=$(timeout 300 node scripts/benchmark.js 2>&1) exit_code=$? ``` +If `exit_code` is 124: record `"timeout"` for this suite and continue. If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. Extract: @@ -53,10 +54,11 @@ Extract: ### 1b. Incremental Benchmark ```bash -output=$(node scripts/incremental-benchmark.js 2>&1) +output=$(timeout 300 node scripts/incremental-benchmark.js 2>&1) exit_code=$? ``` +If `exit_code` is 124: record `"timeout"` for this suite and continue. If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. Extract: @@ -67,10 +69,11 @@ Extract: ### 1c. 
Query Depth Benchmark ```bash -output=$(node scripts/query-benchmark.js 2>&1) +output=$(timeout 300 node scripts/query-benchmark.js 2>&1) exit_code=$? ``` +If `exit_code` is 124: record `"timeout"` for this suite and continue. If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. Extract: @@ -81,17 +84,18 @@ Extract: ### 1d. Embedding Benchmark (optional) ```bash -output=$(node scripts/embedding-benchmark.js 2>&1) +output=$(timeout 300 node scripts/embedding-benchmark.js 2>&1) exit_code=$? ``` +If `exit_code` is 124: record `"timeout"` for this suite and continue. If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. Extract: - `embeddingTime` (ms) - `recall` at Hit@1, Hit@3, Hit@5, Hit@10 -> **Timeout:** Each benchmark gets 5 minutes max. If it times out, record `"timeout"` for that suite and continue. +> **Timeout:** Each benchmark gets 5 minutes max (`timeout 300`). Exit code 124 indicates timeout — record `"timeout"` for that suite and continue. > **Errors:** If a benchmark script fails (non-zero exit), record `"error: "` and continue with remaining benchmarks. @@ -174,6 +178,8 @@ Based on comparison results: ## Phase 5 — Save Baseline +**Skip this phase if `COMPARE_ONLY` is set.** Compare-only mode never writes or commits baselines. + When saving (initial run, `--save-baseline`, or passed comparison): Write to `generated/bench-check/baseline.json`: diff --git a/.claude/skills/housekeep/SKILL.md b/.claude/skills/housekeep/SKILL.md index c4c7cca0..0c8c0d6b 100644 --- a/.claude/skills/housekeep/SKILL.md +++ b/.claude/skills/housekeep/SKILL.md @@ -194,7 +194,7 @@ npx codegraph build ### 6b. Node modules integrity ```bash -npm ls --depth=0 2>&1 | grep -c "missing\|invalid\|WARN" +npm ls --depth=0 2>&1 | grep -cE "missing|invalid|WARN" ``` If issues found: `npm install` to fix. 
diff --git a/.claude/skills/test-health/SKILL.md b/.claude/skills/test-health/SKILL.md index e9d57d2e..bb66790e 100644 --- a/.claude/skills/test-health/SKILL.md +++ b/.claude/skills/test-health/SKILL.md @@ -27,7 +27,7 @@ Audit the test suite for flaky tests, dead/trivial tests, coverage gaps on recen - `QUICK=true` if `--quick` 4. Discover all test files: ```bash - find tests/ -name '*.test.js' -o -name '*.test.ts' | sort + find tests/ \( -name '*.test.js' -o -name '*.test.ts' \) | sort ``` 5. Count total test files and categorize by directory (integration, parsers, graph, search, unit) @@ -40,7 +40,10 @@ Run the full test suite `FLAKY_RUNS` times and track per-test pass/fail: ```bash RUN_DIR=$(mktemp -d /tmp/test-health-XXXXXX) for i in $(seq 1 $FLAKY_RUNS); do - npx vitest run --reporter=json > "$RUN_DIR/run-$i.json" 2>"$RUN_DIR/run-$i.err" + timeout 180 npx vitest run --reporter=json > "$RUN_DIR/run-$i.json" 2>"$RUN_DIR/run-$i.err" + if [ $? -eq 124 ]; then + echo '{"timeout":true}' > "$RUN_DIR/run-$i.json" + fi done ``` @@ -63,7 +66,7 @@ For each flaky test found: - **Resource-dependent**: mentions file system, network, port, or temp directory - **Non-deterministic**: random/Date.now/Math.random in test or source -> **Timeout:** Each full suite run gets 3 minutes. If it times out, record partial results and continue. +> **Timeout:** Each full suite run gets 3 minutes (`timeout 180`). Exit code 124 indicates timeout — the run is recorded as `{"timeout":true}` and the loop continues. 
## Phase 2 — Dead & Trivial Test Detection From 9ad37ea1117be6ffd8ed72a9ae43f961dd62a546 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 22 Mar 2026 23:25:11 -0600 Subject: [PATCH 23/33] fix: add COVERAGE_ONLY guards to Phase 2 and Phase 4 in test-health --- .claude/skills/test-health/SKILL.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.claude/skills/test-health/SKILL.md b/.claude/skills/test-health/SKILL.md index bb66790e..4c836586 100644 --- a/.claude/skills/test-health/SKILL.md +++ b/.claude/skills/test-health/SKILL.md @@ -70,6 +70,8 @@ For each flaky test found: ## Phase 2 — Dead & Trivial Test Detection +**Skip if `COVERAGE_ONLY` is set.** + Scan all test files for problematic patterns: ### 2a. Empty / no-assertion tests @@ -149,6 +151,8 @@ For each changed source file, check if: ## Phase 4 — Test Structure Analysis +**Skip if `COVERAGE_ONLY` is set.** + Analyze the test suite's structural health: ### 4a. Test-to-source mapping From 30ab30e64f5db5eacb0a608676e58ad0ddfe498c Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 01:38:03 -0600 Subject: [PATCH 24/33] fix: add regression skip guard to bench-check Phase 5, expand deps-audit search dirs --- .claude/skills/bench-check/SKILL.md | 1 + .claude/skills/deps-audit/SKILL.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index 362b1db0..70b6d70b 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -179,6 +179,7 @@ Based on comparison results: ## Phase 5 — Save Baseline **Skip this phase if `COMPARE_ONLY` is set.** Compare-only mode never writes or commits baselines. +**Skip this phase if regressions were detected in Phase 4.** The baseline is only updated on a clean run. 
When saving (initial run, `--save-baseline`, or passed comparison): diff --git a/.claude/skills/deps-audit/SKILL.md b/.claude/skills/deps-audit/SKILL.md index 83cc0481..083a8c77 100644 --- a/.claude/skills/deps-audit/SKILL.md +++ b/.claude/skills/deps-audit/SKILL.md @@ -74,7 +74,7 @@ For any package classified as **Abandoned**, check if there's a maintained fork Detect dependencies declared in `package.json` but never imported: 1. Read `dependencies` and `devDependencies` from `package.json` -2. For each dependency, search for imports/requires across `src/`, `tests/`, `scripts/`, `cli.js`, `index.js`: +2. For each dependency, search for imports/requires across `src/`, `tests/`, `scripts/`, `mcp/`, `graph/`, `ast-analysis/`, `cli.js`, `index.js`: - `require('')` or `require('/...')` - `import ... from ''` or `import ''` - `import('')` (dynamic imports) From a8631d2a9522dd24e29cb169c2f927a48b056427 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 02:26:00 -0600 Subject: [PATCH 25/33] fix: add empty-string guard for stat size check in housekeep (#565) When both stat variants (GNU and BSD) fail, $size is empty and the arithmetic comparison errors out. Add a [ -z "$size" ] && continue guard so the loop skips files whose size cannot be determined. 
--- .claude/skills/housekeep/SKILL.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.claude/skills/housekeep/SKILL.md b/.claude/skills/housekeep/SKILL.md index 0c8c0d6b..ef15efb7 100644 --- a/.claude/skills/housekeep/SKILL.md +++ b/.claude/skills/housekeep/SKILL.md @@ -85,6 +85,7 @@ Find untracked files larger than 1MB: ```bash git ls-files --others --exclude-standard | while read f; do size=$(stat --format='%s' "$f" 2>/dev/null || stat -f '%z' "$f" 2>/dev/null) + [ -z "$size" ] && continue if [ "$size" -gt 1048576 ]; then echo "$f ($size bytes)"; fi done ``` From 8fd7430c0b167a57826f838c837238881fc25d43 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 02:26:11 -0600 Subject: [PATCH 26/33] fix: add BASELINE SAVED verdict path and clarify if/else-if in bench-check (#565) Phase 6: when SAVE_ONLY or first-run (no prior baseline), write a shortened report with "Verdict: BASELINE SAVED" instead of the full comparison report. Phases 1a-1d: replace ambiguous "If timeout / If non-zero" with explicit "If timeout / Else if non-zero" so the two conditions are clearly mutually exclusive. --- .claude/skills/bench-check/SKILL.md | 34 +++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/.claude/skills/bench-check/SKILL.md b/.claude/skills/bench-check/SKILL.md index 70b6d70b..a6f474d3 100644 --- a/.claude/skills/bench-check/SKILL.md +++ b/.claude/skills/bench-check/SKILL.md @@ -43,8 +43,8 @@ output=$(timeout 300 node scripts/benchmark.js 2>&1) exit_code=$? ``` -If `exit_code` is 124: record `"timeout"` for this suite and continue. -If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. +If `exit_code` is 124: record `"timeout"` for this suite and skip to the next suite. +Else if `exit_code` is non-zero: record `"error: $output"` for this suite and skip to the next suite. 
Extract: - `buildTime` (ms) — per engine (native, WASM) @@ -58,8 +58,8 @@ output=$(timeout 300 node scripts/incremental-benchmark.js 2>&1) exit_code=$? ``` -If `exit_code` is 124: record `"timeout"` for this suite and continue. -If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. +If `exit_code` is 124: record `"timeout"` for this suite and skip to the next suite. +Else if `exit_code` is non-zero: record `"error: $output"` for this suite and skip to the next suite. Extract: - `noOpRebuild` (ms) — time for no-change rebuild @@ -73,8 +73,8 @@ output=$(timeout 300 node scripts/query-benchmark.js 2>&1) exit_code=$? ``` -If `exit_code` is 124: record `"timeout"` for this suite and continue. -If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. +If `exit_code` is 124: record `"timeout"` for this suite and skip to the next suite. +Else if `exit_code` is non-zero: record `"error: $output"` for this suite and skip to the next suite. Extract: - `fnDeps` scaling by depth @@ -88,8 +88,8 @@ output=$(timeout 300 node scripts/embedding-benchmark.js 2>&1) exit_code=$? ``` -If `exit_code` is 124: record `"timeout"` for this suite and continue. -If `exit_code` is non-zero: record `"error: $output"` for this suite and continue. +If `exit_code` is 124: record `"timeout"` for this suite and skip to the next suite. +Else if `exit_code` is non-zero: record `"error: $output"` for this suite and skip to the next suite. Extract: - `embeddingTime` (ms) @@ -211,7 +211,23 @@ git diff --cached --quiet -- generated/bench-check/baseline.json generated/bench ## Phase 6 — Report -Write a human-readable report to `generated/bench-check/BENCH_REPORT_.md`: +Write a human-readable report to `generated/bench-check/BENCH_REPORT_.md`. 
+ +**If `SAVE_ONLY` is set or no prior baseline existed (first run):** write a shortened report — omit the "Comparison vs Baseline" and "Regressions" sections since no comparison was performed: + +```markdown +# Benchmark Report — + +**Version:** X.Y.Z | **Git ref:** abc1234 | **Threshold:** $THRESHOLD% + +## Verdict: BASELINE SAVED — no comparison performed + +## Raw Results + + +``` + +**Otherwise (comparison was performed):** write the full report with comparison and verdict: ```markdown # Benchmark Report — From 23f2f76c3afa2690659cefc8927a34198ea7dc35 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 02:26:49 -0600 Subject: [PATCH 27/33] docs(roadmap): mark Phase 4 complete, update Phase 5 progress (5 of 7) Phase 4 (Resolution Accuracy) had all 6 sub-phases merged but status still said "In Progress". Phase 5 (TypeScript Migration) had 5.3-5.5 merged via PRs #553, #554, #555, #566 but was listed with stale counts. Updated both to reflect actual state: Phase 4 complete, Phase 5 at 5/7 with 76 of 283 modules migrated (~27%). --- docs/roadmap/ROADMAP.md | 142 +++++++++++----------------------------- 1 file changed, 37 insertions(+), 105 deletions(-) diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 06fdbc0e..3d6af3e4 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -17,8 +17,8 @@ Codegraph is a strong local-first code graph CLI. 
This roadmap describes planned | [**2.5**](#phase-25--analysis-expansion) | Analysis Expansion | Complexity metrics, community detection, flow tracing, co-change, manifesto, boundary rules, check, triage, audit, batch, hybrid search | **Complete** (v2.7.0) | | [**2.7**](#phase-27--deep-analysis--graph-enrichment) | Deep Analysis & Graph Enrichment | Dataflow analysis, intraprocedural CFG, AST node storage, expanded node/edge types, extractors refactoring, CLI consolidation, interactive viewer, exports command, normalizeSymbol | **Complete** (v3.0.0) | | [**3**](#phase-3--architectural-refactoring) | Architectural Refactoring (Vertical Slice) | Unified AST analysis framework, command/query separation, repository pattern, queries.js decomposition, composable MCP, CLI commands, domain errors, builder pipeline, presentation layer, domain grouping, curated API, unified graph model, qualified names, CLI composability | **Complete** (v3.1.5) | -| [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **In Progress** (5 of 6 complete) | -| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **In Progress** (32 of 269 src modules migrated; 14 stale `.js` to delete) | +| [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **Complete** (v3.3.1) | +| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **In Progress** (5 of 7 complete — 76 of 283 src modules 
migrated, ~27%) | | [**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Move JS-only build phases (AST nodes, CFG, dataflow, insert nodes, structure, roles, complexity) to Rust; fix incremental rebuild data loss on native; sub-100ms 1-file rebuilds | Planned | | [**7**](#phase-7--runtime--extensibility) | Runtime & Extensibility | Event-driven pipeline, unified engine strategy, subgraph export filtering, transitive confidence, query caching, configuration profiles, pagination, plugin system, DX & onboarding, confidence annotations, shell completion | Planned | | [**8**](#phase-8--intelligent-embeddings) | Intelligent Embeddings | LLM-generated descriptions, enhanced embeddings, build-time semantic metadata, module summaries | Planned | @@ -994,9 +994,9 @@ src/domain/ --- -## Phase 4 -- Resolution Accuracy +## Phase 4 -- Resolution Accuracy ✅ -> **Status:** In Progress +> **Status:** Complete -- all 6 sub-phases shipped across v3.2.0 → v3.3.1 **Goal:** Close the most impactful gaps in call graph accuracy before investing in type safety or native acceleration. The entire value proposition — blast radius, impact analysis, dependency chains — rests on the call graph. These targeted improvements make the graph trustworthy. @@ -1080,15 +1080,15 @@ npm workspaces (`package.json` `workspaces`), `pnpm-workspace.yaml`, and `lerna. ## Phase 5 -- TypeScript Migration -> **Status:** In Progress — 32 of 269 source modules migrated (~12%), plus 14 stale `.js` counterparts to delete +> **Status:** In Progress — 5 of 7 steps complete (76 of 283 source modules migrated, ~27%) **Goal:** Migrate the codebase from plain JavaScript to TypeScript, leveraging the clean module boundaries established in Phase 3. Incremental module-by-module migration starting from leaf modules inward. **Why after Phase 4:** The resolution accuracy work (Phase 4) operates on the existing JS codebase and produces immediate accuracy gains. 
TypeScript migration builds on Phase 3's clean module boundaries to add type safety across the entire codebase. Every subsequent phase benefits from types: MCP schema auto-generation, API contracts, refactoring safety. The Phase 4 resolution improvements (receiver tracking, interface edges) establish the resolution model that TypeScript types will formalize. -**Note:** File paths below reflect the post-Phase 3 directory structure. Migration has progressed non-linearly — some orchestration modules were migrated before all leaf/core modules were complete. `.js` and `.ts` coexist during migration (`allowJs: true` in tsconfig). 14 already-migrated modules still have stale `.js` counterparts that need deletion (see cleanup note at the end of this section). +**Note:** File paths below reflect the post-Phase 3 directory structure. `.js` and `.ts` coexist during migration (`allowJs: true` in tsconfig). Steps 5.3–5.5 completed across PRs #553, #554, #555, #566. Remaining work: test migration (5.6) and remaining `.js` source files (~207 files). -**File counts (as of March 2026):** 32 `.ts` modules in `src/`, 237 `.js`-only files needing migration, 14 stale `.js` duplicates of already-migrated `.ts` files needing deletion. Remaining by step: 5.3 = 8, 5.4 = 54, 5.5 = 175 (total = 237). +**File counts (as of March 2026):** 76 `.ts` modules in `src/`, ~207 `.js` files remaining. Steps 5.1–5.5 complete. ### ~~5.1 -- Project Setup~~ ✅ @@ -1112,104 +1112,36 @@ Comprehensive TypeScript type definitions for the entire domain model — symbol **New file:** `src/types.ts` ([#516](https://github.com/optave/codegraph/pull/516)) -### 5.3 -- Leaf Module Migration (In Progress) - -Migrate modules with no or minimal internal dependencies. 
- -**Migrated:** - -| Module | Notes | -|--------|-------| -| `src/shared/errors.ts` | Domain error hierarchy (Phase 3.7) | -| `src/shared/kinds.ts` | Symbol and edge kind constants | -| `src/shared/normalize.ts` | Symbol name normalization | -| `src/shared/paginate.ts` | Generic pagination helpers | -| `src/infrastructure/logger.ts` | Structured logging | -| `src/infrastructure/result-formatter.ts` | JSON/NDJSON output formatting | -| `src/infrastructure/test-filter.ts` | Test file detection heuristics | -| `src/presentation/colors.ts` | ANSI color constants | -| `src/presentation/table.ts` | CLI table formatting | - -**Remaining:** - -| Module | Notes | -|--------|-------| -| `src/shared/constants.js` | `EXTENSIONS`, `IGNORE_DIRS` constants | -| `src/shared/file-utils.js` | File path utilities | -| `src/shared/generators.js` | Generator/async iterator helpers | -| `src/shared/hierarchy.js` | Hierarchy traversal helpers | -| `src/infrastructure/config.js` | Config loading, env overrides, secret resolution | -| `src/infrastructure/native.js` | Native napi-rs addon loader with WASM fallback | -| `src/infrastructure/registry.js` | Global repo registry for multi-repo MCP | -| `src/infrastructure/update-check.js` | npm update availability check | - -### 5.4 -- Core Module Migration (In Progress) - -Migrate modules that implement domain logic and Phase 3 interfaces. 
- -**Migrated:** - -| Module | Key types | -|--------|-----------| -| `src/graph/model.ts` | `CodeGraph` class, unified graph model | -| `src/graph/algorithms/bfs.ts` | Breadth-first search traversal | -| `src/graph/algorithms/centrality.ts` | Centrality metrics (degree, betweenness) | -| `src/graph/algorithms/shortest-path.ts` | Shortest path between symbols | -| `src/graph/algorithms/tarjan.ts` | Tarjan SCC (cycle detection) | -| `src/graph/algorithms/leiden/rng.ts` | Random number generator for Leiden | -| `src/graph/classifiers/risk.ts` | Risk scoring classifier | -| `src/graph/classifiers/roles.ts` | Symbol role classifier | -| `src/domain/graph/resolve.ts` | Import resolution with confidence types | - -**Remaining (54 files):** - -| Module | Files | Key types | -|--------|-------|-----------| -| `src/db/` | 18 | `Repository` interface, SQLite connection, migrations, query builder, all repository modules | -| `src/domain/parser.js` | 1 | `Engine` interface, tree-sitter WASM wrapper, `LANGUAGE_REGISTRY` | -| `src/domain/queries.js` | 1 | Query functions: symbol search, file deps, impact analysis, diff-impact | -| `src/domain/analysis/` | 9 | Analysis results (context, impact, dependencies, exports, roles, etc.) | -| `src/extractors/` | 11 | Per-language extractors (JS, TS, Go, Rust, Java, C#, PHP, Ruby, Python, HCL) + helpers + barrel | -| `src/graph/algorithms/` | 8 | Louvain, Leiden (6 files: adapter, CPM, index, modularity, optimiser, partition), algorithms barrel | -| `src/graph/builders/` | 4 | Dependency, structure, temporal graph builders + barrel | -| `src/graph/classifiers/index.js` + `src/graph/index.js` | 2 | Barrel exports | - -### 5.5 -- Orchestration & Public API Migration (In Progress) - -Migrate top-level orchestration, features, and entry points. 
- -**Migrated:** - -| Module | Notes | -|--------|-------| -| `src/domain/graph/builder.ts` | Graph build orchestrator | -| `src/domain/graph/builder/context.ts` | Build context (options, state) | -| `src/domain/graph/builder/helpers.ts` | Builder utility functions | -| `src/domain/graph/builder/pipeline.ts` | Pipeline stage definitions | -| `src/domain/graph/watcher.ts` | File system events + rebuild triggers | -| `src/domain/search/generator.ts` | Embedding vector generation | -| `src/domain/search/index.ts` | Search module entry point | -| `src/domain/search/models.ts` | Model management | -| `src/mcp/index.ts` | MCP server entry point | -| `src/mcp/middleware.ts` | MCP middleware layer | -| `src/mcp/server.ts` | MCP server implementation | -| `src/mcp/tool-registry.ts` | Dynamic tool list builder | -| `src/features/export.ts` | Graph export orchestration | - -**Remaining (175 files):** - -| Module | Files | Notes | -|--------|-------|-------| -| `src/cli.js` + `src/cli/` | 48 | Commander CLI entry point, 43 command handlers (`commands/`), barrel, shared CLI utilities (`shared/`: open-graph, options, output) | -| `src/index.js` | 1 | Curated public API exports | -| `src/features/` | 20 | ast, audit, batch, boundaries, branch-compare, cfg, check, cochange, communities, complexity, dataflow, flow, graph-enrichment, manifesto, owners, sequence, snapshot, structure, triage, `shared/find-nodes` | -| `src/presentation/` | 28 | All presentation formatters (14 files), `queries-cli/` (7 files), result-formatter, sequence-renderer, viewer, query, export, flow, brief | -| `src/mcp/tools/` | 36 | Individual MCP tool handlers + barrel | -| `src/domain/graph/builder/stages/` | 9 | Build pipeline stages (collect-files, parse-files, resolve-imports, etc.) 
| -| `src/domain/graph/builder/incremental.js` | 1 | Incremental rebuild logic | -| `src/domain/graph/` | 3 | `cycles.js`, `journal.js`, `change-journal.js` | -| `src/domain/search/` | 11 | Search subsystem: `search/` (6 files), `stores/` (2 files), `strategies/` (3 files) | -| `src/ast-analysis/` | 18 | AST analysis framework, visitors, language-specific rules | +### ~~5.3 -- Leaf Module Migration~~ ✅ + +Migrated 25 leaf modules (no internal dependencies) from JavaScript to TypeScript in two waves: + +- ✅ Wave 1 (17 modules): `shared/errors`, `shared/kinds`, `shared/normalize`, `shared/paginate`, `infrastructure/logger`, `infrastructure/result-formatter`, `infrastructure/test-filter`, `db/index`, `domain/analysis/*` (context, dependencies, exports, impact, implementations, module-map, roles, symbol-lookup), `domain/graph/cycles`, `presentation/colors`, `presentation/table` ([#553](https://github.com/optave/codegraph/pull/553)) +- ✅ Wave 2 (8 modules): `shared/constants`, `shared/file-utils`, `shared/generators`, `shared/hierarchy`, `infrastructure/config`, `infrastructure/native`, `infrastructure/registry`, `infrastructure/update-check` ([#566](https://github.com/optave/codegraph/pull/566)) + +### ~~5.4 -- Core Module Migration~~ ✅ + +Migrated 54 core modules that implement Phase 3 interfaces — database repository, parser engine, language extractors, import resolution, graph builders, and analysis modules. 
+ +- ✅ `db/repository/*.ts` — all prepared statements typed +- ✅ `domain/parser.ts`, `domain/graph/resolve.ts` — engine and resolution with confidence types +- ✅ `extractors/*.ts` — all 11 language extractors +- ✅ `domain/graph/builder/**/*.ts` — full build pipeline +- ✅ `graph/**/*.ts` — unified graph model, algorithms (Tarjan, Louvain, Leiden, BFS, centrality, shortest-path), classifiers (role, risk), builders + +([#554](https://github.com/optave/codegraph/pull/554)) + +### ~~5.5 -- Orchestration & Public API Migration~~ ✅ + +Migrated top-level orchestration and entry points — builder pipeline, watcher, embeddings subsystem, MCP server, CLI commands, and public API index. + +- ✅ `domain/graph/builder.ts`, `domain/graph/watcher.ts` — pipeline stages typed +- ✅ `domain/search/*.ts` — vector store, model registry, search modes +- ✅ `mcp/*.ts` — tool schemas, typed handlers +- ✅ `features/*.ts`, `presentation/*.ts` — feature modules and CLI formatters +- ✅ `index.ts` — curated public API with proper export types + +([#555](https://github.com/optave/codegraph/pull/555)) **JS counterpart cleanup (14 files to delete):** The following `.js` files are stale counterparts of already-migrated `.ts` files and should be deleted once all consumers import from `.ts`: `domain/graph/builder.js`, `domain/graph/builder/{context,helpers,pipeline}.js`, `domain/graph/resolve.js`, `domain/graph/watcher.js`, `domain/search/{generator,index,models}.js`, `features/export.js`, `mcp/{index,middleware,server,tool-registry}.js` From 2616c788d891aac17c3d20b290cca20eff39b264 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 02:41:03 -0600 Subject: [PATCH 28/33] =?UTF-8?q?docs(roadmap):=20correct=20Phase=205=20pr?= =?UTF-8?q?ogress=20=E2=80=94=205.3/5.4/5.5=20still=20in=20progress?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous commit incorrectly marked 5.3-5.5 as complete. 
In reality 76 of 283 src files are .ts (~27%) while 207 remain .js (~73%). PRs #553, #554, #555, #566 migrated a first wave but left substantial work in each step: 4 leaf files, 39 core files, 159 orchestration files. Updated each step with accurate migrated/remaining counts. --- docs/roadmap/ROADMAP.md | 73 ++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 3d6af3e4..4614c75e 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -18,7 +18,7 @@ Codegraph is a strong local-first code graph CLI. This roadmap describes planned | [**2.7**](#phase-27--deep-analysis--graph-enrichment) | Deep Analysis & Graph Enrichment | Dataflow analysis, intraprocedural CFG, AST node storage, expanded node/edge types, extractors refactoring, CLI consolidation, interactive viewer, exports command, normalizeSymbol | **Complete** (v3.0.0) | | [**3**](#phase-3--architectural-refactoring) | Architectural Refactoring (Vertical Slice) | Unified AST analysis framework, command/query separation, repository pattern, queries.js decomposition, composable MCP, CLI commands, domain errors, builder pipeline, presentation layer, domain grouping, curated API, unified graph model, qualified names, CLI composability | **Complete** (v3.1.5) | | [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **Complete** (v3.3.1) | -| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **In Progress** (5 of 7 complete — 76 of 283 src modules migrated, ~27%) | +| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> 
orchestration module migration, test migration | **In Progress** (76 of 283 src files migrated, ~27%) | | [**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Move JS-only build phases (AST nodes, CFG, dataflow, insert nodes, structure, roles, complexity) to Rust; fix incremental rebuild data loss on native; sub-100ms 1-file rebuilds | Planned | | [**7**](#phase-7--runtime--extensibility) | Runtime & Extensibility | Event-driven pipeline, unified engine strategy, subgraph export filtering, transitive confidence, query caching, configuration profiles, pagination, plugin system, DX & onboarding, confidence annotations, shell completion | Planned | | [**8**](#phase-8--intelligent-embeddings) | Intelligent Embeddings | LLM-generated descriptions, enhanced embeddings, build-time semantic metadata, module summaries | Planned | @@ -1080,15 +1080,13 @@ npm workspaces (`package.json` `workspaces`), `pnpm-workspace.yaml`, and `lerna. ## Phase 5 -- TypeScript Migration -> **Status:** In Progress — 5 of 7 steps complete (76 of 283 source modules migrated, ~27%) +> **Status:** In Progress — 76 of 283 source files migrated (~27%), 207 `.js` files remaining **Goal:** Migrate the codebase from plain JavaScript to TypeScript, leveraging the clean module boundaries established in Phase 3. Incremental module-by-module migration starting from leaf modules inward. **Why after Phase 4:** The resolution accuracy work (Phase 4) operates on the existing JS codebase and produces immediate accuracy gains. TypeScript migration builds on Phase 3's clean module boundaries to add type safety across the entire codebase. Every subsequent phase benefits from types: MCP schema auto-generation, API contracts, refactoring safety. The Phase 4 resolution improvements (receiver tracking, interface edges) establish the resolution model that TypeScript types will formalize. -**Note:** File paths below reflect the post-Phase 3 directory structure. 
`.js` and `.ts` coexist during migration (`allowJs: true` in tsconfig). Steps 5.3–5.5 completed across PRs #553, #554, #555, #566. Remaining work: test migration (5.6) and remaining `.js` source files (~207 files). - -**File counts (as of March 2026):** 76 `.ts` modules in `src/`, ~207 `.js` files remaining. Steps 5.1–5.5 complete. +**Note:** `.js` and `.ts` coexist during migration (`allowJs: true` in tsconfig). PRs #553, #554, #555, #566 migrated a first wave of files across steps 5.3–5.5, but substantial work remains in each step. 13 stale `.js` files have `.ts` counterparts and need deletion. ### ~~5.1 -- Project Setup~~ ✅ @@ -1112,38 +1110,61 @@ Comprehensive TypeScript type definitions for the entire domain model — symbol **New file:** `src/types.ts` ([#516](https://github.com/optave/codegraph/pull/516)) -### ~~5.3 -- Leaf Module Migration~~ ✅ +### 5.3 -- Leaf Module Migration (In Progress) + +Migrate modules with no or minimal internal dependencies. 25 migrated, 4 remaining. + +**Migrated (25):** `shared/errors`, `shared/kinds`, `shared/normalize`, `shared/paginate`, `shared/constants`, `shared/file-utils`, `shared/generators`, `shared/hierarchy`, `infrastructure/logger`, `infrastructure/config`, `infrastructure/native`, `infrastructure/registry`, `infrastructure/update-check`, `infrastructure/result-formatter`, `infrastructure/test-filter`, `db/repository/*` (14 files), `domain/analysis/*` (9 files), `presentation/colors`, `presentation/table` — via [#553](https://github.com/optave/codegraph/pull/553), [#566](https://github.com/optave/codegraph/pull/566) + +**Remaining (4):** + +| Module | Notes | +|--------|-------| +| `src/db/connection.js` | SQLite connection wrapper | +| `src/db/index.js` | DB barrel/schema entry point | +| `src/db/migrations.js` | Schema version management | +| `src/db/query-builder.js` | Dynamic query builder | -Migrated 25 leaf modules (no internal dependencies) from JavaScript to TypeScript in two waves: +### 5.4 -- Core Module 
Migration (In Progress) -- ✅ Wave 1 (17 modules): `shared/errors`, `shared/kinds`, `shared/normalize`, `shared/paginate`, `infrastructure/logger`, `infrastructure/result-formatter`, `infrastructure/test-filter`, `db/index`, `domain/analysis/*` (context, dependencies, exports, impact, implementations, module-map, roles, symbol-lookup), `domain/graph/cycles`, `presentation/colors`, `presentation/table` ([#553](https://github.com/optave/codegraph/pull/553)) -- ✅ Wave 2 (8 modules): `shared/constants`, `shared/file-utils`, `shared/generators`, `shared/hierarchy`, `infrastructure/config`, `infrastructure/native`, `infrastructure/registry`, `infrastructure/update-check` ([#566](https://github.com/optave/codegraph/pull/566)) +Migrate modules that implement domain logic and Phase 3 interfaces. Some migrated via [#554](https://github.com/optave/codegraph/pull/554), 39 files remaining. -### ~~5.4 -- Core Module Migration~~ ✅ +**Migrated:** `db/repository/*.ts` (14 files), `domain/parser.ts`, `domain/graph/resolve.ts`, `extractors/*.ts` (11 files), `domain/graph/builder.ts` + `context.ts` + `helpers.ts` + `pipeline.ts`, `domain/graph/watcher.ts`, `domain/search/{generator,index,models}.ts`, `graph/model.ts`, `graph/algorithms/{bfs,centrality,shortest-path,tarjan}.ts`, `graph/algorithms/leiden/rng.ts`, `graph/classifiers/{risk,roles}.ts` -Migrated 54 core modules that implement Phase 3 interfaces — database repository, parser engine, language extractors, import resolution, graph builders, and analysis modules. 
+**Remaining (39):** -- ✅ `db/repository/*.ts` — all prepared statements typed -- ✅ `domain/parser.ts`, `domain/graph/resolve.ts` — engine and resolution with confidence types -- ✅ `extractors/*.ts` — all 11 language extractors -- ✅ `domain/graph/builder/**/*.ts` — full build pipeline -- ✅ `graph/**/*.ts` — unified graph model, algorithms (Tarjan, Louvain, Leiden, BFS, centrality, shortest-path), classifiers (role, risk), builders +| Module | Files | Notes | +|--------|-------|-------| +| `domain/graph/builder/stages/` | 9 | All 9 build pipeline stages (collect-files, parse-files, resolve-imports, build-edges, etc.) | +| `domain/graph/builder/incremental.js` | 1 | Incremental rebuild logic | +| `domain/graph/{cycles,journal,change-journal}.js` | 3 | Graph utilities | +| `domain/queries.js` | 1 | Core query functions | +| `domain/search/search/` | 6 | Search subsystem (hybrid, semantic, keyword, filters, cli-formatter, prepare) | +| `domain/search/stores/` | 2 | FTS5, SQLite blob stores | +| `domain/search/strategies/` | 3 | Source, structured, text-utils strategies | +| `graph/algorithms/leiden/` | 6 | Leiden community detection (adapter, CPM, modularity, optimiser, partition, index) | +| `graph/algorithms/{louvain,index}.js` | 2 | Louvain + algorithms barrel | +| `graph/builders/` | 4 | Dependency, structure, temporal builders + barrel | +| `graph/classifiers/index.js` + `graph/index.js` | 2 | Barrel exports | -([#554](https://github.com/optave/codegraph/pull/554)) +### 5.5 -- Orchestration & Public API Migration (In Progress) -### ~~5.5 -- Orchestration & Public API Migration~~ ✅ +Migrate top-level orchestration, features, and entry points. Some migrated via [#555](https://github.com/optave/codegraph/pull/555), 159 files remaining. -Migrated top-level orchestration and entry points — builder pipeline, watcher, embeddings subsystem, MCP server, CLI commands, and public API index. 
+**Migrated:** `domain/graph/builder.ts` + `context.ts` + `helpers.ts` + `pipeline.ts`, `domain/graph/watcher.ts`, `domain/search/{generator,index,models}.ts`, `mcp/{index,middleware,server,tool-registry}.ts`, `features/export.ts`, `index.ts` -- ✅ `domain/graph/builder.ts`, `domain/graph/watcher.ts` — pipeline stages typed -- ✅ `domain/search/*.ts` — vector store, model registry, search modes -- ✅ `mcp/*.ts` — tool schemas, typed handlers -- ✅ `features/*.ts`, `presentation/*.ts` — feature modules and CLI formatters -- ✅ `index.ts` — curated public API with proper export types +**Remaining (159):** -([#555](https://github.com/optave/codegraph/pull/555)) +| Module | Files | Notes | +|--------|-------|-------| +| `cli.js` + `cli/` | 55 | Commander entry point, 43 command handlers (`commands/`), barrel, shared CLI utilities | +| `mcp/tools/` | 36 | Individual MCP tool handlers + barrel | +| `presentation/` | 28 | Presentation formatters (14 files), `queries-cli/` (7 files), sequence-renderer, viewer, export, etc. 
| +| `features/` | 21 | audit, batch, boundaries, cfg, check, cochange, communities, complexity, dataflow, flow, graph-enrichment, manifesto, owners, sequence, snapshot, structure, triage, ast, branch-compare, `shared/find-nodes` | +| `ast-analysis/` | 18 | AST analysis framework, visitors (4), language-specific rules (9), engine, metrics, shared, visitor-utils | +| `index.js` | 1 | Public API exports (stale — `.ts` exists) | -**JS counterpart cleanup (14 files to delete):** The following `.js` files are stale counterparts of already-migrated `.ts` files and should be deleted once all consumers import from `.ts`: `domain/graph/builder.js`, `domain/graph/builder/{context,helpers,pipeline}.js`, `domain/graph/resolve.js`, `domain/graph/watcher.js`, `domain/search/{generator,index,models}.js`, `features/export.js`, `mcp/{index,middleware,server,tool-registry}.js` +**Stale `.js` counterparts to delete (13 files):** `domain/graph/builder.js`, `domain/graph/builder/{context,helpers,pipeline}.js`, `domain/graph/watcher.js`, `domain/search/{generator,index,models}.js`, `features/export.js`, `mcp/{index,middleware,server,tool-registry}.js` — these have `.ts` counterparts already ### 5.6 -- Test Migration From 9d2b7ff1e10320ac0953a417c339fe31454569d3 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 03:00:16 -0600 Subject: [PATCH 29/33] fix(skill): ban untracked deferrals in /review skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /review skill allowed replying "acknowledged as follow-up" to reviewer comments without tracking them anywhere. These deferrals get lost — nobody revisits PR comment threads after merge. Now: if a fix is genuinely out of scope, the skill must create a GitHub issue with the follow-up label before replying. The reply must include the issue link. A matching rule in the Rules section reinforces the ban. 
---
 .claude/skills/review/SKILL.md | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/.claude/skills/review/SKILL.md b/.claude/skills/review/SKILL.md
index b0a7e00d..ce3ef428 100644
--- a/.claude/skills/review/SKILL.md
+++ b/.claude/skills/review/SKILL.md
@@ -120,7 +120,29 @@ For **each** review comment — including minor suggestions, nits, style feedbac
 2. **Read the relevant code** at the file and line referenced.
 3. **Make the change.** Even if the comment is marked as "nit" or "suggestion" or "minor" — address it. The goal is zero outstanding comments.
 4. **If you disagree** with a suggestion (e.g., it would introduce a bug or contradicts project conventions), do NOT silently ignore it. Reply to the comment explaining why you chose a different approach.
-5. **Reply to each comment** explaining what you did. The reply mechanism depends on where the comment lives:
+5. **If the fix is genuinely out of scope** for this PR (e.g., it affects a different module not touched by this PR, or requires a design decision beyond the PR's purpose), you MUST create a GitHub issue to track it before replying. Never reply with "acknowledged as follow-up" or "noted for later" without a tracked issue — untracked deferrals get lost and nobody will ever revisit them.
+
+   ```bash
+   # Create a tracking issue for the deferred item
+   gh issue create \
+     --title "follow-up: <short description of the deferred fix>" \
+     --body "$(cat <<'EOF'
+   Deferred from PR #<PR_NUMBER> review.
+
+   **Original reviewer comment:** https://github.com/optave/codegraph/pull/<PR_NUMBER>#discussion_r<COMMENT_ID>
+
+   **Context:** <why the fix is out of scope and what the follow-up should do>
+   EOF
+   )" \
+     --label "follow-up"
+   ```
+
+   Then reply to the reviewer comment referencing the issue:
+   ```bash
+   gh api repos/optave/codegraph/pulls/<PR_NUMBER>/comments/<COMMENT_ID>/replies \
+     -f body="Out of scope for this PR — tracked in #<ISSUE_NUMBER>"
+   ```
+6. **Reply to each comment** explaining what you did. 
The reply mechanism depends on where the comment lives: **For inline PR review comments** (from Claude, Greptile, or humans — these have a `path` and `line`): ```bash @@ -220,3 +242,4 @@ After processing all PRs, output a summary table: - **One concern per commit** — don't lump conflict resolution with code fixes. - **Flag scope creep.** If a PR's diff contains files unrelated to its stated purpose (e.g., a docs PR carrying `src/` or test changes from a merged feature branch), flag it immediately. Split the unrelated changes into a separate branch and PR. Do not proceed with review until the PR is scoped correctly — scope creep is not acceptable. - If a PR is fundamentally broken beyond what review feedback can fix, note it in the summary and skip to the next PR. +- **Never defer without tracking.** Do not reply "acknowledged as follow-up", "noted for later", or "tracking for follow-up" to a reviewer comment without creating a GitHub issue first. If you can't fix it now and it's genuinely out of scope, create an issue with the `follow-up` label and include the issue link in your reply. Untracked acknowledgements are the same as ignoring the comment — they will never be revisited. 
From ef11f5cf84b0034b69839ba1fe976f1dd4fb5977 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 03:01:47 -0600 Subject: [PATCH 30/33] feat(types): migrate db, graph algorithms/builders, and domain/queries to TypeScript (Phase 5.5) Migrate 19 remaining JS files to TypeScript across db/, graph/, and domain/: - db/: connection, migrations, query-builder, index barrel - graph/algorithms/leiden/: adapter, cpm, modularity, optimiser, partition, index - graph/algorithms/: louvain, index barrel - graph/builders/: dependency, structure, temporal, index barrel - graph/classifiers/: index barrel - graph/: index barrel - domain/: queries barrel Key type additions: - GraphAdapter, Partition, DetectClustersResult interfaces for Leiden - LockedDatabase type for advisory-locked DB instances - DependencyGraphOptions, TemporalGraphOptions for graph builders - Generic Statement in vendor.d.ts for type-safe DB queries Also fixes pre-existing type errors in module-map.ts (untyped prepare calls) and generator.ts (null vs undefined argument). 
--- src/db/{connection.js => connection.ts} | 83 +-- src/db/{index.js => index.ts} | 2 + src/db/{migrations.js => migrations.ts} | 44 +- src/db/{query-builder.js => query-builder.ts} | 115 ++--- src/domain/analysis/module-map.ts | 30 +- src/domain/{queries.js => queries.ts} | 2 +- src/domain/search/generator.ts | 2 +- src/graph/algorithms/{index.js => index.ts} | 0 src/graph/algorithms/leiden/adapter.js | 160 ------ src/graph/algorithms/leiden/adapter.ts | 204 ++++++++ src/graph/algorithms/leiden/cpm.js | 39 -- src/graph/algorithms/leiden/cpm.ts | 77 +++ src/graph/algorithms/leiden/index.js | 144 ------ src/graph/algorithms/leiden/index.ts | 185 +++++++ src/graph/algorithms/leiden/modularity.js | 71 --- src/graph/algorithms/leiden/modularity.ts | 122 +++++ .../leiden/{optimiser.js => optimiser.ts} | 384 ++++++++------ src/graph/algorithms/leiden/partition.js | 407 --------------- src/graph/algorithms/leiden/partition.ts | 479 ++++++++++++++++++ .../algorithms/{louvain.js => louvain.ts} | 27 +- .../builders/{dependency.js => dependency.ts} | 60 ++- src/graph/builders/index.js | 3 - src/graph/builders/structure.js | 40 -- src/graph/builders/structure.ts | 58 +++ src/graph/builders/temporal.js | 33 -- src/graph/builders/temporal.ts | 51 ++ src/graph/classifiers/{index.js => index.ts} | 0 src/graph/{index.js => index.ts} | 0 src/types.ts | 2 +- src/vendor.d.ts | 15 +- 30 files changed, 1628 insertions(+), 1211 deletions(-) rename src/db/{connection.js => connection.ts} (70%) rename src/db/{index.js => index.ts} (96%) rename src/db/{migrations.js => migrations.ts} (90%) rename src/db/{query-builder.js => query-builder.ts} (76%) rename src/domain/{queries.js => queries.ts} (98%) rename src/graph/algorithms/{index.js => index.ts} (100%) delete mode 100644 src/graph/algorithms/leiden/adapter.js create mode 100644 src/graph/algorithms/leiden/adapter.ts delete mode 100644 src/graph/algorithms/leiden/cpm.js create mode 100644 src/graph/algorithms/leiden/cpm.ts delete mode 
100644 src/graph/algorithms/leiden/index.js create mode 100644 src/graph/algorithms/leiden/index.ts delete mode 100644 src/graph/algorithms/leiden/modularity.js create mode 100644 src/graph/algorithms/leiden/modularity.ts rename src/graph/algorithms/leiden/{optimiser.js => optimiser.ts} (53%) delete mode 100644 src/graph/algorithms/leiden/partition.js create mode 100644 src/graph/algorithms/leiden/partition.ts rename src/graph/algorithms/{louvain.js => louvain.ts} (57%) rename src/graph/builders/{dependency.js => dependency.ts} (63%) delete mode 100644 src/graph/builders/index.js delete mode 100644 src/graph/builders/structure.js create mode 100644 src/graph/builders/structure.ts delete mode 100644 src/graph/builders/temporal.js create mode 100644 src/graph/builders/temporal.ts rename src/graph/classifiers/{index.js => index.ts} (100%) rename src/graph/{index.js => index.ts} (100%) diff --git a/src/db/connection.js b/src/db/connection.ts similarity index 70% rename from src/db/connection.js rename to src/db/connection.ts index 59114bbd..cadd04e0 100644 --- a/src/db/connection.js +++ b/src/db/connection.ts @@ -4,11 +4,15 @@ import path from 'node:path'; import Database from 'better-sqlite3'; import { debug, warn } from '../infrastructure/logger.js'; import { DbError } from '../shared/errors.js'; +import type { BetterSqlite3Database } from '../types.js'; import { Repository } from './repository/base.js'; import { SqliteRepository } from './repository/sqlite-repository.js'; -let _cachedRepoRoot; // undefined = not computed, null = not a git repo -let _cachedRepoRootCwd; // cwd at the time the cache was populated +/** DB instance with optional advisory lock path. 
*/ +export type LockedDatabase = BetterSqlite3Database & { __lockPath?: string }; + +let _cachedRepoRoot: string | null | undefined; // undefined = not computed, null = not a git repo +let _cachedRepoRootCwd: string | undefined; // cwd at the time the cache was populated /** * Return the git worktree/repo root for the given directory (or cwd). @@ -17,15 +21,13 @@ let _cachedRepoRootCwd; // cwd at the time the cache was populated * Results are cached per-process when called without arguments. * The cache is keyed on cwd so it invalidates if the working directory changes * (e.g. MCP server serving multiple sessions). - * @param {string} [fromDir] - Directory to resolve from (defaults to cwd) - * @returns {string | null} Absolute path to repo root, or null if not in a git repo */ -export function findRepoRoot(fromDir) { +export function findRepoRoot(fromDir?: string): string | null { const dir = fromDir || process.cwd(); if (!fromDir && _cachedRepoRoot !== undefined && _cachedRepoRootCwd === dir) { return _cachedRepoRoot; } - let root = null; + let root: string | null = null; try { const raw = execFileSync('git', ['rev-parse', '--show-toplevel'], { cwd: dir, @@ -38,11 +40,11 @@ export function findRepoRoot(fromDir) { try { root = fs.realpathSync(raw); } catch (e) { - debug(`realpathSync failed for git root "${raw}", using resolve: ${e.message}`); + debug(`realpathSync failed for git root "${raw}", using resolve: ${(e as Error).message}`); root = path.resolve(raw); } } catch (e) { - debug(`git rev-parse failed for "${dir}": ${e.message}`); + debug(`git rev-parse failed for "${dir}": ${(e as Error).message}`); root = null; } if (!fromDir) { @@ -53,22 +55,22 @@ export function findRepoRoot(fromDir) { } /** Reset the cached repo root (for testing). 
*/ -export function _resetRepoRootCache() { +export function _resetRepoRootCache(): void { _cachedRepoRoot = undefined; _cachedRepoRootCwd = undefined; } -function isProcessAlive(pid) { +function isProcessAlive(pid: number): boolean { try { process.kill(pid, 0); return true; } catch (e) { - debug(`PID ${pid} not alive: ${e.code || e.message}`); + debug(`PID ${pid} not alive: ${(e as NodeJS.ErrnoException).code || (e as Error).message}`); return false; } } -function acquireAdvisoryLock(dbPath) { +function acquireAdvisoryLock(dbPath: string): void { const lockPath = `${dbPath}.lock`; try { if (fs.existsSync(lockPath)) { @@ -79,23 +81,23 @@ function acquireAdvisoryLock(dbPath) { } } } catch (e) { - debug(`Advisory lock read failed: ${e.message}`); + debug(`Advisory lock read failed: ${(e as Error).message}`); } try { fs.writeFileSync(lockPath, String(process.pid), 'utf-8'); } catch (e) { - debug(`Advisory lock write failed: ${e.message}`); + debug(`Advisory lock write failed: ${(e as Error).message}`); } } -function releaseAdvisoryLock(lockPath) { +function releaseAdvisoryLock(lockPath: string): void { try { const content = fs.readFileSync(lockPath, 'utf-8').trim(); if (Number(content) === process.pid) { fs.unlinkSync(lockPath); } } catch (e) { - debug(`Advisory lock release failed for ${lockPath}: ${e.message}`); + debug(`Advisory lock release failed for ${lockPath}: ${(e as Error).message}`); } } @@ -104,58 +106,64 @@ function releaseAdvisoryLock(lockPath) { * Handles Windows 8.3 short names (RUNNER~1 vs runneradmin) and macOS * symlinks (/tmp vs /private/tmp) where string comparison fails. 
*/ -function isSameDirectory(a, b) { +function isSameDirectory(a: string, b: string): boolean { if (path.resolve(a) === path.resolve(b)) return true; try { const sa = fs.statSync(a); const sb = fs.statSync(b); return sa.dev === sb.dev && sa.ino === sb.ino; } catch (e) { - debug(`isSameDirectory stat failed: ${e.message}`); + debug(`isSameDirectory stat failed: ${(e as Error).message}`); return false; } } -export function openDb(dbPath) { +export function openDb(dbPath: string): LockedDatabase { const dir = path.dirname(dbPath); if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); acquireAdvisoryLock(dbPath); - const db = new Database(dbPath); + // vendor.d.ts declares Database as a callable; cast through unknown for construct usage + const db = new ( + Database as unknown as new ( + path: string, + opts?: Record, + ) => LockedDatabase + )(dbPath); db.pragma('journal_mode = WAL'); db.pragma('busy_timeout = 5000'); db.__lockPath = `${dbPath}.lock`; return db; } -export function closeDb(db) { +export function closeDb(db: LockedDatabase): void { db.close(); if (db.__lockPath) releaseAdvisoryLock(db.__lockPath); } -export function findDbPath(customPath) { +export function findDbPath(customPath?: string): string { if (customPath) return path.resolve(customPath); const rawCeiling = findRepoRoot(); // Normalize ceiling with realpathSync to resolve 8.3 short names (Windows // RUNNER~1 → runneradmin) and symlinks (macOS /var → /private/var). // findRepoRoot already applies realpathSync internally, but the git output // may still contain short names on some Windows CI environments. - let ceiling; + let ceiling: string | null; if (rawCeiling) { try { ceiling = fs.realpathSync(rawCeiling); } catch (e) { - debug(`realpathSync failed for ceiling "${rawCeiling}": ${e.message}`); + debug(`realpathSync failed for ceiling "${rawCeiling}": ${(e as Error).message}`); ceiling = rawCeiling; } } else { ceiling = null; } // Resolve symlinks (e.g. 
macOS /var → /private/var) so dir matches ceiling from git - let dir; + let dir: string; try { dir = fs.realpathSync(process.cwd()); } catch (e) { - debug(`realpathSync failed for cwd: ${e.message}`); + debug(`realpathSync failed for cwd: ${(e as Error).message}`); dir = process.cwd(); } while (true) { @@ -173,10 +181,8 @@ export function findDbPath(customPath) { return path.join(base, '.codegraph', 'graph.db'); } -/** - * Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. - */ -export function openReadonlyOrFail(customPath) { +/** Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. */ +export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database { const dbPath = findDbPath(customPath); if (!fs.existsSync(dbPath)) { throw new DbError( @@ -184,7 +190,12 @@ export function openReadonlyOrFail(customPath) { { file: dbPath }, ); } - return new Database(dbPath, { readonly: true }); + return new ( + Database as unknown as new ( + path: string, + opts?: Record, + ) => BetterSqlite3Database + )(dbPath, { readonly: true }); } /** @@ -192,13 +203,11 @@ export function openReadonlyOrFail(customPath) { * * When `opts.repo` is a Repository instance, returns it directly (no DB opened). * Otherwise opens a readonly SQLite DB and wraps it in SqliteRepository. 
- * - * @param {string} [customDbPath] - Path to graph.db (ignored when opts.repo is set) - * @param {object} [opts] - * @param {Repository} [opts.repo] - Pre-built Repository to use instead of SQLite - * @returns {{ repo: Repository, close(): void }} */ -export function openRepo(customDbPath, opts = {}) { +export function openRepo( + customDbPath?: string, + opts: { repo?: Repository } = {}, +): { repo: Repository; close(): void } { if (opts.repo != null) { if (!(opts.repo instanceof Repository)) { throw new TypeError( diff --git a/src/db/index.js b/src/db/index.ts similarity index 96% rename from src/db/index.js rename to src/db/index.ts index 7d938e1d..ffcc396a 100644 --- a/src/db/index.js +++ b/src/db/index.ts @@ -1,4 +1,6 @@ // Barrel re-export — keeps all existing `import { ... } from '…/db/index.js'` working. + +export type { LockedDatabase } from './connection.js'; export { closeDb, findDbPath, diff --git a/src/db/migrations.js b/src/db/migrations.ts similarity index 90% rename from src/db/migrations.js rename to src/db/migrations.ts index ecafa49e..ade92708 100644 --- a/src/db/migrations.js +++ b/src/db/migrations.ts @@ -1,7 +1,14 @@ import { debug } from '../infrastructure/logger.js'; +import type { BetterSqlite3Database } from '../types.js'; // ─── Schema Migrations ───────────────────────────────────────────────── -export const MIGRATIONS = [ + +interface Migration { + version: number; + up: string; +} + +export const MIGRATIONS: Migration[] = [ { version: 1, up: ` @@ -242,28 +249,43 @@ export const MIGRATIONS = [ }, ]; -function hasColumn(db, table, column) { - const cols = db.pragma(`table_info(${table})`); +interface PragmaColumnInfo { + name: string; + type: string; + notnull: number; + dflt_value: unknown; + pk: number; +} + +function hasColumn(db: BetterSqlite3Database, table: string, column: string): boolean { + const cols = db.pragma(`table_info(${table})`) as PragmaColumnInfo[]; return cols.some((c) => c.name === column); } -function 
hasTable(db, table) { - const row = db.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=?").get(table); +function hasTable(db: BetterSqlite3Database, table: string): boolean { + const row = db + .prepare<{ '1': number }>("SELECT 1 FROM sqlite_master WHERE type='table' AND name=?") + .get(table); return !!row; } -export function getBuildMeta(db, key) { +export function getBuildMeta(db: BetterSqlite3Database, key: string): string | null { if (!hasTable(db, 'build_meta')) return null; try { - const row = db.prepare('SELECT value FROM build_meta WHERE key = ?').get(key); + const row = db + .prepare<{ value: string }>('SELECT value FROM build_meta WHERE key = ?') + .get(key); return row ? row.value : null; } catch (e) { - debug(`getBuildMeta failed for key "${key}": ${e.message}`); + debug(`getBuildMeta failed for key "${key}": ${(e as Error).message}`); return null; } } -export function setBuildMeta(db, entries) { +export function setBuildMeta( + db: BetterSqlite3Database, + entries: Record, +): void { const upsert = db.prepare('INSERT OR REPLACE INTO build_meta (key, value) VALUES (?, ?)'); const tx = db.transaction(() => { for (const [key, value] of Object.entries(entries)) { @@ -273,10 +295,10 @@ export function setBuildMeta(db, entries) { tx(); } -export function initSchema(db) { +export function initSchema(db: BetterSqlite3Database): void { db.exec(`CREATE TABLE IF NOT EXISTS schema_version (version INTEGER NOT NULL DEFAULT 0)`); - const row = db.prepare('SELECT version FROM schema_version').get(); + const row = db.prepare<{ version: number }>('SELECT version FROM schema_version').get(); let currentVersion = row ? 
row.version : 0; if (!row) { diff --git a/src/db/query-builder.js b/src/db/query-builder.ts similarity index 76% rename from src/db/query-builder.js rename to src/db/query-builder.ts index ae2d11db..66fe5cd9 100644 --- a/src/db/query-builder.js +++ b/src/db/query-builder.ts @@ -1,5 +1,6 @@ import { DbError } from '../shared/errors.js'; import { DEAD_ROLE_PREFIX, EVERY_EDGE_KIND } from '../shared/kinds.js'; +import type { BetterSqlite3Database } from '../types.js'; // ─── Validation Helpers ───────────────────────────────────────────── @@ -11,19 +12,19 @@ const SAFE_ORDER_TERM_RE = /^[a-z_][a-z0-9_]*(?:\.[a-z_][a-z0-9_]*)?\s*(?:asc|de const SAFE_SELECT_TOKEN_RE = /^(?:[a-z_][a-z0-9_]*(?:\.[a-z_*][a-z0-9_]*)?\s*(?:as\s+[a-z_][a-z0-9_]*)?|[a-z_]+\([^)]*\)\s*(?:as\s+[a-z_][a-z0-9_]*)?)$/i; -function validateAlias(alias) { +function validateAlias(alias: string): void { if (!SAFE_ALIAS_RE.test(alias)) { throw new DbError(`Invalid SQL alias: ${alias}`); } } -function validateColumn(column) { +function validateColumn(column: string): void { if (!SAFE_COLUMN_RE.test(column)) { throw new DbError(`Invalid SQL column: ${column}`); } } -function validateOrderBy(clause) { +function validateOrderBy(clause: string): void { const terms = clause.split(',').map((t) => t.trim()); for (const term of terms) { if (!SAFE_ORDER_TERM_RE.test(term)) { @@ -32,8 +33,8 @@ function validateOrderBy(clause) { } } -function splitTopLevelCommas(str) { - const parts = []; +function splitTopLevelCommas(str: string): string[] { + const parts: string[] = []; let depth = 0; let start = 0; for (let i = 0; i < str.length; i++) { @@ -48,7 +49,7 @@ function splitTopLevelCommas(str) { return parts; } -function validateSelectCols(cols) { +function validateSelectCols(cols: string): void { const tokens = splitTopLevelCommas(cols); for (const token of tokens) { if (!SAFE_SELECT_TOKEN_RE.test(token)) { @@ -57,8 +58,8 @@ function validateSelectCols(cols) { } } -function validateEdgeKind(edgeKind) { - if 
(!EVERY_EDGE_KIND.includes(edgeKind)) { +function validateEdgeKind(edgeKind: string): void { + if (!EVERY_EDGE_KIND.includes(edgeKind as never)) { throw new DbError( `Invalid edge kind: ${edgeKind} (expected one of ${EVERY_EDGE_KIND.join(', ')})`, ); @@ -68,17 +69,15 @@ function validateEdgeKind(edgeKind) { // ─── LIKE Escaping ────────────────────────────────────────────────── /** Escape LIKE wildcards in a literal string segment. */ -export function escapeLike(s) { +export function escapeLike(s: string): string { return s.replace(/[%_\\]/g, '\\$&'); } /** * Normalize a file filter value (string, string[], or falsy) into a flat array. * Returns an empty array when the input is falsy. - * @param {string|string[]|undefined|null} file - * @returns {string[]} */ -export function normalizeFileFilter(file) { +export function normalizeFileFilter(file: string | string[] | undefined | null): string[] { if (!file) return []; return Array.isArray(file) ? file : [file]; } @@ -86,19 +85,18 @@ export function normalizeFileFilter(file) { /** * Build a SQL condition + params for a multi-value file LIKE filter. * Returns `{ sql: '', params: [] }` when the filter is empty. - * - * @param {string|string[]} file - One or more partial file paths - * @param {string} [column='file'] - The column name to filter on (e.g. 'n.file', 'a.file') - * @returns {{ sql: string, params: string[] }} */ -export function buildFileConditionSQL(file, column = 'file') { +export function buildFileConditionSQL( + file: string | string[], + column = 'file', +): { sql: string; params: string[] } { validateColumn(column); const files = normalizeFileFilter(file); if (files.length === 0) return { sql: '', params: [] }; if (files.length === 1) { return { sql: ` AND ${column} LIKE ? ESCAPE '\\'`, - params: [`%${escapeLike(files[0])}%`], + params: [`%${escapeLike(files[0] as string)}%`], }; } const clauses = files.map(() => `${column} LIKE ? 
ESCAPE '\\'`); @@ -111,11 +109,8 @@ export function buildFileConditionSQL(file, column = 'file') { /** * Commander option accumulator for repeatable `--file` flag. * Use as: `['-f, --file ', 'Scope to file (partial match, repeatable)', collectFile]` - * @param {string} val - New value from Commander - * @param {string[]} acc - Accumulated values (undefined on first call) - * @returns {string[]} */ -export function collectFile(val, acc) { +export function collectFile(val: string, acc?: string[]): string[] { acc = acc || []; acc.push(val); return acc; @@ -126,10 +121,8 @@ export function collectFile(val, acc) { /** * Return a SQL AND clause that excludes test/spec/stories files. * Returns empty string when disabled. - * @param {string} [column='n.file'] - Column to filter on - * @param {boolean} [enabled=true] - No-op when false */ -export function testFilterSQL(column = 'n.file', enabled = true) { +export function testFilterSQL(column = 'n.file', enabled = true): string { if (!enabled) return ''; validateColumn(column); return `AND ${column} NOT LIKE '%.test.%' @@ -139,12 +132,8 @@ export function testFilterSQL(column = 'n.file', enabled = true) { AND ${column} NOT LIKE '%.stories.%'`; } -/** - * Build IN (?, ?, ?) placeholders and params array for a kind filter. - * @param {string[]} kinds - * @returns {{ placeholders: string, params: string[] }} - */ -export function kindInClause(kinds) { +/** Build IN (?, ?, ?) placeholders and params array for a kind filter. */ +export function kindInClause(kinds: string[]): { placeholders: string; params: string[] } { return { placeholders: kinds.map(() => '?').join(', '), params: [...kinds], @@ -153,10 +142,8 @@ export function kindInClause(kinds) { /** * Return a LEFT JOIN subquery for fan-in (incoming edge count). 
- * @param {string} [edgeKind='calls'] - Edge kind to count - * @param {string} [alias='fi'] - Subquery alias */ -export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi') { +export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi'): string { validateEdgeKind(edgeKind); validateAlias(alias); return `LEFT JOIN ( @@ -166,10 +153,8 @@ export function fanInJoinSQL(edgeKind = 'calls', alias = 'fi') { /** * Return a LEFT JOIN subquery for fan-out (outgoing edge count). - * @param {string} [edgeKind='calls'] - Edge kind to count - * @param {string} [alias='fo'] - Subquery alias */ -export function fanOutJoinSQL(edgeKind = 'calls', alias = 'fo') { +export function fanOutJoinSQL(edgeKind = 'calls', alias = 'fo'): string { validateEdgeKind(edgeKind); validateAlias(alias); return `LEFT JOIN ( @@ -185,21 +170,21 @@ export function fanOutJoinSQL(edgeKind = 'calls', alias = 'fo') { */ export class NodeQuery { #selectCols = 'n.*'; - #joins = []; - #conditions = []; - #params = []; + #joins: string[] = []; + #conditions: string[] = []; + #params: (string | number)[] = []; #orderByClause = ''; - #limitValue = null; + #limitValue: number | null = null; /** Set SELECT columns (default: `n.*`). */ - select(cols) { + select(cols: string): this { validateSelectCols(cols); this.#selectCols = cols; return this; } /** WHERE n.kind IN (?, ?, ...) */ - kinds(kindArray) { + kinds(kindArray: string[] | undefined | null): this { if (!kindArray || kindArray.length === 0) return this; const { placeholders, params } = kindInClause(kindArray); this.#conditions.push(`n.kind IN (${placeholders})`); @@ -208,7 +193,7 @@ export class NodeQuery { } /** Add 5 NOT LIKE conditions to exclude test files. No-op when enabled is falsy. */ - excludeTests(enabled) { + excludeTests(enabled: boolean | undefined): this { if (!enabled) return this; this.#conditions.push( `n.file NOT LIKE '%.test.%'`, @@ -221,12 +206,12 @@ export class NodeQuery { } /** WHERE n.file LIKE ? (no-op if falsy). 
Accepts a single string or string[]. */ - fileFilter(file) { + fileFilter(file: string | string[] | undefined | null): this { const files = normalizeFileFilter(file); if (files.length === 0) return this; if (files.length === 1) { this.#conditions.push("n.file LIKE ? ESCAPE '\\'"); - this.#params.push(`%${escapeLike(files[0])}%`); + this.#params.push(`%${escapeLike(files[0] as string)}%`); } else { const clauses = files.map(() => "n.file LIKE ? ESCAPE '\\'"); this.#conditions.push(`(${clauses.join(' OR ')})`); @@ -236,7 +221,7 @@ export class NodeQuery { } /** WHERE n.kind = ? (no-op if falsy). */ - kindFilter(kind) { + kindFilter(kind: string | undefined | null): this { if (!kind) return this; this.#conditions.push('n.kind = ?'); this.#params.push(kind); @@ -244,7 +229,7 @@ export class NodeQuery { } /** WHERE n.role = ? (no-op if falsy). 'dead' matches all dead-* sub-roles. */ - roleFilter(role) { + roleFilter(role: string | undefined | null): this { if (!role) return this; if (role === DEAD_ROLE_PREFIX) { this.#conditions.push('n.role LIKE ?'); @@ -257,7 +242,7 @@ export class NodeQuery { } /** WHERE n.name LIKE ? (no-op if falsy). Escapes LIKE wildcards in the value. */ - nameLike(pattern) { + nameLike(pattern: string | undefined | null): this { if (!pattern) return this; this.#conditions.push("n.name LIKE ? ESCAPE '\\'"); this.#params.push(`%${escapeLike(pattern)}%`); @@ -265,54 +250,54 @@ export class NodeQuery { } /** Raw WHERE condition escape hatch. */ - where(sql, ...params) { + where(sql: string, ...params: (string | number)[]): this { this.#conditions.push(sql); this.#params.push(...params); return this; } /** Add fan-in LEFT JOIN subquery. */ - withFanIn(edgeKind = 'calls') { + withFanIn(edgeKind = 'calls'): this { return this._join(fanInJoinSQL(edgeKind)); } /** Add fan-out LEFT JOIN subquery. */ - withFanOut(edgeKind = 'calls') { + withFanOut(edgeKind = 'calls'): this { return this._join(fanOutJoinSQL(edgeKind)); } /** LEFT JOIN function_complexity. 
*/ - withComplexity() { + withComplexity(): this { return this._join('LEFT JOIN function_complexity fc ON fc.node_id = n.id'); } /** LEFT JOIN file_commit_counts. */ - withChurn() { + withChurn(): this { return this._join('LEFT JOIN file_commit_counts fcc ON n.file = fcc.file'); } - /** @private Raw JOIN — internal use only; external callers should use withFanIn/withFanOut/withComplexity/withChurn. */ - _join(sql) { + /** @internal Raw JOIN — internal use only; external callers should use withFanIn/withFanOut/withComplexity/withChurn. */ + _join(sql: string): this { this.#joins.push(sql); return this; } /** ORDER BY clause. */ - orderBy(clause) { + orderBy(clause: string): this { validateOrderBy(clause); this.#orderByClause = clause; return this; } /** LIMIT ?. */ - limit(n) { + limit(n: number | undefined | null): this { if (n == null) return this; this.#limitValue = n; return this; } /** Build the SQL and params without executing. */ - build() { + build(): { sql: string; params: (string | number)[] } { const joins = this.#joins.length > 0 ? `\n ${this.#joins.join('\n ')}` : ''; const where = this.#conditions.length > 0 ? `\n WHERE ${this.#conditions.join(' AND ')}` : ''; @@ -330,20 +315,20 @@ export class NodeQuery { } /** Execute and return all rows. */ - all(db) { + all>(db: BetterSqlite3Database): TRow[] { const { sql, params } = this.build(); - return db.prepare(sql).all(...params); + return db.prepare(sql).all(...params) as TRow[]; } /** Execute and return first row. */ - get(db) { + get>(db: BetterSqlite3Database): TRow | undefined { const { sql, params } = this.build(); - return db.prepare(sql).get(...params); + return db.prepare(sql).get(...params) as TRow | undefined; } /** Execute and return an iterator. 
*/ - iterate(db) { + iterate>(db: BetterSqlite3Database): IterableIterator { const { sql, params } = this.build(); - return db.prepare(sql).iterate(...params); + return db.prepare(sql).iterate(...params) as IterableIterator; } } diff --git a/src/domain/analysis/module-map.ts b/src/domain/analysis/module-map.ts index e2d3c79b..686291a6 100644 --- a/src/domain/analysis/module-map.ts +++ b/src/domain/analysis/module-map.ts @@ -140,8 +140,8 @@ function countFilesByLanguage( return { total: fileNodes.length, languages: Object.keys(byLanguage).length, byLanguage }; } -// biome-ignore lint/suspicious/noExplicitAny: db handle from better-sqlite3 function findHotspots( + // biome-ignore lint/suspicious/noExplicitAny: db handle from better-sqlite3 db: any, noTests: boolean, limit: number, @@ -173,15 +173,15 @@ function getEmbeddingsInfo(db: any): object | null { // biome-ignore lint/suspicious/noExplicitAny: untyped SQLite row const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get() as any; if (count && count.c > 0) { - const meta: Record = {}; + const meta: { model?: string; dim?: string; built_at?: string } = {}; // biome-ignore lint/suspicious/noExplicitAny: untyped SQLite row const metaRows = db.prepare('SELECT key, value FROM embedding_meta').all() as any[]; - for (const r of metaRows) meta[r.key] = r.value; + for (const r of metaRows) (meta as Record)[r.key] = r.value; return { count: count.c, - model: meta['model'] || null, - dim: meta['dim'] ? parseInt(meta['dim'], 10) : null, - builtAt: meta['built_at'] || null, + model: meta.model || null, + dim: meta.dim ? 
parseInt(meta.dim, 10) : null, + builtAt: meta.built_at || null, }; } } catch (e) { @@ -278,13 +278,13 @@ function countRoles(db: any, noTests: boolean): Record { .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') .all(); } - const roles: Record = {}; + const roles: Record & { dead?: number } = {}; let deadTotal = 0; for (const r of roleRows) { roles[r.role] = r.c; if (r.role.startsWith(DEAD_ROLE_PREFIX)) deadTotal += r.c; } - if (deadTotal > 0) roles['dead'] = deadTotal; + if (deadTotal > 0) roles.dead = deadTotal; return roles; } @@ -358,9 +358,17 @@ export function moduleMapData( coupling: n.in_edges + n.out_edges, })); - const totalNodes = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c; - const totalEdges = db.prepare('SELECT COUNT(*) as c FROM edges').get().c; - const totalFiles = db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get().c; + const totalNodes = ( + db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get() as { c: number } + ).c; + const totalEdges = ( + db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM edges').get() as { c: number } + ).c; + const totalFiles = ( + db.prepare<{ c: number }>("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get() as { + c: number; + } + ).c; return { limit, topNodes, stats: { totalFiles, totalNodes, totalEdges } }; } finally { diff --git a/src/domain/queries.js b/src/domain/queries.ts similarity index 98% rename from src/domain/queries.js rename to src/domain/queries.ts index 7a3c6207..b35ad981 100644 --- a/src/domain/queries.js +++ b/src/domain/queries.ts @@ -1,5 +1,5 @@ /** - * queries.js — Barrel re-export file. + * queries.ts — Barrel re-export file. * * All query logic lives in the sub-modules under src/analysis/ and src/shared/. 
* This file exists purely for backward compatibility so that all existing diff --git a/src/domain/search/generator.ts b/src/domain/search/generator.ts index 9129332c..085acbea 100644 --- a/src/domain/search/generator.ts +++ b/src/domain/search/generator.ts @@ -61,7 +61,7 @@ export async function buildEmbeddings( options: BuildEmbeddingsOptions = {}, ): Promise { const strategy = options.strategy || 'structured'; - const dbPath = customDbPath || findDbPath(null); + const dbPath = customDbPath || findDbPath(undefined); if (!fs.existsSync(dbPath)) { throw new DbError( diff --git a/src/graph/algorithms/index.js b/src/graph/algorithms/index.ts similarity index 100% rename from src/graph/algorithms/index.js rename to src/graph/algorithms/index.ts diff --git a/src/graph/algorithms/leiden/adapter.js b/src/graph/algorithms/leiden/adapter.js deleted file mode 100644 index c5425a5f..00000000 --- a/src/graph/algorithms/leiden/adapter.js +++ /dev/null @@ -1,160 +0,0 @@ -/** - * Graph adapter that converts a CodeGraph into the dense array format - * expected by the Leiden optimiser. - * - * Vendored from ngraph.leiden (MIT) — adapted for CodeGraph. - */ - -/** - * @param {import('../../model.js').CodeGraph} graph - * @param {object} [opts] - * @param {boolean} [opts.directed] - * @param {(attrs: object) => number} [opts.linkWeight] - extract weight from edge attrs - * @param {(attrs: object) => number} [opts.nodeSize] - extract size from node attrs - * @param {string[]} [opts.baseNodeIds] - */ -export function makeGraphAdapter(graph, opts = {}) { - const linkWeight = - opts.linkWeight || ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)); - const nodeSize = - opts.nodeSize || ((attrs) => (attrs && typeof attrs.size === 'number' ? 
attrs.size : 1)); - const directed = !!opts.directed; - const baseNodeIds = opts.baseNodeIds; - - // Build dense node index mapping - const nodeIds = []; - const idToIndex = new Map(); - if (Array.isArray(baseNodeIds) && baseNodeIds.length > 0) { - for (let i = 0; i < baseNodeIds.length; i++) { - const id = baseNodeIds[i]; - if (!graph.hasNode(id)) throw new Error(`Missing node: ${id}`); - idToIndex.set(id, i); - nodeIds.push(id); - } - } else { - for (const [id] of graph.nodes()) { - idToIndex.set(id, nodeIds.length); - nodeIds.push(id); - } - } - const n = nodeIds.length; - - // Storage - const size = new Float64Array(n); - const selfLoop = new Float64Array(n); - const strengthOut = new Float64Array(n); - const strengthIn = new Float64Array(n); - - // Edge list by source for fast iteration - const outEdges = new Array(n); - const inEdges = new Array(n); - for (let i = 0; i < n; i++) { - outEdges[i] = []; - inEdges[i] = []; - } - - // Populate from graph - if (directed) { - for (const [src, tgt, attrs] of graph.edges()) { - const from = idToIndex.get(src); - const to = idToIndex.get(tgt); - if (from == null || to == null) continue; - const w = +linkWeight(attrs) || 0; - if (from === to) { - selfLoop[from] += w; - // Self-loop is intentionally kept in outEdges/inEdges as well. - // partition.js's moveNodeToCommunity (directed path) accounts for this - // by subtracting selfLoopWeight once from outToOld+inFromOld to avoid - // triple-counting (see partition.js moveNodeToCommunity directed block). 
- } - outEdges[from].push({ to, w }); - inEdges[to].push({ from, w }); - strengthOut[from] += w; - strengthIn[to] += w; - } - } else { - // Undirected: symmetrize and average reciprocal pairs - const pairAgg = new Map(); - - for (const [src, tgt, attrs] of graph.edges()) { - const a = idToIndex.get(src); - const b = idToIndex.get(tgt); - if (a == null || b == null) continue; - const w = +linkWeight(attrs) || 0; - if (a === b) { - selfLoop[a] += w; - continue; - } - const i = a < b ? a : b; - const j = a < b ? b : a; - const key = `${i}:${j}`; - let rec = pairAgg.get(key); - if (!rec) { - rec = { sum: 0, seenAB: 0, seenBA: 0 }; - pairAgg.set(key, rec); - } - rec.sum += w; - if (a === i) rec.seenAB = 1; - else rec.seenBA = 1; - } - - for (const [key, rec] of pairAgg.entries()) { - const [iStr, jStr] = key.split(':'); - const i = +iStr; - const j = +jStr; - const dirCount = (rec.seenAB ? 1 : 0) + (rec.seenBA ? 1 : 0); - const w = dirCount > 0 ? rec.sum / dirCount : 0; - if (w === 0) continue; - outEdges[i].push({ to: j, w }); - outEdges[j].push({ to: i, w }); - inEdges[i].push({ from: j, w }); - inEdges[j].push({ from: i, w }); - strengthOut[i] += w; - strengthOut[j] += w; - strengthIn[i] += w; - strengthIn[j] += w; - } - - // Add self-loops into adjacency and strengths. - // Note: uses single-w convention (not standard 2w) — the modularity formulas in - // modularity.js are written to match this convention, keeping the system self-consistent. 
- for (let v = 0; v < n; v++) { - const w = selfLoop[v]; - if (w !== 0) { - outEdges[v].push({ to: v, w }); - inEdges[v].push({ from: v, w }); - strengthOut[v] += w; - strengthIn[v] += w; - } - } - } - - // Node sizes - for (const [id, attrs] of graph.nodes()) { - const i = idToIndex.get(id); - if (i != null) size[i] = +nodeSize(attrs) || 0; - } - - // Totals - const totalWeight = strengthOut.reduce((a, b) => a + b, 0); - - function forEachNeighbor(i, cb) { - const list = outEdges[i]; - for (let k = 0; k < list.length; k++) cb(list[k].to, list[k].w); - } - - return { - n, - nodeIds, - idToIndex, - size, - selfLoop, - strengthOut, - strengthIn, - outEdges, - inEdges, - directed, - totalWeight, - forEachNeighbor, - }; -} diff --git a/src/graph/algorithms/leiden/adapter.ts b/src/graph/algorithms/leiden/adapter.ts new file mode 100644 index 00000000..5434cee0 --- /dev/null +++ b/src/graph/algorithms/leiden/adapter.ts @@ -0,0 +1,204 @@ +/** + * Graph adapter that converts a CodeGraph into the dense array format + * expected by the Leiden optimiser. + * + * Vendored from ngraph.leiden (MIT) — adapted for CodeGraph. 
+ */ + +import type { CodeGraph, EdgeAttrs, NodeAttrs } from '../../model.js'; + +export interface EdgeEntry { + to: number; + w: number; +} + +export interface InEdgeEntry { + from: number; + w: number; +} + +export interface GraphAdapterOptions { + directed?: boolean; + linkWeight?: (attrs: EdgeAttrs) => number; + nodeSize?: (attrs: NodeAttrs) => number; + baseNodeIds?: string[]; +} + +export interface GraphAdapter { + n: number; + nodeIds: string[]; + idToIndex: Map; + size: Float64Array; + selfLoop: Float64Array; + strengthOut: Float64Array; + strengthIn: Float64Array; + outEdges: EdgeEntry[][]; + inEdges: InEdgeEntry[][]; + directed: boolean; + totalWeight: number; + forEachNeighbor: (i: number, cb: (to: number, w: number) => void) => void; +} + +// Typed arrays always return a number for in-bounds access, but noUncheckedIndexedAccess +// widens the return to `number | undefined`. These helpers wrap compound assignment +// patterns (+=, -=) that appear frequently in this performance-critical code. +function taGet(a: Float64Array, i: number): number { + return a[i] as number; +} + +function taAdd(a: Float64Array, i: number, v: number): void { + a[i] = taGet(a, i) + v; +} + +function taSub(a: Float64Array, i: number, v: number): void { + a[i] = taGet(a, i) - v; +} + +export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = {}): GraphAdapter { + const linkWeight: (attrs: EdgeAttrs) => number = + opts.linkWeight || + // biome-ignore lint/complexity/useLiteralKeys: index signature requires bracket access + ((attrs) => (attrs && typeof attrs['weight'] === 'number' ? attrs['weight'] : 1)); + const nodeSize: (attrs: NodeAttrs) => number = + // biome-ignore lint/complexity/useLiteralKeys: index signature requires bracket access + opts.nodeSize || ((attrs) => (attrs && typeof attrs['size'] === 'number' ? 
attrs['size'] : 1)); + const directed: boolean = !!opts.directed; + const baseNodeIds: string[] | undefined = opts.baseNodeIds; + + // Build dense node index mapping + const nodeIds: string[] = []; + const idToIndex = new Map<string, number>(); + if (Array.isArray(baseNodeIds) && baseNodeIds.length > 0) { + for (let i = 0; i < baseNodeIds.length; i++) { + const id = baseNodeIds[i] as string; + if (!graph.hasNode(id)) throw new Error(`Missing node: ${id}`); + idToIndex.set(id, i); + nodeIds.push(id); + } + } else { + for (const [id] of graph.nodes()) { + idToIndex.set(id, nodeIds.length); + nodeIds.push(id); + } + } + const n: number = nodeIds.length; + + // Storage + const size = new Float64Array(n); + const selfLoop = new Float64Array(n); + const strengthOut = new Float64Array(n); + const strengthIn = new Float64Array(n); + + // Edge list by source for fast iteration + const outEdges: EdgeEntry[][] = new Array(n); + const inEdges: InEdgeEntry[][] = new Array(n); + for (let i = 0; i < n; i++) { + outEdges[i] = []; + inEdges[i] = []; + } + + // Populate from graph + if (directed) { + for (const [src, tgt, attrs] of graph.edges()) { + const from = idToIndex.get(src); + const to = idToIndex.get(tgt); + if (from == null || to == null) continue; + const w: number = +linkWeight(attrs) || 0; + if (from === to) { + taAdd(selfLoop, from, w); + // Self-loop is intentionally kept in outEdges/inEdges as well. + // partition.ts's moveNodeToCommunity (directed path) accounts for this + // by subtracting selfLoopWeight once from outToOld+inFromOld to avoid + // triple-counting (see partition.ts moveNodeToCommunity directed block).
+ } + (outEdges[from] as EdgeEntry[]).push({ to, w }); + (inEdges[to] as InEdgeEntry[]).push({ from, w }); + taAdd(strengthOut, from, w); + taAdd(strengthIn, to, w); + } + } else { + // Undirected: symmetrize and average reciprocal pairs + const pairAgg = new Map<string, { sum: number; seenAB: number; seenBA: number }>(); + + for (const [src, tgt, attrs] of graph.edges()) { + const a = idToIndex.get(src); + const b = idToIndex.get(tgt); + if (a == null || b == null) continue; + const w: number = +linkWeight(attrs) || 0; + if (a === b) { + taAdd(selfLoop, a, w); + continue; + } + const i = a < b ? a : b; + const j = a < b ? b : a; + const key = `${i}:${j}`; + let rec = pairAgg.get(key); + if (!rec) { + rec = { sum: 0, seenAB: 0, seenBA: 0 }; + pairAgg.set(key, rec); + } + rec.sum += w; + if (a === i) rec.seenAB = 1; + else rec.seenBA = 1; + } + + for (const [key, rec] of pairAgg.entries()) { + const parts = key.split(':'); + const i = +(parts[0] as string); + const j = +(parts[1] as string); + const dirCount: number = (rec.seenAB ? 1 : 0) + (rec.seenBA ? 1 : 0); + const w: number = dirCount > 0 ? rec.sum / dirCount : 0; + if (w === 0) continue; + (outEdges[i] as EdgeEntry[]).push({ to: j, w }); + (outEdges[j] as EdgeEntry[]).push({ to: i, w }); + (inEdges[i] as InEdgeEntry[]).push({ from: j, w }); + (inEdges[j] as InEdgeEntry[]).push({ from: i, w }); + taAdd(strengthOut, i, w); + taAdd(strengthOut, j, w); + taAdd(strengthIn, i, w); + taAdd(strengthIn, j, w); + } + + // Add self-loops into adjacency and strengths. + // Note: uses single-w convention (not standard 2w) — the modularity formulas in + // modularity.ts are written to match this convention, keeping the system self-consistent.
+ for (let v = 0; v < n; v++) { + const w: number = taGet(selfLoop, v); + if (w !== 0) { + (outEdges[v] as EdgeEntry[]).push({ to: v, w }); + (inEdges[v] as InEdgeEntry[]).push({ from: v, w }); + taAdd(strengthOut, v, w); + taAdd(strengthIn, v, w); + } + } + } + + // Node sizes + for (const [id, attrs] of graph.nodes()) { + const i = idToIndex.get(id); + if (i != null) size[i] = +nodeSize(attrs) || 0; + } + + // Totals + const totalWeight: number = strengthOut.reduce((a, b) => a + b, 0); + + function forEachNeighbor(i: number, cb: (to: number, w: number) => void): void { + const list = outEdges[i] as EdgeEntry[]; + for (let k = 0; k < list.length; k++) cb((list[k] as EdgeEntry).to, (list[k] as EdgeEntry).w); + } + + return { + n, + nodeIds, + idToIndex, + size, + selfLoop, + strengthOut, + strengthIn, + outEdges, + inEdges, + directed, + totalWeight, + forEachNeighbor, + }; +} diff --git a/src/graph/algorithms/leiden/cpm.js b/src/graph/algorithms/leiden/cpm.js deleted file mode 100644 index b32a2167..00000000 --- a/src/graph/algorithms/leiden/cpm.js +++ /dev/null @@ -1,39 +0,0 @@ -/** - * CPM (Constant Potts Model) quality functions. - * Vendored from ngraph.leiden (MIT) — no external dependencies. - */ - -export function diffCPM(part, g, v, c, gamma = 1.0) { - const oldC = part.nodeCommunity[v]; - if (c === oldC) return 0; - let w_old, w_new; - let selfCorrection = 0; - if (g.directed) { - w_old = - (part.getOutEdgeWeightToCommunity(oldC) || 0) + - (part.getInEdgeWeightFromCommunity(oldC) || 0); - w_new = - c < g.n - ? (part.getOutEdgeWeightToCommunity(c) || 0) + (part.getInEdgeWeightFromCommunity(c) || 0) - : 0; - // Self-loop weight appears in both out and in arrays for oldC, - // making w_old include 2×selfLoop. Correct to match moveNodeToCommunity. - selfCorrection = 2 * (g.selfLoop[v] || 0); - } else { - w_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; - w_new = c < g.n ? 
part.getNeighborEdgeWeightToCommunity(c) || 0 : 0; - } - const s_v = g.size[v] || 1; - const S_old = part.communityTotalSize[oldC] || 0; - const S_new = c < part.communityTotalSize.length ? part.communityTotalSize[c] : 0; - return w_new - w_old + selfCorrection - gamma * s_v * (S_new - S_old + s_v); -} - -export function qualityCPM(part, _g, gamma = 1.0) { - let sum = 0; - for (let c = 0; c < part.communityCount; c++) { - const S = part.communityTotalSize[c] || 0; - sum += part.communityInternalEdgeWeight[c] - (gamma * (S * (S - 1))) / 2; - } - return sum; -} diff --git a/src/graph/algorithms/leiden/cpm.ts b/src/graph/algorithms/leiden/cpm.ts new file mode 100644 index 00000000..957a605f --- /dev/null +++ b/src/graph/algorithms/leiden/cpm.ts @@ -0,0 +1,77 @@ +/** + * CPM (Constant Potts Model) quality functions. + * Vendored from ngraph.leiden (MIT) — no external dependencies. + */ + +/** + * Minimal view of a partition needed by CPM quality functions. + */ +export interface PartitionView { + readonly communityCount: number; + nodeCommunity: Int32Array; + readonly communityInternalEdgeWeight: Float64Array; + readonly communityTotalSize: Float64Array; + getOutEdgeWeightToCommunity(c: number): number; + getInEdgeWeightFromCommunity(c: number): number; + getNeighborEdgeWeightToCommunity(c: number): number; +} + +/** + * Minimal view of a graph needed by CPM quality functions. 
+ */ +export interface GraphView { + n: number; + directed: boolean; + selfLoop: Float64Array; + size: Float64Array; +} + +// Typed array safe-access helper (see adapter.ts for rationale) +function fget(a: Float64Array, i: number): number { + return a[i] as number; +} +function iget(a: Int32Array, i: number): number { + return a[i] as number; +} + +export function diffCPM( + part: PartitionView, + g: GraphView, + v: number, + c: number, + gamma: number = 1.0, +): number { + const oldC: number = iget(part.nodeCommunity, v); + if (c === oldC) return 0; + let w_old: number; + let w_new: number; + let selfCorrection: number = 0; + if (g.directed) { + w_old = + (part.getOutEdgeWeightToCommunity(oldC) || 0) + + (part.getInEdgeWeightFromCommunity(oldC) || 0); + w_new = + c < g.n + ? (part.getOutEdgeWeightToCommunity(c) || 0) + (part.getInEdgeWeightFromCommunity(c) || 0) + : 0; + // Self-loop weight appears in both out and in arrays for oldC, + // making w_old include 2x selfLoop. Correct to match moveNodeToCommunity. + selfCorrection = 2 * (fget(g.selfLoop, v) || 0); + } else { + w_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; + w_new = c < g.n ? part.getNeighborEdgeWeightToCommunity(c) || 0 : 0; + } + const s_v: number = fget(g.size, v) || 1; + const S_old: number = fget(part.communityTotalSize, oldC) || 0; + const S_new: number = c < part.communityTotalSize.length ? 
fget(part.communityTotalSize, c) : 0; + return w_new - w_old + selfCorrection - gamma * s_v * (S_new - S_old + s_v); +} + +export function qualityCPM(part: PartitionView, _g: GraphView, gamma: number = 1.0): number { + let sum: number = 0; + for (let c = 0; c < part.communityCount; c++) { + const S: number = fget(part.communityTotalSize, c) || 0; + sum += fget(part.communityInternalEdgeWeight, c) - (gamma * (S * (S - 1))) / 2; + } + return sum; +} diff --git a/src/graph/algorithms/leiden/index.js b/src/graph/algorithms/leiden/index.js deleted file mode 100644 index 4db9a027..00000000 --- a/src/graph/algorithms/leiden/index.js +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Leiden community detection — vendored from ngraph.leiden (MIT). - * Adapted to work directly with CodeGraph (no external graph library dependency). - * - * Original: https://github.com/anvaka/ngraph.leiden - * License: MIT — see LICENSE in this directory. - */ - -import { qualityCPM } from './cpm.js'; -import { qualityModularity } from './modularity.js'; -import { runLouvainUndirectedModularity } from './optimiser.js'; - -/** - * Detect communities in a CodeGraph using the Leiden algorithm. - * - * @param {import('../../model.js').CodeGraph} graph - * @param {object} [options] - * @param {number} [options.randomSeed=42] - * @param {boolean} [options.directed=false] - * @param {boolean} [options.refine=true] - Leiden refinement (set false for plain Louvain) - * @param {string} [options.quality='modularity'] - 'modularity' | 'cpm' - * @param {number} [options.resolution=1.0] - * @param {number} [options.maxCommunitySize] - * @param {Set|Array} [options.fixedNodes] - * @param {string} [options.candidateStrategy] - 'neighbors' | 'all' | 'random' | 'random-neighbor' - * @param {number} [options.refinementTheta=1.0] - Temperature for probabilistic Leiden refinement (Algorithm 3, Traag et al. 2019). Lower → more greedy, higher → more exploratory. 
Deterministic via seeded PRNG - * @returns {{ getClass(id): number, getCommunities(): Map, quality(): number, toJSON(): object }} - * - * **Note on `quality()`:** For modularity, `quality()` always evaluates at γ=1.0 - * (standard Newman-Girvan modularity) regardless of the `resolution` used during - * optimization. This makes quality values comparable across runs with different - * resolutions. For CPM, `quality()` uses the caller-specified resolution since γ - * is intrinsic to the CPM metric. Do not use modularity `quality()` values to - * compare partitions found at different resolutions — they reflect Q at γ=1.0, - * not the objective that was actually optimized. - */ -export function detectClusters(graph, options = {}) { - const { levels, originalToCurrent, originalNodeIds, baseGraph } = runLouvainUndirectedModularity( - graph, - options, - ); - - const idToClass = new Map(); - for (let i = 0; i < originalNodeIds.length; i++) { - const comm = originalToCurrent[i]; - idToClass.set(originalNodeIds[i], comm); - } - - return { - getClass(nodeId) { - return idToClass.get(String(nodeId)); - }, - getCommunities() { - const out = new Map(); - for (const [id, c] of idToClass) { - if (!out.has(c)) out.set(c, []); - out.get(c).push(id); - } - return out; - }, - quality() { - // Compute quality on the original (level-0) graph with the final - // partition mapped back. Computing on the last coarse-level graph - // produces inflated values because the modularity null model depends - // on the degree distribution, which changes after coarsening. - const part = buildOriginalPartition(baseGraph, originalToCurrent); - const q = (options.quality || 'modularity').toLowerCase(); - if (q === 'cpm') { - const gamma = typeof options.resolution === 'number' ? 
options.resolution : 1.0; - return qualityCPM(part, baseGraph, gamma); - } - // Always evaluate at gamma=1.0 for standard Newman-Girvan modularity - return qualityModularity(part, baseGraph, 1.0); - }, - toJSON() { - const membershipObj = {}; - for (const [id, c] of idToClass) membershipObj[id] = c; - return { - membership: membershipObj, - meta: { levels: levels.length, quality: this.quality(), options }, - }; - }, - }; -} - -/** - * Build a minimal partition-like object from the original graph and the - * final community mapping, suitable for qualityModularity / qualityCPM. - */ -function buildOriginalPartition(g, communityMap) { - const n = g.n; - let maxC = 0; - for (let i = 0; i < n; i++) if (communityMap[i] > maxC) maxC = communityMap[i]; - const cc = maxC + 1; - - const internalWeight = new Float64Array(cc); - const totalStr = new Float64Array(cc); - const totalOutStr = new Float64Array(cc); - const totalInStr = new Float64Array(cc); - const totalSize = new Float64Array(cc); - - for (let i = 0; i < n; i++) { - const c = communityMap[i]; - totalSize[c] += g.size[i]; - if (g.directed) { - totalOutStr[c] += g.strengthOut[i]; - totalInStr[c] += g.strengthIn[i]; - } else { - totalStr[c] += g.strengthOut[i]; - } - if (g.selfLoop[i]) internalWeight[c] += g.selfLoop[i]; - } - - if (g.directed) { - for (let i = 0; i < n; i++) { - const ci = communityMap[i]; - const list = g.outEdges[i]; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]; - if (i === j) continue; - if (ci === communityMap[j]) internalWeight[ci] += w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci = communityMap[i]; - const list = g.outEdges[i]; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]; - if (j <= i) continue; - if (ci === communityMap[j]) internalWeight[ci] += w; - } - } - } - - return { - communityCount: cc, - communityInternalEdgeWeight: internalWeight, - communityTotalStrength: totalStr, - communityTotalOutStrength: totalOutStr, - 
communityTotalInStrength: totalInStr, - communityTotalSize: totalSize, - }; -} diff --git a/src/graph/algorithms/leiden/index.ts b/src/graph/algorithms/leiden/index.ts new file mode 100644 index 00000000..6a1f300a --- /dev/null +++ b/src/graph/algorithms/leiden/index.ts @@ -0,0 +1,185 @@ +/** + * Leiden community detection — vendored from ngraph.leiden (MIT). + * Adapted to work directly with CodeGraph (no external graph library dependency). + * + * Original: https://github.com/anvaka/ngraph.leiden + * License: MIT — see LICENSE in this directory. + */ + +import type { CodeGraph } from '../../model.js'; +import type { GraphAdapter } from './adapter.js'; +import { qualityCPM } from './cpm.js'; +import { qualityModularity } from './modularity.js'; +import type { LeidenOptions } from './optimiser.js'; +import { runLouvainUndirectedModularity } from './optimiser.js'; + +export type { LeidenOptions } from './optimiser.js'; + +export interface DetectClustersOptions extends LeidenOptions {} + +export interface DetectClustersResult { + getClass(nodeId: string | number): number | undefined; + getCommunities(): Map<number, string[]>; + quality(): number; + toJSON(): { + membership: Record<string, number>; + meta: { levels: number; quality: number; options: DetectClustersOptions }; + }; +} + +// Typed array safe-access helpers (see adapter.ts for rationale) +function fget(a: Float64Array, i: number): number { + return a[i] as number; +} +function iget(a: Int32Array, i: number): number { + return a[i] as number; +} + +/** + * Detect communities in a CodeGraph using the Leiden algorithm. + * + * Note on `quality()`: For modularity, `quality()` always evaluates at gamma=1.0 + * (standard Newman-Girvan modularity) regardless of the `resolution` used during + * optimization. This makes quality values comparable across runs with different + * resolutions. For CPM, `quality()` uses the caller-specified resolution since gamma + * is intrinsic to the CPM metric.
Do not use modularity `quality()` values to + * compare partitions found at different resolutions — they reflect Q at gamma=1.0, + * not the objective that was actually optimized. + */ +export function detectClusters( + graph: CodeGraph, + options: DetectClustersOptions = {}, +): DetectClustersResult { + const { levels, originalToCurrent, originalNodeIds, baseGraph } = runLouvainUndirectedModularity( + graph, + options, + ); + + const idToClass = new Map<string, number>(); + for (let i = 0; i < originalNodeIds.length; i++) { + const comm: number = iget(originalToCurrent, i); + idToClass.set(originalNodeIds[i]!, comm); + } + + return { + getClass(nodeId: string | number): number | undefined { + return idToClass.get(String(nodeId)); + }, + getCommunities(): Map<number, string[]> { + const out = new Map<number, string[]>(); + for (const [id, c] of idToClass) { + if (!out.has(c)) out.set(c, []); + out.get(c)!.push(id); + } + return out; + }, + quality(): number { + // Compute quality on the original (level-0) graph with the final + // partition mapped back. Computing on the last coarse-level graph + // produces inflated values because the modularity null model depends + // on the degree distribution, which changes after coarsening. + const part = buildOriginalPartition(baseGraph, originalToCurrent); + const q: string = (options.quality || 'modularity').toLowerCase(); + if (q === 'cpm') { + const gamma: number = typeof options.resolution === 'number' ?
options.resolution : 1.0; + return qualityCPM(part, baseGraph, gamma); + } + // Always evaluate at gamma=1.0 for standard Newman-Girvan modularity + return qualityModularity(part, baseGraph, 1.0); + }, + toJSON() { + const membershipObj: Record<string, number> = {}; + for (const [id, c] of idToClass) membershipObj[id] = c; + return { + membership: membershipObj, + meta: { levels: levels.length, quality: this.quality(), options }, + }; + }, + }; +} + +/** + * Minimal partition-like object built from the original graph and the + * final community mapping, suitable for qualityModularity / qualityCPM. + * + * Implements the subset of PartitionView needed by the quality functions + * (no scratch-space methods needed since this is read-only evaluation). + */ +interface OriginalPartition { + communityCount: number; + nodeCommunity: Int32Array; + communityInternalEdgeWeight: Float64Array; + communityTotalStrength: Float64Array; + communityTotalOutStrength: Float64Array; + communityTotalInStrength: Float64Array; + communityTotalSize: Float64Array; + // Stub methods required by PartitionView but not called by qualityModularity/qualityCPM + getNeighborEdgeWeightToCommunity(c: number): number; + getOutEdgeWeightToCommunity(c: number): number; + getInEdgeWeightFromCommunity(c: number): number; +} + +function buildOriginalPartition(g: GraphAdapter, communityMap: Int32Array): OriginalPartition { + const n: number = g.n; + let maxC: number = 0; + for (let i = 0; i < n; i++) { + const ci = iget(communityMap, i); + if (ci > maxC) maxC = ci; + } + const cc: number = maxC + 1; + + const nodeCommunity = communityMap; + const internalWeight = new Float64Array(cc); + const totalStr = new Float64Array(cc); + const totalOutStr = new Float64Array(cc); + const totalInStr = new Float64Array(cc); + const totalSize = new Float64Array(cc); + + for (let i = 0; i < n; i++) { + const c: number = iget(communityMap, i); + totalSize[c] = fget(totalSize, c) + fget(g.size, i); + if (g.directed) { + totalOutStr[c] =
fget(totalOutStr, c) + fget(g.strengthOut, i); + totalInStr[c] = fget(totalInStr, c) + fget(g.strengthIn, i); + } else { + totalStr[c] = fget(totalStr, c) + fget(g.strengthOut, i); + } + if (fget(g.selfLoop, i)) internalWeight[c] = fget(internalWeight, c) + fget(g.selfLoop, i); + } + + if (g.directed) { + for (let i = 0; i < n; i++) { + const ci: number = iget(communityMap, i); + const list = g.outEdges[i]!; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]!; + if (i === j) continue; + if (ci === iget(communityMap, j)) internalWeight[ci] = fget(internalWeight, ci) + w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci: number = iget(communityMap, i); + const list = g.outEdges[i]!; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]!; + if (j <= i) continue; + if (ci === iget(communityMap, j)) internalWeight[ci] = fget(internalWeight, ci) + w; + } + } + } + + return { + communityCount: cc, + nodeCommunity, + communityInternalEdgeWeight: internalWeight, + communityTotalStrength: totalStr, + communityTotalOutStrength: totalOutStr, + communityTotalInStrength: totalInStr, + communityTotalSize: totalSize, + // Stubs — quality functions only read the aggregate arrays, not these methods + getNeighborEdgeWeightToCommunity: () => 0, + getOutEdgeWeightToCommunity: () => 0, + getInEdgeWeightFromCommunity: () => 0, + }; +} diff --git a/src/graph/algorithms/leiden/modularity.js b/src/graph/algorithms/leiden/modularity.js deleted file mode 100644 index 15a5caf0..00000000 --- a/src/graph/algorithms/leiden/modularity.js +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Modularity quality functions. - * Vendored from ngraph.leiden (MIT) — no external dependencies. 
- */ - -export function diffModularity(part, g, v, c, gamma = 1.0) { - if (g.directed) return diffModularityDirected(part, g, v, c, gamma); - const oldC = part.nodeCommunity[v]; - if (c === oldC) return 0; - const k_v = g.strengthOut[v]; - const m2 = g.totalWeight; - const k_v_in_new = part.getNeighborEdgeWeightToCommunity(c) || 0; - const k_v_in_old = part.getNeighborEdgeWeightToCommunity(oldC) || 0; - const wTot_new = c < part.communityTotalStrength.length ? part.communityTotalStrength[c] : 0; - const wTot_old = part.communityTotalStrength[oldC]; - const gain_remove = -(k_v_in_old / m2 - (gamma * (k_v * wTot_old)) / (m2 * m2)); - const gain_add = k_v_in_new / m2 - (gamma * (k_v * wTot_new)) / (m2 * m2); - return gain_remove + gain_add; -} - -export function diffModularityDirected(part, g, v, c, gamma = 1.0) { - const oldC = part.nodeCommunity[v]; - if (c === oldC) return 0; - const m = g.totalWeight; - const k_out = g.strengthOut[v]; - const k_in = g.strengthIn[v]; - const w_new_in = c < g.n ? part.getInEdgeWeightFromCommunity(c) || 0 : 0; - const w_new_out = c < g.n ? part.getOutEdgeWeightToCommunity(c) || 0 : 0; - const w_old_in = part.getInEdgeWeightFromCommunity(oldC) || 0; - const w_old_out = part.getOutEdgeWeightToCommunity(oldC) || 0; - const T_new = c < part.communityTotalInStrength.length ? part.communityTotalInStrength[c] : 0; - const F_new = c < part.communityTotalOutStrength.length ? part.communityTotalOutStrength[c] : 0; - const T_old = part.communityTotalInStrength[oldC]; - const F_old = part.communityTotalOutStrength[oldC]; - // Self-loop correction: the self-loop edge (v→v) appears in both - // outEdgeWeightToCommunity[oldC] and inEdgeWeightFromCommunity[oldC], - // making w_old include 2×selfLoop. Since the self-loop moves with the - // node, add it back to match moveNodeToCommunity's directed accounting. 
- const selfW = g.selfLoop[v] || 0; - const deltaInternal = (w_new_in + w_new_out - w_old_in - w_old_out + 2 * selfW) / m; - // The full Δ(F·T) expansion includes a constant 2·k_out·k_in term that - // doesn't depend on the target community but does affect the move-vs-stay - // decision. Without it, coarse-level merges can appear profitable when - // they actually decrease quality. - const deltaExpected = - (gamma * (k_out * (T_new - T_old) + k_in * (F_new - F_old) + 2 * k_out * k_in)) / (m * m); - return deltaInternal - deltaExpected; -} - -export function qualityModularity(part, g, gamma = 1.0) { - const m2 = g.totalWeight; - let sum = 0; - if (g.directed) { - for (let c = 0; c < part.communityCount; c++) - sum += - part.communityInternalEdgeWeight[c] / m2 - - (gamma * (part.communityTotalOutStrength[c] * part.communityTotalInStrength[c])) / - (m2 * m2); - } else { - // communityInternalEdgeWeight counts each undirected edge once (j > i), - // but m2 = totalWeight = 2m (sum of symmetrized degrees). The standard - // Newman-Girvan formula is Q = Σ_c [2·L_c/(2m) - γ·(d_c/(2m))²], so - // we multiply lc by 2 to match. - for (let c = 0; c < part.communityCount; c++) { - const lc = part.communityInternalEdgeWeight[c]; - const dc = part.communityTotalStrength[c]; - sum += (2 * lc) / m2 - (gamma * (dc * dc)) / (m2 * m2); - } - } - return sum; -} diff --git a/src/graph/algorithms/leiden/modularity.ts b/src/graph/algorithms/leiden/modularity.ts new file mode 100644 index 00000000..98a9a038 --- /dev/null +++ b/src/graph/algorithms/leiden/modularity.ts @@ -0,0 +1,122 @@ +/** + * Modularity quality functions. + * Vendored from ngraph.leiden (MIT) — no external dependencies. + */ + +/** + * Minimal view of a partition needed by modularity quality functions. 
+ */ +export interface PartitionView { + readonly communityCount: number; + nodeCommunity: Int32Array; + readonly communityInternalEdgeWeight: Float64Array; + readonly communityTotalStrength: Float64Array; + readonly communityTotalOutStrength: Float64Array; + readonly communityTotalInStrength: Float64Array; + getNeighborEdgeWeightToCommunity(c: number): number; + getOutEdgeWeightToCommunity(c: number): number; + getInEdgeWeightFromCommunity(c: number): number; +} + +/** + * Minimal view of a graph needed by modularity quality functions. + */ +export interface GraphView { + n: number; + directed: boolean; + totalWeight: number; + strengthOut: Float64Array; + strengthIn: Float64Array; + selfLoop: Float64Array; +} + +// Typed array safe-access helper (see adapter.ts for rationale) +function fget(a: Float64Array, i: number): number { + return a[i] as number; +} +function iget(a: Int32Array, i: number): number { + return a[i] as number; +} + +export function diffModularity( + part: PartitionView, + g: GraphView, + v: number, + c: number, + gamma: number = 1.0, +): number { + if (g.directed) return diffModularityDirected(part, g, v, c, gamma); + const oldC: number = iget(part.nodeCommunity, v); + if (c === oldC) return 0; + const k_v: number = fget(g.strengthOut, v); + const m2: number = g.totalWeight; + const k_v_in_new: number = part.getNeighborEdgeWeightToCommunity(c) || 0; + const k_v_in_old: number = part.getNeighborEdgeWeightToCommunity(oldC) || 0; + const wTot_new: number = + c < part.communityTotalStrength.length ? 
fget(part.communityTotalStrength, c) : 0; + const wTot_old: number = fget(part.communityTotalStrength, oldC); + const gain_remove: number = -(k_v_in_old / m2 - (gamma * (k_v * wTot_old)) / (m2 * m2)); + const gain_add: number = k_v_in_new / m2 - (gamma * (k_v * wTot_new)) / (m2 * m2); + return gain_remove + gain_add; +} + +export function diffModularityDirected( + part: PartitionView, + g: GraphView, + v: number, + c: number, + gamma: number = 1.0, +): number { + const oldC: number = iget(part.nodeCommunity, v); + if (c === oldC) return 0; + const m: number = g.totalWeight; + const k_out: number = fget(g.strengthOut, v); + const k_in: number = fget(g.strengthIn, v); + const w_new_in: number = c < g.n ? part.getInEdgeWeightFromCommunity(c) || 0 : 0; + const w_new_out: number = c < g.n ? part.getOutEdgeWeightToCommunity(c) || 0 : 0; + const w_old_in: number = part.getInEdgeWeightFromCommunity(oldC) || 0; + const w_old_out: number = part.getOutEdgeWeightToCommunity(oldC) || 0; + const T_new: number = + c < part.communityTotalInStrength.length ? fget(part.communityTotalInStrength, c) : 0; + const F_new: number = + c < part.communityTotalOutStrength.length ? fget(part.communityTotalOutStrength, c) : 0; + const T_old: number = fget(part.communityTotalInStrength, oldC); + const F_old: number = fget(part.communityTotalOutStrength, oldC); + // Self-loop correction: the self-loop edge (v->v) appears in both + // outEdgeWeightToCommunity[oldC] and inEdgeWeightFromCommunity[oldC], + // making w_old include 2x selfLoop. Since the self-loop moves with the + // node, add it back to match moveNodeToCommunity's directed accounting. + const selfW: number = fget(g.selfLoop, v) || 0; + const deltaInternal: number = (w_new_in + w_new_out - w_old_in - w_old_out + 2 * selfW) / m; + // The full delta(F*T) expansion includes a constant 2*k_out*k_in term that + // doesn't depend on the target community but does affect the move-vs-stay + // decision. 
Without it, coarse-level merges can appear profitable when + // they actually decrease quality. + const deltaExpected: number = + (gamma * (k_out * (T_new - T_old) + k_in * (F_new - F_old) + 2 * k_out * k_in)) / (m * m); + return deltaInternal - deltaExpected; +} + +export function qualityModularity(part: PartitionView, g: GraphView, gamma: number = 1.0): number { + const m2: number = g.totalWeight; + let sum: number = 0; + if (g.directed) { + for (let c = 0; c < part.communityCount; c++) + sum += + fget(part.communityInternalEdgeWeight, c) / m2 - + (gamma * + (fget(part.communityTotalOutStrength, c) * fget(part.communityTotalInStrength, c))) / + (m2 * m2); + } else { + // communityInternalEdgeWeight counts each undirected edge once (j > i), + // but m2 = totalWeight = 2m (sum of symmetrized degrees). The standard + // Newman-Girvan formula is Q = sum_c [2*L_c/(2m) - gamma*(d_c/(2m))^2], so + // we multiply lc by 2 to match. + for (let c = 0; c < part.communityCount; c++) { + const lc: number = fget(part.communityInternalEdgeWeight, c); + const dc: number = fget(part.communityTotalStrength, c); + sum += (2 * lc) / m2 - (gamma * (dc * dc)) / (m2 * m2); + } + } + return sum; +} diff --git a/src/graph/algorithms/leiden/optimiser.js b/src/graph/algorithms/leiden/optimiser.ts similarity index 53% rename from src/graph/algorithms/leiden/optimiser.js rename to src/graph/algorithms/leiden/optimiser.ts index 52a5a732..3531709f 100644 --- a/src/graph/algorithms/leiden/optimiser.js +++ b/src/graph/algorithms/leiden/optimiser.ts @@ -4,44 +4,100 @@ */ import { CodeGraph } from '../../model.js'; +import type { EdgeEntry, GraphAdapter, GraphAdapterOptions, InEdgeEntry } from './adapter.js'; import { makeGraphAdapter } from './adapter.js'; import { diffCPM } from './cpm.js'; import { diffModularity } from './modularity.js'; +import type { Partition } from './partition.js'; import { makePartition } from './partition.js'; import { createRng } from './rng.js'; // Mirrored in 
DEFAULTS.community (src/infrastructure/config.js) for user override -// via .codegraphrc.json. Callers (e.g. louvain.js) can pass overrides through options. -const DEFAULT_MAX_LEVELS = 50; -const DEFAULT_MAX_LOCAL_PASSES = 20; -const GAIN_EPSILON = 1e-12; +// via .codegraphrc.json. Callers (e.g. louvain.ts) can pass overrides through options. +const DEFAULT_MAX_LEVELS: number = 50; +const DEFAULT_MAX_LOCAL_PASSES: number = 20; +const GAIN_EPSILON: number = 1e-12; const CandidateStrategy = { Neighbors: 0, All: 1, RandomAny: 2, RandomNeighbor: 3, -}; +} as const; + +type CandidateStrategyCode = (typeof CandidateStrategy)[keyof typeof CandidateStrategy]; + +export interface LeidenOptions { + directed?: boolean; + randomSeed?: number; + maxLevels?: number; + maxLocalPasses?: number; + allowNewCommunity?: boolean; + candidateStrategy?: 'neighbors' | 'all' | 'random' | 'random-neighbor'; + quality?: string; + resolution?: number; + refine?: boolean; + preserveLabels?: boolean | Map<string, number>; + maxCommunitySize?: number; + refinementTheta?: number; + fixedNodes?: Set<string> | string[]; + linkWeight?: GraphAdapterOptions['linkWeight']; + nodeSize?: GraphAdapterOptions['nodeSize']; + baseNodeIds?: string[]; +} + +export interface NormalizedOptions { + directed: boolean; + randomSeed: number; + maxLevels: number; + maxLocalPasses: number; + allowNewCommunity: boolean; + candidateStrategyCode: CandidateStrategyCode; + quality: string; + resolution: number; + refine: boolean; + preserveLabels: boolean | Map<string, number> | undefined; + maxCommunitySize: number; + refinementTheta: number; + fixedNodes: Set<string> | string[] | undefined; +} + +export interface LevelEntry { + graph: GraphAdapter; + partition: Partition; +} -export function runLouvainUndirectedModularity(graph, optionsInput = {}) { - const options = normalizeOptions(optionsInput); - let currentGraph = graph; - const levels = []; +export interface LouvainResult { + graph: GraphAdapter; + partition: Partition; + levels: LevelEntry[]; +
originalToCurrent: Int32Array; + originalNodeIds: string[]; + baseGraph: GraphAdapter; +} + +export function runLouvainUndirectedModularity( + graph: CodeGraph, + optionsInput: LeidenOptions = {}, +): LouvainResult { + const options: NormalizedOptions = normalizeOptions(optionsInput); + let currentGraph: CodeGraph = graph; + const levels: LevelEntry[] = []; const rngSource = createRng(options.randomSeed); - const random = () => rngSource.nextDouble(); + const random: () => number = () => rngSource.nextDouble(); - const baseGraphAdapter = makeGraphAdapter(currentGraph, { + const baseGraphAdapter: GraphAdapter = makeGraphAdapter(currentGraph, { directed: options.directed, ...optionsInput, }); - const origN = baseGraphAdapter.n; + const origN: number = baseGraphAdapter.n; const originalToCurrent = new Int32Array(origN); for (let i = 0; i < origN; i++) originalToCurrent[i] = i; - let fixedNodeMask = null; + let fixedNodeMask: Uint8Array | null = null; if (options.fixedNodes) { const fixed = new Uint8Array(origN); - const asSet = + const asSet: Set<string> = options.fixedNodes instanceof Set ? options.fixedNodes : new Set(options.fixedNodes); for (const id of asSet) { const idx = baseGraphAdapter.idToIndex.get(String(id)); @@ -51,75 +107,77 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { for (let level = 0; level < options.maxLevels; level++) { - const graphAdapter = + const graphAdapter: GraphAdapter = level === 0 ?
baseGraphAdapter : makeGraphAdapter(currentGraph, { directed: options.directed, ...optionsInput }); - const partition = makePartition(graphAdapter); + const partition: Partition = makePartition(graphAdapter); partition.graph = graphAdapter; partition.initializeAggregates(); const order = new Int32Array(graphAdapter.n); for (let i = 0; i < graphAdapter.n; i++) order[i] = i; - let improved = true; - let localPasses = 0; - const strategyCode = options.candidateStrategyCode; + let improved: boolean = true; + let localPasses: number = 0; + const strategyCode: CandidateStrategyCode = options.candidateStrategyCode; while (improved) { improved = false; localPasses++; shuffleArrayInPlace(order, random); for (let idx = 0; idx < order.length; idx++) { - const nodeIndex = order[idx]; + const nodeIndex: number = order[idx]!; if (level === 0 && fixedNodeMask && fixedNodeMask[nodeIndex]) continue; - const candidateCount = partition.accumulateNeighborCommunityEdgeWeights(nodeIndex); - let bestCommunityId = partition.nodeCommunity[nodeIndex]; - let bestGain = 0; - const maxCommunitySize = options.maxCommunitySize; + const candidateCount: number = partition.accumulateNeighborCommunityEdgeWeights(nodeIndex); + let bestCommunityId: number = partition.nodeCommunity[nodeIndex]!; + let bestGain: number = 0; + const maxCommunitySize: number = options.maxCommunitySize; if (strategyCode === CandidateStrategy.All) { for (let communityId = 0; communityId < partition.communityCount; communityId++) { - if (communityId === partition.nodeCommunity[nodeIndex]) continue; + if (communityId === partition.nodeCommunity[nodeIndex]!) continue; if ( maxCommunitySize < Infinity && - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! 
> maxCommunitySize ) continue; - const gain = computeQualityGain(partition, nodeIndex, communityId, options); + const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = communityId; } } } else if (strategyCode === CandidateStrategy.RandomAny) { - const tries = Math.min(10, Math.max(1, partition.communityCount)); + const tries: number = Math.min(10, Math.max(1, partition.communityCount)); for (let trialIndex = 0; trialIndex < tries; trialIndex++) { - const communityId = (random() * partition.communityCount) | 0; - if (communityId === partition.nodeCommunity[nodeIndex]) continue; + const communityId: number = (random() * partition.communityCount) | 0; + if (communityId === partition.nodeCommunity[nodeIndex]!) continue; if ( maxCommunitySize < Infinity && - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! > maxCommunitySize ) continue; - const gain = computeQualityGain(partition, nodeIndex, communityId, options); + const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = communityId; } } } else if (strategyCode === CandidateStrategy.RandomNeighbor) { - const tries = Math.min(10, Math.max(1, candidateCount)); + const tries: number = Math.min(10, Math.max(1, candidateCount)); for (let trialIndex = 0; trialIndex < tries; trialIndex++) { - const communityId = partition.getCandidateCommunityAt((random() * candidateCount) | 0); - if (communityId === partition.nodeCommunity[nodeIndex]) continue; + const communityId: number = partition.getCandidateCommunityAt( + (random() * candidateCount) | 0, + ); + if (communityId === partition.nodeCommunity[nodeIndex]!) 
continue; if ( maxCommunitySize < Infinity && - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex] > + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]! > maxCommunitySize ) continue; - const gain = computeQualityGain(partition, nodeIndex, communityId, options); + const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = communityId; @@ -127,13 +185,13 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { } } else { for (let trialIndex = 0; trialIndex < candidateCount; trialIndex++) { - const communityId = partition.getCandidateCommunityAt(trialIndex); + const communityId: number = partition.getCandidateCommunityAt(trialIndex); if (maxCommunitySize < Infinity) { - const nextSize = - partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]; + const nextSize: number = + partition.getCommunityTotalSize(communityId) + graphAdapter.size[nodeIndex]!; if (nextSize > maxCommunitySize) continue; } - const gain = computeQualityGain(partition, nodeIndex, communityId, options); + const gain: number = computeQualityGain(partition, nodeIndex, communityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = communityId; @@ -141,14 +199,14 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { } } if (options.allowNewCommunity) { - const newCommunityId = partition.communityCount; - const gain = computeQualityGain(partition, nodeIndex, newCommunityId, options); + const newCommunityId: number = partition.communityCount; + const gain: number = computeQualityGain(partition, nodeIndex, newCommunityId, options); if (gain > bestGain) { bestGain = gain; bestCommunityId = newCommunityId; } } - if (bestCommunityId !== partition.nodeCommunity[nodeIndex] && bestGain > GAIN_EPSILON) { + if (bestCommunityId !== partition.nodeCommunity[nodeIndex]! 
&& bestGain > GAIN_EPSILON) { partition.moveNodeToCommunity(nodeIndex, bestCommunityId); improved = true; } @@ -158,9 +216,9 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { renumberCommunities(partition, options.preserveLabels); - let effectivePartition = partition; + let effectivePartition: Partition = partition; if (options.refine) { - const refined = refineWithinCoarseCommunities( + const refined: Partition = refineWithinCoarseCommunities( graphAdapter, partition, random, @@ -169,7 +227,7 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { ); // Post-refinement: split any disconnected communities into their // connected components. This is the cheap O(V+E) alternative to - // checking γ-connectedness on every candidate during refinement. + // checking gamma-connectedness on every candidate during refinement. // A disconnected community violates even basic connectivity, so // splitting is always correct. splitDisconnectedCommunities(graphAdapter, refined); @@ -178,15 +236,15 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { } levels.push({ graph: graphAdapter, partition: effectivePartition }); - const fineToCoarse = effectivePartition.nodeCommunity; + const fineToCoarse: Int32Array = effectivePartition.nodeCommunity; for (let i = 0; i < originalToCurrent.length; i++) { - originalToCurrent[i] = fineToCoarse[originalToCurrent[i]]; + originalToCurrent[i] = fineToCoarse[originalToCurrent[i]!]!; } - // Terminate when no further coarsening is possible. Check both the + // Terminate when no further coarsening is possible. Check both the // move-phase partition (did the greedy phase find merges?) and the // effective partition that feeds buildCoarseGraph (would coarsening - // actually reduce the graph?). When refine is enabled the refined + // actually reduce the graph?). 
When refine is enabled the refined // partition starts from singletons and may have more communities than // the move phase found, so checking only effectivePartition would // cause premature termination. @@ -198,7 +256,7 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { currentGraph = buildCoarseGraph(graphAdapter, effectivePartition); } - const last = levels[levels.length - 1]; + const last: LevelEntry = levels[levels.length - 1]!; return { graph: last.graph, partition: last.partition, @@ -209,39 +267,35 @@ export function runLouvainUndirectedModularity(graph, optionsInput = {}) { }; } -/** - * Build a coarse graph where each community becomes a node. - * Uses CodeGraph instead of ngraph.graph. - */ // Build a coarse graph where each community becomes a single node. // Self-loops (g.selfLoop[]) don't need separate handling here because they // are already present in g.outEdges (directed path keeps them in both arrays). // When the coarse graph is fed back to makeGraphAdapter at the next level, // the adapter re-detects cu===cu edges as self-loops and populates selfLoop[]. -function buildCoarseGraph(g, p) { +function buildCoarseGraph(g: GraphAdapter, p: Partition): CodeGraph { const coarse = new CodeGraph({ directed: g.directed }); for (let c = 0; c < p.communityCount; c++) { - coarse.addNode(String(c), { size: p.communityTotalSize[c] }); + coarse.addNode(String(c), { size: p.communityTotalSize[c]! 
}); } - const acc = new Map(); + const acc = new Map(); for (let i = 0; i < g.n; i++) { - const cu = p.nodeCommunity[i]; - const list = g.outEdges[i]; + const cu: number = p.nodeCommunity[i]!; + const list: EdgeEntry[] = g.outEdges[i]!; for (let k = 0; k < list.length; k++) { - const j = list[k].to; - const w = list[k].w; - const cv = p.nodeCommunity[j]; + const j: number = list[k]!.to; + const w: number = list[k]!.w; + const cv: number = p.nodeCommunity[j]!; // Undirected: each non-self edge (i,j) appears in both outEdges[i] and // outEdges[j]. For intra-community edges (cu===cv), skip the reverse to - // avoid inflating the coarse self-loop weight by 2×. + // avoid inflating the coarse self-loop weight by 2x. if (!g.directed && cu === cv && j < i) continue; const key = `${cu}:${cv}`; acc.set(key, (acc.get(key) || 0) + w); } } for (const [key, w] of acc.entries()) { - const [cuStr, cvStr] = key.split(':'); - coarse.addEdge(cuStr, cvStr, { weight: w }); + const parts = key.split(':'); + coarse.addEdge(parts[0]!, parts[1]!, { weight: w }); } return coarse; } @@ -251,32 +305,38 @@ function buildCoarseGraph(g, p) { * * Key properties that distinguish this from Louvain-style refinement: * - * 1. **Singleton start** — each node begins in its own community. - * 2. **Singleton guard** — only nodes still in singleton communities are + * 1. Singleton start — each node begins in its own community. + * 2. Singleton guard — only nodes still in singleton communities are * considered for merging. Once a node joins a non-singleton community * it is locked for the remainder of the pass. This prevents oscillation - * and is essential for the γ-connectedness guarantee. - * 3. **Single pass** — one randomized sweep through all nodes, not an + * and is essential for the gamma-connectedness guarantee. + * 3. Single pass — one randomized sweep through all nodes, not an * iterative loop until convergence (that would be Louvain behavior). - * 4. 
**Probabilistic selection** — candidate communities are sampled from - * a Boltzmann distribution `p(v, C) ∝ exp(ΔH / θ)`, with the "stay - * as singleton" option (ΔH = 0) included in the distribution. This - * means a node may probabilistically choose to remain alone even when - * positive-gain merges exist. + * 4. Probabilistic selection — candidate communities are sampled from + * a Boltzmann distribution p(v, C) proportional to exp(deltaH / theta), + * with the "stay as singleton" option (deltaH = 0) included in the + * distribution. This means a node may probabilistically choose to remain + * alone even when positive-gain merges exist. * - * θ (refinementTheta) controls temperature: lower → more deterministic - * (approaches greedy), higher → more exploratory. Determinism is preserved + * theta (refinementTheta) controls temperature: lower = more deterministic + * (approaches greedy), higher = more exploratory. Determinism is preserved * via the seeded PRNG — same seed produces the same assignments. */ -function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { - const p = makePartition(g); +function refineWithinCoarseCommunities( + g: GraphAdapter, + basePart: Partition, + rng: () => number, + opts: NormalizedOptions, + fixedMask0: Uint8Array | null, +): Partition { + const p: Partition = makePartition(g); p.initializeAggregates(); p.graph = g; - const macro = basePart.nodeCommunity; + const macro: Int32Array = basePart.nodeCommunity; const commMacro = new Int32Array(p.communityCount); - for (let i = 0; i < p.communityCount; i++) commMacro[i] = macro[i]; + for (let i = 0; i < p.communityCount; i++) commMacro[i] = macro[i]!; - const theta = typeof opts.refinementTheta === 'number' ? opts.refinementTheta : 1.0; + const theta: number = typeof opts.refinementTheta === 'number' ? opts.refinementTheta : 1.0; if (theta <= 0) throw new RangeError(`refinementTheta must be > 0 (got ${theta})`); // Single pass in random order (Algorithm 3, step 2). 
@@ -291,27 +351,29 @@ function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { const candWeight = new Float64Array(g.n); for (let idx = 0; idx < order.length; idx++) { - const v = order[idx]; + const v: number = order[idx]!; if (fixedMask0?.[v]) continue; // Singleton guard: only move nodes still alone in their community. - if (p.getCommunityNodeCount(p.nodeCommunity[v]) > 1) continue; + if (p.getCommunityNodeCount(p.nodeCommunity[v]!) > 1) continue; - const macroV = macro[v]; - const touchedCount = p.accumulateNeighborCommunityEdgeWeights(v); - const maxSize = Number.isFinite(opts.maxCommunitySize) ? opts.maxCommunitySize : Infinity; + const macroV: number = macro[v]!; + const touchedCount: number = p.accumulateNeighborCommunityEdgeWeights(v); + const maxSize: number = Number.isFinite(opts.maxCommunitySize) + ? opts.maxCommunitySize + : Infinity; // Collect eligible communities and their quality gains. - let candLen = 0; + let candLen: number = 0; for (let t = 0; t < touchedCount; t++) { - const c = p.getCandidateCommunityAt(t); - if (c === p.nodeCommunity[v]) continue; - if (commMacro[c] !== macroV) continue; + const c: number = p.getCandidateCommunityAt(t); + if (c === p.nodeCommunity[v]!) continue; + if (commMacro[c]! !== macroV) continue; if (maxSize < Infinity) { - const nextSize = p.getCommunityTotalSize(c) + g.size[v]; + const nextSize: number = p.getCommunityTotalSize(c) + g.size[v]!; if (nextSize > maxSize) continue; } - const gain = computeQualityGain(p, v, c, opts); + const gain: number = computeQualityGain(p, v, c, opts); if (gain > GAIN_EPSILON) { candC[candLen] = c; candGain[candLen] = gain; @@ -321,30 +383,30 @@ function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { if (candLen === 0) continue; - // Probabilistic selection: p(v, C) ∝ exp(ΔH / θ), with the "stay" - // option (ΔH = 0) included per Algorithm 3. 
+ // Probabilistic selection: p(v, C) proportional to exp(deltaH / theta), + // with the "stay" option (deltaH = 0) included per Algorithm 3. // For numerical stability, subtract the max gain before exponentiation. - let maxGain = 0; + let maxGain: number = 0; for (let i = 0; i < candLen; i++) { - if (candGain[i] > maxGain) maxGain = candGain[i]; + if (candGain[i]! > maxGain) maxGain = candGain[i]!; } // "Stay as singleton" weight: exp((0 - maxGain) / theta) - const stayWeight = Math.exp((0 - maxGain) / theta); - let totalWeight = stayWeight; + const stayWeight: number = Math.exp((0 - maxGain) / theta); + let totalWeight: number = stayWeight; for (let i = 0; i < candLen; i++) { - candWeight[i] = Math.exp((candGain[i] - maxGain) / theta); - totalWeight += candWeight[i]; + candWeight[i] = Math.exp((candGain[i]! - maxGain) / theta); + totalWeight += candWeight[i]!; } - const r = rng() * totalWeight; + const r: number = rng() * totalWeight; if (r < stayWeight) continue; // node stays as singleton - let cumulative = stayWeight; - let chosenC = candC[candLen - 1]; // fallback + let cumulative: number = stayWeight; + let chosenC: number = candC[candLen - 1]!; // fallback for (let i = 0; i < candLen; i++) { - cumulative += candWeight[i]; + cumulative += candWeight[i]!; if (r < cumulative) { - chosenC = candC[i]; + chosenC = candC[i]!; break; } } @@ -355,59 +417,59 @@ function refineWithinCoarseCommunities(g, basePart, rng, opts, fixedMask0) { } /** - * Post-refinement connectivity check. For each community, run a BFS on + * Post-refinement connectivity check. For each community, run a BFS on * the subgraph induced by its members (using the adapter's outEdges). * If a community has multiple connected components, assign secondary * components to new community IDs, then reinitialize aggregates once. * * O(V+E) total since communities partition V. 
* - * This replaces the per-candidate γ-connectedness check from the paper + * This replaces the per-candidate gamma-connectedness check from the paper * with a cheaper post-step that catches the most important violation * (disconnected subcommunities). */ -function splitDisconnectedCommunities(g, partition) { - const n = g.n; - const nc = partition.nodeCommunity; - const members = partition.getCommunityMembers(); - let nextC = partition.communityCount; - let didSplit = false; +function splitDisconnectedCommunities(g: GraphAdapter, partition: Partition): void { + const n: number = g.n; + const nc: Int32Array = partition.nodeCommunity; + const members: number[][] = partition.getCommunityMembers(); + let nextC: number = partition.communityCount; + let didSplit: boolean = false; const visited = new Uint8Array(n); const inCommunity = new Uint8Array(n); for (let c = 0; c < members.length; c++) { - const nodes = members[c]; + const nodes: number[] = members[c]!; if (nodes.length <= 1) continue; - for (let i = 0; i < nodes.length; i++) inCommunity[nodes[i]] = 1; + for (let i = 0; i < nodes.length; i++) inCommunity[nodes[i]!] = 1; - let componentCount = 0; + let componentCount: number = 0; for (let i = 0; i < nodes.length; i++) { - const start = nodes[i]; + const start: number = nodes[i]!; if (visited[start]) continue; componentCount++; // BFS within the community subgraph. // For directed graphs, traverse both outEdges and inEdges to check // weak connectivity (reachability ignoring edge direction). 
- const queue = [start]; + const queue: number[] = [start]; visited[start] = 1; - let head = 0; + let head: number = 0; while (head < queue.length) { - const v = queue[head++]; - const out = g.outEdges[v]; + const v: number = queue[head++]!; + const out: EdgeEntry[] = g.outEdges[v]!; for (let k = 0; k < out.length; k++) { - const w = out[k].to; + const w: number = out[k]!.to; if (inCommunity[w] && !visited[w]) { visited[w] = 1; queue.push(w); } } if (g.directed && g.inEdges) { - const inc = g.inEdges[v]; + const inc: InEdgeEntry[] = g.inEdges[v]!; for (let k = 0; k < inc.length; k++) { - const w = inc[k].from; + const w: number = inc[k]!.from; if (inCommunity[w] && !visited[w]) { visited[w] = 1; queue.push(w); @@ -418,15 +480,15 @@ function splitDisconnectedCommunities(g, partition) { if (componentCount > 1) { // Secondary component — assign new community ID directly. - const newC = nextC++; - for (let q = 0; q < queue.length; q++) nc[queue[q]] = newC; + const newC: number = nextC++; + for (let q = 0; q < queue.length; q++) nc[queue[q]!] = newC; didSplit = true; } } for (let i = 0; i < nodes.length; i++) { - inCommunity[nodes[i]] = 0; - visited[nodes[i]] = 0; + inCommunity[nodes[i]!] = 0; + visited[nodes[i]!] = 0; } } @@ -438,27 +500,32 @@ function splitDisconnectedCommunities(g, partition) { } } -function computeQualityGain(partition, v, c, opts) { - const quality = (opts.quality || 'modularity').toLowerCase(); - const gamma = typeof opts.resolution === 'number' ? opts.resolution : 1.0; +function computeQualityGain( + partition: Partition, + v: number, + c: number, + opts: NormalizedOptions, +): number { + const quality: string = (opts.quality || 'modularity').toLowerCase(); + const gamma: number = typeof opts.resolution === 'number' ? 
opts.resolution : 1.0; if (quality === 'cpm') { - return diffCPM(partition, partition.graph || {}, v, c, gamma); + return diffCPM(partition, partition.graph || ({} as GraphAdapter), v, c, gamma); } // diffModularity dispatches to diffModularityDirected internally when g.directed is true - return diffModularity(partition, partition.graph || {}, v, c, gamma); + return diffModularity(partition, partition.graph || ({} as GraphAdapter), v, c, gamma); } -function shuffleArrayInPlace(arr, rng = Math.random) { +function shuffleArrayInPlace(arr: Int32Array, rng: () => number = Math.random): Int32Array { for (let i = arr.length - 1; i > 0; i--) { - const j = Math.floor(rng() * (i + 1)); - const t = arr[i]; - arr[i] = arr[j]; + const j: number = Math.floor(rng() * (i + 1)); + const t: number = arr[i]!; + arr[i] = arr[j]!; arr[j] = t; } return arr; } -function resolveCandidateStrategy(options) { +function resolveCandidateStrategy(options: LeidenOptions): CandidateStrategyCode { const val = options.candidateStrategy; if (typeof val !== 'string') return CandidateStrategy.Neighbors; switch (val) { @@ -475,23 +542,27 @@ function resolveCandidateStrategy(options) { } } -function normalizeOptions(options = {}) { - const directed = !!options.directed; - const randomSeed = Number.isFinite(options.randomSeed) ? options.randomSeed : 42; - const maxLevels = Number.isFinite(options.maxLevels) ? options.maxLevels : DEFAULT_MAX_LEVELS; - const maxLocalPasses = Number.isFinite(options.maxLocalPasses) - ? options.maxLocalPasses +function normalizeOptions(options: LeidenOptions = {}): NormalizedOptions { + const directed: boolean = !!options.directed; + const randomSeed: number = Number.isFinite(options.randomSeed) + ? (options.randomSeed as number) + : 42; + const maxLevels: number = Number.isFinite(options.maxLevels) + ? (options.maxLevels as number) + : DEFAULT_MAX_LEVELS; + const maxLocalPasses: number = Number.isFinite(options.maxLocalPasses) + ? 
(options.maxLocalPasses as number) : DEFAULT_MAX_LOCAL_PASSES; - const allowNewCommunity = !!options.allowNewCommunity; - const candidateStrategyCode = resolveCandidateStrategy(options); - const quality = (options.quality || 'modularity').toLowerCase(); - const resolution = typeof options.resolution === 'number' ? options.resolution : 1.0; - const refine = options.refine !== false; + const allowNewCommunity: boolean = !!options.allowNewCommunity; + const candidateStrategyCode: CandidateStrategyCode = resolveCandidateStrategy(options); + const quality: string = (options.quality || 'modularity').toLowerCase(); + const resolution: number = typeof options.resolution === 'number' ? options.resolution : 1.0; + const refine: boolean = options.refine !== false; const preserveLabels = options.preserveLabels; - const maxCommunitySize = Number.isFinite(options.maxCommunitySize) - ? options.maxCommunitySize + const maxCommunitySize: number = Number.isFinite(options.maxCommunitySize) + ? (options.maxCommunitySize as number) : Infinity; - const refinementTheta = + const refinementTheta: number = typeof options.refinementTheta === 'number' ? options.refinementTheta : 1.0; return { directed, @@ -510,7 +581,10 @@ function normalizeOptions(options = {}) { }; } -function renumberCommunities(partition, preserveLabels) { +function renumberCommunities( + partition: Partition, + preserveLabels: boolean | Map | undefined, +): void { if (preserveLabels && preserveLabels instanceof Map) { partition.compactCommunityIds({ preserveMap: preserveLabels }); } else if (preserveLabels === true) { diff --git a/src/graph/algorithms/leiden/partition.js b/src/graph/algorithms/leiden/partition.js deleted file mode 100644 index 0e39c1e3..00000000 --- a/src/graph/algorithms/leiden/partition.js +++ /dev/null @@ -1,407 +0,0 @@ -/** - * Mutable community assignment with per-community aggregates. - * Vendored from ngraph.leiden (MIT) — no external dependencies. 
- * - * Maintains per-community totals and per-move scratch accumulators so we can - * compute modularity/CPM gains in O(neighborhood) time without rescanning the - * whole graph after each move. - */ - -export function makePartition(graph) { - const n = graph.n; - const nodeCommunity = new Int32Array(n); - for (let i = 0; i < n; i++) nodeCommunity[i] = i; - let communityCount = n; - - let communityTotalSize = new Float64Array(communityCount); - let communityNodeCount = new Int32Array(communityCount); - let communityInternalEdgeWeight = new Float64Array(communityCount); - let communityTotalStrength = new Float64Array(communityCount); - let communityTotalOutStrength = new Float64Array(communityCount); - let communityTotalInStrength = new Float64Array(communityCount); - - const candidateCommunities = new Int32Array(n); - let candidateCommunityCount = 0; - const neighborEdgeWeightToCommunity = new Float64Array(n); - const outEdgeWeightToCommunity = new Float64Array(n); - const inEdgeWeightFromCommunity = new Float64Array(n); - const isCandidateCommunity = new Uint8Array(n); - - function ensureCommCapacity(newCount) { - if (newCount <= communityTotalSize.length) return; - const growTo = Math.max(newCount, Math.ceil(communityTotalSize.length * 1.5)); - communityTotalSize = growFloat(communityTotalSize, growTo); - communityNodeCount = growInt(communityNodeCount, growTo); - communityInternalEdgeWeight = growFloat(communityInternalEdgeWeight, growTo); - communityTotalStrength = growFloat(communityTotalStrength, growTo); - communityTotalOutStrength = growFloat(communityTotalOutStrength, growTo); - communityTotalInStrength = growFloat(communityTotalInStrength, growTo); - } - - function initializeAggregates() { - communityTotalSize.fill(0); - communityNodeCount.fill(0); - communityInternalEdgeWeight.fill(0); - communityTotalStrength.fill(0); - communityTotalOutStrength.fill(0); - communityTotalInStrength.fill(0); - for (let i = 0; i < n; i++) { - const c = nodeCommunity[i]; - 
communityTotalSize[c] += graph.size[i]; - communityNodeCount[c] += 1; - if (graph.directed) { - communityTotalOutStrength[c] += graph.strengthOut[i]; - communityTotalInStrength[c] += graph.strengthIn[i]; - } else { - communityTotalStrength[c] += graph.strengthOut[i]; - } - if (graph.selfLoop[i] !== 0) communityInternalEdgeWeight[c] += graph.selfLoop[i]; - } - if (graph.directed) { - for (let i = 0; i < n; i++) { - const ci = nodeCommunity[i]; - const neighbors = graph.outEdges[i]; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]; - if (i === j) continue; // self-loop already counted via graph.selfLoop[i] - if (ci === nodeCommunity[j]) communityInternalEdgeWeight[ci] += w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci = nodeCommunity[i]; - const neighbors = graph.outEdges[i]; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]; - if (j <= i) continue; - if (ci === nodeCommunity[j]) communityInternalEdgeWeight[ci] += w; - } - } - } - } - - function resetScratch() { - for (let i = 0; i < candidateCommunityCount; i++) { - const c = candidateCommunities[i]; - isCandidateCommunity[c] = 0; - neighborEdgeWeightToCommunity[c] = 0; - outEdgeWeightToCommunity[c] = 0; - inEdgeWeightFromCommunity[c] = 0; - } - candidateCommunityCount = 0; - } - - function touch(c) { - if (isCandidateCommunity[c]) return; - isCandidateCommunity[c] = 1; - candidateCommunities[candidateCommunityCount++] = c; - } - - function accumulateNeighborCommunityEdgeWeights(v) { - resetScratch(); - const ci = nodeCommunity[v]; - touch(ci); - if (graph.directed) { - const outL = graph.outEdges[v]; - for (let k = 0; k < outL.length; k++) { - const j = outL[k].to; - const w = outL[k].w; - const cj = nodeCommunity[j]; - touch(cj); - outEdgeWeightToCommunity[cj] += w; - } - const inL = graph.inEdges[v]; - for (let k = 0; k < inL.length; k++) { - const i2 = inL[k].from; - const w = inL[k].w; - const ci2 = nodeCommunity[i2]; - 
touch(ci2); - inEdgeWeightFromCommunity[ci2] += w; - } - } else { - const list = graph.outEdges[v]; - for (let k = 0; k < list.length; k++) { - const j = list[k].to; - const w = list[k].w; - const cj = nodeCommunity[j]; - touch(cj); - neighborEdgeWeightToCommunity[cj] += w; - } - } - return candidateCommunityCount; - } - - const twoMUndirected = graph.totalWeight; - function deltaModularityUndirected(v, newC, gamma = 1.0) { - const oldC = nodeCommunity[v]; - if (newC === oldC) return 0; - const strengthV = graph.strengthOut[v]; - const weightToNew = - newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; - const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; - const totalStrengthNew = - newC < communityTotalStrength.length ? communityTotalStrength[newC] : 0; - const totalStrengthOld = communityTotalStrength[oldC]; - const gain_remove = -( - weightToOld / twoMUndirected - - (gamma * (strengthV * totalStrengthOld)) / (twoMUndirected * twoMUndirected) - ); - const gain_add = - weightToNew / twoMUndirected - - (gamma * (strengthV * totalStrengthNew)) / (twoMUndirected * twoMUndirected); - return gain_remove + gain_add; - } - - function deltaModularityDirected(v, newC, gamma = 1.0) { - const oldC = nodeCommunity[v]; - if (newC === oldC) return 0; - const totalEdgeWeight = graph.totalWeight; - const strengthOutV = graph.strengthOut[v]; - const strengthInV = graph.strengthIn[v]; - const inFromNew = - newC < inEdgeWeightFromCommunity.length ? inEdgeWeightFromCommunity[newC] || 0 : 0; - const outToNew = - newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; - const inFromOld = inEdgeWeightFromCommunity[oldC] || 0; - const outToOld = outEdgeWeightToCommunity[oldC] || 0; - const totalInStrengthNew = - newC < communityTotalInStrength.length ? communityTotalInStrength[newC] : 0; - const totalOutStrengthNew = - newC < communityTotalOutStrength.length ? 
communityTotalOutStrength[newC] : 0; - const totalInStrengthOld = communityTotalInStrength[oldC]; - const totalOutStrengthOld = communityTotalOutStrength[oldC]; - // Self-loop correction + constant term (see modularity.js diffModularityDirected) - const selfW = graph.selfLoop[v] || 0; - const deltaInternal = - (inFromNew + outToNew - inFromOld - outToOld + 2 * selfW) / totalEdgeWeight; - const deltaExpected = - (gamma * - (strengthOutV * (totalInStrengthNew - totalInStrengthOld) + - strengthInV * (totalOutStrengthNew - totalOutStrengthOld) + - 2 * strengthOutV * strengthInV)) / - (totalEdgeWeight * totalEdgeWeight); - return deltaInternal - deltaExpected; - } - - function deltaCPM(v, newC, gamma = 1.0) { - const oldC = nodeCommunity[v]; - if (newC === oldC) return 0; - let w_old, w_new; - let selfCorrection = 0; - if (graph.directed) { - w_old = (outEdgeWeightToCommunity[oldC] || 0) + (inEdgeWeightFromCommunity[oldC] || 0); - w_new = - newC < outEdgeWeightToCommunity.length - ? (outEdgeWeightToCommunity[newC] || 0) + (inEdgeWeightFromCommunity[newC] || 0) - : 0; - // Self-loop correction (see cpm.js diffCPM) - selfCorrection = 2 * (graph.selfLoop[v] || 0); - } else { - w_old = neighborEdgeWeightToCommunity[oldC] || 0; - w_new = - newC < neighborEdgeWeightToCommunity.length ? neighborEdgeWeightToCommunity[newC] || 0 : 0; - } - const nodeSize = graph.size[v] || 1; - const sizeOld = communityTotalSize[oldC] || 0; - const sizeNew = newC < communityTotalSize.length ? 
communityTotalSize[newC] : 0; - return w_new - w_old + selfCorrection - gamma * nodeSize * (sizeNew - sizeOld + nodeSize); - } - - function moveNodeToCommunity(v, newC) { - const oldC = nodeCommunity[v]; - if (oldC === newC) return false; - if (newC >= communityCount) { - ensureCommCapacity(newC + 1); - communityCount = newC + 1; - } - const strengthOutV = graph.strengthOut[v]; - const strengthInV = graph.strengthIn[v]; - const selfLoopWeight = graph.selfLoop[v]; - const nodeSize = graph.size[v]; - - communityNodeCount[oldC] -= 1; - communityNodeCount[newC] += 1; - communityTotalSize[oldC] -= nodeSize; - communityTotalSize[newC] += nodeSize; - if (graph.directed) { - communityTotalOutStrength[oldC] -= strengthOutV; - communityTotalOutStrength[newC] += strengthOutV; - communityTotalInStrength[oldC] -= strengthInV; - communityTotalInStrength[newC] += strengthInV; - } else { - communityTotalStrength[oldC] -= strengthOutV; - communityTotalStrength[newC] += strengthOutV; - } - - if (graph.directed) { - const outToOld = outEdgeWeightToCommunity[oldC] || 0; - const inFromOld = inEdgeWeightFromCommunity[oldC] || 0; - const outToNew = - newC < outEdgeWeightToCommunity.length ? outEdgeWeightToCommunity[newC] || 0 : 0; - const inFromNew = - newC < inEdgeWeightFromCommunity.length ? inEdgeWeightFromCommunity[newC] || 0 : 0; - // outToOld/inFromOld already include the self-loop weight (self-loops are - // in outEdges/inEdges), so subtract it once to avoid triple-counting. 
- communityInternalEdgeWeight[oldC] -= outToOld + inFromOld - selfLoopWeight; - communityInternalEdgeWeight[newC] += outToNew + inFromNew + selfLoopWeight; - } else { - const weightToOld = neighborEdgeWeightToCommunity[oldC] || 0; - const weightToNew = neighborEdgeWeightToCommunity[newC] || 0; - communityInternalEdgeWeight[oldC] -= 2 * weightToOld + selfLoopWeight; - communityInternalEdgeWeight[newC] += 2 * weightToNew + selfLoopWeight; - } - - nodeCommunity[v] = newC; - return true; - } - - function compactCommunityIds(opts = {}) { - const ids = []; - for (let c = 0; c < communityCount; c++) if (communityNodeCount[c] > 0) ids.push(c); - if (opts.keepOldOrder) { - ids.sort((a, b) => a - b); - } else if (opts.preserveMap instanceof Map) { - ids.sort((a, b) => { - const pa = opts.preserveMap.get(a); - const pb = opts.preserveMap.get(b); - if (pa != null && pb != null && pa !== pb) return pa - pb; - if (pa != null && pb == null) return -1; - if (pb != null && pa == null) return 1; - return ( - communityTotalSize[b] - communityTotalSize[a] || - communityNodeCount[b] - communityNodeCount[a] || - a - b - ); - }); - } else { - ids.sort( - (a, b) => - communityTotalSize[b] - communityTotalSize[a] || - communityNodeCount[b] - communityNodeCount[a] || - a - b, - ); - } - const newId = new Int32Array(communityCount).fill(-1); - ids.forEach((c, i) => { - newId[c] = i; - }); - for (let i = 0; i < nodeCommunity.length; i++) nodeCommunity[i] = newId[nodeCommunity[i]]; - const remappedCount = ids.length; - const newTotalSize = new Float64Array(remappedCount); - const newNodeCount = new Int32Array(remappedCount); - const newInternalEdgeWeight = new Float64Array(remappedCount); - const newTotalStrength = new Float64Array(remappedCount); - const newTotalOutStrength = new Float64Array(remappedCount); - const newTotalInStrength = new Float64Array(remappedCount); - for (let i = 0; i < n; i++) { - const c = nodeCommunity[i]; - newTotalSize[c] += graph.size[i]; - newNodeCount[c] += 1; - 
if (graph.directed) { - newTotalOutStrength[c] += graph.strengthOut[i]; - newTotalInStrength[c] += graph.strengthIn[i]; - } else { - newTotalStrength[c] += graph.strengthOut[i]; - } - if (graph.selfLoop[i] !== 0) newInternalEdgeWeight[c] += graph.selfLoop[i]; - } - if (graph.directed) { - for (let i = 0; i < n; i++) { - const ci = nodeCommunity[i]; - const list = graph.outEdges[i]; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]; - if (i === j) continue; // self-loop already counted via graph.selfLoop[i] - if (ci === nodeCommunity[j]) newInternalEdgeWeight[ci] += w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci = nodeCommunity[i]; - const list = graph.outEdges[i]; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]; - if (j <= i) continue; - if (ci === nodeCommunity[j]) newInternalEdgeWeight[ci] += w; - } - } - } - communityCount = remappedCount; - communityTotalSize = newTotalSize; - communityNodeCount = newNodeCount; - communityInternalEdgeWeight = newInternalEdgeWeight; - communityTotalStrength = newTotalStrength; - communityTotalOutStrength = newTotalOutStrength; - communityTotalInStrength = newTotalInStrength; - } - - function getCommunityMembers() { - const comms = new Array(communityCount); - for (let i = 0; i < communityCount; i++) comms[i] = []; - for (let i = 0; i < n; i++) comms[nodeCommunity[i]].push(i); - return comms; - } - - function getCommunityTotalSize(c) { - return c < communityTotalSize.length ? communityTotalSize[c] : 0; - } - function getCommunityNodeCount(c) { - return c < communityNodeCount.length ? 
communityNodeCount[c] : 0; - } - - return { - n, - get communityCount() { - return communityCount; - }, - nodeCommunity, - get communityTotalSize() { - return communityTotalSize; - }, - get communityNodeCount() { - return communityNodeCount; - }, - get communityInternalEdgeWeight() { - return communityInternalEdgeWeight; - }, - get communityTotalStrength() { - return communityTotalStrength; - }, - get communityTotalOutStrength() { - return communityTotalOutStrength; - }, - get communityTotalInStrength() { - return communityTotalInStrength; - }, - resizeCommunities(newCount) { - ensureCommCapacity(newCount); - communityCount = newCount; - }, - initializeAggregates, - accumulateNeighborCommunityEdgeWeights, - getCandidateCommunityCount: () => candidateCommunityCount, - getCandidateCommunityAt: (i) => candidateCommunities[i], - getNeighborEdgeWeightToCommunity: (c) => neighborEdgeWeightToCommunity[c] || 0, - getOutEdgeWeightToCommunity: (c) => outEdgeWeightToCommunity[c] || 0, - getInEdgeWeightFromCommunity: (c) => inEdgeWeightFromCommunity[c] || 0, - deltaModularityUndirected, - deltaModularityDirected, - deltaCPM, - moveNodeToCommunity, - compactCommunityIds, - getCommunityMembers, - getCommunityTotalSize, - getCommunityNodeCount, - }; -} - -function growFloat(a, to) { - const b = new Float64Array(to); - b.set(a); - return b; -} -function growInt(a, to) { - const b = new Int32Array(to); - b.set(a); - return b; -} diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts new file mode 100644 index 00000000..64e62bfb --- /dev/null +++ b/src/graph/algorithms/leiden/partition.ts @@ -0,0 +1,479 @@ +/** + * Mutable community assignment with per-community aggregates. + * Vendored from ngraph.leiden (MIT) — no external dependencies. + * + * Maintains per-community totals and per-move scratch accumulators so we can + * compute modularity/CPM gains in O(neighborhood) time without rescanning the + * whole graph after each move. 
+ */ + +import type { GraphAdapter } from './adapter.js'; + +export interface CompactOptions { + keepOldOrder?: boolean; + preserveMap?: Map; +} + +export interface Partition { + n: number; + readonly communityCount: number; + nodeCommunity: Int32Array; + readonly communityTotalSize: Float64Array; + readonly communityNodeCount: Int32Array; + readonly communityInternalEdgeWeight: Float64Array; + readonly communityTotalStrength: Float64Array; + readonly communityTotalOutStrength: Float64Array; + readonly communityTotalInStrength: Float64Array; + resizeCommunities(newCount: number): void; + initializeAggregates(): void; + accumulateNeighborCommunityEdgeWeights(v: number): number; + getCandidateCommunityCount(): number; + getCandidateCommunityAt(i: number): number; + getNeighborEdgeWeightToCommunity(c: number): number; + getOutEdgeWeightToCommunity(c: number): number; + getInEdgeWeightFromCommunity(c: number): number; + deltaModularityUndirected(v: number, newC: number, gamma?: number): number; + deltaModularityDirected(v: number, newC: number, gamma?: number): number; + deltaCPM(v: number, newC: number, gamma?: number): number; + moveNodeToCommunity(v: number, newC: number): boolean; + compactCommunityIds(opts?: CompactOptions): void; + getCommunityMembers(): number[][]; + getCommunityTotalSize(c: number): number; + getCommunityNodeCount(c: number): number; + /** Attached by optimiser after creation */ + graph: GraphAdapter; +} + +// Typed arrays always return a number for in-bounds access, but noUncheckedIndexedAccess +// widens to `number | undefined`. These helpers keep the compound assignment patterns readable. 
+function fget(a: Float64Array, i: number): number { + return a[i] as number; +} +function iget(a: Int32Array, i: number): number { + return a[i] as number; +} +function u8get(a: Uint8Array, i: number): number { + return a[i] as number; +} + +export function makePartition(graph: GraphAdapter): Partition { + const n: number = graph.n; + const nodeCommunity = new Int32Array(n); + for (let i = 0; i < n; i++) nodeCommunity[i] = i; + let communityCount: number = n; + + let communityTotalSize = new Float64Array(communityCount); + let communityNodeCount = new Int32Array(communityCount); + let communityInternalEdgeWeight = new Float64Array(communityCount); + let communityTotalStrength = new Float64Array(communityCount); + let communityTotalOutStrength = new Float64Array(communityCount); + let communityTotalInStrength = new Float64Array(communityCount); + + const candidateCommunities = new Int32Array(n); + let candidateCommunityCount: number = 0; + const neighborEdgeWeightToCommunity = new Float64Array(n); + const outEdgeWeightToCommunity = new Float64Array(n); + const inEdgeWeightFromCommunity = new Float64Array(n); + const isCandidateCommunity = new Uint8Array(n); + + function ensureCommCapacity(newCount: number): void { + if (newCount <= communityTotalSize.length) return; + const growTo: number = Math.max(newCount, Math.ceil(communityTotalSize.length * 1.5)); + communityTotalSize = growFloat(communityTotalSize, growTo); + communityNodeCount = growInt(communityNodeCount, growTo); + communityInternalEdgeWeight = growFloat(communityInternalEdgeWeight, growTo); + communityTotalStrength = growFloat(communityTotalStrength, growTo); + communityTotalOutStrength = growFloat(communityTotalOutStrength, growTo); + communityTotalInStrength = growFloat(communityTotalInStrength, growTo); + } + + function initializeAggregates(): void { + communityTotalSize.fill(0); + communityNodeCount.fill(0); + communityInternalEdgeWeight.fill(0); + communityTotalStrength.fill(0); + 
communityTotalOutStrength.fill(0); + communityTotalInStrength.fill(0); + for (let i = 0; i < n; i++) { + const c: number = iget(nodeCommunity, i); + communityTotalSize[c] = fget(communityTotalSize, c) + fget(graph.size, i); + communityNodeCount[c] = iget(communityNodeCount, c) + 1; + if (graph.directed) { + communityTotalOutStrength[c] = + fget(communityTotalOutStrength, c) + fget(graph.strengthOut, i); + communityTotalInStrength[c] = fget(communityTotalInStrength, c) + fget(graph.strengthIn, i); + } else { + communityTotalStrength[c] = fget(communityTotalStrength, c) + fget(graph.strengthOut, i); + } + if (fget(graph.selfLoop, i) !== 0) + communityInternalEdgeWeight[c] = + fget(communityInternalEdgeWeight, c) + fget(graph.selfLoop, i); + } + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] + if (ci === iget(nodeCommunity, j)) + communityInternalEdgeWeight[ci] = fget(communityInternalEdgeWeight, ci) + w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (j <= i) continue; + if (ci === iget(nodeCommunity, j)) + communityInternalEdgeWeight[ci] = fget(communityInternalEdgeWeight, ci) + w; + } + } + } + } + + function resetScratch(): void { + for (let i = 0; i < candidateCommunityCount; i++) { + const c: number = iget(candidateCommunities, i); + isCandidateCommunity[c] = 0; + neighborEdgeWeightToCommunity[c] = 0; + outEdgeWeightToCommunity[c] = 0; + inEdgeWeightFromCommunity[c] = 0; + } + candidateCommunityCount = 0; + } + + function touch(c: number): void { + if (u8get(isCandidateCommunity, c)) return; + isCandidateCommunity[c] = 1; + 
candidateCommunities[candidateCommunityCount++] = c; + } + + function accumulateNeighborCommunityEdgeWeights(v: number): number { + resetScratch(); + const ci: number = iget(nodeCommunity, v); + touch(ci); + if (graph.directed) { + const outL = graph.outEdges[v]!; + for (let k = 0; k < outL.length; k++) { + const j: number = outL[k]!.to; + const w: number = outL[k]!.w; + const cj: number = iget(nodeCommunity, j); + touch(cj); + outEdgeWeightToCommunity[cj] = fget(outEdgeWeightToCommunity, cj) + w; + } + const inL = graph.inEdges[v]!; + for (let k = 0; k < inL.length; k++) { + const i2: number = inL[k]!.from; + const w: number = inL[k]!.w; + const ci2: number = iget(nodeCommunity, i2); + touch(ci2); + inEdgeWeightFromCommunity[ci2] = fget(inEdgeWeightFromCommunity, ci2) + w; + } + } else { + const list = graph.outEdges[v]!; + for (let k = 0; k < list.length; k++) { + const j: number = list[k]!.to; + const w: number = list[k]!.w; + const cj: number = iget(nodeCommunity, j); + touch(cj); + neighborEdgeWeightToCommunity[cj] = fget(neighborEdgeWeightToCommunity, cj) + w; + } + } + return candidateCommunityCount; + } + + const twoMUndirected: number = graph.totalWeight; + function deltaModularityUndirected(v: number, newC: number, gamma: number = 1.0): number { + const oldC: number = iget(nodeCommunity, v); + if (newC === oldC) return 0; + const strengthV: number = fget(graph.strengthOut, v); + const weightToNew: number = + newC < neighborEdgeWeightToCommunity.length + ? fget(neighborEdgeWeightToCommunity, newC) || 0 + : 0; + const weightToOld: number = fget(neighborEdgeWeightToCommunity, oldC) || 0; + const totalStrengthNew: number = + newC < communityTotalStrength.length ? 
fget(communityTotalStrength, newC) : 0; + const totalStrengthOld: number = fget(communityTotalStrength, oldC); + const gain_remove: number = -( + weightToOld / twoMUndirected - + (gamma * (strengthV * totalStrengthOld)) / (twoMUndirected * twoMUndirected) + ); + const gain_add: number = + weightToNew / twoMUndirected - + (gamma * (strengthV * totalStrengthNew)) / (twoMUndirected * twoMUndirected); + return gain_remove + gain_add; + } + + function deltaModularityDirected(v: number, newC: number, gamma: number = 1.0): number { + const oldC: number = iget(nodeCommunity, v); + if (newC === oldC) return 0; + const totalEdgeWeight: number = graph.totalWeight; + const strengthOutV: number = fget(graph.strengthOut, v); + const strengthInV: number = fget(graph.strengthIn, v); + const inFromNew: number = + newC < inEdgeWeightFromCommunity.length ? fget(inEdgeWeightFromCommunity, newC) || 0 : 0; + const outToNew: number = + newC < outEdgeWeightToCommunity.length ? fget(outEdgeWeightToCommunity, newC) || 0 : 0; + const inFromOld: number = fget(inEdgeWeightFromCommunity, oldC) || 0; + const outToOld: number = fget(outEdgeWeightToCommunity, oldC) || 0; + const totalInStrengthNew: number = + newC < communityTotalInStrength.length ? fget(communityTotalInStrength, newC) : 0; + const totalOutStrengthNew: number = + newC < communityTotalOutStrength.length ? 
fget(communityTotalOutStrength, newC) : 0; + const totalInStrengthOld: number = fget(communityTotalInStrength, oldC); + const totalOutStrengthOld: number = fget(communityTotalOutStrength, oldC); + // Self-loop correction + constant term (see modularity.ts diffModularityDirected) + const selfW: number = fget(graph.selfLoop, v) || 0; + const deltaInternal: number = + (inFromNew + outToNew - inFromOld - outToOld + 2 * selfW) / totalEdgeWeight; + const deltaExpected: number = + (gamma * + (strengthOutV * (totalInStrengthNew - totalInStrengthOld) + + strengthInV * (totalOutStrengthNew - totalOutStrengthOld) + + 2 * strengthOutV * strengthInV)) / + (totalEdgeWeight * totalEdgeWeight); + return deltaInternal - deltaExpected; + } + + function deltaCPM(v: number, newC: number, gamma: number = 1.0): number { + const oldC: number = iget(nodeCommunity, v); + if (newC === oldC) return 0; + let w_old: number; + let w_new: number; + let selfCorrection: number = 0; + if (graph.directed) { + w_old = + (fget(outEdgeWeightToCommunity, oldC) || 0) + (fget(inEdgeWeightFromCommunity, oldC) || 0); + w_new = + newC < outEdgeWeightToCommunity.length + ? (fget(outEdgeWeightToCommunity, newC) || 0) + + (fget(inEdgeWeightFromCommunity, newC) || 0) + : 0; + // Self-loop correction (see cpm.ts diffCPM) + selfCorrection = 2 * (fget(graph.selfLoop, v) || 0); + } else { + w_old = fget(neighborEdgeWeightToCommunity, oldC) || 0; + w_new = + newC < neighborEdgeWeightToCommunity.length + ? fget(neighborEdgeWeightToCommunity, newC) || 0 + : 0; + } + const nodeSz: number = fget(graph.size, v) || 1; + const sizeOld: number = fget(communityTotalSize, oldC) || 0; + const sizeNew: number = newC < communityTotalSize.length ? 
fget(communityTotalSize, newC) : 0; + return w_new - w_old + selfCorrection - gamma * nodeSz * (sizeNew - sizeOld + nodeSz); + } + + function moveNodeToCommunity(v: number, newC: number): boolean { + const oldC: number = iget(nodeCommunity, v); + if (oldC === newC) return false; + if (newC >= communityCount) { + ensureCommCapacity(newC + 1); + communityCount = newC + 1; + } + const strengthOutV: number = fget(graph.strengthOut, v); + const strengthInV: number = fget(graph.strengthIn, v); + const selfLoopWeight: number = fget(graph.selfLoop, v); + const nodeSz: number = fget(graph.size, v); + + communityNodeCount[oldC] = iget(communityNodeCount, oldC) - 1; + communityNodeCount[newC] = iget(communityNodeCount, newC) + 1; + communityTotalSize[oldC] = fget(communityTotalSize, oldC) - nodeSz; + communityTotalSize[newC] = fget(communityTotalSize, newC) + nodeSz; + if (graph.directed) { + communityTotalOutStrength[oldC] = fget(communityTotalOutStrength, oldC) - strengthOutV; + communityTotalOutStrength[newC] = fget(communityTotalOutStrength, newC) + strengthOutV; + communityTotalInStrength[oldC] = fget(communityTotalInStrength, oldC) - strengthInV; + communityTotalInStrength[newC] = fget(communityTotalInStrength, newC) + strengthInV; + } else { + communityTotalStrength[oldC] = fget(communityTotalStrength, oldC) - strengthOutV; + communityTotalStrength[newC] = fget(communityTotalStrength, newC) + strengthOutV; + } + + if (graph.directed) { + const outToOld: number = fget(outEdgeWeightToCommunity, oldC) || 0; + const inFromOld: number = fget(inEdgeWeightFromCommunity, oldC) || 0; + const outToNew: number = + newC < outEdgeWeightToCommunity.length ? fget(outEdgeWeightToCommunity, newC) || 0 : 0; + const inFromNew: number = + newC < inEdgeWeightFromCommunity.length ? fget(inEdgeWeightFromCommunity, newC) || 0 : 0; + // outToOld/inFromOld already include the self-loop weight (self-loops are + // in outEdges/inEdges), so subtract it once to avoid triple-counting. 
+ communityInternalEdgeWeight[oldC] = + fget(communityInternalEdgeWeight, oldC) - (outToOld + inFromOld - selfLoopWeight); + communityInternalEdgeWeight[newC] = + fget(communityInternalEdgeWeight, newC) + (outToNew + inFromNew + selfLoopWeight); + } else { + const weightToOld: number = fget(neighborEdgeWeightToCommunity, oldC) || 0; + const weightToNew: number = fget(neighborEdgeWeightToCommunity, newC) || 0; + communityInternalEdgeWeight[oldC] = + fget(communityInternalEdgeWeight, oldC) - (2 * weightToOld + selfLoopWeight); + communityInternalEdgeWeight[newC] = + fget(communityInternalEdgeWeight, newC) + (2 * weightToNew + selfLoopWeight); + } + + nodeCommunity[v] = newC; + return true; + } + + function compactCommunityIds(opts: CompactOptions = {}): void { + const ids: number[] = []; + for (let c = 0; c < communityCount; c++) if (iget(communityNodeCount, c) > 0) ids.push(c); + if (opts.keepOldOrder) { + ids.sort((a, b) => a - b); + } else if (opts.preserveMap instanceof Map) { + const preserveMap = opts.preserveMap; + ids.sort((a, b) => { + const pa = preserveMap.get(a); + const pb = preserveMap.get(b); + if (pa != null && pb != null && pa !== pb) return pa - pb; + if (pa != null && pb == null) return -1; + if (pb != null && pa == null) return 1; + return ( + fget(communityTotalSize, b) - fget(communityTotalSize, a) || + iget(communityNodeCount, b) - iget(communityNodeCount, a) || + a - b + ); + }); + } else { + ids.sort( + (a, b) => + fget(communityTotalSize, b) - fget(communityTotalSize, a) || + iget(communityNodeCount, b) - iget(communityNodeCount, a) || + a - b, + ); + } + const newId = new Int32Array(communityCount).fill(-1); + ids.forEach((c, i) => { + newId[c] = i; + }); + for (let i = 0; i < nodeCommunity.length; i++) + nodeCommunity[i] = iget(newId, iget(nodeCommunity, i)); + const remappedCount: number = ids.length; + const newTotalSize = new Float64Array(remappedCount); + const newNodeCount = new Int32Array(remappedCount); + const newInternalEdgeWeight 
= new Float64Array(remappedCount); + const newTotalStrength = new Float64Array(remappedCount); + const newTotalOutStrength = new Float64Array(remappedCount); + const newTotalInStrength = new Float64Array(remappedCount); + for (let i = 0; i < n; i++) { + const c: number = iget(nodeCommunity, i); + newTotalSize[c] = fget(newTotalSize, c) + fget(graph.size, i); + newNodeCount[c] = iget(newNodeCount, c) + 1; + if (graph.directed) { + newTotalOutStrength[c] = fget(newTotalOutStrength, c) + fget(graph.strengthOut, i); + newTotalInStrength[c] = fget(newTotalInStrength, c) + fget(graph.strengthIn, i); + } else { + newTotalStrength[c] = fget(newTotalStrength, c) + fget(graph.strengthOut, i); + } + if (fget(graph.selfLoop, i) !== 0) + newInternalEdgeWeight[c] = fget(newInternalEdgeWeight, c) + fget(graph.selfLoop, i); + } + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const list = graph.outEdges[i]!; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]!; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] + if (ci === iget(nodeCommunity, j)) + newInternalEdgeWeight[ci] = fget(newInternalEdgeWeight, ci) + w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const list = graph.outEdges[i]!; + for (let k = 0; k < list.length; k++) { + const { to: j, w } = list[k]!; + if (j <= i) continue; + if (ci === iget(nodeCommunity, j)) + newInternalEdgeWeight[ci] = fget(newInternalEdgeWeight, ci) + w; + } + } + } + communityCount = remappedCount; + communityTotalSize = newTotalSize; + communityNodeCount = newNodeCount; + communityInternalEdgeWeight = newInternalEdgeWeight; + communityTotalStrength = newTotalStrength; + communityTotalOutStrength = newTotalOutStrength; + communityTotalInStrength = newTotalInStrength; + } + + function getCommunityMembers(): number[][] { + const comms: number[][] = new Array(communityCount); + for (let i = 0; i < 
communityCount; i++) comms[i] = []; + for (let i = 0; i < n; i++) comms[iget(nodeCommunity, i)]!.push(i); + return comms; + } + + function getCommunityTotalSizeFn(c: number): number { + return c < communityTotalSize.length ? fget(communityTotalSize, c) : 0; + } + function getCommunityNodeCountFn(c: number): number { + return c < communityNodeCount.length ? iget(communityNodeCount, c) : 0; + } + + return { + n, + get communityCount() { + return communityCount; + }, + nodeCommunity, + get communityTotalSize() { + return communityTotalSize; + }, + get communityNodeCount() { + return communityNodeCount; + }, + get communityInternalEdgeWeight() { + return communityInternalEdgeWeight; + }, + get communityTotalStrength() { + return communityTotalStrength; + }, + get communityTotalOutStrength() { + return communityTotalOutStrength; + }, + get communityTotalInStrength() { + return communityTotalInStrength; + }, + resizeCommunities(newCount: number): void { + ensureCommCapacity(newCount); + communityCount = newCount; + }, + initializeAggregates, + accumulateNeighborCommunityEdgeWeights, + getCandidateCommunityCount: (): number => candidateCommunityCount, + getCandidateCommunityAt: (i: number): number => iget(candidateCommunities, i), + getNeighborEdgeWeightToCommunity: (c: number): number => + fget(neighborEdgeWeightToCommunity, c) || 0, + getOutEdgeWeightToCommunity: (c: number): number => fget(outEdgeWeightToCommunity, c) || 0, + getInEdgeWeightFromCommunity: (c: number): number => fget(inEdgeWeightFromCommunity, c) || 0, + deltaModularityUndirected, + deltaModularityDirected, + deltaCPM, + moveNodeToCommunity, + compactCommunityIds, + getCommunityMembers, + getCommunityTotalSize: getCommunityTotalSizeFn, + getCommunityNodeCount: getCommunityNodeCountFn, + graph: undefined as unknown as GraphAdapter, + }; +} + +function growFloat(a: Float64Array, to: number): Float64Array { + const b = new Float64Array(to); + for (let i = 0; i < a.length; i++) b[i] = a[i] as number; + 
return b; +} +function growInt(a: Int32Array, to: number): Int32Array { + const b = new Int32Array(to); + for (let i = 0; i < a.length; i++) b[i] = a[i] as number; + return b; +} diff --git a/src/graph/algorithms/louvain.js b/src/graph/algorithms/louvain.ts similarity index 57% rename from src/graph/algorithms/louvain.js rename to src/graph/algorithms/louvain.ts index c4195b60..c8643b93 100644 --- a/src/graph/algorithms/louvain.js +++ b/src/graph/algorithms/louvain.ts @@ -2,23 +2,32 @@ * Community detection via vendored Leiden algorithm. * Maintains backward-compatible API: { assignments: Map, modularity: number } * - * **Note:** Always runs in undirected mode (`directed: false`) regardless of + * Note: Always runs in undirected mode (`directed: false`) regardless of * the input graph's directedness. For direction-aware community detection, * use `detectClusters` from `./leiden/index.js` directly. - * - * @param {import('../model.js').CodeGraph} graph - * @param {{ resolution?: number, maxLevels?: number, maxLocalPasses?: number }} [opts] - * @returns {{ assignments: Map, modularity: number }} */ +import type { CodeGraph } from '../model.js'; +import type { DetectClustersResult } from './leiden/index.js'; import { detectClusters } from './leiden/index.js'; -export function louvainCommunities(graph, opts = {}) { +export interface LouvainOptions { + resolution?: number; + maxLevels?: number; + maxLocalPasses?: number; +} + +export interface LouvainResult { + assignments: Map; + modularity: number; +} + +export function louvainCommunities(graph: CodeGraph, opts: LouvainOptions = {}): LouvainResult { if (graph.nodeCount === 0 || graph.edgeCount === 0) { return { assignments: new Map(), modularity: 0 }; } - const resolution = opts.resolution ?? 1.0; - const result = detectClusters(graph, { + const resolution: number = opts.resolution ?? 
1.0; + const result: DetectClustersResult = detectClusters(graph, { resolution, randomSeed: 42, directed: false, @@ -26,7 +35,7 @@ export function louvainCommunities(graph, opts = {}) { ...(opts.maxLocalPasses != null && { maxLocalPasses: opts.maxLocalPasses }), }); - const assignments = new Map(); + const assignments = new Map(); for (const [id] of graph.nodes()) { const cls = result.getClass(id); if (cls != null) assignments.set(id, cls); diff --git a/src/graph/builders/dependency.js b/src/graph/builders/dependency.ts similarity index 63% rename from src/graph/builders/dependency.js rename to src/graph/builders/dependency.ts index 7024f0db..9af53751 100644 --- a/src/graph/builders/dependency.js +++ b/src/graph/builders/dependency.ts @@ -11,17 +11,29 @@ import { Repository, } from '../../db/index.js'; import { isTestFile } from '../../infrastructure/test-filter.js'; +import type { + BetterSqlite3Database, + CallableNodeRow, + CallEdgeRow, + FileNodeRow, + ImportGraphEdgeRow, +} from '../../types.js'; import { CodeGraph } from '../model.js'; +export interface DependencyGraphOptions { + fileLevel?: boolean; + noTests?: boolean; + minConfidence?: number; +} + /** - * @param {object} dbOrRepo - Open better-sqlite3 database (readonly) or a Repository instance - * @param {object} [opts] - * @param {boolean} [opts.fileLevel=true] - File-level (imports) or function-level (calls) - * @param {boolean} [opts.noTests=false] - Exclude test files - * @param {number} [opts.minConfidence] - Minimum edge confidence (function-level only) - * @returns {CodeGraph} + * Build a dependency graph from an open database or Repository instance. + * Supports both file-level (import edges) and function-level (call edges) graphs. 
*/ -export function buildDependencyGraph(dbOrRepo, opts = {}) { +export function buildDependencyGraph( + dbOrRepo: BetterSqlite3Database | Repository, + opts: DependencyGraphOptions = {}, +): CodeGraph { const fileLevel = opts.fileLevel !== false; const noTests = opts.noTests || false; @@ -31,20 +43,23 @@ export function buildDependencyGraph(dbOrRepo, opts = {}) { return buildFunctionLevelGraph(dbOrRepo, noTests, opts.minConfidence); } -function buildFileLevelGraph(dbOrRepo, noTests) { +function buildFileLevelGraph( + dbOrRepo: BetterSqlite3Database | Repository, + noTests: boolean, +): CodeGraph { const graph = new CodeGraph(); const isRepo = dbOrRepo instanceof Repository; - let nodes = isRepo ? dbOrRepo.getFileNodesAll() : getFileNodesAll(dbOrRepo); + let nodes: FileNodeRow[] = isRepo ? dbOrRepo.getFileNodesAll() : getFileNodesAll(dbOrRepo); if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); - const nodeIds = new Set(); + const nodeIds = new Set(); for (const n of nodes) { graph.addNode(String(n.id), { label: n.file, file: n.file, dbId: n.id }); nodeIds.add(n.id); } - const edges = isRepo ? dbOrRepo.getImportEdges() : getImportEdges(dbOrRepo); + const edges: ImportGraphEdgeRow[] = isRepo ? dbOrRepo.getImportEdges() : getImportEdges(dbOrRepo); for (const e of edges) { if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue; const src = String(e.source_id); @@ -58,14 +73,23 @@ function buildFileLevelGraph(dbOrRepo, noTests) { return graph; } -function buildFunctionLevelGraph(dbOrRepo, noTests, minConfidence) { +interface MinConfidenceEdgeRow { + source_id: number; + target_id: number; +} + +function buildFunctionLevelGraph( + dbOrRepo: BetterSqlite3Database | Repository, + noTests: boolean, + minConfidence?: number, +): CodeGraph { const graph = new CodeGraph(); const isRepo = dbOrRepo instanceof Repository; - let nodes = isRepo ? dbOrRepo.getCallableNodes() : getCallableNodes(dbOrRepo); + let nodes: CallableNodeRow[] = isRepo ? 
dbOrRepo.getCallableNodes() : getCallableNodes(dbOrRepo); if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); - const nodeIds = new Set(); + const nodeIds = new Set(); for (const n of nodes) { graph.addNode(String(n.id), { label: n.name, @@ -76,7 +100,7 @@ function buildFunctionLevelGraph(dbOrRepo, noTests, minConfidence) { nodeIds.add(n.id); } - let edges; + let edges: CallEdgeRow[] | MinConfidenceEdgeRow[]; if (minConfidence != null) { if (isRepo) { // Trade-off: Repository.getCallEdges() returns all call edges, so we @@ -88,8 +112,10 @@ function buildFunctionLevelGraph(dbOrRepo, noTests, minConfidence) { .getCallEdges() .filter((e) => e.confidence != null && e.confidence >= minConfidence); } else { - edges = dbOrRepo - .prepare("SELECT source_id, target_id FROM edges WHERE kind = 'calls' AND confidence >= ?") + edges = (dbOrRepo as BetterSqlite3Database) + .prepare( + "SELECT source_id, target_id FROM edges WHERE kind = 'calls' AND confidence >= ?", + ) .all(minConfidence); } } else { diff --git a/src/graph/builders/index.js b/src/graph/builders/index.js deleted file mode 100644 index 18bbad63..00000000 --- a/src/graph/builders/index.js +++ /dev/null @@ -1,3 +0,0 @@ -export { buildDependencyGraph } from './dependency.js'; -export { buildStructureGraph } from './structure.js'; -export { buildTemporalGraph } from './temporal.js'; diff --git a/src/graph/builders/structure.js b/src/graph/builders/structure.js deleted file mode 100644 index 10efb110..00000000 --- a/src/graph/builders/structure.js +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Build a containment graph (directory → file) from the SQLite database. 
- */ - -import { CodeGraph } from '../model.js'; - -/** - * @param {object} db - Open better-sqlite3 database (readonly) - * @returns {CodeGraph} Directed graph with directory→file containment edges - */ -export function buildStructureGraph(db) { - const graph = new CodeGraph(); - - const dirs = db.prepare("SELECT id, name FROM nodes WHERE kind = 'directory'").all(); - - for (const d of dirs) { - graph.addNode(String(d.id), { label: d.name, kind: 'directory' }); - } - - const files = db.prepare("SELECT id, name, file FROM nodes WHERE kind = 'file'").all(); - - for (const f of files) { - graph.addNode(String(f.id), { label: f.name, kind: 'file', file: f.file }); - } - - const containsEdges = db - .prepare(` - SELECT e.source_id, e.target_id - FROM edges e - JOIN nodes n ON e.source_id = n.id - WHERE e.kind = 'contains' AND n.kind = 'directory' - `) - .all(); - - for (const e of containsEdges) { - graph.addEdge(String(e.source_id), String(e.target_id), { kind: 'contains' }); - } - - return graph; -} diff --git a/src/graph/builders/structure.ts b/src/graph/builders/structure.ts new file mode 100644 index 00000000..d1114d82 --- /dev/null +++ b/src/graph/builders/structure.ts @@ -0,0 +1,58 @@ +/** + * Build a containment graph (directory -> file) from the SQLite database. + */ + +import type { BetterSqlite3Database } from '../../types.js'; +import { CodeGraph } from '../model.js'; + +interface DirRow { + id: number; + name: string; +} + +interface FileRow { + id: number; + name: string; + file: string; +} + +interface ContainsEdgeRow { + source_id: number; + target_id: number; +} + +/** + * Build a directed graph with directory->file containment edges. 
+ */ +export function buildStructureGraph(db: BetterSqlite3Database): CodeGraph { + const graph = new CodeGraph(); + + const dirs = db.prepare("SELECT id, name FROM nodes WHERE kind = 'directory'").all(); + + for (const d of dirs) { + graph.addNode(String(d.id), { label: d.name, kind: 'directory' }); + } + + const files = db.prepare("SELECT id, name, file FROM nodes WHERE kind = 'file'").all(); + + for (const f of files) { + graph.addNode(String(f.id), { label: f.name, kind: 'file', file: f.file }); + } + + const containsEdges = db + .prepare(` + SELECT e.source_id, e.target_id + FROM edges e + JOIN nodes n ON e.source_id = n.id + WHERE e.kind = 'contains' AND n.kind = 'directory' + `) + .all(); + + for (const e of containsEdges) { + graph.addEdge(String(e.source_id), String(e.target_id), { + kind: 'contains', + }); + } + + return graph; +} diff --git a/src/graph/builders/temporal.js b/src/graph/builders/temporal.js deleted file mode 100644 index c694d47c..00000000 --- a/src/graph/builders/temporal.js +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Build a co-change (temporal) graph weighted by Jaccard similarity. - */ - -import { CodeGraph } from '../model.js'; - -/** - * @param {object} db - Open better-sqlite3 database (readonly) - * @param {{ minJaccard?: number }} [opts] - * @returns {CodeGraph} Undirected graph weighted by Jaccard similarity - */ -export function buildTemporalGraph(db, opts = {}) { - const minJaccard = opts.minJaccard ?? 
0.0; - const graph = new CodeGraph({ directed: false }); - - // Check if co_changes table exists - const tableCheck = db - .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='co_changes'") - .get(); - if (!tableCheck) return graph; - - const rows = db - .prepare('SELECT file_a, file_b, jaccard FROM co_changes WHERE jaccard >= ?') - .all(minJaccard); - - for (const r of rows) { - if (!graph.hasNode(r.file_a)) graph.addNode(r.file_a, { label: r.file_a }); - if (!graph.hasNode(r.file_b)) graph.addNode(r.file_b, { label: r.file_b }); - graph.addEdge(r.file_a, r.file_b, { jaccard: r.jaccard }); - } - - return graph; -} diff --git a/src/graph/builders/temporal.ts b/src/graph/builders/temporal.ts new file mode 100644 index 00000000..43aea31a --- /dev/null +++ b/src/graph/builders/temporal.ts @@ -0,0 +1,51 @@ +/** + * Build a co-change (temporal) graph weighted by Jaccard similarity. + */ + +import type { BetterSqlite3Database } from '../../types.js'; +import { CodeGraph } from '../model.js'; + +export interface TemporalGraphOptions { + minJaccard?: number; +} + +interface TableCheckRow { + name: string; +} + +interface CoChangeRow { + file_a: string; + file_b: string; + jaccard: number; +} + +/** + * Build an undirected graph weighted by Jaccard similarity from the co_changes table. + */ +export function buildTemporalGraph( + db: BetterSqlite3Database, + opts: TemporalGraphOptions = {}, +): CodeGraph { + const minJaccard = opts.minJaccard ?? 
0.0; + const graph = new CodeGraph({ directed: false }); + + // Check if co_changes table exists + const tableCheck = db + .prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name='co_changes'", + ) + .get(); + if (!tableCheck) return graph; + + const rows = db + .prepare('SELECT file_a, file_b, jaccard FROM co_changes WHERE jaccard >= ?') + .all(minJaccard); + + for (const r of rows) { + if (!graph.hasNode(r.file_a)) graph.addNode(r.file_a, { label: r.file_a }); + if (!graph.hasNode(r.file_b)) graph.addNode(r.file_b, { label: r.file_b }); + graph.addEdge(r.file_a, r.file_b, { jaccard: r.jaccard }); + } + + return graph; +} diff --git a/src/graph/classifiers/index.js b/src/graph/classifiers/index.ts similarity index 100% rename from src/graph/classifiers/index.js rename to src/graph/classifiers/index.ts diff --git a/src/graph/index.js b/src/graph/index.ts similarity index 100% rename from src/graph/index.js rename to src/graph/index.ts diff --git a/src/types.ts b/src/types.ts index 8041c8f4..393e2bef 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1661,7 +1661,7 @@ export interface SqliteStatement { /** Minimal database interface matching the better-sqlite3 surface we use. 
*/ export interface BetterSqlite3Database { prepare(sql: string): SqliteStatement; - exec(sql: string): void; + exec(sql: string): this; close(): void; pragma(sql: string): unknown; // biome-ignore lint/suspicious/noExplicitAny: must be compatible with better-sqlite3's generic Transaction return type diff --git a/src/vendor.d.ts b/src/vendor.d.ts index 9edc233b..54d5ef6b 100644 --- a/src/vendor.d.ts +++ b/src/vendor.d.ts @@ -7,18 +7,21 @@ declare module 'better-sqlite3' { namespace BetterSqlite3 { interface Database { - prepare(sql: string): Statement; + prepare<TRow = unknown>(sql: string): Statement<TRow>; exec(sql: string): Database; - transaction<T extends (...args: unknown[]) => unknown>(fn: T): T; + // biome-ignore lint/suspicious/noExplicitAny: must match better-sqlite3's generic Transaction + transaction<T>(fn: (...args: any[]) => T): (...args: any[]) => T; close(): void; pragma(pragma: string, options?: { simple?: boolean }): unknown; + readonly open: boolean; + readonly name: string; } - interface Statement { + interface Statement<TRow = unknown> { run(...params: unknown[]): RunResult; - get(...params: unknown[]): unknown | undefined; - all(...params: unknown[]): unknown[]; - iterate(...params: unknown[]): IterableIterator<unknown>; + get(...params: unknown[]): TRow | undefined; + all(...params: unknown[]): TRow[]; + iterate(...params: unknown[]): IterableIterator<TRow>; } interface RunResult { From 49e9d0f2ee519bbe9db894db993f33fef99e2e15 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:05:56 -0600 Subject: [PATCH 31/33] feat(types): migrate builder stages, search, and graph domain to TypeScript (Phase 5.4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate 24 JS files to strict TypeScript across three domains: - domain/graph/builder/stages/ (9 files): all build pipeline stages - domain/graph/builder/incremental.ts + cycles.ts + journal.ts + change-journal.ts - domain/search/search/ (6 files): hybrid, semantic, keyword, filters, 
prepare, cli-formatter - domain/search/stores/ (2 files): fts5, sqlite-blob - domain/search/strategies/ (3 files): source, structured, text-utils Key type fixes: ParseChange stat type alignment (mtimeMs→mtime conversion), IncrementalStmts duck-typed interfaces for watcher compatibility, NodeWithId relaxed to Pick for partial node queries, ESCAPE clause backslash fix. Passes tsc --noEmit with zero errors. No test regressions. --- docs/roadmap/ROADMAP.md | 24 +- .../{incremental.js => incremental.ts} | 252 +++++++++++----- .../stages/{build-edges.js => build-edges.ts} | 227 ++++++++++---- ...{build-structure.js => build-structure.ts} | 51 ++-- .../{collect-files.js => collect-files.ts} | 14 +- .../{detect-changes.js => detect-changes.ts} | 279 ++++++++++-------- .../stages/{finalize.js => finalize.ts} | 66 +++-- .../{insert-nodes.js => insert-nodes.ts} | 75 +++-- .../stages/{parse-files.js => parse-files.ts} | 6 +- ...{resolve-imports.js => resolve-imports.ts} | 60 ++-- .../{run-analyses.js => run-analyses.ts} | 17 +- .../{change-journal.js => change-journal.ts} | 85 +++--- src/domain/graph/cycles.ts | 66 +++++ src/domain/graph/{journal.js => journal.ts} | 48 ++- src/domain/graph/watcher.ts | 22 +- src/domain/search/generator.ts | 3 +- .../{cli-formatter.js => cli-formatter.ts} | 41 ++- .../search/search/{filters.js => filters.ts} | 26 +- .../search/search/{hybrid.js => hybrid.ts} | 94 ++++-- .../search/search/{keyword.js => keyword.ts} | 58 +++- .../search/search/{prepare.js => prepare.ts} | 47 ++- .../search/{semantic.js => semantic.ts} | 110 ++++--- src/domain/search/stores/{fts5.js => fts5.ts} | 16 +- src/domain/search/stores/sqlite-blob.ts | 15 + .../strategies/{source.js => source.ts} | 7 +- .../{structured.js => structured.ts} | 19 +- .../{text-utils.js => text-utils.ts} | 20 +- src/types.ts | 2 + 28 files changed, 1138 insertions(+), 612 deletions(-) rename src/domain/graph/builder/{incremental.js => incremental.ts} (68%) rename 
src/domain/graph/builder/stages/{build-edges.js => build-edges.ts} (73%) rename src/domain/graph/builder/stages/{build-structure.js => build-structure.ts} (65%) rename src/domain/graph/builder/stages/{collect-files.js => collect-files.ts} (79%) rename src/domain/graph/builder/stages/{detect-changes.js => detect-changes.ts} (63%) rename src/domain/graph/builder/stages/{finalize.js => finalize.ts} (64%) rename src/domain/graph/builder/stages/{insert-nodes.js => insert-nodes.ts} (74%) rename src/domain/graph/builder/stages/{parse-files.js => parse-files.ts} (87%) rename src/domain/graph/builder/stages/{resolve-imports.js => resolve-imports.ts} (75%) rename src/domain/graph/builder/stages/{run-analyses.js => run-analyses.ts} (71%) rename src/domain/graph/{change-journal.js => change-journal.ts} (60%) create mode 100644 src/domain/graph/cycles.ts rename src/domain/graph/{journal.js => journal.ts} (67%) rename src/domain/search/search/{cli-formatter.js => cli-formatter.ts} (80%) rename src/domain/search/search/{filters.js => filters.ts} (54%) rename src/domain/search/search/{hybrid.js => hybrid.ts} (55%) rename src/domain/search/search/{keyword.js => keyword.ts} (59%) rename src/domain/search/search/{prepare.js => prepare.ts} (67%) rename src/domain/search/search/{semantic.js => semantic.ts} (59%) rename src/domain/search/stores/{fts5.js => fts5.ts} (58%) create mode 100644 src/domain/search/stores/sqlite-blob.ts rename src/domain/search/strategies/{source.js => source.ts} (71%) rename src/domain/search/strategies/{structured.js => structured.ts} (71%) rename src/domain/search/strategies/{text-utils.js => text-utils.ts} (53%) diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 4614c75e..1b52329d 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -1086,7 +1086,7 @@ npm workspaces (`package.json` `workspaces`), `pnpm-workspace.yaml`, and `lerna. 
**Why after Phase 4:** The resolution accuracy work (Phase 4) operates on the existing JS codebase and produces immediate accuracy gains. TypeScript migration builds on Phase 3's clean module boundaries to add type safety across the entire codebase. Every subsequent phase benefits from types: MCP schema auto-generation, API contracts, refactoring safety. The Phase 4 resolution improvements (receiver tracking, interface edges) establish the resolution model that TypeScript types will formalize. -**Note:** `.js` and `.ts` coexist during migration (`allowJs: true` in tsconfig). PRs #553, #554, #555, #566 migrated a first wave of files across steps 5.3–5.5, but substantial work remains in each step. 13 stale `.js` files have `.ts` counterparts and need deletion. +**Note:** `.js` and `.ts` coexist during migration (`allowJs: true` in tsconfig). PRs #553, #554, #555, #566 migrated a first wave of files across steps 5.3–5.5. ### ~~5.1 -- Project Setup~~ ✅ @@ -1127,25 +1127,19 @@ Migrate modules with no or minimal internal dependencies. 25 migrated, 4 remaini ### 5.4 -- Core Module Migration (In Progress) -Migrate modules that implement domain logic and Phase 3 interfaces. Some migrated via [#554](https://github.com/optave/codegraph/pull/554), 39 files remaining. +Migrate modules that implement domain logic and Phase 3 interfaces. Some migrated via [#554](https://github.com/optave/codegraph/pull/554), [#566](https://github.com/optave/codegraph/pull/566). 15 files remaining. 
-**Migrated:** `db/repository/*.ts` (14 files), `domain/parser.ts`, `domain/graph/resolve.ts`, `extractors/*.ts` (11 files), `domain/graph/builder.ts` + `context.ts` + `helpers.ts` + `pipeline.ts`, `domain/graph/watcher.ts`, `domain/search/{generator,index,models}.ts`, `graph/model.ts`, `graph/algorithms/{bfs,centrality,shortest-path,tarjan}.ts`, `graph/algorithms/leiden/rng.ts`, `graph/classifiers/{risk,roles}.ts` +**Migrated:** `db/repository/*.ts` (14 files), `domain/parser.ts`, `domain/graph/resolve.ts`, `extractors/*.ts` (11 files), `domain/graph/builder.ts` + `context.ts` + `helpers.ts` + `pipeline.ts`, `domain/graph/watcher.ts`, `domain/search/{generator,index,models}.ts`, `graph/model.ts`, `graph/algorithms/{bfs,centrality,shortest-path,tarjan}.ts`, `graph/algorithms/leiden/rng.ts`, `graph/classifiers/{risk,roles}.ts`, `domain/graph/builder/stages/*.ts` (9 files), `domain/graph/builder/incremental.ts`, `domain/graph/{cycles,journal,change-journal}.ts`, `domain/queries.ts`, `domain/search/search/*.ts` (6 files), `domain/search/stores/*.ts` (2 files), `domain/search/strategies/*.ts` (3 files), `graph/algorithms/leiden/*.ts` (6 files), `graph/algorithms/{louvain,index}.ts`, `graph/builders/*.ts` (4 files), `graph/classifiers/index.ts` + `graph/index.ts` -**Remaining (39):** +**Remaining (15):** | Module | Files | Notes | |--------|-------|-------| -| `domain/graph/builder/stages/` | 9 | All 9 build pipeline stages (collect-files, parse-files, resolve-imports, build-edges, etc.) 
| -| `domain/graph/builder/incremental.js` | 1 | Incremental rebuild logic | -| `domain/graph/{cycles,journal,change-journal}.js` | 3 | Graph utilities | -| `domain/queries.js` | 1 | Core query functions | -| `domain/search/search/` | 6 | Search subsystem (hybrid, semantic, keyword, filters, cli-formatter, prepare) | -| `domain/search/stores/` | 2 | FTS5, SQLite blob stores | -| `domain/search/strategies/` | 3 | Source, structured, text-utils strategies | -| `graph/algorithms/leiden/` | 6 | Leiden community detection (adapter, CPM, modularity, optimiser, partition, index) | -| `graph/algorithms/{louvain,index}.js` | 2 | Louvain + algorithms barrel | -| `graph/builders/` | 4 | Dependency, structure, temporal builders + barrel | -| `graph/classifiers/index.js` + `graph/index.js` | 2 | Barrel exports | +| `src/db/connection.js` | 1 | SQLite connection wrapper (also listed in 5.3) | +| `src/db/index.js` | 1 | DB barrel/schema entry point (also listed in 5.3) | +| `src/db/migrations.js` | 1 | Schema version management (also listed in 5.3) | +| `src/db/query-builder.js` | 1 | Dynamic query builder (also listed in 5.3) | +| Remaining 5.5 files | ~11 | See 5.5 for orchestration/features/entry points | ### 5.5 -- Orchestration & Public API Migration (In Progress) diff --git a/src/domain/graph/builder/incremental.js b/src/domain/graph/builder/incremental.ts similarity index 68% rename from src/domain/graph/builder/incremental.js rename to src/domain/graph/builder/incremental.ts index 2be5cefa..48034b25 100644 --- a/src/domain/graph/builder/incremental.js +++ b/src/domain/graph/builder/incremental.ts @@ -9,16 +9,44 @@ */ import fs from 'node:fs'; import path from 'node:path'; +import type BetterSqlite3 from 'better-sqlite3'; import { bulkNodeIdsByFile } from '../../../db/index.js'; import { warn } from '../../../infrastructure/logger.js'; import { normalizePath } from '../../../shared/constants.js'; +import type { EngineOpts, ExtractorOutput, PathAliases } from 
'../../../types.js'; import { parseFileIncremental } from '../../parser.js'; import { computeConfidence, resolveImportPath } from '../resolve.js'; import { BUILTIN_RECEIVERS, readFileSafe } from './helpers.js'; +// ── Local types ───────────────────────────────────────────────────────── + +export interface IncrementalStmts { + insertNode: { run: (...params: unknown[]) => unknown }; + insertEdge: { run: (...params: unknown[]) => unknown }; + getNodeId: { get: (...params: unknown[]) => { id: number } | undefined }; + deleteEdgesForFile: { run: (...params: unknown[]) => unknown }; + deleteNodes: { run: (...params: unknown[]) => unknown }; + countNodes: { get: (...params: unknown[]) => { c: number } | undefined }; + listSymbols: { all: (...params: unknown[]) => unknown[] }; + findNodeInFile: { all: (...params: unknown[]) => unknown[] }; + findNodeByName: { all: (...params: unknown[]) => unknown[] }; +} + +interface RebuildResult { + file: string; + nodesAdded: number; + nodesRemoved: number; + edgesAdded: number; + deleted?: boolean; + event?: string; + symbolDiff?: unknown; + nodesBefore?: number; + nodesAfter?: number; +} + // ── Node insertion ────────────────────────────────────────────────────── -function insertFileNodes(stmts, relPath, symbols) { +function insertFileNodes(stmts: IncrementalStmts, relPath: string, symbols: ExtractorOutput): void { stmts.insertNode.run(relPath, 'file', relPath, 0, null); for (const def of symbols.definitions) { stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); @@ -35,8 +63,13 @@ function insertFileNodes(stmts, relPath, symbols) { // ── Containment edges ────────────────────────────────────────────────── -function buildContainmentEdges(db, stmts, relPath, symbols) { - const nodeIdMap = new Map(); +function buildContainmentEdges( + db: BetterSqlite3.Database, + stmts: IncrementalStmts, + relPath: string, + symbols: ExtractorOutput, +): number { + const nodeIdMap = new Map(); for (const row of 
bulkNodeIdsByFile(db, relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } @@ -68,11 +101,14 @@ function buildContainmentEdges(db, stmts, relPath, symbols) { // ── Reverse-dep cascade ──────────────────────────────────────────────── // Lazily-cached prepared statements for reverse-dep operations -let _revDepDb = null; -let _findRevDepsStmt = null; -let _deleteOutEdgesStmt = null; - -function getRevDepStmts(db) { +let _revDepDb: BetterSqlite3.Database | null = null; +let _findRevDepsStmt: BetterSqlite3.Statement | null = null; +let _deleteOutEdgesStmt: BetterSqlite3.Statement | null = null; + +function getRevDepStmts(db: BetterSqlite3.Database): { + findRevDepsStmt: BetterSqlite3.Statement; + deleteOutEdgesStmt: BetterSqlite3.Statement; +} { if (_revDepDb !== db) { _revDepDb = db; _findRevDepsStmt = db.prepare( @@ -85,24 +121,32 @@ function getRevDepStmts(db) { 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', ); } - return { findRevDepsStmt: _findRevDepsStmt, deleteOutEdgesStmt: _deleteOutEdgesStmt }; + return { + findRevDepsStmt: _findRevDepsStmt!, + deleteOutEdgesStmt: _deleteOutEdgesStmt!, + }; } -function findReverseDeps(db, relPath) { +function findReverseDeps(db: BetterSqlite3.Database, relPath: string): string[] { const { findRevDepsStmt } = getRevDepStmts(db); - return findRevDepsStmt.all(relPath, relPath).map((r) => r.file); + return (findRevDepsStmt.all(relPath, relPath) as Array<{ file: string }>).map((r) => r.file); } -function deleteOutgoingEdges(db, relPath) { +function deleteOutgoingEdges(db: BetterSqlite3.Database, relPath: string): void { const { deleteOutEdgesStmt } = getRevDepStmts(db); deleteOutEdgesStmt.run(relPath); } -async function parseReverseDep(rootDir, depRelPath, engineOpts, cache) { +async function parseReverseDep( + rootDir: string, + depRelPath: string, + engineOpts: EngineOpts, + cache: unknown, +): Promise { const absPath = path.join(rootDir, depRelPath); if 
(!fs.existsSync(absPath)) return null; - let code; + let code: string; try { code = readFileSafe(absPath); } catch { @@ -112,13 +156,20 @@ async function parseReverseDep(rootDir, depRelPath, engineOpts, cache) { return parseFileIncremental(cache, absPath, code, engineOpts); } -function rebuildReverseDepEdges(db, rootDir, depRelPath, symbols, stmts, skipBarrel) { +function rebuildReverseDepEdges( + db: BetterSqlite3.Database, + rootDir: string, + depRelPath: string, + symbols: ExtractorOutput, + stmts: IncrementalStmts, + skipBarrel: boolean, +): number { const fileNodeRow = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0); if (!fileNodeRow) return 0; - const aliases = { baseUrl: null, paths: {} }; + const aliases: PathAliases = { baseUrl: null, paths: {} }; let edgesAdded = buildContainmentEdges(db, stmts, depRelPath, symbols); - // Don't rebuild dir→file containment for reverse-deps (it was never deleted) + // Don't rebuild dir->file containment for reverse-deps (it was never deleted) edgesAdded += buildImportEdges( stmts, depRelPath, @@ -135,7 +186,11 @@ function rebuildReverseDepEdges(db, rootDir, depRelPath, symbols, stmts, skipBar // ── Directory containment edges ──────────────────────────────────────── -function rebuildDirContainment(_db, stmts, relPath) { +function rebuildDirContainment( + _db: BetterSqlite3.Database, + stmts: IncrementalStmts, + relPath: string, +): number { const dir = normalizePath(path.dirname(relPath)); if (!dir || dir === '.') return 0; const dirRow = stmts.getNodeId.get(dir, 'directory', dir, 0); @@ -149,12 +204,12 @@ function rebuildDirContainment(_db, stmts, relPath) { // ── Ancillary table cleanup ──────────────────────────────────────────── -function purgeAncillaryData(db, relPath) { - const tryExec = (sql, ...args) => { +function purgeAncillaryData(db: BetterSqlite3.Database, relPath: string): void { + const tryExec = (sql: string, ...args: string[]): void => { try { db.prepare(sql).run(...args); - } catch (err) { - if 
(!err?.message?.includes('no such table')) throw err; + } catch (err: unknown) { + if (!(err as Error | undefined)?.message?.includes('no such table')) throw err; } }; tryExec( @@ -184,12 +239,16 @@ function purgeAncillaryData(db, relPath) { // ── Import edge building ──────────────────────────────────────────────── // Lazily-cached prepared statements for barrel resolution (avoid re-preparing in hot loops) -let _barrelDb = null; -let _isBarrelStmt = null; -let _reexportTargetsStmt = null; -let _hasDefStmt = null; - -function getBarrelStmts(db) { +let _barrelDb: BetterSqlite3.Database | null = null; +let _isBarrelStmt: BetterSqlite3.Statement | null = null; +let _reexportTargetsStmt: BetterSqlite3.Statement | null = null; +let _hasDefStmt: BetterSqlite3.Statement | null = null; + +function getBarrelStmts(db: BetterSqlite3.Database): { + isBarrelStmt: BetterSqlite3.Statement; + reexportTargetsStmt: BetterSqlite3.Statement; + hasDefStmt: BetterSqlite3.Statement; +} { if (_barrelDb !== db) { _barrelDb = db; _isBarrelStmt = db.prepare( @@ -208,26 +267,31 @@ function getBarrelStmts(db) { ); } return { - isBarrelStmt: _isBarrelStmt, - reexportTargetsStmt: _reexportTargetsStmt, - hasDefStmt: _hasDefStmt, + isBarrelStmt: _isBarrelStmt!, + reexportTargetsStmt: _reexportTargetsStmt!, + hasDefStmt: _hasDefStmt!, }; } -function isBarrelFile(db, relPath) { +function isBarrelFile(db: BetterSqlite3.Database, relPath: string): boolean { const { isBarrelStmt } = getBarrelStmts(db); - const reexportCount = isBarrelStmt.get(relPath)?.c; + const reexportCount = (isBarrelStmt.get(relPath) as { c: number } | undefined)?.c; return (reexportCount || 0) > 0; } -function resolveBarrelTarget(db, barrelPath, symbolName, visited = new Set()) { +function resolveBarrelTarget( + db: BetterSqlite3.Database, + barrelPath: string, + symbolName: string, + visited: Set = new Set(), +): string | null { if (visited.has(barrelPath)) return null; visited.add(barrelPath); const { reexportTargetsStmt, 
hasDefStmt } = getBarrelStmts(db); // Find re-export targets from this barrel - const reexportTargets = reexportTargetsStmt.all(barrelPath); + const reexportTargets = reexportTargetsStmt.all(barrelPath) as Array<{ file: string }>; for (const { file: targetFile } of reexportTargets) { // Check if the symbol is defined in this target file @@ -247,10 +311,16 @@ function resolveBarrelTarget(db, barrelPath, symbolName, visited = new Set()) { * Resolve barrel imports for a single import statement and create edges to actual source files. * Shared by buildImportEdges (primary file) and Pass 2 of the reverse-dep cascade. */ -function resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp) { +function resolveBarrelImportEdges( + db: BetterSqlite3.Database, + stmts: IncrementalStmts, + fileNodeId: number, + resolvedPath: string, + imp: ExtractorOutput['imports'][number], +): number { let edgesAdded = 0; if (!isBarrelFile(db, resolvedPath)) return edgesAdded; - const resolvedSources = new Set(); + const resolvedSources = new Set(); for (const name of imp.names) { const cleanName = name.replace(/^\*\s+as\s+/, ''); const actualSource = resolveBarrelTarget(db, resolvedPath, cleanName); @@ -267,7 +337,15 @@ function resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp) { return edgesAdded; } -function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases, db) { +function buildImportEdges( + stmts: IncrementalStmts, + relPath: string, + symbols: ExtractorOutput, + rootDir: string, + fileNodeId: number, + aliases: PathAliases, + db: BetterSqlite3.Database | null, +): number { let edgesAdded = 0; for (const imp of symbols.imports) { const resolvedPath = resolveImportPath( @@ -291,8 +369,13 @@ function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases, return edgesAdded; } -function buildImportedNamesMap(symbols, rootDir, relPath, aliases) { - const importedNames = new Map(); +function buildImportedNamesMap( + symbols: 
ExtractorOutput, + rootDir: string, + relPath: string, + aliases: PathAliases, +): Map { + const importedNames = new Map(); for (const imp of symbols.imports) { const resolvedPath = resolveImportPath( path.join(rootDir, relPath), @@ -309,8 +392,13 @@ function buildImportedNamesMap(symbols, rootDir, relPath, aliases) { // ── Call edge building ────────────────────────────────────────────────── -function findCaller(call, definitions, relPath, stmts) { - let caller = null; +function findCaller( + call: ExtractorOutput['calls'][number], + definitions: ExtractorOutput['definitions'], + relPath: string, + stmts: IncrementalStmts, +): { id: number } | null { + let caller: { id: number } | null = null; let callerSpan = Infinity; for (const def of definitions) { if (def.line <= call.line) { @@ -333,16 +421,25 @@ function findCaller(call, definitions, relPath, stmts) { return caller; } -function resolveCallTargets(stmts, call, relPath, importedNames, typeMap) { +function resolveCallTargets( + stmts: IncrementalStmts, + call: ExtractorOutput['calls'][number], + relPath: string, + importedNames: Map, + typeMap: Map, +): { targets: Array<{ id: number; file: string }>; importedFrom: string | undefined } { const importedFrom = importedNames.get(call.name); - let targets; + let targets: Array<{ id: number; file: string }> | undefined; if (importedFrom) { - targets = stmts.findNodeInFile.all(call.name, importedFrom); + targets = stmts.findNodeInFile.all(call.name, importedFrom) as Array<{ + id: number; + file: string; + }>; } if (!targets || targets.length === 0) { - targets = stmts.findNodeInFile.all(call.name, relPath); + targets = stmts.findNodeInFile.all(call.name, relPath) as Array<{ id: number; file: string }>; if (targets.length === 0) { - targets = stmts.findNodeByName.all(call.name); + targets = stmts.findNodeByName.all(call.name) as Array<{ id: number; file: string }>; } } // Type-aware resolution: translate variable receiver to declared type @@ -351,23 +448,34 @@ 
function resolveCallTargets(stmts, call, relPath, importedNames, typeMap) { const typeName = typeEntry ? typeof typeEntry === 'string' ? typeEntry - : typeEntry.type + : (typeEntry as { type?: string }).type : null; if (typeName) { const qualified = `${typeName}.${call.name}`; - targets = stmts.findNodeByName.all(qualified); + targets = stmts.findNodeByName.all(qualified) as Array<{ id: number; file: string }>; } } - return { targets, importedFrom }; + return { targets: targets ?? [], importedFrom }; } -function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) { - const rawTM = symbols.typeMap; - const typeMap = +function buildCallEdges( + stmts: IncrementalStmts, + relPath: string, + symbols: ExtractorOutput, + fileNodeRow: { id: number }, + importedNames: Map, +): number { + const rawTM: unknown = symbols.typeMap; + const typeMap: Map = rawTM instanceof Map ? rawTM : Array.isArray(rawTM) && rawTM.length > 0 - ? new Map(rawTM.map((e) => [e.name, e.typeName ?? e.type ?? null])) + ? new Map( + (rawTM as Array<{ name: string; typeName?: string; type?: string }>).map((e) => [ + e.name, + e.typeName ?? e.type ?? null, + ]), + ) : new Map(); let edgesAdded = 0; for (const call of symbols.calls) { @@ -397,22 +505,20 @@ function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) { /** * Parse a single file and update the database incrementally. 
- * - * @param {import('better-sqlite3').Database} db - * @param {string} rootDir - Absolute root directory - * @param {string} filePath - Absolute file path - * @param {object} stmts - Prepared DB statements - * @param {object} engineOpts - Engine options - * @param {object|null} cache - Parse tree cache (native only) - * @param {object} [options] - * @param {Function} [options.diffSymbols] - Symbol diff function - * @returns {Promise} Update result or null on failure */ -export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, options = {}) { +export async function rebuildFile( + db: BetterSqlite3.Database, + rootDir: string, + filePath: string, + stmts: IncrementalStmts, + engineOpts: EngineOpts, + cache: unknown, + options: { diffSymbols?: (old: unknown[], new_: unknown[]) => unknown } = {}, +): Promise { const { diffSymbols } = options; const relPath = normalizePath(path.relative(rootDir, filePath)); const oldNodes = stmts.countNodes.get(relPath)?.c || 0; - const oldSymbols = diffSymbols ? stmts.listSymbols.all(relPath) : []; + const oldSymbols: unknown[] = diffSymbols ? stmts.listSymbols.all(relPath) : []; // Find reverse-deps BEFORE purging (edges still reference the old nodes) const reverseDeps = findReverseDeps(db, relPath); @@ -423,7 +529,7 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach stmts.deleteNodes.run(relPath); if (!fs.existsSync(filePath)) { - if (cache) cache.remove(filePath); + if (cache) (cache as { remove(p: string): void }).remove(filePath); const symbolDiff = diffSymbols ? 
diffSymbols(oldSymbols, []) : null; return { file: relPath, @@ -438,11 +544,11 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach }; } - let code; + let code: string; try { code = readFileSafe(filePath); } catch (err) { - warn(`Cannot read ${relPath}: ${err.message}`); + warn(`Cannot read ${relPath}: ${(err as Error).message}`); return null; } @@ -452,13 +558,13 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach insertFileNodes(stmts, relPath, symbols); const newNodes = stmts.countNodes.get(relPath)?.c || 0; - const newSymbols = diffSymbols ? stmts.listSymbols.all(relPath) : []; + const newSymbols: unknown[] = diffSymbols ? stmts.listSymbols.all(relPath) : []; const fileNodeRow = stmts.getNodeId.get(relPath, 'file', relPath, 0); if (!fileNodeRow) return { file: relPath, nodesAdded: newNodes, nodesRemoved: oldNodes, edgesAdded: 0 }; - const aliases = { baseUrl: null, paths: {} }; + const aliases: PathAliases = { baseUrl: null, paths: {} }; let edgesAdded = buildContainmentEdges(db, stmts, relPath, symbols); edgesAdded += rebuildDirContainment(db, stmts, relPath); @@ -469,7 +575,7 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach // Cascade: rebuild outgoing edges for reverse-dep files. // Two-pass approach: first rebuild direct edges (creating reexports edges for barrels), // then add barrel import edges (which need reexports edges to exist for resolution). 
- const depSymbols = new Map(); + const depSymbols = new Map(); for (const depRelPath of reverseDeps) { const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache); if (symbols_) { @@ -485,7 +591,7 @@ export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cach for (const [depRelPath, symbols_] of depSymbols) { const fileNodeRow_ = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0); if (!fileNodeRow_) continue; - const aliases_ = { baseUrl: null, paths: {} }; + const aliases_: PathAliases = { baseUrl: null, paths: {} }; for (const imp of symbols_.imports) { if (imp.reexport) continue; const resolvedPath = resolveImportPath( diff --git a/src/domain/graph/builder/stages/build-edges.js b/src/domain/graph/builder/stages/build-edges.ts similarity index 73% rename from src/domain/graph/builder/stages/build-edges.js rename to src/domain/graph/builder/stages/build-edges.ts index 1aa03471..1445709c 100644 --- a/src/domain/graph/builder/stages/build-edges.js +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -6,40 +6,99 @@ */ import path from 'node:path'; import { performance } from 'node:perf_hooks'; +import type BetterSqlite3 from 'better-sqlite3'; import { getNodeId } from '../../../../db/index.js'; import { loadNative } from '../../../../infrastructure/native.js'; +import type { + Call, + ClassRelation, + Definition, + ExtractorOutput, + Import, + NativeAddon, + NodeRow, + TypeMapEntry, +} from '../../../../types.js'; import { computeConfidence } from '../../resolve.js'; +import type { PipelineContext } from '../context.js'; import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js'; import { getResolved, isBarrelFile, resolveBarrelExport } from './resolve-imports.js'; +// ── Local types ────────────────────────────────────────────────────────── + +type EdgeRowTuple = [number, number, string, number, number]; + +interface NodeIdStmt { + get(name: string, kind: string, file: string, line: number): { id: number } | 
undefined; +} + +/** Minimal node shape returned by the SELECT query. */ +interface QueryNodeRow { + id: number; + name: string; + kind: string; + file: string; + line: number; +} + +/** Shape fed to the native buildCallEdges FFI. */ +interface NativeFileEntry { + file: string; + fileNodeId: number; + definitions: Array<{ name: string; kind: string; line: number; endLine: number | null }>; + calls: Call[]; + importedNames: Array<{ name: string; file: string }>; + classes: ClassRelation[]; + typeMap: Array<{ name: string; typeName: string; confidence: number }>; +} + +/** Shape returned by native buildCallEdges. */ +interface NativeEdge { + sourceId: number; + targetId: number; + kind: string; + confidence: number; + dynamic: number; +} + +/** TypeMap entry used in receiver supplement (normalized from native format). */ +interface NormalizedTypeEntry { + type: string; + confidence: number; +} + // ── Node lookup setup ─────────────────────────────────────────────────── -function makeGetNodeIdStmt(db) { +function makeGetNodeIdStmt(db: BetterSqlite3.Database): NodeIdStmt { return { - get: (name, kind, file, line) => { + get: (name: string, kind: string, file: string, line: number) => { const id = getNodeId(db, name, kind, file, line); return id != null ? 
{ id } : undefined; }, }; } -function setupNodeLookups(ctx, allNodes) { +function setupNodeLookups(ctx: PipelineContext, allNodes: QueryNodeRow[]): void { ctx.nodesByName = new Map(); for (const node of allNodes) { if (!ctx.nodesByName.has(node.name)) ctx.nodesByName.set(node.name, []); - ctx.nodesByName.get(node.name).push(node); + ctx.nodesByName.get(node.name)!.push(node as unknown as NodeRow); } ctx.nodesByNameAndFile = new Map(); for (const node of allNodes) { const key = `${node.name}|${node.file}`; if (!ctx.nodesByNameAndFile.has(key)) ctx.nodesByNameAndFile.set(key, []); - ctx.nodesByNameAndFile.get(key).push(node); + ctx.nodesByNameAndFile.get(key)!.push(node as unknown as NodeRow); } } // ── Import edges ──────────────────────────────────────────────────────── -function buildImportEdges(ctx, getNodeIdStmt, allEdgeRows) { +function buildImportEdges( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], +): void { const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; for (const [relPath, symbols] of fileSymbols) { @@ -69,8 +128,16 @@ function buildImportEdges(ctx, getNodeIdStmt, allEdgeRows) { } } -function buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, edgeRows) { - const resolvedSources = new Set(); +function buildBarrelEdges( + ctx: PipelineContext, + imp: Import, + resolvedPath: string, + fileNodeId: number, + edgeKind: string, + getNodeIdStmt: NodeIdStmt, + edgeRows: EdgeRowTuple[], +): void { + const resolvedSources = new Set(); for (const name of imp.names) { const cleanName = name.replace(/^\*\s+as\s+/, ''); const actualSource = resolveBarrelExport(ctx, resolvedPath, cleanName); @@ -92,9 +159,15 @@ function buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeI // ── Call edges (native engine) ────────────────────────────────────────── -function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) { +function buildCallEdgesNative( + ctx: 
PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], + allNodes: QueryNodeRow[], + native: NativeAddon, +): void { const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; - const nativeFiles = []; + const nativeFiles: NativeFileEntry[] = []; for (const [relPath, symbols] of fileSymbols) { if (barrelOnlyFiles.has(relPath)) continue; @@ -102,7 +175,7 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) if (!fileNodeRow) continue; const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); - const typeMap = + const typeMap: Array<{ name: string; typeName: string; confidence: number }> = symbols.typeMap instanceof Map ? [...symbols.typeMap.entries()].map(([name, entry]) => ({ name, @@ -110,7 +183,7 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) confidence: typeof entry === 'object' ? entry.confidence : 0.9, })) : Array.isArray(symbols.typeMap) - ? symbols.typeMap + ? (symbols.typeMap as Array<{ name: string; typeName: string; confidence: number }>) : []; nativeFiles.push({ file: relPath, @@ -128,7 +201,9 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) }); } - const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]); + const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [ + ...BUILTIN_RECEIVERS, + ]) as NativeEdge[]; for (const e of nativeEdges) { allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); } @@ -142,8 +217,13 @@ function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) } } -function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) { - const importedNames = []; +function buildImportedNamesForNative( + ctx: PipelineContext, + relPath: string, + symbols: ExtractorOutput, + rootDir: string, +): Array<{ name: string; file: string }> { + const importedNames: Array<{ name: string; file: string }> = []; for (const imp 
of symbols.imports) { const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); for (const name of imp.names) { @@ -161,8 +241,13 @@ function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) { // ── Receiver edge supplement for older native binaries ────────────────── -function supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows) { - const seenCallEdges = new Set(); +function supplementReceiverEdges( + ctx: PipelineContext, + nativeFiles: NativeFileEntry[], + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], +): void { + const seenCallEdges = new Set(); // Collect existing edges to avoid duplicates for (const row of allEdgeRows) { seenCallEdges.add(`${row[0]}|${row[1]}|${row[2]}`); @@ -170,7 +255,7 @@ function supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows) { for (const nf of nativeFiles) { const relPath = nf.file; - const typeMap = new Map( + const typeMap = new Map( nf.typeMap.map((t) => [t.name, { type: t.typeName, confidence: t.confidence ?? 
0.9 }]), ); const fileNodeRow = { id: nf.fileNodeId }; @@ -208,7 +293,11 @@ function supplementReceiverEdges(ctx, nativeFiles, getNodeIdStmt, allEdgeRows) { // ── Call edges (JS fallback) ──────────────────────────────────────────── -function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { +function buildCallEdgesJS( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], +): void { const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; for (const [relPath, symbols] of fileSymbols) { @@ -217,8 +306,8 @@ function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { if (!fileNodeRow) continue; const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); - const typeMap = symbols.typeMap || new Map(); - const seenCallEdges = new Set(); + const typeMap: Map = symbols.typeMap || new Map(); + const seenCallEdges = new Set(); buildFileCallEdges( ctx, @@ -235,8 +324,13 @@ function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { } } -function buildImportedNamesMap(ctx, relPath, symbols, rootDir) { - const importedNames = new Map(); +function buildImportedNamesMap( + ctx: PipelineContext, + relPath: string, + symbols: ExtractorOutput, + rootDir: string, +): Map { + const importedNames = new Map(); for (const imp of symbols.imports) { const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); for (const name of imp.names) { @@ -246,8 +340,14 @@ function buildImportedNamesMap(ctx, relPath, symbols, rootDir) { return importedNames; } -function findCaller(call, definitions, relPath, getNodeIdStmt, fileNodeRow) { - let caller = null; +function findCaller( + call: Call, + definitions: ReadonlyArray<{ name: string; kind: string; line: number; endLine?: number | null }>, + relPath: string, + getNodeIdStmt: NodeIdStmt, + fileNodeRow: { id: number }, +): { id: number } { + let caller: { id: number } | null = null; let callerSpan = Infinity; for (const def of definitions) { if (def.line <= call.line) { @@ -270,9 
+370,15 @@ function findCaller(call, definitions, relPath, getNodeIdStmt, fileNodeRow) { return caller || fileNodeRow; } -function resolveCallTargets(ctx, call, relPath, importedNames, typeMap) { +function resolveCallTargets( + ctx: PipelineContext, + call: Call, + relPath: string, + importedNames: Map, + typeMap: Map, +): { targets: NodeRow[]; importedFrom: string | undefined } { const importedFrom = importedNames.get(call.name); - let targets; + let targets: NodeRow[] | undefined; if (importedFrom) { targets = ctx.nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || []; @@ -293,8 +399,8 @@ function resolveCallTargets(ctx, call, relPath, importedNames, typeMap) { if (targets.length > 1) { targets.sort((a, b) => { - const confA = computeConfidence(relPath, a.file, importedFrom); - const confB = computeConfidence(relPath, b.file, importedFrom); + const confA = computeConfidence(relPath, a.file, importedFrom ?? null); + const confB = computeConfidence(relPath, b.file, importedFrom ?? 
null); return confB - confA; }); } @@ -302,7 +408,12 @@ function resolveCallTargets(ctx, call, relPath, importedNames, typeMap) { return { targets, importedFrom }; } -function resolveByMethodOrGlobal(ctx, call, relPath, typeMap) { +function resolveByMethodOrGlobal( + ctx: PipelineContext, + call: Call, + relPath: string, + typeMap: Map, +): NodeRow[] { // Type-aware resolution: translate variable receiver to its declared type if (call.receiver && typeMap) { const typeEntry = typeMap.get(call.receiver); @@ -332,21 +443,21 @@ function resolveByMethodOrGlobal(ctx, call, relPath, typeMap) { } function buildFileCallEdges( - ctx, - relPath, - symbols, - fileNodeRow, - importedNames, - seenCallEdges, - getNodeIdStmt, - allEdgeRows, - typeMap, -) { + ctx: PipelineContext, + relPath: string, + symbols: ExtractorOutput, + fileNodeRow: { id: number }, + importedNames: Map, + seenCallEdges: Set, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], + typeMap: Map, +): void { for (const call of symbols.calls) { if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; const caller = findCaller(call, symbols.definitions, relPath, getNodeIdStmt, fileNodeRow); - const isDynamic = call.dynamic ? 1 : 0; + const isDynamic: number = call.dynamic ? 1 : 0; const { targets, importedFrom } = resolveCallTargets( ctx, call, @@ -359,7 +470,7 @@ function buildFileCallEdges( const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { seenCallEdges.add(edgeKey); - const confidence = computeConfidence(relPath, t.file, importedFrom); + const confidence = computeConfidence(relPath, t.file, importedFrom ?? 
null); allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); } } @@ -377,17 +488,25 @@ function buildFileCallEdges( } } -function buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows, typeMap) { +function buildReceiverEdge( + ctx: PipelineContext, + call: Call, + caller: { id: number }, + relPath: string, + seenCallEdges: Set, + allEdgeRows: EdgeRowTuple[], + typeMap: Map, +): void { const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); - const typeEntry = typeMap?.get(call.receiver); + const typeEntry = typeMap?.get(call.receiver!); const typeName = typeEntry ? (typeof typeEntry === 'string' ? typeEntry : typeEntry.type) : null; const typeConfidence = typeEntry && typeof typeEntry === 'object' ? typeEntry.confidence : null; - const effectiveReceiver = typeName || call.receiver; + const effectiveReceiver = typeName || call.receiver!; const samefile = ctx.nodesByNameAndFile.get(`${effectiveReceiver}|${relPath}`) || []; const candidates = samefile.length > 0 ? 
samefile : ctx.nodesByName.get(effectiveReceiver) || []; const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); if (receiverNodes.length > 0 && caller) { - const recvTarget = receiverNodes[0]; + const recvTarget = receiverNodes[0]!; const recvKey = `recv|${caller.id}|${recvTarget.id}`; if (!seenCallEdges.has(recvKey)) { seenCallEdges.add(recvKey); @@ -404,7 +523,12 @@ const HIERARCHY_SOURCE_KINDS = new Set(['class', 'struct', 'record', 'enum']); const EXTENDS_TARGET_KINDS = new Set(['class', 'struct', 'trait', 'record']); const IMPLEMENTS_TARGET_KINDS = new Set(['interface', 'trait', 'class']); -function buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows) { +function buildClassHierarchyEdges( + ctx: PipelineContext, + relPath: string, + symbols: ExtractorOutput, + allEdgeRows: EdgeRowTuple[], +): void { for (const cls of symbols.classes) { if (cls.extends) { const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find((n) => @@ -438,10 +562,7 @@ function buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows) { // ── Main entry point ──────────────────────────────────────────────────── -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function buildEdges(ctx) { +export async function buildEdges(ctx: PipelineContext): Promise { const { db, engineName } = ctx; const getNodeIdStmt = makeGetNodeIdStmt(db); @@ -450,12 +571,12 @@ export async function buildEdges(ctx) { .prepare( `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`, ) - .all(); + .all() as QueryNodeRow[]; setupNodeLookups(ctx, allNodes); const t0 = performance.now(); const buildEdgesTx = db.transaction(() => { - const allEdgeRows = []; + const allEdgeRows: EdgeRowTuple[] = []; buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); diff --git a/src/domain/graph/builder/stages/build-structure.js 
b/src/domain/graph/builder/stages/build-structure.ts similarity index 65% rename from src/domain/graph/builder/stages/build-structure.js rename to src/domain/graph/builder/stages/build-structure.ts index f4737df9..0f22a694 100644 --- a/src/domain/graph/builder/stages/build-structure.js +++ b/src/domain/graph/builder/stages/build-structure.ts @@ -7,19 +7,20 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { debug } from '../../../../infrastructure/logger.js'; import { normalizePath } from '../../../../shared/constants.js'; +import type { ExtractorOutput } from '../../../../types.js'; +import type { PipelineContext } from '../context.js'; import { readFileSafe } from '../helpers.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function buildStructure(ctx) { +export async function buildStructure(ctx: PipelineContext): Promise { const { db, fileSymbols, rootDir, discoveredDirs, allSymbols, isFullBuild } = ctx; // Build line count map (prefer cached _lineCount from parser) ctx.lineCountMap = new Map(); for (const [relPath, symbols] of fileSymbols) { - if (symbols.lineCount ?? symbols._lineCount) { - ctx.lineCountMap.set(relPath, symbols.lineCount ?? symbols._lineCount); + const lineCount = + (symbols as ExtractorOutput & { lineCount?: number }).lineCount ?? symbols._lineCount; + if (lineCount) { + ctx.lineCountMap.set(relPath, lineCount); } else { const absPath = path.join(rootDir, relPath); try { @@ -33,7 +34,9 @@ export async function buildStructure(ctx) { // For incremental builds, load unchanged files from DB for complete structure if (!isFullBuild) { - const existingFiles = db.prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'").all(); + const existingFiles = db + .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'") + .all() as Array<{ file: string }>; const defsByFile = db.prepare( "SELECT name, kind, line FROM nodes WHERE file = ? 
AND kind != 'file' AND kind != 'directory'", ); @@ -48,19 +51,20 @@ export async function buildStructure(ctx) { FROM node_metrics m JOIN nodes n ON m.node_id = n.id WHERE n.kind = 'file'`, ); - const cachedLineCounts = new Map(); - for (const row of lineCountByFile.all()) { + const cachedLineCounts = new Map(); + for (const row of lineCountByFile.all() as Array<{ file: string; line_count: number }>) { cachedLineCounts.set(row.file, row.line_count); } let loadedFromDb = 0; for (const { file: relPath } of existingFiles) { if (!fileSymbols.has(relPath)) { - const importCount = importCountByFile.get(relPath)?.cnt || 0; + const importCount = + (importCountByFile.get(relPath) as { cnt: number } | undefined)?.cnt || 0; fileSymbols.set(relPath, { definitions: defsByFile.all(relPath), - imports: new Array(importCount), + imports: new Array(importCount) as unknown as ExtractorOutput['imports'], exports: [], - }); + } as unknown as ExtractorOutput); loadedFromDb++; } if (!ctx.lineCountMap.has(relPath)) { @@ -83,23 +87,36 @@ export async function buildStructure(ctx) { // Build directory structure const t0 = performance.now(); - const relDirs = new Set(); + const relDirs = new Set(); for (const absDir of discoveredDirs) { relDirs.add(normalizePath(path.relative(rootDir, absDir))); } try { - const { buildStructure: buildStructureFn } = await import('../../../../features/structure.js'); + const { buildStructure: buildStructureFn } = (await import( + '../../../../features/structure.js' + )) as { + buildStructure: ( + db: PipelineContext['db'], + fileSymbols: Map, + rootDir: string, + lineCountMap: Map, + directories: Set, + changedFiles: string[] | null, + ) => void; + }; const changedFilePaths = isFullBuild ? 
null : [...allSymbols.keys()]; buildStructureFn(db, fileSymbols, rootDir, ctx.lineCountMap, relDirs, changedFilePaths); } catch (err) { - debug(`Structure analysis failed: ${err.message}`); + debug(`Structure analysis failed: ${(err as Error).message}`); } ctx.timing.structureMs = performance.now() - t0; // Classify node roles const t1 = performance.now(); try { - const { classifyNodeRoles } = await import('../../../../features/structure.js'); + const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { + classifyNodeRoles: (db: PipelineContext['db']) => Record; + }; const roleSummary = classifyNodeRoles(db); debug( `Roles: ${Object.entries(roleSummary) @@ -107,7 +124,7 @@ export async function buildStructure(ctx) { .join(', ')}`, ); } catch (err) { - debug(`Role classification failed: ${err.message}`); + debug(`Role classification failed: ${(err as Error).message}`); } ctx.timing.rolesMs = performance.now() - t1; } diff --git a/src/domain/graph/builder/stages/collect-files.js b/src/domain/graph/builder/stages/collect-files.ts similarity index 79% rename from src/domain/graph/builder/stages/collect-files.js rename to src/domain/graph/builder/stages/collect-files.ts index 9f3eb636..6551b598 100644 --- a/src/domain/graph/builder/stages/collect-files.js +++ b/src/domain/graph/builder/stages/collect-files.ts @@ -7,19 +7,17 @@ import fs from 'node:fs'; import path from 'node:path'; import { info } from '../../../../infrastructure/logger.js'; import { normalizePath } from '../../../../shared/constants.js'; +import type { PipelineContext } from '../context.js'; import { collectFiles as collectFilesUtil } from '../helpers.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function collectFiles(ctx) { +export async function collectFiles(ctx: PipelineContext): Promise { const { rootDir, config, opts } = ctx; if (opts.scope) { // Scoped rebuild: rebuild only specified files - const scopedFiles = opts.scope.map((f) 
=> normalizePath(f)); - const existing = []; - const missing = []; + const scopedFiles = opts.scope.map((f: string) => normalizePath(f)); + const existing: Array<{ file: string; relPath: string }> = []; + const missing: string[] = []; for (const rel of scopedFiles) { const abs = path.join(rootDir, rel); if (fs.existsSync(abs)) { @@ -36,7 +34,7 @@ export async function collectFiles(ctx) { ctx.isFullBuild = false; info(`Scoped rebuild: ${existing.length} files to rebuild, ${missing.length} to purge`); } else { - const collected = collectFilesUtil(rootDir, [], config, new Set()); + const collected = collectFilesUtil(rootDir, [], config, new Set()); ctx.allFiles = collected.files; ctx.discoveredDirs = collected.directories; info(`Found ${ctx.allFiles.length} files to parse`); diff --git a/src/domain/graph/builder/stages/detect-changes.js b/src/domain/graph/builder/stages/detect-changes.ts similarity index 63% rename from src/domain/graph/builder/stages/detect-changes.js rename to src/domain/graph/builder/stages/detect-changes.ts index baf03e95..cbb18897 100644 --- a/src/domain/graph/builder/stages/detect-changes.js +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -1,27 +1,65 @@ /** * Stage: detectChanges * - * Three-tier change detection cascade + incremental reverse-dependency handling. - * Sets ctx.parseChanges, ctx.metadataUpdates, ctx.removed, ctx.isFullBuild, ctx.earlyExit. + * Determines which files have changed since the last build using a tiered + * strategy: journal → mtime+size → content hash. Handles full, incremental, + * and scoped rebuilds. 
*/ import fs from 'node:fs'; import path from 'node:path'; +import type BetterSqlite3 from 'better-sqlite3'; import { closeDb } from '../../../../db/index.js'; import { debug, info } from '../../../../infrastructure/logger.js'; import { normalizePath } from '../../../../shared/constants.js'; +import type { EngineOpts, ExtractorOutput } from '../../../../types.js'; import { parseFilesAuto } from '../../../parser.js'; import { readJournal, writeJournalHeader } from '../../journal.js'; +import type { PipelineContext } from '../context.js'; import { fileHash, fileStat, purgeFilesFromGraph, readFileSafe } from '../helpers.js'; -// ── Three-tier change detection ───────────────────────────────────────── +// ── Local types ──────────────────────────────────────────────────────── -/** - * Determine which files have changed since last build. - * Tier 0 — Journal: O(changed) when watcher was running - * Tier 1 — mtime+size: O(n) stats, O(changed) reads - * Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1) - */ -function getChangedFiles(db, allFiles, rootDir) { +interface FileHashRow { + file: string; + hash: string; + mtime: number; + size: number; +} + +interface FileStat { + mtimeMs: number; + size: number; +} + +interface ChangedFile { + file: string; + relPath?: string; + content?: string; + hash?: string; + stat?: FileStat; + metadataOnly?: boolean; + _reverseDepOnly?: boolean; +} + +interface ChangeResult { + changed: ChangedFile[]; + removed: string[]; + isFullBuild: boolean; +} + +interface NeedsHashItem { + file: string; + relPath: string; + stat?: FileStat; +} + +// ── Helpers ──────────────────────────────────────────────────────────── + +function getChangedFiles( + db: BetterSqlite3.Database, + allFiles: string[], + rootDir: string, +): ChangeResult { let hasTable = false; try { db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get(); @@ -38,30 +76,28 @@ function getChangedFiles(db, allFiles, rootDir) { }; } - const existing = new Map( - db - 
.prepare('SELECT file, hash, mtime, size FROM file_hashes') - .all() - .map((r) => [r.file, r]), + const existing = new Map( + (db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[]).map( + (r) => [r.file, r], + ), ); const removed = detectRemovedFiles(existing, allFiles, rootDir); - - // Tier 0: Journal const journalResult = tryJournalTier(db, existing, rootDir, removed); if (journalResult) return journalResult; - - // Tier 1 + 2: mtime/size fast-path → hash comparison return mtimeAndHashTiers(existing, allFiles, rootDir, removed); } -function detectRemovedFiles(existing, allFiles, rootDir) { - const currentFiles = new Set(); +function detectRemovedFiles( + existing: Map, + allFiles: string[], + rootDir: string, +): string[] { + const currentFiles = new Set(); for (const file of allFiles) { currentFiles.add(normalizePath(path.relative(rootDir, file))); } - - const removed = []; + const removed: string[] = []; for (const existingFile of existing.keys()) { if (!currentFiles.has(existingFile)) { removed.push(existingFile); @@ -70,15 +106,22 @@ function detectRemovedFiles(existing, allFiles, rootDir) { return removed; } -function tryJournalTier(db, existing, rootDir, removed) { +function tryJournalTier( + db: BetterSqlite3.Database, + existing: Map, + rootDir: string, + removed: string[], +): ChangeResult | null { const journal = readJournal(rootDir); if (!journal.valid) return null; - const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get(); + const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get() as + | { latest: number | null } + | undefined; const latestDbMtime = dbMtimes?.latest || 0; - const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0; + const hasJournalEntries = journal.changed!.length > 0 || journal.removed!.length > 0; - if (!hasJournalEntries || journal.timestamp < latestDbMtime) { + if (!hasJournalEntries || journal.timestamp! 
< latestDbMtime) { debug( `Tier 0: skipped (${hasJournalEntries ? 'timestamp stale' : 'no entries'}), falling to Tier 1`, ); @@ -86,16 +129,15 @@ function tryJournalTier(db, existing, rootDir, removed) { } debug( - `Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`, + `Tier 0: journal valid, ${journal.changed!.length} changed, ${journal.removed!.length} removed`, ); - const changed = []; + const changed: ChangedFile[] = []; - for (const relPath of journal.changed) { + for (const relPath of journal.changed!) { const absPath = path.join(rootDir, relPath); - const stat = fileStat(absPath); + const stat = fileStat(absPath) as FileStat | undefined; if (!stat) continue; - - let content; + let content: string | undefined; try { content = readFileSafe(absPath); } catch { @@ -109,38 +151,37 @@ function tryJournalTier(db, existing, rootDir, removed) { } const removedSet = new Set(removed); - for (const relPath of journal.removed) { + for (const relPath of journal.removed!) 
{ if (existing.has(relPath)) removedSet.add(relPath); } return { changed, removed: [...removedSet], isFullBuild: false }; } -function mtimeAndHashTiers(existing, allFiles, rootDir, removed) { - // Tier 1: mtime+size fast-path - const needsHash = []; - const skipped = []; +function mtimeAndHashTiers( + existing: Map, + allFiles: string[], + rootDir: string, + removed: string[], +): ChangeResult { + const needsHash: NeedsHashItem[] = []; + const skipped: string[] = []; for (const file of allFiles) { const relPath = normalizePath(path.relative(rootDir, file)); const record = existing.get(relPath); - if (!record) { needsHash.push({ file, relPath }); continue; } - - const stat = fileStat(file); + const stat = fileStat(file) as FileStat | undefined; if (!stat) continue; - const storedMtime = record.mtime || 0; const storedSize = record.size || 0; - if (storedSize > 0 && Math.floor(stat.mtimeMs) === storedMtime && stat.size === storedSize) { skipped.push(relPath); continue; } - needsHash.push({ file, relPath, stat }); } @@ -148,20 +189,17 @@ function mtimeAndHashTiers(existing, allFiles, rootDir, removed) { debug(`Tier 1: ${skipped.length} skipped by mtime+size, ${needsHash.length} need hash check`); } - // Tier 2: Hash comparison - const changed = []; - + const changed: ChangedFile[] = []; for (const item of needsHash) { - let content; + let content: string | undefined; try { content = readFileSafe(item.file); } catch { continue; } const hash = fileHash(content); - const stat = item.stat || fileStat(item.file); + const stat = item.stat || (fileStat(item.file) as FileStat | undefined); const record = existing.get(item.relPath); - if (!record || record.hash !== hash) { changed.push({ file: item.file, content, hash, relPath: item.relPath, stat }); } else if (stat) { @@ -186,41 +224,44 @@ function mtimeAndHashTiers(existing, allFiles, rootDir, removed) { return { changed, removed, isFullBuild: false }; } -// ── Pending analysis 
──────────────────────────────────────────────────── - -/** - * Run pending analysis pass when no file changes but analysis tables are empty. - */ -async function runPendingAnalysis(ctx) { +async function runPendingAnalysis(ctx: PipelineContext): Promise { const { db, opts, engineOpts, allFiles, rootDir } = ctx; - const needsCfg = - opts.cfg !== false && + (opts as Record)['cfg'] !== false && (() => { try { - return db.prepare('SELECT COUNT(*) as c FROM cfg_blocks').get().c === 0; + return ( + (db.prepare('SELECT COUNT(*) as c FROM cfg_blocks').get() as { c: number } | undefined) + ?.c === 0 + ); } catch { return true; } })(); const needsDataflow = - opts.dataflow !== false && + (opts as Record)['dataflow'] !== false && (() => { try { - return db.prepare('SELECT COUNT(*) as c FROM dataflow').get().c === 0; + return ( + (db.prepare('SELECT COUNT(*) as c FROM dataflow').get() as { c: number } | undefined) + ?.c === 0 + ); } catch { return true; } })(); - if (!needsCfg && !needsDataflow) return false; info('No file changes. 
Running pending analysis pass...'); const analysisOpts = { ...engineOpts, - dataflow: needsDataflow && opts.dataflow !== false, + dataflow: needsDataflow && (opts as Record)['dataflow'] !== false, }; - const analysisSymbols = await parseFilesAuto(allFiles, rootDir, analysisOpts); + const analysisSymbols: Map = await parseFilesAuto( + allFiles, + rootDir, + analysisOpts, + ); if (needsCfg) { const { buildCFGData } = await import('../../../../features/cfg.js'); await buildCFGData(db, analysisSymbols, rootDir, engineOpts); @@ -232,9 +273,7 @@ async function runPendingAnalysis(ctx) { return true; } -// ── Metadata self-heal ────────────────────────────────────────────────── - -function healMetadata(ctx) { +function healMetadata(ctx: PipelineContext): void { const { db, metadataUpdates } = ctx; if (!metadataUpdates || metadataUpdates.length === 0) return; try { @@ -243,7 +282,7 @@ function healMetadata(ctx) { ); const healTx = db.transaction(() => { for (const item of metadataUpdates) { - const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; + const mtime = item.stat ? Math.floor(item.stat.mtime) : 0; const size = item.stat ? item.stat.size : 0; healHash.run(item.relPath, item.hash, mtime, size); } @@ -255,12 +294,13 @@ function healMetadata(ctx) { } } -// ── Reverse-dependency cascade ────────────────────────────────────────── - -function findReverseDependencies(db, changedRelPaths, rootDir) { - const reverseDeps = new Set(); +function findReverseDependencies( + db: BetterSqlite3.Database, + changedRelPaths: Set, + rootDir: string, +): Set { + const reverseDeps = new Set(); if (changedRelPaths.size === 0) return reverseDeps; - const findReverseDepsStmt = db.prepare(` SELECT DISTINCT n_src.file FROM edges e JOIN nodes n_src ON e.source_id = n_src.id @@ -268,7 +308,7 @@ function findReverseDependencies(db, changedRelPaths, rootDir) { WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file AND n_src.kind != 'directory' `); for (const relPath of changedRelPaths) { - for (const row of findReverseDepsStmt.all(relPath)) { + for (const row of findReverseDepsStmt.all(relPath) as Array<{ file: string }>) { if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { const absPath = path.join(rootDir, row.file); if (fs.existsSync(absPath)) { @@ -280,13 +320,15 @@ function findReverseDependencies(db, changedRelPaths, rootDir) { return reverseDeps; } -function purgeAndAddReverseDeps(ctx, changePaths, reverseDeps) { +function purgeAndAddReverseDeps( + ctx: PipelineContext, + changePaths: string[], + reverseDeps: Set, +): void { const { db, rootDir } = ctx; - if (changePaths.length > 0 || ctx.removed.length > 0) { purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); } - if (reverseDeps.size > 0) { const deleteOutgoingEdgesForFile = db.prepare( 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', @@ -301,9 +343,7 @@ function purgeAndAddReverseDeps(ctx, changePaths, reverseDeps) { } } -// ── Shared helpers ─────────────────────────────────────────────────────── - -function detectHasEmbeddings(db) { +function detectHasEmbeddings(db: BetterSqlite3.Database): boolean { try { db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); return true; @@ -312,39 +352,27 @@ function detectHasEmbeddings(db) { } } -// ── Scoped build path ─────────────────────────────────────────────────── - -function handleScopedBuild(ctx) { +function handleScopedBuild(ctx: PipelineContext): void { const { db, rootDir, opts } = ctx; - ctx.hasEmbeddings = detectHasEmbeddings(db); - const changePaths = ctx.parseChanges.map( (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), ); - - let reverseDeps = new Set(); - if (!opts.noReverseDeps) { - const changedRelPaths = new Set([...changePaths, ...ctx.removed]); + let reverseDeps = new Set(); + if (!(opts as Record)['noReverseDeps']) { + 
const changedRelPaths = new Set([...changePaths, ...ctx.removed]); reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); } - - // Purge changed + removed files, then add reverse-deps purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); - info( `Scoped rebuild: ${changePaths.length} changed, ${ctx.removed.length} removed, ${reverseDeps.size} reverse-deps`, ); } -// ── Full/incremental build path ───────────────────────────────────────── - -function handleFullBuild(ctx) { +function handleFullBuild(ctx: PipelineContext): void { const { db } = ctx; - const hasEmbeddings = detectHasEmbeddings(db); ctx.hasEmbeddings = hasEmbeddings; - const deletions = 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; db.exec( @@ -354,14 +382,12 @@ function handleFullBuild(ctx) { ); } -function handleIncrementalBuild(ctx) { +function handleIncrementalBuild(ctx: PipelineContext): void { const { db, rootDir, opts } = ctx; - ctx.hasEmbeddings = detectHasEmbeddings(db); - - let reverseDeps = new Set(); - if (!opts.noReverseDeps) { - const changedRelPaths = new Set(); + let reverseDeps = new Set(); + if (!(opts as Record)['noReverseDeps']) { + const changedRelPaths = new Set(); for (const item of ctx.parseChanges) { changedRelPaths.add(item.relPath || normalizePath(path.relative(rootDir, item.file))); } @@ -370,45 +396,54 @@ function handleIncrementalBuild(ctx) { } reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); } - info( `Incremental: ${ctx.parseChanges.length} changed, ${ctx.removed.length} removed${reverseDeps.size > 0 ? 
`, ${reverseDeps.size} reverse-deps` : ''}`, ); if (ctx.parseChanges.length > 0) debug(`Changed files: ${ctx.parseChanges.map((c) => c.relPath).join(', ')}`); if (ctx.removed.length > 0) debug(`Removed files: ${ctx.removed.join(', ')}`); - const changePaths = ctx.parseChanges.map( (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), ); purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); } -// ── Main entry point ──────────────────────────────────────────────────── - -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function detectChanges(ctx) { +export async function detectChanges(ctx: PipelineContext): Promise { const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx; - - // Scoped builds already set parseChanges in collectFiles - if (opts.scope) { + if ((opts as Record)['scope']) { handleScopedBuild(ctx); return; } - const increResult = incremental && !forceFullRebuild ? getChangedFiles(db, allFiles, rootDir) - : { changed: allFiles.map((f) => ({ file: f })), removed: [], isFullBuild: true }; - + : { + changed: allFiles.map((f): ChangedFile => ({ file: f })), + removed: [] as string[], + isFullBuild: true, + }; ctx.removed = increResult.removed; ctx.isFullBuild = increResult.isFullBuild; - ctx.parseChanges = increResult.changed.filter((c) => !c.metadataOnly); - ctx.metadataUpdates = increResult.changed.filter((c) => c.metadataOnly); - - // Early exit: no changes detected + ctx.parseChanges = increResult.changed + .filter((c) => !c.metadataOnly) + .map((c) => ({ + file: c.file, + relPath: c.relPath, + content: c.content, + hash: c.hash, + stat: c.stat ? 
{ mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size } : undefined, + _reverseDepOnly: c._reverseDepOnly, + })); + ctx.metadataUpdates = increResult.changed + .filter( + (c): c is ChangedFile & { relPath: string; hash: string; stat: FileStat } => + !!c.metadataOnly && !!c.relPath && !!c.hash && !!c.stat, + ) + .map((c) => ({ + relPath: c.relPath, + hash: c.hash, + stat: { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size }, + })); if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) { const ranAnalysis = await runPendingAnalysis(ctx); if (ranAnalysis) { @@ -417,7 +452,6 @@ export async function detectChanges(ctx) { ctx.earlyExit = true; return; } - healMetadata(ctx); info('No changes detected. Graph is up to date.'); closeDb(db); @@ -425,7 +459,6 @@ export async function detectChanges(ctx) { ctx.earlyExit = true; return; } - if (ctx.isFullBuild) { handleFullBuild(ctx); } else { diff --git a/src/domain/graph/builder/stages/finalize.js b/src/domain/graph/builder/stages/finalize.ts similarity index 64% rename from src/domain/graph/builder/stages/finalize.js rename to src/domain/graph/builder/stages/finalize.ts index 6b493785..90d23757 100644 --- a/src/domain/graph/builder/stages/finalize.js +++ b/src/domain/graph/builder/stages/finalize.ts @@ -9,33 +9,36 @@ import { performance } from 'node:perf_hooks'; import { closeDb, getBuildMeta, setBuildMeta } from '../../../../db/index.js'; import { debug, info, warn } from '../../../../infrastructure/logger.js'; import { writeJournalHeader } from '../../journal.js'; +import type { PipelineContext } from '../context.js'; const __builderDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1')); -const CODEGRAPH_VERSION = JSON.parse( - fs.readFileSync(path.join(__builderDir, '..', '..', '..', '..', '..', 'package.json'), 'utf-8'), +const CODEGRAPH_VERSION = ( + JSON.parse( + fs.readFileSync(path.join(__builderDir, '..', '..', '..', '..', '..', 'package.json'), 'utf-8'), 
+ ) as { version: string } ).version; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function finalize(ctx) { +export async function finalize(ctx: PipelineContext): Promise { const { db, allSymbols, rootDir, isFullBuild, hasEmbeddings, config, opts, schemaVersion } = ctx; const t0 = performance.now(); // Release cached WASM trees for (const [, symbols] of allSymbols) { - if (symbols._tree && typeof symbols._tree.delete === 'function') { + const tree = symbols._tree as { delete?: () => void } | undefined; + if (tree && typeof tree.delete === 'function') { try { - symbols._tree.delete(); - } catch {} + tree.delete(); + } catch { + /* ignore cleanup errors */ + } } - symbols._tree = null; - symbols._langId = null; + symbols._tree = undefined; + symbols._langId = undefined; } - const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c; - const actualEdgeCount = db.prepare('SELECT COUNT(*) as c FROM edges').get().c; + const nodeCount = (db.prepare('SELECT COUNT(*) as c FROM nodes').get() as { c: number }).c; + const actualEdgeCount = (db.prepare('SELECT COUNT(*) as c FROM edges').get() as { c: number }).c; info(`Graph built: ${nodeCount} nodes, ${actualEdgeCount} edges`); info(`Stored in ${ctx.dbPath}`); @@ -49,10 +52,11 @@ export async function finalize(ctx) { if (prevN > 0) { const nodeDrift = Math.abs(nodeCount - prevN) / prevN; const edgeDrift = prevE > 0 ? Math.abs(actualEdgeCount - prevE) / prevE : 0; - const driftThreshold = config.build?.driftThreshold ?? 0.2; + const driftThreshold = + (config as { build?: { driftThreshold?: number } }).build?.driftThreshold ?? 0.2; if (nodeDrift > driftThreshold || edgeDrift > driftThreshold) { warn( - `Incremental build diverged significantly from previous counts (nodes: ${prevN}→${nodeCount} [${(nodeDrift * 100).toFixed(1)}%], edges: ${prevE}→${actualEdgeCount} [${(edgeDrift * 100).toFixed(1)}%], threshold: ${(driftThreshold * 100).toFixed(0)}%). 
Consider rebuilding with --no-incremental.`, + `Incremental build diverged significantly from previous counts (nodes: ${prevN}\u2192${nodeCount} [${(nodeDrift * 100).toFixed(1)}%], edges: ${prevE}\u2192${actualEdgeCount} [${(edgeDrift * 100).toFixed(1)}%], threshold: ${(driftThreshold * 100).toFixed(0)}%). Consider rebuilding with --no-incremental.`, ); } } @@ -62,24 +66,29 @@ export async function finalize(ctx) { // Orphaned embeddings warning if (hasEmbeddings) { try { - const orphaned = db - .prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)') - .get().c; + const orphaned = ( + db + .prepare( + 'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)', + ) + .get() as { c: number } + ).c; if (orphaned > 0) { warn( `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`, ); } } catch { - /* ignore — embeddings table may have been dropped */ + /* ignore - embeddings table may have been dropped */ } } // Unused exports warning try { - const unusedCount = db - .prepare( - `SELECT COUNT(*) as c FROM nodes + const unusedCount = ( + db + .prepare( + `SELECT COUNT(*) as c FROM nodes WHERE exported = 1 AND kind != 'file' AND id NOT IN ( SELECT DISTINCT e.target_id FROM edges e @@ -87,8 +96,9 @@ export async function finalize(ctx) { JOIN nodes target ON e.target_id = target.id WHERE e.kind = 'calls' AND caller.file != target.file )`, - ) - .get().c; + ) + .get() as { c: number } + ).c; if (unusedCount > 0) { warn( `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. 
Run "codegraph exports --unused" to inspect.`, @@ -110,7 +120,7 @@ export async function finalize(ctx) { edge_count: actualEdgeCount, }); } catch (err) { - warn(`Failed to write build metadata: ${err.message}`); + warn(`Failed to write build metadata: ${(err as Error).message}`); } closeDb(db); @@ -127,10 +137,12 @@ export async function finalize(ctx) { debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`); } else { try { - const { registerRepo } = await import('../../../../infrastructure/registry.js'); + const { registerRepo } = (await import('../../../../infrastructure/registry.js')) as { + registerRepo: (rootDir: string) => void; + }; registerRepo(rootDir); } catch (err) { - debug(`Auto-registration failed: ${err.message}`); + debug(`Auto-registration failed: ${(err as Error).message}`); } } } diff --git a/src/domain/graph/builder/stages/insert-nodes.js b/src/domain/graph/builder/stages/insert-nodes.ts similarity index 74% rename from src/domain/graph/builder/stages/insert-nodes.js rename to src/domain/graph/builder/stages/insert-nodes.ts index 6e22c966..46737844 100644 --- a/src/domain/graph/builder/stages/insert-nodes.js +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -6,7 +6,10 @@ */ import path from 'node:path'; import { performance } from 'node:perf_hooks'; +import type BetterSqlite3 from 'better-sqlite3'; import { bulkNodeIdsByFile } from '../../../../db/index.js'; +import type { ExtractorOutput, MetadataUpdate, NodeIdRow } from '../../../../types.js'; +import type { PipelineContext } from '../context.js'; import { batchInsertEdges, batchInsertNodes, @@ -15,10 +18,23 @@ import { readFileSafe, } from '../helpers.js'; +/** Shape of precomputed file data gathered from filesToParse entries. 
*/ +interface PrecomputedFileData { + file: string; + relPath?: string; + content?: string; + hash?: string; + stat?: { mtime: number; size: number } | null; + _reverseDepOnly?: boolean; +} + // ── Phase 1: Insert file nodes, definitions, exports ──────────────────── -function insertDefinitionsAndExports(db, allSymbols) { - const phase1Rows = []; +function insertDefinitionsAndExports( + db: BetterSqlite3.Database, + allSymbols: Map, +): void { + const phase1Rows: unknown[][] = []; for (const [relPath, symbols] of allSymbols) { phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]); for (const def of symbols.definitions) { @@ -55,10 +71,13 @@ function insertDefinitionsAndExports(db, allSymbols) { // ── Phase 2: Insert children (needs parent IDs) ──────────────────────── -function insertChildren(db, allSymbols) { - const childRows = []; +function insertChildren( + db: BetterSqlite3.Database, + allSymbols: Map, +): void { + const childRows: unknown[][] = []; for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); + const nodeIdMap = new Map(); for (const row of bulkNodeIdsByFile(db, relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } @@ -87,10 +106,13 @@ function insertChildren(db, allSymbols) { // ── Phase 3: Insert containment + parameter_of edges ──────────────────── -function insertContainmentEdges(db, allSymbols) { - const edgeRows = []; +function insertContainmentEdges( + db: BetterSqlite3.Database, + allSymbols: Map, +): void { + const edgeRows: unknown[][] = []; for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); + const nodeIdMap = new Map(); for (const row of bulkNodeIdsByFile(db, relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } @@ -118,7 +140,14 @@ function insertContainmentEdges(db, allSymbols) { // ── Phase 4: Update file hashes ───────────────────────────────────────── -function updateFileHashes(_db, allSymbols, precomputedData, 
metadataUpdates, rootDir, upsertHash) { +function updateFileHashes( + _db: BetterSqlite3.Database, + allSymbols: Map, + precomputedData: Map, + metadataUpdates: MetadataUpdate[], + rootDir: string, + upsertHash: BetterSqlite3.Statement | null, +): void { if (!upsertHash) return; for (const [relPath] of allSymbols) { @@ -126,13 +155,20 @@ function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, roo if (precomputed?._reverseDepOnly) { // no-op: file unchanged, hash already correct } else if (precomputed?.hash) { - const stat = precomputed.stat || fileStat(path.join(rootDir, relPath)); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; - const size = stat ? stat.size : 0; + let mtime: number; + let size: number; + if (precomputed.stat) { + mtime = precomputed.stat.mtime; + size = precomputed.stat.size; + } else { + const rawStat = fileStat(path.join(rootDir, relPath)); + mtime = rawStat ? Math.floor(rawStat.mtimeMs) : 0; + size = rawStat ? rawStat.size : 0; + } upsertHash.run(relPath, precomputed.hash, mtime, size); } else { const absPath = path.join(rootDir, relPath); - let code; + let code: string | null; try { code = readFileSafe(absPath); } catch { @@ -149,7 +185,7 @@ function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, roo // Also update metadata-only entries (self-heal mtime/size without re-parse) for (const item of metadataUpdates) { - const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; + const mtime = item.stat ? Math.floor(item.stat.mtime) : 0; const size = item.stat ? 
item.stat.size : 0; upsertHash.run(item.relPath, item.hash, mtime, size); } @@ -157,18 +193,15 @@ function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, roo // ── Main entry point ──────────────────────────────────────────────────── -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function insertNodes(ctx) { +export async function insertNodes(ctx: PipelineContext): Promise { const { db, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; - const precomputedData = new Map(); + const precomputedData = new Map(); for (const item of filesToParse) { - if (item.relPath) precomputedData.set(item.relPath, item); + if (item.relPath) precomputedData.set(item.relPath, item as PrecomputedFileData); } - let upsertHash; + let upsertHash: BetterSqlite3.Statement | null; try { upsertHash = db.prepare( 'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)', diff --git a/src/domain/graph/builder/stages/parse-files.js b/src/domain/graph/builder/stages/parse-files.ts similarity index 87% rename from src/domain/graph/builder/stages/parse-files.js rename to src/domain/graph/builder/stages/parse-files.ts index 6690bb5f..9e8254c1 100644 --- a/src/domain/graph/builder/stages/parse-files.js +++ b/src/domain/graph/builder/stages/parse-files.ts @@ -7,11 +7,9 @@ import { performance } from 'node:perf_hooks'; import { info } from '../../../../infrastructure/logger.js'; import { parseFilesAuto } from '../../../parser.js'; +import type { PipelineContext } from '../context.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function parseFiles(ctx) { +export async function parseFiles(ctx: PipelineContext): Promise { const { allFiles, parseChanges, isFullBuild, engineOpts, rootDir } = ctx; ctx.filesToParse = isFullBuild ? 
allFiles.map((f) => ({ file: f })) : parseChanges; diff --git a/src/domain/graph/builder/stages/resolve-imports.js b/src/domain/graph/builder/stages/resolve-imports.ts similarity index 75% rename from src/domain/graph/builder/stages/resolve-imports.js rename to src/domain/graph/builder/stages/resolve-imports.ts index 7d9bbe40..66dc4551 100644 --- a/src/domain/graph/builder/stages/resolve-imports.js +++ b/src/domain/graph/builder/stages/resolve-imports.ts @@ -1,23 +1,19 @@ -/** - * Stage: resolveImports - * - * Batch import resolution + barrel/re-export map construction. - * For incremental builds, loads unchanged barrel files for resolution. - */ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { parseFilesAuto } from '../../../parser.js'; import { resolveImportPath, resolveImportsBatch } from '../../resolve.js'; +import type { PipelineContext } from '../context.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function resolveImports(ctx) { - const { db, fileSymbols, rootDir, aliases, allFiles, isFullBuild, engineOpts } = ctx; +interface ReexportEntry { + source: string; + names: string[]; + wildcardReexport: boolean; +} - // Collect all (fromFile, importSource) pairs and resolve in one native call +export async function resolveImports(ctx: PipelineContext): Promise { + const { db, fileSymbols, rootDir, aliases, allFiles, isFullBuild, engineOpts } = ctx; const t0 = performance.now(); - const batchInputs = []; + const batchInputs: Array<{ fromFile: string; importSource: string }> = []; for (const [relPath, symbols] of fileSymbols) { const absFile = path.join(rootDir, relPath); for (const imp of symbols.imports) { @@ -27,8 +23,7 @@ export async function resolveImports(ctx) { ctx.batchResolved = resolveImportsBatch(batchInputs, rootDir, aliases, allFiles); ctx.timing.resolveMs = performance.now() - t0; - // Build re-export map for barrel resolution - ctx.reexportMap = new Map(); + ctx.reexportMap 
= new Map(); for (const [relPath, symbols] of fileSymbols) { const reexports = symbols.imports.filter((imp) => imp.reexport); if (reexports.length > 0) { @@ -43,16 +38,13 @@ export async function resolveImports(ctx) { } } - // For incremental builds, load unchanged barrel files into reexportMap - ctx.barrelOnlyFiles = new Set(); + ctx.barrelOnlyFiles = new Set(); if (!isFullBuild) { const barrelCandidates = db - .prepare( - `SELECT DISTINCT n1.file FROM edges e + .prepare(`SELECT DISTINCT n1.file FROM edges e JOIN nodes n1 ON e.source_id = n1.id - WHERE e.kind = 'reexports' AND n1.kind = 'file'`, - ) - .all(); + WHERE e.kind = 'reexports' AND n1.kind = 'file'`) + .all() as Array<{ file: string }>; for (const { file: relPath } of barrelCandidates) { if (fileSymbols.has(relPath)) continue; const absPath = path.join(rootDir, relPath); @@ -81,11 +73,7 @@ export async function resolveImports(ctx) { } } -/** - * Resolve an import source, preferring batch results. - * Exported so other stages (build-edges) can reuse it. - */ -export function getResolved(ctx, absFile, importSource) { +export function getResolved(ctx: PipelineContext, absFile: string, importSource: string): string { if (ctx.batchResolved) { const key = `${absFile}|${importSource}`; const hit = ctx.batchResolved.get(key); @@ -94,10 +82,7 @@ export function getResolved(ctx, absFile, importSource) { return resolveImportPath(absFile, importSource, ctx.rootDir, ctx.aliases); } -/** - * Check if a file is a barrel (re-export hub). - */ -export function isBarrelFile(ctx, relPath) { +export function isBarrelFile(ctx: PipelineContext, relPath: string): boolean { const symbols = ctx.fileSymbols.get(relPath); if (!symbols) return false; const reexports = symbols.imports.filter((imp) => imp.reexport); @@ -106,15 +91,16 @@ export function isBarrelFile(ctx, relPath) { return reexports.length >= ownDefs; } -/** - * Resolve a symbol through barrel re-export chains. 
- */ -export function resolveBarrelExport(ctx, barrelPath, symbolName, visited = new Set()) { +export function resolveBarrelExport( + ctx: PipelineContext, + barrelPath: string, + symbolName: string, + visited: Set = new Set(), +): string | null { if (visited.has(barrelPath)) return null; visited.add(barrelPath); - const reexports = ctx.reexportMap.get(barrelPath); + const reexports = ctx.reexportMap.get(barrelPath) as ReexportEntry[] | undefined; if (!reexports) return null; - for (const re of reexports) { if (re.names.length > 0 && !re.wildcardReexport) { if (re.names.includes(symbolName)) { diff --git a/src/domain/graph/builder/stages/run-analyses.js b/src/domain/graph/builder/stages/run-analyses.ts similarity index 71% rename from src/domain/graph/builder/stages/run-analyses.js rename to src/domain/graph/builder/stages/run-analyses.ts index 53384613..c943cdf4 100644 --- a/src/domain/graph/builder/stages/run-analyses.js +++ b/src/domain/graph/builder/stages/run-analyses.ts @@ -5,18 +5,19 @@ * Filters out reverse-dep files for incremental builds. 
*/ import { debug, warn } from '../../../../infrastructure/logger.js'; +import type { ExtractorOutput } from '../../../../types.js'; +import type { PipelineContext } from '../context.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function runAnalyses(ctx) { +export async function runAnalyses(ctx: PipelineContext): Promise { const { db, allSymbols, rootDir, opts, engineOpts, isFullBuild, filesToParse } = ctx; // For incremental builds, exclude reverse-dep-only files - let astComplexitySymbols = allSymbols; + let astComplexitySymbols: Map = allSymbols; if (!isFullBuild) { const reverseDepFiles = new Set( - filesToParse.filter((item) => item._reverseDepOnly).map((item) => item.relPath), + filesToParse + .filter((item) => (item as { _reverseDepOnly?: boolean })._reverseDepOnly) + .map((item) => item.relPath), ); if (reverseDepFiles.size > 0) { astComplexitySymbols = new Map(); @@ -39,6 +40,8 @@ export async function runAnalyses(ctx) { ctx.timing.cfgMs = analysisTiming.cfgMs; ctx.timing.dataflowMs = analysisTiming.dataflowMs; } catch (err) { - warn(`Analysis engine failed (AST/complexity/CFG/dataflow may be incomplete): ${err.message}`); + warn( + `Analysis engine failed (AST/complexity/CFG/dataflow may be incomplete): ${(err as Error).message}`, + ); } } diff --git a/src/domain/graph/change-journal.js b/src/domain/graph/change-journal.ts similarity index 60% rename from src/domain/graph/change-journal.js rename to src/domain/graph/change-journal.ts index 7589b5a6..0bfa6b4f 100644 --- a/src/domain/graph/change-journal.js +++ b/src/domain/graph/change-journal.ts @@ -5,36 +5,36 @@ import { debug, warn } from '../../infrastructure/logger.js'; export const CHANGE_EVENTS_FILENAME = 'change-events.ndjson'; export const DEFAULT_MAX_BYTES = 1024 * 1024; // 1 MB -/** - * Returns the absolute path to the NDJSON change events file. 
- */ -export function changeEventsPath(rootDir) { +export function changeEventsPath(rootDir: string): string { return path.join(rootDir, '.codegraph', CHANGE_EVENTS_FILENAME); } -/** - * Compare old and new symbol arrays, returning added/removed/modified sets. - * Symbols are keyed on `name\0kind`. A symbol is "modified" if the same - * name+kind exists in both but the line changed. - * - * @param {Array<{name:string, kind:string, line:number}>} oldSymbols - * @param {Array<{name:string, kind:string, line:number}>} newSymbols - * @returns {{ added: Array, removed: Array, modified: Array }} - */ -export function diffSymbols(oldSymbols, newSymbols) { - const oldMap = new Map(); +interface SymbolEntry { + name: string; + kind: string; + line: number; +} + +interface SymbolDiff { + added: Array<{ name: string; kind: string; line: number }>; + removed: Array<{ name: string; kind: string }>; + modified: Array<{ name: string; kind: string; line: number }>; +} + +export function diffSymbols(oldSymbols: SymbolEntry[], newSymbols: SymbolEntry[]): SymbolDiff { + const oldMap = new Map(); for (const s of oldSymbols) { oldMap.set(`${s.name}\0${s.kind}`, s); } - const newMap = new Map(); + const newMap = new Map(); for (const s of newSymbols) { newMap.set(`${s.name}\0${s.kind}`, s); } - const added = []; - const removed = []; - const modified = []; + const added: SymbolDiff['added'] = []; + const removed: SymbolDiff['removed'] = []; + const modified: SymbolDiff['modified'] = []; for (const [key, s] of newMap) { const old = oldMap.get(key); @@ -54,10 +54,29 @@ export function diffSymbols(oldSymbols, newSymbols) { return { added, removed, modified }; } -/** - * Assemble a single change event object. 
- */ -export function buildChangeEvent(file, event, symbolDiff, counts) { +interface ChangeEvent { + ts: string; + file: string; + event: string; + symbols: unknown; + counts: { + nodes: { before: number; after: number }; + edges: { added: number }; + }; +} + +interface ChangeEventCounts { + nodesBefore?: number; + nodesAfter?: number; + edgesAdded?: number; +} + +export function buildChangeEvent( + file: string, + event: string, + symbolDiff: unknown, + counts: ChangeEventCounts, +): ChangeEvent { return { ts: new Date().toISOString(), file, @@ -70,11 +89,7 @@ export function buildChangeEvent(file, event, symbolDiff, counts) { }; } -/** - * Append change events as NDJSON lines to the change events file. - * Creates the .codegraph directory if needed. Non-fatal on failure. - */ -export function appendChangeEvents(rootDir, events) { +export function appendChangeEvents(rootDir: string, events: ChangeEvent[]): void { const filePath = changeEventsPath(rootDir); const dir = path.dirname(filePath); @@ -86,7 +101,7 @@ export function appendChangeEvents(rootDir, events) { fs.appendFileSync(filePath, lines); debug(`Appended ${events.length} change event(s) to ${filePath}`); } catch (err) { - warn(`Failed to append change events: ${err.message}`); + warn(`Failed to append change events: ${(err as Error).message}`); return; } @@ -97,16 +112,12 @@ export function appendChangeEvents(rootDir, events) { } } -/** - * If the file exceeds maxBytes, keep the last ~half by finding - * the first newline at or after the midpoint and rewriting from there. 
- */ -export function rotateIfNeeded(filePath, maxBytes = DEFAULT_MAX_BYTES) { - let stat; +export function rotateIfNeeded(filePath: string, maxBytes: number = DEFAULT_MAX_BYTES): void { + let stat: fs.Stats; try { stat = fs.statSync(filePath); } catch { - return; // file doesn't exist, nothing to rotate + return; } if (stat.size <= maxBytes) return; @@ -125,6 +136,6 @@ export function rotateIfNeeded(filePath, maxBytes = DEFAULT_MAX_BYTES) { fs.writeFileSync(filePath, kept); debug(`Rotated change events: ${stat.size} → ${kept.length} bytes`); } catch (err) { - warn(`Failed to rotate change events: ${err.message}`); + warn(`Failed to rotate change events: ${(err as Error).message}`); } } diff --git a/src/domain/graph/cycles.ts b/src/domain/graph/cycles.ts new file mode 100644 index 00000000..9517133d --- /dev/null +++ b/src/domain/graph/cycles.ts @@ -0,0 +1,66 @@ +import { tarjan } from '../../graph/algorithms/tarjan.js'; +import { buildDependencyGraph } from '../../graph/builders/dependency.js'; +import { CodeGraph } from '../../graph/model.js'; +import { loadNative } from '../../infrastructure/native.js'; +import type { BetterSqlite3Database } from '../../types.js'; + +export function findCycles( + db: BetterSqlite3Database, + opts: { fileLevel?: boolean; noTests?: boolean } = {}, +): string[][] { + const fileLevel = opts.fileLevel !== false; + const noTests = opts.noTests || false; + + const graph = buildDependencyGraph(db, { fileLevel, noTests }); + + const idToLabel = new Map(); + for (const [id, attrs] of graph.nodes()) { + if (fileLevel) { + idToLabel.set(id, attrs['file'] as string); + } else { + idToLabel.set(id, `${attrs['label']}|${attrs['file']}`); + } + } + + const edges = graph.toEdgeArray().map((e) => ({ + source: idToLabel.get(e.source) ?? e.source, + target: idToLabel.get(e.target) ?? 
e.target, + })); + + const native = loadNative(); + if (native) { + return native.detectCycles(edges) as string[][]; + } + + const labelGraph = new CodeGraph(); + for (const { source, target } of edges) { + labelGraph.addEdge(source, target); + } + return tarjan(labelGraph); +} + +export function findCyclesJS(edges: Array<{ source: string; target: string }>): string[][] { + const graph = new CodeGraph(); + for (const { source, target } of edges) { + graph.addEdge(source, target); + } + return tarjan(graph); +} + +export function formatCycles(cycles: string[][]): string { + if (cycles.length === 0) { + return 'No circular dependencies detected.'; + } + + const lines: string[] = [`Found ${cycles.length} circular dependency cycle(s):\n`]; + for (let i = 0; i < cycles.length; i++) { + const cycle = cycles[i]!; + lines.push(` Cycle ${i + 1} (${cycle.length} files):`); + for (const file of cycle) { + lines.push(` -> ${file}`); + } + lines.push(` -> ${cycle[0]} (back to start)`); + lines.push(''); + } + return lines.join('\n'); +} diff --git a/src/domain/graph/journal.js b/src/domain/graph/journal.ts similarity index 67% rename from src/domain/graph/journal.js rename to src/domain/graph/journal.ts index 714889f2..4ad63a35 100644 --- a/src/domain/graph/journal.js +++ b/src/domain/graph/journal.ts @@ -5,13 +5,16 @@ import { debug, warn } from '../../infrastructure/logger.js'; export const JOURNAL_FILENAME = 'changes.journal'; const HEADER_PREFIX = '# codegraph-journal v1 '; -/** - * Read and validate the change journal. - * Returns { valid, timestamp, changed[], removed[] } or { valid: false }. 
- */ -export function readJournal(rootDir) { +interface JournalResult { + valid: boolean; + timestamp?: number; + changed?: string[]; + removed?: string[]; +} + +export function readJournal(rootDir: string): JournalResult { const journalPath = path.join(rootDir, '.codegraph', JOURNAL_FILENAME); - let content; + let content: string; try { content = fs.readFileSync(journalPath, 'utf-8'); } catch { @@ -19,24 +22,24 @@ export function readJournal(rootDir) { } const lines = content.split('\n'); - if (lines.length === 0 || !lines[0].startsWith(HEADER_PREFIX)) { + if (lines.length === 0 || !lines[0]!.startsWith(HEADER_PREFIX)) { debug('Journal has malformed or missing header'); return { valid: false }; } - const timestamp = Number(lines[0].slice(HEADER_PREFIX.length).trim()); + const timestamp = Number(lines[0]!.slice(HEADER_PREFIX.length).trim()); if (!Number.isFinite(timestamp) || timestamp <= 0) { debug('Journal has invalid timestamp'); return { valid: false }; } - const changed = []; - const removed = []; - const seenChanged = new Set(); - const seenRemoved = new Set(); + const changed: string[] = []; + const removed: string[] = []; + const seenChanged = new Set(); + const seenRemoved = new Set(); for (let i = 1; i < lines.length; i++) { - const line = lines[i].trim(); + const line = lines[i]!.trim(); if (!line || line.startsWith('#')) continue; if (line.startsWith('DELETED ')) { @@ -56,11 +59,10 @@ export function readJournal(rootDir) { return { valid: true, timestamp, changed, removed }; } -/** - * Append changed/deleted paths to the journal. - * Creates the journal with a header if it doesn't exist. 
- */ -export function appendJournalEntries(rootDir, entries) { +export function appendJournalEntries( + rootDir: string, + entries: Array<{ file: string; deleted?: boolean }>, +): void { const dir = path.join(rootDir, '.codegraph'); const journalPath = path.join(dir, JOURNAL_FILENAME); @@ -68,7 +70,6 @@ export function appendJournalEntries(rootDir, entries) { fs.mkdirSync(dir, { recursive: true }); } - // If journal doesn't exist, create with a placeholder header if (!fs.existsSync(journalPath)) { fs.writeFileSync(journalPath, `${HEADER_PREFIX}0\n`); } @@ -81,11 +82,7 @@ export function appendJournalEntries(rootDir, entries) { fs.appendFileSync(journalPath, `${lines.join('\n')}\n`); } -/** - * Write a fresh journal header after a successful build. - * Atomic: write to temp file then rename. - */ -export function writeJournalHeader(rootDir, timestamp) { +export function writeJournalHeader(rootDir: string, timestamp: number): void { const dir = path.join(rootDir, '.codegraph'); const journalPath = path.join(dir, JOURNAL_FILENAME); const tmpPath = `${journalPath}.tmp`; @@ -98,8 +95,7 @@ export function writeJournalHeader(rootDir, timestamp) { fs.writeFileSync(tmpPath, `${HEADER_PREFIX}${timestamp}\n`); fs.renameSync(tmpPath, journalPath); } catch (err) { - warn(`Failed to write journal header: ${err.message}`); - // Clean up temp file if rename failed + warn(`Failed to write journal header: ${(err as Error).message}`); try { fs.unlinkSync(tmpPath); } catch { diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index c7948355..30cef5d6 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -5,7 +5,7 @@ import { info } from '../../infrastructure/logger.js'; import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../shared/constants.js'; import { DbError } from '../../shared/errors.js'; import { createParseTreeCache, getActiveEngine } from '../parser.js'; -import { rebuildFile } from './builder/incremental.js'; +import { type 
IncrementalStmts, rebuildFile } from './builder/incremental.js'; import { appendChangeEvents, buildChangeEvent, diffSymbols } from './change-journal.js'; import { appendJournalEntries } from './journal.js'; @@ -28,7 +28,11 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = // Alias for functions expecting the project's BetterSqlite3Database interface const typedDb = db as unknown as import('../../types.js').BetterSqlite3Database; initSchema(db); - const engineOpts = { engine: (opts.engine || 'auto') as import('../../types.js').EngineMode }; + const engineOpts: import('../../types.js').EngineOpts = { + engine: (opts.engine || 'auto') as import('../../types.js').EngineMode, + dataflow: false, + ast: false, + }; const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts); console.log( `Watch mode using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`, @@ -99,9 +103,17 @@ export async function watchProject(rootDir: string, opts: { engine?: string } = edgesAdded: number; }> = []; for (const filePath of files) { - const result = (await rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, { - diffSymbols, - })) as (typeof results)[number] | null; + const result = (await rebuildFile( + db, + rootDir, + filePath, + stmts as IncrementalStmts, + engineOpts, + cache, + { + diffSymbols: diffSymbols as (old: unknown[], new_: unknown[]) => unknown, + }, + )) as (typeof results)[number] | null; if (result) results.push(result); } const updates = results; diff --git a/src/domain/search/generator.ts b/src/domain/search/generator.ts index 085acbea..05cd58ea 100644 --- a/src/domain/search/generator.ts +++ b/src/domain/search/generator.ts @@ -4,6 +4,7 @@ import type BetterSqlite3 from 'better-sqlite3'; import { closeDb, findDbPath, openDb } from '../../db/index.js'; import { warn } from '../../infrastructure/logger.js'; import { DbError } from '../../shared/errors.js'; +import type { NodeRow } from 
'../../types.js'; import { embed, getModelConfig } from './models.js'; import { buildSourceText } from './strategies/source.js'; import { buildStructuredText } from './strategies/structured.js'; @@ -81,7 +82,7 @@ export async function buildEmbeddings( .prepare( `SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`, ) - .all() as Array<{ id: number; name: string; kind: string; file: string; line: number }>; + .all() as NodeRow[]; console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`); diff --git a/src/domain/search/search/cli-formatter.js b/src/domain/search/search/cli-formatter.ts similarity index 80% rename from src/domain/search/search/cli-formatter.js rename to src/domain/search/search/cli-formatter.ts index a0b45a80..1081afc7 100644 --- a/src/domain/search/search/cli-formatter.js +++ b/src/domain/search/search/cli-formatter.ts @@ -1,37 +1,41 @@ import { warn } from '../../../infrastructure/logger.js'; import { hybridSearchData } from './hybrid.js'; import { ftsSearchData } from './keyword.js'; +import type { SemanticSearchOpts } from './semantic.js'; import { multiSearchData, searchData } from './semantic.js'; -/** - * Search with mode support — CLI wrapper with multi-query detection. - * Modes: 'hybrid' (default), 'semantic', 'keyword' - */ -export async function search(query, customDbPath, opts = {}) { +interface SearchOpts extends SemanticSearchOpts { + mode?: 'hybrid' | 'semantic' | 'keyword'; + json?: boolean; +} + +export async function search( + query: string, + customDbPath: string | undefined, + opts: SearchOpts = {}, +): Promise<void> { const mode = opts.mode || 'hybrid'; - // Split by semicolons, trim, filter empties const queries = query .split(';') .map((q) => q.trim()) .filter((q) => q.length > 0); - const kindIcon = (kind) => (kind === 'function' ? 'f' : kind === 'class' ? '*' : 'o'); + const kindIcon = (kind: string): string => + kind === 'function' ? 'f' : kind === 'class' ?
'*' : 'o'; - // ─── Keyword-only mode ────────────────────────────────────────────── + // Keyword-only mode if (mode === 'keyword') { - const singleQuery = queries.length === 1 ? queries[0] : query; + const singleQuery = queries.length === 1 ? queries[0]! : query; const data = ftsSearchData(singleQuery, customDbPath, opts); if (!data) { console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.'); return; } - if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; } - console.log(`\nKeyword search: "${singleQuery}" (BM25)\n`); if (data.results.length === 0) { console.log(' No results found.'); @@ -46,18 +50,16 @@ export async function search(query, customDbPath, opts = {}) { return; } - // ─── Semantic-only mode ───────────────────────────────────────────── + // Semantic-only mode if (mode === 'semantic') { if (queries.length <= 1) { const singleQuery = queries[0] || query; const data = await searchData(singleQuery, customDbPath, opts); if (!data) return; - if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; } - console.log(`\nSemantic search: "${singleQuery}"\n`); if (data.results.length === 0) { console.log(' No results above threshold.'); @@ -72,12 +74,10 @@ export async function search(query, customDbPath, opts = {}) { } else { const data = await multiSearchData(queries, customDbPath, opts); if (!data) return; - if (opts.json) { console.log(JSON.stringify(data, null, 2)); return; } - console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`); for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`); console.log(); @@ -101,11 +101,10 @@ export async function search(query, customDbPath, opts = {}) { return; } - // ─── Hybrid mode (default) ────────────────────────────────────────── + // Hybrid mode (default) const data = await hybridSearchData(query, customDbPath, opts); if (!data) { - // No FTS5 index — fall back to semantic-only warn( 'FTS5 index not found — using 
semantic search only. Re-run `codegraph embed` to enable hybrid mode.', ); @@ -134,12 +133,12 @@ export async function search(query, customDbPath, opts = {}) { console.log( ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`, ); - const parts = []; + const parts: string[] = []; if (r.bm25Rank != null) { - parts.push(`BM25: rank ${r.bm25Rank} (score ${r.bm25Score.toFixed(2)})`); + parts.push(`BM25: rank ${r.bm25Rank} (score ${r.bm25Score!.toFixed(2)})`); } if (r.semanticRank != null) { - parts.push(`Semantic: rank ${r.semanticRank} (${(r.similarity * 100).toFixed(1)}%)`); + parts.push(`Semantic: rank ${r.semanticRank} (${(r.similarity! * 100).toFixed(1)}%)`); } if (parts.length > 0) { console.log(` ${parts.join(' | ')}`); diff --git a/src/domain/search/search/filters.js b/src/domain/search/search/filters.ts similarity index 54% rename from src/domain/search/search/filters.js rename to src/domain/search/search/filters.ts index 47becc3a..4d91bb56 100644 --- a/src/domain/search/search/filters.js +++ b/src/domain/search/search/filters.ts @@ -1,13 +1,6 @@ -/** - * Match a file path against a glob pattern. - * Supports *, **, and ? wildcards. Zero dependencies. - */ -export function globMatch(filePath, pattern) { - // Normalize separators to forward slashes +export function globMatch(filePath: string, pattern: string): boolean { const normalized = filePath.replace(/\\/g, '/'); - // Escape regex specials except glob chars let regex = pattern.replace(/\\/g, '/').replace(/[.+^${}()|[\]\\]/g, '\\$&'); - // Replace ** first (matches any path segment), then * and ? 
regex = regex.replace(/\*\*/g, '\0'); regex = regex.replace(/\*/g, '[^/]*'); regex = regex.replace(/\0/g, '.*'); @@ -15,23 +8,18 @@ export function globMatch(filePath, pattern) { try { return new RegExp(`^${regex}$`).test(normalized); } catch { - // Malformed pattern — fall back to substring match return normalized.includes(pattern); } } const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./; -/** - * Apply post-query filters (glob pattern, noTests) to a set of rows. - * Mutates nothing — returns a new filtered array. - * @param {Array} rows - Rows with at least a `file` property - * @param {object} opts - * @param {string} [opts.filePattern] - Glob pattern (only applied if it contains glob chars) - * @param {boolean} [opts.noTests] - Exclude test/spec files - * @returns {Array} - */ -export function applyFilters(rows, opts = {}) { +export interface FilterOpts { + filePattern?: string | string[]; + noTests?: boolean; +} + +export function applyFilters<T extends { file: string }>(rows: T[], opts: FilterOpts = {}): T[] { let filtered = rows; const fp = opts.filePattern; const fpArr = Array.isArray(fp) ? fp : fp ? [fp] : []; diff --git a/src/domain/search/search/hybrid.js b/src/domain/search/search/hybrid.ts similarity index 55% rename from src/domain/search/search/hybrid.js rename to src/domain/search/search/hybrid.ts index 2c6cd00a..80370312 100644 --- a/src/domain/search/search/hybrid.js +++ b/src/domain/search/search/hybrid.ts @@ -1,22 +1,41 @@ import { openReadonlyOrFail } from '../../../db/index.js'; import { loadConfig } from '../../../infrastructure/config.js'; +import type { BetterSqlite3Database, CodegraphConfig } from '../../../types.js'; import { hasFtsIndex } from '../stores/fts5.js'; import { ftsSearchData } from './keyword.js'; +import type { SemanticSearchOpts } from './semantic.js'; import { searchData } from './semantic.js'; -/** - * Hybrid BM25 + semantic search with RRF fusion.
- * Returns { results: [{ name, kind, file, line, rrf, bm25Score, bm25Rank, similarity, semanticRank }] } - * or null if no FTS5 index (caller should fall back to semantic-only). - */ -export async function hybridSearchData(query, customDbPath, opts = {}) { +interface HybridResult { + name: string; + kind: string; + file: string; + line: number; + endLine: number | null; + role: string | null; + fileHash: string | null; + rrf: number; + bm25Score: number | null; + bm25Rank: number | null; + similarity: number | null; + semanticRank: number | null; +} + +export interface HybridSearchResult { + results: HybridResult[]; +} + +export async function hybridSearchData( + query: string, + customDbPath: string | undefined, + opts: SemanticSearchOpts = {}, +): Promise { const config = opts.config || loadConfig(); - const searchCfg = config.search || {}; + const searchCfg = config.search || ({} as CodegraphConfig['search']); const limit = opts.limit ?? searchCfg.topK ?? 15; const k = opts.rrfK ?? searchCfg.rrfK ?? 60; const topK = (opts.limit ?? searchCfg.topK ?? 15) * 5; - // Split semicolons for multi-query support const queries = typeof query === 'string' ? 
query @@ -25,30 +44,41 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { .filter((q) => q.length > 0) : [query]; - // Check FTS5 availability first (sync, cheap) - const checkDb = openReadonlyOrFail(customDbPath); + const checkDb = openReadonlyOrFail(customDbPath) as BetterSqlite3Database; const ftsAvailable = hasFtsIndex(checkDb); checkDb.close(); if (!ftsAvailable) return null; - // Collect ranked lists: for each query, one BM25 list + one semantic list - const rankedLists = []; + interface RankedItem { + key: string; + rank: number; + source: 'bm25' | 'semantic'; + name: string; + kind: string; + file: string; + line: number; + endLine?: number | null; + role?: string | null; + fileHash?: string | null; + bm25Score?: number; + similarity?: number; + } + + const rankedLists: RankedItem[][] = []; for (const q of queries) { - // BM25 ranked list (sync) const bm25Data = ftsSearchData(q, customDbPath, { ...opts, limit: topK }); if (bm25Data?.results) { rankedLists.push( bm25Data.results.map((r, idx) => ({ key: `${r.name}:${r.file}:${r.line}`, rank: idx + 1, - source: 'bm25', + source: 'bm25' as const, ...r, })), ); } - // Semantic ranked list (async) const semData = await searchData(q, customDbPath, { ...opts, limit: topK, @@ -59,15 +89,29 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { semData.results.map((r, idx) => ({ key: `${r.name}:${r.file}:${r.line}`, rank: idx + 1, - source: 'semantic', + source: 'semantic' as const, ...r, })), ); } } - // RRF fusion across all ranked lists - const fusionMap = new Map(); + interface FusionEntry { + name: string; + kind: string; + file: string; + line: number; + endLine: number | null; + role: string | null; + fileHash: string | null; + rrfScore: number; + bm25Score: number | null; + bm25Rank: number | null; + similarity: number | null; + semanticRank: number | null; + } + + const fusionMap = new Map(); for (const list of rankedLists) { for (const item of list) { if 
(!fusionMap.has(item.key)) { @@ -76,9 +120,9 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { kind: item.kind, file: item.file, line: item.line, - endLine: item.endLine ?? null, - role: item.role ?? null, - fileHash: item.fileHash ?? null, + endLine: (item.endLine as number | null) ?? null, + role: (item.role as string | null) ?? null, + fileHash: (item.fileHash as string | null) ?? null, rrfScore: 0, bm25Score: null, bm25Rank: null, @@ -86,23 +130,23 @@ export async function hybridSearchData(query, customDbPath, opts = {}) { semanticRank: null, }); } - const entry = fusionMap.get(item.key); + const entry = fusionMap.get(item.key)!; entry.rrfScore += 1 / (k + item.rank); if (item.source === 'bm25') { if (entry.bm25Rank === null || item.rank < entry.bm25Rank) { - entry.bm25Score = item.bm25Score; + entry.bm25Score = (item as RankedItem & { bm25Score?: number }).bm25Score ?? null; entry.bm25Rank = item.rank; } } else { if (entry.semanticRank === null || item.rank < entry.semanticRank) { - entry.similarity = item.similarity; + entry.similarity = (item as RankedItem & { similarity?: number }).similarity ?? 
null; entry.semanticRank = item.rank; } } } } - const results = [...fusionMap.values()] + const results: HybridResult[] = [...fusionMap.values()] .sort((a, b) => b.rrfScore - a.rrfScore) .slice(0, limit) .map((e) => ({ diff --git a/src/domain/search/search/keyword.js b/src/domain/search/search/keyword.ts similarity index 59% rename from src/domain/search/search/keyword.js rename to src/domain/search/search/keyword.ts index 4a4e3ed7..66eda0ac 100644 --- a/src/domain/search/search/keyword.js +++ b/src/domain/search/search/keyword.ts @@ -1,17 +1,47 @@ import { openReadonlyOrFail } from '../../../db/index.js'; import { buildFileConditionSQL } from '../../../db/query-builder.js'; +import type { BetterSqlite3Database } from '../../../types.js'; import { normalizeSymbol } from '../../queries.js'; import { hasFtsIndex, sanitizeFtsQuery } from '../stores/fts5.js'; import { applyFilters } from './filters.js'; -/** - * BM25 keyword search via FTS5. - * Returns { results: [{ name, kind, file, line, bm25Score }] } or null if no FTS5 index. 
- */ -export function ftsSearchData(query, customDbPath, opts = {}) { +export interface FtsSearchOpts { + limit?: number; + kind?: string; + filePattern?: string | string[]; + noTests?: boolean; +} + +interface FtsRow { + node_id: number; + bm25_score: number; + name: string; + kind: string; + file: string; + line: number; + end_line: number | null; + role: string | null; +} + +export interface FtsSearchResult { + results: Array<{ + name: string; + kind: string; + file: string; + line: number; + bm25Score: number; + [key: string]: unknown; + }>; +} + +export function ftsSearchData( + query: string, + customDbPath: string | undefined, + opts: FtsSearchOpts = {}, +): FtsSearchResult | null { const limit = opts.limit || 15; - const db = openReadonlyOrFail(customDbPath); + const db = openReadonlyOrFail(customDbPath) as BetterSqlite3Database; try { if (!hasFtsIndex(db)) { @@ -30,7 +60,7 @@ export function ftsSearchData(query, customDbPath, opts = {}) { JOIN nodes n ON f.rowid = n.id WHERE fts_index MATCH ? `; - const params = [ftsQuery]; + const params: unknown[] = [ftsQuery]; if (opts.kind) { sql += ' AND n.kind = ?'; @@ -40,9 +70,6 @@ export function ftsSearchData(query, customDbPath, opts = {}) { const fp = opts.filePattern; const fpArr = Array.isArray(fp) ? fp : fp ? [fp] : []; const isGlob = fpArr.length > 0 && fpArr.some((p) => /[*?[\]]/.test(p)); - // For non-glob patterns, push filtering into SQL via buildFileConditionSQL - // (handles escapeLike + ESCAPE clause). Glob patterns are handled post-query - // by applyFilters. 
if (fpArr.length > 0 && !isGlob) { const fc = buildFileConditionSQL(fpArr, 'n.file'); sql += fc.sql; @@ -50,22 +77,21 @@ export function ftsSearchData(query, customDbPath, opts = {}) { } sql += ' ORDER BY rank LIMIT ?'; - params.push(limit * 5); // fetch generous set for post-filtering + params.push(limit * 5); - let rows; + let rows: FtsRow[]; try { - rows = db.prepare(sql).all(...params); + rows = db.prepare(sql).all(...params) as FtsRow[]; } catch { - // Invalid FTS5 query syntax — return empty return { results: [] }; } rows = applyFilters(rows, opts); - const hc = new Map(); + const hc = new Map(); const results = rows.slice(0, limit).map((row) => ({ ...normalizeSymbol(row, db, hc), - bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display + bm25Score: -row.bm25_score, })); return { results }; diff --git a/src/domain/search/search/prepare.js b/src/domain/search/search/prepare.ts similarity index 67% rename from src/domain/search/search/prepare.js rename to src/domain/search/search/prepare.ts index fb1552e4..3907aa5b 100644 --- a/src/domain/search/search/prepare.js +++ b/src/domain/search/search/prepare.ts @@ -1,17 +1,39 @@ import { openReadonlyOrFail } from '../../../db/index.js'; import { escapeLike } from '../../../db/query-builder.js'; import { getEmbeddingCount, getEmbeddingMeta } from '../../../db/repository/embeddings.js'; +import type { BetterSqlite3Database } from '../../../types.js'; import { MODELS } from '../models.js'; import { applyFilters } from './filters.js'; -/** - * Shared setup for search functions: opens DB, validates embeddings/model, loads rows. - * Returns { db, rows, modelKey, storedDim } or null on failure (prints error). - * On null return, the DB is closed. On exception, the DB is also closed - * (callers only need to close DB from the returned object on the happy path). 
- */ -export function prepareSearch(customDbPath, opts = {}) { - const db = openReadonlyOrFail(customDbPath); +export interface PreparedSearch { + db: BetterSqlite3Database; + rows: Array<{ + node_id: number; + vector: Buffer; + text_preview: string; + name: string; + kind: string; + file: string; + line: number; + end_line: number | null; + role: string | null; + }>; + modelKey: string | null; + storedDim: number | null; +} + +export interface PrepareSearchOpts { + model?: string; + kind?: string; + filePattern?: string | string[]; + noTests?: boolean; +} + +export function prepareSearch( + customDbPath: string | undefined, + opts: PrepareSearchOpts = {}, +): PreparedSearch | null { + const db = openReadonlyOrFail(customDbPath) as BetterSqlite3Database; try { const count = getEmbeddingCount(db); @@ -35,7 +57,6 @@ export function prepareSearch(customDbPath, opts = {}) { } } - // Pre-filter: allow filtering by kind or file pattern to reduce search space const fp = opts.filePattern; const fpArr = Array.isArray(fp) ? fp : fp ? [fp] : []; const isGlob = fpArr.length > 0 && fpArr.some((p) => /[*?[\]]/.test(p)); @@ -44,8 +65,8 @@ export function prepareSearch(customDbPath, opts = {}) { FROM embeddings e JOIN nodes n ON e.node_id = n.id `; - const params = []; - const conditions = []; + const params: unknown[] = []; + const conditions: string[] = []; if (opts.kind) { conditions.push('n.kind = ?'); params.push(opts.kind); @@ -53,7 +74,7 @@ export function prepareSearch(customDbPath, opts = {}) { if (fpArr.length > 0 && !isGlob) { if (fpArr.length === 1) { conditions.push("n.file LIKE ? ESCAPE '\\'"); - params.push(`%${escapeLike(fpArr[0])}%`); + params.push(`%${escapeLike(fpArr[0]!)}%`); } else { conditions.push(`(${fpArr.map(() => "n.file LIKE ? 
ESCAPE '\\'").join(' OR ')})`); params.push(...fpArr.map((f) => `%${escapeLike(f)}%`)); @@ -63,7 +84,7 @@ export function prepareSearch(customDbPath, opts = {}) { sql += ` WHERE ${conditions.join(' AND ')}`; } - let rows = db.prepare(sql).all(...params); + let rows = db.prepare(sql).all(...params) as PreparedSearch['rows']; rows = applyFilters(rows, opts); return { db, rows, modelKey, storedDim }; diff --git a/src/domain/search/search/semantic.js b/src/domain/search/search/semantic.ts similarity index 59% rename from src/domain/search/search/semantic.js rename to src/domain/search/search/semantic.ts index 262d5946..40e2f887 100644 --- a/src/domain/search/search/semantic.js +++ b/src/domain/search/search/semantic.ts @@ -1,17 +1,42 @@ import { loadConfig } from '../../../infrastructure/config.js'; import { warn } from '../../../infrastructure/logger.js'; +import type { BetterSqlite3Database, CodegraphConfig } from '../../../types.js'; import { normalizeSymbol } from '../../queries.js'; import { embed } from '../models.js'; import { cosineSim } from '../stores/sqlite-blob.js'; import { prepareSearch } from './prepare.js'; -/** - * Single-query semantic search — returns data instead of printing. - * Returns { results: [{ name, kind, file, line, similarity }] } or null on failure. 
- */ -export async function searchData(query, customDbPath, opts = {}) { +export interface SemanticSearchOpts { + config?: CodegraphConfig; + limit?: number; + minScore?: number; + model?: string; + kind?: string; + filePattern?: string | string[]; + noTests?: boolean; + rrfK?: number; +} + +interface SemanticResult { + name: string; + kind: string; + file: string; + line: number; + similarity: number; + [key: string]: unknown; +} + +export interface SearchDataResult { + results: SemanticResult[]; +} + +export async function searchData( + query: string, + customDbPath: string | undefined, + opts: SemanticSearchOpts = {}, +): Promise { const config = opts.config || loadConfig(); - const searchCfg = config.search || {}; + const searchCfg = config.search || ({} as CodegraphConfig['search']); const limit = opts.limit ?? searchCfg.topK ?? 15; const minScore = opts.minScore ?? searchCfg.defaultMinScore ?? 0.2; @@ -23,7 +48,7 @@ export async function searchData(query, customDbPath, opts = {}) { const { vectors: [queryVec], dim, - } = await embed([query], modelKey); + } = await embed([query], modelKey ?? undefined); if (storedDim && dim !== storedDim) { console.log( @@ -33,15 +58,15 @@ export async function searchData(query, customDbPath, opts = {}) { return null; } - const hc = new Map(); - const results = []; + const hc = new Map(); + const results: SemanticResult[] = []; for (const row of rows) { - const vec = new Float32Array(new Uint8Array(row.vector).buffer); - const sim = cosineSim(queryVec, vec); + const vec = new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer); + const sim = cosineSim(queryVec!, vec); if (sim >= minScore) { results.push({ - ...normalizeSymbol(row, db, hc), + ...normalizeSymbol(row, db as BetterSqlite3Database, hc), similarity: sim, }); } @@ -54,13 +79,25 @@ export async function searchData(query, customDbPath, opts = {}) { } } -/** - * Multi-query semantic search with Reciprocal Rank Fusion (RRF). 
- * Returns { results: [{ name, kind, file, line, rrf, queryScores }] } or null on failure. - */ -export async function multiSearchData(queries, customDbPath, opts = {}) { +export interface MultiSearchResult { + results: Array<{ + name: string; + kind: string; + file: string; + line: number; + rrf: number; + queryScores: Array<{ query: string; similarity: number; rank: number }>; + [key: string]: unknown; + }>; +} + +export async function multiSearchData( + queries: string[], + customDbPath: string | undefined, + opts: SemanticSearchOpts = {}, +): Promise { const config = opts.config || loadConfig(); - const searchCfg = config.search || {}; + const searchCfg = config.search || ({} as CodegraphConfig['search']); const limit = opts.limit ?? searchCfg.topK ?? 15; const minScore = opts.minScore ?? searchCfg.defaultMinScore ?? 0.2; const k = opts.rrfK ?? searchCfg.rrfK ?? 60; @@ -70,13 +107,12 @@ export async function multiSearchData(queries, customDbPath, opts = {}) { const { db, rows, modelKey, storedDim } = prepared; try { - const { vectors: queryVecs, dim } = await embed(queries, modelKey); + const { vectors: queryVecs, dim } = await embed(queries, modelKey ?? undefined); - // Warn about similar queries that may bias RRF results const SIMILARITY_WARN_THRESHOLD = searchCfg.similarityWarnThreshold ?? 
0.85; for (let i = 0; i < queryVecs.length; i++) { for (let j = i + 1; j < queryVecs.length; j++) { - const sim = cosineSim(queryVecs[i], queryVecs[j]); + const sim = cosineSim(queryVecs[i]!, queryVecs[j]!); if (sim >= SIMILARITY_WARN_THRESHOLD) { warn( `Queries "${queries[i]}" and "${queries[j]}" are very similar ` + @@ -96,47 +132,47 @@ export async function multiSearchData(queries, customDbPath, opts = {}) { return null; } - // Parse row vectors once - const rowVecs = rows.map((row) => new Float32Array(new Uint8Array(row.vector).buffer)); + const rowVecs = rows.map( + (row) => new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer), + ); - // For each query: compute similarities, filter by minScore, rank const perQueryRanked = queries.map((_query, qi) => { - const scored = []; + const scored: Array<{ rowIndex: number; similarity: number }> = []; for (let ri = 0; ri < rows.length; ri++) { - const sim = cosineSim(queryVecs[qi], rowVecs[ri]); + const sim = cosineSim(queryVecs[qi]!, rowVecs[ri]!); if (sim >= minScore) { scored.push({ rowIndex: ri, similarity: sim }); } } scored.sort((a, b) => b.similarity - a.similarity); - // Assign 1-indexed ranks return scored.map((item, rank) => ({ ...item, rank: rank + 1 })); }); - // Fuse results using RRF: for each unique row, sum 1/(k + rank_i) across queries - const fusionMap = new Map(); // rowIndex -> { rrfScore, queryScores[] } + const fusionMap = new Map< + number, + { rrfScore: number; queryScores: Array<{ query: string; similarity: number; rank: number }> } + >(); for (let qi = 0; qi < queries.length; qi++) { - for (const item of perQueryRanked[qi]) { + for (const item of perQueryRanked[qi]!) 
{ if (!fusionMap.has(item.rowIndex)) { fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] }); } - const entry = fusionMap.get(item.rowIndex); + const entry = fusionMap.get(item.rowIndex)!; entry.rrfScore += 1 / (k + item.rank); entry.queryScores.push({ - query: queries[qi], + query: queries[qi]!, similarity: item.similarity, rank: item.rank, }); } } - // Build results sorted by RRF score - const hc = new Map(); - const results = []; + const hc = new Map(); + const results: MultiSearchResult['results'] = []; for (const [rowIndex, entry] of fusionMap) { - const row = rows[rowIndex]; + const row = rows[rowIndex]!; results.push({ - ...normalizeSymbol(row, db, hc), + ...normalizeSymbol(row, db as BetterSqlite3Database, hc), rrf: entry.rrfScore, queryScores: entry.queryScores, }); diff --git a/src/domain/search/stores/fts5.js b/src/domain/search/stores/fts5.ts similarity index 58% rename from src/domain/search/stores/fts5.js rename to src/domain/search/stores/fts5.ts index 9b902dce..0733c0f4 100644 --- a/src/domain/search/stores/fts5.js +++ b/src/domain/search/stores/fts5.ts @@ -1,12 +1,11 @@ +import type { BetterSqlite3Database } from '../../../types.js'; + /** * Sanitize a user query for FTS5 MATCH syntax. - * Wraps each token as an implicit OR and escapes special FTS5 characters. */ -export function sanitizeFtsQuery(query) { - // Remove FTS5 special chars that could cause syntax errors +export function sanitizeFtsQuery(query: string): string | null { const cleaned = query.replace(/[*"():^{}~<>]/g, ' ').trim(); if (!cleaned) return null; - // Split into tokens, wrap with OR for multi-token queries const tokens = cleaned.split(/\s+/).filter((t) => t.length > 0); if (tokens.length === 0) return null; if (tokens.length === 1) return `"${tokens[0]}"`; @@ -15,12 +14,13 @@ export function sanitizeFtsQuery(query) { /** * Check if the FTS5 index exists in the database. - * Returns true if fts_index table exists and has rows, false otherwise. 
*/ -export function hasFtsIndex(db) { +export function hasFtsIndex(db: BetterSqlite3Database): boolean { try { - const row = db.prepare('SELECT COUNT(*) as c FROM fts_index').get(); - return row.c > 0; + const row = db.prepare('SELECT COUNT(*) as c FROM fts_index').get() as + | { c: number } + | undefined; + return (row?.c ?? 0) > 0; } catch { return false; } diff --git a/src/domain/search/stores/sqlite-blob.ts b/src/domain/search/stores/sqlite-blob.ts new file mode 100644 index 00000000..e4b992b1 --- /dev/null +++ b/src/domain/search/stores/sqlite-blob.ts @@ -0,0 +1,15 @@ +/** + * Cosine similarity between two Float32Arrays. + */ +export function cosineSim(a: Float32Array, b: Float32Array): number { + let dot = 0; + let normA = 0; + let normB = 0; + for (let i = 0; i < a.length; i++) { + dot += a[i]! * b[i]!; + normA += a[i]! * a[i]!; + normB += b[i]! * b[i]!; + } + const denom = Math.sqrt(normA) * Math.sqrt(normB); + return denom === 0 ? 0 : dot / denom; +} diff --git a/src/domain/search/strategies/source.js b/src/domain/search/strategies/source.ts similarity index 71% rename from src/domain/search/strategies/source.js rename to src/domain/search/strategies/source.ts index 3b25e0f3..aa91c8ac 100644 --- a/src/domain/search/strategies/source.js +++ b/src/domain/search/strategies/source.ts @@ -1,9 +1,14 @@ +import type { NodeRow } from '../../../types.js'; import { splitIdentifier } from './text-utils.js'; /** * Build raw source-code text for a symbol (original strategy). */ -export function buildSourceText(node, file, lines) { +export function buildSourceText( + node: Pick, + file: string, + lines: string[], +): string { const startLine = Math.max(0, node.line - 1); const endLine = node.end_line ? 
Math.min(lines.length, node.end_line) diff --git a/src/domain/search/strategies/structured.js b/src/domain/search/strategies/structured.ts similarity index 71% rename from src/domain/search/strategies/structured.js rename to src/domain/search/strategies/structured.ts index 83f5ff0a..705492d8 100644 --- a/src/domain/search/strategies/structured.js +++ b/src/domain/search/strategies/structured.ts @@ -1,35 +1,40 @@ import { findCalleeNames, findCallerNames } from '../../../db/index.js'; +import type { BetterSqlite3Database, NodeRow } from '../../../types.js'; import { extractLeadingComment, splitIdentifier } from './text-utils.js'; +interface NodeWithId extends Pick { + id: number; +} + /** * Build graph-enriched text for a symbol using dependency context. - * Produces compact, semantic text (~100 tokens) instead of full source code. */ -export function buildStructuredText(node, file, lines, db) { +export function buildStructuredText( + node: NodeWithId, + file: string, + lines: string[], + db: BetterSqlite3Database, +): string { const readable = splitIdentifier(node.name); - const parts = [`${node.kind} ${node.name} (${readable}) in ${file}`]; + const parts: string[] = [`${node.kind} ${node.name} (${readable}) in ${file}`]; const startLine = Math.max(0, node.line - 1); - // Extract parameters from signature (best-effort, single-line) const sigLine = lines[startLine] || ''; const paramMatch = sigLine.match(/\(([^)]*)\)/); if (paramMatch?.[1]?.trim()) { parts.push(`Parameters: ${paramMatch[1].trim()}`); } - // Graph context: callees (capped at 10) const callees = findCalleeNames(db, node.id); if (callees.length > 0) { parts.push(`Calls: ${callees.slice(0, 10).join(', ')}`); } - // Graph context: callers (capped at 10) const callers = findCallerNames(db, node.id); if (callers.length > 0) { parts.push(`Called by: ${callers.slice(0, 10).join(', ')}`); } - // Leading comment (high semantic value) or first few lines of code const comment = extractLeadingComment(lines, 
startLine); if (comment) { parts.push(comment); diff --git a/src/domain/search/strategies/text-utils.js b/src/domain/search/strategies/text-utils.ts similarity index 53% rename from src/domain/search/strategies/text-utils.js rename to src/domain/search/strategies/text-utils.ts index fca8f29e..48e873cb 100644 --- a/src/domain/search/strategies/text-utils.js +++ b/src/domain/search/strategies/text-utils.ts @@ -1,8 +1,7 @@ /** * Split an identifier into readable words. - * camelCase/PascalCase -> "camel Case", snake_case -> "snake case", kebab-case -> "kebab case" */ -export function splitIdentifier(name) { +export function splitIdentifier(name: string): string { return name .replace(/([a-z])([A-Z])/g, '$1 $2') .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2') @@ -11,15 +10,14 @@ export function splitIdentifier(name) { } /** - * Extract leading comment text (JSDoc, //, #, etc.) above a function line. - * Returns the cleaned comment text or null if none found. + * Extract leading comment text above a function line. 
*/ -export function extractLeadingComment(lines, fnLineIndex) { +export function extractLeadingComment(lines: string[], fnLineIndex: number): string | null { if (fnLineIndex > lines.length) return null; - const raw = []; + const raw: string[] = []; for (let i = fnLineIndex - 1; i >= Math.max(0, fnLineIndex - 15); i--) { if (i >= lines.length) continue; - const trimmed = lines[i].trim(); + const trimmed = lines[i]!.trim(); if (/^(\/\/|\/\*|\*\/|\*|#|\/\/\/)/.test(trimmed)) { raw.unshift(trimmed); } else if (trimmed === '') { @@ -32,10 +30,10 @@ export function extractLeadingComment(lines, fnLineIndex) { return raw .map((line) => line - .replace(/^\/\*\*?\s?|\*\/$/g, '') // opening /** or /* and closing */ - .replace(/^\*\s?/, '') // middle * lines - .replace(/^\/\/\/?\s?/, '') // // or /// - .replace(/^#\s?/, '') // # (Python/Ruby) + .replace(/^\/\*\*?\s?|\*\/$/g, '') + .replace(/^\*\s?/, '') + .replace(/^\/\/\/?\s?/, '') + .replace(/^#\s?/, '') .trim(), ) .filter((l) => l.length > 0) diff --git a/src/types.ts b/src/types.ts index 393e2bef..ded5b19a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -857,6 +857,8 @@ export interface BuildGraphOpts { engine?: EngineMode; dataflow?: boolean; ast?: boolean; + scope?: string[]; + skipRegistry?: boolean; } /** Build timing result from buildGraph. */ From e272e2f3ed254b7667f45eb57aab2d188eedba87 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:06:06 -0600 Subject: [PATCH 32/33] chore: delete stale .js files replaced by TypeScript counterparts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove cycles.js and sqlite-blob.js — their .ts replacements were committed in the previous commit. 
--- src/domain/graph/cycles.js | 82 ------------------------- src/domain/search/stores/sqlite-blob.js | 24 -------- 2 files changed, 106 deletions(-) delete mode 100644 src/domain/graph/cycles.js delete mode 100644 src/domain/search/stores/sqlite-blob.js diff --git a/src/domain/graph/cycles.js b/src/domain/graph/cycles.js deleted file mode 100644 index c7872a61..00000000 --- a/src/domain/graph/cycles.js +++ /dev/null @@ -1,82 +0,0 @@ -import { tarjan } from '../../graph/algorithms/tarjan.js'; -import { buildDependencyGraph } from '../../graph/builders/dependency.js'; -import { CodeGraph } from '../../graph/model.js'; -import { loadNative } from '../../infrastructure/native.js'; - -/** - * Detect circular dependencies in the codebase using Tarjan's SCC algorithm. - * Dispatches to native Rust implementation when available, falls back to JS. - * @param {object} db - Open SQLite database - * @param {object} opts - { fileLevel: true, noTests: false } - * @returns {string[][]} Array of cycles, each cycle is an array of file paths - */ -export function findCycles(db, opts = {}) { - const fileLevel = opts.fileLevel !== false; - const noTests = opts.noTests || false; - - const graph = buildDependencyGraph(db, { fileLevel, noTests }); - - // Build a label map: DB string ID → human-readable key - // File-level: file path; Function-level: name|file composite (for native Rust compat) - const idToLabel = new Map(); - for (const [id, attrs] of graph.nodes()) { - if (fileLevel) { - idToLabel.set(id, attrs.file); - } else { - idToLabel.set(id, `${attrs.label}|${attrs.file}`); - } - } - - // Build edge array with human-readable keys (for native engine) - const edges = graph.toEdgeArray().map((e) => ({ - source: idToLabel.get(e.source), - target: idToLabel.get(e.target), - })); - - // Try native Rust implementation - const native = loadNative(); - if (native) { - return native.detectCycles(edges); - } - - // Fallback: JS Tarjan via graph subsystem - // Re-key graph with 
human-readable labels for consistent output - const labelGraph = new CodeGraph(); - for (const { source, target } of edges) { - labelGraph.addEdge(source, target); - } - return tarjan(labelGraph); -} - -/** - * Pure-JS Tarjan's SCC implementation. - * Kept for backward compatibility — accepts raw {source, target}[] edges. - */ -export function findCyclesJS(edges) { - const graph = new CodeGraph(); - for (const { source, target } of edges) { - graph.addEdge(source, target); - } - return tarjan(graph); -} - -/** - * Format cycles for human-readable output. - */ -export function formatCycles(cycles) { - if (cycles.length === 0) { - return 'No circular dependencies detected.'; - } - - const lines = [`Found ${cycles.length} circular dependency cycle(s):\n`]; - for (let i = 0; i < cycles.length; i++) { - const cycle = cycles[i]; - lines.push(` Cycle ${i + 1} (${cycle.length} files):`); - for (const file of cycle) { - lines.push(` -> ${file}`); - } - lines.push(` -> ${cycle[0]} (back to start)`); - lines.push(''); - } - return lines.join('\n'); -} diff --git a/src/domain/search/stores/sqlite-blob.js b/src/domain/search/stores/sqlite-blob.js deleted file mode 100644 index 75037ffa..00000000 --- a/src/domain/search/stores/sqlite-blob.js +++ /dev/null @@ -1,24 +0,0 @@ -/** - * @typedef {object} VectorStore - * @property {(queryVec: Float32Array, rows: Array<{vector: Buffer}>) => Array<{index: number, score: number}>} search - * Score every row against a query vector and return scored indices. - * - * Future implementations (e.g. HNSW via `hnsw.js`) implement this same shape - * for approximate nearest-neighbor search. - */ - -/** - * Cosine similarity between two Float32Arrays. - */ -export function cosineSim(a, b) { - let dot = 0, - normA = 0, - normB = 0; - for (let i = 0; i < a.length; i++) { - dot += a[i] * b[i]; - normA += a[i] * a[i]; - normB += b[i] * b[i]; - } - const denom = Math.sqrt(normA) * Math.sqrt(normB); - return denom === 0 ? 
0 : dot / denom; -} From 451d55833b2875713865f6b452881beac2a90b90 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Mon, 23 Mar 2026 18:49:52 -0600 Subject: [PATCH 33/33] fix(types): annotate implicit any parameters in resolve-imports barrel handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `parseFilesAuto` return type is `Map`, causing `imp` parameters in `.filter()` and `.map()` callbacks to have implicit `any` type — failing strict TypeScript checks in CI. --- src/domain/graph/builder/stages/resolve-imports.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/domain/graph/builder/stages/resolve-imports.ts b/src/domain/graph/builder/stages/resolve-imports.ts index 66dc4551..eb828386 100644 --- a/src/domain/graph/builder/stages/resolve-imports.ts +++ b/src/domain/graph/builder/stages/resolve-imports.ts @@ -1,5 +1,6 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; +import type { Import } from '../../../../types.js'; import { parseFilesAuto } from '../../../parser.js'; import { resolveImportPath, resolveImportsBatch } from '../../resolve.js'; import type { PipelineContext } from '../context.js'; @@ -54,11 +55,11 @@ export async function resolveImports(ctx: PipelineContext): Promise { if (fileSym) { fileSymbols.set(relPath, fileSym); ctx.barrelOnlyFiles.add(relPath); - const reexports = fileSym.imports.filter((imp) => imp.reexport); + const reexports = fileSym.imports.filter((imp: Import) => imp.reexport); if (reexports.length > 0) { ctx.reexportMap.set( relPath, - reexports.map((imp) => ({ + reexports.map((imp: Import) => ({ source: getResolved(ctx, absPath, imp.source), names: imp.names, wildcardReexport: imp.wildcardReexport || false,