From c823cf41def4a2eaeee5a211cc7774f54c980dc2 Mon Sep 17 00:00:00 2001 From: JeremyDev87 Date: Sun, 12 Apr 2026 09:30:50 +0900 Subject: [PATCH] fix(hooks): persist tool stats from short-lived hook processes PostToolUse hooks run in fresh Python processes that record exactly one tool call before exiting. With flush_interval=10, that single record_tool_call() never reached disk, leaving the Stop hook summary permanently stuck at "[CB] Xm | 0 tools | 0 errors" no matter how many tools were used in the session. - post-tool-use.py: explicit stats.flush() right after record for immediate visibility (statusline, Stop summary). - stats.py: track every live SessionStats in a module-level WeakSet and flush them via a single atexit handler. WeakSet keeps GC unchanged and avoids the per-instance handler leak that a naive atexit.register(self.method) would cause. - finalize() drops the instance from the WeakSet so the post-finalize flush sweep does not resurrect the cleaned-up file. - Module docstring documents the short-lived process pattern. - Regression tests cover: WeakSet membership, persistence via the module flush handler, finalize() removal, and the no-handler-leak invariant (20 instances must not register 20 handlers). --- .../claude-code-plugin/hooks/lib/stats.py | 58 ++++++++++++- .../claude-code-plugin/hooks/post-tool-use.py | 7 ++ .../claude-code-plugin/tests/test_stats.py | 87 +++++++++++++++++++ 3 files changed, 151 insertions(+), 1 deletion(-) diff --git a/packages/claude-code-plugin/hooks/lib/stats.py b/packages/claude-code-plugin/hooks/lib/stats.py index 275552ec..b5b80566 100644 --- a/packages/claude-code-plugin/hooks/lib/stats.py +++ b/packages/claude-code-plugin/hooks/lib/stats.py @@ -2,10 +2,25 @@ Tracks tool call count, tool names, errors, and session duration. Uses fcntl.flock() for file-level locking on every IO operation. + +Short-lived hook processes (e.g. PostToolUse) typically create a +SessionStats, call record_tool_call() once, then exit. Because the +default flush_interval is 10, that single call would never reach disk. +Two safety nets cover this: + +1. Callers in short-lived processes SHOULD call ``flush()`` explicitly + right after recording, for immediate visibility (statusline, Stop + hook summary, etc.). +2. As a fallback, every live SessionStats instance is tracked in a + module-level WeakSet and flushed once at interpreter exit by a single + atexit handler. This avoids leaking a per-instance atexit handler and + keeps GC behavior unchanged. """ +import atexit import json import os import time +import weakref from typing import Any, Dict, List, Optional try: @@ -17,8 +32,38 @@ DEFAULT_DATA_DIR = os.path.join(os.path.expanduser("~"), ".codingbuddy") +# Track every live SessionStats instance with weak references so they +# can be flushed at interpreter exit without preventing garbage +# collection. Using a WeakSet means dead instances are removed +# automatically; using a single module-level atexit handler avoids the +# per-instance handler leak that would otherwise accumulate when many +# SessionStats are created in one process. +_live_instances: "weakref.WeakSet[SessionStats]" = weakref.WeakSet() + + +def _flush_all_pending() -> None: + """Flush every live SessionStats instance. + + Registered as a single atexit handler at module load time. Iterates + a snapshot of the WeakSet so concurrent GC during iteration is safe. + All exceptions are swallowed because atexit handlers must not raise + during interpreter shutdown. + """ + for inst in list(_live_instances): + try: + inst.flush() + except Exception: + pass + + +atexit.register(_flush_all_pending) + + class SessionStats: - """Track operational metrics for a Claude Code session.""" + """Track operational metrics for a Claude Code session. + + See module docstring for the short-lived process pattern. + """ def __init__(self, session_id: str, data_dir: Optional[str] = None, flush_interval: int = 10): """Initialize stats tracker. @@ -67,6 +112,13 @@ def __init__(self, session_id: str, data_dir: Optional[str] = None, flush_interv self._mem_tool_names: Dict[str, int] = {} self._mem_hook_timings: Dict[str, List[float]] = {} + # Add to the module-level WeakSet so the single atexit handler + # can flush this instance on interpreter exit. WeakSet does not + # prevent garbage collection; once the caller drops its last + # reference (and finalize() has cleared mem state), the instance + # disappears from the set automatically. + _live_instances.add(self) + def record_hook_timing(self, hook_name: str, elapsed_ms: float) -> None: """Record a hook execution timing in memory. @@ -205,6 +257,10 @@ def finalize(self) -> Dict[str, Any]: except OSError: pass + # Remove from the live set so the atexit flush does not resurrect + # the file we just removed. + _live_instances.discard(self) + return data @staticmethod diff --git a/packages/claude-code-plugin/hooks/post-tool-use.py b/packages/claude-code-plugin/hooks/post-tool-use.py index 75347416..f331e1d7 100644 --- a/packages/claude-code-plugin/hooks/post-tool-use.py +++ b/packages/claude-code-plugin/hooks/post-tool-use.py @@ -34,6 +34,13 @@ def handle_post_tool_use(data: dict): stats = SessionStats(session_id=session_id) tool_name = data.get("tool_name", "unknown") stats.record_tool_call(tool_name, success=True) + # PostToolUse runs in a fresh short-lived process per call. The + # default flush_interval (10) means a single record_tool_call would + # never reach disk. Flush immediately so subsequent reads (statusline, + # Stop hook summary) see accurate counts. atexit in SessionStats is a + # safety net, but explicit flush keeps the disk state observable + # right after this hook exits. + stats.flush() except Exception: pass # Never block tool execution diff --git a/packages/claude-code-plugin/tests/test_stats.py b/packages/claude-code-plugin/tests/test_stats.py index 0cb8b26a..fa4bfc76 100644 --- a/packages/claude-code-plugin/tests/test_stats.py +++ b/packages/claude-code-plugin/tests/test_stats.py @@ -207,6 +207,93 @@ def test_format_summary_uses_memory_data(self, data_dir): assert "Bash:2" in summary +class TestShortLivedProcess: + """Regression tests for short-lived hook process pattern. + + Each hook invocation (e.g. PostToolUse) is a fresh Python process that + creates one SessionStats, calls record_tool_call() exactly once, then + exits. With flush_interval > 1 and no explicit flush, the in-memory + increment was lost — leaving disk counters stuck at 0 for the entire + session. This produced statusline/stop-summary output like + `[CB] Xm | 0 tools | 0 errors` even after many tool calls. + + The library now tracks every live SessionStats in a module-level + WeakSet and flushes them via a single atexit handler. These tests + drive that handler directly (rather than calling + ``atexit._run_exitfuncs()``) so they do not interfere with other + tests' atexit registrations. + """ + + def test_instance_added_to_live_set(self, data_dir): + """Newly constructed SessionStats must be tracked for atexit flush.""" + from stats import _live_instances + + s = SessionStats( + session_id="weakset-test", data_dir=data_dir, flush_interval=10 + ) + assert s in _live_instances + + def test_record_persists_via_module_atexit_handler(self, data_dir): + """Records should reach disk via _flush_all_pending() when caller forgets.""" + from stats import _flush_all_pending + + s1 = SessionStats( + session_id="exit-flush-test", data_dir=data_dir, flush_interval=10 + ) + s1.record_tool_call("Bash") + # Caller does NOT call flush() — simulates short-lived hook process. + # Drive the same code path the atexit handler would run. + _flush_all_pending() + + # Fresh instance reads from disk only — must see the recorded call. + s2 = SessionStats( + session_id="exit-flush-test", data_dir=data_dir, flush_interval=10 + ) + on_disk = s2._locked_read() + assert on_disk["tool_count"] == 1 + assert on_disk["tool_names"]["Bash"] == 1 + + def test_finalize_removes_instance_from_live_set(self, data_dir): + """finalize() removes the file AND drops the instance from the live set.""" + from stats import _flush_all_pending, _live_instances + + s = SessionStats( + session_id="finalize-weakset", data_dir=data_dir, flush_interval=10 + ) + s.record_tool_call("Bash") + s.finalize() + stats_file = os.path.join(data_dir, "finalize-weakset.json") + assert not os.path.exists(stats_file) + assert s not in _live_instances + + # Subsequent flush sweeps must NOT recreate the file + _flush_all_pending() + assert not os.path.exists(stats_file) + + def test_only_one_atexit_handler_regardless_of_instance_count(self, data_dir): + """Library must register a single atexit handler, not one per instance.""" + import atexit + + # Snapshot atexit handler count, create many instances, snapshot again. + # The implementation uses a module-level WeakSet + single handler, + # so the count should not grow with instance creation. + before = len(getattr(atexit, "_exithandlers", [])) + instances = [ + SessionStats( + session_id=f"leak-test-{i}", + data_dir=data_dir, + flush_interval=10, + ) + for i in range(20) + ] + after = len(getattr(atexit, "_exithandlers", [])) + # Allow for at most the single module-level registration that may + # have happened on first import in this test session. + assert after - before <= 1 + # Keep references alive until the assertion runs + assert len(instances) == 20 + + class TestHookTimingIntegration: """Tests for hook timing integration in SessionStats (#945)."""