elara-labs · rajkumarsakthivel · May 16, 2026 · May 15, 2026 · May 15, 2026 · May 16, 2026
@@ -441,6 +441,55 @@ All other text files are chunked by line range. Binary files are skipped.
 
 ---
 
+## FAQ
+
+### Does CCE affect response quality?
+
+No. Quality stays the same or slightly improves.
+
+CCE replaces "dump the entire file" with "search for the relevant function." The model still gets the code it needs (0.90 Recall@10 in benchmarks). Less irrelevant context means less noise competing for attention, which can improve the model's focus on your actual question.
+
+### How do I increase output token savings?
+
+Set the output compression level in your project config (`cce.yaml`):
+
+```yaml
+compression:
+  output: max       # off | lite | standard | max
+```
+
+Or change it at runtime via the MCP tool:
+
+```
+set_output_level output_level=max
+```
+
+| Level | Savings | What it does |
+|-------|---------|--------------|
+| `off` | 0% | No compression |
+| `lite` | ~25% | Removes filler/hedging/pleasantries + diff-only for code changes |
+| `standard` | ~70% | Drops articles, uses fragments and short synonyms + diff-only for code |
+| `max` | ~80% | Telegraphic style + diff-only for code |
+
+Default is `standard`. All levels except `off` include **code output rules** that instruct the model to show only changed lines (not full file rewrites), which is where most output tokens go in coding sessions. The `max` level produces very terse prose (similar to "caveman mode"). Code blocks, paths, and commands are never compressed regardless of level.
+
+### Where do the savings come from?
+
+Most savings are **input tokens** (what goes into the model):
+
+| Layer | Type | Typical savings |
+|-------|------|-----------------|
+| Retrieval | Input | 94% (full files → relevant chunks) |
+| Chunk compression | Input | 89% (chunks → signatures) |
+| Grammar compression | Input | 13% (article/filler removal) |
+| Turn summarization | Input | varies (session history) |
+| Progressive disclosure | Input | varies (tool payloads) |
+| Output compression | Output | 25-80% (depends on level) |
+
+Output tokens cost 5x more than input tokens (e.g. Opus: $15/1M input vs $75/1M output), so even a small output reduction has an outsized cost impact.
+
+---
+
 ## Roadmap
 
 - [x] Multi-repo benchmarks (FastAPI, chi, fiber)

@@ -24,7 +24,7 @@
     "operatingSystem": "macOS, Linux, Windows",
     "license": "https://opensource.org/licenses/MIT",
     "downloadUrl": "https://pypi.org/project/code-context-engine/",
-    "softwareVersion": "0.4.20",
+    "softwareVersion": "0.4.21",
     "offers": { "@type": "Offer", "price": "0", "priceCurrency": "USD" },
     "author": { "@type": "Organization", "name": "Elara Labs", "url": "https://github.com/elara-labs" }
   }

@@ -1,6 +1,6 @@
 [project]
 name = "code-context-engine"
-version = "0.4.20"
+version = "0.4.21"
 description = "Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server."
 readme = {file = "README.md", content-type = "text/markdown"}
 license = "MIT"

@@ -7,13 +7,13 @@
     "url": "https://github.com/elara-labs/code-context-engine",
     "source": "github"
   },
-  "version": "0.4.20",
+  "version": "0.4.21",
   "packages": [
     {
       "registryType": "pypi",
       "registryBaseUrl": "https://pypi.org",
       "identifier": "code-context-engine",
-      "version": "0.4.20",
+      "version": "0.4.21",
       "runtimeHint": "uvx",
       "transport": {
         "type": "stdio"

@@ -56,6 +56,88 @@ def _safe_cwd() -> Path:
         ) from exc
 
 
+# ── Update check ─────────────────────────────────────────────────────
+# Per-user directory (~/.cce) that holds the update-check cache file.
+_CCE_HOME = Path.home() / ".cce"
+# JSON file written by _check_for_update() with the keys "ts" and "latest".
+_UPDATE_CACHE = _CCE_HOME / "update_check.json"
+# Cache lifetime in seconds; compared against a time.time() difference.
+_UPDATE_CHECK_TTL = 24 * 3600  # 1 day
+
+
+def _version_tuple(v: str) -> tuple[int, ...]:
+    """Parse a dotted version string into a tuple of ints for comparison.
+
+    Only the leading ASCII digits of each dot-separated part are used, so a
+    part with a pre-release suffix keeps its numeric position instead of
+    being dropped: '0.4.21' -> (0, 4, 21) and '0.4.21rc1' -> (0, 4, 21).
+    Parts with no leading digits ('dev0', '') are skipped.
+
+    Args:
+        v: Version string such as '0.4.21'.
+
+    Returns:
+        The numeric components in order; an empty tuple if none are found.
+    """
+    numbers = []
+    for part in v.split("."):
+        # Slice off whatever follows the leading run of ASCII digits.
+        # Restricting to 0-9 also avoids characters that str.isdigit()
+        # accepts but int() rejects (e.g. superscript digits).
+        leading = part[: len(part) - len(part.lstrip("0123456789"))]
+        if leading:
+            numbers.append(int(leading))
+    return tuple(numbers)
+
+
+def _check_for_update() -> str | None:
+    """Look up the newest release on PyPI and report it if it beats ours.
+
+    A cached answer in ``_UPDATE_CACHE`` is reused while it is younger than
+    ``_UPDATE_CHECK_TTL`` seconds; otherwise PyPI is queried and the result
+    (or an empty string when the query failed) is written back to the cache.
+    Lookup, cache and network failures are swallowed.
+
+    Returns:
+        The newer version string, or None when the installed version cannot
+        be determined, nothing newer is known, or the lookup failed.
+    """
+    import time
+    from importlib.metadata import version as pkg_version
+
+    try:
+        installed = pkg_version("code-context-engine")
+    except Exception:
+        return None
+
+    def _newer_or_none(candidate):
+        # Hand back the candidate only when it is set and outranks ours.
+        if candidate and _version_tuple(candidate) > _version_tuple(installed):
+            return candidate
+        return None
+
+    # A fresh cache entry answers the question without touching the network.
+    try:
+        if _UPDATE_CACHE.exists():
+            cached = json.loads(_UPDATE_CACHE.read_text())
+            age = time.time() - cached.get("ts", 0)
+            if age < _UPDATE_CHECK_TTL:
+                return _newer_or_none(cached.get("latest", ""))
+    except Exception:
+        # Unreadable or malformed cache: fall through to a live lookup.
+        pass
+
+    # Ask PyPI for the current release.
+    fetched = None
+    try:
+        import httpx
+        response = httpx.get(
+            "https://pypi.org/pypi/code-context-engine/json",
+            timeout=3.0,
+            follow_redirects=True,
+        )
+        if response.status_code == 200:
+            fetched = response.json()["info"]["version"]
+    except Exception:
+        pass
+
+    # Record the outcome (even an empty one) along with the current time.
+    try:
+        _CCE_HOME.mkdir(parents=True, exist_ok=True)
+        payload = {"ts": time.time(), "latest": fetched or ""}
+        _UPDATE_CACHE.write_text(json.dumps(payload))
+    except Exception:
+        pass
+
+    return _newer_or_none(fetched)
+
+
+def _show_update_notice() -> None:
+    """Echo a single styled line when `_check_for_update` reports a newer version.
+
+    Nothing is printed when no newer version is reported, and any exception
+    raised while checking or printing is suppressed.
+    """
+    from importlib.metadata import version as pkg_version
+
+    try:
+        newest = _check_for_update()
+        if not newest:
+            return
+        installed = pkg_version("code-context-engine")
+        headline = click.style("Update available", fg="yellow", bold=True)
+        old_label = click.style(installed, dim=True)
+        new_label = click.style(newest, fg="green", bold=True)
+        run_word = click.style("Run", dim=True)
+        command = click.style("cce upgrade", fg="cyan")
+        tail = click.style("to update", dim=True)
+        click.echo(
+            f"\n  {headline} {old_label} → {new_label}  {run_word} {command} {tail}"
+        )
+    except Exception:
+        pass
+
+
 def _configure_mcp(project_dir: Path) -> bool:
     """Write MCP server config to .mcp.json in the project directory.
 
@@ -681,6 +763,16 @@ def main(ctx: click.Context, verbose: bool) -> None:
         _show_welcome_banner(ctx.obj["config"])
 
 
+@main.result_callback()
+@click.pass_context
+def _after_command(ctx: click.Context, *_args, **_kwargs) -> None:
+    """Result callback for the CLI group: print the update notice when due.
+
+    The notice is skipped for `serve` (long-running MCP server) and for
+    `upgrade` (already handles it).
+    """
+    if ctx.invoked_subcommand not in ("serve", "upgrade"):
+        _show_update_notice()
+
+
 _INIT_AGENT_CHOICES = ("auto", "claude", "codex", "copilot", "all")
 _INIT_AGENT_TO_EDITORS = {
     "claude": {"claude"},

@@ -11,15 +11,27 @@
 
-# Advertised output-token reduction per level. Sourced from the level
-# descriptions ("~65% savings", "~75% savings"). `lite` has no advertised
-# number; we use a conservative 20% based on how much filler/hedging
-# typically lives in default-mode replies.
+# Advertised output-token reduction per level; keep in sync with the
+# figures in get_level_description() ("~25%", "~70%", "~80%"). `lite` is a
+# conservative estimate based on filler removal plus the code diff rules.
+# The code output rules (show diffs, not full files) add ~5-10% on top of
+# prose compression since code responses are a large share of output tokens.
 ADVERTISED_PCT = {
     "off": 0.0,
-    "lite": 0.20,
-    "standard": 0.65,
-    "max": 0.75,
+    "lite": 0.25,
+    "standard": 0.70,
+    "max": 0.80,
 }
 
+# Code output rules — concatenated onto each non-off entry of _RULES
+# (lite, standard, max) to reduce code token waste. The string below is
+# runtime text, so edit its wording deliberately.
+_CODE_RULES = (
+    "\n\n## Code Output Rules\n"
+    "When suggesting code changes:\n"
+    "- Show ONLY the changed lines with minimal surrounding context (3 lines above/below)\n"
+    "- Use edit format: file path, then the specific change. Never rewrite entire files.\n"
+    "- If multiple changes in one file, show each change separately, not the whole file\n"
+    "- Never echo back unchanged code the user already has\n"
+    "- For new files, show the full file. For edits, show only what changes."
+)
+
 _RULES = {
     "lite": (
         "## Output Compression: Lite\n"
@@ -30,6 +42,7 @@
         "- No trailing summaries — the diff/output speaks for itself\n"
         "- Keep full grammar and articles\n"
         "- Code blocks, paths, commands, URLs: NEVER compress"
+        + _CODE_RULES
     ),
     "standard": (
         "## Output Compression: Standard\n"
@@ -43,6 +56,7 @@
         "- One-line explanations unless detail is asked for\n"
         "- Code blocks, paths, commands, URLs, errors: NEVER compress\n"
         "- Security warnings and destructive action confirmations: use full clarity"
+        + _CODE_RULES
     ),
     "max": (
         "## Output Compression: Max\n"
@@ -55,6 +69,7 @@
         "- Pattern: [thing] → [action]. [reason].\n"
         "- Code blocks, paths, commands, URLs, errors: NEVER compress\n"
         "- Security warnings and destructive action confirmations: use full clarity"
+        + _CODE_RULES
     ),
 }
 
@@ -70,8 +85,8 @@ def get_level_description(level: str) -> str:
     """Return a human-readable description of the compression level."""
     descriptions = {
         "off": "No output compression — Claude responds normally",
-        "lite": "Removes filler, hedging, and pleasantries. Keeps full grammar.",
-        "standard": "Drops articles, uses fragments, short synonyms. ~65% output token savings.",
-        "max": "Telegraphic style with abbreviations and symbols. ~75% output token savings.",
+        "lite": "Removes filler, hedging, and pleasantries. Diff-only for code. ~25% savings.",
+        "standard": "Drops articles, uses fragments, short synonyms. Diff-only for code. ~70% savings.",
+        "max": "Telegraphic style with abbreviations and symbols. Diff-only for code. ~80% savings.",
     }
     return descriptions.get(level, "Unknown level")
@@ -303,3 +303,36 @@ def test_grid_bar_high_usage(runner, tmp_path):
     assert result.exit_code == 0
     # 20% savings = 80% usage = 8 filled cells
     assert result.output.count("⛁") >= 7
+
+
+# ── Update check ──────────────────────────────────────
+
+
+def test_version_tuple_comparison():
+    """Parsed version tuples order and compare the way the versions do."""
+    from context_engine.cli import _version_tuple as parse
+
+    newer_patch, older_patch = parse("0.4.21"), parse("0.4.20")
+    assert newer_patch > older_patch
+
+    major_bump, high_minor = parse("1.0.0"), parse("0.99.99")
+    assert major_bump > high_minor
+
+    assert parse("0.4.20") == parse("0.4.20")
+
+
+def test_update_check_shows_notice_when_newer(runner, storage):
+    """A newer version from the update check produces the notice line."""
+    first_patch, second_patch = _patch_config(str(storage))
+    fake_check = patch(
+        "context_engine.cli._check_for_update", return_value="99.0.0"
+    )
+    with runner.isolated_filesystem(), first_patch, second_patch, fake_check:
+        result = runner.invoke(main, ["savings"])
+    assert result.exit_code == 0
+    for expected in ("Update available", "99.0.0", "cce upgrade"):
+        assert expected in result.output
+
+
+def test_update_check_silent_when_current(runner, storage):
+    """Nothing is announced when the update check reports no newer version."""
+    first_patch, second_patch = _patch_config(str(storage))
+    fake_check = patch(
+        "context_engine.cli._check_for_update", return_value=None
+    )
+    with runner.isolated_filesystem(), first_patch, second_patch, fake_check:
+        result = runner.invoke(main, ["savings"])
+    assert result.exit_code == 0
+    assert "Update available" not in result.output