Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions meta/bench/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# LQP Parser & Pretty-Printer Benchmarks

Microbenchmarks comparing the current generated LL(k) parser and pretty-printer
(`lqp>=0.3.0`) against the old Lark-based implementation (`lqp==0.2.3`).

## What's measured

- **Parse**: full text-to-protobuf pipeline. The old version goes through an
intermediate IR (`text → IR → protobuf`); the new version parses directly
(`text → protobuf`).
- **Pretty-print**: protobuf/IR to text. The old version prints from its IR;
the new version prints from protobuf messages.

All `.lqp` files under `tests/lqp/` are used as inputs. Files that fail to
parse under either version are skipped (the old parser doesn't support some
newer syntax).

## Running

```
uv run --no-project python meta/bench/run.py [iterations]
```

`iterations` defaults to 20. Each file is parsed and pretty-printed that many
times; the reported time is the per-iteration average.

The runner uses `uv run --with` to create ephemeral environments for each
version — no manual venv setup needed.

## Running a single version

To benchmark only one version (outputs JSON):

```
# Old (Lark-based)
uv run --no-project --with "lqp==0.2.3" python meta/bench/benchmark.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clever!


# New (generated)
uv run --no-project --with ./sdks/python python meta/bench/benchmark.py
```

Control iterations via `BENCH_ITERATIONS` env var.

## Files

- `run.py` — orchestrator: runs both versions, prints comparison table.
- `benchmark.py` — timing logic: auto-detects which `lqp` is installed,
benchmarks all test files, outputs JSON to stdout.
172 changes: 172 additions & 0 deletions meta/bench/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
"""
Benchmark LQP parsing and pretty-printing.

Auto-detects whether the old (Lark-based, lqp<=0.2.3) or new (generated LL(k),
lqp>=0.3.0) implementation is installed, and benchmarks accordingly.

Outputs JSON results to stdout.
"""

import json
import os
import sys
import timeit
from pathlib import Path

WARMUP_ITERATIONS = 3


def detect_version():
    """Report which lqp implementation is importable.

    Returns ``"new"`` for the generated LL(k) package (lqp>=0.3.0) and
    ``"old"`` for the Lark-based one (lqp<=0.2.3). Exits with status 1
    when neither variant can be imported.
    """
    try:
        from lqp.gen.parser import parse  # noqa: F401
    except ImportError:
        pass
    else:
        return "new"
    try:
        from lqp.parser import parse_lqp  # noqa: F401
    except ImportError:
        pass
    else:
        return "old"
    print("error: no lqp package found", file=sys.stderr)
    sys.exit(1)


def find_lqp_files(tests_dir: Path):
    """Return the sorted ``.lqp`` files under ``tests_dir / "lqp"``.

    Exits with status 1 when that directory does not exist.
    """
    lqp_dir = tests_dir / "lqp"
    if lqp_dir.is_dir():
        return sorted(lqp_dir.glob("*.lqp"))
    print(f"error: {lqp_dir} not found", file=sys.stderr)
    sys.exit(1)


def warmup(fn):
    """Invoke *fn* WARMUP_ITERATIONS times to prime caches; results discarded."""
    remaining = WARMUP_ITERATIONS
    while remaining > 0:
        fn()
        remaining -= 1


def bench_old(lqp_files, iterations):
    """Benchmark the old Lark-based parser and pretty-printer.

    Returns one result dict per input file with per-iteration averages in
    milliseconds; all timing fields are None for files the old parser
    cannot handle.
    """
    from lqp.emit import ir_to_proto
    from lqp.parser import parse_lqp
    from lqp.print import to_string

    results = []
    for path in lqp_files:
        stem = path.stem
        source = path.read_text()
        fname = str(path)

        # Files using syntax the old parser doesn't support are recorded
        # as skipped so the comparison tables can still line up by file.
        try:
            ir_node = parse_lqp(fname, source)
            ir_to_proto(ir_node)
        except Exception:
            print(f"skip {stem} (parse failed)", file=sys.stderr)
            results.append(
                {
                    "file": stem,
                    "parse_ms": None,
                    "parse_emit_ms": None,
                    "pretty_ms": None,
                }
            )
            continue

        parse_only = lambda: parse_lqp(fname, source)  # noqa: E731
        parse_and_emit = lambda: ir_to_proto(parse_lqp(fname, source))  # noqa: E731
        pretty_only = lambda: to_string(ir_node)  # noqa: E731

        warmup(parse_and_emit)
        parse_s = timeit.timeit(parse_only, number=iterations)
        parse_emit_s = timeit.timeit(parse_and_emit, number=iterations)

        warmup(pretty_only)
        pretty_s = timeit.timeit(pretty_only, number=iterations)

        results.append(
            {
                "file": stem,
                "parse_ms": parse_s / iterations * 1000,
                "parse_emit_ms": parse_emit_s / iterations * 1000,
                "pretty_ms": pretty_s / iterations * 1000,
            }
        )

    return results


def bench_new(lqp_files, iterations):
    """Benchmark the new generated parser and pretty-printer.

    Returns one result dict per input file with per-iteration averages in
    milliseconds; timing fields are None for files that fail to parse.
    """
    from lqp.gen.parser import parse
    from lqp.gen.pretty import pretty

    results = []
    for path in lqp_files:
        stem = path.stem
        source = path.read_text()

        try:
            proto = parse(source)
        except Exception:
            print(f"skip {stem} (parse failed)", file=sys.stderr)
            results.append({"file": stem, "parse_ms": None, "pretty_ms": None})
            continue

        parse_only = lambda: parse(source)  # noqa: E731
        pretty_only = lambda: pretty(proto)  # noqa: E731

        warmup(parse_only)
        parse_s = timeit.timeit(parse_only, number=iterations)

        warmup(pretty_only)
        pretty_s = timeit.timeit(pretty_only, number=iterations)

        results.append(
            {
                "file": stem,
                "parse_ms": parse_s / iterations * 1000,
                "pretty_ms": pretty_s / iterations * 1000,
            }
        )

    return results


def main():
    """Entry point: benchmark every test file and dump JSON results to stdout.

    Honors BENCH_ITERATIONS (default 20) and BENCH_TESTS_DIR (defaults to
    the repo-level ``tests/`` directory) environment variables.
    """
    iterations = int(os.environ.get("BENCH_ITERATIONS", "20"))
    env_dir = os.environ.get("BENCH_TESTS_DIR")
    if env_dir:
        tests_dir = Path(env_dir)
    else:
        tests_dir = Path(__file__).resolve().parent.parent.parent / "tests"

    version = detect_version()
    lqp_files = find_lqp_files(tests_dir)

    runner = bench_old if version == "old" else bench_new
    results = runner(lqp_files, iterations)

    json.dump(
        {
            "version": version,
            "iterations": iterations,
            "files": len(lqp_files),
            "results": results,
        },
        sys.stdout,
        indent=2,
    )
    print()


if __name__ == "__main__":
main()
168 changes: 168 additions & 0 deletions meta/bench/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""
Run LQP parser/pretty-printer benchmarks comparing old (Lark) vs new (generated).

Uses `uv run --with` to create ephemeral environments for each version.
"""

import json
import os
import subprocess
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent.parent
BENCHMARK_SCRIPT = Path(__file__).resolve().parent / "benchmark.py"
SDK_PYTHON = REPO_ROOT / "sdks" / "python"
TESTS_DIR = REPO_ROOT / "tests"
OLD_PACKAGE = "lqp==0.2.3"


def run_benchmark(label: str, with_pkg: str, iterations: int):
    """Run benchmark.py inside an ephemeral `uv run --with <pkg>` environment.

    Forwards the child's stderr, exits with status 1 if the child fails,
    and returns the parsed JSON results from its stdout.
    """
    child_env = dict(os.environ)
    child_env["BENCH_TESTS_DIR"] = str(TESTS_DIR)
    child_env["BENCH_ITERATIONS"] = str(iterations)

    print(f"Running {label} benchmarks...", file=sys.stderr)
    proc = subprocess.run(
        [
            "uv",
            "run",
            "--no-project",
            "--with",
            with_pkg,
            "python",
            str(BENCHMARK_SCRIPT),
        ],
        capture_output=True,
        text=True,
        env=child_env,
    )
    if proc.stderr:
        print(proc.stderr, end="", file=sys.stderr)
    if proc.returncode != 0:
        print(f"error: {label} benchmark failed", file=sys.stderr)
        sys.exit(1)
    return json.loads(proc.stdout)


def fmt_speedup(old_ms, new_ms):
    """Format ``old_ms / new_ms`` as a right-aligned 8-character ratio string.

    Returns e.g. ``"   2.00x"``; when *new_ms* is zero or negative (division
    undefined), returns ``"inf"`` padded to the same 8-character width so
    table columns stay aligned.
    """
    if new_ms > 0:
        return f"{old_ms / new_ms:>7.2f}x"
    # Pad to 8 chars to match "<ratio>x" above (7-wide number + trailing "x");
    # the old `:>7` padding left this case one character narrower.
    return f"{'inf':>8}"


def print_parse_table(old_by_file, new_by_file, all_files):
    """Print the parser comparison table to stdout.

    Args:
        old_by_file: old-version result dicts keyed by file stem.
        new_by_file: new-version result dicts keyed by file stem.
        all_files: file stems present in both result sets.

    Returns:
        ``(compared, skipped)`` — counts of files timed vs. skipped.
    """
    print()
    print("## Parser")
    print()
    hdr = f"{'file':<25} {'old parse':>10} {'old p+emit':>11} {'new parse':>10} {'speedup':>8}"
    print(hdr)
    print("-" * len(hdr))

    total_old = 0.0
    total_old_emit = 0.0
    total_new = 0.0
    skipped = 0
    compared = 0

    for f in all_files:
        o = old_by_file[f]
        n = new_by_file[f]

        po = o["parse_ms"]
        peo = o.get("parse_emit_ms")
        pn = n["parse_ms"]

        # A None timing on either side means that file was skipped by one
        # of the benchmark runs; show it but leave it out of the totals.
        if po is None or pn is None:
            print(f"{f:<25} {'skip':>10} {'skip':>11} {'skip':>10} {'':>8}")
            skipped += 1
            continue

        compared += 1
        # Fall back to the plain parse time only when the emit timing is
        # absent; `is not None` keeps a legitimate 0.0 measurement from
        # being silently replaced (plain truthiness would drop it).
        pe = peo if peo is not None else po
        total_old += po
        total_old_emit += pe
        total_new += pn

        print(f"{f:<25} {po:>9.3f}ms {pe:>10.3f}ms {pn:>9.3f}ms {fmt_speedup(pe, pn)}")

    print("-" * len(hdr))
    print(
        f"{'TOTAL':<25} {total_old:>9.3f}ms {total_old_emit:>10.3f}ms"
        f" {total_new:>9.3f}ms {fmt_speedup(total_old_emit, total_new)}"
    )

    print()
    print("old parse = Lark parse to IR")
    print("old p+emit = Lark parse to IR + ir_to_proto")
    print("new parse = generated LL(k) parser to protobuf")
    print("speedup = old p+emit / new parse")

    return compared, skipped


def print_pretty_table(old_by_file, new_by_file, all_files):
    """Render the pretty-printer comparison table to stdout."""
    header = f"{'file':<25} {'old':>10} {'new':>10} {'speedup':>8}"
    rule = "-" * len(header)

    print()
    print("## Pretty-printer")
    print()
    print(header)
    print(rule)

    sum_old = 0.0
    sum_new = 0.0

    for stem in all_files:
        old_ms = old_by_file[stem]["pretty_ms"]
        new_ms = new_by_file[stem]["pretty_ms"]

        # Rows with a missing timing on either side are shown but not totaled.
        if old_ms is None or new_ms is None:
            print(f"{stem:<25} {'skip':>10} {'skip':>10} {'':>8}")
            continue

        sum_old += old_ms
        sum_new += new_ms

        print(f"{stem:<25} {old_ms:>9.3f}ms {new_ms:>9.3f}ms {fmt_speedup(old_ms, new_ms)}")

    print(rule)
    print(
        f"{'TOTAL':<25} {sum_old:>9.3f}ms {sum_new:>9.3f}ms {fmt_speedup(sum_old, sum_new)}"
    )

    print()
    print("old = IR to text")
    print("new = protobuf to text")


def print_comparison(old_data, new_data):
    """Print the parser and pretty-printer tables plus a short summary."""
    old_results = {entry["file"]: entry for entry in old_data["results"]}
    new_results = {entry["file"]: entry for entry in new_data["results"]}
    # Only files benchmarked by both versions can be compared.
    shared = sorted(set(old_results) & set(new_results))

    compared, skipped = print_parse_table(old_results, new_results, shared)
    print_pretty_table(old_results, new_results, shared)

    print()
    print(f"Iterations per file: {old_data['iterations']}")
    print(f"Files compared: {compared}")
    if skipped:
        print(f"Files skipped: {skipped} (unsupported by old parser)")


def main():
    """CLI entry point: optional argv[1] sets the iteration count (default 20)."""
    if len(sys.argv) > 1:
        iterations = int(sys.argv[1])
    else:
        iterations = 20

    old_data = run_benchmark("old (Lark)", OLD_PACKAGE, iterations)
    new_data = run_benchmark("new (generated)", str(SDK_PYTHON), iterations)
    print_comparison(old_data, new_data)


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions meta/pyrefly.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ project_includes = ["**/*"]
project_excludes = [
"**/.[!/.]*",
"**/*venv/**/*",
"bench",
"build",
]
search_path = ["."]
Loading