From 84ad379f8e3eed66aa2c908c0fe043fd4ba2a63e Mon Sep 17 00:00:00 2001
From: Nate Nystrom <nate.nystrom@gmail.com>
Date: Wed, 25 Feb 2026 11:27:51 +0100
Subject: [PATCH 1/4] Parser and pretty printer benchmarks

---
 meta/bench/README.md    |  48 ++++++++++++
 meta/bench/benchmark.py | 159 +++++++++++++++++++++++++++++++++++++++
 meta/bench/run.py       | 160 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 367 insertions(+)
 create mode 100644 meta/bench/README.md
 create mode 100644 meta/bench/benchmark.py
 create mode 100644 meta/bench/run.py

diff --git a/meta/bench/README.md b/meta/bench/README.md
new file mode 100644
index 00000000..10af084b
--- /dev/null
+++ b/meta/bench/README.md
@@ -0,0 +1,48 @@
+# LQP Parser & Pretty-Printer Benchmarks
+
+Microbenchmarks comparing the current generated LL(k) parser and pretty-printer
+(`lqp>=0.3.0`) against the old Lark-based implementation (`lqp==0.2.3`).
+
+## What's measured
+
+- **Parse**: full text-to-protobuf pipeline. The old version goes through an
+  intermediate IR (`text → IR → protobuf`); the new version parses directly
+  (`text → protobuf`).
+- **Pretty-print**: protobuf/IR to text. The old version prints from its IR;
+  the new version prints from protobuf messages.
+
+All `.lqp` files under `tests/lqp/` are used as inputs. Files that fail to
+parse under either version are skipped (the old parser doesn't support some
+newer syntax).
+
+## Running
+
+```
+uv run --no-project python meta/bench/run.py [iterations]
+```
+
+`iterations` defaults to 20. Each file is parsed and pretty-printed that many
+times; the reported time is the per-iteration average.
+
+The runner uses `uv run --with` to create ephemeral environments for each
+version — no manual venv setup needed.
+
+## Running a single version
+
+To benchmark only one version (outputs JSON):
+
+```
+# Old (Lark-based)
+uv run --no-project --with "lqp==0.2.3" python meta/bench/benchmark.py
+
+# New (generated)
+uv run --no-project --with ./sdks/python python meta/bench/benchmark.py
+```
+
+Control iterations via `BENCH_ITERATIONS` env var.
+
+## Files
+
+- `run.py` — orchestrator: runs both versions, prints comparison table.
+- `benchmark.py` — timing logic: auto-detects which `lqp` is installed,
+  benchmarks all test files, outputs JSON to stdout.
diff --git a/meta/bench/benchmark.py b/meta/bench/benchmark.py
new file mode 100644
index 00000000..ff654f97
--- /dev/null
+++ b/meta/bench/benchmark.py
@@ -0,0 +1,159 @@
+"""
+Benchmark LQP parsing and pretty-printing.
+
+Auto-detects whether the old (Lark-based, lqp<=0.2.3) or new (generated LL(k),
+lqp>=0.3.0) implementation is installed, and benchmarks accordingly.
+
+Outputs JSON results to stdout.
+"""
+
+import json
+import os
+import sys
+import timeit
+from pathlib import Path
+
+WARMUP_ITERATIONS = 3
+
+
+def detect_version():
+    """Detect which lqp version is installed based on available modules."""
+    try:
+        from lqp.gen.parser import parse  # noqa: F401
+        return "new"
+    except ImportError:
+        pass
+    try:
+        from lqp.parser import parse_lqp  # noqa: F401
+        return "old"
+    except ImportError:
+        pass
+    print("error: no lqp package found", file=sys.stderr)
+    sys.exit(1)
+
+
+def find_lqp_files(tests_dir: Path):
+    """Return sorted *.lqp paths directly under tests_dir/lqp (exit 1 if absent)."""
+    lqp_dir = tests_dir / "lqp"
+    if not lqp_dir.is_dir():
+        print(f"error: {lqp_dir} not found", file=sys.stderr)
+        sys.exit(1)
+    return sorted(lqp_dir.glob("*.lqp"))
+
+
+def warmup(fn):
+    """Call fn WARMUP_ITERATIONS times and discard the results (cache warm-up)."""
+    for _ in range(WARMUP_ITERATIONS):
+        fn()
+
+
+def bench_old(lqp_files, iterations):
+    """Benchmark the old Lark-based parser and pretty-printer."""
+    from lqp.parser import parse_lqp
+    from lqp.emit import ir_to_proto
+    from lqp.print import to_string
+
+    results = []
+    for path in lqp_files:
+        name = path.stem
+        text = path.read_text(encoding="utf-8")  # same decoding on every platform
+        filename = str(path)
+
+        try:
+            ir_node = parse_lqp(filename, text)
+            _ = ir_to_proto(ir_node)
+        except Exception:
+            print(f"skip {name} (parse failed)", file=sys.stderr)
+            results.append({"file": name, "parse_ms": None, "parse_emit_ms": None, "pretty_ms": None})
+            continue
+
+        def do_parse():
+            return parse_lqp(filename, text)
+
+        def do_parse_emit():
+            return ir_to_proto(parse_lqp(filename, text))
+
+        def do_pretty():
+            return to_string(ir_node)
+
+        warmup(do_parse_emit)
+        parse_time = timeit.timeit(do_parse, number=iterations)
+        parse_emit_time = timeit.timeit(do_parse_emit, number=iterations)
+
+        warmup(do_pretty)
+        pretty_time = timeit.timeit(do_pretty, number=iterations)
+
+        results.append({
+            "file": name,
+            "parse_ms": parse_time / iterations * 1000,
+            "parse_emit_ms": parse_emit_time / iterations * 1000,
+            "pretty_ms": pretty_time / iterations * 1000,
+        })
+
+    return results
+
+
+def bench_new(lqp_files, iterations):
+    """Benchmark the new generated parser and pretty-printer."""
+    from lqp.gen.parser import parse
+    from lqp.gen.pretty import pretty
+
+    results = []
+    for path in lqp_files:
+        name = path.stem
+        text = path.read_text(encoding="utf-8")  # same decoding on every platform
+
+        try:
+            proto = parse(text)
+        except Exception:
+            print(f"skip {name} (parse failed)", file=sys.stderr)
+            results.append({"file": name, "parse_ms": None, "pretty_ms": None})
+            continue
+
+        def do_parse():
+            return parse(text)
+
+        def do_pretty():
+            return pretty(proto)
+
+        warmup(do_parse)
+        parse_time = timeit.timeit(do_parse, number=iterations)
+
+        warmup(do_pretty)
+        pretty_time = timeit.timeit(do_pretty, number=iterations)
+
+        results.append({
+            "file": name,
+            "parse_ms": parse_time / iterations * 1000,
+            "pretty_ms": pretty_time / iterations * 1000,
+        })
+
+    return results
+
+
+def main():
+    iterations = int(os.environ.get("BENCH_ITERATIONS", "20"))
+    default_tests_dir = Path(__file__).resolve().parent.parent.parent / "tests"
+    tests_dir_env = os.environ.get("BENCH_TESTS_DIR")
+    tests_dir = Path(tests_dir_env) if tests_dir_env else default_tests_dir
+
+    version = detect_version()
+    lqp_files = find_lqp_files(tests_dir)
+
+    if version == "old":
+        results = bench_old(lqp_files, iterations)
+    else:
+        results = bench_new(lqp_files, iterations)
+
+    output = {
+        "version": version,
+        "iterations": iterations,
+        "files": len(lqp_files),
+        "results": results,
+    }
+    json.dump(output, sys.stdout, indent=2)
+    print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/meta/bench/run.py b/meta/bench/run.py
new file mode 100644
index 00000000..91e5a237
--- /dev/null
+++ b/meta/bench/run.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+"""
+Run LQP parser/pretty-printer benchmarks comparing old (Lark) vs new (generated).
+
+Uses `uv run --with` to create ephemeral environments for each version.
+"""
+
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent.parent
+BENCHMARK_SCRIPT = Path(__file__).resolve().parent / "benchmark.py"
+SDK_PYTHON = REPO_ROOT / "sdks" / "python"
+TESTS_DIR = REPO_ROOT / "tests"
+OLD_PACKAGE = "lqp==0.2.3"
+
+
+def run_benchmark(label: str, with_pkg: str, iterations: int):
+    """Run benchmark.py via `uv run --with <with_pkg>` and return its parsed JSON."""
+    env = {
+        **os.environ,
+        "BENCH_TESTS_DIR": str(TESTS_DIR),
+        "BENCH_ITERATIONS": str(iterations),
+    }
+    cmd = [
+        "uv", "run", "--no-project",
+        "--with", with_pkg,
+        "python", str(BENCHMARK_SCRIPT),
+    ]
+    print(f"Running {label} benchmarks...", file=sys.stderr)
+    result = subprocess.run(cmd, capture_output=True, text=True, env=env)
+    if result.stderr:
+        print(result.stderr, end="", file=sys.stderr)
+    if result.returncode != 0:
+        print(f"error: {label} benchmark failed", file=sys.stderr)
+        sys.exit(1)
+    return json.loads(result.stdout)
+
+
+def fmt_speedup(old_ms, new_ms):
+    """Return old_ms / new_ms as a right-aligned 'N.NNx', or 'inf' if new_ms <= 0."""
+    if new_ms > 0:
+        return f"{old_ms / new_ms:>7.2f}x"
+    return f"{'inf':>7}"
+
+
+def print_parse_table(old_by_file, new_by_file, all_files):
+    """Print the parser comparison table; return (compared, skipped) file counts."""
+    print()
+    print("## Parser")
+    print()
+    hdr = f"{'file':<25} {'old parse':>10} {'old p+emit':>11} {'new parse':>10} {'speedup':>8}"
+    print(hdr)
+    print("-" * len(hdr))
+
+    total_old = 0.0
+    total_old_emit = 0.0
+    total_new = 0.0
+    skipped = 0
+    compared = 0
+
+    for f in all_files:
+        o = old_by_file[f]
+        n = new_by_file[f]
+
+        po = o["parse_ms"]
+        peo = o.get("parse_emit_ms")
+        pn = n["parse_ms"]
+
+        if po is None or pn is None:
+            print(f"{f:<25} {'skip':>10} {'skip':>11} {'skip':>10} {'':>8}")
+            skipped += 1
+            continue
+
+        compared += 1
+        pe = peo if peo is not None else po  # 0.0 is a valid timing, not "missing"
+        total_old += po
+        total_old_emit += pe
+        total_new += pn
+
+        print(f"{f:<25} {po:>9.3f}ms {pe:>10.3f}ms {pn:>9.3f}ms {fmt_speedup(pe, pn)}")
+
+    print("-" * len(hdr))
+    print(f"{'TOTAL':<25} {total_old:>9.3f}ms {total_old_emit:>10.3f}ms"
+          f" {total_new:>9.3f}ms {fmt_speedup(total_old_emit, total_new)}")
+
+    print()
+    print("old parse   = Lark parse to IR")
+    print("old p+emit  = Lark parse to IR + ir_to_proto")
+    print("new parse   = generated LL(k) parser to protobuf")
+    print("speedup     = old p+emit / new parse")
+
+    return compared, skipped
+
+
+def print_pretty_table(old_by_file, new_by_file, all_files):
+    """Print pretty-printer comparison table."""
+    print()
+    print("## Pretty-printer")
+    print()
+    hdr = f"{'file':<25} {'old':>10} {'new':>10} {'speedup':>8}"
+    print(hdr)
+    print("-" * len(hdr))
+
+    total_old = 0.0
+    total_new = 0.0
+
+    for f in all_files:
+        o = old_by_file[f]
+        n = new_by_file[f]
+
+        pro = o["pretty_ms"]
+        prn = n["pretty_ms"]
+
+        if pro is None or prn is None:
+            print(f"{f:<25} {'skip':>10} {'skip':>10} {'':>8}")
+            continue
+
+        total_old += pro
+        total_new += prn
+
+        print(f"{f:<25} {pro:>9.3f}ms {prn:>9.3f}ms {fmt_speedup(pro, prn)}")
+
+    print("-" * len(hdr))
+    print(f"{'TOTAL':<25} {total_old:>9.3f}ms {total_new:>9.3f}ms {fmt_speedup(total_old, total_new)}")
+
+    print()
+    print("old = IR to text")
+    print("new = protobuf to text")
+
+
+def print_comparison(old_data, new_data):
+    """Print parser and pretty-printer comparison tables plus a run summary."""
+    old_by_file = {r["file"]: r for r in old_data["results"]}
+    new_by_file = {r["file"]: r for r in new_data["results"]}
+    all_files = sorted(set(old_by_file) & set(new_by_file))
+
+    compared, skipped = print_parse_table(old_by_file, new_by_file, all_files)
+    print_pretty_table(old_by_file, new_by_file, all_files)
+
+    print()
+    print(f"Iterations per file: {old_data['iterations']}")
+    print(f"Files compared: {compared}")
+    if skipped:
+        print(f"Files skipped: {skipped} (parse failed under old or new version)")
+
+
+def main():
+    iterations = int(sys.argv[1]) if len(sys.argv) > 1 else 20
+
+    old_data = run_benchmark("old (Lark)", OLD_PACKAGE, iterations)
+    new_data = run_benchmark("new (generated)", str(SDK_PYTHON), iterations)
+    print_comparison(old_data, new_data)
+
+
+if __name__ == "__main__":
+    main()

From ed4bee151b42487e22ec5f45aee33ca3842c6dd9 Mon Sep 17 00:00:00 2001
From: Nate Nystrom <nate.nystrom@gmail.com>
Date: Wed, 25 Feb 2026 11:33:27 +0100
Subject: [PATCH 2/4] fix lint

---
 meta/bench/benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meta/bench/benchmark.py b/meta/bench/benchmark.py
index ff654f97..119701c0 100644
--- a/meta/bench/benchmark.py
+++ b/meta/bench/benchmark.py
@@ -49,8 +49,8 @@ def warmup(fn):
 
 def bench_old(lqp_files, iterations):
     """Benchmark the old Lark-based parser and pretty-printer."""
-    from lqp.parser import parse_lqp
     from lqp.emit import ir_to_proto
+    from lqp.parser import parse_lqp
     from lqp.print import to_string
 
     results = []

From 5c5bca695a9e93f2fbc32e9772a02afacc89b79d Mon Sep 17 00:00:00 2001
From: Nate Nystrom <nate.nystrom@gmail.com>
Date: Wed, 25 Feb 2026 11:37:52 +0100
Subject: [PATCH 3/4] format

---
 meta/bench/benchmark.py | 37 +++++++++++++++++++++++++------------
 meta/bench/run.py       | 20 ++++++++++++++------
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/meta/bench/benchmark.py b/meta/bench/benchmark.py
index 119701c0..9ea39da8 100644
--- a/meta/bench/benchmark.py
+++ b/meta/bench/benchmark.py
@@ -20,11 +20,13 @@ def detect_version():
     """Detect which lqp version is installed based on available modules."""
     try:
         from lqp.gen.parser import parse  # noqa: F401
+
         return "new"
     except ImportError:
         pass
     try:
         from lqp.parser import parse_lqp  # noqa: F401
+
         return "old"
     except ImportError:
         pass
@@ -64,7 +66,14 @@ def bench_old(lqp_files, iterations):
             _ = ir_to_proto(ir_node)
         except Exception:
             print(f"skip {name} (parse failed)", file=sys.stderr)
-            results.append({"file": name, "parse_ms": None, "parse_emit_ms": None, "pretty_ms": None})
+            results.append(
+                {
+                    "file": name,
+                    "parse_ms": None,
+                    "parse_emit_ms": None,
+                    "pretty_ms": None,
+                }
+            )
             continue
 
         def do_parse():
@@ -83,12 +92,14 @@ def do_pretty():
         warmup(do_pretty)
         pretty_time = timeit.timeit(do_pretty, number=iterations)
 
-        results.append({
-            "file": name,
-            "parse_ms": parse_time / iterations * 1000,
-            "parse_emit_ms": parse_emit_time / iterations * 1000,
-            "pretty_ms": pretty_time / iterations * 1000,
-        })
+        results.append(
+            {
+                "file": name,
+                "parse_ms": parse_time / iterations * 1000,
+                "parse_emit_ms": parse_emit_time / iterations * 1000,
+                "pretty_ms": pretty_time / iterations * 1000,
+            }
+        )
 
     return results
 
@@ -122,11 +133,13 @@ def do_pretty():
         warmup(do_pretty)
         pretty_time = timeit.timeit(do_pretty, number=iterations)
 
-        results.append({
-            "file": name,
-            "parse_ms": parse_time / iterations * 1000,
-            "pretty_ms": pretty_time / iterations * 1000,
-        })
+        results.append(
+            {
+                "file": name,
+                "parse_ms": parse_time / iterations * 1000,
+                "pretty_ms": pretty_time / iterations * 1000,
+            }
+        )
 
     return results
 
diff --git a/meta/bench/run.py b/meta/bench/run.py
index 91e5a237..e7c2b5f7 100644
--- a/meta/bench/run.py
+++ b/meta/bench/run.py
@@ -26,9 +26,13 @@ def run_benchmark(label: str, with_pkg: str, iterations: int):
         "BENCH_ITERATIONS": str(iterations),
     }
     cmd = [
-        "uv", "run", "--no-project",
-        "--with", with_pkg,
-        "python", str(BENCHMARK_SCRIPT),
+        "uv",
+        "run",
+        "--no-project",
+        "--with",
+        with_pkg,
+        "python",
+        str(BENCHMARK_SCRIPT),
     ]
     print(f"Running {label} benchmarks...", file=sys.stderr)
     result = subprocess.run(cmd, capture_output=True, text=True, env=env)
@@ -84,8 +88,10 @@ def print_parse_table(old_by_file, new_by_file, all_files):
         print(f"{f:<25} {po:>9.3f}ms {pe:>10.3f}ms {pn:>9.3f}ms {fmt_speedup(pe, pn)}")
 
     print("-" * len(hdr))
-    print(f"{'TOTAL':<25} {total_old:>9.3f}ms {total_old_emit:>10.3f}ms"
-          f" {total_new:>9.3f}ms {fmt_speedup(total_old_emit, total_new)}")
+    print(
+        f"{'TOTAL':<25} {total_old:>9.3f}ms {total_old_emit:>10.3f}ms"
+        f" {total_new:>9.3f}ms {fmt_speedup(total_old_emit, total_new)}"
+    )
 
     print()
     print("old parse   = Lark parse to IR")
@@ -125,7 +131,9 @@ def print_pretty_table(old_by_file, new_by_file, all_files):
         print(f"{f:<25} {pro:>9.3f}ms {prn:>9.3f}ms {fmt_speedup(pro, prn)}")
 
     print("-" * len(hdr))
-    print(f"{'TOTAL':<25} {total_old:>9.3f}ms {total_new:>9.3f}ms {fmt_speedup(total_old, total_new)}")
+    print(
+        f"{'TOTAL':<25} {total_old:>9.3f}ms {total_new:>9.3f}ms {fmt_speedup(total_old, total_new)}"
+    )
 
     print()
     print("old = IR to text")

From 149fa313d583916b78415e939df6deb8ddff494b Mon Sep 17 00:00:00 2001
From: Nate Nystrom <nate.nystrom@gmail.com>
Date: Wed, 25 Feb 2026 16:23:16 +0100
Subject: [PATCH 4/4] ignore bench in pyrefly check

---
 meta/pyrefly.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/meta/pyrefly.toml b/meta/pyrefly.toml
index f3cf4a6a..955e6c7a 100644
--- a/meta/pyrefly.toml
+++ b/meta/pyrefly.toml
@@ -2,6 +2,7 @@ project_includes = ["**/*"]
 project_excludes = [
     "**/.[!/.]*",
     "**/*venv/**/*",
+    "bench",
     "build",
 ]
 search_path = ["."]