diff --git a/HANDOFF_sqlname_chain.md b/HANDOFF_sqlname_chain.md new file mode 100644 index 0000000..fa76091 --- /dev/null +++ b/HANDOFF_sqlname_chain.md @@ -0,0 +1,35 @@ +# Handoff → MobilityDB/MEOS session: SQL-name chain + Doxygen `@csqlfn` irregularities + +**From:** MobilityDuck session · **Re:** ecosystem naming policy (the `MEOS-C → MobilityDB-C → SQL` name chain) +**Status:** report only — the source edits below are **yours** (MobilityDB repo). No changes pushed to MobilityDB by me. + +## 1. Context — the naming policy this supports +Every function has up to three names, linked in the C sources by Doxygen tags: + +| Layer | Form | Example | Tag | +|---|---|---|---| +| MEOS C (lib) | lowercase snake | `tdistance_tnumber_number` | carries `@csqlfn #` | +| MobilityDB C (PG wrapper, `PG_FUNCTION_ARGS`) | PascalCase (avoids symbol clash) | `Tdistance_tnumber_number` | carries `@sqlfn`/`@sqlop` | +| MobilityDB SQL (user) | lowerCamel, overloaded | `tDistance` + op `<->` | — | + +SQL names are **frozen** (production); bindings conform *to* MobilityDB. The chain is the source of truth for every binding's names. + +## 2. What I built (MEOS-API, for your awareness — not requiring action) +A catalog extractor on branch `feat/sql-name-chain` (fork `estebanzimanyi/MEOS-API`): `parser/sqlnames.py` + a `run.py` step that attaches `mobilitydb` / `sql` / `sqlop` to each IDL function from the `@csqlfn`/`@sqlfn`/`@sqlop` tags. It now resolves the chain for **1589** functions. (I'll PR this separately; it only *reads* your tags.) + +## 3. The irregularity to fix in MobilityDB source (the actual ask) +`@csqlfn` is placed on **core Datum-generic** functions in addition to the binding-callable typed datum-hiding wrappers in `*_meos.c`. Since a Datum-generic is **not** binding-callable (per the external-typed-wrapper rule), its `@csqlfn` is redundant and makes the chain attach a SQL name to an uncallable function. + +**Rule:** `@csqlfn` belongs **only** on binding-callable functions (the typed `*_meos.c` wrappers), never on the core Datum-generic. + +### (A) 31 — safe to de-tag now (a `*_meos.c` wrapper already carries the same `@csqlfn`) +- **24** comparison-family, `meos/src/temporal/temporal_compops.c`: `always_{ne,ge,gt,le,lt}_{base_temporal,temporal_base}` and `ever_{ne,ge,gt,le,lt}_{base_temporal,temporal_base}` — wrappers `always_ne_int_tint`/`_float_tfloat`/`_text_ttext` (etc.) keep the tag. +- **7** others: `set_make`, `set_value_n` (set.c); `numspan_shift_scale` (span.c); `union_span_value` (span_ops.c); `adjacent_value_spanset` (spanset_ops.c); `temporal_value_n` (temporal.c); `tinstant_value_at_timestamptz` (tinstant.c); `tsequence_value_at_timestamptz` (tsequence.c); `tsequenceset_value_n` (tsequenceset.c); `tnumberinst_shift_value` (tinstant.c); `tnumberseq_shift_scale_value` (tsequence.c). + +→ Edit: remove the `@csqlfn …` line from each core Datum-generic's Doxygen block. + +### (B) 4 — need a typed `*_meos.c` wrapper added first, then de-tag +No `*_meos.c` carrier exists, so don't just delete the tag: `tbox_expand_value` (tbox.c), `number_tstzspan_to_tbox` (tbox.c), `number_tbox`→`Number_to_tbox` (tbox.c), `tnumber_value_time_boxes` (temporal_tile.c). + +## 4. How to verify +After the edits, re-run the MEOS-API extractor — the count of "Datum-param functions carrying a SQL name" should drop from 35 → 4 (then → 0 once the (B) wrappers exist). That number is a standing regression gate for this class of irregularity. diff --git a/parser/sqlnames.py b/parser/sqlnames.py new file mode 100644 index 0000000..407fc40 --- /dev/null +++ b/parser/sqlnames.py @@ -0,0 +1,100 @@ +"""Extract the MEOS-C -> MobilityDB-C -> SQL name chain from the Doxygen tags. + +Every MobilityDB function carries up to three names, linked in the C sources by +Doxygen tags: + + * the MEOS C library function (lowercase snake, e.g. ``tdistance_tnumber_number``) + documents ``@csqlfn #()`` -> the PG wrapper it backs; + * the MobilityDB C wrapper (PascalCase + ``PG_FUNCTION_ARGS``, e.g. + ``Tdistance_tnumber_number`` -- Pascal-cased to avoid a symbol clash with the + linked MEOS symbol) documents ``@sqlfn ()`` and optionally + ``@sqlop @p `` -> the user-facing SQL name (lowerCamel, overloaded) + and operator. + +So the canonical SQL name + operator of every MEOS function is machine-derivable +from one source of truth, and bindings translate names from it instead of +hand-maintaining per-binding name maps. See docs/ECOSYSTEM_NAMING_POLICY.md. +""" +import re +from pathlib import Path + +# A Doxygen block `/** ... */` immediately followed by the declaration it +# documents, captured up to the first `(`. Pairing the block with the function +# RIGHT AFTER it (rather than a loose `.*?` that can cross into a neighbouring +# function) is what makes the @csqlfn/@sqlfn association correct. +_DOC_FN = re.compile(r"/\*\*(.*?)\*/[ \t]*\n(.*?)\(", re.S) +_IDENT = re.compile(r"[A-Za-z_]\w*") +_CSQLFN_TAG = re.compile(r"@csqlfn\s+#(\w+)") # first wrapper this MEOS fn backs +_SQLFN_TAG = re.compile(r"@sqlfn\s+(\w+)") +_SQLOP_TAG = re.compile(r"@sqlop\s+@p\s+(\S+)") + + +def _read(path: Path) -> str: + return path.read_text(encoding="utf-8", errors="ignore") + + +def _doc_fn_pairs(text: str): + """Yield (doc_text, function_name) for each Doxygen block immediately + followed by a function definition. The function name is the identifier just + before the `(` of that definition.""" + for m in _DOC_FN.finditer(text): + ids = _IDENT.findall(m.group(2)) + if ids: + yield m.group(1), ids[-1] + + +def extract_sql_chain(src_root: Path) -> dict: + """Return ``{meos_c_name: {"mobilitydb", "sql", "sqlop"}}`` for every function + whose name chain is documented in ``src_root`` (a MobilityDB checkout with + ``meos/src`` and ``mobilitydb/src``).""" + meos_src = src_root / "meos" / "src" + mdb_src = src_root / "mobilitydb" / "src" + if not meos_src.is_dir() or not mdb_src.is_dir(): + return {} + + # MEOS-C library name -> MobilityDB-C wrapper name (@csqlfn, in meos/src). + # The tag is ON the MEOS fn (lowercase), and MANY typed datum-hiding wrappers + # (tdistance_tint_int, tdistance_tfloat_float, in *_meos.c) back ONE wrapper + # (Tdistance_tnumber_number) -> key by the MEOS name to keep every variant. + # Block-based pairing (doc block + the function right after it) prevents + # mis-associating the tag with a neighbouring function. + meos_to_mdb: dict[str, str] = {} + for f in meos_src.rglob("*.c"): + for doc, fn in _doc_fn_pairs(_read(f)): + t = _CSQLFN_TAG.search(doc) + if t and fn[:1].islower(): + meos_to_mdb.setdefault(fn, t.group(1)) + + # MobilityDB-C wrapper name -> (SQL name, operator) (@sqlfn/@sqlop, in mobilitydb/src) + mdb_to_sql: dict[str, tuple[str, str | None]] = {} + for f in mdb_src.rglob("*.c"): + for doc, fn in _doc_fn_pairs(_read(f)): + s = _SQLFN_TAG.search(doc) + if s: + op = _SQLOP_TAG.search(doc) + mdb_to_sql.setdefault(fn, (s.group(1), op.group(1) if op else None)) + + chain: dict[str, dict] = {} + for meos, mdb in meos_to_mdb.items(): + sqlinfo = mdb_to_sql.get(mdb) + if sqlinfo: + sql, op = sqlinfo + entry = {"mobilitydb": mdb, "sql": sql} + if op: + entry["sqlop"] = op + chain[meos] = entry + return chain + + +def merge_sql_names(idl: dict, src_root: Path): + """Attach ``mobilitydb`` (PG wrapper), ``sql`` (SQL name) and ``sqlop`` to each + IDL function whose chain is documented. Returns ``(idl, n_attached)``. + No-op (returns 0) when the sources are unavailable.""" + chain = extract_sql_chain(src_root) + n = 0 + for fn in idl.get("functions", []): + c = chain.get(fn["name"]) + if c: + fn.update(c) + n += 1 + return idl, n diff --git a/run.py b/run.py index 8b505dd..3d0b74a 100644 --- a/run.py +++ b/run.py @@ -4,12 +4,16 @@ from parser.parser import parse_all_headers, merge_meta from parser.portable import attach_portable_aliases +from parser.sqlnames import merge_sql_names HEADERS_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./meos/include") META_PATH = Path("./meta/meos-meta.json") PORTABLE_PATH = Path("./meta/portable-aliases.json") OUTPUT_DIR = Path("./output") +# Full MobilityDB checkout (meos/src + mobilitydb/src) carrying the Doxygen +# @csqlfn / @sqlfn / @sqlop tags that link the MEOS-C, MobilityDB-C and SQL names. +MDB_SRC = Path("./_mobilitydb") def main(): @@ -26,6 +30,12 @@ def main(): else: print(f"[2/3] No meta found at {META_PATH}, skipping.", file=sys.stderr) + # 2b. Attach the MEOS-C -> MobilityDB-C -> SQL name chain (+ operator) from the + # @csqlfn / @sqlfn / @sqlop Doxygen tags, so every binding derives its names + # from one source of truth instead of a hand-maintained per-binding map. + idl, sn = merge_sql_names(idl, MDB_SRC) + print(f" SQL name chain attached to {sn} functions", file=sys.stderr) + # 3. Attach the canonical portable bare-name mapping (codegen truth) print(f"[3/3] Attaching portable aliases from {PORTABLE_PATH}...", file=sys.stderr) diff --git a/setup.py b/setup.py index 6094cfe..0d9b447 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,10 @@ def step_clone(branch: str) -> None: REPO_URL, str(CLONE_DIR), ]) - run(["git", "-C", str(CLONE_DIR), "sparse-checkout", "set", "meos/include", "postgres"]) + # meos/include = public headers; meos/src + mobilitydb/src carry the + # @csqlfn / @sqlfn / @sqlop Doxygen tags for the SQL-name chain. + run(["git", "-C", str(CLONE_DIR), "sparse-checkout", "set", + "meos/include", "meos/src", "mobilitydb/src", "postgres"]) print(f" Done.") diff --git a/tests/test_sqlnames.py b/tests/test_sqlnames.py new file mode 100644 index 0000000..6973c41 --- /dev/null +++ b/tests/test_sqlnames.py @@ -0,0 +1,77 @@ +from pathlib import Path + +from parser.sqlnames import extract_sql_chain, merge_sql_names + + +def _write(p: Path, text: str): + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(text) + + +def test_typed_wrappers_resolve_datum_generic_does_not(tmp_path): + # Two typed datum-hiding wrappers (in *_meos.c) back one PG wrapper; the + # Datum-generic carries no @csqlfn -> only the wrappers get the chain. + _write(tmp_path / "meos/src/tnumber_distance_meos.c", """ +/** + * @csqlfn #Tdistance_tnumber_number() + */ +Temporal * +tdistance_tint_int(const Temporal *temp, int i) +{ return 0; } + +/** + * @csqlfn #Tdistance_tnumber_number() + */ +Temporal * +tdistance_tfloat_float(const Temporal *temp, double d) +{ return 0; } +""") + _write(tmp_path / "meos/src/tnumber_distance.c", """ +Temporal * +tdistance_tnumber_number(const Temporal *temp, Datum value) +{ return 0; } +""") + _write(tmp_path / "mobilitydb/src/tnumber_distance.c", """ +/** + * @sqlfn tDistance() + * @sqlop @p <-> + */ +Datum +Tdistance_tnumber_number(PG_FUNCTION_ARGS) +{ return 0; } +""") + chain = extract_sql_chain(tmp_path) + assert chain["tdistance_tint_int"] == { + "mobilitydb": "Tdistance_tnumber_number", "sql": "tDistance", "sqlop": "<->"} + assert chain["tdistance_tfloat_float"]["sql"] == "tDistance" + assert "tdistance_tnumber_number" not in chain # Datum-internal, no chain + + +def test_merge_attaches_to_matching_idl_functions(tmp_path): + _write(tmp_path / "meos/src/a_meos.c", """ +/** + * @csqlfn #Foo_bar() + */ +int +foo_bar_int(int i) +{ return 0; } +""") + _write(tmp_path / "mobilitydb/src/b.c", """ +/** + * @sqlfn fooBar() + */ +Datum +Foo_bar(PG_FUNCTION_ARGS) +{ return 0; } +""") + idl = {"functions": [{"name": "foo_bar_int"}, {"name": "unrelated"}]} + idl, n = merge_sql_names(idl, tmp_path) + assert n == 1 + assert idl["functions"][0]["sql"] == "fooBar" + assert "sql" not in idl["functions"][1] + + +def test_missing_sources_is_noop(tmp_path): + idl = {"functions": [{"name": "x"}]} + _, n = merge_sql_names(idl, tmp_path) + assert n == 0