diff --git a/apps/dev-playground/shared/appkit-types/analytics.d.ts b/apps/dev-playground/shared/appkit-types/analytics.d.ts index 43666dd06..52d64f2d1 100644 --- a/apps/dev-playground/shared/appkit-types/analytics.d.ts +++ b/apps/dev-playground/shared/appkit-types/analytics.d.ts @@ -105,7 +105,7 @@ declare module "@databricks/appkit-ui/react" { parameters: { /** STRING - use sql.string() */ stringParam: SQLStringMarker; - /** NUMERIC - use sql.number() */ + /** NUMERIC - use sql.numeric() */ numberParam: SQLNumberMarker; /** BOOLEAN - use sql.boolean() */ booleanParam: SQLBooleanMarker; diff --git a/docs/docs/api/appkit/Variable.sql.md b/docs/docs/api/appkit/Variable.sql.md index 1b42dcd0d..9750c9c57 100644 --- a/docs/docs/api/appkit/Variable.sql.md +++ b/docs/docs/api/appkit/Variable.sql.md @@ -2,10 +2,25 @@ ```ts const sql: { + bigint: SQLNumberMarker & { + __sql_type: "BIGINT"; + }; binary: SQLBinaryMarker; boolean: SQLBooleanMarker; date: SQLDateMarker; + double: SQLNumberMarker & { + __sql_type: "DOUBLE"; + }; + float: SQLNumberMarker & { + __sql_type: "FLOAT"; + }; + int: SQLNumberMarker & { + __sql_type: "INT"; + }; number: SQLNumberMarker; + numeric: SQLNumberMarker & { + __sql_type: "NUMERIC"; + }; string: SQLStringMarker; timestamp: SQLTimestampMarker; }; @@ -15,6 +30,43 @@ SQL helper namespace ## Type Declaration +### bigint() + +```ts +bigint(value: string | number | bigint): SQLNumberMarker & { + __sql_type: "BIGINT"; +}; +``` + +Creates a `BIGINT` (64-bit signed integer) parameter. Accepts JS +`bigint` so callers can round-trip values outside `Number.MAX_SAFE_INTEGER` +without precision loss; for `number` inputs, requires +`Number.isSafeInteger(value)`. + +Rejects values outside the signed 64-bit range `[-2^63, 2^63 - 1]`. 
+ +#### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `value` | `string` \| `number` \| `bigint` | Integer number, bigint, or integer-shaped string | + +#### Returns + +`SQLNumberMarker` & \{ + `__sql_type`: `"BIGINT"`; +\} + +Marker pinned to `BIGINT` + +#### Example + +```typescript +sql.bigint(42); // { __sql_type: "BIGINT", value: "42" } +sql.bigint(9007199254740993n); // { __sql_type: "BIGINT", value: "9007199254740993" } +sql.bigint("9007199254740993"); // { __sql_type: "BIGINT", value: "9007199254740993" } +``` + ### binary() ```ts @@ -134,14 +186,133 @@ const params = { startDate: sql.date("2024-01-01") }; params = { startDate: "2024-01-01" } ``` +### double() + +```ts +double(value: string | number): SQLNumberMarker & { + __sql_type: "DOUBLE"; +}; +``` + +Creates a `DOUBLE` (double-precision, 64-bit) parameter. Same precision +as a JS `number`, so `sql.double(value)` is exact for any JS number. + +#### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `value` | `string` \| `number` | Number or numeric string | + +#### Returns + +`SQLNumberMarker` & \{ + `__sql_type`: `"DOUBLE"`; +\} + +Marker pinned to `DOUBLE` + +#### Example + +```typescript +sql.double(3.14); // { __sql_type: "DOUBLE", value: "3.14" } +``` + +### float() + +```ts +float(value: string | number): SQLNumberMarker & { + __sql_type: "FLOAT"; +}; +``` + +Creates a `FLOAT` (single-precision, 32-bit) parameter. Note that JS +numbers are 64-bit doubles, so values may be rounded to fit FLOAT +precision at bind time. 
+ +#### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `value` | `string` \| `number` | Number or numeric string | + +#### Returns + +`SQLNumberMarker` & \{ + `__sql_type`: `"FLOAT"`; +\} + +Marker pinned to `FLOAT` + +#### Example + +```typescript +sql.float(3.14); // { __sql_type: "FLOAT", value: "3.14" } +``` + +### int() + +```ts +int(value: string | number): SQLNumberMarker & { + __sql_type: "INT"; +}; +``` + +Creates an `INT` (32-bit signed integer) parameter. Use when the column +or context requires `INT` specifically (e.g. legacy schemas, or to make +the wire type explicit). + +Rejects non-integers, values outside `Number.MAX_SAFE_INTEGER` (for +number inputs), and values outside the signed 32-bit range +`[-2^31, 2^31 - 1]`. + +#### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `value` | `string` \| `number` | Integer number or integer-shaped string | + +#### Returns + +`SQLNumberMarker` & \{ + `__sql_type`: `"INT"`; +\} + +Marker pinned to `INT` + +#### Example + +```typescript +sql.int(42); // { __sql_type: "INT", value: "42" } +sql.int("42"); // { __sql_type: "INT", value: "42" } +``` + ### number() ```ts number(value: string | number): SQLNumberMarker; ``` -Creates a NUMERIC type parameter -Accepts numbers or numeric strings +Creates a numeric type parameter. The wire SQL type is inferred from the +value so the parameter binds correctly in any context, including `LIMIT` +and `OFFSET`: + +- JS integer in `[-2^31, 2^31 - 1]` → `INT` +- JS integer outside `INT` but within `Number.MAX_SAFE_INTEGER` → `BIGINT` +- JS non-integer (`3.14`) → `DOUBLE` +- integer-shaped string in `INT` range → `INT` (common HTTP-input case) +- integer-shaped string outside `INT` but within `BIGINT` → `BIGINT` +- decimal-shaped string (`"123.45"`) → `NUMERIC` (preserves precision) + +Why default to `INT`? 
Spark's `LIMIT` and `OFFSET` operators require +`IntegerType` specifically — `BIGINT` (`LongType`) is rejected with +`INVALID_LIMIT_LIKE_EXPRESSION.DATA_TYPE`. Catalyst auto-widens `INT` +to `BIGINT` / `DECIMAL` / `DOUBLE` for wider columns, so `INT` is a +strictly better default than `BIGINT`. + +Throws on `NaN`, `Infinity`, JS integers outside `Number.MAX_SAFE_INTEGER`, +integer-shaped strings outside the `BIGINT` range, or non-numeric strings. +Reach for `sql.int()`, `sql.bigint()`, `sql.float()`, `sql.double()`, or +`sql.numeric()` to override the inferred type. #### Parameters @@ -153,18 +324,51 @@ Accepts numbers or numeric strings `SQLNumberMarker` -Marker object for NUMERIC type parameter +Marker for a numeric SQL parameter -#### Examples +#### Example ```typescript -const params = { userId: sql.number(123) }; -params = { userId: "123" } +sql.number(123); // { __sql_type: "INT", value: "123" } +sql.number(3_000_000_000); // { __sql_type: "BIGINT", value: "3000000000" } +sql.number(0.5); // { __sql_type: "DOUBLE", value: "0.5" } +sql.number("10"); // { __sql_type: "INT", value: "10" } +sql.number("123.45"); // { __sql_type: "NUMERIC", value: "123.45" } +``` + +### numeric() + +```ts +numeric(value: string | number): SQLNumberMarker & { + __sql_type: "NUMERIC"; +}; ``` +Creates a `NUMERIC` (fixed-point DECIMAL) parameter. Use when you need +exact decimal arithmetic (currency, percentages) — pass values as +strings to avoid JS-number precision loss. + +Note: passing a JS `number` is accepted but lossy for many values +(e.g. `0.1 + 0.2` → `"0.30000000000000004"`). Prefer strings. 
+ +#### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `value` | `string` \| `number` | Number or numeric string (strings preferred for precision) | + +#### Returns + +`SQLNumberMarker` & \{ + `__sql_type`: `"NUMERIC"`; +\} + +Marker pinned to `NUMERIC` + +#### Example + ```typescript -const params = { userId: sql.number("123") }; -params = { userId: "123" } +sql.numeric("12345.6789"); // { __sql_type: "NUMERIC", value: "12345.6789" } ``` ### string() diff --git a/docs/docs/plugins/analytics.md b/docs/docs/plugins/analytics.md index 22204f529..9fd0b4a30 100644 --- a/docs/docs/plugins/analytics.md +++ b/docs/docs/plugins/analytics.md @@ -43,14 +43,23 @@ Use `:paramName` placeholders and optionally annotate parameter types using SQL ```sql -- @param startDate DATE -- @param endDate DATE --- @param limit NUMERIC +-- @param limit INT SELECT ... WHERE usage_date BETWEEN :startDate AND :endDate LIMIT :limit ``` +`LIMIT` / `OFFSET` require Spark `IntegerType` specifically — `BIGINT` +(`LongType`) is rejected with `INVALID_LIMIT_LIKE_EXPRESSION.DATA_TYPE`. +Annotate with `INT`, or use `sql.number()` (auto-infers `INT` for values in +`[-2^31, 2^31-1]`, falling back to `BIGINT` for wider values) / `sql.int()` +at the call site. 
+ **Supported `-- @param` types** (case-insensitive): -- `STRING`, `NUMERIC`, `BOOLEAN`, `DATE`, `TIMESTAMP`, `BINARY` +- `STRING`, `BOOLEAN`, `DATE`, `TIMESTAMP`, `BINARY` +- `INT`, `BIGINT`, `TINYINT`, `SMALLINT` — bind via `sql.int()` / `sql.bigint()` +- `FLOAT`, `DOUBLE` — bind via `sql.float()` / `sql.double()` +- `NUMERIC`, `DECIMAL` — bind via `sql.numeric()` (pass strings for precision) ## Server-injected parameters diff --git a/packages/appkit/src/plugins/analytics/tests/query.test.ts b/packages/appkit/src/plugins/analytics/tests/query.test.ts index 7840b2526..bbf0a2ce4 100644 --- a/packages/appkit/src/plugins/analytics/tests/query.test.ts +++ b/packages/appkit/src/plugins/analytics/tests/query.test.ts @@ -32,7 +32,7 @@ describe("QueryProcessor", () => { expect(result.statement).toBe(query); expect(result.parameters).toHaveLength(2); expect(result.parameters).toEqual([ - { name: "user_id", value: "123", type: "NUMERIC" }, + { name: "user_id", value: "123", type: "INT" }, { name: "name", value: "Alice", type: "STRING" }, ]); }); @@ -167,17 +167,97 @@ describe("QueryProcessor", () => { test("should not override workspace_id if already provided", async () => { const query = "SELECT * FROM data WHERE workspace_id = :workspaceId"; + // 9876543210 exceeds INT_MAX (2^31 - 1) so inference falls through to + // BIGINT — appropriate for ID columns. const parameters = { workspaceId: sql.number("9876543210") }; const result = await processor.processQueryParams(query, parameters); expect(result.workspaceId).toEqual({ - __sql_type: "NUMERIC", + __sql_type: "BIGINT", value: "9876543210", }); }); }); + describe("LIMIT / OFFSET bindings (regression for #323)", () => { + // Spark requires IntegerType for LIMIT/OFFSET; BIGINT/LongType is + // rejected with INVALID_LIMIT_LIKE_EXPRESSION.DATA_TYPE. These tests + // pin INT inference so sql.number(req.query.n) works against the + // warehouse without explicit casting. 
+ // + // These tests are MOCKED — they assert the wire-type string the + // helper emits, not warehouse round-trip behaviour. To re-validate + // that the mocked assertions still match production Spark semantics: + // + // 1. Pick any RUNNING SQL Warehouse you can reach + // (`databricks warehouses list -p <profile>` and grep for RUNNING). + // 2. POST /api/2.0/sql/statements with the helper's wire-type strings + // directly, using the same VALUES-based query so no table is + // required: + // + // databricks api post /api/2.0/sql/statements --json '{ + // "statement": "SELECT x FROM (VALUES (1),(2),(3),(4),(5)) AS t(x) ORDER BY x LIMIT :n OFFSET :m", + // "warehouse_id": "<warehouse-id>", + // "wait_timeout": "30s", + // "parameters": [ + // {"name": "n", "value": "2", "type": "INT"}, + // {"name": "m", "value": "1", "type": "INT"} + // ] + // }' + // + // 3. Expect: `status.state == "SUCCEEDED"`, `result.row_count == 2`. + // 4. Swap both parameter `type` values to `"BIGINT"` and re-run. + // Expect: `status.state == "FAILED"`, error message + // `[INVALID_LIMIT_LIKE_EXPRESSION.DATA_TYPE] ... must be integer + // type, but got "BIGINT". SQLSTATE: 42K0E`. + // + // If (3) fails or (4) starts succeeding, Spark's LIMIT type contract + // has changed and the INT-by-default inference should be re-evaluated. + test("sql.number(integer) binds as INT for LIMIT/OFFSET", () => { + const query = "SELECT * FROM events LIMIT :n OFFSET :m"; + const parameters = { + n: sql.number(10), + m: sql.number(20), + }; + + const result = processor.convertToSQLParameters(query, parameters); + + expect(result.parameters).toEqual([ + { name: "n", value: "10", type: "INT" }, + { name: "m", value: "20", type: "INT" }, + ]); + }); + + test("sql.number(integer-shaped string) binds as INT for LIMIT/OFFSET", () => { + // Express/URLSearchParams return strings — this is the common + // handler pattern: sql.number(req.query.n). 
+ const query = "SELECT * FROM events LIMIT :n OFFSET :m"; + const parameters = { + n: sql.number("10"), + m: sql.number("20"), + }; + + const result = processor.convertToSQLParameters(query, parameters); + + expect(result.parameters).toEqual([ + { name: "n", value: "10", type: "INT" }, + { name: "m", value: "20", type: "INT" }, + ]); + }); + + test("sql.int(string) binds as INT for LIMIT/OFFSET (explicit form)", () => { + const query = "SELECT * FROM events LIMIT :n"; + const parameters = { n: sql.int("10") }; + + const result = processor.convertToSQLParameters(query, parameters); + + expect(result.parameters).toEqual([ + { name: "n", value: "10", type: "INT" }, + ]); + }); + }); + describe("_createParameter - Type Handling", () => { test("should handle date parameters with sql.date()", () => { const query = "SELECT * FROM events WHERE event_date = :startDate"; @@ -229,7 +309,7 @@ describe("QueryProcessor", () => { expect(result.parameters[0]).toEqual({ name: "age", value: "25", - type: "NUMERIC", + type: "INT", }); }); diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts index 196690c2d..06ee64bac 100644 --- a/packages/appkit/src/type-generator/query-registry.ts +++ b/packages/appkit/src/type-generator/query-registry.ts @@ -193,8 +193,10 @@ function generateUnknownResultQuery(sql: string, queryName: string): string { export function extractParameterTypes(sql: string): Record { const paramTypes: Record = {}; + // Alternation order matters: TIMESTAMP_NTZ must precede TIMESTAMP so the + // regex engine doesn't greedy-match TIMESTAMP and leave `_NTZ` unconsumed. 
const regex = - /--\s*@param\s+(\w+)\s+(STRING|NUMERIC|BOOLEAN|DATE|TIMESTAMP|BINARY)/gi; + /--\s*@param\s+(\w+)\s+(STRING|NUMERIC|DECIMAL|BIGINT|TINYINT|SMALLINT|INT|FLOAT|DOUBLE|BOOLEAN|DATE|TIMESTAMP_NTZ|TIMESTAMP|BINARY)\b/gi; const matches = sql.matchAll(regex); for (const match of matches) { const [, paramName, paramType] = match; @@ -207,7 +209,15 @@ export function extractParameterTypes(sql: string): Record { export function defaultForType(sqlType: string | undefined): string { switch (sqlType?.toUpperCase()) { case "NUMERIC": + case "DECIMAL": + case "BIGINT": + case "TINYINT": + case "SMALLINT": + case "INT": return "0"; + case "FLOAT": + case "DOUBLE": + return "0.0"; case "STRING": return "''"; case "BOOLEAN": @@ -216,6 +226,8 @@ export function defaultForType(sqlType: string | undefined): string { return "'2000-01-01'"; case "TIMESTAMP": return "'2000-01-01T00:00:00Z'"; + case "TIMESTAMP_NTZ": + return "'2000-01-01T00:00:00'"; case "BINARY": return "X'00'"; default: diff --git a/packages/appkit/src/type-generator/tests/query-registry.test.ts b/packages/appkit/src/type-generator/tests/query-registry.test.ts index 8d46f98e9..b149d5bbe 100644 --- a/packages/appkit/src/type-generator/tests/query-registry.test.ts +++ b/packages/appkit/src/type-generator/tests/query-registry.test.ts @@ -148,21 +148,47 @@ SELECT * FROM users WHERE date BETWEEN :startDate AND :endDate`; test("handles all supported types", () => { const sql = `-- @param str STRING -- @param num NUMERIC +-- @param dec DECIMAL +-- @param i INT +-- @param big BIGINT +-- @param tiny TINYINT +-- @param small SMALLINT +-- @param f FLOAT +-- @param d DOUBLE -- @param bool BOOLEAN -- @param dt DATE -- @param ts TIMESTAMP +-- @param tsNtz TIMESTAMP_NTZ -- @param bin BINARY SELECT 1`; const types = extractParameterTypes(sql); expect(types.str).toBe("STRING"); expect(types.num).toBe("NUMERIC"); + expect(types.dec).toBe("DECIMAL"); + expect(types.i).toBe("INT"); + expect(types.big).toBe("BIGINT"); + 
expect(types.tiny).toBe("TINYINT"); + expect(types.small).toBe("SMALLINT"); + expect(types.f).toBe("FLOAT"); + expect(types.d).toBe("DOUBLE"); expect(types.bool).toBe("BOOLEAN"); expect(types.dt).toBe("DATE"); expect(types.ts).toBe("TIMESTAMP"); + expect(types.tsNtz).toBe("TIMESTAMP_NTZ"); expect(types.bin).toBe("BINARY"); }); + test("TIMESTAMP_NTZ is not partially matched as TIMESTAMP", () => { + // Regression: the alternation TIMESTAMP_NTZ must come before TIMESTAMP + // (and end with a word boundary) so the regex engine doesn't capture + // `TIMESTAMP` and leave `_NTZ` unconsumed. + const sql = `-- @param eventTs TIMESTAMP_NTZ +SELECT 1`; + const types = extractParameterTypes(sql); + expect(types.eventTs).toBe("TIMESTAMP_NTZ"); + }); + test("ignores malformed @param comments", () => { const sql = `-- @param startDate -- @param INVALID @@ -213,6 +239,23 @@ describe("defaultForType", () => { expect(defaultForType("BINARY")).toBe("X'00'"); }); + test("returns '0' for integer aliases (INT/BIGINT/TINYINT/SMALLINT/DECIMAL)", () => { + expect(defaultForType("INT")).toBe("0"); + expect(defaultForType("BIGINT")).toBe("0"); + expect(defaultForType("TINYINT")).toBe("0"); + expect(defaultForType("SMALLINT")).toBe("0"); + expect(defaultForType("DECIMAL")).toBe("0"); + }); + + test("returns '0.0' for FLOAT and DOUBLE", () => { + expect(defaultForType("FLOAT")).toBe("0.0"); + expect(defaultForType("DOUBLE")).toBe("0.0"); + }); + + test("returns NTZ-shaped literal for TIMESTAMP_NTZ", () => { + expect(defaultForType("TIMESTAMP_NTZ")).toBe("'2000-01-01T00:00:00'"); + }); + test("returns empty string literal for undefined (unknown fallback)", () => { expect(defaultForType(undefined)).toBe("''"); }); diff --git a/packages/appkit/src/type-generator/types.ts b/packages/appkit/src/type-generator/types.ts index 5af43591a..f54176a8c 100644 --- a/packages/appkit/src/type-generator/types.ts +++ b/packages/appkit/src/type-generator/types.ts @@ -50,15 +50,17 @@ export const sqlTypeToHelper: 
Record = { BINARY: "sql.binary()", // boolean BOOLEAN: "sql.boolean()", - // numeric - NUMERIC: "sql.number()", - INT: "sql.number()", - BIGINT: "sql.number()", - TINYINT: "sql.number()", - SMALLINT: "sql.number()", - FLOAT: "sql.number()", - DOUBLE: "sql.number()", - DECIMAL: "sql.number()", + // numeric — route each SQL type to its closest typed helper. INT/BIGINT + // are critical for LIMIT/OFFSET; FLOAT/DOUBLE preserve precision intent; + // NUMERIC/DECIMAL route to sql.numeric() for exact-decimal columns. + NUMERIC: "sql.numeric()", + DECIMAL: "sql.numeric()", + BIGINT: "sql.bigint()", + INT: "sql.int()", + TINYINT: "sql.int()", + SMALLINT: "sql.int()", + FLOAT: "sql.float()", + DOUBLE: "sql.double()", // date/time DATE: "sql.date()", TIMESTAMP: "sql.timestamp()", diff --git a/packages/shared/src/sql/helpers.ts b/packages/shared/src/sql/helpers.ts index 85b39520f..e130de1c2 100644 --- a/packages/shared/src/sql/helpers.ts +++ b/packages/shared/src/sql/helpers.ts @@ -8,6 +8,94 @@ import type { SQLTypeMarker, } from "./types"; +// Strict numeric-literal regex used by string-input paths. Rejects empty +// strings, whitespace, hex/octal/binary, `NaN`, `Infinity`, and other forms +// that JS `Number()` would silently coerce. +const NUMERIC_LITERAL_RE = /^-?(\d+\.?\d*|\.\d+)([eE][+-]?\d+)?$/; +const INTEGER_LITERAL_RE = /^-?\d+$/; + +// 32-bit signed INT range +const INT_MIN = -(2n ** 31n); +const INT_MAX = 2n ** 31n - 1n; +// 64-bit signed BIGINT range +const BIGINT_MIN = -(2n ** 63n); +const BIGINT_MAX = 2n ** 63n - 1n; + +function ensureFiniteNumber(value: number, fnName: string): void { + if (!Number.isFinite(value)) { + throw new Error(`${fnName}() expects a finite number, got: ${value}`); + } +} + +function ensureSafeInteger(value: number, fnName: string): void { + if (!Number.isSafeInteger(value)) { + throw new Error( + `${fnName}() received an integer outside Number.MAX_SAFE_INTEGER ` + + `(${value}); JS numbers cannot represent it exactly. 
` + + `Pass a bigint (sql.bigint(BigInt("..."))) or an integer-shaped string instead.`, + ); + } +} + +function ensureInBigIntRange( + parsed: bigint, + min: bigint, + max: bigint, + typeName: string, + fnName: string, + hint: string, +): void { + if (parsed < min || parsed > max) { + throw new Error( + `${fnName}() value ${parsed} is outside ${typeName} range [${min}, ${max}]. ${hint}`, + ); + } +} + +function coerceNumericLike(value: number | string, fnName: string): string { + if (typeof value === "number") { + ensureFiniteNumber(value, fnName); + return value.toString(); + } + if (typeof value === "string") { + if (!NUMERIC_LITERAL_RE.test(value)) { + throw new Error( + `${fnName}() expects number or numeric string, got: ${value === "" ? "empty string" : value}`, + ); + } + return value; + } + throw new Error( + `${fnName}() expects number or numeric string, got: ${typeof value}`, + ); +} + +function coerceIntegerLike(value: number | string, fnName: string): string { + if (typeof value === "number") { + ensureFiniteNumber(value, fnName); + if (!Number.isInteger(value)) { + throw new Error( + `${fnName}() expects an integer, got non-integer number: ${value}`, + ); + } + ensureSafeInteger(value, fnName); + // BigInt(value).toString() emits canonical decimal-integer text; + // Number.prototype.toString emits exponent notation for values like 1e21. + return BigInt(value).toString(); + } + if (typeof value === "string") { + if (!INTEGER_LITERAL_RE.test(value)) { + throw new Error( + `${fnName}() expects integer number or integer-shaped string, got: ${value === "" ? "empty string" : value}`, + ); + } + return value; + } + throw new Error( + `${fnName}() expects integer number or integer-shaped string, got: ${typeof value}`, + ); +} + /** * SQL helper namespace */ @@ -109,47 +197,217 @@ export const sql = { }, /** - * Creates a NUMERIC type parameter - * Accepts numbers or numeric strings + * Creates a numeric type parameter. 
The wire SQL type is inferred from the + * value so the parameter binds correctly in any context, including `LIMIT` + * and `OFFSET`: + * + * - JS integer in `[-2^31, 2^31 - 1]` → `INT` + * - JS integer outside `INT` but within `Number.MAX_SAFE_INTEGER` → `BIGINT` + * - JS non-integer (`3.14`) → `DOUBLE` + * - integer-shaped string in `INT` range → `INT` (common HTTP-input case) + * - integer-shaped string outside `INT` but within `BIGINT` → `BIGINT` + * - decimal-shaped string (`"123.45"`) → `NUMERIC` (preserves precision) + * + * Why default to `INT`? Spark's `LIMIT` and `OFFSET` operators require + * `IntegerType` specifically — `BIGINT` (`LongType`) is rejected with + * `INVALID_LIMIT_LIKE_EXPRESSION.DATA_TYPE`. Catalyst auto-widens `INT` + * to `BIGINT` / `DECIMAL` / `DOUBLE` for wider columns, so `INT` is a + * strictly better default than `BIGINT`. + * + * Throws on `NaN`, `Infinity`, JS integers outside `Number.MAX_SAFE_INTEGER`, + * integer-shaped strings outside the `BIGINT` range, or non-numeric strings. + * Reach for `sql.int()`, `sql.bigint()`, `sql.float()`, `sql.double()`, or + * `sql.numeric()` to override the inferred type. 
+ * * @param value - Number or numeric string - * @returns Marker object for NUMERIC type parameter - * @example - * ```typescript - * const params = { userId: sql.number(123) }; - * params = { userId: "123" } - * ``` + * @returns Marker for a numeric SQL parameter * @example * ```typescript - * const params = { userId: sql.number("123") }; - * params = { userId: "123" } + * sql.number(123); // { __sql_type: "INT", value: "123" } + * sql.number(3_000_000_000); // { __sql_type: "BIGINT", value: "3000000000" } + * sql.number(0.5); // { __sql_type: "DOUBLE", value: "0.5" } + * sql.number("10"); // { __sql_type: "INT", value: "10" } + * sql.number("123.45"); // { __sql_type: "NUMERIC", value: "123.45" } * ``` */ number(value: number | string): SQLNumberMarker { - let numValue: string = ""; - - // check if value is a number if (typeof value === "number") { - numValue = value.toString(); + ensureFiniteNumber(value, "sql.number"); + if (Number.isInteger(value)) { + ensureSafeInteger(value, "sql.number"); + const asBigInt = BigInt(value); + // INT (32-bit) is required by Spark for LIMIT/OFFSET; Catalyst + // widens INT → BIGINT/DECIMAL/DOUBLE automatically. + if (asBigInt >= INT_MIN && asBigInt <= INT_MAX) { + return { __sql_type: "INT", value: asBigInt.toString() }; + } + return { __sql_type: "BIGINT", value: asBigInt.toString() }; + } + return { __sql_type: "DOUBLE", value: value.toString() }; } - // check if value is a string - else if (typeof value === "string") { - if (value === "" || Number.isNaN(Number(value))) { + if (typeof value === "string") { + if (!NUMERIC_LITERAL_RE.test(value)) { throw new Error( `sql.number() expects number or numeric string, got: ${value === "" ? "empty string" : value}`, ); } - numValue = value; + // Integer-shaped strings get the same INT-preferring inference, so + // `sql.number(req.query.n)` (Express/URLSearchParams strings) works + // with LIMIT/OFFSET out of the box. 
Out-of-BIGINT-range throws — + // sql.numeric() is the right helper for arbitrary-precision integers. + if (INTEGER_LITERAL_RE.test(value)) { + const parsed = BigInt(value); + ensureInBigIntRange( + parsed, + BIGINT_MIN, + BIGINT_MAX, + "BIGINT (64-bit signed)", + "sql.number", + "Use sql.numeric() with a string for arbitrary-precision integers.", + ); + if (parsed >= INT_MIN && parsed <= INT_MAX) { + return { __sql_type: "INT", value }; + } + return { __sql_type: "BIGINT", value }; + } + // Non-integer strings stay NUMERIC: the caller chose to pass a string, + // honour their precision intent rather than coercing through JS number. + return { __sql_type: "NUMERIC", value }; } - // if value is not a number or string, throw an error - else { - throw new Error( - `sql.number() expects number or numeric string, got: ${typeof value}`, + throw new Error( + `sql.number() expects number or numeric string, got: ${typeof value}`, + ); + }, + + /** + * Creates an `INT` (32-bit signed integer) parameter. Use when the column + * or context requires `INT` specifically (e.g. legacy schemas, or to make + * the wire type explicit). + * + * Rejects non-integers, values outside `Number.MAX_SAFE_INTEGER` (for + * number inputs), and values outside the signed 32-bit range + * `[-2^31, 2^31 - 1]`. + * + * @param value - Integer number or integer-shaped string + * @returns Marker pinned to `INT` + * @example + * ```typescript + * sql.int(42); // { __sql_type: "INT", value: "42" } + * sql.int("42"); // { __sql_type: "INT", value: "42" } + * ``` + */ + int(value: number | string): SQLNumberMarker & { __sql_type: "INT" } { + const stringValue = coerceIntegerLike(value, "sql.int"); + ensureInBigIntRange( + BigInt(stringValue), + INT_MIN, + INT_MAX, + "INT (32-bit signed)", + "sql.int", + "Use sql.bigint() for 64-bit values.", + ); + return { __sql_type: "INT", value: stringValue }; + }, + + /** + * Creates a `BIGINT` (64-bit signed integer) parameter. 
Accepts JS + * `bigint` so callers can round-trip values outside `Number.MAX_SAFE_INTEGER` + * without precision loss; for `number` inputs, requires + * `Number.isSafeInteger(value)`. + * + * Rejects values outside the signed 64-bit range `[-2^63, 2^63 - 1]`. + * + * @param value - Integer number, bigint, or integer-shaped string + * @returns Marker pinned to `BIGINT` + * @example + * ```typescript + * sql.bigint(42); // { __sql_type: "BIGINT", value: "42" } + * sql.bigint(9007199254740993n); // { __sql_type: "BIGINT", value: "9007199254740993" } + * sql.bigint("9007199254740993"); // { __sql_type: "BIGINT", value: "9007199254740993" } + * ``` + */ + bigint( + value: number | bigint | string, + ): SQLNumberMarker & { __sql_type: "BIGINT" } { + if (typeof value === "bigint") { + ensureInBigIntRange( + value, + BIGINT_MIN, + BIGINT_MAX, + "BIGINT (64-bit signed)", + "sql.bigint", + "Use sql.numeric() with a string for arbitrary-precision integers.", ); + return { __sql_type: "BIGINT", value: value.toString() }; } + const stringValue = coerceIntegerLike(value, "sql.bigint"); + ensureInBigIntRange( + BigInt(stringValue), + BIGINT_MIN, + BIGINT_MAX, + "BIGINT (64-bit signed)", + "sql.bigint", + "Use sql.numeric() with a string for arbitrary-precision integers.", + ); + return { __sql_type: "BIGINT", value: stringValue }; + }, + + /** + * Creates a `FLOAT` (single-precision, 32-bit) parameter. Note that JS + * numbers are 64-bit doubles, so values may be rounded to fit FLOAT + * precision at bind time. + * + * @param value - Number or numeric string + * @returns Marker pinned to `FLOAT` + * @example + * ```typescript + * sql.float(3.14); // { __sql_type: "FLOAT", value: "3.14" } + * ``` + */ + float(value: number | string): SQLNumberMarker & { __sql_type: "FLOAT" } { + return { + __sql_type: "FLOAT", + value: coerceNumericLike(value, "sql.float"), + }; + }, + /** + * Creates a `DOUBLE` (double-precision, 64-bit) parameter. 
Same precision + * as a JS `number`, so `sql.double(value)` is exact for any JS number. + * + * @param value - Number or numeric string + * @returns Marker pinned to `DOUBLE` + * @example + * ```typescript + * sql.double(3.14); // { __sql_type: "DOUBLE", value: "3.14" } + * ``` + */ + double(value: number | string): SQLNumberMarker & { __sql_type: "DOUBLE" } { + return { + __sql_type: "DOUBLE", + value: coerceNumericLike(value, "sql.double"), + }; + }, + + /** + * Creates a `NUMERIC` (fixed-point DECIMAL) parameter. Use when you need + * exact decimal arithmetic (currency, percentages) — pass values as + * strings to avoid JS-number precision loss. + * + * Note: passing a JS `number` is accepted but lossy for many values + * (e.g. `0.1 + 0.2` → `"0.30000000000000004"`). Prefer strings. + * + * @param value - Number or numeric string (strings preferred for precision) + * @returns Marker pinned to `NUMERIC` + * @example + * ```typescript + * sql.numeric("12345.6789"); // { __sql_type: "NUMERIC", value: "12345.6789" } + * ``` + */ + numeric(value: number | string): SQLNumberMarker & { __sql_type: "NUMERIC" } { return { __sql_type: "NUMERIC", - value: numValue, + value: coerceNumericLike(value, "sql.numeric"), }; }, diff --git a/packages/shared/src/sql/tests/sql-helpers.test.ts b/packages/shared/src/sql/tests/sql-helpers.test.ts index 9b62f4831..e35dacb07 100644 --- a/packages/shared/src/sql/tests/sql-helpers.test.ts +++ b/packages/shared/src/sql/tests/sql-helpers.test.ts @@ -37,31 +37,141 @@ describe("SQL Helpers", () => { }); describe("number()", () => { - it("should create a NUMERIC type parameter from a number", () => { - const number = 1234567890; - const result = sql.number(number); + it("should bind a JS integer in INT range as INT (works with Spark LIMIT/OFFSET)", () => { + // Spark requires IntegerType for LIMIT/OFFSET; BIGINT/LongType is + // rejected with INVALID_LIMIT_LIKE_EXPRESSION.DATA_TYPE. 
INT is + // auto-widened to BIGINT/DECIMAL/DOUBLE by Catalyst for wider columns. + const result = sql.number(1234567890); expect(result).toEqual({ - __sql_type: "NUMERIC", + __sql_type: "INT", value: "1234567890", }); }); - it("should create a NUMERIC type parameter from a numeric string", () => { - const number = "1234567890"; - const result = sql.number(number); + it("should bind a JS integer outside INT range as BIGINT", () => { + const result = sql.number(3_000_000_000); expect(result).toEqual({ - __sql_type: "NUMERIC", + __sql_type: "BIGINT", + value: "3000000000", + }); + }); + + it("should bind INT boundaries correctly", () => { + expect(sql.number(2147483647)).toEqual({ + __sql_type: "INT", + value: "2147483647", + }); + expect(sql.number(-2147483648)).toEqual({ + __sql_type: "INT", + value: "-2147483648", + }); + // Just past INT_MAX → BIGINT + expect(sql.number(2147483648)).toEqual({ + __sql_type: "BIGINT", + value: "2147483648", + }); + expect(sql.number(-2147483649)).toEqual({ + __sql_type: "BIGINT", + value: "-2147483649", + }); + }); + + it("should bind a JS non-integer as DOUBLE", () => { + const result = sql.number(3.14); + expect(result).toEqual({ + __sql_type: "DOUBLE", + value: "3.14", + }); + }); + + it("should bind an integer-shaped string in INT range as INT (HTTP-input case)", () => { + // Express/URLSearchParams return strings; common pattern is + // sql.number(req.query.n) which must work with Spark LIMIT/OFFSET. 
+ const result = sql.number("1234567890"); + expect(result).toEqual({ + __sql_type: "INT", value: "1234567890", }); }); - it("should reject non-numeric string", () => { - const number = "hello"; - expect(() => sql.number(number as any)).toThrow( - "sql.number() expects number or numeric string, got: hello", + it("should bind an integer-shaped string outside INT range as BIGINT", () => { + const result = sql.number("3000000000"); + expect(result).toEqual({ + __sql_type: "BIGINT", + value: "3000000000", + }); + }); + + it("should accept BIGINT-boundary integer strings", () => { + expect(sql.number("9223372036854775807")).toEqual({ + __sql_type: "BIGINT", + value: "9223372036854775807", + }); + expect(sql.number("-9223372036854775808")).toEqual({ + __sql_type: "BIGINT", + value: "-9223372036854775808", + }); + }); + + it("should reject integer strings outside 64-bit signed range", () => { + // String input bypasses Number.MAX_SAFE_INTEGER guards, but the + // BIGINT wire type still cannot hold values outside 2^63. + expect(() => sql.number("9223372036854775808")).toThrow( + /BIGINT \(64-bit signed\) range/, ); + expect(() => sql.number("-9223372036854775809")).toThrow( + /BIGINT \(64-bit signed\) range/, + ); + }); + + it("should bind decimal-shaped strings as NUMERIC (preserve precision)", () => { + const result = sql.number("123.4500000000001"); + expect(result).toEqual({ + __sql_type: "NUMERIC", + value: "123.4500000000001", + }); + }); + + it("should reject JS integers outside Number.MAX_SAFE_INTEGER", () => { + // 9007199254740993 is MAX_SAFE_INTEGER + 2 and cannot be represented + // exactly as a JS number. The marker would advertise BIGINT but the + // value is already wrong before the helper runs. 
+ expect(() => sql.number(Number.MAX_SAFE_INTEGER + 2)).toThrow( + /outside Number\.MAX_SAFE_INTEGER/, + ); + }); + + it("should reject Infinity / -Infinity / NaN", () => { + expect(() => sql.number(Number.POSITIVE_INFINITY)).toThrow( + /finite number/, + ); + expect(() => sql.number(Number.NEGATIVE_INFINITY)).toThrow( + /finite number/, + ); + expect(() => sql.number(Number.NaN)).toThrow(/finite number/); + }); + + it("should emit canonical decimal text (no exponent) for large safe integers", () => { + // Sanity check: even though Number.prototype.toString could emit + // exponent form for very large integers, the helper always emits + // decimal text via BigInt(value).toString(). 1e15 is outside INT + // range, so the wire type is BIGINT. + const result = sql.number(1e15); + expect(result).toEqual({ + __sql_type: "BIGINT", + value: "1000000000000000", + }); }); + it.each([["NaN"], ["Infinity"], ["0x10"], [" "], ["hello"]])( + "should reject non-numeric string %s", + (input) => { + expect(() => sql.number(input as any)).toThrow( + /expects number or numeric string/, + ); + }, + ); + it("should reject empty string", () => { expect(() => sql.number("")).toThrow( "sql.number() expects number or numeric string, got: empty string", @@ -69,13 +179,145 @@ describe("SQL Helpers", () => { }); it("should reject boolean value", () => { - const number = true; - expect(() => sql.number(number as any)).toThrow( + expect(() => sql.number(true as any)).toThrow( "sql.number() expects number or numeric string, got: boolean", ); }); }); + describe("int() / bigint() / float() / double() / numeric()", () => { + it("sql.int() should produce INT", () => { + expect(sql.int(42)).toEqual({ __sql_type: "INT", value: "42" }); + expect(sql.int("42")).toEqual({ __sql_type: "INT", value: "42" }); + }); + + it("sql.int() should reject non-integers", () => { + expect(() => sql.int(3.14)).toThrow( + "sql.int() expects an integer, got non-integer number: 3.14", + ); + expect(() => 
sql.int("3.14")).toThrow( + "sql.int() expects integer number or integer-shaped string, got: 3.14", + ); + }); + + it("sql.int() should reject values outside 32-bit signed range", () => { + // 2^31 is just outside INT_MAX + expect(() => sql.int(2147483648)).toThrow(/INT \(32-bit signed\) range/); + expect(() => sql.int(-2147483649)).toThrow(/INT \(32-bit signed\) range/); + // string-shaped out-of-range value + expect(() => sql.int("9999999999999999999")).toThrow( + /INT \(32-bit signed\) range/, + ); + }); + + it("sql.int() should accept the INT boundaries", () => { + expect(sql.int(2147483647)).toEqual({ + __sql_type: "INT", + value: "2147483647", + }); + expect(sql.int(-2147483648)).toEqual({ + __sql_type: "INT", + value: "-2147483648", + }); + }); + + it("sql.bigint() should produce BIGINT and accept JS bigint", () => { + expect(sql.bigint(42)).toEqual({ __sql_type: "BIGINT", value: "42" }); + expect(sql.bigint("9007199254740993")).toEqual({ + __sql_type: "BIGINT", + value: "9007199254740993", + }); + expect(sql.bigint(9007199254740993n)).toEqual({ + __sql_type: "BIGINT", + value: "9007199254740993", + }); + }); + + it("sql.bigint(number) should reject values outside Number.MAX_SAFE_INTEGER", () => { + expect(() => sql.bigint(Number.MAX_SAFE_INTEGER + 2)).toThrow( + /outside Number\.MAX_SAFE_INTEGER/, + ); + }); + + it("sql.bigint(bigint) should reject values outside 64-bit signed range", () => { + expect(() => sql.bigint(2n ** 63n)).toThrow( + /BIGINT \(64-bit signed\) range/, + ); + expect(() => sql.bigint(-(2n ** 63n) - 1n)).toThrow( + /BIGINT \(64-bit signed\) range/, + ); + }); + + it("sql.bigint() should accept the BIGINT boundaries", () => { + expect(sql.bigint(2n ** 63n - 1n)).toEqual({ + __sql_type: "BIGINT", + value: "9223372036854775807", + }); + expect(sql.bigint(-(2n ** 63n))).toEqual({ + __sql_type: "BIGINT", + value: "-9223372036854775808", + }); + }); + + it("sql.float() should produce FLOAT", () => { + expect(sql.float(3.14)).toEqual({ 
__sql_type: "FLOAT", value: "3.14" }); + expect(sql.float("3.14")).toEqual({ + __sql_type: "FLOAT", + value: "3.14", + }); + }); + + it("sql.float() should reject non-finite and non-numeric inputs", () => { + expect(() => sql.float(Number.POSITIVE_INFINITY)).toThrow( + /finite number/, + ); + expect(() => sql.float("hello" as any)).toThrow( + /expects number or numeric string/, + ); + }); + + it("sql.double() should produce DOUBLE", () => { + expect(sql.double(3.14)).toEqual({ + __sql_type: "DOUBLE", + value: "3.14", + }); + expect(sql.double("3.14")).toEqual({ + __sql_type: "DOUBLE", + value: "3.14", + }); + }); + + it("sql.double() should reject non-finite and non-numeric inputs", () => { + expect(() => sql.double(Number.NaN)).toThrow(/finite number/); + expect(() => sql.double("0x10" as any)).toThrow( + /expects number or numeric string/, + ); + }); + + it("sql.numeric() should produce NUMERIC from a string", () => { + expect(sql.numeric("12345.6789")).toEqual({ + __sql_type: "NUMERIC", + value: "12345.6789", + }); + }); + + it("sql.numeric(number) is lossy by design — caller is warned via docstring", () => { + // Regression test: passing a JS number to sql.numeric serialises with + // JS-double precision. This pins the behaviour the docstring warns + // about so the precision-loss caveat is visible in the test suite. 
+ expect(sql.numeric(0.1 + 0.2)).toEqual({ + __sql_type: "NUMERIC", + value: "0.30000000000000004", + }); + }); + + it("sql.numeric() should reject non-numeric strings", () => { + expect(() => sql.numeric("hello" as any)).toThrow( + /expects number or numeric string/, + ); + }); + }); + describe("string()", () => { it("should create a STRING type parameter from a string", () => { const string = "Hello, world!"; diff --git a/packages/shared/src/sql/types.ts b/packages/shared/src/sql/types.ts index e2dabcbdf..8d94b5358 100644 --- a/packages/shared/src/sql/types.ts +++ b/packages/shared/src/sql/types.ts @@ -3,8 +3,20 @@ export interface SQLStringMarker { value: string; } +/** + * SQL numeric parameter marker. The wire type controls how Databricks SQL + * binds the value — notably, only integer types satisfy the `LIMIT` and + * `OFFSET` clauses. + * + * - `BIGINT` / `INT` — integer columns, LIMIT/OFFSET, IDs + * - `FLOAT` / `DOUBLE` — floating-point columns + * - `NUMERIC` — fixed-point DECIMAL columns (preserves precision) + * + * Created by `sql.number()` (auto-inferred), or by typed variants + * `sql.int()`, `sql.bigint()`, `sql.float()`, `sql.double()`, `sql.numeric()`. + */ export interface SQLNumberMarker { - __sql_type: "NUMERIC"; + __sql_type: "INT" | "BIGINT" | "FLOAT" | "DOUBLE" | "NUMERIC"; value: string; } @@ -31,7 +43,9 @@ export interface SQLTimestampMarker { /** * Object that identifies a typed SQL parameter. - * Created using sql.date(), sql.string(), sql.number(), sql.boolean(), sql.timestamp(), sql.binary(), or sql.interval(). + * Created using sql.date(), sql.string(), sql.number() (or the typed numeric + * variants sql.int/bigint/float/double/numeric), sql.boolean(), + * sql.timestamp(), or sql.binary(). */ export type SQLTypeMarker = | SQLStringMarker