Skip to content

Commit 21d98cd

Browse files
committed
Sanitizing ClickHouse errors from TSQL
1 parent dd3c6ea commit 21d98cd

File tree

4 files changed

+277
-4
lines changed

4 files changed

+277
-4
lines changed

internal-packages/clickhouse/src/client/tsql.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import type { ClickHouseSettings } from "@clickhouse/client";
99
import { z } from "zod";
1010
import {
1111
compileTSQL,
12+
sanitizeErrorMessage,
1213
transformResults,
1314
type TableSchema,
1415
type QuerySettings,
@@ -137,7 +138,9 @@ export async function executeTSQL<TOut extends z.ZodSchema>(
137138
const [error, result] = await queryFn(params);
138139

139140
if (error) {
140-
return [error, null];
141+
// Sanitize error message to show TSQL names instead of ClickHouse internals
142+
const sanitizedMessage = sanitizeErrorMessage(error.message, options.tableSchema);
143+
return [new QueryError(sanitizedMessage, { query: options.query }), null];
141144
}
142145

143146
const { rows, stats } = result;
@@ -156,7 +159,7 @@ export async function executeTSQL<TOut extends z.ZodSchema>(
156159
} catch (error) {
157160
const errorMessage = error instanceof Error ? error.message : "Unknown error";
158161

159-
// Log TSQL compilation or unexpected errors
162+
// Log TSQL compilation or unexpected errors (with original message for debugging)
160163
logger.error("[TSQL] Query error", {
161164
name: options.name,
162165
error: errorMessage,
@@ -165,8 +168,11 @@ export async function executeTSQL<TOut extends z.ZodSchema>(
165168
generatedParams: generatedParams ?? {},
166169
});
167170

171+
// Sanitize error message to show TSQL names instead of ClickHouse internals
172+
const sanitizedMessage = sanitizeErrorMessage(errorMessage, options.tableSchema);
173+
168174
if (error instanceof Error) {
169-
return [new QueryError(error.message, { query: options.query }), null];
175+
return [new QueryError(sanitizedMessage, { query: options.query }), null];
170176
}
171177
return [new QueryError("Unknown error executing TSQL query", { query: options.query }), null];
172178
}

internal-packages/tsql/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ export {
7676
isValidUserValue,
7777
// Virtual column utilities
7878
isVirtualColumn,
79+
// Error message sanitization
80+
sanitizeErrorMessage,
7981
validateFilterColumn,
8082
validateGroupColumn,
8183
validateSelectColumn,

internal-packages/tsql/src/query/schema.test.ts

Lines changed: 163 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ import {
1111
getExternalValue,
1212
getInternalValueFromMapping,
1313
getInternalValueFromMappingCaseInsensitive,
14+
sanitizeErrorMessage,
1415
type ColumnSchema,
1516
type FieldMappings,
17+
type TableSchema,
1618
} from "./schema.js";
1719

1820
describe("Value mapping helper functions", () => {
@@ -363,7 +365,9 @@ describe("Field mapping helper functions (runtime dynamic mappings)", () => {
363365
});
364366

365367
it("should return null if mapping name does not exist", () => {
366-
expect(getInternalValueFromMapping(fieldMappings, "nonexistent", "my-project-ref")).toBeNull();
368+
expect(
369+
getInternalValueFromMapping(fieldMappings, "nonexistent", "my-project-ref")
370+
).toBeNull();
367371
});
368372

369373
it("should return null for empty mappings", () => {
@@ -454,3 +458,161 @@ describe("Field mapping helper functions (runtime dynamic mappings)", () => {
454458
});
455459
});
456460

461+
describe("Error message sanitization", () => {
462+
// Test schema mimicking the real runs schema
463+
const runsSchema: TableSchema = {
464+
name: "runs",
465+
clickhouseName: "trigger_dev.task_runs_v2",
466+
description: "Task runs table",
467+
tenantColumns: {
468+
organizationId: "organization_id",
469+
projectId: "project_id",
470+
environmentId: "environment_id",
471+
},
472+
columns: {
473+
run_id: {
474+
name: "run_id",
475+
clickhouseName: "friendly_id",
476+
...column("String"),
477+
},
478+
triggered_at: {
479+
name: "triggered_at",
480+
clickhouseName: "created_at",
481+
...column("DateTime64"),
482+
},
483+
machine: {
484+
name: "machine",
485+
clickhouseName: "machine_preset",
486+
...column("String"),
487+
},
488+
status: {
489+
name: "status",
490+
// No clickhouseName - same as name
491+
...column("String"),
492+
},
493+
task_identifier: {
494+
name: "task_identifier",
495+
// No clickhouseName - same as name
496+
...column("String"),
497+
},
498+
},
499+
};
500+
501+
describe("sanitizeErrorMessage", () => {
502+
it("should replace fully qualified table.column references", () => {
503+
const error = "Missing column trigger_dev.task_runs_v2.friendly_id in query";
504+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
505+
expect(sanitized).toBe("Missing column runs.run_id in query");
506+
});
507+
508+
it("should replace standalone table names", () => {
509+
const error = "Table trigger_dev.task_runs_v2 does not exist";
510+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
511+
expect(sanitized).toBe("Table runs does not exist");
512+
});
513+
514+
it("should replace standalone column names with different clickhouseName", () => {
515+
const error = "Unknown identifier: friendly_id";
516+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
517+
expect(sanitized).toBe("Unknown identifier: run_id");
518+
});
519+
520+
it("should replace multiple occurrences in the same message", () => {
521+
const error =
522+
"Cannot compare friendly_id with created_at: incompatible types in trigger_dev.task_runs_v2";
523+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
524+
expect(sanitized).toBe("Cannot compare run_id with triggered_at: incompatible types in runs");
525+
});
526+
527+
it("should not replace column names that have no clickhouseName mapping", () => {
528+
const error = "Invalid value for column status";
529+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
530+
expect(sanitized).toBe("Invalid value for column status");
531+
});
532+
533+
it("should handle error messages with quoted identifiers", () => {
534+
const error = "Column 'machine_preset' is not of type String";
535+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
536+
expect(sanitized).toBe("Column 'machine' is not of type String");
537+
});
538+
539+
it("should handle error messages with backtick identifiers", () => {
540+
const error = "Unknown column `friendly_id` in table";
541+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
542+
expect(sanitized).toBe("Unknown column `run_id` in table");
543+
});
544+
545+
it("should not replace partial matches within larger identifiers", () => {
546+
const error = "Column my_friendly_id_column not found";
547+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
548+
// Should not replace "friendly_id" within "my_friendly_id_column"
549+
expect(sanitized).toBe("Column my_friendly_id_column not found");
550+
});
551+
552+
it("should return original message if no schemas provided", () => {
553+
const error = "Some error with trigger_dev.task_runs_v2";
554+
const sanitized = sanitizeErrorMessage(error, []);
555+
expect(sanitized).toBe("Some error with trigger_dev.task_runs_v2");
556+
});
557+
558+
it("should return original message if no matches found", () => {
559+
const error = "Generic database error occurred";
560+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
561+
expect(sanitized).toBe("Generic database error occurred");
562+
});
563+
564+
it("should handle multiple tables", () => {
565+
const eventsSchema: TableSchema = {
566+
name: "events",
567+
clickhouseName: "trigger_dev.task_events",
568+
description: "Task events table",
569+
tenantColumns: {
570+
organizationId: "organization_id",
571+
projectId: "project_id",
572+
environmentId: "environment_id",
573+
},
574+
columns: {
575+
event_id: {
576+
name: "event_id",
577+
clickhouseName: "internal_event_id",
578+
...column("String"),
579+
},
580+
},
581+
};
582+
583+
const error =
584+
"Cannot join trigger_dev.task_runs_v2 with trigger_dev.task_events on internal_event_id";
585+
const sanitized = sanitizeErrorMessage(error, [runsSchema, eventsSchema]);
586+
expect(sanitized).toBe("Cannot join runs with events on event_id");
587+
});
588+
589+
it("should handle real ClickHouse error format", () => {
590+
const error =
591+
"Unable to query clickhouse: Code: 47. DB::Exception: Missing columns: 'friendly_id' while processing query";
592+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
593+
expect(sanitized).toBe(
594+
"Unable to query clickhouse: Code: 47. DB::Exception: Missing columns: 'run_id' while processing query"
595+
);
596+
});
597+
598+
it("should handle error with column in parentheses", () => {
599+
const error = "Function count(friendly_id) expects different arguments";
600+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
601+
expect(sanitized).toBe("Function count(run_id) expects different arguments");
602+
});
603+
604+
it("should handle error with column after comma", () => {
605+
const error = "SELECT friendly_id, created_at FROM table";
606+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
607+
expect(sanitized).toBe("SELECT run_id, triggered_at FROM table");
608+
});
609+
610+
it("should prioritize longer matches (table.column before standalone column)", () => {
611+
// This tests that we replace "trigger_dev.task_runs_v2.friendly_id" as a unit,
612+
// not "trigger_dev.task_runs_v2" and then "friendly_id" separately
613+
const error = "Error in trigger_dev.task_runs_v2.friendly_id";
614+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
615+
expect(sanitized).toBe("Error in runs.run_id");
616+
});
617+
});
618+
});

internal-packages/tsql/src/query/schema.ts

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,3 +682,106 @@ export function getTableColumnNames(schema: SchemaRegistry, tableName: string):
682682
export function getAllTableNames(schema: SchemaRegistry): string[] {
683683
return Object.keys(schema.tables);
684684
}
685+
686+
// ============================================================
687+
// Error Message Sanitization
688+
// ============================================================
689+
690+
/**
691+
* Sanitize a ClickHouse error message by replacing internal ClickHouse names
692+
* with their user-facing TSQL equivalents.
693+
*
694+
* This function handles:
695+
* - Fully qualified table names (e.g., `trigger_dev.task_runs_v2` → `runs`)
696+
* - Column names with table prefix (e.g., `trigger_dev.task_runs_v2.friendly_id` → `runs.run_id`)
697+
* - Standalone column names (e.g., `friendly_id` → `run_id`)
698+
*
699+
* @param message - The error message from ClickHouse
700+
* @param schemas - The table schemas defining name mappings
701+
* @returns The sanitized error message with TSQL names
702+
*
703+
* @example
704+
* ```typescript
705+
* const sanitized = sanitizeErrorMessage(
706+
* "Missing column trigger_dev.task_runs_v2.friendly_id",
707+
* [runsSchema]
708+
* );
709+
* // Returns: "Missing column runs.run_id"
710+
* ```
711+
*/
712+
export function sanitizeErrorMessage(message: string, schemas: TableSchema[]): string {
713+
// Build reverse lookup maps
714+
const tableNameMap = new Map<string, string>(); // clickhouseName -> tsqlName
715+
const columnNameMap = new Map<string, { tsqlName: string; tableTsqlName: string }>(); // clickhouseName -> { tsqlName, tableTsqlName }
716+
717+
for (const table of schemas) {
718+
// Map table names
719+
tableNameMap.set(table.clickhouseName, table.name);
720+
721+
// Map column names
722+
for (const col of Object.values(table.columns)) {
723+
const clickhouseColName = col.clickhouseName ?? col.name;
724+
if (clickhouseColName !== col.name) {
725+
// Only add to map if there's actually a different ClickHouse name
726+
columnNameMap.set(clickhouseColName, {
727+
tsqlName: col.name,
728+
tableTsqlName: table.name,
729+
});
730+
}
731+
}
732+
}
733+
734+
let result = message;
735+
736+
// Replace fully qualified column references first (table.column)
737+
// This handles patterns like: trigger_dev.task_runs_v2.friendly_id
738+
for (const table of schemas) {
739+
for (const col of Object.values(table.columns)) {
740+
const clickhouseColName = col.clickhouseName ?? col.name;
741+
const fullyQualified = `${table.clickhouseName}.${clickhouseColName}`;
742+
const tsqlQualified = `${table.name}.${col.name}`;
743+
744+
if (fullyQualified !== tsqlQualified) {
745+
// Use word boundary-aware replacement
746+
result = replaceAllOccurrences(result, fullyQualified, tsqlQualified);
747+
}
748+
}
749+
}
750+
751+
// Replace standalone table names (after column references to avoid partial matches)
752+
// Sort by length descending to replace longer names first
753+
const sortedTableNames = [...tableNameMap.entries()].sort((a, b) => b[0].length - a[0].length);
754+
for (const [clickhouseName, tsqlName] of sortedTableNames) {
755+
if (clickhouseName !== tsqlName) {
756+
result = replaceAllOccurrences(result, clickhouseName, tsqlName);
757+
}
758+
}
759+
760+
// Replace standalone column names (for unqualified references)
761+
// Sort by length descending to replace longer names first
762+
const sortedColumnNames = [...columnNameMap.entries()].sort((a, b) => b[0].length - a[0].length);
763+
for (const [clickhouseName, { tsqlName }] of sortedColumnNames) {
764+
result = replaceAllOccurrences(result, clickhouseName, tsqlName);
765+
}
766+
767+
return result;
768+
}
769+
770+
/**
771+
* Replace all occurrences of a string, respecting word boundaries where sensible.
772+
* Uses word-boundary matching to avoid replacing substrings within larger identifiers.
773+
*/
774+
function replaceAllOccurrences(text: string, search: string, replacement: string): string {
775+
// Escape special regex characters in the search string
776+
const escaped = search.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
777+
778+
// Use word boundary matching - identifiers are typically surrounded by
779+
// non-identifier characters (spaces, quotes, parentheses, operators, etc.)
780+
// We use a pattern that matches the identifier when it's not part of a larger identifier
781+
const pattern = new RegExp(
782+
`(?<![a-zA-Z0-9_])${escaped}(?![a-zA-Z0-9_])`,
783+
"g"
784+
);
785+
786+
return text.replace(pattern, replacement);
787+
}

0 commit comments

Comments
 (0)