Skip to content

Commit 07e76bd

Browse files
committed
Remove tenant details
1 parent 21d98cd commit 07e76bd

File tree

2 files changed

+251
-12
lines changed

2 files changed

+251
-12
lines changed

internal-packages/tsql/src/query/schema.test.ts

Lines changed: 164 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -590,11 +590,19 @@ describe("Error message sanitization", () => {
590590
const error =
591591
"Unable to query clickhouse: Code: 47. DB::Exception: Missing columns: 'friendly_id' while processing query";
592592
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
593+
// Should remove the "Unable to query clickhouse:" prefix
593594
expect(sanitized).toBe(
594-
"Unable to query clickhouse: Code: 47. DB::Exception: Missing columns: 'run_id' while processing query"
595+
"Code: 47. DB::Exception: Missing columns: 'run_id' while processing query"
595596
);
596597
});
597598

599+
it("should remove 'Unable to query clickhouse:' prefix", () => {
600+
const error = "Unable to query clickhouse: Something went wrong";
601+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
602+
expect(sanitized).toBe("Something went wrong");
603+
expect(sanitized).not.toContain("Unable to query clickhouse");
604+
});
605+
598606
it("should handle error with column in parentheses", () => {
599607
const error = "Function count(friendly_id) expects different arguments";
600608
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
@@ -614,5 +622,160 @@ describe("Error message sanitization", () => {
614622
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
615623
expect(sanitized).toBe("Error in runs.run_id");
616624
});
625+
626+
it("should remove tenant isolation filters from error messages", () => {
627+
const error =
628+
"Unknown identifier in scope SELECT run_id FROM runs WHERE ((organization_id = 'org123') AND (project_id = 'proj456') AND (environment_id = 'env789')) AND (status = 'Failed')";
629+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
630+
expect(sanitized).not.toContain("organization_id");
631+
expect(sanitized).not.toContain("project_id");
632+
expect(sanitized).not.toContain("environment_id");
633+
expect(sanitized).toContain("status = 'Failed'");
634+
});
635+
636+
it("should remove redundant column aliases like 'run_id AS run_id'", () => {
637+
const error = "Error in SELECT run_id AS run_id, machine AS machine FROM runs";
638+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
639+
expect(sanitized).toBe("Error in SELECT run_id, machine FROM runs");
640+
});
641+
642+
it("should remove redundant table aliases like 'runs AS runs'", () => {
643+
const error = "Error in SELECT * FROM runs AS runs WHERE status = 'Failed'";
644+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
645+
expect(sanitized).toBe("Error in SELECT * FROM runs WHERE status = 'Failed'");
646+
});
647+
648+
it("should handle real error with tenant filters and aliases", () => {
649+
const error =
650+
"Unable to query clickhouse: Unknown expression identifier `triggered_ata` in scope SELECT run_id AS run_id, machine AS machine FROM runs AS runs WHERE ((organization_id = 'cm5qtzpb800007cp7h6ebwt2i') AND (project_id = 'cme2p1yep00007calt8ugarkr') AND (environment_id = 'cme2p1ygj00027caln51kyiwl')) AND (status = 'Complted') ORDER BY triggered_ata DESC LIMIT 100.";
651+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
652+
653+
// Should not contain internal prefix
654+
expect(sanitized).not.toContain("Unable to query clickhouse");
655+
656+
// Should not contain internal IDs
657+
expect(sanitized).not.toContain("cm5qtzpb800007cp7h6ebwt2i");
658+
expect(sanitized).not.toContain("cme2p1yep00007calt8ugarkr");
659+
expect(sanitized).not.toContain("cme2p1ygj00027caln51kyiwl");
660+
expect(sanitized).not.toContain("organization_id");
661+
expect(sanitized).not.toContain("project_id");
662+
expect(sanitized).not.toContain("environment_id");
663+
664+
// Should not have redundant aliases
665+
expect(sanitized).not.toContain("run_id AS run_id");
666+
expect(sanitized).not.toContain("machine AS machine");
667+
expect(sanitized).not.toContain("runs AS runs");
668+
669+
// Should still have the user's query parts
670+
expect(sanitized).toContain("status = 'Complted'");
671+
expect(sanitized).toContain("triggered_ata");
672+
expect(sanitized).toContain("LIMIT 100");
673+
});
674+
675+
it("should remove required filters like engine = 'V2'", () => {
676+
// Schema with required filters
677+
const schemaWithRequiredFilters: TableSchema = {
678+
name: "runs",
679+
clickhouseName: "trigger_dev.task_runs_v2",
680+
description: "Task runs table",
681+
tenantColumns: {
682+
organizationId: "organization_id",
683+
projectId: "project_id",
684+
environmentId: "environment_id",
685+
},
686+
requiredFilters: [{ column: "engine", value: "V2" }],
687+
columns: {
688+
run_id: {
689+
name: "run_id",
690+
...column("String"),
691+
},
692+
},
693+
};
694+
695+
const error =
696+
"Error in SELECT run_id FROM runs WHERE ((organization_id = 'org1') AND (engine = 'V2')) AND (status = 'Failed')";
697+
const sanitized = sanitizeErrorMessage(error, [schemaWithRequiredFilters]);
698+
699+
expect(sanitized).not.toContain("engine = 'V2'");
700+
expect(sanitized).not.toContain("organization_id");
701+
expect(sanitized).toContain("status = 'Failed'");
702+
});
703+
704+
it("should handle project and environment field mappings in tenant columns", () => {
705+
// The schema uses 'project' and 'environment' as column names with field mappings
706+
const schemaWithFieldMappedTenants: TableSchema = {
707+
name: "runs",
708+
clickhouseName: "trigger_dev.task_runs_v2",
709+
description: "Task runs table",
710+
tenantColumns: {
711+
organizationId: "organization_id",
712+
projectId: "project",
713+
environmentId: "environment",
714+
},
715+
columns: {
716+
run_id: {
717+
name: "run_id",
718+
...column("String"),
719+
},
720+
},
721+
};
722+
723+
const error =
724+
"Error WHERE ((organization_id = 'org1') AND (project = 'proj1') AND (environment = 'env1')) AND (status = 'ok')";
725+
const sanitized = sanitizeErrorMessage(error, [schemaWithFieldMappedTenants]);
726+
727+
expect(sanitized).not.toContain("organization_id = 'org1'");
728+
expect(sanitized).not.toContain("project = 'proj1'");
729+
expect(sanitized).not.toContain("environment = 'env1'");
730+
expect(sanitized).toContain("status = 'ok'");
731+
});
732+
733+
it("should handle queries with only automatic WHERE filters (no user WHERE clause)", () => {
734+
// When user writes: SELECT * FROM runs LIMIT 10
735+
// The compiled query becomes: SELECT * FROM runs WHERE (org_id = '...') AND (proj_id = '...') AND (env_id = '...')
736+
const error =
737+
"Unable to query clickhouse: Some error in SELECT run_id FROM runs WHERE ((organization_id = 'org1') AND (project_id = 'proj1') AND (environment_id = 'env1')) LIMIT 10";
738+
const sanitized = sanitizeErrorMessage(error, [runsSchema]);
739+
740+
expect(sanitized).not.toContain("Unable to query clickhouse");
741+
expect(sanitized).not.toContain("organization_id");
742+
expect(sanitized).not.toContain("project_id");
743+
expect(sanitized).not.toContain("environment_id");
744+
expect(sanitized).not.toContain("WHERE");
745+
expect(sanitized).toContain("SELECT run_id FROM runs");
746+
expect(sanitized).toContain("LIMIT 10");
747+
});
748+
749+
it("should handle queries with only automatic filters including engine filter", () => {
750+
const schemaWithEngine: TableSchema = {
751+
name: "runs",
752+
clickhouseName: "trigger_dev.task_runs_v2",
753+
description: "Task runs table",
754+
tenantColumns: {
755+
organizationId: "organization_id",
756+
projectId: "project_id",
757+
environmentId: "environment_id",
758+
},
759+
requiredFilters: [{ column: "engine", value: "V2" }],
760+
columns: {
761+
run_id: {
762+
name: "run_id",
763+
...column("String"),
764+
},
765+
},
766+
};
767+
768+
const error =
769+
"Error in SELECT * FROM runs WHERE ((organization_id = 'org1') AND (project_id = 'proj1') AND (environment_id = 'env1') AND (engine = 'V2')) ORDER BY run_id";
770+
const sanitized = sanitizeErrorMessage(error, [schemaWithEngine]);
771+
772+
expect(sanitized).not.toContain("organization_id");
773+
expect(sanitized).not.toContain("project_id");
774+
expect(sanitized).not.toContain("environment_id");
775+
expect(sanitized).not.toContain("engine");
776+
expect(sanitized).not.toContain("WHERE");
777+
expect(sanitized).toContain("SELECT * FROM runs");
778+
expect(sanitized).toContain("ORDER BY run_id");
779+
});
617780
});
618781
});

internal-packages/tsql/src/query/schema.ts

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -689,12 +689,15 @@ export function getAllTableNames(schema: SchemaRegistry): string[] {
689689

690690
/**
691691
* Sanitize a ClickHouse error message by replacing internal ClickHouse names
692-
* with their user-facing TSQL equivalents.
692+
* with their user-facing TSQL equivalents and removing internal implementation details.
693693
*
694694
* This function handles:
695695
* - Fully qualified table names (e.g., `trigger_dev.task_runs_v2` → `runs`)
696696
* - Column names with table prefix (e.g., `trigger_dev.task_runs_v2.friendly_id` → `runs.run_id`)
697697
* - Standalone column names (e.g., `friendly_id` → `run_id`)
698+
* - Removes tenant isolation filters (organization_id, project_id, environment_id)
699+
* - Removes required filters (e.g., engine = 'V2')
700+
* - Removes redundant aliases (e.g., `run_id AS run_id` → `run_id`)
698701
*
699702
* @param message - The error message from ClickHouse
700703
* @param schemas - The table schemas defining name mappings
@@ -714,10 +717,30 @@ export function sanitizeErrorMessage(message: string, schemas: TableSchema[]): s
714717
const tableNameMap = new Map<string, string>(); // clickhouseName -> tsqlName
715718
const columnNameMap = new Map<string, { tsqlName: string; tableTsqlName: string }>(); // clickhouseName -> { tsqlName, tableTsqlName }
716719

720+
// Collect tenant column names and required filter columns to strip from errors
721+
const columnsToStrip: string[] = [];
722+
const tableAliasPatterns: RegExp[] = [];
723+
717724
for (const table of schemas) {
718725
// Map table names
719726
tableNameMap.set(table.clickhouseName, table.name);
720727

728+
// Collect tenant column names to strip
729+
const tenantCols = table.tenantColumns;
730+
columnsToStrip.push(tenantCols.organizationId, tenantCols.projectId, tenantCols.environmentId);
731+
732+
// Collect required filter columns to strip
733+
if (table.requiredFilters) {
734+
for (const filter of table.requiredFilters) {
735+
columnsToStrip.push(filter.column);
736+
}
737+
}
738+
739+
// Build pattern to remove table aliases like "FROM runs AS runs"
740+
tableAliasPatterns.push(
741+
new RegExp(`\\b${escapeRegex(table.name)}\\s+AS\\s+${escapeRegex(table.name)}\\b`, "gi")
742+
);
743+
721744
// Map column names
722745
for (const col of Object.values(table.columns)) {
723746
const clickhouseColName = col.clickhouseName ?? col.name;
@@ -733,7 +756,51 @@ export function sanitizeErrorMessage(message: string, schemas: TableSchema[]): s
733756

734757
let result = message;
735758

736-
// Replace fully qualified column references first (table.column)
759+
// Step 0: Remove internal prefixes that leak implementation details
760+
result = result.replace(/^Unable to query clickhouse:\s*/i, "");
761+
762+
// Step 1: Remove tenant isolation and required filter conditions
763+
// We need to handle multiple patterns:
764+
// - (column = 'value') AND ...
765+
// - ... AND (column = 'value')
766+
// - (column = 'value') at end of expression
767+
for (const colName of columnsToStrip) {
768+
const escaped = escapeRegex(colName);
769+
// Match: (column = 'value') AND (with optional surrounding parens)
770+
result = result.replace(new RegExp(`\\(${escaped}\\s*=\\s*'[^']*'\\)\\s*AND\\s*`, "gi"), "");
771+
// Match: AND (column = 'value') (handles middle/end conditions)
772+
result = result.replace(new RegExp(`\\s*AND\\s*\\(${escaped}\\s*=\\s*'[^']*'\\)`, "gi"), "");
773+
// Match standalone: (column = 'value') with no AND (for when it's the only/last condition)
774+
result = result.replace(new RegExp(`\\(${escaped}\\s*=\\s*'[^']*'\\)`, "gi"), "");
775+
}
776+
777+
// Step 2: Clean up any leftover empty WHERE conditions or double parentheses
778+
// Clean up empty nested parens: "(())" or "( () )" -> ""
779+
result = result.replace(/\(\s*\(\s*\)\s*\)/g, "");
780+
// Clean up empty parens: "()" -> ""
781+
result = result.replace(/\(\s*\)/g, "");
782+
// Clean up "WHERE AND" -> "WHERE"
783+
result = result.replace(/\bWHERE\s+AND\b/gi, "WHERE");
784+
// Clean up double ANDs: "AND AND" -> "AND"
785+
result = result.replace(/\bAND\s+AND\b/gi, "AND");
786+
// Clean up "WHERE ((" with user condition "))" -> "WHERE (condition)"
787+
// First normalize: "(( (condition) ))" patterns
788+
result = result.replace(/\(\(\s*\(/g, "(");
789+
result = result.replace(/\)\s*\)\)/g, ")");
790+
// Clean double parens around single condition
791+
result = result.replace(/\(\(([^()]+)\)\)/g, "($1)");
792+
// Remove "WHERE ()" if the whole WHERE is now empty
793+
result = result.replace(/\bWHERE\s*\(\s*\)\s*/gi, "");
794+
// Clean up trailing " )" before ORDER/LIMIT/etc
795+
result = result.replace(/\s+\)\s*(ORDER|LIMIT|GROUP|HAVING)/gi, " $1");
796+
// Remove empty WHERE clause: "WHERE ORDER" or "WHERE LIMIT" -> just "ORDER" or "LIMIT"
797+
result = result.replace(/\bWHERE\s+(ORDER|LIMIT|GROUP|HAVING)\b/gi, "$1");
798+
// Remove empty WHERE at end of string: "WHERE " at end -> ""
799+
result = result.replace(/\bWHERE\s*$/gi, "");
800+
// Clean up multiple spaces
801+
result = result.replace(/\s{2,}/g, " ");
802+
803+
// Step 3: Replace fully qualified column references first (table.column)
737804
// This handles patterns like: trigger_dev.task_runs_v2.friendly_id
738805
for (const table of schemas) {
739806
for (const col of Object.values(table.columns)) {
@@ -748,7 +815,7 @@ export function sanitizeErrorMessage(message: string, schemas: TableSchema[]): s
748815
}
749816
}
750817

751-
// Replace standalone table names (after column references to avoid partial matches)
818+
// Step 4: Replace standalone table names (after column references to avoid partial matches)
752819
// Sort by length descending to replace longer names first
753820
const sortedTableNames = [...tableNameMap.entries()].sort((a, b) => b[0].length - a[0].length);
754821
for (const [clickhouseName, tsqlName] of sortedTableNames) {
@@ -757,31 +824,40 @@ export function sanitizeErrorMessage(message: string, schemas: TableSchema[]): s
757824
}
758825
}
759826

760-
// Replace standalone column names (for unqualified references)
827+
// Step 5: Replace standalone column names (for unqualified references)
761828
// Sort by length descending to replace longer names first
762829
const sortedColumnNames = [...columnNameMap.entries()].sort((a, b) => b[0].length - a[0].length);
763830
for (const [clickhouseName, { tsqlName }] of sortedColumnNames) {
764831
result = replaceAllOccurrences(result, clickhouseName, tsqlName);
765832
}
766833

834+
// Step 6: Remove redundant column aliases like "run_id AS run_id"
835+
result = result.replace(/\b(\w+)\s+AS\s+\1\b/gi, "$1");
836+
837+
// Step 7: Remove table aliases like "runs AS runs"
838+
for (const pattern of tableAliasPatterns) {
839+
result = result.replace(pattern, (match) => match.split(/\s+AS\s+/i)[0]);
840+
}
841+
767842
return result;
768843
}
769844

845+
/**
846+
* Escape special regex characters in a string
847+
*/
848+
function escapeRegex(str: string): string {
849+
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
850+
}
851+
770852
/**
771853
* Replace all occurrences of a string, respecting word boundaries where sensible.
772854
* Uses word-boundary matching to avoid replacing substrings within larger identifiers.
773855
*/
774856
function replaceAllOccurrences(text: string, search: string, replacement: string): string {
775-
// Escape special regex characters in the search string
776-
const escaped = search.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
777-
778857
// Use word boundary matching - identifiers are typically surrounded by
779858
// non-identifier characters (spaces, quotes, parentheses, operators, etc.)
780859
// We use a pattern that matches the identifier when it's not part of a larger identifier
781-
const pattern = new RegExp(
782-
`(?<![a-zA-Z0-9_])${escaped}(?![a-zA-Z0-9_])`,
783-
"g"
784-
);
860+
const pattern = new RegExp(`(?<![a-zA-Z0-9_])${escapeRegex(search)}(?![a-zA-Z0-9_])`, "g");
785861

786862
return text.replace(pattern, replacement);
787863
}

0 commit comments

Comments
 (0)