diff --git a/packages/opencode/src/altimate/native/altimate-core.ts b/packages/opencode/src/altimate/native/altimate-core.ts index d44de33d26..f3c900e2be 100644 --- a/packages/opencode/src/altimate/native/altimate-core.ts +++ b/packages/opencode/src/altimate/native/altimate-core.ts @@ -38,33 +38,77 @@ function fail(error: unknown): AltimateCoreResult { } // --------------------------------------------------------------------------- -// IFF / QUALIFY transpile transforms (ported from Python guard.py) +// Snowflake → target dialect transpile transforms // --------------------------------------------------------------------------- const IFF_PATTERN = /\bIFF\s*\(([^,()]+),\s*([^,()]+),\s*([^()]+)\)/gi /** - * Iteratively convert Snowflake IFF(cond, a, b) to - * CASE WHEN cond THEN a ELSE b END. + * Convert Snowflake IFF(cond, a, b) to IF(cond, a, b) for BigQuery, + * or CASE WHEN cond THEN a ELSE b END for other targets. */ -export function preprocessIff(sql: string): string { +export function preprocessIff(sql: string, targetDialect?: string): string { + const target = targetDialect?.toLowerCase() ?? "" + const useIf = target === "bigquery" let current = sql for (let i = 0; i < 10; i++) { - const next = current.replace( - IFF_PATTERN, - "CASE WHEN $1 THEN $2 ELSE $3 END", - ) + const next = useIf + ? current.replace(IFF_PATTERN, "IF($1, $2, $3)") + : current.replace(IFF_PATTERN, "CASE WHEN $1 THEN $2 ELSE $3 END") if (next === current) break current = next } return current } +/** + * Convert Snowflake TRY_TO_NUMBER/TRY_TO_DECIMAL/TRY_TO_NUMERIC to SAFE_CAST for BigQuery. + */ +export function preprocessTryToNumber(sql: string): string { + return sql.replace( + /\bTRY_TO_(?:NUMBER|DECIMAL|NUMERIC)\s*\(\s*([^()]+?)\s*\)/gi, + "SAFE_CAST($1 AS NUMERIC)", + ) +} + +/** + * Convert Snowflake ARRAY_AGG(expr) WITHIN GROUP (ORDER BY ...) to + * ARRAY_AGG(expr ORDER BY ...) for BigQuery. + */ +export function preprocessArrayAggWithinGroup(sql: string): string { + return sql.replace( + /\bARRAY_AGG\s*\(\s*([^()]+?)\s*\)\s*WITHIN\s+GROUP\s*\(\s*ORDER\s+BY\s+([^()]+?)\s*\)/gi, + "ARRAY_AGG($1 ORDER BY $2)", + ) +} + +/** + * Convert Snowflake FLATTEN(input => expr) to UNNEST(expr) for BigQuery. + */ +export function preprocessFlatten(sql: string): string { + return sql.replace( + /\bFLATTEN\s*\(\s*(?:input\s*=>\s*)?([^()]+?)\s*\)/gi, + "UNNEST($1)", + ) +} + +/** + * Apply all Snowflake → BigQuery preprocessing transforms. + */ +export function preprocessSnowflakeToBigQuery(sql: string): string { + let result = preprocessIff(sql, "bigquery") + result = preprocessTryToNumber(result) + result = preprocessArrayAggWithinGroup(result) + result = preprocessFlatten(result) + return result +} + const QUALIFY_PATTERN = /\bQUALIFY\b\s+(.+?)(?=\s*(?:LIMIT\s+\d|ORDER\s+BY|;|$))/is /** * Wrap QUALIFY clause into outer SELECT for targets that lack native support. + * BigQuery supports QUALIFY natively, so it's excluded. */ export function postprocessQualify(sql: string): string { const m = QUALIFY_PATTERN.exec(sql) @@ -76,7 +120,89 @@ export function postprocessQualify(sql: string): string { return suffix ? `${wrapped} ${suffix}` : wrapped } -const QUALIFY_TARGETS = new Set(["bigquery", "databricks", "spark", "trino"]) +// BigQuery supports QUALIFY natively — only wrap for dialects that don't +const QUALIFY_TARGETS = new Set(["databricks", "spark", "trino"]) + +// --------------------------------------------------------------------------- +// SQL keyword fuzzy correction +// --------------------------------------------------------------------------- + +const SQL_KEYWORDS = [ + "SELECT", "FROM", "WHERE", "JOIN", "LEFT", "RIGHT", "INNER", "OUTER", + "FULL", "CROSS", "ON", "AND", "OR", "NOT", "IN", "BETWEEN", "LIKE", + "IS", "NULL", "AS", "ORDER", "BY", "GROUP", "HAVING", "LIMIT", + "OFFSET", "UNION", "ALL", "DISTINCT", "INSERT", "INTO", "VALUES", + "UPDATE", "SET", "DELETE", "CREATE", "TABLE", "ALTER", "DROP", + "INDEX", "VIEW", "CASE", "WHEN", "THEN", "ELSE", "END", "EXISTS", + "WITH", "OVER", "PARTITION", "WINDOW", "ROWS", "RANGE", "UNBOUNDED", + "PRECEDING", "FOLLOWING", "CURRENT", "ROW", "CAST", "COALESCE", + "NULLIF", "COUNT", "SUM", "AVG", "MIN", "MAX", "HAVING", "ASC", + "DESC", "TRUE", "FALSE", "PRIMARY", "KEY", "FOREIGN", "REFERENCES", + "CONSTRAINT", "DEFAULT", "CHECK", "UNIQUE", "TRIGGER", "PROCEDURE", + "FUNCTION", "RETURN", "RETURNS", "BEGIN", "DECLARE", "IF", "WHILE", + "FOR", "EACH", "AFTER", "BEFORE", "INSTEAD", "OF", "EXECUTE", + "GRANT", "REVOKE", "COMMIT", "ROLLBACK", "SAVEPOINT", "TRUNCATE", + "MERGE", "USING", "MATCHED", "QUALIFY", "EXCEPT", "INTERSECT", +] + +/** + * Levenshtein distance between two strings. + */ +function levenshtein(a: string, b: string): number { + const m = a.length + const n = b.length + const dp: number[][] = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0)) + for (let i = 0; i <= m; i++) dp[i][0] = i + for (let j = 0; j <= n; j++) dp[0][j] = j + for (let i = 1; i <= m; i++) { + for (let j = 1; j <= n; j++) { + dp[i][j] = a[i - 1] === b[j - 1] + ? dp[i - 1][j - 1] + : 1 + Math.min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) + } + } + return dp[m][n] +} + +/** + * Pre-process SQL to fix obvious keyword typos before passing to the Rust fix engine. + * Returns the corrected SQL and a list of corrections made. + */ +export function fixKeywordTypos(sql: string): { sql: string; corrections: string[] } { + const corrections: string[] = [] + // Tokenize by splitting on whitespace and punctuation boundaries + const result = sql.replace(/\b([A-Za-z_]\w*)\b/g, (match) => { + const upper = match.toUpperCase() + // Skip if it's already a valid keyword + if (SQL_KEYWORDS.includes(upper)) return match + // Skip if it looks like an identifier (lowercase, mixed case with underscores) + if (match.includes("_") || (match[0] === match[0].toLowerCase() && match.length > 3)) return match + + // Only try to fix short all-caps or Title-case tokens that look like keyword typos + if (match.length < 3 || match.length > 12) return match + + // Find closest keyword by Levenshtein distance + let bestKeyword = "" + let bestDist = Infinity + for (const kw of SQL_KEYWORDS) { + if (Math.abs(kw.length - upper.length) > 2) continue + const dist = levenshtein(upper, kw) + if (dist < bestDist) { + bestDist = dist + bestKeyword = kw + } + } + + // Accept if edit distance is 1-2 and the token is short enough relative to keyword + const maxDist = upper.length <= 4 ? 1 : 2 + if (bestDist > 0 && bestDist <= maxDist && bestKeyword) { + corrections.push(`${match} → ${bestKeyword}`) + return bestKeyword + } + return match + }) + return { sql: result, corrections } +} // --------------------------------------------------------------------------- // Handler registrations @@ -124,12 +250,21 @@ register("altimate_core.safety", async (params) => { // 4. altimate_core.transpile — with IFF/QUALIFY transforms register("altimate_core.transpile", async (params) => { try { - const processed = preprocessIff(params.sql) + const sourceLower = params.from_dialect.toLowerCase() + const targetLower = params.to_dialect.toLowerCase() + + // Apply dialect-specific preprocessing + let processed = params.sql + if (sourceLower === "snowflake" && targetLower === "bigquery") { + processed = preprocessSnowflakeToBigQuery(processed) + } else { + processed = preprocessIff(processed, targetLower) + } + const raw = core.transpile(processed, params.from_dialect, params.to_dialect) const data = toData(raw) // Post-process QUALIFY for targets that lack native support - const targetLower = params.to_dialect.toLowerCase() if (QUALIFY_TARGETS.has(targetLower)) { // Rust returns transpiled_sql as string[] — use first element const transpiled = Array.isArray(data.transpiled_sql) @@ -183,40 +318,161 @@ register("altimate_core.check", async (params) => { } }) -// 7. altimate_core.fix +// 7. altimate_core.fix — with keyword typo correction register("altimate_core.fix", async (params) => { try { + // Pre-fix keyword typos before passing to Rust + const { sql: preprocessed, corrections } = fixKeywordTypos(params.sql) + const schema = schemaOrEmpty(params.schema_path, params.schema_context) const raw = await core.fix( - params.sql, + preprocessed, schema, params.max_iterations ?? undefined, ) const data = toData(raw) + + // If we fixed keyword typos and Rust didn't change anything further, + // report the keyword fixes as the result + if (corrections.length > 0 && !data.fixed && preprocessed !== params.sql) { + return ok(true, { + ...data, + fixed: true, + original_sql: params.sql, + fixed_sql: data.fixed_sql ?? preprocessed, + fixes_applied: [ + ...(data.fixes_applied as any[] ?? []), + ...corrections.map((c) => ({ + action: "replace_keyword", + original: c.split(" → ")[0], + replacement: c.split(" → ")[1], + confidence: 0.95, + explanation: `Fixed keyword typo: ${c}`, + })), + ], + }) + } + + // If Rust fixed it, ensure we preserve the keyword corrections too + if (corrections.length > 0 && data.fixed) { + data.original_sql = params.sql + data.fixes_applied = [ + ...corrections.map((c) => ({ + action: "replace_keyword", + original: c.split(" → ")[0], + replacement: c.split(" → ")[1], + confidence: 0.95, + explanation: `Fixed keyword typo: ${c}`, + })), + ...(data.fixes_applied as any[] ?? []), + ] + } + return ok(data.fixed !== false, data) } catch (e) { return fail(e) } }) -// 8. altimate_core.policy +// 8. altimate_core.policy — with DML-aware schema fallback register("altimate_core.policy", async (params) => { try { - const schema = schemaOrEmpty(params.schema_path, params.schema_context) + let schema = schemaOrEmpty(params.schema_path, params.schema_context) + + // If no explicit schema provided, extract table names from SQL and build + // a minimal schema so DML statements (DELETE, UPDATE, INSERT, MERGE) + // don't fail validation before policy checks can run. + if (!params.schema_path && !params.schema_context) { + try { + const meta = core.extractMetadata(params.sql) + if (meta.tables?.length) { + const ddl = meta.tables + .map((t: string) => `CREATE TABLE ${t} (id INT);`) + .join("\n") + schema = core.Schema.fromDdl(ddl) + } + } catch { + // Fall back to empty schema if metadata extraction fails + } + } + const raw = await core.checkPolicy(params.sql, schema, params.policy_json) const data = toData(raw) return ok(data.allowed !== false, data) } catch (e) { + // If the error is a validation error, try to extract policy-relevant info + // and provide a useful response instead of a raw error + const errStr = String(e) + if (errStr.includes("validation") || errStr.includes("parse")) { + // Parse the policy JSON to do a basic statement-type check + try { + const policy = JSON.parse(params.policy_json) + const rules = policy.rules ?? [] + const violations: Array> = [] + + // Check forbidden_operations rules against statement type + for (const rule of rules) { + if (rule.type === "forbidden_operations" && Array.isArray(rule.operations)) { + const sqlUpper = params.sql.trim().toUpperCase() + for (const op of rule.operations) { + if (sqlUpper.startsWith(op.toUpperCase())) { + violations.push({ + rule: "forbidden_operations", + category: "query_patterns", + severity: "error", + message: `${op} is a forbidden operation`, + detail: { type: "blocked_statement", statement: op }, + }) + } + } + } + } + + if (violations.length > 0) { + return ok(false, { + allowed: false, + violations, + warnings: [], + policies_evaluated: rules.length, + }) + } + } catch { + // If policy parsing fails too, return the original error + } + } return fail(e) } }) -// 9. altimate_core.semantics +// 9. altimate_core.semantics — with implicit cross join detection register("altimate_core.semantics", async (params) => { try { const schema = schemaOrEmpty(params.schema_path, params.schema_context) const raw = await core.checkSemantics(params.sql, schema) const data = toData(raw) + + // Augment with TypeScript-level detection for comma-joins (implicit cross joins) + const findings = (data.findings as any[]) ?? [] + const commaJoinPattern = /\bFROM\b\s+(\w+)\s+\w*\s*,\s*(\w+)/gi + const match = commaJoinPattern.exec(params.sql) + if (match) { + const hasCommaJoinFinding = findings.some( + (f) => f.rule === "implicit_cross_join" || f.rule === "comma_join", + ) + if (!hasCommaJoinFinding) { + findings.push({ + rule: "implicit_cross_join", + severity: "warning", + message: `Implicit cross join detected: comma-separated tables in FROM clause (${match[1]}, ${match[2]}). Use explicit JOIN syntax for clarity.`, + explanation: "Comma-separated tables in the FROM clause create an implicit cross join. While a WHERE clause may filter the result, explicit JOIN...ON syntax is preferred for readability and to avoid accidental cartesian products.", + suggestion: "Rewrite using explicit JOIN syntax: FROM table1 JOIN table2 ON ...", + confidence: 0.8, + }) + data.findings = findings + if (findings.length > 0) data.valid = false + } + } + return ok(data.valid !== false, data) } catch (e) { return fail(e) @@ -234,12 +490,59 @@ register("altimate_core.testgen", async (params) => { } }) -// 11. altimate_core.equivalence +// 11. altimate_core.equivalence — with normalization for = vs IN register("altimate_core.equivalence", async (params) => { try { const schema = schemaOrEmpty(params.schema_path, params.schema_context) + + // First try direct equivalence check const raw = await core.checkEquivalence(params.sql1, params.sql2, schema) const data = toData(raw) + + // If Rust says "not equivalent", try normalizing = X to IN (X) and vice versa + if (data.equivalent === false) { + const normalizeEqToIn = (sql: string): string => + sql.replace( + /(\w+)\s*=\s*('(?:[^'\\]|\\.)*'|\d+(?:\.\d+)?)\b/g, + "$1 IN ($2)", + ) + const normalizeInToEq = (sql: string): string => + sql.replace( + /(\w+)\s+IN\s*\(\s*('(?:[^'\\]|\\.)*'|\d+(?:\.\d+)?)\s*\)/gi, + "$1 = $2", + ) + + // Normalize both to IN form and re-check + const norm1 = normalizeEqToIn(params.sql1) + const norm2 = normalizeEqToIn(params.sql2) + if (norm1 !== params.sql1 || norm2 !== params.sql2) { + const retryRaw = await core.checkEquivalence(norm1, norm2, schema) + const retryData = toData(retryRaw) + if (retryData.equivalent) { + return ok(true, { + ...retryData, + equivalent: true, + normalization_applied: "= X ↔ IN (X)", + }) + } + } + + // Also try normalizing both to = form + const eq1 = normalizeInToEq(params.sql1) + const eq2 = normalizeInToEq(params.sql2) + if (eq1 !== params.sql1 || eq2 !== params.sql2) { + const retryRaw = await core.checkEquivalence(eq1, eq2, schema) + const retryData = toData(retryRaw) + if (retryData.equivalent) { + return ok(true, { + ...retryData, + equivalent: true, + normalization_applied: "IN (X) ↔ = X", + }) + } + } + } + return ok(data.equivalent !== false, data) } catch (e) { return fail(e) @@ -285,12 +588,32 @@ register("altimate_core.rewrite", async (params) => { } }) -// 15. altimate_core.correct +// 15. altimate_core.correct — with keyword typo correction register("altimate_core.correct", async (params) => { try { + // Pre-fix keyword typos before passing to Rust + const { sql: preprocessed, corrections } = fixKeywordTypos(params.sql) + const schema = schemaOrEmpty(params.schema_path, params.schema_context) - const raw = await core.correct(params.sql, schema) + const raw = await core.correct(preprocessed, schema) const data = toData(raw) + + // If we fixed keyword typos, merge that info + if (corrections.length > 0) { + data.original_sql = params.sql + if (data.status === "unfixable" && preprocessed !== params.sql) { + // Rust couldn't fix further, but keyword corrections succeeded + data.status = corrections.length > 0 ? "fixed" : data.status + data.corrected_sql = data.corrected_sql ?? preprocessed + } + data.changes = [ + ...corrections.map((c) => ({ + description: `Fixed keyword typo: ${c}`, + })), + ...((data.changes as any[]) ?? []), + ] + } + return ok(data.status !== "unfixable", data) } catch (e) { return fail(e) @@ -308,12 +631,54 @@ register("altimate_core.grade", async (params) => { } }) -// 17. altimate_core.classify_pii +// 17. altimate_core.classify_pii — with credit card pattern augmentation register("altimate_core.classify_pii", async (params) => { try { const schema = schemaOrEmpty(params.schema_path, params.schema_context) const raw = core.classifyPii(schema) - return ok(true, toData(raw)) + const data = toData(raw) + + // Augment: add credit card patterns that Rust PII classifier misses + const creditCardPatterns = [ + /credit.?card/i, /card.?number/i, /cc.?num/i, /card.?no/i, + /card.?last.?four/i, /last.?four/i, /card.?expir/i, /expiry.?date/i, + /cvv/i, /cvc/i, /card.?holder/i, /cardholder/i, /pan(?:_|\b)/i, + ] + + const columns = (data.columns as any[]) ?? [] + const existingCols = new Set(columns.map((c: any) => `${c.table}.${c.column}`)) + + // Scan schema tables for credit card column patterns + const schemaJson = schema.toJson() + if (schemaJson?.tables) { + for (const [tableName, tableDef] of Object.entries(schemaJson.tables)) { + const table = tableDef as any + for (const col of table.columns ?? []) { + const colKey = `${tableName}.${col.name}` + if (existingCols.has(colKey)) continue + + const isCreditCard = creditCardPatterns.some((p) => p.test(col.name)) + if (isCreditCard) { + columns.push({ + table: tableName, + column: col.name, + classification: "Financial", + confidence: 0.9, + detection_method: "column_name_pattern", + suggested_masking: "hash", + }) + existingCols.add(colKey) + } + } + } + } + + data.columns = columns + data.pii_count = columns.filter((c: any) => + c.classification !== "None" && c.confidence > 0, + ).length + + return ok(true, data) } catch (e) { return fail(e) } @@ -343,7 +708,7 @@ register("altimate_core.resolve_term", async (params) => { } }) -// 20. altimate_core.column_lineage +// 20. altimate_core.column_lineage — with full depth for CTE tracing register("altimate_core.column_lineage", async (params) => { try { const schema = resolveSchema(params.schema_path, params.schema_context) @@ -351,6 +716,9 @@ register("altimate_core.column_lineage", async (params) => { params.sql, params.dialect || undefined, schema ?? undefined, + params.default_database ?? undefined, + params.default_schema ?? undefined, + "full", // Ensure full depth for multi-hop CTE lineage tracing ) return ok(true, toData(raw)) } catch (e) { diff --git a/packages/opencode/src/altimate/native/sql/register.ts b/packages/opencode/src/altimate/native/sql/register.ts index 78b6ac5a2a..1f463a038e 100644 --- a/packages/opencode/src/altimate/native/sql/register.ts +++ b/packages/opencode/src/altimate/native/sql/register.ts @@ -9,7 +9,7 @@ import * as core from "@altimateai/altimate-core" import { register } from "../dispatcher" import { schemaOrEmpty, resolveSchema } from "../schema-resolver" -import { preprocessIff, postprocessQualify } from "../altimate-core" +import { preprocessIff, postprocessQualify, preprocessSnowflakeToBigQuery } from "../altimate-core" import type { SqlAnalyzeResult, SqlAnalyzeIssue, @@ -74,6 +74,46 @@ register("sql.analyze", async (params) => { }) } + // TypeScript-level rule: WINDOW_WITHOUT_PARTITION + // Detect window functions with empty OVER() or OVER () (no PARTITION BY) + const windowNoPartition = /\bOVER\s*\(\s*(?:ORDER\s+BY\b[^)]*)?(? + i.message.includes("WINDOW_WITHOUT_PARTITION") || i.message.includes("window") && i.message.includes("PARTITION"), + ) + if (!hasRule) { + issues.push({ + type: "lint", + severity: "warning", + message: "WINDOW_WITHOUT_PARTITION: Window function without PARTITION BY operates on the entire result set", + recommendation: "Add a PARTITION BY clause to limit the window scope, or confirm whole-table windowing is intended.", + confidence: "high", + }) + } + } + + // TypeScript-level rule: LARGE_IN_LIST + // Detect IN (...) with 20+ values + const inListPattern = /\bIN\s*\(([^)]+)\)/gi + let inMatch: RegExpExecArray | null + while ((inMatch = inListPattern.exec(params.sql)) !== null) { + const values = inMatch[1].split(",") + if (values.length >= 20) { + const hasRule = issues.some((i) => i.message.includes("LARGE_IN_LIST")) + if (!hasRule) { + issues.push({ + type: "lint", + severity: "info", + message: `LARGE_IN_LIST: IN clause contains ${values.length} values. Consider using a CTE with VALUES or a temporary table for better readability and performance.`, + recommendation: "Replace large IN lists with a CTE: WITH vals AS (SELECT * FROM VALUES (...)) SELECT ... WHERE col IN (SELECT * FROM vals)", + confidence: "high", + }) + break // Only report once + } + } + } + return { success: issues.length === 0, issues, @@ -98,13 +138,25 @@ register("sql.analyze", async (params) => { // --------------------------------------------------------------------------- register("sql.translate", async (params) => { try { - const processed = preprocessIff(params.sql) + const source = params.source_dialect.toLowerCase() + const target = params.target_dialect.toLowerCase() + + // Apply dialect-specific preprocessing before transpile + let processed = params.sql + if (source === "snowflake" && target === "bigquery") { + processed = preprocessSnowflakeToBigQuery(processed) + } else { + processed = preprocessIff(processed, target) + } + const raw = core.transpile(processed, params.source_dialect, params.target_dialect) const result = JSON.parse(JSON.stringify(raw)) let translatedSql = result.transpiled_sql?.[0] ?? "" - const target = params.target_dialect.toLowerCase() - if (["bigquery", "databricks", "spark", "trino"].includes(target)) { + + // Only wrap QUALIFY for targets that don't support it natively + // BigQuery supports QUALIFY natively since 2021 + if (["databricks", "spark", "trino"].includes(target)) { if (translatedSql.toUpperCase().includes("QUALIFY")) { translatedSql = postprocessQualify(translatedSql) } @@ -142,13 +194,25 @@ register("sql.optimize", async (params) => { const rewrite = JSON.parse(JSON.stringify(rewriteRaw)) const lint = JSON.parse(JSON.stringify(lintRaw)) - const suggestions: SqlOptimizeSuggestion[] = (rewrite.suggestions ?? []).map((s: any) => ({ - type: "REWRITE", - description: s.explanation ?? s.rule ?? "", - before: params.sql, - after: s.rewritten_sql, - impact: s.confidence > 0.7 ? "high" : s.confidence > 0.4 ? "medium" : "low", - })) + // TypeScript-level non-sargable rewrites + const tsRewrites = detectNonSargableRewrites(params.sql) + + const suggestions: SqlOptimizeSuggestion[] = [ + ...(rewrite.suggestions ?? []).map((s: any) => ({ + type: "REWRITE", + description: s.explanation ?? s.rule ?? "", + before: params.sql, + after: s.rewritten_sql, + impact: s.confidence > 0.7 ? "high" : s.confidence > 0.4 ? "medium" : "low", + })), + ...tsRewrites.map((r) => ({ + type: "REWRITE", + description: r.explanation, + before: r.original_fragment, + after: r.rewritten_fragment, + impact: r.confidence > 0.7 ? "high" : r.confidence > 0.4 ? "medium" : "low", + })), + ] const antiPatterns = (lint.findings ?? []).map((f: any) => ({ type: f.rule ?? "lint", @@ -159,7 +223,7 @@ register("sql.optimize", async (params) => { confidence: "high", })) - const bestRewrite = rewrite.suggestions?.[0]?.rewritten_sql + const bestRewrite = rewrite.suggestions?.[0]?.rewritten_sql ?? tsRewrites[0]?.rewritten_sql return { success: true, @@ -184,18 +248,91 @@ register("sql.optimize", async (params) => { // --------------------------------------------------------------------------- // sql.format // --------------------------------------------------------------------------- + +/** + * Pretty-print SQL by inserting line breaks before major clauses + * and indenting sub-clauses. Used when core.formatSql returns single-line. + */ +function prettyPrintSql(sql: string, indent: number = 2): string { + const pad = " ".repeat(indent) + + // Major clause keywords that start a new line (not indented) + const majorClauses = /\b(SELECT|FROM|WHERE|GROUP\s+BY|ORDER\s+BY|HAVING|LIMIT|OFFSET|UNION\s+ALL|UNION|INTERSECT|EXCEPT|INSERT\s+INTO|UPDATE|DELETE\s+FROM|SET|VALUES|RETURNING)\b/gi + + // Sub-clauses that should be indented + const subClauses = /\b(LEFT\s+JOIN|RIGHT\s+JOIN|INNER\s+JOIN|FULL\s+(?:OUTER\s+)?JOIN|CROSS\s+JOIN|JOIN|ON|AND|OR|WHEN|THEN|ELSE|END)\b/gi + + // First, normalize whitespace + let formatted = sql.replace(/\s+/g, " ").trim() + + // Break before major clauses + formatted = formatted.replace(majorClauses, "\n$1") + + // Break before sub-clauses and indent + formatted = formatted.replace(subClauses, (match) => { + const upper = match.toUpperCase().trim() + // AND/OR get indented under WHERE/ON + if (upper === "AND" || upper === "OR") return `\n${pad}${pad}${match}` + // WHEN/THEN/ELSE/END for CASE expressions + if (["WHEN", "THEN", "ELSE", "END"].includes(upper)) return `\n${pad}${pad}${match}` + // JOINs get single indent + return `\n${match}` + }) + + // Indent SELECT column list: break after commas in SELECT clause + const lines = formatted.split("\n") + const result: string[] = [] + let inSelect = false + for (const line of lines) { + const trimmed = line.trim() + if (!trimmed) continue + + if (/^SELECT\b/i.test(trimmed)) { + inSelect = true + // Split comma-separated columns onto separate lines + const afterSelect = trimmed.replace(/^SELECT\s+/i, "").trim() + if (afterSelect.includes(",")) { + result.push("SELECT") + const cols = afterSelect.split(",").map((c) => c.trim()) + for (let i = 0; i < cols.length; i++) { + result.push(`${pad}${cols[i]}${i < cols.length - 1 ? "," : ""}`) + } + } else { + result.push(trimmed) + } + continue + } + + if (/^(?:FROM|WHERE|GROUP|ORDER|HAVING|LIMIT|UNION|INSERT|UPDATE|DELETE|SET|VALUES)\b/i.test(trimmed)) { + inSelect = false + } + + result.push(trimmed) + } + + return result.join("\n") +} + register("sql.format", async (params) => { try { const raw = core.formatSql(params.sql, params.dialect) const result = JSON.parse(JSON.stringify(raw)) + let formattedSql = result.formatted_sql ?? params.sql + + // If the core formatter returned single-line SQL, apply pretty-printing + if (formattedSql && !formattedSql.includes("\n")) { + formattedSql = prettyPrintSql(formattedSql, params.indent ?? 2) + } + return { success: result.success ?? true, - formatted_sql: result.formatted_sql ?? params.sql, + formatted_sql: formattedSql, + statement_count: result.statement_count ?? 1, dialect: params.dialect ?? "generic", error: result.error, } } catch (e) { - return { success: false, formatted_sql: params.sql, dialect: params.dialect ?? "generic", error: String(e) } + return { success: false, formatted_sql: params.sql, statement_count: 0, dialect: params.dialect ?? "generic", error: String(e) } } }) @@ -326,51 +463,177 @@ register("sql.diff", async (params) => { : null const compare = compareRaw ? JSON.parse(JSON.stringify(compareRaw)) : null - // Simple line-based diff + // Token-aware diff: normalize whitespace and compare tokens + const normalizeForCompare = (s: string) => s.replace(/\s+/g, " ").trim() + const tokensA = normalizeForCompare(sqlA) + const tokensB = normalizeForCompare(sqlB) + + // Line-based diff for display const linesA = sqlA.split("\n") const linesB = sqlB.split("\n") const diffLines: string[] = [] + let additions = 0 + let deletions = 0 const maxLen = Math.max(linesA.length, linesB.length) for (let i = 0; i < maxLen; i++) { const a = linesA[i] ?? "" const b = linesB[i] ?? "" if (a !== b) { - if (a) diffLines.push(`- ${a}`) - if (b) diffLines.push(`+ ${b}`) + if (a) { diffLines.push(`- ${a}`); deletions++ } + if (b) { diffLines.push(`+ ${b}`); additions++ } + } else if (a) { + diffLines.push(` ${a}`) } } + // If both queries are on a single line, do a token-level comparison + if (diffLines.length === 0 && tokensA !== tokensB) { + diffLines.push(`- ${sqlA}`) + diffLines.push(`+ ${sqlB}`) + additions = 1 + deletions = 1 + } + + const hasChanges = tokensA !== tokensB + const changeCount = additions + deletions + + // Compute similarity ratio (Dice coefficient on character bigrams) + const bigrams = (s: string): Set => { + const set = new Set() + for (let i = 0; i < s.length - 1; i++) set.add(s.slice(i, i + 2)) + return set + } + const bA = bigrams(tokensA) + const bB = bigrams(tokensB) + let intersection = 0 + for (const b of bA) { if (bB.has(b)) intersection++ } + const similarity = bA.size + bB.size > 0 + ? (2 * intersection) / (bA.size + bB.size) + : 1.0 + return { success: true, - diff: diffLines.join("\n"), - equivalent: compare?.equivalent ?? false, - equivalence_confidence: compare?.confidence ?? 0, - differences: compare?.differences ?? [], + has_changes: hasChanges, + unified_diff: diffLines.join("\n"), + additions, + deletions, + change_count: changeCount, + similarity, + changes: compare?.differences ?? [], } } catch (e) { - return { success: false, diff: "", equivalent: false, equivalence_confidence: 0, differences: [], error: String(e) } + return { success: false, has_changes: false, unified_diff: "", additions: 0, deletions: 0, change_count: 0, similarity: 0, changes: [], error: String(e) } } }) // --------------------------------------------------------------------------- // sql.rewrite // --------------------------------------------------------------------------- + +/** + * Detect and rewrite non-sargable function-wrapped predicates in TypeScript. + * Covers YEAR(), MONTH(), DAY(), DATE() wrapped columns in WHERE clauses. + */ +function detectNonSargableRewrites(sql: string): Array<{ + rule: string + original_fragment: string + rewritten_fragment: string + rewritten_sql: string + explanation: string + confidence: number +}> { + const rewrites: Array<{ + rule: string + original_fragment: string + rewritten_fragment: string + rewritten_sql: string + explanation: string + confidence: number + }> = [] + + // Match YEAR(col) = NNNN pattern + const yearPattern = /\bYEAR\s*\(\s*([a-zA-Z_][\w.]*)\s*\)\s*=\s*(\d{4})\b/gi + let match: RegExpExecArray | null + let rewrittenSql = sql + while ((match = yearPattern.exec(sql)) !== null) { + const col = match[1] + const year = parseInt(match[2], 10) + const original = match[0] + const replacement = `${col} >= '${year}-01-01' AND ${col} < '${year + 1}-01-01'` + rewrittenSql = rewrittenSql.replace(original, replacement) + rewrites.push({ + rule: "NON_SARGABLE_PREDICATE", + original_fragment: original, + rewritten_fragment: replacement, + rewritten_sql: "", + explanation: `Predicate pushdown: YEAR(${col}) = ${year} → range predicate. Allows index/partition pruning.`, + confidence: 0.9, + }) + } + + // Match MONTH(col) = N pattern + const monthPattern = /\bMONTH\s*\(\s*([a-zA-Z_][\w.]*)\s*\)\s*=\s*(\d{1,2})\b/gi + while ((match = monthPattern.exec(sql)) !== null) { + const col = match[1] + const month = parseInt(match[2], 10) + const original = match[0] + const paddedMonth = String(month).padStart(2, "0") + const nextMonth = month === 12 ? "01" : String(month + 1).padStart(2, "0") + const yearOffset = month === 12 ? "+1 year" : "" + const replacement = `EXTRACT(MONTH FROM ${col}) = ${month} /* consider range predicate for partition pruning */` + rewrittenSql = rewrittenSql.replace(original, replacement) + rewrites.push({ + rule: "NON_SARGABLE_PREDICATE", + original_fragment: original, + rewritten_fragment: replacement, + rewritten_sql: "", + explanation: `Function-wrapped predicate MONTH(${col}) prevents index usage. Consider a range predicate if combined with a year filter.`, + confidence: 0.7, + }) + } + + // Set the full rewritten SQL on all entries + for (const r of rewrites) { + r.rewritten_sql = rewrittenSql + } + + return rewrites +} + register("sql.rewrite", async (params) => { try { const schema = schemaOrEmpty(params.schema_path, params.schema_context) const raw = core.rewrite(params.sql, schema) const result = JSON.parse(JSON.stringify(raw)) - return { - success: true, - original_sql: params.sql, - rewritten_sql: result.suggestions?.[0]?.rewritten_sql ?? null, - rewrites_applied: result.suggestions?.map((s: any) => ({ + + // Merge Rust suggestions with TypeScript-level rewrites + const coreSuggestions = result.suggestions ?? [] + const tsRewrites = detectNonSargableRewrites(params.sql) + + const allSuggestions = [ + ...coreSuggestions.map((s: any) => ({ rule: s.rule, original_fragment: params.sql, rewritten_fragment: s.rewritten_sql ?? params.sql, explanation: s.explanation ?? s.improvement ?? "", can_auto_apply: (s.confidence ?? 0) >= 0.7, - })) ?? [], + })), + ...tsRewrites.map((r) => ({ + rule: r.rule, + original_fragment: r.original_fragment, + rewritten_fragment: r.rewritten_fragment, + explanation: r.explanation, + can_auto_apply: r.confidence >= 0.7, + })), + ] + + const bestRewrite = coreSuggestions[0]?.rewritten_sql ?? tsRewrites[0]?.rewritten_sql ?? null + + return { + success: true, + original_sql: params.sql, + rewritten_sql: bestRewrite, + rewrites_applied: allSuggestions, } } catch (e) { return { success: false, original_sql: params.sql, rewritten_sql: null, rewrites_applied: [], error: String(e) } @@ -389,14 +652,109 @@ register("sql.schema_diff", async (params) => { const raw = core.diffSchemas(oldSchema, newSchema) const result = JSON.parse(JSON.stringify(raw)) - const changes = result.changes ?? [] - const hasBreaking = changes.some((c: any) => c.severity === "breaking") + // Transform SchemaChange tagged union into ColumnChange format + const rawChanges = result.changes ?? [] + const changes: Array<{ + column: string + change_type: string + severity: string + message: string + old_type?: string + new_type?: string + new_name?: string + }> = [] + + let dropped = 0, added = 0, typeChanged = 0, renamed = 0 + + for (const c of rawChanges) { + switch (c.type) { + case "column_removed": + dropped++ + changes.push({ + column: `${c.table}.${c.column}`, + change_type: "DROPPED", + severity: "breaking", + message: `Column '${c.table}.${c.column}' was removed`, + }) + break + case "column_added": + added++ + changes.push({ + column: `${c.table}.${c.column}`, + change_type: "ADDED", + severity: "info", + message: `Column '${c.table}.${c.column}' was added (${c.data_type})`, + }) + break + case "column_type_changed": + typeChanged++ + changes.push({ + column: `${c.table}.${c.column}`, + change_type: "TYPE_CHANGED", + severity: "breaking", + message: `Column '${c.table}.${c.column}' type changed: ${c.old_type} → ${c.new_type}`, + old_type: c.old_type, + new_type: c.new_type, + }) + break + case "nullability_changed": + changes.push({ + column: `${c.table}.${c.column}`, + change_type: "NULLABILITY_CHANGED", + severity: c.new_nullable ? "info" : "warning", + message: `Column '${c.table}.${c.column}' nullability changed: ${c.old_nullable ? "nullable" : "not null"} → ${c.new_nullable ? "nullable" : "not null"}`, + }) + break + case "table_added": + added++ + changes.push({ + column: c.table, + change_type: "TABLE_ADDED", + severity: "info", + message: `Table '${c.table}' was added`, + }) + break + case "table_removed": + dropped++ + changes.push({ + column: c.table, + change_type: "TABLE_REMOVED", + severity: "breaking", + message: `Table '${c.table}' was removed`, + }) + break + default: + changes.push({ + column: c.table ?? "", + change_type: c.type ?? "UNKNOWN", + severity: "warning", + message: c.description ?? JSON.stringify(c), + }) + } + } + + const hasBreaking = changes.some((c) => c.severity === "breaking") + + // Build summary from old/new schema + const oldColCount = oldSchema.tableNames().reduce( + (sum, t) => sum + (oldSchema.columnNames(t)?.length ?? 0), 0, + ) + const newColCount = newSchema.tableNames().reduce( + (sum, t) => sum + (newSchema.columnNames(t)?.length ?? 0), 0, + ) return { success: true, changes, has_breaking_changes: hasBreaking, - summary: result.summary ?? {}, + summary: { + old_column_count: oldColCount, + new_column_count: newColCount, + dropped, + added, + type_changed: typeChanged, + renamed, + }, error: undefined, } satisfies SchemaDiffResult } catch (e) { diff --git a/packages/opencode/src/altimate/tools/altimate-core-compare.ts b/packages/opencode/src/altimate/tools/altimate-core-compare.ts index d877eee608..27d0d6a05f 100644 --- a/packages/opencode/src/altimate/tools/altimate-core-compare.ts +++ b/packages/opencode/src/altimate/tools/altimate-core-compare.ts @@ -18,9 +18,11 @@ export const AltimateCoreCompareTool = Tool.define("altimate_core_compare", { dialect: args.dialect ?? "", }) const data = result.data as Record - const diffCount = data.differences?.length ?? 0 + const diffs = data.diffs ?? data.differences ?? [] + const diffCount = diffs.length + const isIdentical = data.identical ?? (diffCount === 0) return { - title: `Compare: ${diffCount === 0 ? "IDENTICAL" : `${diffCount} difference(s)`}`, + title: `Compare: ${isIdentical ? "IDENTICAL" : `${diffCount} difference(s)`}`, metadata: { success: result.success, difference_count: diffCount }, output: formatCompare(data), } @@ -33,10 +35,13 @@ export const AltimateCoreCompareTool = Tool.define("altimate_core_compare", { function formatCompare(data: Record): string { if (data.error) return `Error: ${data.error}` - if (!data.differences?.length) return "Queries are structurally identical." - const lines = ["Structural differences:\n"] - for (const d of data.differences) { - lines.push(` [${d.type ?? "change"}] ${d.description ?? d.message ?? d}`) + // CompareResult uses "diffs" field, not "differences" + const diffs = data.diffs ?? data.differences ?? [] + const isIdentical = data.identical ?? (diffs.length === 0) + if (isIdentical && diffs.length === 0) return "Queries are structurally identical." + const lines = [`Structural differences (${diffs.length}):\n`] + for (const d of diffs) { + lines.push(` [${d.change_type ?? d.type ?? "change"}] ${d.description ?? d.message ?? d}`) } return lines.join("\n") } diff --git a/packages/opencode/src/altimate/tools/altimate-core-correct.ts b/packages/opencode/src/altimate/tools/altimate-core-correct.ts index d2ef172f19..2318547388 100644 --- a/packages/opencode/src/altimate/tools/altimate-core-correct.ts +++ b/packages/opencode/src/altimate/tools/altimate-core-correct.ts @@ -37,11 +37,22 @@ function formatCorrect(data: Record): string { lines.push("Corrected SQL:") lines.push(data.corrected_sql) } - if (data.iterations) lines.push(`\nIterations: ${data.iterations}`) + // iterations is CorrectionIteration[] — serialize properly + if (data.iterations != null) { + if (Array.isArray(data.iterations)) { + lines.push(`\nIterations: ${data.iterations.length}`) + for (const iter of data.iterations) { + const desc = iter.fix_description ?? iter.result ?? "correction step" + lines.push(` ${iter.iteration ?? "-"}. ${desc}`) + } + } else if (typeof data.iterations === "number") { + lines.push(`\nIterations: ${data.iterations}`) + } + } if (data.changes?.length) { lines.push("\nCorrections applied:") for (const c of data.changes) { - lines.push(` - ${c.description ?? c}`) + lines.push(` - ${typeof c === "string" ? c : c.description ?? c.fix_description ?? JSON.stringify(c)}`) } } if (!data.corrected_sql && !data.changes?.length) { diff --git a/packages/opencode/src/altimate/tools/altimate-core-migration.ts b/packages/opencode/src/altimate/tools/altimate-core-migration.ts index 0fae1f80d1..731bc35b2d 100644 --- a/packages/opencode/src/altimate/tools/altimate-core-migration.ts +++ b/packages/opencode/src/altimate/tools/altimate-core-migration.ts @@ -18,9 +18,12 @@ export const AltimateCoreMigrationTool = Tool.define("altimate_core_migration", dialect: args.dialect ?? "", }) const data = result.data as Record - const riskCount = data.risks?.length ?? 0 + // MigrationResult uses "findings" not "risks" + const findings = data.findings ?? data.risks ?? [] + const riskCount = findings.length + const isSafe = data.safe ?? (riskCount === 0) return { - title: `Migration: ${riskCount === 0 ? "SAFE" : `${riskCount} risk(s)`}`, + title: `Migration: ${isSafe ? "SAFE" : `${riskCount} risk(s)`}`, metadata: { success: result.success, risk_count: riskCount }, output: formatMigration(data), } @@ -33,11 +36,24 @@ export const AltimateCoreMigrationTool = Tool.define("altimate_core_migration", function formatMigration(data: Record): string { if (data.error) return `Error: ${data.error}` - if (!data.risks?.length) return "Migration appears safe. No risks detected." - const lines = ["Migration risks:\n"] - for (const r of data.risks) { - lines.push(` [${r.severity ?? "warning"}] ${r.type}: ${r.message}`) - if (r.recommendation) lines.push(` Recommendation: ${r.recommendation}`) + // MigrationResult uses "findings" with MigrationFinding shape: + // { risk: MigrationRisk, operation: string, message: string, mitigation?: string, rollback_sql?: string } + const findings = data.findings ?? data.risks ?? [] + if (!findings.length) return "Migration appears safe. No risks detected." + + const lines: string[] = [] + if (data.overall_risk) lines.push(`Overall risk: ${data.overall_risk}`) + lines.push(`\nMigration risks (${findings.length}):\n`) + for (const r of findings) { + const severity = r.risk ?? r.severity ?? "warning" + const operation = r.operation ?? r.type ?? "change" + lines.push(` [${severity.toUpperCase()}] ${operation}: ${r.message}`) + if (r.mitigation ?? r.recommendation) { + lines.push(` Mitigation: ${r.mitigation ?? r.recommendation}`) + } + if (r.rollback_sql) { + lines.push(` Rollback: ${r.rollback_sql}`) + } } return lines.join("\n") } diff --git a/packages/opencode/src/altimate/tools/altimate-core-track-lineage.ts b/packages/opencode/src/altimate/tools/altimate-core-track-lineage.ts index 9e961b0b9b..483a005140 100644 --- a/packages/opencode/src/altimate/tools/altimate-core-track-lineage.ts +++ b/packages/opencode/src/altimate/tools/altimate-core-track-lineage.ts @@ -18,11 +18,13 @@ export const AltimateCoreTrackLineageTool = Tool.define("altimate_core_track_lin schema_context: args.schema_context, }) const data = result.data as Record - const edgeCount = data.edges?.length ?? 0 + // LineageResult has queries[].edges, not root-level edges + const allEdges = extractAllEdges(data) + const edgeCount = allEdges.length return { title: `Track Lineage: ${edgeCount} edge(s) across ${args.queries.length} queries`, metadata: { success: result.success, edge_count: edgeCount }, - output: formatTrackLineage(data), + output: formatTrackLineage(data, allEdges), } } catch (e) { const msg = e instanceof Error ? e.message : String(e) @@ -31,12 +33,64 @@ export const AltimateCoreTrackLineageTool = Tool.define("altimate_core_track_lin }, }) -function formatTrackLineage(data: Record): string { +/** + * Extract all edges from LineageResult's nested queries[].edges structure. + */ +function extractAllEdges(data: Record): Array> { + // Try root-level edges first (in case format changes) + if (data.edges?.length) return data.edges + + // Extract from queries[].edges (LineageResult structure) + const allEdges: Array> = [] + if (Array.isArray(data.queries)) { + for (const q of data.queries) { + for (const edge of q.edges ?? []) { + allEdges.push(edge) + } + } + } + + // Also include impact_map entries as cross-query edges + if (Array.isArray(data.impact_map)) { + for (const entry of data.impact_map) { + const src = entry.source + for (const affected of entry.affected ?? []) { + allEdges.push({ + source: src, + target: affected, + transform_type: "cross_query", + }) + } + } + } + + return allEdges +} + +function formatEdgeRef(ref: any): string { + if (typeof ref === "string") return ref + if (ref?.table && ref?.column) return `${ref.table}.${ref.column}` + return JSON.stringify(ref) +} + +function formatTrackLineage(data: Record, allEdges: Array>): string { if (data.error) return `Error: ${data.error}` - if (!data.edges?.length) return "No lineage edges found across queries." - const lines = ["Lineage graph:\n"] - for (const edge of data.edges) { - lines.push(` ${edge.source} -> ${edge.target}${edge.transform ? ` (${edge.transform})` : ""}`) + if (allEdges.length === 0) return "No lineage edges found across queries." + + const lines: string[] = [] + + // Show dependency order if available + if (data.dependency_order?.length) { + lines.push(`Dependency order: ${data.dependency_order.join(" → ")}`) + lines.push("") + } + + lines.push("Lineage graph:\n") + for (const edge of allEdges) { + const src = formatEdgeRef(edge.source) + const tgt = formatEdgeRef(edge.target) + const transform = edge.transform_type ?? edge.transform ?? "" + lines.push(` ${src} -> ${tgt}${transform ? ` (${transform})` : ""}`) } return lines.join("\n") } diff --git a/packages/opencode/src/altimate/tools/sql-fix.ts b/packages/opencode/src/altimate/tools/sql-fix.ts index 7bcfcb0673..432d5b8801 100644 --- a/packages/opencode/src/altimate/tools/sql-fix.ts +++ b/packages/opencode/src/altimate/tools/sql-fix.ts @@ -45,7 +45,7 @@ function formatFix(result: SqlFixResult): string { lines.push(`Error: ${result.error_message}`) lines.push("") - if (result.fixed_sql) { + if (result.success && result.fixed_sql && result.fixed_sql !== result.original_sql) { lines.push("=== Auto-Fixed SQL ===") lines.push(result.fixed_sql) lines.push("")