From 5fde8c72ede6a5e3c6285cecee4949eb62efc2a5 Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Thu, 21 May 2026 19:21:41 -0700 Subject: [PATCH] feat: scaffold @altimateai/mcp-data-agent with 20 tool stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds packages/mcp-data-agent/ — a stdio MCP server that will expose 20 curated SQL, dbt, FinOps, lineage, and PII tools from altimate-engine over the Model Context Protocol. Per docs/internal/2026-05-21-claude-code-plugin-mcp-spec.md. This is the contract surface only: - MCP server scaffolded with @modelcontextprotocol/sdk@^1.29.0 over stdio - 20 tool files, each declaring its zod input schema, description, and mutating/read-only annotation. Every handler currently throws NotImplementedError pointing at a tracking issue placeholder - Write gate: mutating tools (sql_execute, dbt_run) refuse to run unless ALTIMATE_MCP_ALLOW_WRITE=true. The refusal happens before the handler, so the "not implemented" stub is unreachable in read-only mode - bin/altimate-mcp shim mirrors the dbt-tools pattern - server.json manifest for the official MCP registry submission - Smoke tests: registry exports 20 tools, names are unique, tools/list over an in-memory MCP transport returns all 20, tools/call returns a not-implemented error for read tools, mutating tools refuse without the env gate - Workspace registration: packages/mcp-data-agent added to root package.json workspaces Local validation: - bun --cwd packages/mcp-data-agent run typecheck: pass - bun --cwd packages/mcp-data-agent test: 5/5 pass - bun build src/index.ts: produces dist/index.js - node bin/altimate-mcp over stdio: tools/list returns 20 tools - bun run script/upstream/analyze.ts --markers --base main --strict: pass (no upstream-shared files touched) Wiring tool handlers to altimate-engine is a separate workstream. 
--- bun.lock | 24 ++++ package.json | 3 +- packages/mcp-data-agent/.gitignore | 4 + packages/mcp-data-agent/README.md | 63 ++++++++++ packages/mcp-data-agent/bin/altimate-mcp | 2 + packages/mcp-data-agent/package.json | 49 ++++++++ packages/mcp-data-agent/server.json | 28 +++++ packages/mcp-data-agent/src/auth.ts | 29 +++++ packages/mcp-data-agent/src/index.ts | 17 +++ packages/mcp-data-agent/src/server.ts | 119 ++++++++++++++++++ .../src/tools/account_usage_query.ts | 46 +++++++ .../src/tools/data_parity_check.ts | 31 +++++ .../mcp-data-agent/src/tools/dbt_compile.ts | 27 ++++ packages/mcp-data-agent/src/tools/dbt_diff.ts | 26 ++++ .../src/tools/dbt_impact_analyze.ts | 20 +++ .../mcp-data-agent/src/tools/dbt_lineage.ts | 31 +++++ packages/mcp-data-agent/src/tools/dbt_run.ts | 25 ++++ packages/mcp-data-agent/src/tools/dbt_test.ts | 21 ++++ .../src/tools/finops_anomaly_scan.ts | 25 ++++ .../src/tools/finops_clustering_roi.ts | 26 ++++ .../src/tools/finops_credits_summary.ts | 30 +++++ .../src/tools/finops_expensive_queries.ts | 21 ++++ .../src/tools/finops_unused_resources.ts | 25 ++++ .../src/tools/finops_warehouse_advice.ts | 22 ++++ packages/mcp-data-agent/src/tools/index.ts | 47 +++++++ packages/mcp-data-agent/src/tools/pii_scan.ts | 27 ++++ .../src/tools/query_history_search.ts | 36 ++++++ .../src/tools/schema_introspect.ts | 20 +++ .../mcp-data-agent/src/tools/sql_analyze.ts | 30 +++++ .../mcp-data-agent/src/tools/sql_execute.ts | 37 ++++++ .../mcp-data-agent/src/tools/sql_explain.ts | 34 +++++ packages/mcp-data-agent/test/server.test.ts | 70 +++++++++++ packages/mcp-data-agent/tsconfig.json | 7 ++ 33 files changed, 1021 insertions(+), 1 deletion(-) create mode 100644 packages/mcp-data-agent/.gitignore create mode 100644 packages/mcp-data-agent/README.md create mode 100755 packages/mcp-data-agent/bin/altimate-mcp create mode 100644 packages/mcp-data-agent/package.json create mode 100644 packages/mcp-data-agent/server.json create mode 100644 
packages/mcp-data-agent/src/auth.ts create mode 100644 packages/mcp-data-agent/src/index.ts create mode 100644 packages/mcp-data-agent/src/server.ts create mode 100644 packages/mcp-data-agent/src/tools/account_usage_query.ts create mode 100644 packages/mcp-data-agent/src/tools/data_parity_check.ts create mode 100644 packages/mcp-data-agent/src/tools/dbt_compile.ts create mode 100644 packages/mcp-data-agent/src/tools/dbt_diff.ts create mode 100644 packages/mcp-data-agent/src/tools/dbt_impact_analyze.ts create mode 100644 packages/mcp-data-agent/src/tools/dbt_lineage.ts create mode 100644 packages/mcp-data-agent/src/tools/dbt_run.ts create mode 100644 packages/mcp-data-agent/src/tools/dbt_test.ts create mode 100644 packages/mcp-data-agent/src/tools/finops_anomaly_scan.ts create mode 100644 packages/mcp-data-agent/src/tools/finops_clustering_roi.ts create mode 100644 packages/mcp-data-agent/src/tools/finops_credits_summary.ts create mode 100644 packages/mcp-data-agent/src/tools/finops_expensive_queries.ts create mode 100644 packages/mcp-data-agent/src/tools/finops_unused_resources.ts create mode 100644 packages/mcp-data-agent/src/tools/finops_warehouse_advice.ts create mode 100644 packages/mcp-data-agent/src/tools/index.ts create mode 100644 packages/mcp-data-agent/src/tools/pii_scan.ts create mode 100644 packages/mcp-data-agent/src/tools/query_history_search.ts create mode 100644 packages/mcp-data-agent/src/tools/schema_introspect.ts create mode 100644 packages/mcp-data-agent/src/tools/sql_analyze.ts create mode 100644 packages/mcp-data-agent/src/tools/sql_execute.ts create mode 100644 packages/mcp-data-agent/src/tools/sql_explain.ts create mode 100644 packages/mcp-data-agent/test/server.test.ts create mode 100644 packages/mcp-data-agent/tsconfig.json diff --git a/bun.lock b/bun.lock index b21e475280..24a38b9b91 100644 --- a/bun.lock +++ b/bun.lock @@ -55,6 +55,22 @@ "snowflake-sdk": "^2.0.3", }, }, + "packages/mcp-data-agent": { + "name": 
"@altimateai/mcp-data-agent", + "version": "0.1.0", + "bin": { + "altimate-mcp": "./bin/altimate-mcp", + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.29.0", + "zod": "catalog:", + }, + "devDependencies": { + "@tsconfig/bun": "catalog:", + "@types/bun": "catalog:", + "typescript": "catalog:", + }, + }, "packages/opencode": { "name": "@altimateai/altimate-code", "version": "1.2.20", @@ -395,6 +411,8 @@ "@altimateai/drivers": ["@altimateai/drivers@workspace:packages/drivers"], + "@altimateai/mcp-data-agent": ["@altimateai/mcp-data-agent@workspace:packages/mcp-data-agent"], + "@ampproject/remapping": ["@ampproject/remapping@2.3.0", "", { "dependencies": { "@jridgewell/gen-mapping": "^0.3.5", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw=="], "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.71.2", "", { "dependencies": { "json-schema-to-ts": "^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-TGNDEUuEstk/DKu0/TflXAEt+p+p/WhTlFzEnoosvbaDU2LTjm42igSdlL0VijrKpWejtOKxX0b8A7uc+XiSAQ=="], @@ -2659,6 +2677,8 @@ "@altimateai/dbt-integration/@altimateai/altimate-core": ["@altimateai/altimate-core@0.1.6", "", { "optionalDependencies": { "@altimateai/altimate-core-darwin-arm64": "0.1.6", "@altimateai/altimate-core-darwin-x64": "0.1.6", "@altimateai/altimate-core-linux-arm64-gnu": "0.1.6", "@altimateai/altimate-core-linux-x64-gnu": "0.1.6", "@altimateai/altimate-core-win32-x64-msvc": "0.1.6" } }, "sha512-Kl0hjT88Q56AdGxKJyCcPElxcpZYDYmLhDHK7ZeZIn2oVaXyynExLcIHn+HktUe9USuWtba3tZA/52jJsMyrGg=="], + "@altimateai/mcp-data-agent/@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", 
"eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="], + "@aws-crypto/sha1-browser/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="], "@aws-crypto/sha256-browser/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="], @@ -3239,6 +3259,10 @@ "@altimateai/dbt-integration/@altimateai/altimate-core/@altimateai/altimate-core-win32-x64-msvc": ["@altimateai/altimate-core-win32-x64-msvc@0.1.6", "", { "os": "win32", "cpu": "x64" }, "sha512-6Sbneg0DLHMmo1lDVd9oDgGtqPJpDUXZvXwAbGb7eoh+vUmXMxABA43//hBbwkMVsWKClKjv1KXSKp44shrUiw=="], + "@altimateai/mcp-data-agent/@modelcontextprotocol/sdk/hono": ["hono@4.12.9", "", {}, "sha512-wy3T8Zm2bsEvxKZM5w21VdHDDcwVS1yUFFY6i8UobSsKfFceT7TOwhbhfKsDyx7tYQlmRM5FLpIuYvNFyjctiA=="], + + "@altimateai/mcp-data-agent/@modelcontextprotocol/sdk/zod-to-json-schema": ["zod-to-json-schema@3.25.2", "", { "peerDependencies": { "zod": "^3.25.28 || ^4" } }, "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA=="], + "@aws-crypto/sha1-browser/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, 
"sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="], "@aws-crypto/sha256-browser/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="], diff --git a/package.json b/package.json index b808d08a6a..ed0ed44a58 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,8 @@ "packages/util", "packages/sdk/js", "packages/dbt-tools", - "packages/drivers" + "packages/drivers", + "packages/mcp-data-agent" ], "catalog": { "@types/bun": "1.3.9", diff --git a/packages/mcp-data-agent/.gitignore b/packages/mcp-data-agent/.gitignore new file mode 100644 index 0000000000..e0fc267f5b --- /dev/null +++ b/packages/mcp-data-agent/.gitignore @@ -0,0 +1,4 @@ +dist/ +node_modules/ +*.log +.DS_Store diff --git a/packages/mcp-data-agent/README.md b/packages/mcp-data-agent/README.md new file mode 100644 index 0000000000..da2bcf8d01 --- /dev/null +++ b/packages/mcp-data-agent/README.md @@ -0,0 +1,63 @@ +# @altimateai/mcp-data-agent + +A local-first [MCP](https://modelcontextprotocol.io) server that exposes 20 curated data-engineering tools — SQL analysis, dbt workflow, FinOps cost intelligence, lineage, and PII detection — to any MCP-compatible client (Claude Code, Claude Desktop, Cursor, Windsurf, Goose, Cline). The server runs on your machine and talks to your warehouse directly. + +## Install + +The package is `npx`-installable and runs over stdio. To wire it into Claude Code, add to your project's `.mcp.json`: + +```json +{ + "mcpServers": { + "altimate-data": { + "command": "npx", + "args": ["-y", "@altimateai/mcp-data-agent@latest"] + } + } +} +``` + +Cursor, Windsurf, and Claude Desktop use the same shape — see each client's MCP docs for the exact config file location. 
+ +## Configuration + +All configuration is via environment variables. There is no file-based config in v1. + +| Variable | Purpose | +| --- | --- | +| `SNOWFLAKE_ACCOUNT`, `SNOWFLAKE_USER`, `SNOWFLAKE_PASSWORD` | Snowflake credentials (or use OAuth). | +| `ALTIMATE_API_KEY` | Optional, only required for hosted services. | +| `ALTIMATE_MCP_ALLOW_WRITE` | Set to `true` to enable mutating tools (`sql_execute` with mutating SQL, `dbt_run`). Defaults to `false`. | + +## Security + +The server runs locally. No query text, schema metadata, or warehouse data leaves your machine. Your warehouse credentials are read from your environment, and your LLM endpoint is whatever the client is already configured to use. Write tools are gated behind an explicit opt-in env var. + +## Available tools + +| Tool | Purpose | +| --- | --- | +| `sql_execute` | Run a SQL query and return rows (mutating SQL gated by `ALTIMATE_MCP_ALLOW_WRITE`). | +| `sql_analyze` | Static SQL anti-pattern detection with severity-ranked findings. | +| `sql_explain` | Return the warehouse EXPLAIN plan as JSON. | +| `schema_introspect` | Inspect tables, views, and schemas. | +| `dbt_compile` | Compile a dbt model or arbitrary Jinja SQL. | +| `dbt_run` | Materialize models (write-gated). | +| `dbt_test` | Run dbt tests and return failing-row samples. | +| `dbt_lineage` | Model- and column-level lineage from the dbt manifest. | +| `dbt_impact_analyze` | Classify downstream impact of a model change as BREAKING / SAFE / UNKNOWN. | +| `dbt_diff` | Row-level diff between two materializations of a model. | +| `finops_credits_summary` | Credit consumption grouped by warehouse / role / user. | +| `finops_expensive_queries` | Top-N most expensive queries by credits / bytes / elapsed. | +| `finops_warehouse_advice` | Auto-suspend, cluster count, and size recommendations for one warehouse. | +| `finops_unused_resources` | Dormant tables, idle warehouses, unused materialized views. 
| +| `finops_anomaly_scan` | Day-over-day and week-over-week cost spike detection. | +| `finops_clustering_roi` | Reclustering credits vs query-time savings per table. | +| `query_history_search` | Query history filtered by user, role, table reference, regex, or time range. | +| `pii_scan` | Heuristic PII detection on columns by name and sample values. | +| `data_parity_check` | Row-level parity check between two tables. | +| `account_usage_query` | Parameterized access to warehouse observability views. | + +## Status + +Alpha. The 20 tools are scaffolded with the schemas they will eventually expose, but every handler currently throws "not yet wired to altimate-engine". Wiring is in progress — see the [altimate-code repo](https://github.com/AltimateAI/altimate-code) for status. diff --git a/packages/mcp-data-agent/bin/altimate-mcp b/packages/mcp-data-agent/bin/altimate-mcp new file mode 100755 index 0000000000..489c4a0367 --- /dev/null +++ b/packages/mcp-data-agent/bin/altimate-mcp @@ -0,0 +1,2 @@ +#!/usr/bin/env node +import("../dist/index.js") diff --git a/packages/mcp-data-agent/package.json b/packages/mcp-data-agent/package.json new file mode 100644 index 0000000000..b9b005f26a --- /dev/null +++ b/packages/mcp-data-agent/package.json @@ -0,0 +1,49 @@ +{ + "name": "@altimateai/mcp-data-agent", + "version": "0.1.0", + "description": "Local-first MCP server exposing SQL, dbt, FinOps, lineage, and PII tools for data engineering. 
Query text stays on your machine.", + "type": "module", + "private": false, + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/AltimateAI/altimate-code", + "directory": "packages/mcp-data-agent" + }, + "homepage": "https://altimate.ai", + "keywords": [ + "mcp", + "model-context-protocol", + "dbt", + "snowflake", + "bigquery", + "databricks", + "finops", + "sql", + "lineage", + "data-engineering" + ], + "bin": { + "altimate-mcp": "./bin/altimate-mcp" + }, + "files": [ + "bin", + "dist", + "server.json", + "README.md" + ], + "scripts": { + "build": "bun build src/index.ts --outdir dist --target node --format esm", + "typecheck": "tsc --noEmit", + "test": "bun test" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.29.0", + "zod": "catalog:" + }, + "devDependencies": { + "@tsconfig/bun": "catalog:", + "@types/bun": "catalog:", + "typescript": "catalog:" + } +} diff --git a/packages/mcp-data-agent/server.json b/packages/mcp-data-agent/server.json new file mode 100644 index 0000000000..8deae01187 --- /dev/null +++ b/packages/mcp-data-agent/server.json @@ -0,0 +1,28 @@ +{ + "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", + "name": "io.github.altimateai/altimate-code-data-agent", + "title": "Altimate Code Data Agent", + "description": "SQL, dbt, FinOps, lineage tools for data engineering. Local-first — query text never leaves your machine. 
Works with Snowflake, BigQuery, Databricks, Postgres, Redshift, MySQL, SQL Server, Oracle, DuckDB, SQLite.", + "version": "0.1.0", + "repository": { + "url": "https://github.com/AltimateAI/altimate-code", + "source": "github" + }, + "websiteUrl": "https://altimate.ai", + "packages": [ + { + "registryType": "npm", + "identifier": "@altimateai/mcp-data-agent", + "version": "0.1.0", + "transport": { "type": "stdio" }, + "runtimeHint": "npx", + "environmentVariables": [ + { "name": "SNOWFLAKE_ACCOUNT", "description": "Snowflake account locator", "isRequired": false }, + { "name": "SNOWFLAKE_USER", "description": "Snowflake username", "isRequired": false }, + { "name": "SNOWFLAKE_PASSWORD", "description": "Snowflake password or PAT", "isSecret": true, "isRequired": false }, + { "name": "ALTIMATE_API_KEY", "description": "Optional Altimate API key for hosted services", "isSecret": true, "isRequired": false }, + { "name": "ALTIMATE_MCP_ALLOW_WRITE", "description": "Set true to enable write tools (sql_execute mutating SQL, dbt_run). Default false (read-only).", "isRequired": false } + ] + } + ] +} diff --git a/packages/mcp-data-agent/src/auth.ts b/packages/mcp-data-agent/src/auth.ts new file mode 100644 index 0000000000..d4a9b69d64 --- /dev/null +++ b/packages/mcp-data-agent/src/auth.ts @@ -0,0 +1,29 @@ +/** + * Environment-variable based auth + the write-gate. + * + * Phase 1 keeps the surface trivially auditable: no file-based config, no + * embedded secrets, no network round-trip to resolve credentials. Every value + * comes from `process.env`, and mutating tools require an explicit opt-in + * through `ALTIMATE_MCP_ALLOW_WRITE=true`. 
+ */ + +const TRUTHY = new Set(["1", "true", "yes", "on"]) + +export function isWriteAllowed(env: NodeJS.ProcessEnv = process.env): boolean { + const raw = env.ALTIMATE_MCP_ALLOW_WRITE + if (!raw) return false + return TRUTHY.has(raw.trim().toLowerCase()) +} + +export class WriteNotAllowedError extends Error { + constructor(toolName: string) { + super( + `${toolName}: write operations are disabled. Set ALTIMATE_MCP_ALLOW_WRITE=true to enable mutating tools.`, + ) + this.name = "WriteNotAllowedError" + } +} + +export function assertWriteAllowed(toolName: string, env: NodeJS.ProcessEnv = process.env): void { + if (!isWriteAllowed(env)) throw new WriteNotAllowedError(toolName) +} diff --git a/packages/mcp-data-agent/src/index.ts b/packages/mcp-data-agent/src/index.ts new file mode 100644 index 0000000000..dfa0f730d0 --- /dev/null +++ b/packages/mcp-data-agent/src/index.ts @@ -0,0 +1,17 @@ +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" +import { createServer } from "./server.js" + +async function main(): Promise<void> { + const server = await createServer() + const transport = new StdioServerTransport() + await server.connect(transport) +} + +main().catch((err) => { + // The MCP client can't receive structured errors here — the transport has + // not yet been negotiated. Write to stderr and exit non-zero so the parent + // process (claude-code, cursor, etc.) shows the failure in its UI. + const message = err instanceof Error ? err.stack ?? 
err.message : String(err) + process.stderr.write(`altimate-mcp: fatal: ${message}\n`) + process.exit(1) +}) diff --git a/packages/mcp-data-agent/src/server.ts b/packages/mcp-data-agent/src/server.ts new file mode 100644 index 0000000000..dd67478b12 --- /dev/null +++ b/packages/mcp-data-agent/src/server.ts @@ -0,0 +1,119 @@ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import type { z, ZodRawShape } from "zod" +import { assertWriteAllowed } from "./auth.js" + +/** + * Shape used by every tool file. We keep the input schema as a Zod raw shape + * (a `{ key: ZodType }` map) because that is what `McpServer.registerTool` + * consumes directly — it builds the JSON Schema sent to clients from it. + */ +export interface ToolDefinition<Shape extends ZodRawShape = ZodRawShape> { + name: string + description: string + mutating: boolean + input: Shape + handler: (input: InferShape<Shape>) => Promise<ToolResult> +} + +export type InferShape<Shape extends ZodRawShape> = { + [K in keyof Shape]: z.infer<Shape[K]> +} + +export interface ToolResult { + /** Human-readable text the model can read. */ + text: string + /** Optional structured payload returned alongside the text. */ + data?: Record<string, unknown> + /** When true, the call surfaces as an error to the client. */ + isError?: boolean +} + +/** + * Helper used by each tool file to declare itself. Keeping this trivial — it + * is essentially an identity function that pins down types per-tool. The + * registration with the McpServer happens in `register()` below, which + * widens the shape because the tools registry holds a heterogeneous list. + */ +export function defineTool<Shape extends ZodRawShape>(def: ToolDefinition<Shape>): ToolDefinition<Shape> { + return def +} + +/** + * Tracking issue placeholder used by every stub. Replaced with the real issue + * URL once wiring lands. + */ +export const NOT_IMPLEMENTED_ISSUE = "https://github.com/AltimateAI/altimate-code/issues/TBD" + +export class NotImplementedError extends Error { + constructor(toolName: string) { + super(`${toolName}: not yet wired to altimate-engine. 
Track at ${NOT_IMPLEMENTED_ISSUE}`) + this.name = "NotImplementedError" + } +} + +/** + * Register one tool with an `McpServer`. Mutating tools are wrapped so they + * refuse to run unless `ALTIMATE_MCP_ALLOW_WRITE=true` — the refusal happens + * before the handler executes, so a stub that throws "not implemented" never + * runs in write-disallowed mode either. + * + * The function is intentionally typed loosely (`ToolDefinition` without a + * shape parameter) so we can iterate over a heterogeneous list of tools + * without TypeScript collapsing them into one shape. + */ +export function register(server: McpServer, tool: ToolDefinition): void { + // The SDK callback type is generic over the shape; we lose precision here + // on purpose because the registry holds tools with different shapes. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const handler = async (args: any) => { + try { + if (tool.mutating) assertWriteAllowed(tool.name) + const result = await tool.handler(args) + return { + content: [{ type: "text" as const, text: result.text }], + ...(result.data !== undefined ? { structuredContent: result.data } : {}), + isError: result.isError === true, + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + return { + content: [{ type: "text" as const, text: message }], + isError: true, + } + } + } + + server.registerTool( + tool.name, + { + description: tool.description, + inputSchema: tool.input, + annotations: { + readOnlyHint: !tool.mutating, + destructiveHint: tool.mutating, + }, + }, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + handler as any, + ) +} + +/** + * Build a fresh `McpServer` and register every tool from the registry. Kept + * separate from `index.ts` so tests can spin up a server without taking over + * stdio. 
+ */ +export async function createServer(): Promise<McpServer> { + const server = new McpServer( + { + name: "io.github.altimateai/altimate-code-data-agent", + version: "0.1.0", + }, + { + capabilities: { tools: {} }, + }, + ) + const { tools } = await import("./tools/index.js") + for (const tool of tools) register(server, tool as unknown as ToolDefinition) + return server +} diff --git a/packages/mcp-data-agent/src/tools/account_usage_query.ts b/packages/mcp-data-agent/src/tools/account_usage_query.ts new file mode 100644 index 0000000000..1e5c300611 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/account_usage_query.ts @@ -0,0 +1,46 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const accountUsageQuery = defineTool({ + name: "account_usage_query", + description: + "Run a parameterized query against the warehouse observability layer (Snowflake ACCOUNT_USAGE, BigQuery INFORMATION_SCHEMA.JOBS, Databricks system tables, etc.) using a curated set of named views. Use when the canned FinOps tools do not cover the question and you need raw access to billing / metadata data. Read-only.", + mutating: false, + input: { + view: z + .enum([ + "query_history", + "warehouse_metering_history", + "warehouse_load_history", + "automatic_clustering_history", + "materialized_view_refresh_history", + "pipe_usage_history", + "search_optimization_history", + "serverless_task_history", + "storage_usage", + "table_storage_metrics", + ]) + .describe("Named observability view to query."), + filters: z + .record(z.string(), z.union([z.string(), z.number(), z.boolean()])) + .optional() + .describe("Column-equality filters applied as a WHERE clause. Values are bound as parameters."), + days: z + .number() + .int() + .positive() + .max(365) + .optional() + .describe("Trailing window applied to the view's primary timestamp column. Defaults to 7."), + limit: z + .number() + .int() + .positive() + .max(10_000) + .optional() + .describe("Row cap. 
Defaults to 500."), + }, + handler: async () => { + throw new NotImplementedError("account_usage_query") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/data_parity_check.ts b/packages/mcp-data-agent/src/tools/data_parity_check.ts new file mode 100644 index 0000000000..27c4c713f1 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/data_parity_check.ts @@ -0,0 +1,31 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const dataParityCheck = defineTool({ + name: "data_parity_check", + description: + "Compare two tables across warehouses or schemas for parity: row count, column-level checksums, and sampled-value drift. Use to validate migrations, replication, and dual-writes. Read-only — issues SELECTs only.", + mutating: false, + input: { + leftRelation: z.string().describe("Fully-qualified left relation (e.g. 'prod_db.analytics.fct_orders')."), + rightRelation: z.string().describe("Fully-qualified right relation."), + primaryKey: z + .array(z.string()) + .min(1) + .describe("Primary key columns used to align rows."), + columns: z + .array(z.string()) + .optional() + .describe("Specific columns to check. When omitted, compares every column present on both sides."), + sampleLimit: z + .number() + .int() + .positive() + .max(10_000) + .optional() + .describe("Max differing rows to surface in the response. 
Defaults to 100."), + }, + handler: async () => { + throw new NotImplementedError("data_parity_check") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/dbt_compile.ts b/packages/mcp-data-agent/src/tools/dbt_compile.ts new file mode 100644 index 0000000000..1f082d3423 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/dbt_compile.ts @@ -0,0 +1,27 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const dbtCompile = defineTool({ + name: "dbt_compile", + description: + "Compile a dbt model (or arbitrary Jinja SQL) into the final SQL the warehouse would receive. Resolves refs, sources, and macros. Read-only — does not execute the model.", + mutating: false, + input: { + model: z + .string() + .optional() + .describe("dbt model name to compile (e.g. 'fct_orders'). Mutually exclusive with `sql`."), + sql: z + .string() + .optional() + .describe("Raw Jinja SQL to compile against the dbt project context. Mutually exclusive with `model`."), + projectDir: z + .string() + .optional() + .describe("Path to the dbt project root. Defaults to the current working directory."), + target: z.string().optional().describe("dbt target profile name to compile against."), + }, + handler: async () => { + throw new NotImplementedError("dbt_compile") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/dbt_diff.ts b/packages/mcp-data-agent/src/tools/dbt_diff.ts new file mode 100644 index 0000000000..7f2fbf72a7 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/dbt_diff.ts @@ -0,0 +1,26 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const dbtDiff = defineTool({ + name: "dbt_diff", + description: + "Compare two materializations of the same dbt model (e.g. prod vs a dev branch build) row-by-row and column-by-column. Returns row count delta, value mismatches per column, and a small sample of differing rows. 
Read-only.", + mutating: false, + input: { + model: z.string().describe("dbt model name to diff."), + baseRelation: z + .string() + .describe("Fully-qualified baseline relation (e.g. 'analytics_prod.fct_orders')."), + targetRelation: z + .string() + .describe("Fully-qualified target relation (e.g. 'analytics_dev_pr123.fct_orders')."), + primaryKey: z + .array(z.string()) + .min(1) + .describe("Primary key columns used to align rows for comparison."), + sampleLimit: z.number().int().positive().max(1000).optional().describe("Max differing rows to return."), + }, + handler: async () => { + throw new NotImplementedError("dbt_diff") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/dbt_impact_analyze.ts b/packages/mcp-data-agent/src/tools/dbt_impact_analyze.ts new file mode 100644 index 0000000000..1b94b9c638 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/dbt_impact_analyze.ts @@ -0,0 +1,20 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const dbtImpactAnalyze = defineTool({ + name: "dbt_impact_analyze", + description: + "Given a proposed change to a dbt model (added/removed/renamed columns, materialization change, filter change), classify the downstream impact across the dbt DAG into BREAKING, SAFE, and UNKNOWN buckets. Use before opening a PR that modifies a high-traffic model.", + mutating: false, + input: { + model: z.string().describe("dbt model name being changed."), + diffSql: z + .string() + .optional() + .describe("Optional new SQL for the model. 
When omitted, compares HEAD against the working tree."), + projectDir: z.string().optional().describe("Path to the dbt project root."), + }, + handler: async () => { + throw new NotImplementedError("dbt_impact_analyze") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/dbt_lineage.ts b/packages/mcp-data-agent/src/tools/dbt_lineage.ts new file mode 100644 index 0000000000..c63a76568a --- /dev/null +++ b/packages/mcp-data-agent/src/tools/dbt_lineage.ts @@ -0,0 +1,31 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const dbtLineage = defineTool({ + name: "dbt_lineage", + description: + "Return the model-level or column-level lineage for a dbt model: upstream sources, downstream consumers, and (optionally) the column-to-column edges parsed from compiled SQL. Read-only, local — uses the project manifest, no network egress.", + mutating: false, + input: { + model: z.string().describe("dbt model name to compute lineage for."), + direction: z + .enum(["upstream", "downstream", "both"]) + .optional() + .describe("Lineage direction. Defaults to 'both'."), + columnLevel: z + .boolean() + .optional() + .describe("When true, returns column-level lineage edges (requires SQL parsing — slower)."), + depth: z + .number() + .int() + .positive() + .max(20) + .optional() + .describe("How many hops to traverse. 
Defaults to unlimited."), + projectDir: z.string().optional().describe("Path to the dbt project root."), + }, + handler: async () => { + throw new NotImplementedError("dbt_lineage") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/dbt_run.ts b/packages/mcp-data-agent/src/tools/dbt_run.ts new file mode 100644 index 0000000000..f7dc4f0707 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/dbt_run.ts @@ -0,0 +1,25 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const dbtRun = defineTool({ + name: "dbt_run", + description: + "Execute `dbt run` for a selected model or selector. Materializes data in the warehouse. Refused unless ALTIMATE_MCP_ALLOW_WRITE=true is set. Returns per-model status, row counts, and elapsed time.", + mutating: true, + input: { + select: z + .string() + .optional() + .describe("dbt selector (model name, +downstream, tag:..., etc.). Defaults to running the full project."), + fullRefresh: z.boolean().optional().describe("Pass --full-refresh to dbt."), + projectDir: z.string().optional().describe("Path to the dbt project root."), + target: z.string().optional().describe("dbt target profile name."), + vars: z + .record(z.string(), z.unknown()) + .optional() + .describe("Vars passed to dbt as --vars. Use sparingly — values appear in dbt logs."), + }, + handler: async () => { + throw new NotImplementedError("dbt_run") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/dbt_test.ts b/packages/mcp-data-agent/src/tools/dbt_test.ts new file mode 100644 index 0000000000..effcc454ae --- /dev/null +++ b/packages/mcp-data-agent/src/tools/dbt_test.ts @@ -0,0 +1,21 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const dbtTest = defineTool({ + name: "dbt_test", + description: + "Run dbt tests for a selected model or selector and return pass/fail counts plus failing-row samples. 
Issues SELECT statements only and does not require the write gate, unless storeFailures is true — that option writes failing rows to the warehouse and is ignored unless ALTIMATE_MCP_ALLOW_WRITE=true.", + mutating: false, + input: { + select: z.string().optional().describe("dbt selector. Defaults to all tests in the project."), + projectDir: z.string().optional().describe("Path to the dbt project root."), + target: z.string().optional().describe("dbt target profile name."), + storeFailures: z + .boolean() + .optional() + .describe("When true, persist failing rows to the failures table. Ignored unless ALTIMATE_MCP_ALLOW_WRITE=true."), + }, + handler: async () => { + throw new NotImplementedError("dbt_test") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/finops_anomaly_scan.ts b/packages/mcp-data-agent/src/tools/finops_anomaly_scan.ts new file mode 100644 index 0000000000..120cb57eba --- /dev/null +++ b/packages/mcp-data-agent/src/tools/finops_anomaly_scan.ts @@ -0,0 +1,25 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const finopsAnomalyScan = defineTool({ + name: "finops_anomaly_scan", + description: + "Detect day-over-day and week-over-week cost anomalies at warehouse and user level. Flags cost spikes, new expensive query patterns, and unusual usage surges. Returns an anomaly digest ranked by dollar impact. Read-only.", + mutating: false, + input: { + days: z + .number() + .int() + .positive() + .max(180) + .optional() + .describe("Window analyzed for anomalies. Defaults to 30."), + sensitivity: z + .enum(["low", "medium", "high"]) + .optional() + .describe("Detection sensitivity. Higher means more flags. 
Defaults to 'medium'."), + }, + handler: async () => { + throw new NotImplementedError("finops_anomaly_scan") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/finops_clustering_roi.ts b/packages/mcp-data-agent/src/tools/finops_clustering_roi.ts new file mode 100644 index 0000000000..e0ab957340 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/finops_clustering_roi.ts @@ -0,0 +1,26 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const finopsClusteringRoi = defineTool({ + name: "finops_clustering_roi", + description: + "For each automatically-clustered table, compute the ratio of reclustering credits to query-time credits saved by clustering. Identifies tables where clustering cost exceeds query benefit and recommends suspend, drop, or new clustering key. Read-only.", + mutating: false, + input: { + days: z + .number() + .int() + .positive() + .max(180) + .optional() + .describe("History window for the ROI calculation. Defaults to 30."), + minCredits: z + .number() + .positive() + .optional() + .describe("Only return tables that burned at least this many reclustering credits. Defaults to 1."), + }, + handler: async () => { + throw new NotImplementedError("finops_clustering_roi") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/finops_credits_summary.ts b/packages/mcp-data-agent/src/tools/finops_credits_summary.ts new file mode 100644 index 0000000000..7949e7e65d --- /dev/null +++ b/packages/mcp-data-agent/src/tools/finops_credits_summary.ts @@ -0,0 +1,30 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const finopsCreditsSummary = defineTool({ + name: "finops_credits_summary", + description: + "Summarize warehouse credit consumption over a time window, grouped by warehouse, role, user, or database. Returns total credits, dollar-equivalent, day-over-day trend, and top contributors. 
Read-only — queries ACCOUNT_USAGE / INFORMATION_SCHEMA views.", + mutating: false, + input: { + days: z + .number() + .int() + .positive() + .max(365) + .optional() + .describe("Trailing window in days. Defaults to 30."), + groupBy: z + .enum(["warehouse", "role", "user", "database"]) + .optional() + .describe("Grouping dimension. Defaults to 'warehouse'."), + creditRate: z + .number() + .positive() + .optional() + .describe("Override the contract credit rate ($/credit) used to compute dollar equivalents."), + }, + handler: async () => { + throw new NotImplementedError("finops_credits_summary") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/finops_expensive_queries.ts b/packages/mcp-data-agent/src/tools/finops_expensive_queries.ts new file mode 100644 index 0000000000..8c4955c336 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/finops_expensive_queries.ts @@ -0,0 +1,21 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const finopsExpensiveQueries = defineTool({ + name: "finops_expensive_queries", + description: + "Identify the top-N most expensive queries in a time window, ranked by credits, bytes scanned, elapsed time, or rows produced. Returns query text, user, role, warehouse, and a parameterized hash to detect duplicates. Read-only.", + mutating: false, + input: { + days: z.number().int().positive().max(365).optional().describe("Trailing window in days. Defaults to 7."), + limit: z.number().int().positive().max(500).optional().describe("Number of queries to return. Defaults to 25."), + rankBy: z + .enum(["credits", "bytes_scanned", "elapsed", "rows_produced"]) + .optional() + .describe("Ranking metric. 
Defaults to 'credits'."), + warehouse: z.string().optional().describe("Restrict to a single warehouse."), + }, + handler: async () => { + throw new NotImplementedError("finops_expensive_queries") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/finops_unused_resources.ts b/packages/mcp-data-agent/src/tools/finops_unused_resources.ts new file mode 100644 index 0000000000..49657f752d --- /dev/null +++ b/packages/mcp-data-agent/src/tools/finops_unused_resources.ts @@ -0,0 +1,25 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const finopsUnusedResources = defineTool({ + name: "finops_unused_resources", + description: + "Find unused or low-utilization warehouse resources: dormant tables (not queried in N days), warehouses with no queries, materialized views with no downstream reads, and unused secondary clusters. Returns per-resource proposals (drop, transient conversion, suspend reclustering). Read-only.", + mutating: false, + input: { + days: z + .number() + .int() + .positive() + .max(365) + .optional() + .describe("Dormancy threshold in days. Defaults to 90."), + resourceType: z + .enum(["all", "tables", "warehouses", "materialized_views", "clusters"]) + .optional() + .describe("Restrict the scan to a single resource category. 
Defaults to 'all'."), + }, + handler: async () => { + throw new NotImplementedError("finops_unused_resources") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/finops_warehouse_advice.ts b/packages/mcp-data-agent/src/tools/finops_warehouse_advice.ts new file mode 100644 index 0000000000..6dc12cc2eb --- /dev/null +++ b/packages/mcp-data-agent/src/tools/finops_warehouse_advice.ts @@ -0,0 +1,22 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const finopsWarehouseAdvice = defineTool({ + name: "finops_warehouse_advice", + description: + "For one warehouse, recommend auto-suspend, cluster count, size, and query acceleration changes based on its recent usage history (the last 30 days by default, configurable via days). Returns up to 3 plain-text recommendations or an explicit 'no change' with the numbers that ruled out each option. Read-only.", + mutating: false, + input: { + warehouse: z.string().describe("Warehouse name to advise on."), + days: z + .number() + .int() + .positive() + .max(90) + .optional() + .describe("History window used for the recommendation. 
Defaults to 30."), + }, + handler: async () => { + throw new NotImplementedError("finops_warehouse_advice") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/index.ts b/packages/mcp-data-agent/src/tools/index.ts new file mode 100644 index 0000000000..b11f8553e9 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/index.ts @@ -0,0 +1,47 @@ +import { sqlExecute } from "./sql_execute.js" +import { sqlAnalyze } from "./sql_analyze.js" +import { sqlExplain } from "./sql_explain.js" +import { schemaIntrospect } from "./schema_introspect.js" +import { dbtCompile } from "./dbt_compile.js" +import { dbtRun } from "./dbt_run.js" +import { dbtTest } from "./dbt_test.js" +import { dbtLineage } from "./dbt_lineage.js" +import { dbtImpactAnalyze } from "./dbt_impact_analyze.js" +import { dbtDiff } from "./dbt_diff.js" +import { finopsCreditsSummary } from "./finops_credits_summary.js" +import { finopsExpensiveQueries } from "./finops_expensive_queries.js" +import { finopsWarehouseAdvice } from "./finops_warehouse_advice.js" +import { finopsUnusedResources } from "./finops_unused_resources.js" +import { finopsAnomalyScan } from "./finops_anomaly_scan.js" +import { finopsClusteringRoi } from "./finops_clustering_roi.js" +import { queryHistorySearch } from "./query_history_search.js" +import { piiScan } from "./pii_scan.js" +import { dataParityCheck } from "./data_parity_check.js" +import { accountUsageQuery } from "./account_usage_query.js" + +/** + * The 20 curated tools exposed over MCP. Order is preserved for `tools/list` + * — keeps category-grouped output for clients that surface the list to humans. 
+ */ +export const tools = [ + sqlExecute, + sqlAnalyze, + sqlExplain, + schemaIntrospect, + dbtCompile, + dbtRun, + dbtTest, + dbtLineage, + dbtImpactAnalyze, + dbtDiff, + finopsCreditsSummary, + finopsExpensiveQueries, + finopsWarehouseAdvice, + finopsUnusedResources, + finopsAnomalyScan, + finopsClusteringRoi, + queryHistorySearch, + piiScan, + dataParityCheck, + accountUsageQuery, +] as const diff --git a/packages/mcp-data-agent/src/tools/pii_scan.ts b/packages/mcp-data-agent/src/tools/pii_scan.ts new file mode 100644 index 0000000000..ba917e389f --- /dev/null +++ b/packages/mcp-data-agent/src/tools/pii_scan.ts @@ -0,0 +1,27 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const piiScan = defineTool({ + name: "pii_scan", + description: + "Scan one or more tables for columns that look like PII (email, phone, SSN, credit card, address, IP, names) using column-name heuristics and sample-value regexes. Returns per-column confidence and the rule that matched. Read-only — fetches a small sample of rows.", + mutating: false, + input: { + database: z.string().optional().describe("Database to scan."), + schema: z.string().optional().describe("Schema to scan."), + table: z + .string() + .optional() + .describe("Single table to scan. When omitted, scans every table in the schema."), + sampleRows: z + .number() + .int() + .positive() + .max(10_000) + .optional() + .describe("Number of sample rows per table used for value-pattern checks. 
Defaults to 100."), + }, + handler: async () => { + throw new NotImplementedError("pii_scan") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/query_history_search.ts b/packages/mcp-data-agent/src/tools/query_history_search.ts new file mode 100644 index 0000000000..746feb2071 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/query_history_search.ts @@ -0,0 +1,36 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const queryHistorySearch = defineTool({ + name: "query_history_search", + description: + "Search query history by user, role, warehouse, table reference, regex over query text, or time range. Returns matching queries with elapsed time, credits, rows produced, and execution status. Read-only — issues SELECTs against ACCOUNT_USAGE / INFORMATION_SCHEMA.", + mutating: false, + input: { + user: z.string().optional().describe("Filter to a single user."), + role: z.string().optional().describe("Filter to a single role."), + warehouse: z.string().optional().describe("Filter to a single warehouse."), + referencesTable: z + .string() + .optional() + .describe("Return only queries that reference this fully-qualified table name."), + textRegex: z.string().optional().describe("Regex matched against the query text (case-insensitive)."), + days: z + .number() + .int() + .positive() + .max(90) + .optional() + .describe("Trailing window in days. Defaults to 7."), + limit: z + .number() + .int() + .positive() + .max(500) + .optional() + .describe("Maximum number of rows to return. 
Defaults to 50."), + }, + handler: async () => { + throw new NotImplementedError("query_history_search") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/schema_introspect.ts b/packages/mcp-data-agent/src/tools/schema_introspect.ts new file mode 100644 index 0000000000..3736bac15d --- /dev/null +++ b/packages/mcp-data-agent/src/tools/schema_introspect.ts @@ -0,0 +1,20 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const schemaIntrospect = defineTool({ + name: "schema_introspect", + description: + "Inspect a warehouse object (table, view, or schema) and return its columns, data types, nullability, primary/foreign keys, partition/cluster keys, and row count estimate. Use to ground answers about column names and types before writing SQL.", + mutating: false, + input: { + database: z.string().optional().describe("Database name. Defaults to the configured database."), + schema: z.string().optional().describe("Schema name. Defaults to the configured schema."), + table: z + .string() + .optional() + .describe("Optional table or view name. When omitted, returns the list of objects in the schema."), + }, + handler: async () => { + throw new NotImplementedError("schema_introspect") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/sql_analyze.ts b/packages/mcp-data-agent/src/tools/sql_analyze.ts new file mode 100644 index 0000000000..60f8ed151f --- /dev/null +++ b/packages/mcp-data-agent/src/tools/sql_analyze.ts @@ -0,0 +1,30 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const sqlAnalyze = defineTool({ + name: "sql_analyze", + description: + "Analyze a SQL query for anti-patterns (SELECT *, missing predicates on partition keys, implicit casts, cartesian joins, unnecessary ORDER BY in subqueries, scalar UDFs in WHERE clauses). Returns severity-ranked findings with rewrite suggestions. 
Read-only and static — does not contact the warehouse.", + mutating: false, + input: { + sql: z.string().describe("The SQL query to analyze."), + dialect: z + .enum([ + "snowflake", + "bigquery", + "databricks", + "postgres", + "redshift", + "mysql", + "sqlserver", + "oracle", + "duckdb", + "sqlite", + ]) + .optional() + .describe("Target SQL dialect for parser selection."), + }, + handler: async () => { + throw new NotImplementedError("sql_analyze") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/sql_execute.ts b/packages/mcp-data-agent/src/tools/sql_execute.ts new file mode 100644 index 0000000000..5cf1126c2a --- /dev/null +++ b/packages/mcp-data-agent/src/tools/sql_execute.ts @@ -0,0 +1,37 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const sqlExecute = defineTool({ + name: "sql_execute", + description: + "Execute a SQL query against the configured warehouse and return rows. Use for ad-hoc lookups, validation, and read paths. Because this tool can run mutating SQL (INSERT, UPDATE, DELETE, MERGE, CREATE, DROP, ALTER, TRUNCATE), it is gated as a whole: every call, including read-only SELECTs, is refused unless ALTIMATE_MCP_ALLOW_WRITE=true. Returns columns, rows, row count, and elapsed time.", + mutating: true, + input: { + sql: z.string().describe("The SQL statement to execute."), + dialect: z + .enum([ + "snowflake", + "bigquery", + "databricks", + "postgres", + "redshift", + "mysql", + "sqlserver", + "oracle", + "duckdb", + "sqlite", + ]) + .optional() + .describe("Target SQL dialect. Defaults to the warehouse configured by environment variables."), + limit: z + .number() + .int() + .positive() + .max(100_000) + .optional() + .describe("Optional row cap applied before returning results to the client. 
Defaults to 1000."), + }, + handler: async () => { + throw new NotImplementedError("sql_execute") + }, +}) diff --git a/packages/mcp-data-agent/src/tools/sql_explain.ts b/packages/mcp-data-agent/src/tools/sql_explain.ts new file mode 100644 index 0000000000..dd79b28613 --- /dev/null +++ b/packages/mcp-data-agent/src/tools/sql_explain.ts @@ -0,0 +1,34 @@ +import { z } from "zod" +import { defineTool, NotImplementedError } from "../server.js" + +export const sqlExplain = defineTool({ + name: "sql_explain", + description: + "Return the warehouse's EXPLAIN plan for a SQL query, as structured JSON by default or as plain text when format is 'text': estimated rows, bytes scanned, join order, pruning details, and the operators that dominate cost. Read-only — issues EXPLAIN, never executes the underlying query.", + mutating: false, + input: { + sql: z.string().describe("The SQL query to explain."), + dialect: z + .enum([ + "snowflake", + "bigquery", + "databricks", + "postgres", + "redshift", + "mysql", + "sqlserver", + "oracle", + "duckdb", + "sqlite", + ]) + .optional() + .describe("Target SQL dialect. Defaults to the configured warehouse."), + format: z + .enum(["text", "json"]) + .optional() + .describe("Output format. 
Defaults to 'json' for downstream programmatic use."), + }, + handler: async () => { + throw new NotImplementedError("sql_explain") + }, +}) diff --git a/packages/mcp-data-agent/test/server.test.ts b/packages/mcp-data-agent/test/server.test.ts new file mode 100644 index 0000000000..0e22cc59f0 --- /dev/null +++ b/packages/mcp-data-agent/test/server.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, test, afterEach } from "bun:test" +import { Client } from "@modelcontextprotocol/sdk/client/index.js" +import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js" +import { createServer } from "../src/server.js" +import { tools } from "../src/tools/index.js" + +const EXPECTED_TOOL_COUNT = 20 + +describe("mcp-data-agent server", () => { + let client: Client | undefined + + afterEach(async () => { + if (client) { + await client.close() + client = undefined + } + }) + + test("registry exports exactly 20 tools", () => { + expect(tools.length).toBe(EXPECTED_TOOL_COUNT) + }) + + test("tool names are unique", () => { + const names = tools.map((t) => t.name) + expect(new Set(names).size).toBe(names.length) + }) + + test("tools/list returns 20 tools over MCP", async () => { + const server = await createServer() + const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair() + client = new Client({ name: "test-client", version: "0.0.0" }, { capabilities: {} }) + await Promise.all([server.connect(serverTransport), client.connect(clientTransport)]) + + const listed = await client.listTools() + expect(listed.tools.length).toBe(EXPECTED_TOOL_COUNT) + + const names = new Set(listed.tools.map((t) => t.name)) + for (const tool of tools) expect(names.has(tool.name)).toBe(true) + }) + + test("tools/call returns a not-implemented error for a read-only stub", async () => { + const server = await createServer() + const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair() + client = new Client({ name: "test-client", version: "0.0.0" }, { 
capabilities: {} }) + await Promise.all([server.connect(serverTransport), client.connect(clientTransport)]) + + const result = await client.callTool({ name: "sql_analyze", arguments: { sql: "SELECT 1" } }) + expect(result.isError).toBe(true) + const content = result.content as Array<{ type: string; text: string }> + expect(content[0]?.text).toContain("not yet wired to altimate-engine") + }) + + test("mutating tool refuses when ALTIMATE_MCP_ALLOW_WRITE is unset", async () => { + const prior = process.env.ALTIMATE_MCP_ALLOW_WRITE + delete process.env.ALTIMATE_MCP_ALLOW_WRITE + try { + const server = await createServer() + const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair() + client = new Client({ name: "test-client", version: "0.0.0" }, { capabilities: {} }) + await Promise.all([server.connect(serverTransport), client.connect(clientTransport)]) + + const result = await client.callTool({ name: "dbt_run", arguments: { select: "fct_orders" } }) + expect(result.isError).toBe(true) + const content = result.content as Array<{ type: string; text: string }> + expect(content[0]?.text).toContain("write operations are disabled") + } finally { + if (prior !== undefined) process.env.ALTIMATE_MCP_ALLOW_WRITE = prior + } + }) +}) diff --git a/packages/mcp-data-agent/tsconfig.json b/packages/mcp-data-agent/tsconfig.json new file mode 100644 index 0000000000..1f888652e3 --- /dev/null +++ b/packages/mcp-data-agent/tsconfig.json @@ -0,0 +1,7 @@ +{ + "extends": "@tsconfig/bun/tsconfig.json", + "compilerOptions": { + "strict": true, + "noUncheckedIndexedAccess": true + } +}