AltimateAI · anandgupta42 · May 22, 2026
diff --git a/bun.lock b/bun.lock
diff --git a/package.json b/package.json
@@ -29,7 +29,8 @@
       "packages/util",
       "packages/sdk/js",
       "packages/dbt-tools",
-      "packages/drivers"
+      "packages/drivers",
+      "packages/mcp-data-agent"
     ],
     "catalog": {
       "@types/bun": "1.3.9",

diff --git a/packages/mcp-data-agent/.gitignore b/packages/mcp-data-agent/.gitignore
@@ -0,0 +1,4 @@
+dist/
+node_modules/
+*.log
+.DS_Store
diff --git a/packages/mcp-data-agent/README.md b/packages/mcp-data-agent/README.md
@@ -0,0 +1,63 @@
+# @altimateai/mcp-data-agent
+
+A local-first [MCP](https://modelcontextprotocol.io) server that exposes 20 curated data-engineering tools — SQL analysis, dbt workflow, FinOps cost intelligence, lineage, and PII detection — to any MCP-compatible client (Claude Code, Claude Desktop, Cursor, Windsurf, Goose, Cline). The server runs on your machine and talks to your warehouse directly.
+
+## Install
+
+The package is `npx`-installable and runs over stdio. To wire it into Claude Code, add the following to your project's `.mcp.json`:
+
+```json
+{
+  "mcpServers": {
+    "altimate-data": {
+      "command": "npx",
+      "args": ["-y", "@altimateai/mcp-data-agent@latest"]
+    }
+  }
+}
+```
+
+Cursor, Windsurf, and Claude Desktop use the same shape — see each client's MCP docs for the exact config file location.
+
+## Configuration
+
+All configuration is via environment variables. There is no file-based config in v1.
+
+| Variable | Purpose |
+| --- | --- |
+| `SNOWFLAKE_ACCOUNT`, `SNOWFLAKE_USER`, `SNOWFLAKE_PASSWORD` | Snowflake credentials (or use OAuth). |
+| `ALTIMATE_API_KEY` | Optional; only required for hosted services. |
+| `ALTIMATE_MCP_ALLOW_WRITE` | Set to `true` (`1`, `yes`, and `on` are also accepted, case-insensitively) to enable mutating tools (`sql_execute` with mutating SQL, `dbt_run`). Defaults to `false`. |
+
+## Security
+
+The server runs locally. No query text, schema metadata, or warehouse data leaves your machine. Your warehouse credentials are read from your environment, and your LLM endpoint is whatever the client is already configured to use. Write tools are gated behind an explicit opt-in environment variable (`ALTIMATE_MCP_ALLOW_WRITE`).
+
+## Available tools
+
+| Tool | Purpose |
+| --- | --- |
+| `sql_execute` | Run a SQL query and return rows (mutating SQL gated by `ALTIMATE_MCP_ALLOW_WRITE`). |
+| `sql_analyze` | Static SQL anti-pattern detection with severity-ranked findings. |
+| `sql_explain` | Return the warehouse EXPLAIN plan as JSON. |
+| `schema_introspect` | Inspect tables, views, and schemas. |
+| `dbt_compile` | Compile a dbt model or arbitrary Jinja SQL. |
+| `dbt_run` | Materialize models (write-gated). |
+| `dbt_test` | Run dbt tests and return failing-row samples. |
+| `dbt_lineage` | Model- and column-level lineage from the dbt manifest. |
+| `dbt_impact_analyze` | Classify downstream impact of a model change as BREAKING / SAFE / UNKNOWN. |
+| `dbt_diff` | Row-level diff between two materializations of a model. |
+| `finops_credits_summary` | Credit consumption grouped by warehouse / role / user. |
+| `finops_expensive_queries` | Top-N most expensive queries by credits / bytes / elapsed. |
+| `finops_warehouse_advice` | Auto-suspend, cluster count, and size recommendations for one warehouse. |
+| `finops_unused_resources` | Dormant tables, idle warehouses, unused materialized views. |
+| `finops_anomaly_scan` | Day-over-day and week-over-week cost spike detection. |
+| `finops_clustering_roi` | Reclustering credits vs query-time savings per table. |
+| `query_history_search` | Query history filtered by user, role, table reference, regex, or time range. |
+| `pii_scan` | Heuristic PII detection on columns by name and sample values. |
+| `data_parity_check` | Row-level parity check between two tables. |
+| `account_usage_query` | Parameterized access to warehouse observability views. |
+
+## Status
+
+Alpha. The 20 tools are scaffolded with the schemas they will eventually expose, but every handler is currently a stub: calling a tool returns a "not yet wired to altimate-engine" error. Wiring is in progress — see the [altimate-code repo](https://github.com/AltimateAI/altimate-code) for status.
diff --git a/packages/mcp-data-agent/bin/altimate-mcp b/packages/mcp-data-agent/bin/altimate-mcp
@@ -0,0 +1,2 @@
+#!/usr/bin/env node
+// Launcher for the `altimate-mcp` bin entry: loads the built server entry
+// point from ../dist, which the package's `build` script produces.
+import("../dist/index.js")
diff --git a/packages/mcp-data-agent/package.json b/packages/mcp-data-agent/package.json
@@ -0,0 +1,49 @@
+{
+  "name": "@altimateai/mcp-data-agent",
+  "version": "0.1.0",
+  "description": "Local-first MCP server exposing SQL, dbt, FinOps, lineage, and PII tools for data engineering. Query text stays on your machine.",
+  "type": "module",
+  "private": false,
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/AltimateAI/altimate-code",
+    "directory": "packages/mcp-data-agent"
+  },
+  "homepage": "https://altimate.ai",
+  "keywords": [
+    "mcp",
+    "model-context-protocol",
+    "dbt",
+    "snowflake",
+    "bigquery",
+    "databricks",
+    "finops",
+    "sql",
+    "lineage",
+    "data-engineering"
+  ],
+  "bin": {
+    "altimate-mcp": "./bin/altimate-mcp"
+  },
+  "files": [
+    "bin",
+    "dist",
+    "server.json",
+    "README.md"
+  ],
+  "scripts": {
+    "build": "bun build src/index.ts --outdir dist --target node --format esm",
+    "typecheck": "tsc --noEmit",
+    "test": "bun test"
+  },
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.29.0",
+    "zod": "catalog:"
+  },
+  "devDependencies": {
+    "@tsconfig/bun": "catalog:",
+    "@types/bun": "catalog:",
+    "typescript": "catalog:"
+  }
+}
diff --git a/packages/mcp-data-agent/server.json b/packages/mcp-data-agent/server.json
@@ -0,0 +1,28 @@
+{
+  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
+  "name": "io.github.altimateai/altimate-code-data-agent",
+  "title": "Altimate Code Data Agent",
+  "description": "SQL, dbt, FinOps, lineage tools for data engineering. Local-first — query text never leaves your machine. Works with Snowflake, BigQuery, Databricks, Postgres, Redshift, MySQL, SQL Server, Oracle, DuckDB, SQLite.",
+  "version": "0.1.0",
+  "repository": {
+    "url": "https://github.com/AltimateAI/altimate-code",
+    "source": "github"
+  },
+  "websiteUrl": "https://altimate.ai",
+  "packages": [
+    {
+      "registryType": "npm",
+      "identifier": "@altimateai/mcp-data-agent",
+      "version": "0.1.0",
+      "transport": { "type": "stdio" },
+      "runtimeHint": "npx",
+      "environmentVariables": [
+        { "name": "SNOWFLAKE_ACCOUNT", "description": "Snowflake account locator", "isRequired": false },
+        { "name": "SNOWFLAKE_USER", "description": "Snowflake username", "isRequired": false },
+        { "name": "SNOWFLAKE_PASSWORD", "description": "Snowflake password or PAT", "isSecret": true, "isRequired": false },
+        { "name": "ALTIMATE_API_KEY", "description": "Optional Altimate API key for hosted services", "isSecret": true, "isRequired": false },
+        { "name": "ALTIMATE_MCP_ALLOW_WRITE", "description": "Set true to enable write tools (sql_execute mutating SQL, dbt_run). Default false (read-only).", "isRequired": false }
+      ]
+    }
+  ]
+}
diff --git a/packages/mcp-data-agent/src/auth.ts b/packages/mcp-data-agent/src/auth.ts
@@ -0,0 +1,29 @@
+/**
+ * Environment-variable based auth + the write-gate.
+ *
+ * Phase 1 keeps the surface trivially auditable: no file-based config, no
+ * embedded secrets, no network round-trip to resolve credentials. Every value
+ * comes from `process.env`, and mutating tools require an explicit opt-in
+ * through `ALTIMATE_MCP_ALLOW_WRITE=true`.
+ */
+
+/** Normalized (trimmed, lower-cased) spellings that count as an explicit opt-in. */
+const TRUTHY = new Set(["1", "true", "yes", "on"])
+
+/**
+ * Report whether mutating tools are enabled for the given environment.
+ *
+ * @param env - Environment to inspect; defaults to `process.env`.
+ * @returns `true` only when `ALTIMATE_MCP_ALLOW_WRITE` holds one of the
+ *   accepted truthy spellings, ignoring case and surrounding whitespace.
+ */
+export function isWriteAllowed(env: NodeJS.ProcessEnv = process.env): boolean {
+  const flag = env.ALTIMATE_MCP_ALLOW_WRITE?.trim().toLowerCase()
+  // Unset, empty, and whitespace-only values all end up `false`: none of
+  // them normalizes to a member of the set.
+  return flag !== undefined && TRUTHY.has(flag)
+}
+
+/**
+ * Raised when a mutating tool is invoked while writes are disabled. The
+ * message names the tool and the environment variable that enables writes.
+ */
+export class WriteNotAllowedError extends Error {
+  override name = "WriteNotAllowedError"
+
+  /** @param toolName - Name of the tool that was refused; prefixes the message. */
+  constructor(toolName: string) {
+    super(`${toolName}: write operations are disabled. Set ALTIMATE_MCP_ALLOW_WRITE=true to enable mutating tools.`)
+  }
+}
+
+/**
+ * Guard for mutating tools: returns normally when writes are enabled and
+ * throws otherwise.
+ *
+ * @param toolName - Tool name to include in the error message.
+ * @param env - Environment to inspect; defaults to `process.env`.
+ * @throws {WriteNotAllowedError} When `isWriteAllowed(env)` is `false`.
+ */
+export function assertWriteAllowed(toolName: string, env: NodeJS.ProcessEnv = process.env): void {
+  if (isWriteAllowed(env)) return
+  throw new WriteNotAllowedError(toolName)
+}
diff --git a/packages/mcp-data-agent/src/index.ts b/packages/mcp-data-agent/src/index.ts
@@ -0,0 +1,17 @@
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
+import { createServer } from "./server.js"
+
+/**
+ * Start the server: build it via `createServer()` and attach it to a stdio
+ * transport. Any rejection propagates to the caller.
+ */
+async function main(): Promise<void> {
+  const server = await createServer()
+  await server.connect(new StdioServerTransport())
+}
+
+main().catch((err: unknown) => {
+  // The MCP client can't receive structured errors here — the transport has
+  // not yet been negotiated. Write to stderr and exit non-zero so the parent
+  // process (claude-code, cursor, etc.) shows the failure in its UI.
+  const message = err instanceof Error ? (err.stack ?? err.message) : String(err)
+  // Fallback exit status in case the process ends before the callback runs.
+  process.exitCode = 1
+  // `process.exit()` does not wait for pending stream writes, and stderr is
+  // asynchronous when it is a pipe on Windows — which is how MCP clients
+  // spawn this server. Exit from the write callback so the diagnostic is
+  // flushed rather than truncated.
+  process.stderr.write(`altimate-mcp: fatal: ${message}\n`, () => {
+    process.exit(1)
+  })
+})
diff --git a/packages/mcp-data-agent/src/server.ts b/packages/mcp-data-agent/src/server.ts
@@ -0,0 +1,119 @@
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"
+import type { z, ZodRawShape } from "zod"
+import { assertWriteAllowed } from "./auth.js"
+
+/**
+ * Shape used by every tool file. We keep the input schema as a Zod raw shape
+ * (a `{ key: ZodType }` map) because that is what `McpServer.registerTool`
+ * consumes directly — it builds the JSON Schema sent to clients from it.
+ */
+export interface ToolDefinition<Shape extends ZodRawShape = ZodRawShape> {
+  /** Name the tool is registered under; also prefixes write-gate error messages. */
+  name: string
+  /** Description passed to the MCP server at registration. */
+  description: string
+  /**
+   * When true, `register()` refuses to run the handler unless writes are
+   * allowed, and advertises the tool as destructive rather than read-only.
+   */
+  mutating: boolean
+  /** Zod raw shape passed as the tool's `inputSchema`. */
+  input: Shape
+  /** Implementation; receives the tool-call arguments, typed from `input`. */
+  handler: (input: InferShape<Shape>) => Promise<ToolResult>
+}
+
+/**
+ * Maps each key of a Zod raw shape to the TypeScript type that `z.infer`
+ * derives from that key's schema, i.e. the argument object a handler receives.
+ */
+export type InferShape<Shape extends ZodRawShape> = {
+  [K in keyof Shape]: z.infer<Shape[K]>
+}
+
+/**
+ * Value a tool handler resolves with. `register()` translates it into the
+ * response object handed back to the MCP SDK.
+ */
+export interface ToolResult {
+  /** Human-readable text the model can read; sent as a single text content item. */
+  text: string
+  /** Optional structured payload; forwarded as `structuredContent` when defined. */
+  data?: Record<string, unknown>
+  /** When true, the call surfaces as an error to the client. Any other value is treated as success. */
+  isError?: boolean
+}
+
+/**
+ * Helper used by each tool file to declare itself. Keeping this trivial — it
+ * is an identity function whose only job is to pin down types per-tool, so
+ * the handler's argument type is checked against the tool's own input shape.
+ * The registration with the McpServer happens in `register()`, which widens
+ * the shape because the tools registry holds a heterogeneous list.
+ *
+ * @param def - The tool definition to type-check.
+ * @returns The same `def` object, unchanged.
+ */
+export function defineTool<Shape extends ZodRawShape>(def: ToolDefinition<Shape>): ToolDefinition<Shape> {
+  return def
+}
+
+/**
+ * Placeholder tracking-issue URL embedded in the "not implemented" error
+ * message. To be replaced with the real issue URL once wiring lands.
+ */
+export const NOT_IMPLEMENTED_ISSUE = "https://github.com/AltimateAI/altimate-code/issues/TBD"
+
+/**
+ * Error for tools that are not yet wired up. The message names the tool and
+ * points at `NOT_IMPLEMENTED_ISSUE`.
+ */
+export class NotImplementedError extends Error {
+  override name = "NotImplementedError"
+
+  /** @param toolName - Name of the unimplemented tool; prefixes the message. */
+  constructor(toolName: string) {
+    super(`${toolName}: not yet wired to altimate-engine. Track at ${NOT_IMPLEMENTED_ISSUE}`)
+  }
+}
+
+/**
+ * Register a single tool on an `McpServer`.
+ *
+ * The registered callback checks the write gate first for mutating tools, so
+ * with writes disabled the tool's handler — stub or not — is never invoked.
+ * Anything thrown along the way (gate refusal or handler failure) is turned
+ * into an error result carrying the thrown message instead of propagating.
+ *
+ * The `tool` parameter is deliberately the un-parameterized `ToolDefinition`
+ * so callers can loop over a registry whose entries have different shapes.
+ *
+ * @param server - Server to register the tool on.
+ * @param tool - Tool definition to expose.
+ */
+export function register(server: McpServer, tool: ToolDefinition): void {
+  // Shared mapping from any thrown value to an MCP error result.
+  const failure = (err: unknown) => ({
+    content: [{ type: "text" as const, text: err instanceof Error ? err.message : String(err) }],
+    isError: true,
+  })
+
+  // The SDK callback is generic over the input shape; precision is given up
+  // here on purpose because the registry mixes tools with different shapes.
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const invoke = async (args: any) => {
+    try {
+      if (tool.mutating) assertWriteAllowed(tool.name)
+      const outcome = await tool.handler(args)
+      const content = [{ type: "text" as const, text: outcome.text }]
+      const isError = outcome.isError === true
+      // Only attach `structuredContent` when the handler supplied a payload.
+      if (outcome.data === undefined) return { content, isError }
+      return { content, structuredContent: outcome.data, isError }
+    } catch (err) {
+      return failure(err)
+    }
+  }
+
+  server.registerTool(
+    tool.name,
+    {
+      description: tool.description,
+      inputSchema: tool.input,
+      annotations: { readOnlyHint: !tool.mutating, destructiveHint: tool.mutating },
+    },
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    invoke as any,
+  )
+}
+
+/**
+ * Create a new `McpServer` and register every entry of the tools registry on
+ * it. Lives apart from `index.ts` so tests can build a server without taking
+ * over stdio.
+ *
+ * @returns The server with all tools registered, not yet connected to a transport.
+ */
+export async function createServer(): Promise<McpServer> {
+  const info = {
+    name: "io.github.altimateai/altimate-code-data-agent",
+    version: "0.1.0",
+  }
+  const options = { capabilities: { tools: {} } }
+  const server = new McpServer(info, options)
+
+  // NOTE(review): the registry is loaded lazily — presumably to avoid a
+  // circular import with the tool files; confirm.
+  const registry = await import("./tools/index.js")
+  for (const entry of registry.tools) {
+    register(server, entry as unknown as ToolDefinition)
+  }
+  return server
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		#!/usr/bin/env node
		import("../dist/index.js")