diff --git a/.gitignore b/.gitignore index 5ecc269..8ada863 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,9 @@ Thumbs.db .idea/ .vscode/ *.swp + +# Sample database files (downloaded at runtime by setup scripts) +docker/northwind/northwind.sql + +# User-created memory entries (runtime data, not committed) +memory/*.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..5c21b16 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,33 @@ +# Open Data Agent — Project Rules + +This project is a **data query tool**, not a typical software project. +When a user asks a question, it is almost always a question **about the data in the database**, +not about the source code. + +## CRITICAL — Handling user questions + +When the user asks anything that sounds like a data question (e.g. "Which country has the most +customers?", "Show me top products by revenue", "How many orders last month?"): + +1. **Do NOT scan the codebase.** This is not a coding task. +2. **Do NOT read `src/`, `tests/`, or any source files.** +3. Read `docs/data-catalog/_index.md` to identify the relevant tables. +4. Read the relevant per-table doc under `docs/data-catalog/` for column names and types. +5. Run `uv run oda query "<sql>"` to execute the query and return results to the user. + +The active database connection and full command reference are in `.opencode/rules/data-agent.md`. +Read that file if you need to know which database is active or how to use `oda` commands. +That file is generated by running `uv run oda connect <name>` from the project root. + +## What this project does + +`oda` (Open Data Agent) is a local CLI that connects to databases and runs read-only SQL queries. +All queries go through `uv run oda query "..."` — never connect to the database directly. 
+ +## When to touch the source code + +Only read or edit files in `src/`, `tests/`, or config files when the user explicitly asks to: +- fix a bug +- add a feature +- run tests +- change configuration diff --git a/README.md b/README.md index 91fe80b..5794ad2 100644 --- a/README.md +++ b/README.md @@ -183,6 +183,15 @@ strict_mode: false # if true: block queries when docs are stale # equivalent to passing --strict on every oda query call ``` +### Testing with Sample Data + +A ready-to-run guide using the Northwind database is available at [`docs/testing-with-northwind.md`](docs/testing-with-northwind.md). It covers spinning up a local PostgreSQL container, registering the connection, generating schema docs, and running sample queries that exercise every major `oda` feature. + +```bash +# One command to get started: +bash scripts/setup-northwind.sh +``` + ### Development ```bash diff --git a/docker-compose-northwind.yml b/docker-compose-northwind.yml new file mode 100644 index 0000000..0db6a17 --- /dev/null +++ b/docker-compose-northwind.yml @@ -0,0 +1,20 @@ +services: + northwind: + image: postgres:16-alpine + environment: + POSTGRES_DB: northwind + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + volumes: + - ./docker/northwind/northwind.sql:/docker-entrypoint-initdb.d/northwind.sql:ro + - northwind_data:/var/lib/postgresql/data + ports: + - "5433:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d northwind"] + interval: 5s + timeout: 5s + retries: 10 + +volumes: + northwind_data: diff --git a/docs/opencode-northwind-example.png b/docs/opencode-northwind-example.png new file mode 100644 index 0000000..89ced90 Binary files /dev/null and b/docs/opencode-northwind-example.png differ diff --git a/docs/testing-with-northwind.md b/docs/testing-with-northwind.md new file mode 100644 index 0000000..f2b3238 --- /dev/null +++ b/docs/testing-with-northwind.md @@ -0,0 +1,314 @@ +# Testing oda with the Northwind Sample Database + +This guide walks you 
through spinning up the Northwind PostgreSQL database with Docker and using it to test every major `oda` feature end-to-end. + +## About Northwind + +Northwind is a classic sample database originally created by Microsoft. It models a fictional food trading company with 14 tables covering customers, orders, products, employees, suppliers, shippers, and more — rich enough to write interesting queries across multiple joined tables. + +**Tables:** `categories`, `customers`, `employees`, `employee_territories`, `order_details`, `orders`, `products`, `region`, `shippers`, `suppliers`, `territories`, `us_states`, `customer_customer_demo`, `customer_demographics` + +--- + +## Step 1 — Start the Northwind container + +Run the setup script from the project root. It downloads `northwind.sql` (if not already present) and starts a PostgreSQL container with the database pre-loaded: + +```bash +bash scripts/setup-northwind.sh +``` + +The script will print connection details and confirm the container is healthy before exiting. The database is exposed on **port 5433** (to avoid clashing with the test container on port 5432). 
+ +To stop the container later: + +```bash +docker compose -f docker-compose-northwind.yml down # stop, keep data +docker compose -f docker-compose-northwind.yml down -v # stop, wipe data +``` + +--- + +## Step 2 — Initialise oda + +If you haven't run `oda init` yet: + +```bash +uv run oda init +``` + +--- + +## Step 3 — Add the Northwind connection + +```bash +uv run oda connections add +``` + +Enter the following when prompted: + +| Field | Value | +|----------|---------------| +| Name | `northwind` | +| Dialect | `postgresql` | +| Host | `localhost` | +| Port | `5433` | +| Database | `northwind` | +| User | `postgres` | +| Password | `postgres` | + +Verify the connection is live: + +```bash +uv run oda connections test northwind +``` + +--- + +## Step 4 — Activate the connection + +```bash +uv run oda connect northwind +``` + +This sets `northwind` as the active connection and writes `.opencode/rules/data-agent.md` for OpenCode. + +--- + +## Step 5 — Explore the schema + +List all schemas: + +```bash +uv run oda schemas +``` + +List all tables: + +```bash +uv run oda tables +``` + +Inspect a specific table: + +```bash +uv run oda describe orders +uv run oda describe order_details +uv run oda describe customers +``` + +Preview sample rows: + +```bash +uv run oda sample orders +uv run oda sample products --n 10 +``` + +Profile a table (null counts, distinct values, min/max): + +```bash +uv run oda profile orders +uv run oda profile products +``` + +--- + +## Step 6 — Generate schema docs + +```bash +uv run oda docs generate +``` + +This writes a markdown file per table under `docs/data-catalog/`. 
Add `--enrich` to include column statistics: + +```bash +uv run oda docs generate --enrich +``` + +Check doc freshness at any time: + +```bash +uv run oda docs status +``` + +--- + +## Step 7 — Run sample queries + +### Basic selects + +```bash +uv run oda query "SELECT company_name, country FROM customers ORDER BY country LIMIT 10" + +uv run oda query "SELECT product_name, unit_price, units_in_stock FROM products ORDER BY unit_price DESC LIMIT 10" + +uv run oda query "SELECT first_name, last_name, title FROM employees ORDER BY last_name" +``` + +### Aggregations + +```bash +uv run oda query "SELECT country, COUNT(*) AS customer_count FROM customers GROUP BY country ORDER BY customer_count DESC" + +uv run oda query "SELECT category_id, COUNT(*) AS product_count, AVG(unit_price) AS avg_price FROM products GROUP BY category_id ORDER BY category_id" +``` + +### Joins + +```bash +uv run oda query " + SELECT o.order_id, c.company_name, o.order_date, o.freight + FROM orders o + JOIN customers c ON o.customer_id = c.customer_id + ORDER BY o.order_date DESC + LIMIT 10 +" +``` + +```bash +uv run oda query " + SELECT p.product_name, c.category_name, p.unit_price + FROM products p + JOIN categories c ON p.category_id = c.category_id + ORDER BY c.category_name, p.product_name + LIMIT 20 +" +``` + +### Revenue analysis + +```bash +uv run oda query " + SELECT + c.company_name, + COUNT(DISTINCT o.order_id) AS order_count, + ROUND(SUM(od.unit_price * od.quantity * (1 - od.discount))::numeric, 2) AS total_revenue + FROM customers c + JOIN orders o ON c.customer_id = o.customer_id + JOIN order_details od ON o.order_id = od.order_id + GROUP BY c.company_name + ORDER BY total_revenue DESC + LIMIT 10 +" +``` + +### Employees and territories + +```bash +uv run oda query " + SELECT e.first_name, e.last_name, t.territory_description, r.region_description + FROM employees e + JOIN employee_territories et ON e.employee_id = et.employee_id + JOIN territories t ON et.territory_id = 
t.territory_id + JOIN region r ON t.region_id = r.region_id + ORDER BY e.last_name +" +``` + +### Output formats + +```bash +# JSON output +uv run oda query "SELECT * FROM shippers" --format json + +# CSV output +uv run oda query "SELECT product_name, unit_price FROM products ORDER BY unit_price DESC LIMIT 20" --format csv +``` + +--- + +## Step 8 — Test safety enforcement + +`oda` hard-blocks all write operations. These should all fail with a `SafetyError`: + +```bash +# Write operations are blocked +uv run oda query "INSERT INTO customers (customer_id, company_name) VALUES ('TEST', 'Test Co')" +uv run oda query "UPDATE products SET unit_price = 0 WHERE product_id = 1" +uv run oda query "DELETE FROM orders WHERE order_id = 10248" +uv run oda query "DROP TABLE customers" +``` + +--- + +## Step 9 — Test query history + +After running a few queries, inspect the history: + +```bash +uv run oda history list +uv run oda history list --n 5 +uv run oda history search "customers" +uv run oda history stats +``` + +--- + +## Step 10 — Add a memory entry + +Record a data quirk about the Northwind schema for future sessions: + +```bash +uv run oda memory add \ + --title "Order revenue calculation" \ + --category data_quality \ + --content "Use unit_price * quantity * (1 - discount) from order_details for accurate revenue. The discount column is a float between 0 and 1." +``` + +Verify it was saved: + +```bash +uv run oda memory list +uv run oda memory search "revenue" +``` + +--- + +## Step 11 — Use with OpenCode (natural language queries) + +With the connection active and docs generated, open OpenCode from the project root: + +```bash +opencode +``` + +Then ask questions in plain English: + +``` +Which country has the most customers? +``` + +``` +Show me the top 5 products by total revenue across all orders. +``` + +``` +Which employees have the most orders assigned to them? +``` + +``` +List all customers from Germany with their total order count. 
+``` + +OpenCode reads `docs/data-catalog/` for schema context, constructs the SQL, and calls `oda query` to execute it. If a query returns zero rows, the diagnostic output is fed back to OpenCode automatically so it can self-correct and retry. + +### Example + +The screenshot below shows OpenCode answering "Show me the top 5 products by total revenue across all orders." — it reads the schema catalog, constructs the correct three-table join with discount calculation, executes it via `uv run oda query`, and presents the ranked results: + +![OpenCode answering a natural language revenue question against the Northwind database](opencode-northwind-example.png) + +--- + +## Teardown + +```bash +# Stop the container, keep the data volume +docker compose -f docker-compose-northwind.yml down + +# Stop and wipe all data (forces fresh reload of northwind.sql on next start) +docker compose -f docker-compose-northwind.yml down -v + +# Remove the downloaded SQL file +rm docker/northwind/northwind.sql +``` diff --git a/opencode.json b/opencode.json new file mode 100644 index 0000000..76d170f --- /dev/null +++ b/opencode.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://opencode.ai/config.json", + "instructions": [ + ".opencode/rules/data-agent.md" + ] +} diff --git a/scripts/setup-northwind.sh b/scripts/setup-northwind.sh new file mode 100755 index 0000000..12280ca --- /dev/null +++ b/scripts/setup-northwind.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +set -euo pipefail + +NORTHWIND_SQL="docker/northwind/northwind.sql" +NORTHWIND_URL="https://raw.githubusercontent.com/pthom/northwind_psql/master/northwind.sql" +COMPOSE_FILE="docker-compose-northwind.yml" + +# ── colours ──────────────────────────────────────────────────────────────────── +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; CYAN='\033[0;36m'; NC='\033[0m' +info() { echo -e "${CYAN}[oda]${NC} $*"; } +success() { echo -e "${GREEN}[oda]${NC} $*"; } +warn() { echo -e "${YELLOW}[oda]${NC} $*"; } +error() { echo -e 
"${RED}[oda]${NC} $*" >&2; exit 1; } + +# ── prerequisites ────────────────────────────────────────────────────────────── +command -v docker >/dev/null 2>&1 || error "docker is not installed or not on PATH" +docker compose version >/dev/null 2>&1 || error "docker compose (v2) is required" + +# ── download northwind.sql if missing ────────────────────────────────────────── +if [[ -f "$NORTHWIND_SQL" ]]; then + warn "northwind.sql already exists, skipping download" +else + info "Downloading northwind.sql from pthom/northwind_psql ..." + mkdir -p "$(dirname "$NORTHWIND_SQL")" + if command -v curl >/dev/null 2>&1; then + curl -fsSL "$NORTHWIND_URL" -o "$NORTHWIND_SQL" + elif command -v wget >/dev/null 2>&1; then + wget -q "$NORTHWIND_URL" -O "$NORTHWIND_SQL" + else + error "curl or wget is required to download northwind.sql" + fi + success "Downloaded $NORTHWIND_SQL ($(wc -l < "$NORTHWIND_SQL") lines)" +fi + +# ── spin up the container ────────────────────────────────────────────────────── +info "Starting Northwind PostgreSQL container ..." +docker compose -f "$COMPOSE_FILE" up -d --wait + +success "Northwind is ready!" 
+echo +echo -e " ${CYAN}Host:${NC} localhost" +echo -e " ${CYAN}Port:${NC} 5433" +echo -e " ${CYAN}Database:${NC} northwind" +echo -e " ${CYAN}User:${NC} postgres" +echo -e " ${CYAN}Password:${NC} postgres" +echo +echo -e "${YELLOW}Next steps:${NC}" +echo -e " uv run oda connections add" +echo -e " # name: northwind | dialect: postgresql | host: localhost" +echo -e " # port: 5433 | database: northwind | user: postgres | password: postgres" +echo +echo -e " uv run oda connect northwind" +echo -e " uv run oda docs generate" +echo -e " uv run oda query \"SELECT company_name, country FROM customers ORDER BY country LIMIT 10\"" +echo +echo -e "${YELLOW}To stop:${NC}" +echo -e " docker compose -f $COMPOSE_FILE down # keep data" +echo -e " docker compose -f $COMPOSE_FILE down -v # wipe data" diff --git a/src/open_data_agent/cli_schema.py b/src/open_data_agent/cli_schema.py index 7bf4705..5c3599d 100644 --- a/src/open_data_agent/cli_schema.py +++ b/src/open_data_agent/cli_schema.py @@ -25,8 +25,8 @@ err_console = Console(stderr=True) -def _get_inspector() -> tuple[SchemaInspector, str, Any]: - """Return (inspector, active_name, conn) or raise SystemExit.""" +def _get_inspector() -> tuple[SchemaInspector, str, Any, str]: + """Return (inspector, active_name, conn, default_schema) or raise SystemExit.""" mgr = ConnectionManager() active = mgr.get_active_connection() if active is None: @@ -40,6 +40,7 @@ def _get_inspector() -> tuple[SchemaInspector, str, Any]: conn: Any = None adapter: DialectAdapter + default_schema: str try: if db_type == "sqlite": import sqlite3 @@ -47,6 +48,7 @@ def _get_inspector() -> tuple[SchemaInspector, str, Any]: conn = sqlite3.connect(params["database"], check_same_thread=False) conn.row_factory = sqlite3.Row adapter = SQLiteAdapter() + default_schema = "main" elif db_type == "postgresql": import psycopg @@ -59,6 +61,7 @@ def _get_inspector() -> tuple[SchemaInspector, str, Any]: autocommit=True, ) adapter = PostgreSQLAdapter() + default_schema = 
"public" elif db_type == "mysql": import pymysql @@ -70,11 +73,12 @@ def _get_inspector() -> tuple[SchemaInspector, str, Any]: password=params["password"], ) adapter = MySQLAdapter() + default_schema = params["database"] else: err_console.print(f"[red]✗[/red] Unsupported db_type: {db_type}") raise SystemExit(1) - return SchemaInspector(adapter, conn), active, conn + return SchemaInspector(adapter, conn), active, conn, default_schema except SystemExit: if conn is not None: conn.close() @@ -85,7 +89,7 @@ def _get_inspector() -> tuple[SchemaInspector, str, Any]: raise -def _parse_table_arg(table_arg: str, default_schema: str = "main") -> tuple[str, str]: +def _parse_table_arg(table_arg: str, default_schema: str) -> tuple[str, str]: """Parse 'schema.table' or 'table' into (schema, table).""" if "." in table_arg: parts = table_arg.split(".", 1) @@ -98,7 +102,7 @@ def schemas() -> None: """List all schemas in the active database.""" conn = None try: - inspector, active, conn = _get_inspector() + inspector, active, conn, _default_schema = _get_inspector() except SystemExit: raise @@ -118,14 +122,16 @@ def schemas() -> None: @click.command(name="tables") -@click.argument("schema", default="main") -def tables(schema: str) -> None: +@click.argument("schema", default=None, required=False) +def tables(schema: str | None) -> None: """List all tables in the given schema of the active database.""" conn = None try: - inspector, active, conn = _get_inspector() + inspector, active, conn, default_schema = _get_inspector() except SystemExit: raise + if schema is None: + schema = default_schema try: table_list = inspector.get_tables(schema) @@ -150,12 +156,12 @@ def describe(table: str) -> None: """Show column definitions for TABLE (schema.table or table).""" conn = None try: - inspector, _, conn = _get_inspector() + inspector, _, conn, default_schema = _get_inspector() except SystemExit: raise try: - schema_name, table_name = _parse_table_arg(table) + schema_name, table_name = 
_parse_table_arg(table, default_schema) cols = inspector.get_columns(schema_name, table_name) t = Table(title=f"Columns: {table}") t.add_column("Name", style="bold cyan") @@ -194,12 +200,12 @@ def sample(table: str, n: int) -> None: conn = None try: - inspector, _, conn = _get_inspector() + inspector, _, conn, default_schema = _get_inspector() except SystemExit: raise try: - schema_name, table_name = _parse_table_arg(table) + schema_name, table_name = _parse_table_arg(table, default_schema) result = inspector.get_sample(schema_name, table_name, n=n) t = Table(title=f"Sample: {table} (up to {n} rows)") for col in result.columns: @@ -221,12 +227,12 @@ def profile(table: str) -> None: """Show column statistics for TABLE (schema.table or table).""" conn = None try: - inspector, _, conn = _get_inspector() + inspector, _, conn, default_schema = _get_inspector() except SystemExit: raise try: - schema_name, table_name = _parse_table_arg(table) + schema_name, table_name = _parse_table_arg(table, default_schema) from open_data_agent.config import get_config prof = inspector.get_profile( diff --git a/src/open_data_agent/templates/data-agent.md.tmpl b/src/open_data_agent/templates/data-agent.md.tmpl index d94354d..84da114 100644 --- a/src/open_data_agent/templates/data-agent.md.tmpl +++ b/src/open_data_agent/templates/data-agent.md.tmpl @@ -5,6 +5,21 @@ --- +## IMPORTANT — How to handle user questions + +When the user asks a question about data (e.g. "Which country has the most customers?", +"Show me top products by revenue", "How many orders were placed last month?"): + +1. **Do NOT scan the codebase.** This is not a coding task. +2. **Do NOT read source files or project directories.** +3. Read `docs/data-catalog/_index.md` to find the relevant tables. +4. Read the relevant per-table doc (e.g. `docs/data-catalog/public/customers.md`) for column names. +5. Run `uv run oda query ""` to execute the query and return results. 
+ +This applies to any question that is about the *data* in the database, not about the code. + +--- + ## Block 1 — Connection Context You are a data agent connected to a **{{db_type}}** database. @@ -18,7 +33,7 @@ You are a data agent connected to a **{{db_type}}** database. | Rules generated at | `{{generated_at}}` | Your role is to translate natural language questions into accurate, read-only SQL queries -using the `oda` CLI commands documented below. +using the `uv run oda` CLI commands documented below. --- @@ -28,29 +43,29 @@ Use these commands to discover the database structure before writing SQL: ```bash # List all schemas (namespaces/databases) -oda schemas +uv run oda schemas # List tables in a schema (omit schema for default) -oda tables -oda tables +uv run oda tables +uv run oda tables # Show column definitions for a table -oda describe +uv run oda describe # Example: -oda describe public.customers +uv run oda describe public.customers # Preview sample rows (default 5 rows) -oda sample -oda sample --n 10 +uv run oda sample +uv run oda sample --n 10 # Column statistics: null count, distinct count, min/max values -oda profile +uv run oda profile # Example: -oda profile public.orders +uv run oda profile public.orders ``` -**Workflow:** Always run `oda describe ` before querying an unfamiliar table. -Run `oda profile
` to understand value distributions and detect data quality issues. +**Workflow:** Always run `uv run oda describe
` before querying an unfamiliar table. +Run `uv run oda profile
` to understand value distributions and detect data quality issues. --- @@ -58,17 +73,17 @@ Run `oda profile
` to understand value distributions and detect data qual ```bash # Execute a read-only SQL query (LIMIT auto-injected, default 1000 rows) -oda query "" +uv run oda query "" # Examples: -oda query "SELECT * FROM customers WHERE status = 'active'" -oda query "SELECT COUNT(*) FROM orders GROUP BY status" +uv run oda query "SELECT * FROM customers WHERE status = 'active'" +uv run oda query "SELECT COUNT(*) FROM orders GROUP BY status" # Strict mode: blocks execution if schema docs are stale -oda query "" --strict +uv run oda query "" --strict # Query with strict mode example: -oda query "SELECT SUM(amount) FROM payments" --strict +uv run oda query "SELECT SUM(amount) FROM payments" --strict ``` **Behaviour:** @@ -86,16 +101,16 @@ oda query "SELECT SUM(amount) FROM payments" --strict ```bash # Regenerate the full schema documentation catalog -oda docs generate +uv run oda docs generate # Skip tables that already have fresh docs -oda docs generate --skip-existing +uv run oda docs generate --skip-existing # Include column profile statistics (nulls, distinct, min/max) in generated docs -oda docs generate --enrich +uv run oda docs generate --enrich # Check whether docs are up to date (exit 0 = fresh, exit 1 = stale) -oda docs status +uv run oda docs status ``` **Schema docs** are written to `docs/data-catalog/`: @@ -112,28 +127,28 @@ Warn the user if `generated_at` in any doc is older than 7 days. 
```bash # Add a curated knowledge entry (interactive) -oda memory add +uv run oda memory add # Add non-interactively -oda memory add --title "Revenue column" --category data_quality --content "Use net_item_price not item_price" +uv run oda memory add --title "Revenue column" --category data_quality --content "Use net_item_price not item_price" # With tags -oda memory add --title "Status values" --category business_context --content "Active=1, Inactive=0" --tags "status,lookup" +uv run oda memory add --title "Status values" --category business_context --content "Active=1, Inactive=0" --tags "status,lookup" # List all memory entries -oda memory list +uv run oda memory list # Search memory by keyword (matches title, body content, tags — case-insensitive) -oda memory search revenue -oda memory search "net price" +uv run oda memory search revenue +uv run oda memory search "net price" ``` **Categories:** `query_pattern` | `business_context` | `correction` | `data_quality` **Workflow:** -1. Before writing SQL for any column involved in known data quality issues, run `oda memory search ` -2. After discovering a data quirk, run `oda memory add` to record it for future reference -3. Run `oda memory list` to review the full knowledge base +1. Before writing SQL for any column involved in known data quality issues, run `uv run oda memory search ` +2. After discovering a data quirk, run `uv run oda memory add` to record it for future reference +3. Run `uv run oda memory list` to review the full knowledge base Memory files are stored in `./memory/` as markdown with YAML frontmatter. @@ -145,7 +160,7 @@ When queries return 0 rows, errors, or unexpected results, follow this step-by-s ### Zero-Row Diagnostics -1. **Read the diagnostic output** — when a query returns 0 rows, `oda query` automatically +1. 
**Read the diagnostic output** — when a query returns 0 rows, `uv run oda query` automatically prints diagnostic information to stderr: - Row counts for each table referenced in the SQL - Up to 3 sample values for each filter column @@ -153,15 +168,15 @@ When queries return 0 rows, errors, or unexpected results, follow this step-by-s - Suggested checks (e.g. "Column 'status' values seen: ['active', 'pending'] — your filter used 'closed'") 2. **Check date filter ranges** against the `column range` shown in diagnostic context. - Use `oda profile
` to see actual min/max dates. + Use `uv run oda profile
` to see actual min/max dates. -3. **Run `oda memory search `** for known issues with filter columns. - Example: `oda memory search status` +3. **Run `uv run oda memory search `** for known issues with filter columns. + Example: `uv run oda memory search status` 4. **Try a broader query first**, then narrow down: ```bash - oda query "SELECT DISTINCT status FROM orders" # see all values - oda query "SELECT * FROM orders LIMIT 5" # confirm table has rows + uv run oda query "SELECT DISTINCT status FROM orders" # see all values + uv run oda query "SELECT * FROM orders LIMIT 5" # confirm table has rows ``` 5. **Check column value casing** — `{{db_type}}` may be case-sensitive. @@ -169,18 +184,18 @@ When queries return 0 rows, errors, or unexpected results, follow this step-by-s 6. **Verify JOIN conditions** are not silently eliminating rows: ```bash - oda query "SELECT COUNT(*) FROM orders" # rows in orders - oda query "SELECT COUNT(*) FROM orders JOIN customers ON ..." # rows after join + uv run oda query "SELECT COUNT(*) FROM orders" # rows in orders + uv run oda query "SELECT COUNT(*) FROM orders JOIN customers ON ..." 
# rows after join ``` ### Stale Docs Resolution -If `oda query --strict` is blocked with "Docs are stale": +If `uv run oda query --strict` is blocked with "Docs are stale": ```bash -oda docs generate # regenerate full catalog -oda docs generate --enrich # include profile stats -oda docs status # verify docs are now fresh +uv run oda docs generate # regenerate full catalog +uv run oda docs generate --enrich # include profile stats +uv run oda docs status # verify docs are now fresh ``` ### Eval Run Workflow @@ -188,9 +203,9 @@ oda docs status # verify docs are now fresh Run the golden SQL regression suite to validate agent quality after schema changes: ```bash -oda eval run # run all golden queries for active dialect -oda eval results # review last run results -oda eval add # add a new known-good query as a regression test +uv run oda eval run # run all golden queries for active dialect +uv run oda eval results # review last run results +uv run oda eval add # add a new known-good query as a regression test ``` The eval suite exits with code `1` if any query fails — suitable for CI integration. 
diff --git a/tests/unit/cli/test_cli_schema.py b/tests/unit/cli/test_cli_schema.py index 368939d..835240a 100644 --- a/tests/unit/cli/test_cli_schema.py +++ b/tests/unit/cli/test_cli_schema.py @@ -64,6 +64,43 @@ def test_lists_tables(self, tmp_path: Path) -> None: assert result.exit_code == 0 assert "customers" in result.output + def test_no_argument_defaults_to_main_for_sqlite(self, tmp_path: Path) -> None: + """oda tables with no argument should use 'main' as the default schema for SQLite.""" + db_path = _setup_db(tmp_path) + runner = CliRunner() + with patch("open_data_agent.cli_schema.ConnectionManager") as mock_mgr: + mock_mgr.return_value.get_active_connection.return_value = "test" + mock_mgr.return_value.get_connection.return_value = _make_params(db_path) + result = runner.invoke(cli, ["tables"]) + assert result.exit_code == 0 + assert "customers" in result.output + assert "main" in result.output + + def test_no_argument_defaults_to_public_for_postgresql(self, tmp_path: Path) -> None: + """oda tables with no argument should use 'public' as the default schema for PostgreSQL.""" + runner = CliRunner() + pg_params = { + "db_type": "postgresql", + "host": "localhost", + "port": 5432, + "database": "testdb", + "username": "user", + "password": "pass", + } + mock_conn = __import__("unittest.mock", fromlist=["MagicMock"]).MagicMock() + mock_conn.execute.return_value.fetchall.return_value = [("customers",)] + with ( + patch("open_data_agent.cli_schema.ConnectionManager") as mock_mgr, + patch("psycopg.connect", return_value=mock_conn), + ): + mock_mgr.return_value.get_active_connection.return_value = "pgtest" + mock_mgr.return_value.get_connection.return_value = pg_params + result = runner.invoke(cli, ["tables"]) + assert result.exit_code == 0 + # 'public' appears in the table title; Rich may wrap it so check for the substring + assert "publ" in result.output # 'public' schema confirmed in title + assert "customers" in result.output + def 
test_exits_1_when_no_active_connection(self) -> None: runner = CliRunner() with patch("open_data_agent.cli_schema.ConnectionManager") as mock_mgr: