Skip to content

Commit d5ac1e1

Browse files
feat(datasets): Add datasets commands to create, list, and view datasets in CLI
* Add dataset creation endpoint and consolidate upload API
* Add single dataset get command
* Add skill writeup for datasets commands and show full_name in table details for datasets
* Update src/datasets.rs

  Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
* Remove inner workspace-id flag on datasets subcommands
* Fix match in io probe for file type in upload_from_file
* Reset file read after probing file type
* Stream stdin for dataset creation and fix response syntax

---------

Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
1 parent c949e05 commit d5ac1e1

File tree

6 files changed

+653
-98
lines changed

6 files changed

+653
-98
lines changed

Cargo.lock

Lines changed: 84 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,8 @@ rand = "0.8"
2727
sha2 = "0.10"
2828
tiny_http = "0.12"
2929
comfy-table = "7"
30+
indicatif = "0.17"
31+
nix = { version = "0.29", features = ["fs"] }
3032
flate2 = "1"
3133
tar = "0.4"
3234
semver = "1"

skills/hotdata-cli/SKILL.md

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
---
22
name: hotdata-cli
3-
description: Use this skill when the user wants to run hotdata CLI commands, query the HotData API, list workspaces, list connections, list tables, execute SQL queries, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "list tables", "execute a query", or asks you to use the hotdata CLI.
3+
description: Use this skill when the user wants to run hotdata CLI commands, query the HotData API, list workspaces, list connections, list tables, manage datasets, execute SQL queries, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "list tables", "list datasets", "create a dataset", "upload a dataset", "execute a query", or asks you to use the hotdata CLI.
44
version: 0.1.3
55
---
66

@@ -50,6 +50,46 @@ hotdata tables list [--workspace-id <workspace_id>] [--connection-id <connection
5050
- `--schema` and `--table` support SQL `%` wildcard patterns (e.g. `--table order%` matches `orders`, `order_items`, etc.).
5151
- Results are paginated (default 100 per page). If more results are available, a `--cursor` token is printed — pass it to fetch the next page.
5252

53+
### Datasets
54+
55+
Datasets are managed files uploaded to HotData and queryable as tables.
56+
57+
#### List datasets
58+
```
59+
hotdata datasets list [--workspace-id <workspace_id>] [--limit <int>] [--offset <int>] [--format table|json|yaml]
60+
```
61+
- Default format is `table`.
62+
- Returns `id`, `label`, `table_name`, `created_at`.
63+
- Results are paginated (default 100). Use `--offset` to fetch further pages.
64+
65+
#### Get dataset details
66+
```
67+
hotdata datasets <dataset_id> [--workspace-id <workspace_id>] [--format table|json|yaml]
68+
```
69+
- Shows dataset metadata and a full column listing with `name`, `data_type`, `nullable`.
70+
- Use this to inspect schema before querying.
71+
72+
#### Create a dataset
73+
```
74+
hotdata datasets create --label "My Dataset" --file data.csv [--table-name my_dataset] [--workspace-id <workspace_id>]
75+
```
76+
- `--file` uploads a local file. Omit to pipe data via stdin: `cat data.csv | hotdata datasets create --label "My Dataset"`
77+
- Format is auto-detected from file extension (`.csv`, `.json`, `.parquet`) or file content.
78+
- `--label` is optional when `--file` is provided — defaults to the filename without extension.
79+
- `--table-name` is optional — derived from the label if omitted.
80+
81+
#### Querying datasets
82+
83+
Datasets are queryable using the catalog `datasets` and schema `main`. Always reference dataset tables as:
84+
```
85+
datasets.main.<table_name>
86+
```
87+
For example:
88+
```
89+
hotdata query "SELECT * FROM datasets.main.my_dataset LIMIT 10"
90+
```
91+
Use `hotdata datasets <dataset_id>` to look up the `table_name` before writing queries.
92+
5393
### Execute SQL Query
5494
```
5595
hotdata query "<sql>" [--workspace-id <workspace_id>] [--connection <connection_id>] [--format table|json|csv]

src/command.rs

Lines changed: 24 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,19 @@ pub enum Commands {
1616

1717
/// Manage datasets
1818
Datasets {
19+
/// Dataset ID to show details
20+
id: Option<String>,
21+
22+
/// Workspace ID (defaults to first workspace from login)
23+
#[arg(long)]
24+
workspace_id: Option<String>,
25+
26+
/// Output format (used with dataset ID)
27+
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
28+
format: String,
29+
1930
#[command(subcommand)]
20-
command: DatasetsCommands,
31+
command: Option<DatasetsCommands>,
2132
},
2233

2334
/// Execute a SQL query
@@ -155,115 +166,34 @@ pub enum AuthKeysCommands {
155166
pub enum DatasetsCommands {
156167
/// List all datasets in a workspace
157168
List {
158-
/// Workspace ID (defaults to first workspace from login)
169+
/// Maximum number of results (default: 100, max: 1000)
159170
#[arg(long)]
160-
workspace_id: Option<String>,
161-
162-
/// Output format
163-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
164-
format: String,
165-
},
171+
limit: Option<u32>,
166172

167-
/// Get details for a specific dataset
168-
Get {
169-
/// Workspace ID (defaults to first workspace from login)
173+
/// Pagination offset
170174
#[arg(long)]
171-
workspace_id: Option<String>,
172-
173-
/// Dataset ID
174-
dataset_id: String,
175+
offset: Option<u32>,
175176

176177
/// Output format
177-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
178+
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
178179
format: String,
179180
},
180181

181-
/// Create a new dataset in a workspace
182+
/// Create a new dataset from a file or piped stdin
182183
Create {
183-
/// Workspace ID (defaults to first workspace from login)
184-
#[arg(long)]
185-
workspace_id: Option<String>,
186-
187-
/// Dataset name
184+
/// Dataset label (derived from filename if omitted)
188185
#[arg(long)]
189-
name: String,
186+
label: Option<String>,
190187

191-
/// SQL query for the dataset
188+
/// Table name (derived from label if omitted)
192189
#[arg(long)]
193-
sql: Option<String>,
190+
table_name: Option<String>,
194191

195-
/// Connection ID for the dataset
192+
/// Path to a file to upload (omit to read from stdin)
196193
#[arg(long)]
197-
connection_id: Option<String>,
198-
199-
/// Output format
200-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
201-
format: String,
194+
file: Option<String>,
202195
},
203196

204-
/// Update a dataset in a workspace
205-
Update {
206-
/// Workspace ID (defaults to first workspace from login)
207-
#[arg(long)]
208-
workspace_id: Option<String>,
209-
210-
/// Dataset ID
211-
dataset_id: String,
212-
213-
/// New dataset name
214-
#[arg(long)]
215-
name: Option<String>,
216-
217-
/// New SQL query for the dataset
218-
#[arg(long)]
219-
query: Option<String>,
220-
221-
/// Output format
222-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
223-
format: String,
224-
},
225-
226-
/// Delete a dataset from a workspace
227-
Delete {
228-
/// Workspace ID (defaults to first workspace from login)
229-
#[arg(long)]
230-
workspace_id: Option<String>,
231-
232-
/// Dataset ID
233-
dataset_id: String,
234-
},
235-
236-
/// Update the SQL query for a dataset
237-
UpdateSql {
238-
/// Workspace ID (defaults to first workspace from login)
239-
#[arg(long)]
240-
workspace_id: Option<String>,
241-
242-
/// Dataset ID
243-
dataset_id: String,
244-
245-
/// New SQL query for the dataset
246-
#[arg(long)]
247-
sql: String,
248-
249-
/// Output format
250-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
251-
format: String,
252-
},
253-
254-
/// Execute a dataset
255-
Execute {
256-
/// Workspace ID (defaults to first workspace from login)
257-
#[arg(long)]
258-
workspace_id: Option<String>,
259-
260-
/// Dataset ID
261-
dataset_id: String,
262-
263-
/// Output format
264-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
265-
format: String,
266-
},
267197
}
268198

269199

0 commit comments

Comments (0)