Skip to content

Commit d5ac1e1

Browse files
feat(datasets): Add datasets commands to create, list, and view datasets in CLI
* Add dataset creation endpoint and consolidate upload API
* Add single dataset get command
* Add skill writeup for datasets commands and show full_name in table details for datasets
* Update src/datasets.rs

  Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
* Remove inner workspace-id flag on datasets subcommands
* Fix match in io probe for file type in upload_from_file
* Reset file read after probing file type
* Stream stdin for dataset creation and fix response syntax

---------

Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
1 parent c949e05 commit d5ac1e1

File tree

6 files changed

+653
-98
lines changed

6 files changed

+653
-98
lines changed

Cargo.lock

Lines changed: 84 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,8 @@ rand = "0.8"
2727
sha2 = "0.10"
2828
tiny_http = "0.12"
2929
comfy-table = "7"
30+
indicatif = "0.17"
31+
nix = { version = "0.29", features = ["fs"] }
3032
flate2 = "1"
3133
tar = "0.4"
3234
semver = "1"

skills/hotdata-cli/SKILL.md

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
---
22
name: hotdata-cli
3-
description: Use this skill when the user wants to run hotdata CLI commands, query the HotData API, list workspaces, list connections, list tables, execute SQL queries, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "list tables", "execute a query", or asks you to use the hotdata CLI.
3+
description: Use this skill when the user wants to run hotdata CLI commands, query the HotData API, list workspaces, list connections, list tables, manage datasets, execute SQL queries, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "list tables", "list datasets", "create a dataset", "upload a dataset", "execute a query", or asks you to use the hotdata CLI.
44
version: 0.1.3
55
---
66

@@ -50,6 +50,46 @@ hotdata tables list [--workspace-id <workspace_id>] [--connection-id <connection
5050
- `--schema` and `--table` support SQL `%` wildcard patterns (e.g. `--table order%` matches `orders`, `order_items`, etc.).
5151
- Results are paginated (default 100 per page). If more results are available, a `--cursor` token is printed — pass it to fetch the next page.
5252

53+
### Datasets
54+
55+
Datasets are managed files uploaded to HotData and queryable as tables.
56+
57+
#### List datasets
58+
```
59+
hotdata datasets list [--workspace-id <workspace_id>] [--limit <int>] [--offset <int>] [--format table|json|yaml]
60+
```
61+
- Default format is `table`.
62+
- Returns `id`, `label`, `table_name`, `created_at`.
63+
- Results are paginated (default 100). Use `--offset` to fetch further pages.
64+
65+
#### Get dataset details
66+
```
67+
hotdata datasets <dataset_id> [--workspace-id <workspace_id>] [--format table|json|yaml]
68+
```
69+
- Shows dataset metadata and a full column listing with `name`, `data_type`, `nullable`.
70+
- Use this to inspect schema before querying.
71+
72+
#### Create a dataset
73+
```
74+
hotdata datasets create --label "My Dataset" --file data.csv [--table-name my_dataset] [--workspace-id <workspace_id>]
75+
```
76+
- `--file` uploads a local file. Omit to pipe data via stdin: `cat data.csv | hotdata datasets create --label "My Dataset"`
77+
- Format is auto-detected from file extension (`.csv`, `.json`, `.parquet`) or file content.
78+
- `--label` is optional when `--file` is provided — defaults to the filename without extension.
79+
- `--table-name` is optional — derived from the label if omitted.
80+
81+
#### Querying datasets
82+
83+
Datasets are queryable using the catalog `datasets` and schema `main`. Always reference dataset tables as:
84+
```
85+
datasets.main.<table_name>
86+
```
87+
For example:
88+
```
89+
hotdata query "SELECT * FROM datasets.main.my_dataset LIMIT 10"
90+
```
91+
Use `hotdata datasets <dataset_id>` to look up the `table_name` before writing queries.
92+
5393
### Execute SQL Query
5494
```
5595
hotdata query "<sql>" [--workspace-id <workspace_id>] [--connection <connection_id>] [--format table|json|csv]

src/command.rs

Lines changed: 24 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,19 @@ pub enum Commands {
1616

1717
/// Manage datasets
1818
Datasets {
19+
/// Dataset ID to show details
20+
id: Option<String>,
21+
22+
/// Workspace ID (defaults to first workspace from login)
23+
#[arg(long)]
24+
workspace_id: Option<String>,
25+
26+
/// Output format (used with dataset ID)
27+
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
28+
format: String,
29+
1930
#[command(subcommand)]
20-
command: DatasetsCommands,
31+
command: Option<DatasetsCommands>,
2132
},
2233

2334
/// Execute a SQL query
@@ -155,115 +166,34 @@ pub enum AuthKeysCommands {
155166
pub enum DatasetsCommands {
156167
/// List all datasets in a workspace
157168
List {
158-
/// Workspace ID (defaults to first workspace from login)
169+
/// Maximum number of results (default: 100, max: 1000)
159170
#[arg(long)]
160-
workspace_id: Option<String>,
161-
162-
/// Output format
163-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
164-
format: String,
165-
},
171+
limit: Option<u32>,
166172

167-
/// Get details for a specific dataset
168-
Get {
169-
/// Workspace ID (defaults to first workspace from login)
173+
/// Pagination offset
170174
#[arg(long)]
171-
workspace_id: Option<String>,
172-
173-
/// Dataset ID
174-
dataset_id: String,
175+
offset: Option<u32>,
175176

176177
/// Output format
177-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
178+
#[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])]
178179
format: String,
179180
},
180181

181-
/// Create a new dataset in a workspace
182+
/// Create a new dataset from a file or piped stdin
182183
Create {
183-
/// Workspace ID (defaults to first workspace from login)
184-
#[arg(long)]
185-
workspace_id: Option<String>,
186-
187-
/// Dataset name
184+
/// Dataset label (derived from filename if omitted)
188185
#[arg(long)]
189-
name: String,
186+
label: Option<String>,
190187

191-
/// SQL query for the dataset
188+
/// Table name (derived from label if omitted)
192189
#[arg(long)]
193-
sql: Option<String>,
190+
table_name: Option<String>,
194191

195-
/// Connection ID for the dataset
192+
/// Path to a file to upload (omit to read from stdin)
196193
#[arg(long)]
197-
connection_id: Option<String>,
198-
199-
/// Output format
200-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
201-
format: String,
194+
file: Option<String>,
202195
},
203196

204-
/// Update a dataset in a workspace
205-
Update {
206-
/// Workspace ID (defaults to first workspace from login)
207-
#[arg(long)]
208-
workspace_id: Option<String>,
209-
210-
/// Dataset ID
211-
dataset_id: String,
212-
213-
/// New dataset name
214-
#[arg(long)]
215-
name: Option<String>,
216-
217-
/// New SQL query for the dataset
218-
#[arg(long)]
219-
query: Option<String>,
220-
221-
/// Output format
222-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
223-
format: String,
224-
},
225-
226-
/// Delete a dataset from a workspace
227-
Delete {
228-
/// Workspace ID (defaults to first workspace from login)
229-
#[arg(long)]
230-
workspace_id: Option<String>,
231-
232-
/// Dataset ID
233-
dataset_id: String,
234-
},
235-
236-
/// Update the SQL query for a dataset
237-
UpdateSql {
238-
/// Workspace ID (defaults to first workspace from login)
239-
#[arg(long)]
240-
workspace_id: Option<String>,
241-
242-
/// Dataset ID
243-
dataset_id: String,
244-
245-
/// New SQL query for the dataset
246-
#[arg(long)]
247-
sql: String,
248-
249-
/// Output format
250-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
251-
format: String,
252-
},
253-
254-
/// Execute a dataset
255-
Execute {
256-
/// Workspace ID (defaults to first workspace from login)
257-
#[arg(long)]
258-
workspace_id: Option<String>,
259-
260-
/// Dataset ID
261-
dataset_id: String,
262-
263-
/// Output format
264-
#[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])]
265-
format: String,
266-
},
267197
}
268198

269199

0 commit comments

Comments (0)