From 3ad40dd03feb388a53a9eb1ab036e4ad4b549c0f Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Fri, 13 Mar 2026 13:36:44 -0700 Subject: [PATCH 1/8] Add dataset creation endpoint and consolidate upload api --- Cargo.lock | 85 +++++++++++++- Cargo.toml | 2 + src/command.rs | 19 ++-- src/datasets.rs | 295 ++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 11 +- 5 files changed, 398 insertions(+), 14 deletions(-) create mode 100644 src/datasets.rs diff --git a/Cargo.lock b/Cargo.lock index 4b69ae8..8041168 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,6 +125,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chunked_transfer" version = "1.5.0" @@ -188,6 +194,19 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -338,6 +357,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -567,6 +592,8 @@ dependencies = [ "directories", "dotenvy", "flate2", + "indicatif", + "nix", "open", "rand", "reqwest", @@ -823,6 +850,19 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + [[package]] name = "ipnet" version = "2.12.0" @@ -994,6 +1034,24 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "once_cell" version = "1.21.3" @@ -1126,6 +1184,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "potential_utf" version = "0.1.4" @@ -1303,7 +1367,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2019,6 +2083,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2085,6 +2159,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.60.2" diff --git a/Cargo.toml b/Cargo.toml index 2e77777..45916de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,8 @@ rand = "0.8" sha2 = "0.10" tiny_http = "0.12" comfy-table = "7" +indicatif = "0.17" +nix = { version = "0.29", features = ["fs"] } flate2 = "1" tar = "0.4" semver = "1" diff --git a/src/command.rs b/src/command.rs index 28c48ba..8c90e4a 100644 --- a/src/command.rs +++ b/src/command.rs @@ -178,27 +178,23 @@ pub enum DatasetsCommands { format: String, }, - /// Create a new dataset in a workspace + /// Create a new dataset from a file or piped stdin Create { /// Workspace ID (defaults to first workspace from login) #[arg(long)] workspace_id: Option, - /// Dataset name + /// Dataset label (derived from filename if omitted) #[arg(long)] - name: String, + label: Option, - /// SQL query for the dataset + /// Table name (derived from label if omitted) #[arg(long)] - sql: Option, + table_name: Option, - /// Connection ID for the dataset + /// Path to a file to upload (omit to read from stdin) #[arg(long)] - connection_id: Option, - - /// Output format - #[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])] - format: String, + file: Option, }, /// Update a dataset in a workspace @@ -264,6 +260,7 @@ pub enum DatasetsCommands { #[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])] format: String, }, + } diff --git a/src/datasets.rs b/src/datasets.rs new file mode 100644 index 0000000..2cfc7bf --- /dev/null +++ b/src/datasets.rs @@ -0,0 +1,295 @@ +use crate::config; +use indicatif::{ProgressBar, ProgressStyle}; +use serde::Deserialize; +use serde_json::json; +use std::path::Path; + +#[derive(Deserialize)] +struct Dataset { + id: String, + label: String, + table_name: String, +} + +struct FileType { + content_type: &'static str, + format: &'static str, +} + +fn detect_from_bytes(bytes: &[u8]) -> FileType { + if bytes.starts_with(b"PAR1") { + return FileType { content_type: "application/octet-stream", format: "parquet" }; + } + let first = bytes.iter().find(|&&b| !b.is_ascii_whitespace()).copied(); + if matches!(first, Some(b'{') | Some(b'[')) { + return FileType { content_type: "application/json", format: "json" }; + } + FileType { content_type: "text/csv", format: "csv" } +} + +fn detect_from_path(path: &str) -> Option { + match Path::new(path).extension().and_then(|e| e.to_str()) { + Some("csv") => Some(FileType { content_type: "text/csv", format: "csv" }), + Some("json") => Some(FileType { content_type: "application/json", format: "json" }), + Some("parquet") => Some(FileType { content_type: "application/octet-stream", format: "parquet" }), + _ => None, + } +} + +/// Try to resolve the filename of the file redirected into stdin. +/// Works for `cmd < file.csv` but not for pipes (`cat file.csv | cmd`). +fn stdin_redirect_filename() -> Option { + #[cfg(target_os = "linux")] + { + std::fs::read_link("/proc/self/fd/0") + .ok() + .and_then(|p| p.file_stem().map(|s| s.to_string_lossy().into_owned())) + } + #[cfg(target_os = "macos")] + { + use std::os::unix::io::AsRawFd; + use nix::fcntl::{fcntl, FcntlArg}; + let fd = std::io::stdin().as_raw_fd(); + let mut path = std::path::PathBuf::new(); + match fcntl(fd, FcntlArg::F_GETPATH(&mut path)) { + Ok(_) => path.file_stem().map(|s| s.to_string_lossy().into_owned()), + Err(_) => None, + } + } + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + None + } +} + +fn api_error(body: String) -> String { + serde_json::from_str::(&body) + .ok() + .and_then(|v| v["error"]["message"].as_str().map(str::to_string)) + .unwrap_or(body) +} + +fn make_progress_bar(total: u64) -> ProgressBar { + let pb = ProgressBar::new(total); + pb.set_style( + ProgressStyle::with_template( + "{spinner:.green} [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({eta})", + ) + .unwrap() + .progress_chars("=>-"), + ); + pb +} + +fn do_upload( + client: &reqwest::blocking::Client, + api_key: &str, + workspace_id: &str, + api_url: &str, + content_type: &str, + reader: R, + pb: ProgressBar, +) -> String { + let url = format!("{api_url}/files"); + + let resp = match client + .post(&url) + .header("Authorization", format!("Bearer {api_key}")) + .header("X-Workspace-Id", workspace_id) + .header("Content-Type", content_type) + .body(reqwest::blocking::Body::new(reader)) + .send() + { + Ok(r) => r, + Err(e) => { + pb.finish_and_clear(); + eprintln!("error uploading: {e}"); + std::process::exit(1); + } + }; + + pb.finish_and_clear(); + + if !resp.status().is_success() { + use crossterm::style::Stylize; + eprintln!("{}", api_error(resp.text().unwrap_or_default()).red()); + std::process::exit(1); + } + + let body: serde_json::Value = match resp.json() { + Ok(v) => v, + Err(e) => { + eprintln!("error parsing upload response: {e}"); + std::process::exit(1); + } + }; + + match body["id"].as_str() { + Some(id) => id.to_string(), + None => { + eprintln!("error: upload response missing id"); + std::process::exit(1); + } + } +} + +// Returns (upload_id, format) +fn upload_from_file( + client: &reqwest::blocking::Client, + api_key: &str, + workspace_id: &str, + api_url: &str, + path: &str, +) -> (String, &'static str) { + let f = match std::fs::File::open(path) { + Ok(f) => f, + Err(e) => { + eprintln!("error opening file '{path}': {e}"); + std::process::exit(1); + } + }; + + let ft = detect_from_path(path).unwrap_or_else(|| { + use std::io::Read; + let mut probe = [0u8; 512]; + let n = match std::fs::File::open(path) { + Ok(mut f2) => f2.read(&mut probe).unwrap_or(0), + Err(_) => 0, + }; + detect_from_bytes(&probe[..n]) + }); + + let file_size = f.metadata().map(|m| m.len()).unwrap_or(0); + let pb = make_progress_bar(file_size); + let reader = pb.wrap_read(f); + + let id = do_upload(client, api_key, workspace_id, api_url, ft.content_type, reader, pb); + (id, ft.format) +} + +// Returns (upload_id, format) +fn upload_from_stdin( + client: &reqwest::blocking::Client, + api_key: &str, + workspace_id: &str, + api_url: &str, +) -> (String, &'static str) { + use std::io::Read; + let mut buf = Vec::new(); + if let Err(e) = std::io::stdin().read_to_end(&mut buf) { + eprintln!("error reading stdin: {e}"); + std::process::exit(1); + } + + let ft = detect_from_bytes(&buf); + let total = buf.len() as u64; + let pb = make_progress_bar(total); + let reader = pb.wrap_read(std::io::Cursor::new(buf)); + + let id = do_upload(client, api_key, workspace_id, api_url, ft.content_type, reader, pb); + (id, ft.format) +} + +pub fn create( + workspace_id: &str, + label: Option<&str>, + table_name: Option<&str>, + file: Option<&str>, +) { + let profile_config = match config::load("default") { + Ok(c) => c, + Err(e) => { + eprintln!("{e}"); + std::process::exit(1); + } + }; + + let api_key = match &profile_config.api_key { + Some(key) if key != "PLACEHOLDER" => key.clone(), + _ => { + eprintln!("error: not authenticated. Run 'hotdata auth login' to log in."); + std::process::exit(1); + } + }; + + let label_derived; + let label: &str = match label { + Some(l) => l, + None => match file { + Some(path) => { + label_derived = Path::new(path) + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("dataset") + .to_string(); + &label_derived + } + None => match stdin_redirect_filename() { + Some(name) => { + label_derived = name; + &label_derived + } + None => { + eprintln!("error: no label provided. Use --label to name the dataset."); + std::process::exit(1); + } + }, + }, + }; + + let client = reqwest::blocking::Client::new(); + + let (upload_id, format) = match file { + Some(path) => upload_from_file(&client, &api_key, workspace_id, &profile_config.api_url, path), + None => { + use std::io::IsTerminal; + if std::io::stdin().is_terminal() { + eprintln!("error: no input data. Use --file or pipe data via stdin."); + std::process::exit(1); + } + upload_from_stdin(&client, &api_key, workspace_id, &profile_config.api_url) + } + }; + + let source = json!({ "upload_id": upload_id, "format": format }); + let mut body = json!({ "label": label, "source": source }); + if let Some(tn) = table_name { + body["table_name"] = json!(tn); + } + + let url = format!("{}/datasets", profile_config.api_url); + + let resp = match client + .post(&url) + .header("Authorization", format!("Bearer {api_key}")) + .header("X-Workspace-Id", workspace_id) + .json(&body) + .send() + { + Ok(r) => r, + Err(e) => { + eprintln!("error connecting to API: {e}"); + std::process::exit(1); + } + }; + + if !resp.status().is_success() { + use crossterm::style::Stylize; + eprintln!("{}", api_error(resp.text().unwrap_or_default()).red()); + std::process::exit(1); + } + + let dataset: Dataset = match resp.json() { + Ok(v) => v, + Err(e) => { + eprintln!("error parsing response: {e}"); + std::process::exit(1); + } + }; + + use crossterm::style::Stylize; + println!("{}", "Dataset created".green()); + println!("id: {}", dataset.id); + println!("label: {}", dataset.label); + println!("table_name: {}", dataset.table_name); +} diff --git a/src/main.rs b/src/main.rs index ffe841d..df6c1d0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod auth; mod command; mod config; mod connections; +mod datasets; mod init; mod query; mod results; @@ -12,7 +13,7 @@ mod workspace; use anstyle::AnsiColor; use clap::{Parser, builder::Styles}; -use command::{AuthCommands, Commands, ConnectionsCommands, SkillCommands, TablesCommands, WorkspaceCommands}; +use command::{AuthCommands, Commands, ConnectionsCommands, DatasetsCommands, SkillCommands, TablesCommands, WorkspaceCommands}; #[derive(Parser)] #[command(name = "hotdata", version, about = concat!("HotData CLI - Command line interface for HotData (v", env!("CARGO_PKG_VERSION"), ")"), long_about = None, disable_version_flag = true)] @@ -60,7 +61,13 @@ fn main() { AuthCommands::Status { profile } => auth::status(&profile), _ => eprintln!("not yet implemented"), }, - Commands::Datasets { .. } => eprintln!("not yet implemented"), + Commands::Datasets { command } => match command { + DatasetsCommands::Create { workspace_id, label, table_name, file } => { + let workspace_id = resolve_workspace(workspace_id); + datasets::create(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref()) + } + _ => eprintln!("not yet implemented"), + }, Commands::Query { sql, workspace_id, connection, format } => { let workspace_id = resolve_workspace(workspace_id); query::execute(&sql, &workspace_id, connection.as_deref(), &format) From 4dffa69510ad99fb07b213378258a4931ebf1952 Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Fri, 13 Mar 2026 14:15:31 -0700 Subject: [PATCH 2/8] Add single dataset get command --- src/command.rs | 95 +++++-------------------- src/datasets.rs | 181 +++++++++++++++++++++++++++++++++++++++++++++++- src/main.rs | 27 ++++++-- 3 files changed, 218 insertions(+), 85 deletions(-) diff --git a/src/command.rs b/src/command.rs index 8c90e4a..c1d61c4 100644 --- a/src/command.rs +++ b/src/command.rs @@ -16,8 +16,19 @@ pub enum Commands { /// Manage datasets Datasets { + /// Dataset ID to show details + id: Option, + + /// Workspace ID (defaults to first workspace from login) + #[arg(long)] + workspace_id: Option, + + /// Output format (used with dataset ID) + #[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])] + format: String, + #[command(subcommand)] - command: DatasetsCommands, + command: Option, }, /// Execute a SQL query @@ -159,22 +170,16 @@ pub enum DatasetsCommands { #[arg(long)] workspace_id: Option, - /// Output format - #[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])] - format: String, - }, - - /// Get details for a specific dataset - Get { - /// Workspace ID (defaults to first workspace from login) + /// Maximum number of results (default: 100, max: 1000) #[arg(long)] - workspace_id: Option, + limit: Option, - /// Dataset ID - dataset_id: String, + /// Pagination offset + #[arg(long)] + offset: Option, /// Output format - #[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])] + #[arg(long, default_value = "table", value_parser = ["table", "json", "yaml"])] format: String, }, @@ -197,70 +202,6 @@ pub enum DatasetsCommands { file: Option, }, - /// Update a dataset in a workspace - Update { - /// Workspace ID (defaults to first workspace from login) - #[arg(long)] - workspace_id: Option, - - /// Dataset ID - dataset_id: String, - - /// New dataset name - #[arg(long)] - name: Option, - - /// New SQL query for the dataset - #[arg(long)] - query: Option, - - /// Output format - #[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])] - format: String, - }, - - /// Delete a dataset from a workspace - Delete { - /// Workspace ID (defaults to first workspace from login) - #[arg(long)] - workspace_id: Option, - - /// Dataset ID - dataset_id: String, - }, - - /// Update the SQL query for a dataset - UpdateSql { - /// Workspace ID (defaults to first workspace from login) - #[arg(long)] - workspace_id: Option, - - /// Dataset ID - dataset_id: String, - - /// New SQL query for the dataset - #[arg(long)] - sql: String, - - /// Output format - #[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])] - format: String, - }, - - /// Execute a dataset - Execute { - /// Workspace ID (defaults to first workspace from login) - #[arg(long)] - workspace_id: Option, - - /// Dataset ID - dataset_id: String, - - /// Output format - #[arg(long, default_value = "yaml", value_parser = ["table", "json", "yaml"])] - format: String, - }, - } diff --git a/src/datasets.rs b/src/datasets.rs index 2cfc7bf..ad5af9a 100644 --- a/src/datasets.rs +++ b/src/datasets.rs @@ -1,14 +1,42 @@ use crate::config; use indicatif::{ProgressBar, ProgressStyle}; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use serde_json::json; use std::path::Path; -#[derive(Deserialize)] +#[derive(Deserialize, Serialize)] struct Dataset { id: String, label: String, table_name: String, + created_at: String, + updated_at: String, +} + +#[derive(Deserialize)] +struct ListResponse { + datasets: Vec, + count: u64, + has_more: bool, +} + +#[derive(Deserialize, Serialize)] +struct Column { + name: String, + data_type: String, + nullable: bool, +} + +#[derive(Deserialize, Serialize)] +struct DatasetDetail { + id: String, + label: String, + schema_name: String, + table_name: String, + source_type: String, + created_at: String, + updated_at: String, + columns: Vec, } struct FileType { @@ -293,3 +321,152 @@ pub fn create( println!("label: {}", dataset.label); println!("table_name: {}", dataset.table_name); } + +pub fn list(workspace_id: &str, limit: Option, offset: Option, format: &str) { + let profile_config = match config::load("default") { + Ok(c) => c, + Err(e) => { + eprintln!("{e}"); + std::process::exit(1); + } + }; + + let api_key = match &profile_config.api_key { + Some(key) if key != "PLACEHOLDER" => key.clone(), + _ => { + eprintln!("error: not authenticated. Run 'hotdata auth login' to log in."); + std::process::exit(1); + } + }; + + let mut url = format!("{}/datasets", profile_config.api_url); + let mut params = vec![]; + if let Some(l) = limit { params.push(format!("limit={l}")); } + if let Some(o) = offset { params.push(format!("offset={o}")); } + if !params.is_empty() { url = format!("{url}?{}", params.join("&")); } + + let client = reqwest::blocking::Client::new(); + let resp = match client + .get(&url) + .header("Authorization", format!("Bearer {api_key}")) + .header("X-Workspace-Id", workspace_id) + .send() + { + Ok(r) => r, + Err(e) => { + eprintln!("error connecting to API: {e}"); + std::process::exit(1); + } + }; + + if !resp.status().is_success() { + use crossterm::style::Stylize; + eprintln!("{}", api_error(resp.text().unwrap_or_default()).red()); + std::process::exit(1); + } + + let body: ListResponse = match resp.json() { + Ok(v) => v, + Err(e) => { + eprintln!("error parsing response: {e}"); + std::process::exit(1); + } + }; + + match format { + "json" => println!("{}", serde_json::to_string_pretty(&body.datasets).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&body.datasets).unwrap()), + "table" => { + let mut table = crate::util::make_table(); + table.set_header(["ID", "LABEL", "TABLE NAME", "CREATED AT"]); + table.column_mut(1).unwrap().set_constraint( + comfy_table::ColumnConstraint::UpperBoundary(comfy_table::Width::Fixed(30)) + ); + for d in &body.datasets { + let created_at = d.created_at.split('.').next().unwrap_or(&d.created_at).replace('T', " "); + table.add_row([&d.id, &d.label, &d.table_name, &created_at]); + } + println!("{table}"); + if body.has_more { + let next = offset.unwrap_or(0) + body.count as u32; + use crossterm::style::Stylize; + eprintln!("{}", format!("showing {} results — use --offset {next} for more", body.count).dark_grey()); + } + } + _ => unreachable!(), + } +} + +pub fn get(dataset_id: &str, workspace_id: &str, format: &str) { + let profile_config = match config::load("default") { + Ok(c) => c, + Err(e) => { + eprintln!("{e}"); + std::process::exit(1); + } + }; + + let api_key = match &profile_config.api_key { + Some(key) if key != "PLACEHOLDER" => key.clone(), + _ => { + eprintln!("error: not authenticated. Run 'hotdata auth login' to log in."); + std::process::exit(1); + } + }; + + let url = format!("{}/datasets/{dataset_id}", profile_config.api_url); + let client = reqwest::blocking::Client::new(); + + let resp = match client + .get(&url) + .header("Authorization", format!("Bearer {api_key}")) + .header("X-Workspace-Id", workspace_id) + .send() + { + Ok(r) => r, + Err(e) => { + eprintln!("error connecting to API: {e}"); + std::process::exit(1); + } + }; + + if !resp.status().is_success() { + use crossterm::style::Stylize; + eprintln!("{}", api_error(resp.text().unwrap_or_default()).red()); + std::process::exit(1); + } + + let d: DatasetDetail = match resp.json() { + Ok(v) => v, + Err(e) => { + eprintln!("error parsing response: {e}"); + std::process::exit(1); + } + }; + + match format { + "json" => println!("{}", serde_json::to_string_pretty(&d).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&d).unwrap()), + "table" => { + let created_at = d.created_at.split('.').next().unwrap_or(&d.created_at).replace('T', " "); + let updated_at = d.updated_at.split('.').next().unwrap_or(&d.updated_at).replace('T', " "); + println!("id: {}", d.id); + println!("label: {}", d.label); + println!("schema: {}", d.schema_name); + println!("table: {}", d.table_name); + println!("source_type: {}", d.source_type); + println!("created_at: {created_at}"); + println!("updated_at: {updated_at}"); + if !d.columns.is_empty() { + println!(); + let mut table = crate::util::make_table(); + table.set_header(["COLUMN", "DATA TYPE", "NULLABLE"]); + for col in &d.columns { + table.add_row([&col.name, &col.data_type, &col.nullable.to_string()]); + } + println!("{table}"); + } + } + _ => unreachable!(), + } +} diff --git a/src/main.rs b/src/main.rs index df6c1d0..3f10341 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,13 +61,28 @@ fn main() { AuthCommands::Status { profile } => auth::status(&profile), _ => eprintln!("not yet implemented"), }, - Commands::Datasets { command } => match command { - DatasetsCommands::Create { workspace_id, label, table_name, file } => { - let workspace_id = resolve_workspace(workspace_id); - datasets::create(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref()) + Commands::Datasets { id, workspace_id, format, command } => { + let workspace_id = resolve_workspace(workspace_id); + if let Some(id) = id { + datasets::get(&id, &workspace_id, &format) + } else { + match command { + Some(DatasetsCommands::List { workspace_id: ws, limit, offset, format }) => { + let workspace_id = resolve_workspace(ws); + datasets::list(&workspace_id, limit, offset, &format) + } + Some(DatasetsCommands::Create { workspace_id: ws, label, table_name, file }) => { + let workspace_id = resolve_workspace(ws); + datasets::create(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref()) + } + None => { + use clap::CommandFactory; + Cli::command().find_subcommand_mut("datasets").unwrap().print_help().unwrap(); + println!(); + } + } } - _ => eprintln!("not yet implemented"), - }, + } Commands::Query { sql, workspace_id, connection, format } => { let workspace_id = resolve_workspace(workspace_id); query::execute(&sql, &workspace_id, connection.as_deref(), &format) From dcf53f3e03085124bd440eaaccebdab3bed3c319 Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Fri, 13 Mar 2026 14:34:12 -0700 Subject: [PATCH 3/8] Add skill writeup for datasets commands and show full_name in table details for datasets --- skills/hotdata-cli/SKILL.md | 42 ++++++++++++++++++++++++++++++++++++- src/datasets.rs | 10 ++++----- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/skills/hotdata-cli/SKILL.md b/skills/hotdata-cli/SKILL.md index b4bcae2..102b180 100644 --- a/skills/hotdata-cli/SKILL.md +++ b/skills/hotdata-cli/SKILL.md @@ -1,6 +1,6 @@ --- name: hotdata-cli -description: Use this skill when the user wants to run hotdata CLI commands, query the HotData API, list workspaces, list connections, list tables, execute SQL queries, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "list tables", "execute a query", or asks you to use the hotdata CLI. +description: Use this skill when the user wants to run hotdata CLI commands, query the HotData API, list workspaces, list connections, list tables, manage datasets, execute SQL queries, or interact with the hotdata service. Activate when the user says "run hotdata", "query hotdata", "list workspaces", "list connections", "list tables", "list datasets", "create a dataset", "upload a dataset", "execute a query", or asks you to use the hotdata CLI. version: 0.1.3 --- @@ -50,6 +50,46 @@ hotdata tables list [--workspace-id ] [--connection-id ] [--limit ] [--offset ] [--format table|json|yaml] +``` +- Default format is `table`. +- Returns `id`, `label`, `table_name`, `created_at`. +- Results are paginated (default 100). Use `--offset` to fetch further pages. + +#### Get dataset details +``` +hotdata datasets [--workspace-id ] [--format table|json|yaml] +``` +- Shows dataset metadata and a full column listing with `name`, `data_type`, `nullable`. +- Use this to inspect schema before querying. + +#### Create a dataset +``` +hotdata datasets create --label "My Dataset" --file data.csv [--table-name my_dataset] [--workspace-id ] +``` +- `--file` uploads a local file. Omit to pipe data via stdin: `cat data.csv | hotdata datasets create --label "My Dataset"` +- Format is auto-detected from file extension (`.csv`, `.json`, `.parquet`) or file content. +- `--label` is optional when `--file` is provided — defaults to the filename without extension. +- `--table-name` is optional — derived from the label if omitted. + +#### Querying datasets + +Datasets are queryable using the catalog `datasets` and schema `main`. Always reference dataset tables as: +``` +datasets.main. +``` +For example: +``` +hotdata query "SELECT * FROM datasets.main.my_dataset LIMIT 10" +``` +Use `hotdata datasets ` to look up the `table_name` before writing queries. + ### Execute SQL Query ``` hotdata query "" [--workspace-id ] [--connection ] [--format table|json|csv] diff --git a/src/datasets.rs b/src/datasets.rs index ad5af9a..7637133 100644 --- a/src/datasets.rs +++ b/src/datasets.rs @@ -319,7 +319,7 @@ pub fn create( println!("{}", "Dataset created".green()); println!("id: {}", dataset.id); println!("label: {}", dataset.label); - println!("table_name: {}", dataset.table_name); + println!("full_name: datasets.main.{}", dataset.table_name); } pub fn list(workspace_id: &str, limit: Option, offset: Option, format: &str) { @@ -378,13 +378,14 @@ pub fn list(workspace_id: &str, limit: Option, offset: Option, format: "yaml" => print!("{}", serde_yaml::to_string(&body.datasets).unwrap()), "table" => { let mut table = crate::util::make_table(); - table.set_header(["ID", "LABEL", "TABLE NAME", "CREATED AT"]); + table.set_header(["ID", "LABEL", "FULL NAME", "CREATED AT"]); table.column_mut(1).unwrap().set_constraint( comfy_table::ColumnConstraint::UpperBoundary(comfy_table::Width::Fixed(30)) ); for d in &body.datasets { let created_at = d.created_at.split('.').next().unwrap_or(&d.created_at).replace('T', " "); - table.add_row([&d.id, &d.label, &d.table_name, &created_at]); + let full_name = format!("datasets.main.{}", d.table_name); + table.add_row([&d.id, &d.label, &full_name, &created_at]); } println!("{table}"); if body.has_more { @@ -452,8 +453,7 @@ pub fn get(dataset_id: &str, workspace_id: &str, format: &str) { let updated_at = d.updated_at.split('.').next().unwrap_or(&d.updated_at).replace('T', " "); println!("id: {}", d.id); println!("label: {}", d.label); - println!("schema: {}", d.schema_name); - println!("table: {}", d.table_name); + println!("full_name: datasets.main.{}", d.table_name); println!("source_type: {}", d.source_type); println!("created_at: {created_at}"); println!("updated_at: {updated_at}"); From 16c85dfb944996251b4f3ef197f68f5356c39653 Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Fri, 13 Mar 2026 14:36:02 -0700 Subject: [PATCH 4/8] Update src/datasets.rs Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> --- src/datasets.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets.rs b/src/datasets.rs index 7637133..6d12dc3 100644 --- a/src/datasets.rs +++ b/src/datasets.rs @@ -180,7 +180,7 @@ fn upload_from_file( let ft = detect_from_path(path).unwrap_or_else(|| { use std::io::Read; let mut probe = [0u8; 512]; - let n = match std::fs::File::open(path) { + let n = { use std::io::Read; f.read(&mut probe).unwrap_or(0) }; Ok(mut f2) => f2.read(&mut probe).unwrap_or(0), Err(_) => 0, }; From fce6199e12291464c6a98930c80762f310891a92 Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Fri, 13 Mar 2026 14:40:07 -0700 Subject: [PATCH 5/8] Remove inner workspace-id flag on datasets subcommands --- src/command.rs | 8 -------- src/main.rs | 6 ++---- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/command.rs b/src/command.rs index c1d61c4..442ba93 100644 --- a/src/command.rs +++ b/src/command.rs @@ -166,10 +166,6 @@ pub enum AuthKeysCommands { pub enum DatasetsCommands { /// List all datasets in a workspace List { - /// Workspace ID (defaults to first workspace from login) - #[arg(long)] - workspace_id: Option, - /// Maximum number of results (default: 100, max: 1000) #[arg(long)] limit: Option, @@ -185,10 +181,6 @@ pub enum DatasetsCommands { /// Create a new dataset from a file or piped stdin Create { - /// Workspace ID (defaults to first workspace from login) - #[arg(long)] - workspace_id: Option, - /// Dataset label (derived from filename if omitted) #[arg(long)] label: Option, diff --git a/src/main.rs b/src/main.rs index 3f10341..715cca0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -67,12 +67,10 @@ fn main() { datasets::get(&id, &workspace_id, &format) } else { match command { - Some(DatasetsCommands::List { workspace_id: ws, limit, offset, format }) => { - let workspace_id = resolve_workspace(ws); + Some(DatasetsCommands::List { limit, offset, format }) => { datasets::list(&workspace_id, limit, offset, &format) } - Some(DatasetsCommands::Create { workspace_id: ws, label, table_name, file }) => { - let workspace_id = resolve_workspace(ws); + Some(DatasetsCommands::Create { label, table_name, file }) => { datasets::create(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref()) } None => { From 265b69baeee7b48b03bb1bf2a0f4b0b0636d0c91 Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Fri, 13 Mar 2026 14:52:45 -0700 Subject: [PATCH 6/8] Fix match in io probe for file type in upload_from_file --- src/datasets.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/datasets.rs b/src/datasets.rs index 6d12dc3..d20f869 100644 --- a/src/datasets.rs +++ b/src/datasets.rs @@ -169,7 +169,7 @@ fn upload_from_file( api_url: &str, path: &str, ) -> (String, &'static str) { - let f = match std::fs::File::open(path) { + let mut f = match std::fs::File::open(path) { Ok(f) => f, Err(e) => { eprintln!("error opening file '{path}': {e}"); @@ -180,10 +180,7 @@ fn upload_from_file( let ft = detect_from_path(path).unwrap_or_else(|| { use std::io::Read; let mut probe = [0u8; 512]; - let n = { use std::io::Read; f.read(&mut probe).unwrap_or(0) }; - Ok(mut f2) => f2.read(&mut probe).unwrap_or(0), - Err(_) => 0, - }; + let n = f.read(&mut probe).unwrap_or(0); detect_from_bytes(&probe[..n]) }); From e371a65c8bac064d74188f6ebee439375b15247a Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Fri, 13 Mar 2026 15:00:25 -0700 Subject: [PATCH 7/8] reset file read after probing file type --- src/datasets.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/datasets.rs b/src/datasets.rs index d20f869..425d3c5 100644 --- a/src/datasets.rs +++ b/src/datasets.rs @@ -178,9 +178,10 @@ fn upload_from_file( }; let ft = detect_from_path(path).unwrap_or_else(|| { - use std::io::Read; + use std::io::{Read, Seek}; let mut probe = [0u8; 512]; let n = f.read(&mut probe).unwrap_or(0); + let _ = f.seek(std::io::SeekFrom::Start(0)); detect_from_bytes(&probe[..n]) }); From 596e08960fdab1c68aac2603ff0570e084d53c31 Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Fri, 13 Mar 2026 15:07:05 -0700 Subject: [PATCH 8/8] Stream stdin for dataset creation and fix response syntax --- src/datasets.rs | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/datasets.rs b/src/datasets.rs index 425d3c5..a9b44a1 100644 --- a/src/datasets.rs +++ b/src/datasets.rs @@ -13,6 +13,13 @@ struct Dataset { updated_at: String, } +#[derive(Deserialize)] +struct CreateResponse { + id: String, + label: String, + table_name: String, +} + #[derive(Deserialize)] struct ListResponse { datasets: Vec, @@ -201,16 +208,19 @@ fn upload_from_stdin( api_url: &str, ) -> (String, &'static str) { use std::io::Read; - let mut buf = Vec::new(); - if let Err(e) = std::io::stdin().read_to_end(&mut buf) { - eprintln!("error reading stdin: {e}"); - std::process::exit(1); - } + let mut probe = [0u8; 512]; + let n = std::io::stdin().read(&mut probe).unwrap_or(0); + let ft = detect_from_bytes(&probe[..n]); + + let reader = std::io::Cursor::new(probe[..n].to_vec()).chain(std::io::stdin()); - let ft = detect_from_bytes(&buf); - let total = buf.len() as u64; - let pb = make_progress_bar(total); - let reader = pb.wrap_read(std::io::Cursor::new(buf)); + let pb = ProgressBar::new_spinner(); + pb.set_style( + ProgressStyle::with_template("{spinner:.green} {bytes} uploaded ({elapsed})") + .unwrap(), + ); + pb.enable_steady_tick(std::time::Duration::from_millis(80)); + let reader = pb.wrap_read(reader); let id = do_upload(client, api_key, workspace_id, api_url, ft.content_type, reader, pb); (id, ft.format) @@ -305,7 +315,7 @@ pub fn create( std::process::exit(1); } - let dataset: Dataset = match resp.json() { + let dataset: CreateResponse = match resp.json() { Ok(v) => v, Err(e) => { eprintln!("error parsing response: {e}");