diff --git a/Cargo.lock b/Cargo.lock index 808956cd9..9d6e90c85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3414,6 +3414,7 @@ dependencies = [ "bytes", "futures", "openshell-core", + "serde", "tar", "tempfile", "tokio", @@ -3436,6 +3437,7 @@ dependencies = [ "openshell-core", "prost", "prost-types", + "serde", "serde_json", "thiserror 2.0.18", "tokio", @@ -3665,6 +3667,7 @@ dependencies = [ "tokio-rustls", "tokio-stream", "tokio-tungstenite 0.26.2", + "toml", "tonic", "tower 0.5.3", "tower-http 0.6.8", @@ -5126,6 +5129,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -6063,6 +6075,47 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap 2.14.0", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "tonic" version = "0.12.3" @@ -7152,6 +7205,15 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + [[package]] name = "wiremock" version = "0.6.5" diff --git a/Cargo.toml b/Cargo.toml index 9bc3f9ea2..7f283eb24 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,7 @@ nix = { version = "0.29", features = ["signal", "process", "user", "fs", "term"] serde = { version = "1", features = ["derive"] } serde_json = "1" serde_yml = "0.0.12" +toml = "0.8" apollo-parser = "0.8.5" # HTTP client diff --git a/architecture/gateway.md b/architecture/gateway.md index d89706e64..99d45859b 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -158,6 +158,37 @@ pre-created Secrets) disable the Helm hook via `pkiInitJob.enabled=false`. The chart also ships a `certManager.*` path that produces equivalent Secrets through cert-manager `Issuer`/`Certificate` resources. +## Configuration + +The gateway reads its configuration from three sources, merged in this +precedence (highest first): + +``` +CLI flag > OPENSHELL_* env var > TOML file > built-in default +``` + +The TOML file is opt-in via `--config ` / `OPENSHELL_GATEWAY_CONFIG`. +When unset, the gateway behaves exactly as before — CLI flags and env vars +drive every setting. See `examples/gateway/gateway.example.toml` for a +worked example and RFC 0003 for the full schema. + +`database_url` is env-only and rejected when present in the file +(`OPENSHELL_DB_URL` / `--db-url`). + +### Driver inheritance + +`[openshell.gateway]` carries a small set of values (`default_image`, +`supervisor_image`, `image_pull_policy`, `guest_tls_ca/cert/key`, +`client_tls_secret_name`, `host_gateway_ip`, `enable_user_namespaces`) that +are inherited into each driver's `[openshell.drivers.]` table when +the driver-specific table does not override them. The allowlist is +per-driver so a gateway-wide default cannot land in a driver that does not +understand it (e.g. `client_tls_secret_name` is K8s-only). + +Driver-specific values that are not part of the inheritance allowlist +(e.g. K8s `namespace`, Podman `socket_path`, VM `vcpus`) only come from +the driver's own table. + ## Operational Constraints - Gateway TLS and client certificate distribution are deployment concerns owned diff --git a/crates/openshell-driver-docker/Cargo.toml b/crates/openshell-driver-docker/Cargo.toml index 79d4fb37d..e2c97532a 100644 --- a/crates/openshell-driver-docker/Cargo.toml +++ b/crates/openshell-driver-docker/Cargo.toml @@ -19,6 +19,7 @@ futures = { workspace = true } tokio-stream = { workspace = true } tracing = { workspace = true } bytes = { workspace = true } +serde = { workspace = true } bollard = { version = "0.20" } tar = "0.4" tempfile = "3" diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs index cff4c57b9..5fb3580ee 100644 --- a/crates/openshell-driver-docker/src/lib.rs +++ b/crates/openshell-driver-docker/src/lib.rs @@ -127,7 +127,8 @@ pub trait SupervisorReadiness: Send + Sync + 'static { } /// Gateway-local configuration for the Docker compute driver. -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +#[serde(default, deny_unknown_fields)] pub struct DockerComputeConfig { /// Optional override for the Linux `openshell-sandbox` binary mounted into containers. pub supervisor_bin: Option, @@ -151,6 +152,19 @@ pub struct DockerComputeConfig { pub network_name: String, } +impl Default for DockerComputeConfig { + fn default() -> Self { + Self { + supervisor_bin: None, + supervisor_image: None, + guest_tls_ca: None, + guest_tls_cert: None, + guest_tls_key: None, + network_name: DEFAULT_DOCKER_NETWORK_NAME.to_string(), + } + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct DockerGuestTlsPaths { pub(crate) ca: PathBuf, diff --git a/crates/openshell-driver-kubernetes/Cargo.toml b/crates/openshell-driver-kubernetes/Cargo.toml index 5e247dc77..c222c9c31 100644 --- a/crates/openshell-driver-kubernetes/Cargo.toml +++ b/crates/openshell-driver-kubernetes/Cargo.toml @@ -26,6 +26,7 @@ tokio-stream = { workspace = true } kube = { workspace = true } kube-runtime = { workspace = true } k8s-openapi = { workspace = true } +serde = { workspace = true } serde_json = { workspace = true } clap = { workspace = true } tracing = { workspace = true } diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index e9baf0e5c..2eab5a4c7 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -1,8 +1,15 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +use openshell_core::config::{ + DEFAULT_IMAGE_PULL_POLICY, DEFAULT_K8S_NAMESPACE, DEFAULT_SSH_HANDSHAKE_SKEW_SECS, + DEFAULT_SUPERVISOR_IMAGE, +}; +use serde::{Deserialize, Serialize}; + /// How the supervisor binary is delivered into sandbox pods. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] pub enum SupervisorSideloadMethod { /// Mount the supervisor OCI image directly as a read-only volume /// (requires Kubernetes >= v1.33 with the `ImageVolume` feature gate, @@ -37,7 +44,8 @@ impl std::str::FromStr for SupervisorSideloadMethod { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default, deny_unknown_fields)] pub struct KubernetesComputeConfig { pub namespace: String, pub default_image: String, @@ -59,3 +67,23 @@ pub struct KubernetesComputeConfig { pub host_gateway_ip: String, pub enable_user_namespaces: bool, } + +impl Default for KubernetesComputeConfig { + fn default() -> Self { + Self { + namespace: DEFAULT_K8S_NAMESPACE.to_string(), + default_image: String::new(), + image_pull_policy: DEFAULT_IMAGE_PULL_POLICY.to_string(), + supervisor_image: DEFAULT_SUPERVISOR_IMAGE.to_string(), + supervisor_image_pull_policy: String::new(), + supervisor_sideload_method: SupervisorSideloadMethod::default(), + grpc_endpoint: String::new(), + ssh_socket_path: "/run/openshell/ssh.sock".to_string(), + ssh_handshake_secret: String::new(), + ssh_handshake_skew_secs: DEFAULT_SSH_HANDSHAKE_SKEW_SECS, + client_tls_secret_name: String::new(), + host_gateway_ip: String::new(), + enable_user_namespaces: false, + } + } +} diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index a6107f907..cde1f4b22 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -2308,10 +2308,18 @@ mod tests { #[test] fn log_level_propagates_as_env_var_to_sandbox_pod() { - let spec = SandboxSpec { log_level: "debug".to_string(), ..SandboxSpec::default() }; + let spec = SandboxSpec { + log_level: "debug".to_string(), + ..SandboxSpec::default() + }; let cr = sandbox_to_k8s_spec(Some(&spec), &SandboxPodParams::default()); - let env = cr["spec"]["podTemplate"]["spec"]["containers"][0]["env"].as_array().unwrap(); - assert!(env.iter().any(|e| e["name"] == "OPENSHELL_LOG_LEVEL" && e["value"] == "debug")); + let env = cr["spec"]["podTemplate"]["spec"]["containers"][0]["env"] + .as_array() + .unwrap(); + assert!( + env.iter() + .any(|e| e["name"] == "OPENSHELL_LOG_LEVEL" && e["value"] == "debug") + ); assert!(cr["spec"].get("logLevel").is_none()); } } diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index d82b8d0b0..cfb58751e 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -61,7 +61,8 @@ impl FromStr for ImagePullPolicy { } } -#[derive(Clone)] +#[derive(Clone, serde::Serialize, serde::Deserialize)] +#[serde(default, deny_unknown_fields)] pub struct PodmanComputeConfig { /// Path to the Podman API Unix socket. /// Default: `$XDG_RUNTIME_DIR/podman/podman.sock` (Linux), diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml index 9cba99045..f03f6f3cc 100644 --- a/crates/openshell-server/Cargo.toml +++ b/crates/openshell-server/Cargo.toml @@ -74,6 +74,7 @@ bytes = { workspace = true } pin-project-lite = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +toml = { workspace = true } tokio-stream = { workspace = true } sqlx = { workspace = true } reqwest = { workspace = true } diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index 534e3da37..813ea5d94 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -3,7 +3,8 @@ //! Shared CLI entrypoint for the gateway binaries. -use clap::{Command, CommandFactory, FromArgMatches, Parser}; +use clap::parser::ValueSource; +use clap::{ArgMatches, Command, CommandFactory, FromArgMatches, Parser}; use miette::{IntoDiagnostic, Result}; use openshell_core::ComputeDriverKind; use openshell_core::config::{ @@ -17,6 +18,7 @@ use tracing_subscriber::EnvFilter; use crate::certgen; use crate::compute::{DockerComputeConfig, VmComputeConfig}; +use crate::config_file::{self, ConfigFile, GatewayFileSection}; use crate::{run_server, tracing_bus::TracingLogBus}; /// `OpenShell` gateway process - gRPC and HTTP server with protocol multiplexing. @@ -43,6 +45,14 @@ enum Commands { #[derive(clap::Args, Debug)] struct RunArgs { + /// Path to a TOML configuration file (see RFC 0003). + /// + /// When set, gateway-wide settings and per-driver tables are read from + /// the file. Command-line flags and `OPENSHELL_*` environment variables + /// continue to take precedence over file values. + #[arg(long, env = "OPENSHELL_GATEWAY_CONFIG")] + config: Option, + /// IP address to bind the server, health, and metrics listeners to. #[arg(long, default_value = "127.0.0.1", env = "OPENSHELL_BIND_ADDRESS")] bind_address: IpAddr, @@ -318,15 +328,28 @@ pub async fn run_cli() -> Result<()> { .install_default() .map_err(|e| miette::miette!("failed to install rustls crypto provider: {e:?}"))?; - let cli = Cli::from_arg_matches(&command().get_matches()).expect("clap validated args"); + let matches = command().get_matches(); + let cli = Cli::from_arg_matches(&matches).expect("clap validated args"); match cli.command { Some(Commands::GenerateCerts(args)) => certgen::run(args).await, - None => Box::pin(run_from_args(cli.run)).await, + None => Box::pin(run_from_args(cli.run, matches)).await, } } -async fn run_from_args(args: RunArgs) -> Result<()> { +async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { + // Load TOML file when --config / OPENSHELL_GATEWAY_CONFIG is set. + // File values are applied below for any argument that is still at its + // built-in default — CLI flags and OPENSHELL_* env vars always win. + let file: Option = if let Some(path) = args.config.clone() { + Some(config_file::load(&path).map_err(|e| miette::miette!("{e}"))?) + } else { + None + }; + if let Some(file) = file.as_ref() { + merge_file_into_args(&mut args, &file.openshell.gateway, &matches); + } + let tracing_log_bus = TracingLogBus::new(); tracing_log_bus.install_subscriber( EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&args.log_level)), @@ -337,15 +360,15 @@ async fn run_from_args(args: RunArgs) -> Result<()> { let tls = if args.disable_tls { None } else { - let cert_path = args.tls_cert.ok_or_else(|| { + let cert_path = args.tls_cert.clone().ok_or_else(|| { miette::miette!( "--tls-cert is required when TLS is enabled (use --disable-tls to skip)" ) })?; - let key_path = args.tls_key.ok_or_else(|| { + let key_path = args.tls_key.clone().ok_or_else(|| { miette::miette!("--tls-key is required when TLS is enabled (use --disable-tls to skip)") })?; - let client_ca_path = args.tls_client_ca.ok_or_else(|| { + let client_ca_path = args.tls_client_ca.clone().ok_or_else(|| { miette::miette!( "--tls-client-ca is required when TLS is enabled (use --disable-tls to skip)" ) @@ -360,6 +383,7 @@ async fn run_from_args(args: RunArgs) -> Result<()> { let db_url = args .db_url + .clone() .ok_or_else(|| miette::miette!("--db-url is required (or set OPENSHELL_DB_URL)"))?; let mut config = openshell_core::Config::new(tls) @@ -396,72 +420,54 @@ async fn run_from_args(args: RunArgs) -> Result<()> { config = config .with_database_url(db_url) - .with_compute_drivers(args.drivers) - .with_sandbox_namespace(args.sandbox_namespace) - .with_ssh_gateway_host(args.ssh_gateway_host) + .with_compute_drivers(args.drivers.clone()) + .with_sandbox_namespace(args.sandbox_namespace.clone()) + .with_ssh_gateway_host(args.ssh_gateway_host.clone()) .with_ssh_gateway_port(args.ssh_gateway_port) - .with_ssh_connect_path(args.ssh_connect_path) + .with_ssh_connect_path(args.ssh_connect_path.clone()) .with_sandbox_ssh_port(args.sandbox_ssh_port) .with_ssh_handshake_skew_secs(args.ssh_handshake_skew_secs); - if let Some(image) = args.sandbox_image { + if let Some(image) = args.sandbox_image.clone() { config = config.with_sandbox_image(image); } - if let Some(policy) = args.sandbox_image_pull_policy { + if let Some(policy) = args.sandbox_image_pull_policy.clone() { config = config.with_sandbox_image_pull_policy(policy); } - if let Some(endpoint) = args.grpc_endpoint { + if let Some(endpoint) = args.grpc_endpoint.clone() { config = config.with_grpc_endpoint(endpoint); } - if let Some(secret) = args.ssh_handshake_secret { + if let Some(secret) = args.ssh_handshake_secret.clone() { config = config.with_ssh_handshake_secret(secret); } - if let Some(name) = args.client_tls_secret_name { + if let Some(name) = args.client_tls_secret_name.clone() { config = config.with_client_tls_secret_name(name); } - if let Some(ip) = args.host_gateway_ip { + if let Some(ip) = args.host_gateway_ip.clone() { config = config.with_host_gateway_ip(ip); } - if let Some(issuer) = args.oidc_issuer { + if let Some(issuer) = args.oidc_issuer.clone() { config = config.with_oidc(openshell_core::OidcConfig { issuer, - audience: args.oidc_audience, + audience: args.oidc_audience.clone(), jwks_ttl_secs: args.oidc_jwks_ttl, - roles_claim: args.oidc_roles_claim, - admin_role: args.oidc_admin_role, - user_role: args.oidc_user_role, - scopes_claim: args.oidc_scopes_claim, + roles_claim: args.oidc_roles_claim.clone(), + admin_role: args.oidc_admin_role.clone(), + user_role: args.oidc_user_role.clone(), + scopes_claim: args.oidc_scopes_claim.clone(), }); } config.enable_user_namespaces = args.enable_user_namespaces; - let vm_config = VmComputeConfig { - state_dir: args.vm_driver_state_dir, - driver_dir: args.driver_dir, - default_image: config.sandbox_image.clone(), - krun_log_level: args.vm_krun_log_level, - vcpus: args.vm_vcpus, - mem_mib: args.vm_mem_mib, - guest_tls_ca: args.vm_tls_ca, - guest_tls_cert: args.vm_tls_cert, - guest_tls_key: args.vm_tls_key, - }; - - let docker_config = DockerComputeConfig { - supervisor_bin: args.docker_supervisor_bin, - supervisor_image: args.docker_supervisor_image, - guest_tls_ca: args.docker_tls_ca, - guest_tls_cert: args.docker_tls_cert, - guest_tls_key: args.docker_tls_key, - network_name: args.docker_network_name, - }; + let vm_config = build_vm_config(&args, &matches, &config, file.as_ref())?; + let docker_config = build_docker_config(&args, &matches, file.as_ref())?; if args.disable_tls { info!("TLS disabled — listening on plaintext HTTP"); @@ -471,15 +477,263 @@ async fn run_from_args(args: RunArgs) -> Result<()> { info!(bind = %config.bind_address, "Starting OpenShell server"); - run_server(config, vm_config, docker_config, tracing_log_bus) - .await - .into_diagnostic() + Box::pin(run_server( + config, + vm_config, + docker_config, + file, + tracing_log_bus, + )) + .await + .into_diagnostic() } fn parse_compute_driver(value: &str) -> std::result::Result { value.parse() } +/// Returns `true` when an argument's value came from clap's built-in default +/// (or was never supplied at all). When the predicate is `true`, the loader +/// is free to replace the value with one read from the TOML config file. +fn arg_defaulted(matches: &ArgMatches, id: &str) -> bool { + matches!( + matches.value_source(id), + None | Some(ValueSource::DefaultValue) + ) +} + +/// Apply gateway-wide values from `[openshell.gateway]` onto `RunArgs` for +/// every argument that is still sourced from clap's built-in default. +/// +/// The function intentionally does not touch `database_url` or +/// `ssh_handshake_secret` — those secrets are env-only and the loader +/// already rejected them when they appear in the file. +fn merge_file_into_args(args: &mut RunArgs, file: &GatewayFileSection, matches: &ArgMatches) { + if let Some(addr) = file.bind_address { + if arg_defaulted(matches, "bind_address") { + args.bind_address = addr.ip(); + } + if arg_defaulted(matches, "port") { + args.port = addr.port(); + } + } + if let Some(addr) = file.health_bind_address + && arg_defaulted(matches, "health_port") + { + args.health_port = addr.port(); + } + if let Some(addr) = file.metrics_bind_address + && arg_defaulted(matches, "metrics_port") + { + args.metrics_port = addr.port(); + } + if let Some(level) = &file.log_level + && arg_defaulted(matches, "log_level") + { + args.log_level.clone_from(level); + } + if let Some(drivers) = &file.compute_drivers + && arg_defaulted(matches, "drivers") + { + args.drivers.clone_from(drivers); + } + if let Some(ns) = &file.sandbox_namespace + && arg_defaulted(matches, "sandbox_namespace") + { + args.sandbox_namespace.clone_from(ns); + } + if let Some(port) = file.sandbox_ssh_port + && arg_defaulted(matches, "sandbox_ssh_port") + { + args.sandbox_ssh_port = port; + } + if let Some(host) = &file.ssh_gateway_host + && arg_defaulted(matches, "ssh_gateway_host") + { + args.ssh_gateway_host.clone_from(host); + } + if let Some(port) = file.ssh_gateway_port + && arg_defaulted(matches, "ssh_gateway_port") + { + args.ssh_gateway_port = port; + } + if let Some(path) = &file.ssh_connect_path + && arg_defaulted(matches, "ssh_connect_path") + { + args.ssh_connect_path.clone_from(path); + } + if let Some(skew) = file.ssh_handshake_skew_secs + && arg_defaulted(matches, "ssh_handshake_skew_secs") + { + args.ssh_handshake_skew_secs = skew; + } + if let Some(image) = &file.default_image + && args.sandbox_image.is_none() + && arg_defaulted(matches, "sandbox_image") + { + args.sandbox_image = Some(image.clone()); + } + if let Some(policy) = &file.image_pull_policy + && args.sandbox_image_pull_policy.is_none() + && arg_defaulted(matches, "sandbox_image_pull_policy") + { + args.sandbox_image_pull_policy = Some(policy.clone()); + } + if let Some(secret) = &file.client_tls_secret_name + && args.client_tls_secret_name.is_none() + && arg_defaulted(matches, "client_tls_secret_name") + { + args.client_tls_secret_name = Some(secret.clone()); + } + if let Some(ip) = &file.host_gateway_ip + && args.host_gateway_ip.is_none() + && arg_defaulted(matches, "host_gateway_ip") + { + args.host_gateway_ip = Some(ip.clone()); + } + if let Some(enabled) = file.enable_user_namespaces + && arg_defaulted(matches, "enable_user_namespaces") + { + args.enable_user_namespaces = enabled; + } + // TLS gateway listener fields + if let Some(tls) = &file.tls { + if args.tls_cert.is_none() && arg_defaulted(matches, "tls_cert") { + args.tls_cert = Some(tls.cert_path.clone()); + } + if args.tls_key.is_none() && arg_defaulted(matches, "tls_key") { + args.tls_key = Some(tls.key_path.clone()); + } + if args.tls_client_ca.is_none() && arg_defaulted(matches, "tls_client_ca") { + args.tls_client_ca = Some(tls.client_ca_path.clone()); + } + if tls.allow_unauthenticated && arg_defaulted(matches, "disable_gateway_auth") { + args.disable_gateway_auth = true; + } + } + // OIDC fields + if let Some(oidc) = &file.oidc { + if args.oidc_issuer.is_none() && arg_defaulted(matches, "oidc_issuer") { + args.oidc_issuer = Some(oidc.issuer.clone()); + } + if arg_defaulted(matches, "oidc_audience") { + args.oidc_audience.clone_from(&oidc.audience); + } + if arg_defaulted(matches, "oidc_jwks_ttl") { + args.oidc_jwks_ttl = oidc.jwks_ttl_secs; + } + if arg_defaulted(matches, "oidc_roles_claim") { + args.oidc_roles_claim.clone_from(&oidc.roles_claim); + } + if arg_defaulted(matches, "oidc_admin_role") { + args.oidc_admin_role.clone_from(&oidc.admin_role); + } + if arg_defaulted(matches, "oidc_user_role") { + args.oidc_user_role.clone_from(&oidc.user_role); + } + if arg_defaulted(matches, "oidc_scopes_claim") { + args.oidc_scopes_claim.clone_from(&oidc.scopes_claim); + } + } +} + +/// Build [`VmComputeConfig`] by overlaying CLI args on top of the +/// `[openshell.drivers.vm]` table inherited from `[openshell.gateway]`. +fn build_vm_config( + args: &RunArgs, + matches: &ArgMatches, + config: &openshell_core::Config, + file: Option<&ConfigFile>, +) -> Result { + let mut cfg = if let Some(file) = file { + let merged = config_file::driver_table( + ComputeDriverKind::Vm, + &file.openshell.gateway, + file.openshell.drivers.get("vm"), + ); + merged + .try_into::() + .map_err(|e| miette::miette!("invalid [openshell.drivers.vm] table: {e}"))? + } else { + VmComputeConfig::default() + }; + + // CLI/env overrides — and `state_dir` is also pulled from RunArgs when the + // file did not set it, so the gateway always has a working directory. + if !arg_defaulted(matches, "vm_driver_state_dir") || cfg.state_dir.as_os_str().is_empty() { + cfg.state_dir.clone_from(&args.vm_driver_state_dir); + } + if !arg_defaulted(matches, "driver_dir") || cfg.driver_dir.is_none() { + cfg.driver_dir.clone_from(&args.driver_dir); + } + if !arg_defaulted(matches, "vm_krun_log_level") { + cfg.krun_log_level = args.vm_krun_log_level; + } + if !arg_defaulted(matches, "vm_vcpus") { + cfg.vcpus = args.vm_vcpus; + } + if !arg_defaulted(matches, "vm_mem_mib") { + cfg.mem_mib = args.vm_mem_mib; + } + if let Some(p) = args.vm_tls_ca.clone() { + cfg.guest_tls_ca = Some(p); + } + if let Some(p) = args.vm_tls_cert.clone() { + cfg.guest_tls_cert = Some(p); + } + if let Some(p) = args.vm_tls_key.clone() { + cfg.guest_tls_key = Some(p); + } + // Fall through: image inherited from gateway-wide `sandbox_image` when + // the merged table did not supply `default_image`. + if cfg.default_image.is_empty() { + cfg.default_image.clone_from(&config.sandbox_image); + } + Ok(cfg) +} + +/// Build [`DockerComputeConfig`] using the same inheritance pattern as +/// [`build_vm_config`]. +fn build_docker_config( + args: &RunArgs, + matches: &ArgMatches, + file: Option<&ConfigFile>, +) -> Result { + let mut cfg = if let Some(file) = file { + let merged = config_file::driver_table( + ComputeDriverKind::Docker, + &file.openshell.gateway, + file.openshell.drivers.get("docker"), + ); + merged + .try_into::() + .map_err(|e| miette::miette!("invalid [openshell.drivers.docker] table: {e}"))? + } else { + DockerComputeConfig::default() + }; + + if args.docker_supervisor_bin.is_some() { + cfg.supervisor_bin.clone_from(&args.docker_supervisor_bin); + } + if args.docker_supervisor_image.is_some() { + cfg.supervisor_image + .clone_from(&args.docker_supervisor_image); + } + if args.docker_tls_ca.is_some() { + cfg.guest_tls_ca.clone_from(&args.docker_tls_ca); + } + if args.docker_tls_cert.is_some() { + cfg.guest_tls_cert.clone_from(&args.docker_tls_cert); + } + if args.docker_tls_key.is_some() { + cfg.guest_tls_key.clone_from(&args.docker_tls_key); + } + if !arg_defaulted(matches, "docker_network_name") { + cfg.network_name.clone_from(&args.docker_network_name); + } + Ok(cfg) +} + #[cfg(test)] mod tests { use super::{Cli, command}; @@ -647,4 +901,167 @@ mod tests { assert!(cli.command.is_none()); assert!(cli.run.db_url.is_none()); } + + // ── Config-file merge tests ────────────────────────────────────────── + // + // `merge_file_into_args` is the bridge between `config_file::ConfigFile` + // and `RunArgs`. These cases lock in the precedence rule: + // + // CLI flag > OPENSHELL_* env var > TOML file > built-in default + // + // by exercising each combination on representative gateway fields. + + use super::{ConfigFile, merge_file_into_args}; + use clap::FromArgMatches; + + fn parse_with_args(argv: &[&str]) -> (super::RunArgs, clap::ArgMatches) { + let matches = command().try_get_matches_from(argv).expect("parses"); + let cli = Cli::from_arg_matches(&matches).expect("from arg matches"); + (cli.run, matches) + } + + fn config_file_from_toml(toml: &str) -> ConfigFile { + toml::from_str(toml).expect("valid TOML in test fixture") + } + + #[test] + fn file_value_applies_when_cli_uses_default() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::remove("OPENSHELL_BIND_ADDRESS"); + let _g2 = EnvVarGuard::remove("OPENSHELL_SERVER_PORT"); + let _g3 = EnvVarGuard::remove("OPENSHELL_LOG_LEVEL"); + + let (mut args, matches) = + parse_with_args(&["openshell-gateway", "--db-url", "sqlite::memory:"]); + let file = config_file_from_toml( + r#" +[openshell.gateway] +bind_address = "0.0.0.0:9090" +log_level = "debug" +"#, + ); + merge_file_into_args(&mut args, &file.openshell.gateway, &matches); + + assert_eq!(args.bind_address, IpAddr::V4(Ipv4Addr::UNSPECIFIED)); + assert_eq!(args.port, 9090); + assert_eq!(args.log_level, "debug"); + } + + #[test] + fn cli_flag_overrides_file_value() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::remove("OPENSHELL_BIND_ADDRESS"); + let _g2 = EnvVarGuard::remove("OPENSHELL_LOG_LEVEL"); + + let (mut args, matches) = parse_with_args(&[ + "openshell-gateway", + "--db-url", + "sqlite::memory:", + "--log-level", + "warn", + ]); + let file = config_file_from_toml( + r#" +[openshell.gateway] +log_level = "debug" +"#, + ); + merge_file_into_args(&mut args, &file.openshell.gateway, &matches); + + assert_eq!(args.log_level, "warn", "CLI flag must win over file"); + } + + #[test] + fn env_var_overrides_file_value() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g = EnvVarGuard::set("OPENSHELL_LOG_LEVEL", "trace"); + + let (mut args, matches) = + parse_with_args(&["openshell-gateway", "--db-url", "sqlite::memory:"]); + let file = config_file_from_toml( + r#" +[openshell.gateway] +log_level = "debug" +"#, + ); + merge_file_into_args(&mut args, &file.openshell.gateway, &matches); + + assert_eq!(args.log_level, "trace", "env var must win over file"); + } + + #[test] + fn file_oidc_block_populates_oidc_args() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::remove("OPENSHELL_OIDC_ISSUER"); + let _g2 = EnvVarGuard::remove("OPENSHELL_OIDC_AUDIENCE"); + + let (mut args, matches) = + parse_with_args(&["openshell-gateway", "--db-url", "sqlite::memory:"]); + let file = config_file_from_toml( + r#" +[openshell.gateway.oidc] +issuer = "https://idp.example.com" +audience = "openshell-cli" +"#, + ); + merge_file_into_args(&mut args, &file.openshell.gateway, &matches); + + assert_eq!(args.oidc_issuer.as_deref(), Some("https://idp.example.com")); + assert_eq!(args.oidc_audience, "openshell-cli"); + } + + #[test] + fn driver_inherits_shared_image_from_gateway_section() { + // [openshell.gateway].default_image inherits into the K8s driver + // table when the driver-specific table does not set it. + let file = config_file_from_toml( + r#" +[openshell.gateway] +default_image = "ghcr.io/nvidia/openshell/sandbox:1.0" + +[openshell.drivers.kubernetes] +namespace = "agents" +"#, + ); + let merged = crate::config_file::driver_table( + super::ComputeDriverKind::Kubernetes, + &file.openshell.gateway, + file.openshell.drivers.get("kubernetes"), + ); + let parsed = merged + .try_into::() + .expect("merged table deserializes"); + assert_eq!(parsed.default_image, "ghcr.io/nvidia/openshell/sandbox:1.0"); + assert_eq!(parsed.namespace, "agents"); + } + + #[test] + fn driver_specific_value_overrides_gateway_inheritance() { + let file = config_file_from_toml( + r#" +[openshell.gateway] +default_image = "gateway-default:1.0" + +[openshell.drivers.kubernetes] +default_image = "k8s-specific:1.0" +"#, + ); + let merged = crate::config_file::driver_table( + super::ComputeDriverKind::Kubernetes, + &file.openshell.gateway, + file.openshell.drivers.get("kubernetes"), + ); + let parsed = merged + .try_into::() + .expect("deserializes"); + assert_eq!(parsed.default_image, "k8s-specific:1.0"); + } } diff --git a/crates/openshell-server/src/compute/vm.rs b/crates/openshell-server/src/compute/vm.rs index 1e62d4942..3a67f381b 100644 --- a/crates/openshell-server/src/compute/vm.rs +++ b/crates/openshell-server/src/compute/vm.rs @@ -60,7 +60,8 @@ const COMPUTE_DRIVER_SOCKET_RUN_DIR: &str = "run"; const COMPUTE_DRIVER_SOCKET_NAME: &str = "compute-driver.sock"; /// Configuration for launching and talking to the VM compute driver. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +#[serde(default, deny_unknown_fields)] pub struct VmComputeConfig { /// Working directory for VM driver sandbox state. pub state_dir: PathBuf, diff --git a/crates/openshell-server/src/config_file.rs b/crates/openshell-server/src/config_file.rs new file mode 100644 index 000000000..9ffebe5a7 --- /dev/null +++ b/crates/openshell-server/src/config_file.rs @@ -0,0 +1,532 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! TOML configuration file loader for the gateway. +//! +//! See `rfc/0003-gateway-configuration/README.md` for the file format. This +//! module parses the file into [`ConfigFile`], rejects fields that must be +//! supplied via env/CLI (database URL, SSH handshake secret), and provides +//! [`driver_table`] which overlays shared `[openshell.gateway]` defaults onto +//! a `[openshell.drivers.]` table so each driver crate's +//! `Deserialize` impl sees a fully-populated table. +//! +//! The merge precedence at the gateway level is: +//! ```text +//! CLI flag > OPENSHELL_* env var > TOML file > built-in default +//! ``` +//! Per-field application of file values happens in [`crate::cli`], which uses +//! clap's `ArgMatches::value_source` to detect arguments that fell back to +//! their default and are therefore eligible for replacement by file values. + +use std::collections::BTreeMap; +use std::net::SocketAddr; +use std::path::{Path, PathBuf}; + +use openshell_core::config::ComputeDriverKind; +use openshell_core::{OidcConfig, TlsConfig}; +use serde::{Deserialize, Serialize}; + +/// Latest schema version this build understands. +pub const SCHEMA_VERSION: u32 = 1; + +/// Root of the gateway TOML config file. +/// +/// The file is rooted at `[openshell]` to reserve room for future components +/// (CLI, sandbox, router) to share a single config file without key +/// collisions. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct ConfigFile { + #[serde(default)] + pub openshell: OpenShellRoot, +} + +/// `[openshell]` table. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct OpenShellRoot { + /// Reserved for future schema migrations. Versions greater than + /// [`SCHEMA_VERSION`] are rejected at load time. + #[serde(default)] + pub version: Option, + + #[serde(default)] + pub gateway: GatewayFileSection, + + /// `[openshell.drivers.]` tables — passed verbatim to each driver + /// crate's `Deserialize` impl after the gateway-side inheritance merge. + /// Stored as raw [`toml::Value`] so each driver can evolve its schema + /// independently of this crate. + #[serde(default)] + pub drivers: BTreeMap, +} + +/// `[openshell.gateway]` section. +/// +/// All fields are `Option` so the loader can tell whether a key was set +/// in the file (`Some`) or not (`None` — value is taken from CLI/env/default). +/// +/// The fields under "Shared driver defaults" are inherited into +/// `[openshell.drivers.]` tables per [`inheritable_keys`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct GatewayFileSection { + // ── Listeners ──────────────────────────────────────────────────────── + #[serde(default)] + pub bind_address: Option, + #[serde(default)] + pub health_bind_address: Option, + #[serde(default)] + pub metrics_bind_address: Option, + + // ── Logging ────────────────────────────────────────────────────────── + #[serde(default)] + pub log_level: Option, + + // ── Drivers ────────────────────────────────────────────────────────── + #[serde(default)] + pub compute_drivers: Option>, + + // ── Sandbox / SSH ──────────────────────────────────────────────────── + #[serde(default)] + pub sandbox_namespace: Option, + #[serde(default)] + pub sandbox_ssh_port: Option, + #[serde(default)] + pub ssh_gateway_host: Option, + #[serde(default)] + pub ssh_gateway_port: Option, + #[serde(default)] + pub ssh_connect_path: Option, + #[serde(default)] + pub ssh_handshake_skew_secs: Option, + #[serde(default)] + pub ssh_session_ttl_secs: Option, + + // ── Shared driver defaults (inherited into [openshell.drivers.]) ─ + #[serde(default)] + pub default_image: Option, + #[serde(default)] + pub supervisor_image: Option, + #[serde(default)] + pub image_pull_policy: Option, + #[serde(default)] + pub client_tls_secret_name: Option, + #[serde(default)] + pub host_gateway_ip: Option, + #[serde(default)] + pub enable_user_namespaces: Option, + #[serde(default)] + pub guest_tls_ca: Option, + #[serde(default)] + pub guest_tls_cert: Option, + #[serde(default)] + pub guest_tls_key: Option, + + // ── Nested tables ──────────────────────────────────────────────────── + #[serde(default)] + pub tls: Option, + #[serde(default)] + pub oidc: Option, + + // ── Disallowed-in-file fields ──────────────────────────────────────── + // + // Captured so we can produce a friendly "set this via env/CLI instead" + // error rather than a generic "unknown field" message. Validated and + // rejected in [`load`]. + #[serde(default)] + pub database_url: Option, + #[serde(default)] + pub ssh_handshake_secret: Option, +} + +#[derive(Debug, thiserror::Error)] +pub enum ConfigFileError { + #[error("failed to read gateway config file '{}': {source}", path.display())] + Io { + path: PathBuf, + #[source] + source: std::io::Error, + }, + #[error("failed to parse gateway config file '{}': {source}", path.display())] + Parse { + path: PathBuf, + #[source] + source: toml::de::Error, + }, + #[error( + "unsupported gateway config version {version}; this build only supports version {SCHEMA_VERSION}" + )] + UnsupportedVersion { version: u32 }, + #[error( + "`{field}` is not allowed in the gateway config file — set the {env} env var or pass {cli} on the command line" + )] + SecretInFile { + field: &'static str, + env: &'static str, + cli: &'static str, + }, +} + +/// Load and validate a TOML config file. +/// +/// Returns `Ok(ConfigFile::default())` for an empty file (the gateway then +/// falls back entirely to CLI/env/built-in defaults). +pub fn load(path: &Path) -> Result { + let contents = std::fs::read_to_string(path).map_err(|source| ConfigFileError::Io { + path: path.to_path_buf(), + source, + })?; + if contents.trim().is_empty() { + return Ok(ConfigFile::default()); + } + let file: ConfigFile = toml::from_str(&contents).map_err(|source| ConfigFileError::Parse { + path: path.to_path_buf(), + source, + })?; + + if let Some(version) = file.openshell.version + && version > SCHEMA_VERSION + { + return Err(ConfigFileError::UnsupportedVersion { version }); + } + + if file.openshell.gateway.database_url.is_some() { + return Err(ConfigFileError::SecretInFile { + field: "database_url", + env: "OPENSHELL_DB_URL", + cli: "--db-url", + }); + } + if file.openshell.gateway.ssh_handshake_secret.is_some() { + return Err(ConfigFileError::SecretInFile { + field: "ssh_handshake_secret", + env: "OPENSHELL_SSH_HANDSHAKE_SECRET", + cli: "--ssh-handshake-secret", + }); + } + + Ok(file) +} + +/// Build the merged TOML table for `driver` by overlaying inheritable +/// `[openshell.gateway]` defaults onto `[openshell.drivers.]`. +/// +/// The returned [`toml::Value`] is a Table ready to feed into the driver's +/// `Deserialize` impl — keys present in `raw` win over the gateway defaults. +/// Keys outside [`inheritable_keys`] for this driver are never copied from +/// the gateway section, which keeps each driver's `deny_unknown_fields` +/// invariant intact. +pub fn driver_table( + driver: ComputeDriverKind, + gateway: &GatewayFileSection, + raw: Option<&toml::Value>, +) -> toml::Value { + let mut merged = match raw { + Some(toml::Value::Table(table)) => table.clone(), + _ => toml::Table::new(), + }; + + for key in inheritable_keys(driver) { + if merged.contains_key(*key) { + continue; + } + if let Some(value) = gateway_inherited_value(gateway, key) { + merged.insert((*key).to_string(), value); + } + } + + toml::Value::Table(merged) +} + +/// Inheritance allowlist (the Q4 "high-overlap set"). Each driver opts in +/// to a specific subset so a gateway-wide default does not accidentally land +/// in a driver table that does not understand the field. +fn inheritable_keys(driver: ComputeDriverKind) -> &'static [&'static str] { + match driver { + ComputeDriverKind::Kubernetes => &[ + "default_image", + "supervisor_image", + "image_pull_policy", + "client_tls_secret_name", + "host_gateway_ip", + "ssh_handshake_skew_secs", + "enable_user_namespaces", + ], + ComputeDriverKind::Docker => &[ + "supervisor_image", + "guest_tls_ca", + "guest_tls_cert", + "guest_tls_key", + ], + ComputeDriverKind::Podman => &[ + "default_image", + "supervisor_image", + "image_pull_policy", + "guest_tls_ca", + "guest_tls_cert", + "guest_tls_key", + "ssh_handshake_skew_secs", + ], + ComputeDriverKind::Vm => &[ + "default_image", + "guest_tls_ca", + "guest_tls_cert", + "guest_tls_key", + ], + } +} + +fn gateway_inherited_value(g: &GatewayFileSection, key: &str) -> Option { + match key { + "default_image" => g.default_image.as_deref().map(string_value), + "supervisor_image" => g.supervisor_image.as_deref().map(string_value), + "image_pull_policy" => g.image_pull_policy.as_deref().map(string_value), + "client_tls_secret_name" => g.client_tls_secret_name.as_deref().map(string_value), + "host_gateway_ip" => g.host_gateway_ip.as_deref().map(string_value), + "ssh_handshake_skew_secs" => g.ssh_handshake_skew_secs.and_then(skew_value), + "enable_user_namespaces" => g.enable_user_namespaces.map(toml::Value::Boolean), + "guest_tls_ca" => g.guest_tls_ca.as_deref().map(path_value), + "guest_tls_cert" => g.guest_tls_cert.as_deref().map(path_value), + "guest_tls_key" => g.guest_tls_key.as_deref().map(path_value), + _ => None, + } +} + +fn string_value(s: &str) -> toml::Value { + toml::Value::String(s.to_owned()) +} + +fn path_value(p: &Path) -> toml::Value { + toml::Value::String(p.display().to_string()) +} + +fn skew_value(n: u64) -> Option { + i64::try_from(n).ok().map(toml::Value::Integer) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + fn write_tmp(contents: &str) -> tempfile::NamedTempFile { + let mut tmp = tempfile::Builder::new() + .suffix(".toml") + .tempfile() + .expect("tempfile"); + tmp.write_all(contents.as_bytes()).expect("write"); + tmp + } + + #[test] + fn empty_file_yields_default_config() { + let tmp = write_tmp(""); + let file = load(tmp.path()).expect("empty file parses"); + assert!(file.openshell.version.is_none()); + assert!(file.openshell.gateway.bind_address.is_none()); + assert!(file.openshell.drivers.is_empty()); + } + + #[test] + fn parses_full_example() { + let toml = r#" +[openshell] +version = 1 + +[openshell.gateway] +bind_address = "0.0.0.0:8080" +health_bind_address = "0.0.0.0:8081" +log_level = "info" +compute_drivers = ["kubernetes"] +sandbox_namespace = "agents" +default_image = "ghcr.io/nvidia/openshell/sandbox:latest" +supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +image_pull_policy = "IfNotPresent" +client_tls_secret_name = "openshell-sandbox-tls" + +[openshell.gateway.tls] +cert_path = "/etc/openshell/certs/gateway.pem" +key_path = "/etc/openshell/certs/gateway-key.pem" +client_ca_path = "/etc/openshell/certs/client-ca.pem" + +[openshell.gateway.oidc] +issuer = "https://idp.example.com/realms/openshell" +audience = "openshell-cli" + +[openshell.drivers.kubernetes] +namespace = "agents" +grpc_endpoint = "https://openshell-gateway.agents.svc:8080" +"#; + let tmp = write_tmp(toml); + let file = load(tmp.path()).expect("valid file parses"); + let gw = &file.openshell.gateway; + assert_eq!(gw.log_level.as_deref(), Some("info")); + assert_eq!( + gw.default_image.as_deref(), + Some("ghcr.io/nvidia/openshell/sandbox:latest") + ); + assert!(gw.tls.is_some()); + assert!(gw.oidc.is_some()); + assert!(file.openshell.drivers.contains_key("kubernetes")); + } + + #[test] + fn rejects_database_url_in_file() { + let toml = r#" +[openshell.gateway] +database_url = "sqlite::memory:" +"#; + let tmp = write_tmp(toml); + let err = load(tmp.path()).expect_err("database_url must be rejected"); + assert!(matches!( + err, + ConfigFileError::SecretInFile { + field: "database_url", + .. + } + )); + } + + #[test] + fn rejects_ssh_handshake_secret_in_file() { + let toml = r#" +[openshell.gateway] +ssh_handshake_secret = "leaked" +"#; + let tmp = write_tmp(toml); + let err = load(tmp.path()).expect_err("ssh_handshake_secret must be rejected"); + assert!(matches!( + err, + ConfigFileError::SecretInFile { + field: "ssh_handshake_secret", + .. + } + )); + } + + #[test] + fn rejects_unknown_gateway_field() { + let toml = r" +[openshell.gateway] +nonsense = true +"; + let tmp = write_tmp(toml); + let err = load(tmp.path()).expect_err("unknown field must be rejected"); + assert!(matches!(err, ConfigFileError::Parse { .. })); + } + + #[test] + fn rejects_unsupported_version() { + let toml = r" +[openshell] +version = 2 +"; + let tmp = write_tmp(toml); + let err = load(tmp.path()).expect_err("version > 1 must be rejected"); + assert!(matches!( + err, + ConfigFileError::UnsupportedVersion { version: 2 } + )); + } + + #[test] + fn driver_table_inherits_gateway_defaults() { + let gateway = GatewayFileSection { + default_image: Some("ghcr.io/nvidia/openshell/sandbox:0.9".to_string()), + supervisor_image: Some("ghcr.io/nvidia/openshell/supervisor:0.9".to_string()), + image_pull_policy: Some("IfNotPresent".to_string()), + ..Default::default() + }; + let raw = toml::toml! { + namespace = "agents" + }; + let merged = driver_table( + ComputeDriverKind::Kubernetes, + &gateway, + Some(&toml::Value::Table(raw)), + ); + let table = merged.as_table().expect("table"); + assert_eq!( + table.get("namespace").and_then(|v| v.as_str()), + Some("agents") + ); + assert_eq!( + table.get("default_image").and_then(|v| v.as_str()), + Some("ghcr.io/nvidia/openshell/sandbox:0.9") + ); + assert_eq!( + table.get("supervisor_image").and_then(|v| v.as_str()), + Some("ghcr.io/nvidia/openshell/supervisor:0.9") + ); + } + + #[test] + fn driver_table_specific_value_overrides_gateway_default() { + let gateway = GatewayFileSection { + default_image: Some("gateway-default".to_string()), + ..Default::default() + }; + let raw = toml::toml! { + default_image = "driver-specific" + }; + let merged = driver_table( + ComputeDriverKind::Podman, + &gateway, + Some(&toml::Value::Table(raw)), + ); + assert_eq!( + merged + .as_table() + .unwrap() + .get("default_image") + .and_then(|v| v.as_str()), + Some("driver-specific") + ); + } + + #[test] + fn driver_table_does_not_leak_keys_outside_allowlist() { + // `client_tls_secret_name` is K8s-only; Docker must not receive it + // even when set at gateway scope. + let gateway = GatewayFileSection { + client_tls_secret_name: Some("openshell-sandbox-tls".to_string()), + ..Default::default() + }; + let merged = driver_table(ComputeDriverKind::Docker, &gateway, None); + assert!( + !merged + .as_table() + .unwrap() + .contains_key("client_tls_secret_name") + ); + } + + #[test] + fn missing_path_is_io_error() { + let err = load(Path::new("/nonexistent/openshell-gateway.toml")) + .expect_err("missing file must be io error"); + assert!(matches!(err, ConfigFileError::Io { .. })); + } + + /// Lock in that every checked-in example under `examples/gateway/` + /// round-trips through the loader. Catches schema drift the moment a + /// field is renamed or a doc snippet falls out of date. + #[test] + fn checked_in_examples_parse() { + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(Path::parent) + .expect("crate manifest has grandparent"); + for name in [ + "gateway.example.toml", + "kubernetes.example.toml", + "docker.example.toml", + "podman.example.toml", + "microvm.example.toml", + ] { + let path = repo_root.join("examples/gateway").join(name); + load(&path).unwrap_or_else(|e| panic!("{name} failed to parse: {e}")); + } + } +} diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 07c3cef5c..b3277e842 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -23,6 +23,7 @@ mod auth; pub mod certgen; pub mod cli; mod compute; +pub mod config_file; mod grpc; mod http; mod inference; @@ -151,6 +152,7 @@ pub async fn run_server( config: Config, vm_config: VmComputeConfig, docker_config: DockerComputeConfig, + config_file: Option, tracing_log_bus: TracingLogBus, ) -> Result<()> { let database_url = config.database_url.trim(); @@ -193,6 +195,7 @@ pub async fn run_server( &config, &vm_config, &docker_config, + config_file.as_ref(), store.clone(), sandbox_index.clone(), sandbox_watch_bus.clone(), @@ -460,6 +463,7 @@ async fn build_compute_runtime( config: &Config, vm_config: &VmComputeConfig, docker_config: &DockerComputeConfig, + file: Option<&config_file::ConfigFile>, store: Arc, sandbox_index: SandboxIndex, sandbox_watch_bus: SandboxWatchBus, @@ -471,37 +475,42 @@ async fn build_compute_runtime( match driver { ComputeDriverKind::Kubernetes => { - let supervisor_image = std::env::var("OPENSHELL_SUPERVISOR_IMAGE") - .ok() - .filter(|s| !s.is_empty()) - .unwrap_or_else(|| openshell_core::config::DEFAULT_SUPERVISOR_IMAGE.to_string()); - let supervisor_image_pull_policy = - std::env::var("OPENSHELL_SUPERVISOR_IMAGE_PULL_POLICY") - .ok() - .filter(|s| !s.is_empty()) - .unwrap_or_default(); + let mut k8s = kubernetes_config_from_file(file)?; + // Env overrides file for fields not represented in Config. + if let Ok(v) = std::env::var("OPENSHELL_SUPERVISOR_IMAGE") + && !v.is_empty() + { + k8s.supervisor_image = v; + } + if let Ok(v) = std::env::var("OPENSHELL_SUPERVISOR_IMAGE_PULL_POLICY") + && !v.is_empty() + { + k8s.supervisor_image_pull_policy = v; + } + if let Ok(v) = std::env::var("OPENSHELL_SUPERVISOR_SIDELOAD_METHOD") + && !v.is_empty() + && let Ok(parsed) = v.parse() + { + k8s.supervisor_sideload_method = parsed; + } + // Shared fields are sourced from Config, which already merged + // file + CLI/env at startup. + k8s.namespace.clone_from(&config.sandbox_namespace); + k8s.default_image.clone_from(&config.sandbox_image); + k8s.image_pull_policy + .clone_from(&config.sandbox_image_pull_policy); + k8s.grpc_endpoint.clone_from(&config.grpc_endpoint); + k8s.ssh_socket_path + .clone_from(&config.sandbox_ssh_socket_path); + k8s.ssh_handshake_secret + .clone_from(&config.ssh_handshake_secret); + k8s.ssh_handshake_skew_secs = config.ssh_handshake_skew_secs; + k8s.client_tls_secret_name + .clone_from(&config.client_tls_secret_name); + k8s.host_gateway_ip.clone_from(&config.host_gateway_ip); + k8s.enable_user_namespaces = config.enable_user_namespaces; ComputeRuntime::new_kubernetes( - KubernetesComputeConfig { - namespace: config.sandbox_namespace.clone(), - default_image: config.sandbox_image.clone(), - image_pull_policy: config.sandbox_image_pull_policy.clone(), - supervisor_image, - supervisor_image_pull_policy, - supervisor_sideload_method: std::env::var( - "OPENSHELL_SUPERVISOR_SIDELOAD_METHOD", - ) - .ok() - .filter(|s| !s.is_empty()) - .and_then(|s| s.parse().ok()) - .unwrap_or_default(), - grpc_endpoint: config.grpc_endpoint.clone(), - ssh_socket_path: config.sandbox_ssh_socket_path.clone(), - ssh_handshake_secret: config.ssh_handshake_secret.clone(), - ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, - client_tls_secret_name: config.client_tls_secret_name.clone(), - host_gateway_ip: config.host_gateway_ip.clone(), - enable_user_namespaces: config.enable_user_namespaces, - }, + k8s, store, sandbox_index, sandbox_watch_bus, @@ -537,63 +546,60 @@ async fn build_compute_runtime( .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) } ComputeDriverKind::Podman => { - let socket_path = std::env::var("OPENSHELL_PODMAN_SOCKET") - .ok() - .filter(|s| !s.is_empty()) - .map_or_else( - openshell_driver_podman::PodmanComputeConfig::default_socket_path, - std::path::PathBuf::from, - ); - - let network_name = std::env::var("OPENSHELL_NETWORK_NAME") - .ok() - .filter(|s| !s.is_empty()) - .unwrap_or_else(|| openshell_core::config::DEFAULT_NETWORK_NAME.to_string()); - - let stop_timeout_secs: u32 = std::env::var("OPENSHELL_STOP_TIMEOUT") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(openshell_core::config::DEFAULT_STOP_TIMEOUT_SECS); - - let supervisor_image = std::env::var("OPENSHELL_SUPERVISOR_IMAGE") - .ok() - .filter(|s| !s.is_empty()) - .unwrap_or_else(|| openshell_core::config::DEFAULT_SUPERVISOR_IMAGE.to_string()); - - // TLS client cert paths for sandbox mTLS. When all three are - // set, the Podman driver bind-mounts them into sandbox - // containers and switches the endpoint to https://. - let podman_tls_ca = std::env::var("OPENSHELL_PODMAN_TLS_CA") - .ok() - .filter(|s| !s.is_empty()) - .map(std::path::PathBuf::from); - let podman_tls_cert = std::env::var("OPENSHELL_PODMAN_TLS_CERT") - .ok() - .filter(|s| !s.is_empty()) - .map(std::path::PathBuf::from); - let podman_tls_key = std::env::var("OPENSHELL_PODMAN_TLS_KEY") - .ok() - .filter(|s| !s.is_empty()) - .map(std::path::PathBuf::from); + let mut podman = podman_config_from_file(file)?; + // Env overrides file for fields not represented in Config. + if let Ok(v) = std::env::var("OPENSHELL_PODMAN_SOCKET") + && !v.is_empty() + { + podman.socket_path = std::path::PathBuf::from(v); + } + if let Ok(v) = std::env::var("OPENSHELL_NETWORK_NAME") + && !v.is_empty() + { + podman.network_name = v; + } + if let Ok(v) = std::env::var("OPENSHELL_STOP_TIMEOUT") + && let Ok(parsed) = v.parse() + { + podman.stop_timeout_secs = parsed; + } + if let Ok(v) = std::env::var("OPENSHELL_SUPERVISOR_IMAGE") + && !v.is_empty() + { + podman.supervisor_image = v; + } + if let Ok(v) = std::env::var("OPENSHELL_PODMAN_TLS_CA") + && !v.is_empty() + { + podman.guest_tls_ca = Some(std::path::PathBuf::from(v)); + } + if let Ok(v) = std::env::var("OPENSHELL_PODMAN_TLS_CERT") + && !v.is_empty() + { + podman.guest_tls_cert = Some(std::path::PathBuf::from(v)); + } + if let Ok(v) = std::env::var("OPENSHELL_PODMAN_TLS_KEY") + && !v.is_empty() + { + podman.guest_tls_key = Some(std::path::PathBuf::from(v)); + } + // Shared fields are sourced from Config (which already merged + // file + CLI/env at startup). + podman.default_image.clone_from(&config.sandbox_image); + podman.image_pull_policy = config.sandbox_image_pull_policy.parse().unwrap_or_default(); + podman.grpc_endpoint.clone_from(&config.grpc_endpoint); + podman.gateway_port = config.bind_address.port(); + podman + .sandbox_ssh_socket_path + .clone_from(&config.sandbox_ssh_socket_path); + podman.ssh_port = config.sandbox_ssh_port; + podman + .ssh_handshake_secret + .clone_from(&config.ssh_handshake_secret); + podman.ssh_handshake_skew_secs = config.ssh_handshake_skew_secs; ComputeRuntime::new_podman( - openshell_driver_podman::PodmanComputeConfig { - socket_path, - default_image: config.sandbox_image.clone(), - image_pull_policy: config.sandbox_image_pull_policy.parse().unwrap_or_default(), - grpc_endpoint: config.grpc_endpoint.clone(), - gateway_port: config.bind_address.port(), - sandbox_ssh_socket_path: config.sandbox_ssh_socket_path.clone(), - network_name, - ssh_port: config.sandbox_ssh_port, - ssh_handshake_secret: config.ssh_handshake_secret.clone(), - ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, - stop_timeout_secs, - supervisor_image, - guest_tls_ca: podman_tls_ca, - guest_tls_cert: podman_tls_cert, - guest_tls_key: podman_tls_key, - }, + podman, store, sandbox_index, sandbox_watch_bus, @@ -606,6 +612,43 @@ async fn build_compute_runtime( } } +/// Build a [`KubernetesComputeConfig`] from the file's +/// `[openshell.drivers.kubernetes]` table merged with inheritable +/// `[openshell.gateway]` defaults. Falls back to the driver's `Default` +/// when no file is present. +fn kubernetes_config_from_file( + file: Option<&config_file::ConfigFile>, +) -> Result { + let Some(file) = file else { + return Ok(KubernetesComputeConfig::default()); + }; + let merged = config_file::driver_table( + ComputeDriverKind::Kubernetes, + &file.openshell.gateway, + file.openshell.drivers.get("kubernetes"), + ); + merged + .try_into() + .map_err(|e| Error::config(format!("invalid [openshell.drivers.kubernetes] table: {e}"))) +} + +/// Same pattern as [`kubernetes_config_from_file`] but for Podman. +fn podman_config_from_file( + file: Option<&config_file::ConfigFile>, +) -> Result { + let Some(file) = file else { + return Ok(openshell_driver_podman::PodmanComputeConfig::default()); + }; + let merged = config_file::driver_table( + ComputeDriverKind::Podman, + &file.openshell.gateway, + file.openshell.drivers.get("podman"), + ); + merged + .try_into() + .map_err(|e| Error::config(format!("invalid [openshell.drivers.podman] table: {e}"))) +} + fn configured_compute_driver(config: &Config) -> Result { match config.compute_drivers.as_slice() { [] => openshell_core::config::detect_driver().ok_or_else(|| { diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml new file mode 100644 index 000000000..6caf44985 --- /dev/null +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -0,0 +1,91 @@ +{{/* +ConfigMap holding the gateway TOML config file (RFC 0003). + +The gateway reads `/etc/openshell/gateway.toml` (mounted from this ConfigMap) +at startup. CLI flags and OPENSHELL_* env vars on the StatefulSet container +still override anything in this file. + +Two values are intentionally NOT rendered here: + - server.dbUrl → passed via --db-url in the StatefulSet args + - sshHandshake secret → injected as OPENSHELL_SSH_HANDSHAKE_SECRET env var + from a Kubernetes Secret reference. +*/}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "openshell.fullname" . }}-config + labels: + {{- include "openshell.labels" . | nindent 4 }} +data: + gateway.toml: | + [openshell] + version = 1 + + [openshell.gateway] + bind_address = "0.0.0.0:{{ .Values.service.port }}" + {{- if .Values.service.healthPort }} + health_bind_address = "0.0.0.0:{{ .Values.service.healthPort }}" + {{- end }} + {{- if .Values.service.metricsPort }} + metrics_bind_address = "0.0.0.0:{{ .Values.service.metricsPort }}" + {{- end }} + log_level = {{ .Values.server.logLevel | quote }} + sandbox_namespace = {{ include "openshell.sandboxNamespace" . | quote }} + default_image = {{ .Values.server.sandboxImage | quote }} + supervisor_image = {{ include "openshell.supervisorImage" . | quote }} + {{- if .Values.server.sandboxImagePullPolicy }} + image_pull_policy = {{ .Values.server.sandboxImagePullPolicy | quote }} + {{- end }} + {{- if .Values.supervisor.image.pullPolicy }} + supervisor_image_pull_policy = {{ .Values.supervisor.image.pullPolicy | quote }} + {{- end }} + grpc_endpoint = {{ include "openshell.grpcEndpoint" . | quote }} + {{- if .Values.server.sshGatewayHost }} + ssh_gateway_host = {{ .Values.server.sshGatewayHost | quote }} + {{- end }} + {{- if .Values.server.sshGatewayPort }} + ssh_gateway_port = {{ .Values.server.sshGatewayPort }} + {{- end }} + {{- if .Values.server.hostGatewayIP }} + host_gateway_ip = {{ .Values.server.hostGatewayIP | quote }} + {{- end }} + {{- if .Values.server.enableUserNamespaces }} + enable_user_namespaces = true + {{- end }} + {{- if not .Values.server.disableTls }} + client_tls_secret_name = {{ .Values.server.tls.clientTlsSecretName | quote }} + {{- end }} + + {{- if not .Values.server.disableTls }} + + [openshell.gateway.tls] + cert_path = "/etc/openshell-tls/server/tls.crt" + key_path = "/etc/openshell-tls/server/tls.key" + client_ca_path = "/etc/openshell-tls/client-ca/ca.crt" + {{- if .Values.server.disableGatewayAuth }} + allow_unauthenticated = true + {{- end }} + {{- end }} + + {{- if .Values.server.oidc.issuer }} + + [openshell.gateway.oidc] + issuer = {{ .Values.server.oidc.issuer | quote }} + audience = {{ .Values.server.oidc.audience | quote }} + jwks_ttl_secs = {{ .Values.server.oidc.jwksTtl }} + {{- if .Values.server.oidc.rolesClaim }} + roles_claim = {{ .Values.server.oidc.rolesClaim | quote }} + {{- end }} + {{- if .Values.server.oidc.adminRole }} + admin_role = {{ .Values.server.oidc.adminRole | quote }} + {{- end }} + {{- if .Values.server.oidc.userRole }} + user_role = {{ .Values.server.oidc.userRole | quote }} + {{- end }} + {{- if .Values.server.oidc.scopesClaim }} + scopes_claim = {{ .Values.server.oidc.scopesClaim | quote }} + {{- end }} + {{- end }} + + [openshell.drivers.kubernetes] + supervisor_sideload_method = {{ include "openshell.supervisorSideloadMethod" . | quote }} diff --git a/deploy/helm/openshell/templates/statefulset.yaml b/deploy/helm/openshell/templates/statefulset.yaml index 4a0b70621..cbd70094a 100644 --- a/deploy/helm/openshell/templates/statefulset.yaml +++ b/deploy/helm/openshell/templates/statefulset.yaml @@ -47,104 +47,24 @@ spec: image: {{ include "openshell.image" . | quote }} imagePullPolicy: {{ .Values.image.pullPolicy }} args: - - --bind-address - - "0.0.0.0" - - --port - - {{ .Values.service.port | quote }} - - --health-port - - {{ .Values.service.healthPort | quote }} - {{- if .Values.service.metricsPort }} - - --metrics-port - - {{ .Values.service.metricsPort | quote }} - {{- end }} - - --log-level - - {{ .Values.server.logLevel }} + - --config + - /etc/openshell/gateway.toml - --db-url - {{ .Values.server.dbUrl | quote }} env: - - name: OPENSHELL_SANDBOX_NAMESPACE - value: {{ include "openshell.sandboxNamespace" . | quote }} - - name: OPENSHELL_SANDBOX_IMAGE - value: {{ .Values.server.sandboxImage | quote }} - {{- if .Values.server.sandboxImagePullPolicy }} - - name: OPENSHELL_SANDBOX_IMAGE_PULL_POLICY - value: {{ .Values.server.sandboxImagePullPolicy | quote }} - {{- end }} - - name: OPENSHELL_SUPERVISOR_IMAGE - value: {{ include "openshell.supervisorImage" . | quote }} - {{- if .Values.supervisor.image.pullPolicy }} - - name: OPENSHELL_SUPERVISOR_IMAGE_PULL_POLICY - value: {{ .Values.supervisor.image.pullPolicy | quote }} - {{- end }} - - name: OPENSHELL_SUPERVISOR_SIDELOAD_METHOD - value: {{ include "openshell.supervisorSideloadMethod" . | quote }} - - name: OPENSHELL_GRPC_ENDPOINT - value: {{ include "openshell.grpcEndpoint" . | quote }} - {{- if .Values.server.sshGatewayHost }} - - name: OPENSHELL_SSH_GATEWAY_HOST - value: {{ .Values.server.sshGatewayHost | quote }} - {{- end }} - {{- if .Values.server.sshGatewayPort }} - - name: OPENSHELL_SSH_GATEWAY_PORT - value: {{ .Values.server.sshGatewayPort | quote }} - {{- end }} - {{- if .Values.server.hostGatewayIP }} - - name: OPENSHELL_HOST_GATEWAY_IP - value: {{ .Values.server.hostGatewayIP | quote }} - {{- end }} - {{- if .Values.server.enableUserNamespaces }} - - name: OPENSHELL_ENABLE_USER_NAMESPACES - value: "true" - {{- end }} + # Secrets are env-only. Everything else lives in the ConfigMap- + # backed TOML file mounted at /etc/openshell/gateway.toml. - name: OPENSHELL_SSH_HANDSHAKE_SECRET valueFrom: secretKeyRef: name: {{ .Values.server.sshHandshakeSecretName | quote }} key: secret - {{- if .Values.server.disableTls }} - - name: OPENSHELL_DISABLE_TLS - value: "true" - {{- else }} - - name: OPENSHELL_TLS_CERT - value: /etc/openshell-tls/server/tls.crt - - name: OPENSHELL_TLS_KEY - value: /etc/openshell-tls/server/tls.key - - name: OPENSHELL_TLS_CLIENT_CA - value: /etc/openshell-tls/client-ca/ca.crt - - name: OPENSHELL_CLIENT_TLS_SECRET_NAME - value: {{ .Values.server.tls.clientTlsSecretName | quote }} - {{- if .Values.server.disableGatewayAuth }} - - name: OPENSHELL_DISABLE_GATEWAY_AUTH - value: "true" - {{- end }} - {{- end }} - {{- if .Values.server.oidc.issuer }} - - name: OPENSHELL_OIDC_ISSUER - value: {{ .Values.server.oidc.issuer | quote }} - - name: OPENSHELL_OIDC_AUDIENCE - value: {{ .Values.server.oidc.audience | quote }} - - name: OPENSHELL_OIDC_JWKS_TTL - value: {{ .Values.server.oidc.jwksTtl | quote }} - {{- if .Values.server.oidc.rolesClaim }} - - name: OPENSHELL_OIDC_ROLES_CLAIM - value: {{ .Values.server.oidc.rolesClaim | quote }} - {{- end }} - {{- if .Values.server.oidc.adminRole }} - - name: OPENSHELL_OIDC_ADMIN_ROLE - value: {{ .Values.server.oidc.adminRole | quote }} - {{- end }} - {{- if .Values.server.oidc.userRole }} - - name: OPENSHELL_OIDC_USER_ROLE - value: {{ .Values.server.oidc.userRole | quote }} - {{- end }} - {{- if .Values.server.oidc.scopesClaim }} - - name: OPENSHELL_OIDC_SCOPES_CLAIM - value: {{ .Values.server.oidc.scopesClaim | quote }} - {{- end }} - {{- end }} volumeMounts: - name: openshell-data mountPath: /var/openshell + - name: gateway-config + mountPath: /etc/openshell + readOnly: true {{- if not .Values.server.disableTls }} - name: tls-cert mountPath: /etc/openshell-tls/server @@ -191,6 +111,9 @@ spec: resources: {{- toYaml .Values.resources | nindent 12 }} volumes: + - name: gateway-config + configMap: + name: {{ include "openshell.fullname" . }}-config {{- if not .Values.server.disableTls }} - name: tls-cert secret: diff --git a/deploy/helm/openshell/tests/sandbox_namespace_test.yaml b/deploy/helm/openshell/tests/sandbox_namespace_test.yaml index a128cd440..2d3461c6f 100644 --- a/deploy/helm/openshell/tests/sandbox_namespace_test.yaml +++ b/deploy/helm/openshell/tests/sandbox_namespace_test.yaml @@ -3,32 +3,28 @@ suite: sandboxNamespace defaulting templates: - - templates/statefulset.yaml + - templates/gateway-config.yaml - templates/networkpolicy.yaml release: name: openshell namespace: my-namespace tests: - - it: defaults OPENSHELL_SANDBOX_NAMESPACE to release namespace - template: templates/statefulset.yaml + - it: defaults sandbox_namespace to release namespace in the TOML config + template: templates/gateway-config.yaml asserts: - - contains: - path: spec.template.spec.containers[0].env - content: - name: OPENSHELL_SANDBOX_NAMESPACE - value: "my-namespace" + - matchRegex: + path: data["gateway.toml"] + pattern: 'sandbox_namespace\s*=\s*"my-namespace"' - it: uses explicit sandboxNamespace when set - template: templates/statefulset.yaml + template: templates/gateway-config.yaml set: server.sandboxNamespace: other-ns asserts: - - contains: - path: spec.template.spec.containers[0].env - content: - name: OPENSHELL_SANDBOX_NAMESPACE - value: "other-ns" + - matchRegex: + path: data["gateway.toml"] + pattern: 'sandbox_namespace\s*=\s*"other-ns"' - it: defaults NetworkPolicy namespace to release namespace template: templates/networkpolicy.yaml diff --git a/examples/gateway/docker.example.toml b/examples/gateway/docker.example.toml new file mode 100644 index 000000000..27238fe0a --- /dev/null +++ b/examples/gateway/docker.example.toml @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Docker-driver example. Sandboxes run as containers on a local bridge +# network. The supervisor binary is bind-mounted from the host (no in-cluster +# image pull required); guest mTLS material is supplied as host paths. +# +# `database_url` is env-only — set OPENSHELL_DB_URL / --db-url. + +[openshell] +version = 1 + +[openshell.gateway] +bind_address = "127.0.0.1:8080" +log_level = "info" +compute_drivers = ["docker"] + +# Shared defaults inherited into [openshell.drivers.docker]. +supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +guest_tls_ca = "/etc/openshell/certs/ca.pem" +guest_tls_cert = "/etc/openshell/certs/client.pem" +guest_tls_key = "/etc/openshell/certs/client-key.pem" + +[openshell.drivers.docker] +network_name = "openshell-docker" +# Skip the image-pull-and-extract step by pointing at a locally built binary. +supervisor_bin = "/usr/local/libexec/openshell/openshell-sandbox" diff --git a/examples/gateway/gateway.example.toml b/examples/gateway/gateway.example.toml new file mode 100644 index 000000000..81a78fd63 --- /dev/null +++ b/examples/gateway/gateway.example.toml @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Example gateway configuration. See RFC 0003 for the schema and +# rfc/0003-gateway-configuration/README.md for the field-by-field reference. +# +# Source precedence (highest first): +# CLI flag > OPENSHELL_* env var > this file > built-in default +# +# `database_url` is deliberately env-only and MUST NOT appear in this file +# (set OPENSHELL_DB_URL / --db-url instead). + +[openshell] +version = 1 + +[openshell.gateway] +bind_address = "0.0.0.0:8080" +health_bind_address = "0.0.0.0:8081" +metrics_bind_address = "0.0.0.0:9090" + +log_level = "info" + +# When empty the gateway auto-detects (Kubernetes -> Podman -> Docker). VM is +# never auto-detected and requires an explicit entry here. +compute_drivers = ["kubernetes"] + +sandbox_namespace = "openshell" +sandbox_ssh_port = 2222 +ssh_gateway_host = "127.0.0.1" +ssh_gateway_port = 8080 +ssh_connect_path = "/connect/ssh" + +# Shared driver defaults — inherited into [openshell.drivers.] tables +# when the driver-specific table does not override them. +default_image = "ghcr.io/nvidia/openshell/sandbox:latest" +supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +image_pull_policy = "IfNotPresent" +client_tls_secret_name = "openshell-client-tls" + +# Gateway listener TLS (distinct from the per-driver guest_tls_*). +[openshell.gateway.tls] +cert_path = "/etc/openshell/certs/gateway.pem" +key_path = "/etc/openshell/certs/gateway-key.pem" +client_ca_path = "/etc/openshell/certs/client-ca.pem" +allow_unauthenticated = false + +[openshell.gateway.oidc] +issuer = "https://idp.example.com/realms/openshell" +audience = "openshell-cli" +jwks_ttl_secs = 3600 +roles_claim = "realm_access.roles" +admin_role = "openshell-admin" +user_role = "openshell-user" + +# ── Driver tables ────────────────────────────────────────────────────────── +# Each table inherits the shared defaults set above. Keys present here win +# over the inherited value. + +[openshell.drivers.kubernetes] +namespace = "agents" +grpc_endpoint = "https://openshell-gateway.agents.svc:8080" + +[openshell.drivers.docker] +network_name = "openshell-docker" +# supervisor_image and guest_tls_* inherit from [openshell.gateway] + +[openshell.drivers.podman] +socket_path = "/run/podman/podman.sock" +network_name = "openshell" +stop_timeout_secs = 10 + +[openshell.drivers.vm] +state_dir = "/var/lib/openshell/vm" +driver_dir = "/usr/local/libexec/openshell" +vcpus = 2 +mem_mib = 2048 +krun_log_level = 1 diff --git a/examples/gateway/kubernetes.example.toml b/examples/gateway/kubernetes.example.toml new file mode 100644 index 000000000..9a5b77605 --- /dev/null +++ b/examples/gateway/kubernetes.example.toml @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Kubernetes-driver example. The gateway runs as a Pod and creates sandbox +# Pods in another namespace. mTLS material for sandboxes is delivered via a +# Kubernetes Secret rather than host-side file paths. +# +# `database_url` is env-only — set OPENSHELL_DB_URL / --db-url. + +[openshell] +version = 1 + +[openshell.gateway] +bind_address = "0.0.0.0:8080" +health_bind_address = "0.0.0.0:8081" +metrics_bind_address = "0.0.0.0:9090" +log_level = "info" +compute_drivers = ["kubernetes"] + +# Shared defaults inherited into [openshell.drivers.kubernetes]. +default_image = "ghcr.io/nvidia/openshell/sandbox:latest" +supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +image_pull_policy = "IfNotPresent" +client_tls_secret_name = "openshell-client-tls" + +[openshell.gateway.tls] +cert_path = "/etc/openshell-tls/server/tls.crt" +key_path = "/etc/openshell-tls/server/tls.key" +client_ca_path = "/etc/openshell-tls/client-ca/ca.crt" + +[openshell.drivers.kubernetes] +namespace = "agents" +grpc_endpoint = "https://openshell-gateway.agents.svc:8080" +# Use the image volume on K8s >= 1.35 (GA in 1.36); switch to "init-container" +# on older clusters or where the ImageVolume feature gate is off. +supervisor_sideload_method = "image-volume" diff --git a/examples/gateway/microvm.example.toml b/examples/gateway/microvm.example.toml new file mode 100644 index 000000000..60bbd4356 --- /dev/null +++ b/examples/gateway/microvm.example.toml @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# microVM-driver example. Each sandbox runs inside its own libkrun microVM +# managed by the standalone `openshell-driver-vm` subprocess. Use this driver +# when you want stronger isolation than container namespaces alone. +# +# `database_url` is env-only — set OPENSHELL_DB_URL / --db-url. + +[openshell] +version = 1 + +[openshell.gateway] +bind_address = "127.0.0.1:8080" +log_level = "info" +# VM is never auto-detected; an explicit entry here is required. +compute_drivers = ["vm"] + +# Shared defaults inherited into [openshell.drivers.vm]. +default_image = "ghcr.io/nvidia/openshell/sandbox:latest" +guest_tls_ca = "/var/lib/openshell/guest-tls/ca.pem" +guest_tls_cert = "/var/lib/openshell/guest-tls/client.pem" +guest_tls_key = "/var/lib/openshell/guest-tls/client-key.pem" + +[openshell.drivers.vm] +state_dir = "/var/lib/openshell/vm" +# Where the gateway looks for the openshell-driver-vm subprocess binary. +driver_dir = "/usr/local/libexec/openshell" +vcpus = 2 +mem_mib = 2048 +krun_log_level = 1 diff --git a/examples/gateway/podman.example.toml b/examples/gateway/podman.example.toml new file mode 100644 index 000000000..11d241d6e --- /dev/null +++ b/examples/gateway/podman.example.toml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Podman-driver example. Sandboxes run as Podman containers on a user-mode +# bridge network. The supervisor image is mounted read-only via Podman's +# `type=image` mount; guest mTLS material is supplied as host paths. +# +# `database_url` is env-only — set OPENSHELL_DB_URL / --db-url. + +[openshell] +version = 1 + +[openshell.gateway] +bind_address = "127.0.0.1:8080" +log_level = "info" +compute_drivers = ["podman"] + +# Shared defaults inherited into [openshell.drivers.podman]. +default_image = "ghcr.io/nvidia/openshell/sandbox:latest" +supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +image_pull_policy = "missing" # Podman pull policy: always | missing | never | newer +guest_tls_ca = "/etc/openshell/certs/ca.pem" +guest_tls_cert = "/etc/openshell/certs/client.pem" +guest_tls_key = "/etc/openshell/certs/client-key.pem" + +[openshell.drivers.podman] +# Rootless socket path. For root Podman use /run/podman/podman.sock. +socket_path = "/run/user/1000/podman/podman.sock" +network_name = "openshell" +stop_timeout_secs = 10 diff --git a/rfc/0003-gateway-configuration/README.md b/rfc/0003-gateway-configuration/README.md index 70ca09f7b..17f3ae292 100644 --- a/rfc/0003-gateway-configuration/README.md +++ b/rfc/0003-gateway-configuration/README.md @@ -37,7 +37,7 @@ Three sources are merged at startup, in descending priority: CLI flags > OPENSHELL_* environment variables > TOML config file > built-in defaults ``` -The TOML file is optional. If neither `--config` nor `OPENSHELL_CONFIG` is set, the gateway behaves exactly as before. Any field present in the file is overridden by a CLI flag or matching environment variable. +The TOML file is optional. If neither `--config` nor `OPENSHELL_GATEWAY_CONFIG` is set, the gateway behaves exactly as before. Any field present in the file is overridden by a CLI flag or matching environment variable. ### Loading the file @@ -261,7 +261,7 @@ The chart owners can migrate one section at a time: `OPENSHELL_*` env vars and t No part of this RFC has shipped yet. The work breaks down as: 1. **Add a config-file loader to `openshell-server`** — define a `GatewayConfigFile` struct that mirrors the schema above, parse it with `serde` + `toml`, and merge it into `openshell_core::Config` plus the per-driver structs in `compute/`. -2. **Wire the merge into `cli.rs`** — add `--config` / `OPENSHELL_CONFIG`, gate each existing flag's "apply from file" path on clap `ValueSource::DefaultValue`, and run cross-field validation after the merge. +2. **Wire the merge into `cli.rs`** — add `--config` / `OPENSHELL_GATEWAY_CONFIG`, gate each existing flag's "apply from file" path on clap `ValueSource::DefaultValue`, and run cross-field validation after the merge. 3. **Per-driver deserialization** — give each driver crate (`openshell-driver-{kubernetes,docker,podman,vm}`) a `from_toml` (or `serde::Deserialize`) entry point so the gateway can hand each driver its own table. 4. **Test coverage** — file parsing, env-overrides-file, CLI-overrides-env, partial TLS error, port-collision error, unknown-field rejection, missing driver table fallback. 5. **Helm chart migration** — add `gateway.config` value tree, render the `ConfigMap`, mount it, switch the gateway container to `--config`. Keep the `OPENSHELL_*` env names available as opt-in overrides for secrets.