diff --git a/cortex-app-server/src/config.rs b/cortex-app-server/src/config.rs index 98516162..3deae765 100644 --- a/cortex-app-server/src/config.rs +++ b/cortex-app-server/src/config.rs @@ -257,6 +257,9 @@ pub struct RateLimitConfig { /// Rate limit by IP address. #[serde(default = "default_true")] pub by_ip: bool, + /// Trust proxy headers (X-Real-IP, X-Forwarded-For) for client IP detection. + #[serde(default)] + pub trust_proxy: bool, /// Rate limit by API key. #[serde(default)] pub by_api_key: bool, @@ -283,6 +286,7 @@ impl Default for RateLimitConfig { requests_per_minute: default_rpm(), burst_size: default_burst(), by_ip: true, + trust_proxy: false, by_api_key: false, by_user: false, exempt_paths: vec!["/health".to_string()], diff --git a/cortex-cli/src/agent_cmd.rs b/cortex-cli/src/agent_cmd.rs index a241bd4e..e3cec19d 100644 --- a/cortex-cli/src/agent_cmd.rs +++ b/cortex-cli/src/agent_cmd.rs @@ -1027,13 +1027,25 @@ async fn run_list(args: ListArgs) -> Result<()> { async fn run_show(args: ShowArgs) -> Result<()> { let agents = load_all_agents()?; - let agent = agents - .iter() - .find(|a| a.name == args.name) - .ok_or_else(|| anyhow::anyhow!("Agent '{}' not found", args.name))?; + let agent = match agents.iter().find(|a| a.name == args.name) { + Some(a) => a, + None => { + // Return JSON error if --json flag is used + if args.json { + let error_json = serde_json::json!({ + "error": format!("Agent '{}' not found", args.name), + "available_agents": agents.iter().map(|a| &a.name).collect::>() + }); + println!("{}", serde_json::to_string_pretty(&error_json)?); + // Return Ok to prevent additional error output + return Ok(()); + } + bail!("Agent '{}' not found", args.name); + } + }; - // Warn if the agent is hidden - if agent.hidden { + // Warn if the agent is hidden (only in non-JSON mode) + if agent.hidden && !args.json { eprintln!( "Note: '{}' is a hidden agent (not shown in default listings).", agent.name diff --git a/cortex-cli/src/debug_cmd.rs b/cortex-cli/src/debug_cmd.rs index 2d8f0afc..7e645081 100644 --- a/cortex-cli/src/debug_cmd.rs +++ b/cortex-cli/src/debug_cmd.rs @@ -405,7 +405,7 @@ fn generate_unified_diff(old_content: &str, new_content: &str) -> String { #[derive(Debug, Parser)] pub struct FileArgs { /// Path to the file to inspect. - pub path: PathBuf, + pub path: Option, /// Output as JSON. #[arg(long)] @@ -456,13 +456,114 @@ struct FileMetadata { readonly: bool, } +/// Check if a path is within a safe directory (working directory or user home). +/// Returns an error message if the path is outside allowed directories. +fn validate_path_security(path: &std::path::Path) -> Result<()> { + let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf()); + let path_str = path.to_string_lossy(); + + // Block access to /proc filesystem - can leak environment variables and secrets + if path_str.starts_with("/proc") { + bail!( + "Access denied: /proc filesystem is blocked for security reasons.\n\ + The /proc filesystem can expose sensitive runtime information like environment \ + variables, command line arguments, and process details." + ); + } + + // Block access to /sys filesystem + if path_str.starts_with("/sys") { + bail!( + "Access denied: /sys filesystem is blocked for security reasons.\n\ + The /sys filesystem can expose sensitive hardware and kernel information." + ); + } + + // Block access to sensitive system files + let sensitive_paths = [ + "/etc/shadow", + "/etc/gshadow", + "/etc/sudoers", + "/etc/ssh/ssh_host", + "/root/.ssh", + "/root/.gnupg", + "/root/.bash_history", + "/root/.zsh_history", + "/var/lib/postgresql/.pgpass", + ]; + + for sensitive in sensitive_paths { + if path_str.starts_with(sensitive) { + bail!( + "Access denied: '{}' is a sensitive system file.\n\ + For security reasons, debug file cannot access sensitive system paths.", + path.display() + ); + } + } + + // Get current working directory and user home directory + let cwd = std::env::current_dir().ok(); + let home = dirs::home_dir(); + + // Check if path is within allowed directories + let is_in_cwd = cwd.as_ref().map(|c| path.starts_with(c)).unwrap_or(false); + let is_in_home = home.as_ref().map(|h| path.starts_with(h)).unwrap_or(false); + + // Allow relative paths that stay within cwd + if !is_in_cwd && !is_in_home && path.is_absolute() { + // Check if it's a system path that should be blocked + let is_system_path = path_str.starts_with("/etc/") + || path_str.starts_with("/var/") + || path_str.starts_with("/root/") + || path_str.starts_with("/home/") && !is_in_home; + + if is_system_path { + bail!( + "Access denied: '{}' is outside the working directory and user home.\n\ + For security reasons, debug file can only access files within:\n\ + - Current working directory: {}\n\ + - User home directory: {}", + path.display(), + cwd.as_ref() + .map(|p| p.display().to_string()) + .unwrap_or_else(|| "(unknown)".to_string()), + home.as_ref() + .map(|p| p.display().to_string()) + .unwrap_or_else(|| "(unknown)".to_string()) + ); + } + } + + Ok(()) +} + async fn run_file(args: FileArgs) -> Result<()> { - let path = if args.path.is_absolute() { - args.path.clone() + // Handle missing path argument with JSON-aware error + let provided_path = match args.path { + Some(p) => p, + None => { + if args.json { + let error_json = serde_json::json!({ + "error": "Missing required argument: ", + "usage": "cortex debug file [--json]" + }); + println!("{}", serde_json::to_string_pretty(&error_json)?); + return Ok(()); + } + bail!("Missing required argument: \n\nUsage: cortex debug file [--json]"); + } + }; + + let path = if provided_path.is_absolute() { + provided_path.clone() } else { - std::env::current_dir()?.join(&args.path) + std::env::current_dir()?.join(&provided_path) }; + // Security check: validate path is within allowed directories + validate_path_security(&path)?; + let exists = path.exists(); // Detect special file types using stat() BEFORE attempting any reads diff --git a/cortex-cli/src/lib.rs b/cortex-cli/src/lib.rs index d68e04e3..c008e4c5 100644 --- a/cortex-cli/src/lib.rs +++ b/cortex-cli/src/lib.rs @@ -140,7 +140,7 @@ fn cleanup_temp_files(temp_dir: &std::path::Path) { /// Install a panic hook that tracks panics in background threads. /// This ensures the main thread can detect background panics and exit /// with an appropriate error code (#2805). -fn install_panic_hook() { +pub fn install_panic_hook() { // Only install once if PANIC_HOOK_INSTALLED.swap(true, Ordering::SeqCst) { return; diff --git a/cortex-cli/src/main.rs b/cortex-cli/src/main.rs index 28edb7b4..f0d496c1 100644 --- a/cortex-cli/src/main.rs +++ b/cortex-cli/src/main.rs @@ -747,6 +747,11 @@ struct ServeCommand { #[arg(short, long, default_value = "3000")] port: u16, + /// Model to use for AI requests (e.g., gpt-4o, claude-sonnet-4-20250514). + /// Overrides the default model from config. + #[arg(short, long)] + model: Option, + /// Host address to bind the server to. /// Examples: /// 127.0.0.1 - Local only (default, most secure) @@ -2072,10 +2077,21 @@ async fn run_serve(serve_cli: ServeCommand) -> Result<()> { vec![] }; + // Configure provider settings with model override if specified + let providers = if let Some(ref model) = serve_cli.model { + cortex_app_server::config::ProviderConfig { + default_model: model.clone(), + ..Default::default() + } + } else { + cortex_app_server::config::ProviderConfig::default() + }; + let config = cortex_app_server::ServerConfig { listen_addr: format!("{}:{}", serve_cli.host, serve_cli.port), auth: auth_config, cors_origins, + providers, ..Default::default() }; @@ -2090,6 +2106,11 @@ async fn run_serve(serve_cli: ServeCommand) -> Result<()> { serve_cli.host, serve_cli.port, auth_status )); + // Print model information if specified + if let Some(ref model) = serve_cli.model { + println!("Model: {}", model); + } + if serve_cli.cors || !serve_cli.cors_origins.is_empty() { if serve_cli.cors_origins.is_empty() { println!("CORS: Allowing all origins (*)"); diff --git a/cortex-cli/src/mcp_cmd.rs b/cortex-cli/src/mcp_cmd.rs index 26ec37f0..88a3a31c 100644 --- a/cortex-cli/src/mcp_cmd.rs +++ b/cortex-cli/src/mcp_cmd.rs @@ -199,6 +199,18 @@ fn validate_env_var_value(value: &str) -> Result<()> { Ok(()) } +/// Dangerous shell patterns that could cause resource exhaustion +const DANGEROUS_SHELL_PATTERNS: &[&str] = &[ + "while true", + "while :", + "while 1", + "for ((", + ":(){", // Fork bomb pattern + "yes |", + "cat /dev/zero", + "cat /dev/urandom", +]; + /// Validates command arguments for stdio transport. fn validate_command_args(args: &[String]) -> Result<()> { if args.is_empty() { @@ -211,6 +223,11 @@ fn validate_command_args(args: &[String]) -> Result<()> { MAX_COMMAND_ARGS ); } + + // Concatenate all args to check for dangerous patterns + let full_command = args.join(" "); + let command_lower = full_command.to_lowercase(); + for (i, arg) in args.iter().enumerate() { if arg.len() > MAX_COMMAND_ARG_LENGTH { bail!( @@ -224,6 +241,43 @@ fn validate_command_args(args: &[String]) -> Result<()> { bail!("Command argument {} contains null bytes", i + 1); } } + + // Check for dangerous shell patterns that could cause resource exhaustion + for pattern in DANGEROUS_SHELL_PATTERNS { + if command_lower.contains(pattern) { + bail!( + "Command contains potentially dangerous pattern '{}' which could cause resource exhaustion.\n\ + Infinite loops and resource-intensive patterns are not allowed in MCP server commands.\n\ + If this is intentional, consider wrapping the command in a script with proper resource limits.", + pattern + ); + } + } + + // Warn about bash -c with complex commands (common vector for dangerous commands) + if (args.first().map(|s| s.as_str()) == Some("bash") + || args.first().map(|s| s.as_str()) == Some("sh")) + && args.iter().any(|a| a == "-c") + { + // Check the -c argument for infinite loops + if let Some(c_idx) = args.iter().position(|a| a == "-c") { + if let Some(script) = args.get(c_idx + 1) { + let script_lower = script.to_lowercase(); + if script_lower.contains("while") + && (script_lower.contains("true") + || script_lower.contains(":") + || script_lower.contains("1;")) + { + bail!( + "Shell command appears to contain an infinite loop.\n\ + Commands that run indefinitely are not suitable for MCP servers.\n\ + MCP server processes should respond to requests and exit gracefully." + ); + } + } + } + } + Ok(()) } diff --git a/cortex-cli/src/run_cmd.rs b/cortex-cli/src/run_cmd.rs index cb937759..027bd90e 100644 --- a/cortex-cli/src/run_cmd.rs +++ b/cortex-cli/src/run_cmd.rs @@ -102,8 +102,8 @@ pub struct RunCli { /// Output format alias (same as --format). /// Valid values: default, json, jsonl. - #[arg(long = "output", value_enum, conflicts_with = "format")] - pub output: Option, + #[arg(long = "output-format", value_enum, conflicts_with = "format")] + pub output_format_alias: Option, /// File(s) to attach to the message. /// Can be specified multiple times. @@ -675,8 +675,8 @@ impl RunCli { return self.run_dry_run(message, attachments).await; } - // Use --output if provided, otherwise use --format - let effective_format = self.output.unwrap_or(self.format); + // Use --output-format if provided, otherwise use --format + let effective_format = self.output_format_alias.unwrap_or(self.format); let is_json = matches!(effective_format, OutputFormat::Json | OutputFormat::Jsonl); let is_terminal = io::stdout().is_terminal(); let streaming_enabled = self.is_streaming_enabled(); diff --git a/cortex-cli/src/scrape_cmd.rs b/cortex-cli/src/scrape_cmd.rs index 272c39f5..065744c6 100644 --- a/cortex-cli/src/scrape_cmd.rs +++ b/cortex-cli/src/scrape_cmd.rs @@ -203,6 +203,9 @@ fn validate_url_security(url: &str) -> Result<()> { Ok(()) } +/// Maximum allowed timeout in seconds (1 hour) +const MAX_TIMEOUT_SECONDS: u64 = 3600; + impl ScrapeCommand { /// Run the scrape command. pub async fn run(self) -> Result<()> { @@ -214,15 +217,33 @@ impl ScrapeCommand { // Validate URL for security (control characters, etc.) (#2448) validate_url_security(&self.url)?; + // Validate URL scheme - must be http or https + let url_lower = self.url.to_lowercase(); + if !url_lower.starts_with("http://") && !url_lower.starts_with("https://") { + bail!( + "Unsupported URL scheme. Use http:// or https://\n\ + Got: {}", + self.url.chars().take(30).collect::() + ); + } + + // Validate timeout is within reasonable bounds + if self.timeout > MAX_TIMEOUT_SECONDS { + bail!( + "Timeout value {} seconds exceeds maximum allowed value of {} seconds (1 hour).\n\ + Use a timeout value between 1 and {}.", + self.timeout, + MAX_TIMEOUT_SECONDS, + MAX_TIMEOUT_SECONDS + ); + } + // Parse output format let format: OutputFormat = self.format.parse()?; - // Build HTTP client with redirect policy and cookie store - // Cookie store is enabled to persist cookies across redirects, which is - // required for auth-gated pages that set cookies then redirect. - let mut client_builder = create_client_builder() - .redirect(reqwest::redirect::Policy::limited(10)) - .cookie_store(true); + // Build HTTP client with redirect policy + let mut client_builder = + create_client_builder().redirect(reqwest::redirect::Policy::limited(10)); // Override timeout if specified (0 means no timeout) // Apply timeout to both connection phase and overall request @@ -322,15 +343,86 @@ impl ScrapeCommand { .and_then(|v| v.to_str().ok()) .unwrap_or("text/html"); + // Check Content-Encoding for compressed responses + let content_encoding = response + .headers() + .get("content-encoding") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_lowercase()); + if self.verbose { eprintln!("Content-Type: {content_type}"); + if let Some(ref enc) = content_encoding { + eprintln!("Content-Encoding: {enc}"); + } + } + + // Check for non-text content types + let is_text_content = content_type.starts_with("text/") + || content_type.contains("application/json") + || content_type.contains("application/xml") + || content_type.contains("application/javascript") + || content_type.contains("application/xhtml") + || content_type.contains("+xml") + || content_type.contains("+json"); + + if !is_text_content { + bail!( + "Content is binary ({}), not text.\n\ + The scrape command is designed for text-based content (HTML, JSON, XML).\n\ + Binary content types like images, videos, and raw bytes cannot be displayed as text.", + content_type + ); } - let html = response - .text() + // Get the raw bytes first to check for binary content + let bytes = response + .bytes() .await .context("Failed to read response body")?; + // Check if the response is compressed but we received raw bytes + // This can happen if Accept-Encoding was not sent and server sends compressed anyway + if let Some(ref enc) = content_encoding { + if enc == "gzip" || enc == "deflate" || enc == "br" { + bail!( + "Response is compressed with {} encoding but automatic decompression failed.\n\ + The server returned compressed data. Try:\n\ + - Using curl or wget which handle compression automatically\n\ + - Adding --header \"Accept-Encoding: identity\" to request uncompressed content", + enc + ); + } + } + + // Check for binary content in the bytes (null bytes or high ratio of non-printable chars) + let null_byte_count = bytes.iter().filter(|&&b| b == 0).count(); + let non_printable_count = bytes + .iter() + .filter(|&&b| b < 32 && b != b'\n' && b != b'\r' && b != b'\t') + .count(); + + if null_byte_count > 0 || (bytes.len() > 100 && non_printable_count > bytes.len() / 10) { + bail!( + "Content appears to be binary data, not text.\n\ + Detected {} null bytes and {} non-printable characters in {} bytes.\n\ + The scrape command cannot process binary content.", + null_byte_count, + non_printable_count, + bytes.len() + ); + } + + // Convert bytes to string + let html = String::from_utf8(bytes.to_vec()) + .or_else(|_| { + // Try to decode as lossy UTF-8 if strict fails + Ok::( + String::from_utf8_lossy(&bytes).to_string(), + ) + }) + .context("Failed to decode response as text")?; + if self.verbose { eprintln!("Received {} bytes", html.len()); }