diff --git a/CHANGELOG.md b/CHANGELOG.md
index a6b9bff..38b1ddc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+
+- **CLI startup robustness** — `.env` loading is now non-fatal.
+  - `stackdog --help` and other commands no longer panic when `.env` is missing or contains malformed lines.
+  - Stackdog now logs a warning and continues with existing environment variables.
+
+- **Installer release resolution** — `install.sh` now handles missing `/releases/latest` responses gracefully.
+  - Falls back to the most recent release entry when no stable "latest" release is available.
+  - Improves error messaging and updates install examples to use the `main` branch script URL.
+
 ### Added
 
 #### Log Sniffing & Analysis (`stackdog sniff`)
diff --git a/README.md b/README.md
index 7509523..a4a1f86 100644
--- a/README.md
+++ b/README.md
@@ -47,14 +47,16 @@ ### Install with curl (Linux)
 
 ```bash
-curl -fsSL https://raw.githubusercontent.com/vsilent/stackdog/dev/install.sh | sudo bash
+curl -fsSL https://raw.githubusercontent.com/vsilent/stackdog/main/install.sh | sudo bash
 ```
 
 Pin a specific version:
 
 ```bash
-curl -fsSL https://raw.githubusercontent.com/vsilent/stackdog/dev/install.sh | sudo bash -s -- --version v0.2.0
+curl -fsSL https://raw.githubusercontent.com/vsilent/stackdog/main/install.sh | sudo bash -s -- --version v0.2.1
 ```
 
+If your repository has no published stable release yet, use `--version` explicitly.
+
 ### Run as Binary
 
 ```bash
diff --git a/src/main.rs b/src/main.rs
index a665795..3eefadd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -31,7 +31,12 @@ use tracing_subscriber::FmtSubscriber;
 #[actix_rt::main]
 async fn main() -> io::Result<()> {
     // Load environment
-    dotenv::dotenv().expect("Could not read .env file");
+    if let Err(err) = dotenv::dotenv() {
+        eprintln!(
+            "Warning: could not load .env file ({}). Continuing with existing environment.",
+            err
+        );
+    }
 
     // Parse CLI arguments
     let cli = Cli::parse();
diff --git a/src/sniff/analyzer.rs b/src/sniff/analyzer.rs
index f0275f7..26a720f 100644
--- a/src/sniff/analyzer.rs
+++ b/src/sniff/analyzer.rs
@@ -8,9 +8,14 @@
 use anyhow::{Context, Result};
 use async_trait::async_trait;
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
+use std::collections::HashSet;
 use crate::sniff::reader::LogEntry;
 
+const MAX_PROMPT_LINES: usize = 200;
+const MAX_PROMPT_CHARS: usize = 16_000;
+const MAX_LINE_CHARS: usize = 500;
+
 /// Summary produced by AI analysis of log entries
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct LogSummary {
@@ -69,6 +74,17 @@ pub struct OpenAiAnalyzer {
 }
 
 impl OpenAiAnalyzer {
+    fn push_selected_index(
+        selected_indices: &mut Vec<usize>,
+        seen: &mut HashSet<usize>,
+        idx: usize,
+        total_entries: usize,
+    ) {
+        if idx < total_entries && seen.insert(idx) {
+            selected_indices.push(idx);
+        }
+    }
+
     pub fn new(api_url: String, api_key: Option<String>, model: String) -> Self {
         Self {
             api_url,
@@ -79,8 +95,21 @@ impl OpenAiAnalyzer {
     }
 
     fn build_prompt(entries: &[LogEntry]) -> String {
-        let lines: Vec<&str> = entries.iter().map(|e| e.line.as_str()).collect();
-        let log_block = lines.join("\n");
+        let prompt_entries = Self::select_prompt_entries(entries);
+        let included_count = prompt_entries.len();
+        let included_chars: usize = prompt_entries.iter().map(|line| line.len()).sum();
+        let was_truncated = included_count < entries.len();
+        let truncation_note = if was_truncated {
+            format!(
+                "Only {} of {} entries are included below to keep the request bounded. \
+                 Prioritize the included lines when identifying anomalies, but keep the full batch size in mind.\n",
+                included_count,
+                entries.len()
+            )
+        } else {
+            String::new()
+        };
+        let log_block = prompt_entries.join("\n");
 
         format!(
             "Analyze these log entries and provide a JSON response with:\n\
@@ -90,9 +119,107 @@
             4. \"key_events\": Array of important events (max 5)\n\
             5. \"anomalies\": Array of objects with \"description\", \"severity\" (Low/Medium/High/Critical), \"sample_line\"\n\n\
             Respond ONLY with valid JSON, no markdown.\n\n\
-            Log entries:\n{}", log_block
+            Batch metadata:\n\
+            - total_entries: {}\n\
+            - included_entries: {}\n\
+            - included_characters: {}\n\
+            {}\
+            Log entries:\n{}",
+            entries.len(),
+            included_count,
+            included_chars,
+            truncation_note,
+            log_block
         )
     }
+
+    fn select_prompt_entries(entries: &[LogEntry]) -> Vec<String> {
+        if entries.is_empty() {
+            return Vec::new();
+        }
+
+        let mut selected_indices = Vec::new();
+        let mut seen = HashSet::new();
+
+        for (idx, entry) in entries.iter().enumerate() {
+            if Self::is_priority_line(&entry.line) {
+                Self::push_selected_index(&mut selected_indices, &mut seen, idx, entries.len());
+            }
+        }
+
+        let recent_window_start = entries.len().saturating_sub(MAX_PROMPT_LINES);
+        for idx in recent_window_start..entries.len() {
+            Self::push_selected_index(&mut selected_indices, &mut seen, idx, entries.len());
+        }
+
+        if selected_indices.len() < MAX_PROMPT_LINES {
+            let stride = (entries.len() / MAX_PROMPT_LINES.max(1)).max(1);
+            let mut idx = 0;
+            while idx < entries.len() && selected_indices.len() < MAX_PROMPT_LINES {
+                Self::push_selected_index(&mut selected_indices, &mut seen, idx, entries.len());
+                idx += stride;
+            }
+        }
+
+        selected_indices.sort_unstable();
+
+        let mut prompt_entries = Vec::new();
+        let mut total_chars = 0;
+
+        for idx in selected_indices {
+            if prompt_entries.len() >= MAX_PROMPT_LINES {
+                break;
+            }
+
+            let line = Self::truncate_line(&entries[idx].line);
+            let next_chars = if prompt_entries.is_empty() {
+                line.len()
+            } else {
+                total_chars + 1 + line.len()
+            };
+
+            if next_chars > MAX_PROMPT_CHARS {
+                break;
+            }
+
+            total_chars = next_chars;
+            prompt_entries.push(line);
+        }
+
+        if prompt_entries.is_empty() {
+            prompt_entries.push(Self::truncate_line(&entries[entries.len() - 1].line));
+        }
+
+        prompt_entries
+    }
+
+    fn is_priority_line(line: &str) -> bool {
+        let lower = line.to_ascii_lowercase();
+        [
+            "error",
+            "warn",
+            "fatal",
+            "panic",
+            "exception",
+            "denied",
+            "unauthorized",
+            "failed",
+            "timeout",
+            "attack",
+            "anomaly",
+        ]
+        .iter()
+        .any(|pattern| lower.contains(pattern))
+    }
+
+    fn truncate_line(line: &str) -> String {
+        let truncated: String = line.chars().take(MAX_LINE_CHARS).collect();
+        if truncated.len() == line.len() {
+            truncated
+        } else {
+            format!("{}...[truncated]", truncated)
+        }
+    }
 }
 
 /// Response structure from the LLM
@@ -262,10 +389,11 @@ impl LogAnalyzer for OpenAiAnalyzer {
         let source_id = &entries[0].source_id;
 
         log::debug!(
-            "Sending {} entries to AI API (model: {}, url: {})",
+            "Sending {} entries to AI API (model: {}, url: {}, prompt_chars: {})",
             entries.len(),
             self.model,
-            self.api_url
+            self.api_url,
+            prompt.len()
         );
 
         log::trace!("Prompt:\n{}", prompt);
@@ -492,6 +620,58 @@ mod tests {
         assert!(prompt.contains("JSON"));
     }
 
+    #[test]
+    fn test_build_prompt_limits_included_entries() {
+        let entries: Vec<LogEntry> = (0..250)
+            .map(|i| LogEntry {
+                source_id: "test-source".into(),
+                timestamp: Utc::now(),
+                line: format!("INFO line {}", i),
+                metadata: HashMap::new(),
+            })
+            .collect();
+
+        let prompt = OpenAiAnalyzer::build_prompt(&entries);
+
+        assert!(prompt.contains("- total_entries: 250"));
+        assert!(prompt.contains("- included_entries: 200"));
+        assert!(prompt.contains("Only 200 of 250 entries are included below"));
+        assert!(prompt.contains("INFO line 249"));
+        assert!(!prompt.contains("INFO line 0"));
+    }
+
+    #[test]
+    fn test_select_prompt_entries_preserves_priority_lines() {
+        let mut entries: Vec<LogEntry> = (0..260)
+            .map(|i| LogEntry {
+                source_id: "test-source".into(),
+                timestamp: Utc::now(),
+                line: format!("INFO line {}", i),
+                metadata: HashMap::new(),
+            })
+            .collect();
+        entries[10].line = "ERROR: early failure".into();
+
+        let selected = OpenAiAnalyzer::select_prompt_entries(&entries);
+
+        assert_eq!(selected.len(), 200);
+        assert!(selected
+            .iter()
+            .any(|line| line.contains("ERROR: early failure")));
+    }
+
+    #[test]
+    fn test_select_prompt_entries_truncates_long_lines() {
+        let long_line = "x".repeat(MAX_LINE_CHARS + 50);
+        let entries = make_entries(&[&long_line]);
+
+        let selected = OpenAiAnalyzer::select_prompt_entries(&entries);
+
+        assert_eq!(selected.len(), 1);
+        assert!(selected[0].ends_with("...[truncated]"));
+        assert!(selected[0].len() > MAX_LINE_CHARS);
+    }
+
     #[test]
     fn test_parse_llm_response_valid() {
         let entries = make_entries(&["test line"]);